Finding the occurrence of a specific character from a text file - Python - python

I want to write a function that takes a file name and a list as arguments and reads the text within the corresponding file, then creates a dictionary whose keys are the characters of the provided list, and the values are the counts of these characters within the text. If the file does not exist, then the function returns an empty dictionary.
For example:
sample.txt
This is an example sentence
This is yet another sentence
Here is the code that I've written so far:
def text2dict(filename, characters):
count_word = dict()
for char in characters:
count_word[char] = 0
with open(filename) as input_text:
text = input_text.read()
words = text.lower().split()
for word in words:
_word = word.strip('.,:-)()')
if word in count_word:
count_word[_word] += 1
return count_word
file_name = "sample.txt"
list_char = ["a", "b", "c", "t"]
text2dict(file_name, list_char)
Expected Output:
{'a':3, 'b':0, 'c':2, 't':6}
The output I got:
{'a': 0, 'b': 0, 'c': 0, 't': 0}

You can use str.count() for that. There is also no need to pre-fill the dictionary, because we no longer rely on the in-place += operator.
def text2dict(filename, characters):
    """Count how often each requested character occurs in a text file.

    Counting is case-insensitive: the text is lower-cased first, so asking
    for ``"t"`` also matches ``"T"``.

    Args:
        filename: Path of the text file to read.
        characters: Iterable of strings (normally single characters) to count.

    Returns:
        A dict mapping every entry of ``characters`` to its number of
        occurrences in the file, or an empty dict if the file does not exist.
    """
    try:
        with open(filename) as input_text:
            # Lower-case once so that "This" contributes to the count of "t".
            text = input_text.read().lower()
    except FileNotFoundError:
        # The task asks for an empty dictionary when the file is missing.
        return {}
    return {char: text.count(char.lower()) for char in characters}
With this you get the expected result
>>> file_name = r"sample.txt"
>>> list_char = ["a", "b", "c", "t"]
>>> print(text2dict(file_name, list_char))
{'a': 3, 'b': 0, 'c': 2, 't': 6}

You are checking whether a whole word exists in the dictionary, but the dictionary only contains single letters.
Spoiler:
if you want a working version right away:
def text2dict(filename, characters):
    """Count occurrences of the given characters in a text file.

    The text is lower-cased before counting, so the count for ``"t"``
    includes ``"T"``.

    Args:
        filename: Path of the text file to read.
        characters: Iterable of single-character strings to count.

    Returns:
        A dict mapping each entry of ``characters`` to its count, or an
        empty dict if the file does not exist.
    """
    try:
        with open(filename) as input_text:
            text = input_text.read().lower()
    except FileNotFoundError:
        # The task asks for an empty dictionary when the file is missing.
        return {}
    # Pre-fill so that characters which never occur are reported as 0.
    count_word = {char: 0 for char in characters}
    # Walk the text character by character; the keys are letters, not words.
    for char in text:
        if char in count_word:
            count_word[char] += 1
    return count_word

You can use collections.Counter().
from collections import Counter
with open('test.txt') as file:
    text = file.read().lower()
wanted = ["a", "b", "c", "t"]
# Count every character of the whole file once. A Counter returns 0 for
# characters that never occur, so no explicit default is needed.
letter_counts = Counter(text)
finalDict = {letter: letter_counts[letter] for letter in wanted}
output
{'a': 3, 'b': 0, 'c': 2, 't': 6}
Shortened all the way down, for funsies:
from collections import Counter
with open('test.txt') as file:
    # Count the whole file at once; missing letters come back as 0.
    counts = Counter(file.read().lower())
finalDict = {letter: counts[letter] for letter in ["a", "b", "c", "t"]}

Related

How to count when using list of data

test = "aaaabbcdeiiifkdkkffkk"
search = ["a", "c", "d", "e"]
data = test.count(search)
data
When I run this, I get
TypeError: must be str, not list
I want to know how I can count when using [ ] instead of a single string.
you can use a loop on search
data = [test.count(s) for s in search]
count() takes a str parameter and not a list.
test = "aaaabbcdeiiifkdkkffkk"
data = test.count("a")
More info here.
You need to loop through the search list; this list comprehension does the job:
[test.count(i) for i in search]
But I prefer this:
test = "aaaabbcdeiiifkdkkffkk"
search = ["a", "c", "d", "e"]
data = {}
for i in search:
data.update({i: test.count(i)})
Or you can use a one liner.
data = {c: test.count(c) for c in search}
Out: {'a': 4, 'c': 1, 'd': 2, 'e': 1}
There are several ways to count the occurrences; note that .count() only accepts a single str value.
Method 1: Using List Comprehension
data = [test.count(s) for s in search]
Method 2: Using Map
data = list(map(test.count,search))
You can build other variants along the same lines.
Using collections.Counter
Ex:
from collections import Counter
test = "aaaabbcdeiiifkdkkffkk"
search = ["a", "c", "d", "e"]
count_data = Counter(test) #--> Counter({'k': 5, 'a': 4, 'i': 3, 'f': 3, 'b': 2, 'd': 2, 'c': 1, 'e': 1})
for i in search:
print(i, count_data.get(i, 0))
Output:
a 4
c 1
d 2
e 1
Loop over search strings and apply count to each one.
data = []
for s in search:
    # Append rather than assign by index: an empty list has no slot 0 yet,
    # so data[pos] = ... would raise IndexError.
    data.append(test.count(s))
print(data)

index word in dictionary

I have a text file, and I want to store each of its words in a dictionary and then print the index position of every occurrence of that word in the file.
The code I have is only giving me the number of times the word is in the text file. How can I change this?
I have already converted to lowercase.
dicti = {}
for eachword in wordsintxt:
freq = dicti.get(eachword, None)
if freq == None:
dicti[eachword] = 1
else:
dicti[eachword] = freq + 1
print(dicti)
Change your code to keep the indices themselves, rather than merely count them:
for index, eachword in enumerate(wordsintxt):
    # setdefault creates the list the first time a word is seen and returns
    # it either way, so the first occurrence's index is recorded as well.
    dicti.setdefault(eachword, []).append(index)
If you still need the word frequency: that's easy to recover:
freq = len(dicti[word])
Update per OP comment
Without enumerate, simply provide that functionality yourself:
for index in range(len(wordsintxt)):
    # Look the word up with the loop variable itself.
    eachword = wordsintxt[index]
I'm not sure why you'd want to do that; the operation is idiomatic and common enough that Python developers created enumerate for exactly that purpose.
You can use this:
wordsintxt = ["hello", "world", "the", "a", "Hello", "my", "name", "is", "the"]
words_data = {}
for i, word in enumerate(wordsintxt):
word = word.lower()
words_data[word] = words_data.get(word, {'freq': 0, 'indexes': []})
words_data[word]['freq'] += 1
words_data[word]['indexes'].append(i)
for k, v in words_data.items():
print(k, '\t', v)
Which prints:
hello {'freq': 2, 'indexes': [0, 4]}
world {'freq': 1, 'indexes': [1]}
the {'freq': 2, 'indexes': [2, 8]}
a {'freq': 1, 'indexes': [3]}
my {'freq': 1, 'indexes': [5]}
name {'freq': 1, 'indexes': [6]}
is {'freq': 1, 'indexes': [7]}
You can avoid checking if the value exists in your dictionary and then performing a custom action by just using data[key] = data.get(key, STARTING_VALUE)
Greetings!
Use collections.defaultdict with enumerate, just append all the indexes you retrieve from enumerate
from collections import defaultdict
with open('test.txt') as f:
content = f.read()
words = content.split()
dd = defaultdict(list)
for i, v in enumerate(words):
dd[v.lower()].append(i)
print(dd)
# defaultdict(<class 'list'>, {'i': [0, 6, 35, 54, 57], 'have': [1, 36, 58],... 'lowercase.': [62]})

How to alternate lower and upper case on dictionary keys? Python 3

I'm new to Python 3.
What I want to do is to alternate upper and lowercase but only on a dictionary key.
my dictionary is created from a list, its key is the word (or list element) and its value is the times this element appears in the list.
kb = str(input("Give me a string: "));
txt = kb.lower(); #Turn string into lowercase
cadena = txt.split(); #Turn string into list
dicc = {};
for word in cadena:
if (word in dicc):
dicc[word] = dicc[word] + 1
else:
dicc[word] = 1
print(dicc)
With this code i can get for example:
input: "Hi I like PYthon i am UsING python"
{'hi': 1, 'i': 2, 'like': 1, 'python': 2, 'am': 1, 'using': 1}
but what I am trying to get is actually is:
{'hi': 1, 'I': 2, 'like': 1, 'PYTHON': 2, 'am': 1, 'USING': 1}
I tried using this:
for n in dicc.keys():
if (g%2 == 0):
n.upper()
else:
n.lower()
print(dicc)
But it seems that I have no idea of what I'm doing.
Any help would be appreciated.
Using itertools and collections.OrderedDict (to guarantee order in Python < 3.7)
Setup
import itertools
from collections import OrderedDict
s = 'Hi I like PYthon i am UsING python'
switcher = itertools.cycle((str.lower, str.upper))
d = OrderedDict()
final = OrderedDict()
First, create an OrderedDict just to count the occurrences of strings in your list (since you want matches to be case insensitive based on your output):
for word in s.lower().split():
d.setdefault(word, 0)
d[word] += 1
Next, use itertools.cycle to call str.lower or str.upper on keys and create your final dictionary:
for k, v in d.items():
final[next(switcher)(k)] = v
print(final)
OrderedDict([('hi', 1), ('I', 2), ('like', 1), ('PYTHON', 2), ('am', 1), ('USING', 1)])
Your n in dicc.keys() line is wrong. You are trying to use n as both the position in the array of keys and the key itself.
Also the semicolons are unnecessary.
This should do what you want:
from collections import OrderedDict
# Receive user input
kb = str(input("Give me a string: "))
txt = kb.lower()
cadena = txt.split()
dicc = OrderedDict()
# Construct the word counter
for word in cadena:
if word in dicc:
dicc[word] += 1
else:
dicc[word] = 1
If you just want to print the output with alternating case, you can do something like this:
# Print the word counter with alternating case
elems = []
for i, (word, wordcount) in enumerate(dicc.items()):
    # Upper-case every second key (odd positions) to match the requested
    # output: hi, I, like, PYTHON, am, USING.
    if i % 2 == 1:
        word = word.upper()
    elems.append('{}: {}'.format(word, wordcount))
print('{' + ', '.join(elems) + '}')
Or you can make a new OrderedDict with alternating case...
# enumerate supplies the position; keys at odd positions are upper-cased to
# match the requested output (hi, I, like, PYTHON, ...).
dicc_alt_case = OrderedDict(
    (word.upper() if i % 2 == 1 else word, wordcount)
    for i, (word, wordcount) in enumerate(dicc.items())
)

How to apply a for loop to a string in Python

How do I apply a for loop to a string in Python which allows me to count each letter in a word? The ultimate goal is to discover the most common letter.
This is the code so far:
print "Type 'exit' at any time to exit the program."
continue_running = True
while continue_running:
word = raw_input("Please enter a word: ")
if word == "exit":
continue_running = False
else:
while not word.isalpha():
word = raw_input("ERROR: Please type a single word containing alphabetic characters only:")
print word
if len(word) == 1:
print word + " has " + str(len(word)) + " letter."
else:
print word + " has " + str(len(word)) + " letters."
if sum(1 for v in word.upper() if v in ["A", "E", "I", "O", "U"]) == 1:
print "It also has ", sum(1 for v in word.upper() if v in ["A", "E", "I", "O", "U"]), " vowel."
else:
print "It also has ", sum(1 for v in word.upper() if v in ["A", "E", "I", "O", "U"]), " vowels."
if sum(1 for c in word if c.isupper()) == 1:
print "It has ", sum(1 for c in word if c.isupper()), " capital letter."
else:
print "It has ", sum(1 for c in word if c.isupper()), " capital letters."
for loop in word:
I know I can use the:
(collections.Counter(word.upper()).most_common(1)[0])
format, but this isn't the way I want to do it.
You can simply loop directly over strings; they are sequences just like lists and tuples; each character in the string is a separate element:
for character in word.upper():
# count the character.
Counts can then be collected in a dictionary:
counts = {}
for character in word.upper():
# count the character.
counts[character] = counts.get(character, 0) + 1
after which you'd pick the most common one; you can use the max() function for that:
most_common = max(counts, key=counts.__getitem__) # maximum by value
Demo:
>>> word = 'fooBARbazaar'
>>> counts = {}
>>> for character in word.upper():
... # count the character.
... counts[character] = counts.get(character, 0) + 1
...
>>> counts
{'A': 4, 'B': 2, 'F': 1, 'O': 2, 'R': 2, 'Z': 1}
>>> max(counts, key=counts.__getitem__)
'A'
If you don't want to re-invent the wheel try using Counter
>>> from collections import Counter
>>> x = Counter("stackoverflow")
>>> x
Counter({'o': 2, 'a': 1, 'c': 1, 'e': 1, 'f': 1, 'k': 1, 'l': 1, 's': 1, 'r': 1, 't': 1, 'w': 1, 'v': 1})
>>> print max(x, key=x.__getitem__)
o
>>>

Determining Letter Frequency Of Cipher Text

I am trying to make a tool that finds the frequencies of letters in some type of cipher text.
Let's suppose it is all lowercase a-z with no numbers. The encoded message is in a txt file.
I am trying to build a script to help in cracking of substitution or possibly transposition ciphers.
Code so far:
cipher = open('cipher.txt','U').read()
cipherfilter = cipher.lower()
cipherletters = list(cipherfilter)
alpha = list('abcdefghijklmnopqrstuvwxyz')
occurrences = {}
for letter in alpha:
occurrences[letter] = cipherfilter.count(letter)
for letter in occurrences:
print letter, occurrences[letter]
All it does so far is show how many times a letter appears.
How would I print the frequency of all letters found in this file?
import collections
d = collections.defaultdict(int)
for c in 'test':
d[c] += 1
print d # defaultdict(<type 'int'>, {'s': 1, 'e': 1, 't': 2})
From a file:
# The with-block closes the file even if counting raises.
with open('test.txt') as myfile:
    for line in myfile:
        # Drop the trailing newline so it is not counted as a character.
        line = line.rstrip('\n')
        for c in line:
            d[c] += 1
For the genius that is the defaultdict container, we must give thanks and praise. Otherwise we'd all be doing something silly like this:
s = "andnowforsomethingcompletelydifferent"
d = {}
for letter in s:
if letter not in d:
d[letter] = 1
else:
d[letter] += 1
The modern way:
from collections import Counter
string = "ihavesometextbutidontmindsharing"
Counter(string)
#>>> Counter({'i': 4, 't': 4, 'e': 3, 'n': 3, 's': 2, 'h': 2, 'm': 2, 'o': 2, 'a': 2, 'd': 2, 'x': 1, 'r': 1, 'u': 1, 'b': 1, 'v': 1, 'g': 1})
If you want to know the relative frequency of a letter c, you would have to divide number of occurrences of c by the length of the input.
For instance, taking Adam's example:
s = "andnowforsomethingcompletelydifferent"
n = len(s) # n = 37
and storing the absolute frequency of each letter in
dict[letter]
we obtain the relative frequencies by:
from string import ascii_lowercase # this is "a...z"
for c in ascii_lowercase:
print c, dict[c]/float(n)
putting it all together, we get something like this:
# get input
s = "andnowforsomethingcompletelydifferent"
n = len(s) # n = 37
# get absolute frequencies of letters
import collections
dict = collections.defaultdict(int)
for c in s:
dict[c] += 1
# print relative frequencies
from string import ascii_lowercase # this is "a...z"
for c in ascii_lowercase:
print c, dict[c]/float(n)

Categories

Resources