dynamically creating keys and values in a Python dictionary - python

The problem I am trying to solve is reading in a file that contains a list of words, then counting the number of vowels in each word. Each word should be displayed in a table along with the count of each of its vowels and the total number of vowels in the word, and at the end the total number of vowels in all of the words should be displayed.
I am trying to solve the problem by reading the file in through a for loop and creating a dictionary that is associated with every word like
mississippi = {'a_count': 0, 'e_count': 0, 'i_count': 4, 'o_count': 0, 'u_count': 0, 'y_count': 0}
My problem is that I am not sure how to create the dictionaries as the variable changes due to a loop. I am just ending up with empty dictionaries.
here's a screenshot of my output http://imgur.com/mksgdTc
My test file contains Mississippi, California and Wisconsin, each on its own line.
# Read words (one per line) from vowel.txt and print a table of vowel counts
# per word, followed by the totals over all words.
try:
    word_file = open("vowel.txt", "r")
    count = 0
    dic = {}
    # Running totals over every word in the file.
    a_count = 0
    e_count = 0
    i_count = 0
    o_count = 0
    u_count = 0
    y_count = 0
    total_count = 0
    #this establishes the top of the table
    print('Number','{:>8}'.format('word'),'{:>8}'.format('A'),'{:>4}'.format('E'),'{:>4}'.format('I'),'{:>4}'.format('O'),'{:>4}'.format('U'),'{:>4}'.format('Y'),'{:>8}'.format('Total'))
    print("__________________________________________________________")
    for word in word_file:
        count+=1
        # NOTE(review): this rebinds `word` (the line just read from the file)
        # to an empty dict, so the inner loop below has nothing to iterate over
        # and the word[...] lookups in the row print find no keys. This is the
        # problem the answer below focuses on.
        word = {}
        print(word)
        # Per-word counters, reset for every line.
        word_a_count = 0
        word_e_count = 0
        word_i_count = 0
        word_o_count = 0
        word_u_count = 0
        word_y_count = 0
        word_total_count = 0
        for letters in word:
            print(letters)
            if letters.lower() == "a":
                a_count+= 1
                total_count += 1
                word_a_count +=1
                word['a_count'] = word_a_count
            if letters.lower() == "e":
                e_count+= 1
                total_count += 1
                word_e_count +=1
                word['e_count'] = word_e_count
            if letters.lower() == "i":
                i_count+= 1
                total_count += 1
                word_i_count +=1
                word['i_count'] = word_i_count
            if letters.lower() == "o":
                o_count+= 1
                total_count += 1
                word_o_count +=1
                word['o_count'] = word_o_count
            if letters.lower() == "u":
                u_count+= 1
                total_count += 1
                word_u_count +=1
                word['u_count'] = word_u_count
            if letters.lower() == "y":
                y_count+= 1
                total_count += 1
                word_y_count +=1
                word['y_count'] = word_y_count
        # Row for the current word.
        print('Totals','{:>8}'.format(' '),'{:>8}'.format(word['a_count']),'{:>4}'.format\
        (word['e_count']),'{:>4}'.format(word['i_count']),'{:>4}'.format\
        (word['o_count']),'{:>4}'.format(word['u_count']),'{:>4}'.\
        format(word['y_count']))
    #this creates the bottom barrier of the table
    print("__________________________________________________________")
    #code for totals print
    print('Totals','{:>8}'.format(' '),'{:>8}'.format(a_count),'{:>4}'.format(e_count),'{:>4}'.format(i_count),'{:>4}'.format(o_count),'{:>4}'.format(u_count),'{:>4}'.format(y_count),'{:>6}'.format(total_count))
except IOError:
    print("The file does not seem to exists. The program is halting.")

Focus on this section -- word is re-assigned as an empty dict on every iteration of the loop:
for word in word_file:
count+=1
word = {}
However, commenting out word = {} now raises an error as soon as the first vowel is read from the file, because the counts are then being assigned into a string rather than a dict. Remember that word is the current line of the text file that you are iterating over, so word['u_count'] = word_u_count is interpreted as an instruction to change a character in the string. Python strings are immutable, so an error is thrown.
Your program is much longer than it needs to be - when you notice repetition in your code consider refactoring to take advantage of loops and iteration, to make your program more concise. You could separate all the logic for counting the letters in a word into one procedure:
def countletters(word, letterstocount):
    """Count how often each of the given letters occurs in a word.

    The word is lowercased before counting, so ``letterstocount`` should
    contain lowercase letters.

    Args:
        word: The string to scan.
        letterstocount: A container of characters to count, e.g. ``"aeiou"``.

    Returns:
        A dict mapping each counted character that occurs at least once to
        its number of occurrences. Letters that never occur are absent.
    """
    count = {}
    for char in word.lower():
        if char in letterstocount:
            count[char] = count.get(char, 0) + 1
    return count
#example call
vowels = "aeiou"
print(countletters('Arizona', vowels))
which you then call for each word in your file.

In Python 2 I'd do something like this...
#! /usr/bin/env python
"""
Count vowels in a list of words & show a grand total
Words come from a plain text file with one word per line
"""
import sys

vowels = 'aeiouy'


def make_count_dict():
    """ Create a dict for counting vowels with all values initialised to 0 """
    return dict.fromkeys(vowels, 0)


def get_counts(d):
    """ Format the counts in d as width-2 columns, in the order of vowels """
    return ' '.join('%2d' % d[k] for k in vowels)


def count_vowels(wordlist):
    """ Print a numbered table of vowel counts per word, then a totals row """
    hline = '_'*45
    # print(...) with one parenthesised argument runs on Python 2 and 3 alike.
    print('%3s: %-20s: %s' % ('Num', 'Word', ' '.join('%2s' % v for v in vowels)))
    print(hline)
    total_counts = make_count_dict()
    for num, word in enumerate(wordlist, start=1):
        word_counts = make_count_dict()
        for ch in word.lower():
            if ch in vowels:
                word_counts[ch] += 1
                total_counts[ch] += 1
        print('%3d: %-20s: %s' % (num, word, get_counts(word_counts)))
    print(hline)
    print('%-25s: %s' % ('Total', get_counts(total_counts)))


def main():
    """ Count vowels in the file named by the first argument, or in sample words """
    fname = sys.argv[1] if len(sys.argv) > 1 else None
    if fname:
        try:
            with open(fname, 'r') as f:
                wordlist = f.read().splitlines()
        except IOError:
            print("Can't find file '%s'; aborting." % fname)
            sys.exit(1)
    else:
        wordlist = ['Mississippi', 'California', 'Wisconsin']
    count_vowels(wordlist)


if __name__ == '__main__':
    main()

Related

Why converting list to dictionary does not work in python?

I have a program that should print like this. In the last part, I converted the list to dictionary, but it does not work
Enter rows of text for word counting. Empty row to quit.
I'm on a high way to hell
I'm on a high way to hell
It's going really well
Well it's only hell
a : 2 times
going : 1 times
hell : 3 times
high : 2 times
i'm : 2 times
it's : 2 times
on : 2 times
only : 1 times
really : 1 times
to : 2 times
way : 2 times
well : 2 times
I wrote this code but does not give the expected output
def word_count(str):
    """Count how many times each whitespace-separated word occurs in str.

    Words are compared exactly as written, so "Well" and "well" are counted
    as two different words.

    Args:
        str: The text to split into words. The name shadows the built-in
            ``str`` inside this function; it is kept as posted.

    Returns:
        A dict mapping each distinct word to its number of occurrences.
    """
    counts = dict()
    for word in str.split():
        counts[word] = counts.get(word, 0) + 1
    return counts
def listToString(s):
    """Concatenate the strings in s into a single string.

    NOTE: nothing is inserted between the elements, so the last word of one
    element and the first word of the next run together.

    Args:
        s: An iterable of strings.

    Returns:
        The elements of s joined end to end.
    """
    return "".join(s)
def Convert(a):
    """Pair up consecutive items of a into a dict: a[0] -> a[1], a[2] -> a[3], ...

    Both arguments to zip are the same iterator, so each pair consumes two
    items and a trailing unpaired item is dropped. Iterating a dict yields
    its keys, so passing a dict pairs keys with keys and discards the values.

    Args:
        a: Any iterable.

    Returns:
        A dict built from the consecutive pairs of a.
    """
    it = iter(a)
    return dict(zip(it, it))
def main():
    """Read rows of text until an empty row, then print the word counts."""
    print("Enter rows of text for word counting. Empty row to quit.")
    user_values = []
    prompt = ""
    line = input(prompt)
    while line:
        user_values.append(line)
        line = input(prompt)
    # The rows are merged, counted and then passed through Convert; the
    # answers below explain why the first and last of these steps give the
    # wrong result.
    user_list=(listToString(user_values))
    user_list=word_count(user_list)
    user_list = Convert(user_list)
    for k, v in sorted(user_list.items()): # sorts
        print(k," : ",v," times")


if __name__ == "__main__":
    main()
Fix
The listToString method joins the rows without a space between them, so the last word of one row and the first word of the next merge into a new word. Just do user_list = " ".join(user_values)
You don't need Convert at all.
Specify a sort key that uses the lowercase version of each word, because otherwise words starting with an uppercase letter sort before all lowercase ones: sorted(user_list.items(), key=lambda x: x[0].lower())
def main():
    """Read rows of text until an empty row, then print each word's count.

    The rows are joined with a single space so that words on adjacent rows
    stay separate, counted with word_count, and printed in case-insensitive
    alphabetical order.
    """
    print("Enter rows of text for word counting. Empty row to quit.")
    user_values = []
    line = input(">")
    while line:
        user_values.append(line)
        line = input(">")
    user_list = " ".join(user_values)
    user_list = word_count(user_list)
    for k, v in sorted(user_list.items(), key=lambda x: x[0].lower()):
        # Format the row in one piece: passing the parts to print() as
        # separate arguments adds an extra space around " : " and " times".
        print("{} : {} times".format(k, v))
Improve
With collections.Counter
def main():
    """Read rows of text until an empty row, then print each word's count.

    Words are counted case-insensitively with collections.Counter and the
    rows are printed in alphabetical order.
    """
    # Imported here so that this snippet runs on its own.
    from collections import Counter

    print("Enter rows of text for word counting. Empty row to quit.")
    user_values = []
    line = input(">")
    while line:
        user_values.append(line)
        line = input(">")
    # Lowercase first so that "Well" and "well" are counted as one word.
    user_list = Counter(" ".join(user_values).lower().split())
    # Sort by word; most_common() would order by frequency instead.
    for k, v in sorted(user_list.items()):
        print("{} : {} times".format(k, v))
Your code can work except for a few of things:
Convert doesn't do what you think it should and it's totally superfluous.
listToString doesn't work right and can be replaced with " ".join(...) (the str.join method)
In word_count change word to lower case.
Something like this:
def word_count(str):
    """Count how many times each word occurs in str, ignoring case.

    Args:
        str: The text to split on whitespace. The name shadows the built-in
            ``str`` inside this function; it is kept as posted.

    Returns:
        A dict mapping each distinct lowercased word to its number of
        occurrences.
    """
    counts = dict()
    for word in str.lower().split():
        counts[word] = counts.get(word, 0) + 1
    return counts
def main():
    """Read rows of text until an empty row, then print each word's count."""
    print("Enter rows of text for word counting. Empty row to quit.")
    user_values = []
    prompt = ""
    line = input(prompt)
    while line:
        user_values.append(line)
        line = input(prompt)
    # A space between rows keeps the last word of one row apart from the
    # first word of the next.
    user_list = " ".join(user_values)
    user_list = word_count(user_list)
    for k, v in sorted(user_list.items()):  # sorts
        # Format the row in one piece: passing the parts to print() as
        # separate arguments adds an extra space around " : " and " times".
        print("{} : {} times".format(k, v))


if __name__ == "__main__":
    main()

Passing words to a function

I am trying to pass words to a function that checks them against the words in a separate file. It only passes the last word created from the first file rather than each word. I think it should work like this: as soon as a word is created, it is passed to the function, which checks it and returns the result, but I'm confused as to how to do that. If the word matches a word in the second file it should print the word followed by 0; if it doesn't match, it should print the word followed by 1.
import sys
# Expect exactly two command-line arguments: the file to split into words and
# the file to check those words against.
argc = len(sys.argv)
cmdlength = argc - 1
if cmdlength != 2:
    print ("Usage error, expected 2 args got " + str(cmdlength))
    exit()
else:
    word = ""
    with open(sys.argv[1],"r") as fh:
        # Build words one character at a time; a space ends the current word.
        while True:
            ch=fh.read(1)
            if not ch:
                print(word)
                print("End of file")
                break
            if ch == ' ':
                print(word)
                word = ''
            else:
                word += ch
def check_word(word):
    """Print word followed by 0 or 1 for every line of the second file.

    A line that contains word as a substring prints 0, any other line prints 1.
    """
    count = 0
    count2 = 0
    with open(sys.argv[2],"r") as fh2:
        lines = fh2.readlines()
        for line in lines:
            if word in line:
                print(word , ": " , "0")
                count += 1
            else:
                print(word, ": " , "1")
                count += 1
# NOTE(review): this is the only call, and it runs after the reading loop has
# finished, so only the last word built is ever checked.
check_word(word)
You are calling the check_word function only once, after the while loop has ended, which is why only the last word is passed to it. You should call the function as soon as each word is complete, which in your case is when ch == ' ':
if ch == ' ':
    print(word)
    # Check the finished word before it is reset for the next one.
    check_word(word)
    word = ''

Python - Every other letter in a string capitalized within a loop where the data was handed down from a copied list of user input

I was instructed to have a user input at least 8 words into a list and then perform various manipulations to the data within the list. One of the manipulations it asks me to do is to create a loop that makes every other letter in the strings capitalized (hElLo WoRlD.) For better readability, I left out the other manipulations that I have done to the code.
import sys
def main():
    """Ask for at least 8 words and hand them to every_other."""
    words = []
    wordCount = 0
    userWord = input("Enter at least 8 words or 'bye' to leave the program: ").split(' ')
    while True:
        if len(userWord)<8:
            print("Please print at least 8 words, try again.")
            sys.exit()
        elif wordCount >= 8 and userWord[wordCount] != 'bye':
            words.append(userWord[wordCount])
            wordCount = wordCount + 1
        else:
            break
    # NOTE(review): the string returned by every_other is discarded here.
    every_other (userWord)
def every_other(words):
    """Return the items of words joined together, every other one uppercased.

    Items at even positions (0, 2, 4, ...) are uppercased as a whole and the
    rest are left unchanged. Nothing is inserted between the items. The
    unmodified copy of the list is printed before returning.

    Args:
        words: A list of strings.

    Returns:
        The concatenated string.
    """
    words6 = words.copy()
    st = "".join(
        item.upper() if position % 2 == 0 else item
        for position, item in enumerate(words6)
    )
    # NOTE(review): this prints the untouched list, not the string built above.
    print ('This is your list with every other letter capitalized: ', words6)
    return st
main()
I am not getting any error messages but the code doesn't seem to be running starting at def every_other.
You'll have to print the value returned by every_other, because the function returns the string instead of printing it:
import sys
def main():
    """Ask for at least 8 words and print the result of every_other.

    Exits the program when fewer than 8 space-separated words are entered.
    """
    userWord = input("Enter at least 8 words or 'bye' to leave the program: ").split(' ')
    # The posted while loop always broke out on its first pass (its counter
    # started at 0 and was compared with >= 8), so this length check is all
    # it ever did.
    if len(userWord) < 8:
        print("Please print at least 8 words, try again.")
        sys.exit()
    print('This is your list with every other letter capitalized: ', every_other(userWord))
def every_other(words):
    """Return the items of words joined together, every other one uppercased.

    Items at even positions (0, 2, 4, ...) are uppercased as a whole and the
    rest are left unchanged. Nothing is inserted between the items.

    Args:
        words: A sequence of strings.

    Returns:
        The concatenated string. The caller is responsible for printing it;
        any statement placed after the return would never run.
    """
    return "".join(
        item.upper() if position % 2 == 0 else item
        for position, item in enumerate(words)
    )
main()
If you want to capitalize every second character:
import sys
def main():
    """Ask for at least 8 words and print the result of every_other.

    Exits the program when fewer than 8 space-separated words are entered.
    """
    userWord = input("Enter at least 8 words or 'bye' to leave the program: ").split(' ')
    # The posted while loop always broke out on its first pass (its counter
    # started at 0 and was compared with >= 8), so this length check is all
    # it ever did.
    if len(userWord) < 8:
        print("Please print at least 8 words, try again.")
        sys.exit()
    print('This is your list with every other letter capitalized: ', every_other(userWord))
def every_other(words):
    """Return the words as one string with every other letter capitalised.

    The words are joined with single spaces. Only alphabetic characters take
    part in the alternation, so spaces and punctuation do not shift the
    pattern. The 2nd, 4th, 6th, ... letters are uppercased and the others are
    left as entered, e.g. ["hello", "world"] -> "hElLo WoRlD".

    Args:
        words: A sequence of strings.

    Returns:
        The joined string with alternating capitalisation.
    """
    pieces = []
    letters_seen = 0
    for ch in " ".join(words):
        if ch.isalpha():
            if letters_seen % 2 == 1:
                ch = ch.upper()
            letters_seen += 1
        pieces.append(ch)
    return "".join(pieces)
main()

How to count the amount of vowels and consonants in a text file?

I am trying to correctly count the number of vowels and consonants in a text file but I am lost currently. I have the other parts that need to be found done.
# Home work 4
from string import punctuation
# Python 2 script (raw_input, print statement). The indentation of this
# listing was lost, so the nesting of the statements below is not shown.
# Ask for a file name and open that file for reading.
fname = raw_input("Enter name of the file: ")
fvar = open(fname, "r")
# Characters this script treats as punctuation.
punctuationList = "!#$%&'(),.:;?"
# Counters reported by the print statements at the end.
numLines = 0
numWords = 0
numChars = 0
numPunc = 0
numVowl = 0
numCons = 0
# NOTE(review): `line` is read here before the `for line in fvar` loop below
# has assigned it, and `numVowl = + 1` assigns +1 instead of incrementing.
if line in "aeiou":
numVowl = + 1
else:
numCons += 1
# Per line of the file: count the line, its whitespace-separated words and
# its characters.
for line in fvar:
wordsList = line.split()
numLines += 1
numWords += len(wordsList)
numChars += len(line)
# NOTE(review): this adds 1 for every character of punctuationList rather
# than for punctuation found in the text; the loop variable also reuses the
# name `punctuation` imported from the string module.
for punctuation in punctuationList:
numPunc += 1
print "Lines %d" % numLines
print "Words %d" % numWords
print "The amount of charcters is %d" % numChars
print "The amount of punctuation is %d" % numPunc
print "The amount of vowls is %d" % numVowl
print "The amount of consonants is %d" % numCons
You need to loop over all the characters in the line, testing whether they're vowels, consonants, or punctuation.
# Replacement for the counting loop in the question's script: classify every
# character of every line.
for line in fvar:
    wordsList = line.split()
    numLines += 1
    numWords += len(wordsList)
    numChars += len(line)
    # Lowercase so that capital letters are classified as letters too.
    for char in line.lower():
        if char in 'aeiou':
            numVowl += 1
        elif char in 'bcdfghjklmnpqrstvwxyz':
            numCons += 1
        # Only characters listed in punctuationList count as punctuation;
        # spaces, newlines and digits are not counted in any category.
        elif char in punctuationList:
            numPunc += 1
You can try this:
# Read the lines once and close the file afterwards; lowercase them so that
# capital letters are counted too.
with open('file.txt') as fh:
    f = [line.strip('\n').lower() for line in fh]
# One [consonant count, vowel count] pair per line. The counts are taken over
# the characters of the line; testing whole words with `in` would be a
# substring check against the letter string.
new_lines = [[sum(ch in 'bcdfghjklmnpqrstvwxyz' for ch in line), sum(ch in "aeiou" for ch in line)] for line in f]
total_consonants = sum(a for a, b in new_lines)
total_vowels = sum(b for a, b in new_lines)
I would write a function that returns a 3-tuple of the counts you care about when given a string.
import string
def count_helper(s) -> ("vowel count", "consonant count", "punctuation count"):
    """Count the vowels, consonants and punctuation characters in s.

    Letters are classified case-insensitively. Characters that are neither
    ASCII letters nor in string.punctuation (spaces, newlines, digits, ...)
    are not counted in any category.

    Args:
        s: The string to analyse.

    Returns:
        A 3-tuple (vowels, consonants, punctuation) of counts.
    """
    vowels = set('aeiou')
    consonants = set(string.ascii_lowercase).difference(vowels)
    # you could also do set('bcdfghjklmnpqrstvwxyz'), but I recommend this approach
    # because it's more obviously correct (you can't possibly typo and miss a letter)
    punctuation = set(string.punctuation)
    c_vowel = c_consonant = c_punctuation = 0
    for ch in s.lower():
        if ch in vowels:
            c_vowel += 1
        elif ch in consonants:
            c_consonant += 1
        elif ch in punctuation:
            c_punctuation += 1
    return (c_vowel, c_consonant, c_punctuation)
Then as you iterate through the file, pass each line to count_helper.
# Running totals over the whole file.
counts = {'vowels': 0, 'consonants': 0, 'punctuation': 0}
for line in f:
    v, c, p = count_helper(line)
    counts['vowels'] += v
    counts['consonants'] += c
    counts['punctuation'] += p

Text file indexing using python 3.4.3

I am trying to write Python 3.4 code to index an external text document,
and this is my attempt. When I run it I get the error message:
raw input is not defined
What I want is:
to tokenize the document which is out of python 34 folder
to remove stop words
to stem
indexing
The code:
import string
def RemovePunc():
    """Collect input lines with punctuation replaced by spaces, lowercased.

    Input is read repeatedly until a lone "." is entered.

    Returns:
        A list with one cleaned, lowercased string per input line.
    """
    line = []
    i = 0
    text_input = ""
    # Raw string so the backslashes are taken literally; the value is unchanged.
    total_text_input = r"C:Users\Kelil\Desktop\IRS_Assignment\project.txt"
    #This part removes the punctuation and converts input text to lowercase
    while i != 1:
        # NOTE(review): raw_input does not exist in Python 3 and is not called
        # here; this line is the source of the error the question asks about.
        text_input = raw_input
        if text_input == ".":
            i = 1
        else:
            new_char_string = ""
            for char in text_input:
                if char in string.punctuation:
                    char = " "
                new_char_string = new_char_string + char
            line = line + [new_char_string.lower()]
            #This is a list with all of the text that was entered in
            total_text_input = (total_text_input + new_char_string).lower()
    return line
def RemoveStopWords(line):
    """Split each input line into words and drop the stop words.

    Args:
        line: An iterable of strings, one per input line.

    Returns:
        A list with one entry per input line; each entry is the list of that
        line's whitespace-separated words that are not stop words.
    """
    stop_words = {
        'a','able','about','across','after','all','almost','also','am','among',
        'an','and','any','are','as','at','be','because','been','but','by','can',
        'cannot','could','dear','did','do','does','either','else','ever','every',
        'for','from','get','got','had','has','have','he','her','hers','him','his',
        'how','however','i','if','in','into','is','it','its','just','least','let',
        'like','likely','may','me','might','most','must','my','neither','no','nor',
        'not','of','off','often','on','only','or','other','our','own','rather','said',
        'say','says','she','should','since','so','some','than','that','the','their',
        'them','then','there','these','they','this','tis','to','too','twas','us',
        'wants','was','we','were','what','when','where','which','while','who',
        'whom', 'why', 'will', 'with', 'would', 'yet', 'you', 'your',
    }
    #this part removes the stop words for the list of inputs
    # str.split() is used because the string.split() function no longer
    # exists in Python 3.
    return [
        [word for word in sent.split() if word not in stop_words]
        for sent in line
    ]
def StemWords(line_stop_words):
    """Strip common suffixes from every word, in place.

    The suffixes "s", "es", "ed", "er", "ly" and "ing" are applied in that
    order over the whole input, so a word can lose more than one suffix
    (e.g. "s" on one pass and "er" on a later one). A word that consists of
    nothing but the suffix is left unchanged rather than reduced to an empty
    string.

    Args:
        line_stop_words: A list of lists of words; the inner lists are
            modified in place.

    Returns:
        The same line_stop_words object.
    """
    leaf_words = "s","es","ed","er","ly","ing"
    for suffix in leaf_words:
        length = len(suffix)
        for line in line_stop_words:
            for position, word in enumerate(line):
                if len(word) > length and word.endswith(suffix):
                    line[position] = word[:-length]
    return(line_stop_words)
def indexDupe(lineCount,occur):
    """Report whether a line number is already recorded.

    Args:
        lineCount: The line number to look for; it is converted with str().
        occur: The container of already recorded line numbers (as strings).

    Returns:
        True if str(lineCount) is in occur, otherwise False.
    """
    return str(lineCount) in occur
def Indexing(line_stop_words):
    """Build an index of the lines on which each word occurs.

    Args:
        line_stop_words: A list of lists of words, one inner list per line.
            Lines are numbered from 1.

    Returns:
        A list of [word, line_numbers] entries in order of first appearance,
        where line_numbers is a list of line numbers as strings, each line
        listed once per word.
    """
    index = []
    # word -> its [word, line_numbers] entry in index, so that a word is
    # found without rescanning the whole index.
    entries = {}
    for line_no, words in enumerate(line_stop_words, start=1):
        label = str(line_no)
        for x in words:
            entry = entries.get(x)
            if entry is None:
                entry = [x, [label]]
                entries[x] = entry
                index.append(entry)
            elif label not in entry[1]:
                # append() keeps "12" as one item; extending the list with
                # the string would add "1" and "2" separately.
                entry[1].append(label)
    return(index)
def OutputIndex(index):
    """Print the index, one word per row followed by its line numbers.

    Args:
        index: A list of [word, line_numbers] entries, where line_numbers is
            a list of strings.
    """
    print ("Index:")
    for word, line_numbers in index:
        # One row per word. The posted print (x,) calls, converted from
        # Python 2's trailing-comma form, put every item on its own line.
        print (word, ", ".join(line_numbers))
# Run the steps in order: read and clean the input lines, drop the stop
# words, strip the suffixes, build the index, and print it.
line = RemovePunc()
line_stop_words = RemoveStopWords(line)
line_stop_words = StemWords(line_stop_words)
index = Indexing(line_stop_words)
OutputIndex(index)
#smichak already put the right answer in the comments. raw_input was renamed to input in Python 3. So you want:
text_input = input()
Don't forget those parentheses, since you want to call the function.

Categories

Resources