counting total number of words in a text file - python

I am new to Python and am trying to print the total number of words in a text file, as well as the total number of occurrences of specific words provided by the user.
I tested my code, but it outputs a count for every single word. I need only the overall count of all the words in the file, and the overall count of the words provided by the user.
Code:
# Python 2 script: asks for two query words, then tallies every
# whitespace-separated token of xmlfil.xml and prints the tally.
name = raw_input("Enter the query x ")
name1 = raw_input("Enter the query y ")
file = open("xmlfil.xml", "r+")
wordcount = {}
for word in file.read().split():
    if word not in wordcount:
        wordcount[word] = 1
    else:
        wordcount[word] += 1
for k, v in wordcount.items():
    print("%s %s" % (k, v))
# NOTE(review): the file.read() above already consumed the whole file, so this
# second read returns an empty string and the loop body never runs. The loop
# variable also reuses `name`, so the user's query is never looked up.
for name in file.read().split():
    if name not in wordcount:
        wordcount[name] = 1
    else:
        wordcount[name] += 1
# Prints the same per-word table again, not a total.
for k, v in wordcount.items():
    print("%s %s" % (k, v))
# NOTE(review): same situation as above, this time for the second query.
for name1 in file.read().split():
    if name1 not in wordcount:
        wordcount[name1] = 1
    else:
        wordcount[name1] += 1
for k, v in wordcount.items():
    print("%s %s" % (k, v))

# Words whose individual occurrences should be reported.
given_words = ['The', 'document', '1']
words = {}   # occurrences of each given word that actually appears
count = 0    # total number of whitespace-separated words in the file
# The with-block closes the file even if reading raises.
with open('test.txt', 'r') as MyFile:
    for x in MyFile.read().split():
        count += 1
        if x in given_words:
            words[x] = words.get(x, 0) + 1
# Same output as the Python 2 statement `print count, words`.
print("%s %s" % (count, words))
Sample output
17 {'1': 1, 'The': 1, 'document': 1}
Please do not name the variable that holds the result of open() `file`, because doing so overwrites (shadows) the built-in constructor function for the file type.

You can get what you need easily via Counter
from collections import Counter

c = Counter()
# Text mode keeps the keys as str. With 'rb', Python 3 would store bytes keys
# and the str lookup below would always report 0.
with open('your_file', 'r') as f:
    for ln in f:
        c.update(ln.split())
total = sum(c.values())               # overall number of words
specific = c['your_specific_word']    # a Counter returns 0 for unseen words

Related

I'm trying to print the count of every variable from input, but my code doesn't print what I want even though it seems right. python

Here is the code; it keeps giving me the same thing, though it should give a different output.
line = input('Car: ')
while line:
    # NOTE(review): the function is re-defined on every pass and is never
    # called inside the loop.
    def word_count(str):
        counts = dict()
        words = str.split()
        for word in words:
            if word in counts:
                counts[word] += 1
            else:
                counts[word] = 1
        return counts
    line = input('Car: ')
# NOTE(review): this line is reached only after `line` has become empty, so an
# empty string is counted and the result is {}. Layout reconstructed from the
# quoted output - confirm against the original post.
print( word_count(line))
Here is the output I get:
{}
You can try
def word_count(str):
    """Return a dict mapping each whitespace-separated word of *str* to its count.

    The parameter name is kept from the original snippet; it shadows the
    built-in ``str`` only inside this function.
    """
    counts = dict()
    for word in str.split():
        counts[word] = counts.get(word, 0) + 1
    return counts


# Keep prompting until an empty line is entered; each non-empty line is
# counted and printed as soon as it is read.
while True:
    line = input('Car: ')
    if not line:
        break
    print(word_count(line))

Occurrence of words with same length

My function takes a string as input that is the name of a file and should return a dictionary. The dictionary will have key/value pairs where keys are integers that correspond to word lengths and the values are the number of words that appear in the file with that length.
The file consists of the following sentence:
and then the last assignment ended and everyone was sad
So theoretically the returned dictionary would look like this:
{ 3:5, 4:2, 5:1, 8:1, 10:1}
So far I have this:
"""
COMP 1005 - Fall 2016
Assignment 10
Problem 1
"""
def wordLengthStats(filename):
    # Reads the whole file and tallies its whitespace-separated words.
    file = open(filename, 'r')
    wordcount = {}
    # NOTE(review): despite its name, `line` is a single word here, and the
    # dictionary is keyed by the word itself rather than by its length.
    for line in file.read().split():
        if line not in wordcount:
            wordcount[line] = 1
        else:
            wordcount[line] += 1
    for k, v in wordcount.items():
        print (k, v)
    # NOTE(review): the tally is printed but never returned.
    return None


def main():
    '''
    main method to test your wordLengthStats method
    '''
    d = wordLengthStats("sample.txt")
    print("d should be { 3:5, 4:2, 5:1, 8:1, 10:1} ")
    print("d is", d)


if __name__ == '__main__':
    main()
The sentence is just an example, I need to make it so that any input should work. Any help on approaching this problem would be greatly appreciated.
For every word in the sentence, you need to add an entry to the dictionary where the length of the word is the key:
def wordLengthStats(filename):
    """Count the words of a text file by their length.

    Args:
        filename: path of the text file to read.

    Returns:
        A dict mapping each word length (int) to the number of
        whitespace-separated words of that length. An empty file gives {}.
    """
    wordcount = {}
    # The with-block closes the file even if reading raises.
    with open(filename, 'r') as file:
        for word in file.read().split():
            key = len(word)
            wordcount[key] = wordcount.get(key, 0) + 1
    for k, v in wordcount.items():
        print (k, v)
    # Hand the statistics back to the caller instead of returning None.
    return wordcount

How to count one specific word in Python?

I want to count a specific word in the file.
For example how many times does 'apple' appear in the file.
I tried this:
#!/usr/bin/env python
import re
logfile = open("log_file", "r")
wordcount = {}
# Tallies every whitespace-separated word of the file.
for word in logfile.read().split():
    if word not in wordcount:
        wordcount[word] = 1
    else:
        wordcount[word] += 1
# Prints one "word count" line for every distinct word.
for k, v in wordcount.items():
    print("%s %s" % (k, v))
by replacing 'word' with 'apple', but it still counts all possible words in my file.
Any advice would be greatly appreciated. :)
You could just use str.count() since you only care about occurrences of a single word:
with open("log_file") as f:
    contents = f.read()
# str.count counts non-overlapping substring matches, so "applejack" also counts.
count = contents.count("apple")
However, to avoid some corner cases, such as erroneously counting words like "applejack", I suggest that you use a regex:
import re

with open("log_file") as f:
    contents = f.read()
# One match per occurrence of "apple" delimited by word boundaries.
count = sum(1 for match in re.finditer(r"\bapple\b", contents))
\b in the regex ensures that the pattern begins and ends on a word boundary (as opposed to a substring within a longer string).
If you only care about one word then you do not need to create a dictionary to keep track of every word count. You can just iterate over the file line-by-line and find the occurrences of the word you are interested in.
#!/usr/bin/env python
my_word = "apple"
wordcount = 0
# The with-block closes the file even if reading raises.
with open("log_file", "r") as logfile:
    for line in logfile:
        # list.count adds every occurrence on the line; a plain membership
        # test would add at most one per line.
        wordcount += line.split().count(my_word)
print("%s %s" % (my_word, wordcount))
However, if you also want to count all the words, and just print the word count for the word you are interested in then these minor changes to your code should work:
#!/usr/bin/env python
import re
wordcount = {}
# The with-block closes the file even if reading raises.
with open("log_file", "r") as logfile:
    for word in logfile.read().split():
        wordcount[word] = wordcount.get(word, 0) + 1
# print only the count for my_word instead of iterating over entire dictionary
my_word = "apple"
# .get avoids a KeyError when my_word never occurs in the file.
print("%s %s" % (my_word, wordcount.get(my_word, 0)))
You can use the Counter dictionary for this
from collections import Counter

with open("log_file", "r") as logfile:
    word_counts = Counter(logfile.read().split())
# Indexing a Counter gives 0 for a missing word; .get() would give None.
print(word_counts['apple'])
This is an example of counting words in an array of words. I am assuming that reading the words from a file will be pretty much the same.
def count(word, array):
    """Return how many items of *array* are equal to *word*.

    *array* may be any iterable; an empty one gives 0.
    """
    return sum(1 for x in array if x == word)


text = 'apple orange kiwi apple orange grape kiwi apple apple'
ar = text.split()
print(count('apple', ar))
def Freq(x, y):
    """Print and return the case-insensitive word frequencies of a text file.

    Args:
        x: path of the file to read.
        y: unused; kept so existing two-argument calls keep working.

    Returns:
        A dict mapping each lower-cased, whitespace-separated word to the
        number of times it occurs in the file.
    """
    d = {}
    # The with-block closes the file even if reading raises.
    with open(x, "r") as open_file:
        for line in open_file:
            for i in line.lower().split():
                d[i] = d.get(i, 0) + 1
    print(d)
    return d
# Per-payment-method tallies.
cash = 0
visa = 0
amex = 0
# The with-block closes the file even if reading raises.
with open("text.txt", "r") as fi:
    for line in fi:
        k = line.split()
        print(k)
        # A line is attributed to the first method found, in this order.
        if 'Cash' in k:
            cash = cash + 1
        elif 'Visa' in k:
            visa = visa + 1
        elif 'Amex' in k:
            amex = amex + 1
print("# persons paid by cash are:", cash)
print("# persons paid by Visa card are :", visa)
print("#persons paid by Amex card are :", amex)

Python Word Count of Text File

I'm trying to get a count of the frequency of a word in a Text File using a python function. I can get the frequency of all of the words separately, but I'm trying to get a count of specific words by having them in a list. Here's what I have so far but I am currently stuck. My
# NOTE(review): the function takes no parameters, yet it reads `fname` and is
# called with one argument at the bottom of the snippet.
def repeatedWords():
    with open(fname) as f:
        wordcount = {}
        for word in word_list:
            # NOTE(review): the inner loop reuses `word`, so the entry taken
            # from word_list is never compared with anything; every word of
            # the file is counted.
            for word in f.read().split():
                if word not in wordcount:
                    wordcount[word] = 1
                else:
                    wordcount[word] += 1
        for k, v in wordcount.items():
            print("%s %s" % (k, v))


word_list = ['Emma', 'Woodhouse', 'father', 'Taylor', 'Miss', 'been', 'she', 'her']
repeatedWords('file.txt')
Updated, still showing all words:
def repeatedWords(fname, word_list):
    with open(fname) as f:
        wordcount = {}
        for word in word_list:
            # NOTE(review): the inner loop reuses `word`, so word_list never
            # filters anything. The first pass counts every word of the file;
            # later passes read an already exhausted file.
            for word in f.read().split():
                wordcount[word] = wordcount.get(word, 0) + 1
        for k, v in wordcount.items():
            print("%s %s" % (k, v))


word_list = ['Emma', 'Woodhouse', 'father', 'Taylor', 'Miss', 'been', 'she', 'her']
repeatedWords('Emma.txt', word_list)
So you want the frequency of only the specific words in that list (Emma, Woodhouse, Father...)? If so, this code might help (try running it):
from collections import Counter

word_list = ['Emma', 'Woodhouse', 'father', 'Taylor', 'Miss', 'been', 'she', 'her']
# i'm using this example text in place of the file you are using
text = 'This is an example text. It will contain words you are looking for, like Emma, Emma, Emma, Woodhouse, Woodhouse, Father, Father, Taylor,Miss,been,she,her,her,her. I made them repeat to show that the code works.'
text = text.replace(',', ' ')  # these statements remove irrelevant punctuation
text = text.replace('.', '')
text = text.lower()  # lowercase everything so capitalization does not affect the frequency measurement
# Tally the text once instead of re-scanning it for every listed word.
occurrences = Counter(text.split())
for repeatedword in word_list:
    counter = occurrences[repeatedword.lower()]  # 0 when the word never occurs
    print(repeatedword, ':', counter)  # prints the word from 'word_list' and its frequency
The output shows the frequency of only those words in the list you provided, and that's what you wanted right?
the output produced when run in python3 is:
Emma : 3
Woodhouse : 2
father : 2
Taylor : 1
Miss : 1
been : 1
she : 1
her : 3
The best way to deal with this is to use get method in Python dictionary. It can be like this:
def repeatedWords():
    """Count the words of the file named by the global ``fname``.

    Words listed in ``nonwordlist`` are skipped.

    Returns:
        A dict mapping each remaining whitespace-separated word to its count.
    """
    # Example list of words not needed
    nonwordlist = ['father', 'Miss', 'been']
    wordcount = {}
    with open(fname) as f:
        # Read from the handle `f`; the original read from the undefined name
        # `file`. One pass is enough, because a second read of the same handle
        # returns nothing.
        for word in f.read().split():
            if word not in nonwordlist:
                wordcount[word] = wordcount.get(word, 0) + 1
    return wordcount


# Put these outside the function repeatedWords
for k, v in repeatedWords().items():
    print("%s %s" % (k, v))
The print statement should give you this:
# Straight quotes: the typographic quotes of the original are not valid Python.
word_list = ['Emma', 'Woodhouse', 'father', 'Taylor', 'Miss', 'been', 'she', 'her']
newDict = {}
for newWord in word_list:
    # get() supplies 0 the first time a word is seen, so no membership test is needed.
    newDict[newWord] = newDict.get(newWord, 0) + 1
print(newDict)
What the line wordcount[word] = wordcount.get(word, 0) + 1 does is this: it first looks for word in the dictionary wordcount. If the word already exists, it gets its value and adds 1 to it. If the word does not exist, the value defaults to 0 and 1 is added, so the first occurrence of that word gets a count of 1.

Word count from a txt file program

I am counting word of a txt file with the following code:
#!/usr/bin/python
file = open("D:\\zzzz\\names2.txt", "r+")
wordcount = {}
for word in file.read().split():
    if word not in wordcount:
        wordcount[word] = 1
    else:
        wordcount[word] += 1
# NOTE(review): this prints the last word read followed by the whole
# dictionary on one line, which is the output shown in the question.
print (word, wordcount)
file.close()
this is giving me the output like this:
>>>
goat {'goat': 2, 'cow': 1, 'Dog': 1, 'lion': 1, 'snake': 1, 'horse': 1, '': 1, 'tiger': 1, 'cat': 2, 'dog': 1}
but I want the output in the following manner:
word wordcount
goat 2
cow 1
dog 1.....
Also, I am getting an extra symbol (ï»¿) in the output. How can I remove this?
The funny symbols you're encountering are a UTF-8 BOM (Byte Order Mark). To get rid of them, open the file using the correct encoding (I'm assuming you're on Python 3):
file = open(r"D:\zzzz\names2.txt", "r", encoding="utf-8-sig")
Furthermore, for counting, you can use collections.Counter:
from collections import Counter
wordcount = Counter(file.read().split())
Display them with:
>>> for item in wordcount.items(): print("{}\t{}".format(*item))
...
snake 1
lion 2
goat 2
horse 3
#!/usr/bin/python
wordcount = {}
# Read-only mode is enough for counting; the with-block closes the file.
with open("D:\\zzzz\\names2.txt", "r") as file:
    for word in file.read().split():
        wordcount[word] = wordcount.get(word, 0) + 1
# One "word count" line per distinct word.
for k, v in wordcount.items():
    print("%s %s" % (k, v))
FILE_NAME = 'file.txt'
wordCounter = {}
with open(FILE_NAME, 'r') as fh:
    for line in fh:
        # Replacing punctuation characters. Making the string to lower.
        # The split will split the line into a list.
        word_list = line.replace(',', '').replace('\'', '').replace('.', '').lower().split()
        for word in word_list:
            # First sighting starts at 0, so one expression covers both the
            # new-word and the seen-before case.
            wordCounter[word] = wordCounter.get(word, 0) + 1
print('{:15}{:3}'.format('Word', 'Count'))
print('-' * 18)
# printing the words and its occurrence.
for (word, occurance) in wordCounter.items():
    print('{:15}{:3}'.format(word, occurance))
#
Word Count
------------------
of 6
examples 2
used 2
development 2
modified 2
open-source 2
import sys

wordcount = {}
# The file named by the first command-line argument is only read, so
# read-only mode suffices; the with-block closes it.
with open(sys.argv[1], "r") as file:
    for word in file.read().split():
        wordcount[word] = wordcount.get(word, 0) + 1
for key in wordcount:
    print ("%s %s " % (key, wordcount[key]))
If you are using GraphLab, you can use this function. It is really powerful.
products['word_count'] = graphlab.text_analytics.count_words(your_text)
#!/usr/bin/python
wordcount = {}
# Read-only mode is enough for counting; the with-block closes the file even
# if reading raises.
with open("D:\\zzzz\\names2.txt", "r") as file:
    for word in file.read().split():
        wordcount[word] = wordcount.get(word, 0) + 1
for k, v in wordcount.items():
    print("%s %s" % (k, v))
you can do this:
# In a raw string every backslash is literal, so the separators are written once.
with open(r'D:\zzzz\names2.txt') as file:
    file_split = set(file.read().split())
# A set keeps one copy of each word, so this is the number of DISTINCT words.
print(len(file_split))
The code below, from "Python | How to Count the frequency of a word in the text file?", worked for me.
import re

frequency = {}
# Open the sample text file in read mode; the with-block closes it afterwards.
with open('sample.txt', 'r') as document_text:
    # convert the string of the document in lowercase and assign it to text variable.
    text = document_text.read().lower()
# Runs of 2 to 15 lowercase ASCII letters delimited by word boundaries.
pattern = re.findall(r'\b[a-z]{2,15}\b', text)
for word in pattern:
    count = frequency.get(word, 0)
    frequency[word] = count + 1
frequency_list = frequency.keys()
for words in frequency_list:
    print(words, frequency[words])
OUTPUT:
from collections import Counter

print("sorted counting values:-")
# `filename` must be defined by the surrounding code. The with-block closes
# the handle, and `fname` holds only the file's text.
with open(filename) as handle:
    fname = handle.read()
fsplit = fname.split()
user = Counter(fsplit)
# Sorted by word; each line shows a (count, word) tuple.
for i, v in sorted(user.items()):
    print((v, i))

Categories

Resources