Refer to this image1
Hey, this is a program I want to write in Python. I tried, and I successfully iterated over the words, but now how do I count the individual score?
# Read a sentence from the user and lower-case it.
a_string = input("Enter a sentance: ").lower()
# NOTE(review): vowel_counts is created here but never filled in within this snippet.
vowel_counts = {}
# Split on whitespace into individual words.
splits = a_string.split()
for i in splits:
# NOTE(review): indentation was lost in this listing; presumably the next lines form the loop body -- confirm.
words = []
words.append(i)
print(words)
You can flag the vowels using translate to convert all the vowels to 'a's . Then count the 'a's in each word using the count method:
sentence = "computer programmers rock"
# Translation table sending every vowel (either case) to the marker 'a'.
vowels = str.maketrans(dict.fromkeys("aeiouAEIOU", "a"))
flagged = sentence.translate(vowels)  # all vowels --> 'a'
# Number of marker characters, i.e. vowels, in each word.
counts = []
for word in flagged.split():
    counts.append(word.count('a'))
# A word with more than two vowels is worth 2 points, any other word 1 point.
score = sum(2 if c > 2 else 1 for c in counts)
print(counts, score)
# [3, 3, 1] 5
Related
I have to write a function that counts how many times a word (or a series of words) appears in a given text.
This is my function so far. What I noticed is that with a series of 3 words the functions works well, but not with 4 words and so on.
from nltk import ngrams
# Prints countN, a tally of word matches between `word` and the n-grams of `text`.
def function(text, word):
# Replace sentence punctuation with spaces so it does not stick to neighbouring words.
for char in ".?!-":
text = text.replace(char, ' ')
# n is the number of whitespace-separated words in the search phrase.
n = len(word.split())
countN = 0
# Lower-case both text and phrase, so the comparison below ignores case.
bigram_lower = text.lower()
word_lower = word.lower()
# ngrams comes from nltk (imported above); presumably it yields every run of
# n consecutive tokens -- confirm against the nltk documentation.
n_grams = ngrams(bigram_lower.split(), n)
for gram in n_grams:
for i in range (0, n):
# NOTE(review): countN is incremented once per matching word position,
# not once per fully matching n-gram.
if gram[i] == word_lower.split()[i]:
countN = countN + 1
print (countN)
First, please fix your indentation, and don't use bigrams as a variable name for n-grams, as it's a bit confusing (since you are not storing just bigrams in the bigrams variable). Second, let's look at this part of your code -
for gram in bigrams:
for i in range (0, n):
if gram[i] == word_lower.split()[i]:
countN = countN + 1
print (countN)
Here you are increasing countN by one for each time a word in your ngram matches up instead of increasing it when the whole ngram matches up. You should instead only increase countN if all the words have matched up -
for gram in bigrams:
if list(gram) == word_lower.split():
countN = countN + 1
print (countN)
May be it was already done in here
Is nltk mandatory?
# Count how often each word occurs in sample.txt and print the tallies.

# Create an empty dictionary mapping word -> number of occurrences
d = {}

# Open the file in read mode; the with-block closes it again even if
# reading fails part-way through.
with open("sample.txt", "r") as text:
    # Loop through each line of the file
    for line in text:
        # Lower-case to avoid case mismatch. split() with no argument also
        # discards the newline and never yields empty strings, unlike
        # split(" ") on lines that contain repeated spaces.
        for word in line.lower().split():
            # Increment the count, starting from 0 for a word not seen before
            d[word] = d.get(word, 0) + 1

# Print the contents of dictionary, one "word : count" pair per line
for key, count in d.items():
    print(key, ":", count)
This should work for you:
def function(text, word):
    """Print how many times the phrase *word* occurs in *text*.

    The comparison is case-insensitive and works on whole words: the
    characters ``.?!-,`` in *text* are treated as spaces, then every run of
    ``n`` consecutive words (``n`` = number of words in *word*) is compared
    with the phrase. Overlapping occurrences are each counted. An empty
    phrase prints 0.

    Only the standard library is used, so no nltk import is required.
    """
    for char in ".?!-,":
        text = text.replace(char, ' ')
    tokens = text.lower().split()
    phrase = tuple(word.lower().split())
    n = len(phrase)
    countN = 0
    if n:
        # Zipping n staggered slices yields every run of n consecutive
        # tokens; zip stops at the shortest slice, so no partial run appears.
        for gram in zip(*(tokens[i:] for i in range(n))):
            if gram == phrase:
                countN += 1
    print(countN)
>>> tekst="this is the text i want to search, i want to search it for the words i want to search for, and it should count the occurances of the words i want to search for"
>>> function(tekst, "i want to search")
4
>>> function(tekst, "i want to search for")
2
I want to multiply letter of string by digits of number. For example for a word "number" and number "123"
the output would be "nuummmbeerrr". How do I create a function that does this? My code is not useful, because it doesn't do what I need.
I have only this
def new_word(s):
    """Expand a string of character/count pairs.

    Characters at even positions are repeated as many times as the digit
    that follows them, e.g. ``new_word('a3n5z1') == 'aaannnnnz'``.

    Raises:
        ValueError: if *s* has odd length (a character without a count) or
            a count position does not hold a digit.
    """
    if len(s) % 2:
        raise ValueError(f"expected character/count pairs, got odd length: {s!r}")
    # s[::2] are the characters, s[1::2] the counts that follow them.
    return "".join(ch * int(count) for ch, count in zip(s[::2], s[1::2]))
for new_word('a3n5z1') output is aaannnnnz .
Using list comprehension and without itertools:
number = 123
word = "number"
# Digits of the number; indexing them modulo their count reuses them
# cyclically, so a word of any length is fully covered.
digits = [int(c) for c in str(number)]
new_word = "".join([character * digits[i % len(digits)] for i, character in enumerate(word)])
print(new_word)
# > 'nuummmbeerrr'
What it does (with more details) is the following:
number = 123
word = "number"
# the first trick is to link each character in the word to the number that we want
# for this, we take the digits of the number and reuse them cyclically...
# ... so that we get a list with length equal to the length of the word, however long it is
digits = [int(c) for c in str(number)]
numbers_to_characters = [digits[i % len(digits)] for i in range(len(word))]
print(numbers_to_characters)
# > [1, 2, 3, 1, 2, 3]
# then, we initialize an empty list to contain the repeated characters of the new word
repeated_characters_as_list = []
# we loop over each number in numbers_to_characters and each character in the word
for (n, character) in zip(numbers_to_characters, word):
    repeated_characters_as_list.append(character*n)
print(repeated_characters_as_list)
# > ['n', 'uu', 'mmm', 'b', 'ee', 'rrr']
new_word = "".join(repeated_characters_as_list)
print(new_word)
# > 'nuummmbeerrr'
This will solve your issue, feel free to modify it to fit your needs.
from itertools import cycle
# cycle() repeats the digits endlessly, so the word may be longer than the number.
numbers = cycle("123")
word = "number"
output = []
for letter in word:
    # Add the letter as many times as the next digit in the cycle says.
    output.extend(letter * int(next(numbers)))
string_output = ''.join(output)
EDIT:
Since you're a beginner This will be easier to understand for you, even though I suggest reading up on the itertools module since its the right tool for this kind of stuff.
number = "123"
word = "number"
output = []
i = 0  # index of the digit to use for the current letter
for letter in word:
    # Start over at the first digit once every digit has been used.
    if i == len(number):
        i = 0
    # Add the letter as many times as the current digit says.
    output += [letter for _ in range(int(number[i]))]
    i += 1
string_output = ''.join(output)
print(string_output)
you can use zip to match each digit to its respective char in the word (using itertools.cycle for the case the word is longer), then just multiply the char by that digit, and finally join to a single string.
try this:
from itertools import cycle
word = "number"
number = 123
# One integer per decimal digit of the number.
number_digits = list(map(int, str(number)))
# Pair each letter with the next digit (digits repeat endlessly) and
# collect the repeated letters before joining them.
pieces = []
for letter, num in zip(word, cycle(number_digits)):
    pieces.append(letter * num)
result = "".join(pieces)
print(result)
Output:
nuummmbeerrr
I have a text file where I am counting the sum of lines, sum of characters and sum of words. How can I clean the data by removing stop words such as (the, for, a) using string.replace()
I have the codes below as of now.
Ex. if the text file contains the line:
"The only words to count are Apple and Grapes for this text"
It should output:
2 Apple
2 Grapes
1 words
1 only
1 text
And should not output words like:
the
to
are
for
this
Below is the code I have as of now.
# Open the input file and read all of it; the with-block closes the file.
with open('2013_honda_accord.txt', 'r') as infile:
    fname = infile.read()

# COUNT CHARACTERS
num_chars = len(fname)

# COUNT LINES
# A final line that does not end in a newline still counts as a line.
num_lines = fname.count('\n') + (1 if fname and not fname.endswith('\n') else 0)

# COUNT WORDS
fname = fname.lower()  # convert the text to lower first
words = fname.split()
d = {}
for w in words:
    # start at 0 for a new word, then add one per occurrence
    d[w] = d.get(w, 0) + 1

# Every word adds exactly one to some tally, so the total is the word count.
num_words = len(words)

# (count, word) pairs from greatest to lowest count; words with the same
# count come out in reverse alphabetical order (Z-A).
lst = sorted(((count, w) for w, count in d.items()), reverse=True)

# output the total number of characters
print('Your input file has characters = ' + str(num_chars))
# output the total number of lines
print('Your input file has num_lines = ' + str(num_lines))
# output the total number of words
print('Your input file has num_words = ' + str(num_words))

print('\n The 30 most frequent words are \n')

# print rank, count and word for the 30 most frequent words, as announced above
for i, (count, word) in enumerate(lst[:30], start=1):
    print('%2s. %4s %s' % (i, count, word))
Thanks
After opening and reading the file (fname = open('2013_honda_accord.txt', 'r').read()), you can place this code:
blacklist = ["the", "to", "are", "for", "this"]  # Blacklist of words to be filtered out
for word in blacklist:
    # NOTE: this removes the letters wherever they occur, also inside longer words.
    fname = fname.replace(word, "")
# The above causes multiple spaces in the text (e.g. ' Apple  Grapes  Apple')
while "  " in fname:
    fname = fname.replace("  ", " ")  # Replace double spaces by one while double spaces are in text
Edit:
To avoid problems with words containing the unwanted words, you may do it like this (assuming words are in sentence middle):
blacklist = ["the", "to", "are", "for", "this"]  # Blacklist of words to be filtered out
for word in blacklist:
    # Match the word only when it has a space on both sides, so that
    # longer words containing it are left alone.
    fname = fname.replace(" " + word + " ", " ")
    # Or .'!? etc.
A check for double spaces is not required here.
Hope this helps!
You can easily terminate those words by writing a simple function:
# This function drops the restricted words from a sentence.
# Input - sentence, list of restricted words (restricted list should be all lower case)
# Output - list of allowed words, each listed once, in order of first appearance.
def restrict(sentence, restricted):
    # A set makes each membership test O(1) instead of scanning the list.
    banned = set(restricted)
    # dict.fromkeys removes duplicates but keeps first-seen order, so the
    # result is the same on every run.
    return list(dict.fromkeys(word for word in sentence.split() if word.lower() not in banned))
Then you can use this function whenever you want (before or after the word count).
for example:
restricted = ["the", "to", "are", "and", "for", "this"]
sentence = "The only words to count are Apple and Grapes for this text"
word_list = restrict(sentence, restricted)
print(word_list)  # print() call, as required by Python 3
Would print:
["count", "Apple", "text", "only", "Grapes", "words"]
Of course you can add empty words removal (double spaces):
return list(set([word for word in sentence.split() if word.lower() not in restricted and len(word) > 0]))
How to get Python to return the position of a repeating word in a string?
E.g. the word "cat" in "the cat sat on the mat which was below the cat" is in the 2nd and 11th position in the sentence.
You can use re.finditer to find all occurrences of the word in a string and starting indexes:
import re
for word in set(sentence.split()):
    # Escape the word so regex metacharacters in it are matched literally,
    # and require whitespace (or the string boundary) on both sides so that
    # "cat" is not found inside "category".
    pattern = rf"(?<!\S){re.escape(word)}(?!\S)"
    # Character offsets at which the word starts.
    indexes = [w.start() for w in re.finditer(pattern, sentence)]
    print(word, len(indexes), indexes)
And using dictionary comprehension:
# Escaped, whitespace-delimited pattern: matches whole words only and treats regex metacharacters literally.
{word: [w.start() for w in re.finditer(rf"(?<!\S){re.escape(word)}(?!\S)", sentence)] for word in sentence.split()}
This will return a dictionary mapping each word that occurs more than once in the sentence to the list of its word positions (not character indexes):
from collections import defaultdict
sentence = "the cat sat on the mat which was below the cat"


def foo(mystr):
    """Map each repeated word of *mystr* to its 1-based word positions.

    The text is lower-cased and split on whitespace. Words that occur only
    once are left out of the result.
    """
    counter = defaultdict(list)
    for position, word in enumerate(mystr.lower().split(), start=1):
        counter[word].append(position)
    # Keep only the words seen at more than one position.
    return {word: positions for word, positions in counter.items() if len(positions) > 1}


print(foo(sentence))
The following will take an input sentence, take a word from the sentence, and then print the position(s) of the word in a list with a starting index of 1 (it looks like that's what you want from your code).
sentence = input("Enter a sentence, ").lower()
word = input("Enter a word from the sentence, ").lower()
# split() with no argument ignores repeated spaces; split(' ') would turn
# them into empty "words" and shift every later position.
words = sentence.split()
# 1-based positions of every occurrence of the word.
positions = [i for i, w in enumerate(words, start=1) if w == word]
print(positions)
I prefer simplicity and here is my code below:
sentence = input("Enter a sentence, ").lower()
word_to_find = input("Enter a word from the sentence, ").lower()
words = sentence.split()  ## Splits the string 'sentence' and returns a list of words in it. split() splits on default delimiters.
for pos, current in enumerate(words):
    if word_to_find == current:  ## 'current' is the word present in the 'words' list at 'pos' index position.
        print(pos + 1)  ## positions are reported starting from 1
The 'words' consists of the list of all the words present in the sentence. Then after that, we iterate and match each word present at index 'pos' with the word we are looking to find(word_to_find) and if both the words are same then we print the value of pos with 1 added to it.
Hope this is simple enough for you to understand and it serves your purpose.
If you wish to use a list comprehension for the above, then:
words = sentence.split()
# 1-based position of every word equal to word_to_find.
positions = [index for index, candidate in enumerate(words, start=1) if candidate == word_to_find]
print(positions)
Both of the above approaches are equivalent; the latter just gives you a list.
positions = []
sentence = input("Enter the sentence please: ").lower()
# From here on 'sentence' is the list of words, not the raw text.
sentence = sentence.split()
word = input("Enter the word that you would like to search for please: ").lower()
if word not in sentence:
    print("Error, '" + word + "' is not in this sentence.")
else:
    # Collect the 1-based position of every occurrence of the word.
    for x, current in enumerate(sentence, start=1):
        if current == word:
            positions.append(x)
    print(word, "is at positions", positions)
s = "hello fattie i'm a fattie too"
#this code is unsure but manageable
lw = "fattie"  # the word we are looking for
li = []
for i in range(len(s)):
    # True when the word starts at character index i.
    if s.startswith(lw, i):
        print(i)
        # The number of spaces before index i tells how many words precede it.
        space = s[:i].count(" ")
        position = space + 1
        print(position)
        li.append(position)
print(li)
I'm new to Python. I am trying to write a program that calculates the average word length in a sentence, and I have to do it using the .split command. By the way, I'm using Python 3.2.
This is what I've written so far:
sentence = input("Please enter a sentence: ")
print(sentence.split())
So far i have the user enter a sentence and it successfully splits each individual word
they enter, for example: Hi my name is Bob, it splits it into ['hi', 'my', 'name', 'is', 'bob']
but now I'm lost I dunno how to make it calculate each word and find the average length of the sentence.
In Python 3 (which you appear to be using):
>>> sentence = "Hi my name is Bob"
>>> words = sentence.split()
>>> average = sum(len(word) for word in words) / len(words)
>>> average
2.6
You might want to filter out punctuation as well as zero-length words.
>>> sentence = input("Please enter a sentence: ")
Filter out punctuation that doesn't count. You can add more to the string of punctuation if you want:
>>> filtered = ''.join(filter(lambda x: x not in '".,;!-', sentence))
Split into words, and remove words that are zero length:
>>> words = [word for word in filtered.split() if word]
And calculate:
>>> avg = sum(map(len, words))/len(words)
>>> print(avg)
3.923076923076923
>>> sentence = "Hi my name is Bob"
>>> words = sentence.split()
>>> sum(map(len, words))/len(words)
2.6
The concise version:
average = lambda lst: sum(lst)/len(lst) #average = sum of numbers in list / count of numbers in list
avg = average([len(word) for word in sentence.split()]) #generate a list of lengths of words, and calculate average
The step-by-step version:
def average(numbers):
    """Return the arithmetic mean of *numbers*.

    Raises ZeroDivisionError when *numbers* is empty.
    """
    return sum(numbers) / len(numbers)
sentence = input("Please enter a sentence: ")
words = sentence.split()
# Length of each whitespace-separated word (punctuation included).
lengths = [len(word) for word in words]
print('Average length:', average(lengths))  # print() call, as required by Python 3
Output:
>>>
Please enter a sentence: Hey, what's up?
Average length: 4
def main():
    """Prompt for a sentence and print its average word length.

    Words are whitespace-separated. Blank input prints 0.0 instead of
    failing with a division by zero.
    """
    words = input('Enter the sentence: ').split()
    if not words:
        print(0.0)
        return
    # Total number of characters across all words, divided by the word count.
    average = sum(len(word) for word in words) / len(words)
    print(average)
def averageWordLength(mystring):
    """Return the average number of letters per word without using split().

    Words are runs of characters separated by one or more spaces; only
    alphabetic characters count towards a word's length.

    Returns:
        The average as a float, the string "No words" when there are no
        words or no letters at all, or the string "Not a string" when
        *mystring* is not a str.
    """
    if not isinstance(mystring, str):
        return "Not a string"

    letters = 0
    words = 0
    in_word = False
    for character in mystring:
        if character == " ":
            # A space ends the current word; runs of spaces add nothing.
            in_word = False
            continue
        if not in_word:
            # First non-space character after a gap starts a new word, so
            # leading and trailing spaces never inflate the word count.
            words += 1
            in_word = True
        if character.isalpha():
            letters += 1

    if words == 0 or letters == 0:
        return "No words"
    return letters / words
mystring = "What big spaces you have!"
output is 3.0 and I didn't use the split
as a modular :
import re
def avg_word_len(s):
    """Return the average word length of *s* as a float.

    Words are whitespace-separated. Punctuation (anything that is neither a
    word character nor whitespace) is removed first, and tokens that end up
    empty are ignored. Returns 0.0 when there are no words.
    """
    # split() with no argument copes with repeated, leading and trailing whitespace
    words = [re.sub(r'[^\w\s]', '', w) for w in s.split()]  # remove punctuation first
    # punctuation-only tokens are now empty and must not count as words
    words = [w for w in words if w]
    if not words:
        return 0.0
    return sum(len(w) for w in words) / float(len(words))  # then calculate the avg as a float


if __name__ == "__main__":
    s = input("Enter a sentence")
    print(avg_word_len(s))
s = input("Please enter a sentence: ")
words = s.split()
# Summing per-word lengths leaves out every kind of whitespace; the guard
# avoids a division by zero when no word was entered.
avg_word_len = sum(map(len, words)) / len(words) if words else 0.0
print('Word average =', avg_word_len)
Output:
Please enter a sentence: this is a testing string
Word average = 4.0
note: this is a plain vanilla use case, additional boundary conditions can be applied as required.
text = input()
words = text.split()
# All words glued together: its length is the total number of non-space characters.
single = ''.join(words)
# Guard against dividing by zero when no word was entered.
average = len(single) / len(words) if words else 0.0
print(average)
def average():
    """Prompt for a sentence, then print and return its average word length.

    Words are whitespace-separated. Blank input gives 0.0 instead of a
    division by zero.
    """
    value = input("Enter the sentence:")
    words = value.split()
    # Average over the words' lengths, not over character positions.
    result = sum(len(word) for word in words) / len(words) if words else 0.0
    print(f"the average is :{result}")
    return result


average()