Python - Removing paragraph breaks in input

Python - Removing paragraph breaks in input - python

So I have written a program (however ugly) that counts the number of words and the instances of each unique word in a given input.
My problem is that I want to use it for song lyrics, but most lyric sets come with multiple paragraph breaks.
My question is: how can I take a user input of lyrics with paragraph breaks and reduce the input down to a single string?
This is my code so far:
# Code as posted in the question, with its indentation restored.
# Song maps each word to the number of times it has been seen.
Song = {}
lines = []
# Collect input lines until the first empty line is entered.
while True:
    line = input("")
    if line:
        lines.append(line)
    else:
        break
# Re-join the collected lines into one newline-separated string.
string = '\n'.join(lines)
def string_cleaner(string):
    """Return *string* lower-cased and reduced to the letters a-z and spaces.

    Any whitespace character (newline, tab, ...) is turned into a single
    space so that words on neighbouring lines stay separate; every other
    character that is not a lower-case ASCII letter is dropped.
    """
    validLetters = " abcdefghijklmnopqrstuvwxyz"
    cleaned = []
    for char in string.lower():
        if char.isspace():
            # Keep word boundaries: deleting '\n' would glue "boom\nwhen"
            # together into "boomwhen".
            cleaned.append(' ')
        elif char in validLetters:
            cleaned.append(char)
    return ''.join(cleaned)
def song_splitter(string):
    """Count every word of *string* into the module-level ``Song`` dict.

    The text is normalised with ``string_cleaner`` and split on whitespace;
    each resulting word increments its entry in ``Song``.
    """
    for word in string_cleaner(string).split():
        Song[word] = Song.get(word, 0) + 1
Expected input:
Well, my heart went "boom"
When I crossed that room
And I held her hand in mine
Whoah, we danced through the night
And we held each other tight
And before too long I fell in love with her
Now I'll never dance with another
(Whooh)
Since I saw her standing there
Oh since I saw her standing there
Oh since I saw her standing there
Desired output:
This song has 328 words.
39 of which are unique.
This song is 11% unique words.
('i', 6)
('her', 4)
('standing', 3)
.... etc

The following example code extracts all the words (English alphabet only) from every line and processes them: it counts the total number of words and tallies the instances of each unique word.
import re
# Prompt text passed to input() each time a line is requested.
MESSAGE = 'Please input a new line: '
# Sample lyrics that run() falls back to when the first input line is empty.
TEST_LINE = '''
Well, my heart went "boom"
When I crossed that room
And I held her hand in mine
Whoah, we danced through the night
And we held each other tight
And before too long I fell in love with her
Now I'll never dance with another
(Whooh)
Since I saw her standing there
Oh since I saw her standing there well well
Oh since I saw her standing there
'''
# Compiled once and reused by handle_line(); matches each run of word
# characters (\w also covers digits and the underscore).
prog = re.compile(r'\w+')
class UniqueWordCounter():
    """Tally how many times each distinct word has been added.

    The counts are kept in the public ``data`` dict (word -> occurrences).
    """

    def __init__(self):
        self.data = {}

    def add(self, word):
        """Record one occurrence of *word*; falsy values such as '' are ignored."""
        if word:
            self.data[word] = self.data.get(word, 0) + 1
# instances of each unique word
set_of_words = UniqueWordCounter()
# counts the number of words
count_of_words = 0


def handle_line(line):
    """Add every word found in *line* to the running totals.

    The line is lower-cased, split into words with the module-level
    ``prog`` pattern, and each word bumps both ``count_of_words`` and its
    entry in ``set_of_words``.
    """
    # Declared once at the top of the function rather than inside the loop.
    global count_of_words
    for word in prog.findall(line.lower()):
        count_of_words += 1
        set_of_words.add(word)
def run():
    """Read lyrics line by line from the user and print word statistics.

    An empty first line selects the built-in ``TEST_LINE`` sample. Reading
    stops at the next empty line, after which the total word count, the
    unique word count, their ratio and the three most frequent words are
    printed.
    """
    line = input(MESSAGE)
    if not line:
        line = TEST_LINE
    # Loop continues as long as `line` is not empty
    while line:
        handle_line(line)
        line = input(MESSAGE)
    count_of_unique_words = len(set_of_words.data)
    # Input made only of punctuation yields zero words; avoid dividing by zero.
    if count_of_words:
        unique_percentage = count_of_unique_words / count_of_words
    else:
        unique_percentage = 0.0
    print('-------------------------')
    print('This song has {} words.'.format(count_of_words))
    print('{} of which are unique.'.format(count_of_unique_words))
    print('This song is {:.2%} unique words.'.format(unique_percentage))
    # Most frequent words first.
    items = sorted(set_of_words.data.items(), key=lambda tup: tup[1], reverse=True)
    items = ["('{}', {})".format(k, v) for k, v in items]
    print('\n'.join(items[:3]))
    print('...')


run()
If you want to handle lyrics in other languages, you should check out this link.

Related

word Isn't equal to wordlist.readline() even though it is

Here's my code:
# Code as posted in the question, with its indentation restored; the
# behaviour being asked about (two readline() calls per pass) is unchanged.
wordlist = open('words.txt')
word = input()
i = 0
def loop():
    for i in range(466549):
        # First readline() of this pass: compared against the search word.
        if wordlist.readline().strip() == word:
            print(f'found in {i}')
            return
        else:
            # Second readline() of this pass: reads and prints the *next* line.
            print(f"not {wordlist.readline()}")
            i = i + 1
loop()
Not only does it not return the first value of the list, but it also doesn't say that it found my word when it did. I got my words.txt file from GitHub, and the second word is "1080". But when I put that in, it prints 'not 1080' even though it clearly is.

I think what you are looking for is:
word = input("enter word: ").strip()


def loop():
    """Scan words.txt and report the index of the first line equal to `word`.

    Every non-matching line is reported as it is passed; the function
    returns as soon as a match is printed.
    """
    with open("words.txt") as f:
        # Iterating the file yields one line at a time, so the whole list
        # never has to be loaded into memory.
        for i, line in enumerate(f):
            line = line.strip()
            if line == word:
                print(f"found in line {i}")
                return
            else:
                print(f"did not find in {line}")


loop()
or if you want to stick to using just readline:
word = input("enter word: ").strip()
wordList = open("words.txt")


def loop():
    """Search the open `wordList` file for `word` using readline() only."""
    # iter(callable, sentinel) calls readline() until it returns '' (end of
    # file), so no hard-coded line count is needed and the loop stops at EOF.
    for i, line in enumerate(iter(wordList.readline, "")):
        line = line.strip()
        if line == word:
            print(f"found in line {i}")
            return
        else:
            print(f"did not find in {line}")


loop()
wordList.close()

Maybe you want to find out whether a specific word is in a text file.
First, wordlist.readline() actually reads a line from the file and moves on to the next one, so calling it a second time does not return the same line. That is why print(f"not {wordlist.readline()}") doesn't work as you expected.
For example, let's say words.txt looks like below.
hello
1080
world
When the loop first evaluates if wordlist.readline() ..., the line read is "hello", which is not 1080, so the else branch runs. There, print(f"not {wordlist.readline()}") reads a second line, which is 1080, so "not 1080" gets printed.
Finally, if wordlist.readline() ... reads world from the file, which is also not 1080, and the next readline() hits the end of the file and returns an empty string ('').
The second problem is that the variable i is not used properly. i is advanced automatically by the for loop over range (strictly speaking, it is just assigned the next value from the iterable, not incremented), so you don't need i = 0 or i = i + 1.
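The third point follows from the first: once the file is exhausted, readline() keeps returning an empty string (it never returns None, so .strip() does not raise), and the loop keeps running and printing empty "not" lines until range(466549) is used up.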
There are more things to say, but I'll just show you my example so you can compare it with yours
def loop(word, wordlist):
    """Report the index of the first line of *wordlist* that equals *word*.

    word     -- the text to look for (compared against each stripped line).
    wordlist -- an open text file, or any iterable of lines.
    """
    for index, line in enumerate(wordlist):
        if line.strip() == word:
            # or if you wanna know whether word is in line
            # if word in line:
            print(f"found in {index}")
            return
        else:
            print(f"not in {line}")


# The with-block closes the file even if reading or searching fails.
with open('words.txt') as input_wordlist:
    input_word = input()
    loop(input_word, input_wordlist)

I made a program that checks if a word is in a file, need some advice

I want to print if the word appears, as well as how many times the word appears in the file. I can't get it to say anything other than this word appears 1 or 0 times in the file.
This problem occurs on line 26, print("It appears " + str(wordcount[word]) + " times")
specifically str(wordcount[word]). This is probably a simple question, but this is my first week of Python, so if anyone has an idea please share. Thanks!
I've tried putting wordcount[word], word_counter.__getitem__(wordcount[word]), and word_counter.__getitem__(wordcount)
import collections
# Code as posted in the question, with its indentation restored; the reuse
# of the name `word` for both the input and the loop variable is unchanged.
file = open(r"C:\Users\Patrick Wu\Documents\1wordfreqtest.txt", "r")
if file.mode == "r":
    contents = file.read()
word = input("Word to check for :")
wordcount = {}
"""this below is to remove difference between upper and lower cases as
well as punctuation"""
for word in contents.lower().split():
    word = word.replace(".","")
    word = word.replace(",","")
    word = word.replace(":","")
    word = word.replace("\"","")
    word = word.replace("!","")
    word = word.replace("â€œ","")
    word = word.replace("â€˜","")
    word = word.replace("*","")
    if word not in wordcount:
        wordcount[word] = 1
    else:
        wordcount[word] += 1
word_counter = collections.Counter(wordcount)
if word in str(contents):
    print("This word is in the file :)")
    print("It appears " + str(wordcount[word]) + " times")
else:
    print("This word isn't in the file")

The variable word is overwritten in the local scope, by the loop. So your input word is overwritten by the loop and you end up checking the count of the last word of the input file. Change the input word to be a different variable name than the word you're iterating through in the file.

You have a scoping problem, by using the same name "word" both in the input and in the for-loop.
I would suggest doing something like this:
word = input("Word to check for :")
with open('your_file.txt') as f:
    raw = f.read()
# str.count() counts non-overlapping, case-sensitive substring matches, so
# "cat" is also counted inside "category".
num_appearences = raw.count(word)
print(f"The word {word} appears {num_appearences} times in the file")

You can use this code:
import collections
# Read the whole file, lower-cased, and close it straight away.
with open("wordfreqtest.txt", "r") as file:
    contents = file.read().lower()
word = input("Word to check for :").lower()
# str.count() gives the number of non-overlapping occurrences, which is what
# the find-and-slice loop computed. An empty search word is treated as "not
# found": '' is contained in every string, so that loop would never end.
times = contents.count(word) if word else 0
if times > 0:
    print("This word is in the file :)")
    print("It appears " + str(times) + " times")
else:
    print("This word isn't in the file")

Counting a desired word in a text file

I have to count the number of times a given word appears in a given text file, this one being the Gettysburg Address. For some reason, it is not counting my input of 'nation' so the output looks as such:
'nation' is found 0 times in the file gettysburg.txt
Here is the code I have currently, could someone point out what I am doing incorrectly?
# Code as posted in the question, with its indentation restored; the position
# of `return count` reproduces the behaviour discussed in the answers.
fname = input("Enter a file name to process:")
find = input("Enter a word to search for:")
text = open(fname, 'r').read()
def processone():
    # Lower-case the file contents and split them on whitespace.
    if text is not None:
        words = text.lower().split()
        return words
    else:
        return None
def count_word(tokens, token):
    count = 0
    for element in tokens:
        word = element.replace(",", " ")
        word = word.replace("."," ")
        if word == token:
            count += 1
        return count
words = processone()
word = find
frequency = count_word(words, word)
print("'"+find+"'", "is found", str(frequency), "times in the file", fname)
My first function splits the file into a string and turns all letters in it lower case. The second one removes the punctuation and is supposed to count the word given in the input.
Taking my first coding class, if you see more flaws in my coding or improvements that could be made, as well as helping find the solution to my problem, feel free.

In the for loop in the count_word() function, you have a return statement at the end of the loop, which exits the function immediately, after only one loop iteration.
You probably want to move the return statement to be outside of the for loop.

As a start, I would suggest adding print statements to see what your variables contain; that helps to break down the problem. For example, printing word shows only the first word from the file, which would have explained the problem in your code.
def count_word(tokens, token):
    """Debugging copy of the question's function with a print added.

    The `return` is still inside the loop, so only the first token is
    printed and examined before the function exits.
    """
    count = 0
    for element in tokens:
        word = element.replace(",", " ")
        word = word.replace("."," ")
        print (word)
        if word == token:
            count += 1
        return count
Enter a file name to process:gettysburg.txt
Enter a word to search for:nation
fourscore
'nation' is found 0 times in the file gettysburg.txt

Use code below:
fname = input("Enter a file name to process:")
find = input("Enter a word to search for:")
# Read the whole file; the with-block closes it again right away.
with open(fname, 'r') as source:
    text = source.read()
def processone():
    """Return the module-level `text` lower-cased and split on whitespace.

    Returns None when `text` is None.
    """
    if text is None:
        return None
    return text.lower().split()
def count_word(tokens, token):
    """Return how many entries of *tokens* equal *token*.

    Commas and periods are removed from each entry before comparing, so
    "nation," and "nation." both count as "nation".
    """
    count = 0
    for element in tokens:
        # Remove the punctuation outright: replacing it with a space turned
        # "nation," into "nation ", which never equals "nation".
        word = element.replace(",", "").replace(".", "")
        if word == token:
            count += 1
    # Return only after every token has been examined.
    return count
# Tokenize the file contents, then count the occurrences of the search word.
words = processone()
word = find
frequency = count_word(words, word)
# Report the result with the search word wrapped in single quotes.
print("'"+find+"'", "is found", str(frequency), "times in the file", fname)
The "return" statement has been moved out of the "for" loop.

Word count from the user

I'm completely new to Python and coding in general. I am needing to write a code that allows the user to input many lines and when they are finished writing their multiple sentences, they enter a single period to stop the program and the program will then tell the user how many words were inputted. How would I go about doing this?
Here is what I have so far:
# Code as posted in the question, with its indentation restored; the
# assignment from list.append() that the answer comments on is unchanged.
print("Enter as many lines of text as you want.")
print("When you're done, enter a single period on a line by itself.")
while True:
    print("> ", end="")
    line = input()
    if line == ".":
        break
    totalWords = line.split()
    newWords = totalWords.append(line)
    wordCount = len(newWords)
print("The number of words entered:" , wordCount, "")

You should create content outside the loop.
# Every word entered so far; created once, before the loop starts.
content = []
while True:
    line = input()
    # A single period on its own line ends the input.
    if line == ".":
        break
    # extend() adds the individual words, so no flattening step is needed.
    content.extend(line.split())
print(len(content))
Besides, most methods that modify a sequence or mapping in place return None, so newWords = totalWords.append(line) will always set newWords to None.

Selecting random word with length-criterion. How to decompose?

This is the plan of my game, which I have finished (it works). I want to add a difficulty level at the beginning. Easy is a six-letter word, medium is seven, and hard is eight or more. Where would it go?
Letters = set(string.ascii_lowercase)


def main():
    """Pick a random word from the word list, scramble it and greet the player."""
    # Raw string: "\w" is not a valid escape sequence in a normal literal.
    # The with-block closes the file once the words are read.
    with open(r"E:\wordlist.txt") as wordlist:
        words = [line.strip() for line in wordlist]
    correct_word = random.choice(words).strip().upper()
    letters = list(correct_word)
    random.shuffle(letters)
    # Join the shuffled letters back into a single scrambled word.
    word = "".join(letters)
    print("Welcome to my game, solve the puzzle.")
    print("Lets play")

(In general you will find http://codereview.stackexchange.com more suitable than SO for how-do-I-implement or refactor questions on incomplete code.)
OK, you first want to prompt the user for the difficulty level, convert that to the corresponding min_wordlength and max_wordlength, then use those in your word-selecting-and-shuffling code.
This could all do with a little refactoring, and since you need to pass some data around, I refactored into a class with setup(),play() methods.
Also you only want to read in your wordlist once (e.g. in the class __init__() method), not every game.
Note the use of the tuple assignment from dict level_to_wordlength, and the
compact while-loop idiom in choose_random_word()
import random
import string
class Hangman(object):
    """Skeleton of a word-scramble game: choose a level, pick a word, shuffle it."""

    def __init__(self, wordlist = 'wordlist.txt'):
        """Read the word list once, when the game object is created.

        wordlist -- path of a text file holding one word per line.
        """
        # The with-block closes the file; words are stored uppercase so they
        # match `letters_unused`, which holds uppercase letters.
        with open(wordlist) as source:
            self.words = [word.strip().upper() for word in source]
        #self.level = None
        #self.min_wordlength = None
        #self.max_wordlength = None
        self.correct_word = None
        self.letters_unused = None

    def select_difficulty(self):
        """Prompt until the user answers E, M or H; return that letter."""
        # Works on both Python 2 (raw_input) and Python 3 (input).
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        while True:
            level = read_line("Choose difficulty: E(asy) M(edium) H(ard) : ")
            # An empty answer becomes " ", which is rejected below.
            level = (level or " ")[0].upper()
            if level in 'EMH':
                return level
            print("Error: bad level")
        # or store it in self.level if needed

    def choose_random_word(self, min_wordlength, max_wordlength):
        """Store in `correct_word` a random word whose length is within the bounds.

        Raises ValueError when no word in the list has a suitable length,
        because the selection loop below could otherwise never finish.
        """
        if not any(min_wordlength <= len(w) <= max_wordlength for w in self.words):
            raise ValueError("no word of length %s..%s in the word list"
                             % (min_wordlength, max_wordlength))
        word = ''
        while not( min_wordlength <= len(word) <= max_wordlength ): # note the idiom
            word = random.choice(self.words)
        self.correct_word = word

    def setup(self):
        """Ask for the difficulty, choose the word and reset the unused letters."""
        level = self.select_difficulty() # or store it in self.level if needed
        # Convert level to wordlength constraints... we could use an if..elif ladder for simplicity,
        # but here's a neat idiomatic method using tuple assignment from a dict.
        level_to_wordlength = {'E': (6,6), 'M': (7,7), 'H': (8,999)}
        min_wordlength, max_wordlength = level_to_wordlength[level]
        self.choose_random_word(min_wordlength, max_wordlength)
        self.letters_unused = set(string.ascii_uppercase)

    def play(self):
        """Shuffle the letters of the chosen word and print the greeting."""
        letters = list(self.correct_word)
        random.shuffle(letters)
        word = ''.join(letters)
        print("Welcome to my game, solve the puzzle.")
        print("Let's play")
        # ... you do the rest ...
# Run one game when the file is executed as a script.
if __name__ == '__main__':
    game = Hangman()
    game.setup()
    game.play()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python - Removing paragraph breaks in input - python

Related

word Isn't equal to wordlist.readline() even though it is

I made a program that checks if a word is in a file, need some advice

Counting a desired word in a text file

Word count from the user

Selecting random word with length-criterion. How to decompose?

Categories

Resources