how to find the longest_word by using searching loop - python

Write a function longest_word that asks the user for words and returns the longest word entered by the user. It should stop when the user hits return without typing a word. If multiple words have the same maximum length, return the first word entered by the user. If the user quits before entering any words, return “No words were entered”. This function should use a searching loop.
(Hint: remember that the len function returns the length of a string.)

Takes list of files and a next and checks if the words in file appear in those files.
Code:
def list_search_dictionary(fname, listfname):
for search in listfname:
with open(fname,'r') as f1:
with open(search,'r') as f2:
#splits word by word
All = f2.read().split()
formater = lambda x:x.strip().lower()
#formats each element
All = list(map(formater,All))
for word in f1:
word = formater(word)
if word in All:
print(f'Found: {word}')
Using your functions:
Change your function to:
def my_search_dictionary(search_dic, fname):
# read the file
infile = open(fname,"r")
# look for each line in the file
for line in infile:
# remove the string at the end of the line
line_strip = line.strip()
# lowercase
line_lower = line_strip.lower()
# i love dogs
# more than hotdogs
# split " " in the line and make them in the list
line_split = line_lower.split(" ")
# ["i"," love dogs"]
# ["more", " than", " hotdogs"]
# look for each search term in the line_split
for word in line_split:
# check if every word in the line
for key in search_dic:
if word == key:
search_dic[key] += [fname]
infile.close()
The third function would be:
def list_search_dictionary(fname, listfname):
search_dic = key_emptylist(fname)
for file in listfname:
search_dic = my_search_dictonary(search_dic,file)

Related

Finding longest word in a txt file

I am trying to create a function in which a filename is taken as a parameter and the function returns the longest word in the file with the line number attached to the front of it.
This is what I have so far but it is not producing the expected output I need.
def word_finder(file_name):
with open(file_name) as f:
lines = f.readlines()
line_num = 0
longest_word = None
for line in lines:
line = line.strip()
if len(line) == 0:
return None
else:
line_num += 1
tokens = line.split()
for token in tokens:
if longest_word is None or len(token) > len(longest_word):
longest_word = token
return (str(line_num) + ": " + str(longest_word))
I think this is the shortest way to find the word, correct if not
def wordFinder(filename):
with open(filename, "r") as f:
words = f.read().split() # split() returns a list with the words in the file
longestWord = max(words, key = len) # key = len returns the word size
print(longestWord) # prints the longest word
Issue
Exactly what ewong diagnosed:
last return statement is too deep indented
Currently:
the longest word in the first line only
Solution
Should be aligned with the loop's column, to be executed after the loop.
def word_finder(file_name):
with open(file_name) as f:
lines = f.readlines()
line_num = 0
longest_word = None
for line in lines:
line = line.strip()
if len(line) == 0:
return None
else:
line_num += 1
tokens = line.split()
for token in tokens:
if longest_word is None or len(token) > len(longest_word):
longest_word = token
# return here would exit the loop too early after 1st line
# loop ended
return (str(line_num) + ": " + str(longest_word))
Then:
the longest word in the file with the line number attached to the front of it.
Improved
def word_finder(file_name):
with open(file_name) as f:
line_word_longest = None # global max: tuple of (line-index, longest_word)
for i, line in enumerate(f): # line-index and line-content
line = line.strip()
if len(line) > 0: # split words only if line present
max_token = max(token for token in line.split(), key = len) # generator then max of tokens by length
if line_word_longest is None or len(max_token) > len(line_word_longest[1]):
line_word_longest = (i, max_token)
# loop ended
if line_word_longest is None:
return "No longest word found!"
return f"{line_word_longest[0]}: '{line_word_longest[1]}' ({len(line_word_longest[1])} chars)"
See also:
Basic python file-io variables with enumerate
List Comprehensions in Python to compute minimum and maximum values of a list
Some SO research for similar questions:
inspiration from all languages: longest word in file
only python: [python] longest word in file
non python: -[python] longest word in file

Deleting a specific word form a text file in python

I used this code to delete a word from a text file.
f = open('./test.txt','r')
a = ['word1','word2','word3']
lst = []
for line in f:
for word in a:
if word in line:
line = line.replace(word,'')
lst.append(line)
f.close()
f = open('./test.txt','w')
for line in lst:
f.write(line)
f.close()
But for some reason if the words have the same characters, all those characters get deleted. So for e.g
in my code:
def cancel():
global refID
f1=open("refID.txt","r")
line=f1.readline()
flag = 0
while flag==0:
refID=input("Enter the reference ID or type 'q' to quit: ")
for i in line.split(','):
if refID == i:
flag=1
if flag ==1:
print("reference ID found")
cancelsub()
elif (len(refID))<1:
print("Reference ID not found, please re-enter your reference ID\n")
cancel()
elif refID=="q":
flag=1
else:
print("reference ID not found\n")
menu()
def cancelsub():
global refIDarr, index
refIDarr=[]
index=0
f = open('flightbooking.csv')
csv_f = csv.reader(f)
for row in csv_f:
refIDarr.append(row[1])
for i in range (len(refIDarr)):
if refID==refIDarr[i]:
index=i
print(index)
while True:
proceed=input("You are about to cancel your flight booking, are you sure you would like to proceed? y/n?: ")
while proceed>"y" or proceed<"n" or (proceed>"n" and proceed<"y") :
proceed=input("Invalid entry. \nPlease enter y or n: ")
if proceed=="y":
Continue()
break
elif proceed=="n":
main_menu
break
exit
break
def Continue():
lines = list()
with open('flightbooking.csv', 'r') as readFile:
reader = csv.reader(readFile)
for row in reader:
lines.append(row)
for field in row:
if field ==refID:
lines.remove(row)
break
with open('flightbooking.csv', 'w') as writeFile:
writer = csv.writer(writeFile)
writer.writerows(lines)
f = open('refID.txt','r')
a=refIDarr[index]
print(a)
lst = []
for line in f:
for word in a:
if word in line:
line = line.replace(word,'')
lst.append(line)
print(lst)
f.close()
f = open('refID.txt','w')
for line in lst:
f.write(line)
f.close()
print("Booking successfully cancelled")
menu()
When the code is run, the refID variable has one word stored in it, and it should replace just that word with a blank space, but it takes that word for e.g 'AB123', finds all other words which might have an 'A' or a 'B' or the numbers, and replace all of them. How do I make it so it only deletes the word?
Text file before running code:
AD123,AB123
Expected Output in the text file:
AD123,
Output in text file:
D,
Edit: I have added the entire code, and maybe you can help now after seeing that the array is being appended to and then being used to delete from a text file.
here's my opinion.
refIDarr = ["AB123"]
a = refIDarr[0] => a = "AB123"
strings in python are iterable, so when you do for word in a, you're getting 5 loops where each word is actually a letter.
Something like the following is being executed.
if "A" in line:
line = line.replace("A","")
if "B" in line:
line = line.replace("B","")
if "1" in line:
line = line.replace("1","")
if "2" in line:
line = line.replace("2","")
if "3" in line:
line = line.replace("3","")
they correct way to do this is loop over refIDarr
for word in refIDarr:
line = line.replace(word,'')
NOTE: You don't need the if statement, since if the word is not in the line it will return the same line as it was.
"abc".replace("bananan", "") => "abc"
Here's a working example:
refIDarr = ["hello", "world", "lol"]
with open('mytext.txt', "r") as f:
data = f.readlines()
for word in refIDarr:
data = [line.replace(word, "") for line in data]
with open("mytext.txt", "w") as newf:
newf.writelines(data)
The problem is here:
a=refIDarr[index]
If refIDarr is a list of words, accessing specific index makes a be a word. Later, when you iterate over a (for word in a:), word becomes a letter and not a word as you expect, which causes eventually replacing characters of word instead the word itself in your file.
To avoid that, remove a=refIDarr[index] and change your loop to be:
for line in f:
for word in refIDarr:
if word in line:
line = line.replace(word,'')

Adding words to list

I wrote a function that is supposed to add the words from a .txt to a list but it is supposed to ignore empty lines, how ever my function outputs ['',] at an empty line.
def words(filename):
word = []
file = open(filename)
for line in file:
word.append(line.strip())
return word
How can i fix this thanks
what about a simple if test?
def words(filename):
word = []
file = open(filename)
for line in file:
if line.strip() != ' ':
word.append(line.strip())
return word
EDIT: I forgot the .strip() after line
Besides, you could also use if line.strip():
Last, if you want to get a list of words but have several words per line, you need to split them. Assuming your separator is ' ':
def words(filename):
word = []
file = open(filename)
for line in file:
if line.strip() != ' ':
word.extend(line.strip().split())
return word
You can fix this like that:
def words(filename):
word = []
file = open(filename)
for line in file:
if not line.strip():
word.append(line)
return word
Your problem is that you're adding line.strip(), but what happens if line is actually an empty string? Look:
In [1]: line = ''
In [2]: line.strip()
Out[2]: ''
''.strip() returns an empty string.
You need to test for an empty line and skip the append in that case.
def words(filename):
word = []
file = open(filename)
for line in file:
line=line.strip()
if len(line):
word.append(line)
return word

Indexing and search in a text file

I have a text file that contains the contents of a book. I want to take this file and build an index which allows the user to search through the file to make searches.
The search would consist of entering a word. Then, the program would return the following:
Every chapter which includes that word.
The line number of the line
which contains the word.
The entire line the word is on.
I tried the following code:
infile = open(file)
Dict = {}
word = input("Enter a word to search: ")
linenum = 0
line = infile.readline()
for line in infile
linenum += 1
for word in wordList:
if word in line:
Dict[word] = Dict.setdefault(word, []) + [linenum]
print(count, word)
line = infile.readline()
return Dict
Something like this does not work and seems too awkward for handling the other modules which would require:
An "or" operator to search for one word or another
An "and" operator to search for one word and another in the same chapter
Any suggestions would be great.
def classify_lines_on_chapter(book_contents):
lines_vs_chapter = []
for line in book_contents:
if line.isupper():
current_chapter = line.strip()
lines_vs_chapter.append(current_chapter)
return lines_vs_chapter
def classify_words_on_lines(book_contents):
words_vs_lines = {}
for i, line in enumerate(book_contents):
for word in set([word.strip(string.punctuation) for word in line.split()]):
if word:
words_vs_lines.setdefault(word, []).append(i)
return words_vs_lines
def main():
skip_lines = 93
with open('book.txt') as book:
book_contents = book.readlines()[skip_lines:]
lines_vs_chapter = classify_lines_on_chapter(book_contents)
words_vs_lines = classify_words_on_lines(book_contents)
while True:
word = input("Enter word to search - ")
# Enter a blank input to exit
if not word:
break
line_numbers = words_vs_lines.get(word, None)
if not line_numbers:
print("Word not found!!\n")
continue
for line_number in line_numbers:
line = book_contents[line_number]
chapter = lines_vs_chapter[line_number]
print("Line " + str(line_number + 1 + skip_lines))
print("Chapter '" + str(chapter) + "'")
print(line)
if __name__ == '__main__':
main()
Try it on this input file. Rename it as book.txt before running it.

Censor Words from a File Using a CensorFile - Python

I'm trying to create a 'censor' program which reads in a 'censorlist' with words to censor out of a 'censorfile'. I feel like I'm close, but when I run the program it's only censoring one line. Any help would be greatly appreciated!
#!/usr/bin/python
import sys, os
censorlist = raw_input("File with words to censor? ")
if os.path.isfile(censorlist):
print "The file", censorlist, "exists!"
else:
print "The file", censorlist, "doesn't exist!"
sys.exit()
print ""
filterword = {}
for currentline in censorlist:
line = currentline.strip()
filterword.append(line)
censorfile = raw_input("File to censor using provided list? ")
script = []
for currentline in censorfile:
line = currentline.strip()
script.append(line)
for lines in script:
results = []
words = lines.split()
for word in words:
if word in filter:
results.append("****")
else:
results.append(word)
resultfile = open(censorfile, 'w')
for items in results:
resultfile.write(items)
You just use the string's replace method to replace each instance of a substring...
'foo bar spam foo'.replace('foo', '****') # '**** bar spam ****'
You just need to iterate over the list of words to censor and call replace for each.
for word in censorlist:
string.replace(word, '****')

Categories

Resources