My program is supposed to take a text file, print the contents, count the lines, ask for a string input, remove every occurrence of that string, say how many times the string was removed, and then output the new contents of the file. We were asked to use one file, not an input and an output file.
My code works and does everything it's supposed to up until I try to store the changes in the file at the end of the char_count function and the print_output function seems to not be working right at all.
If I have an input file of contents:
Apples
Oranges
Bananas
Apples
Oranges
Bananas
if I try to remove Bananas, the resulting file contents for the input file is:
ApplesOrangesApplesOrangesles
Oranges
Bananas
I've been trying to figure out what's going on with no progress, and our course textbook doesn't seem to mention overwriting input files, but we're required to do it for an assignment. What is wrong with my last two functions?
def main():
    """Display a file, count its lines, remove a user-supplied string and show the result."""
    input_file_name = input("Please Enter the name of your text file: ")
    # "r+" opens the existing file for both reading and writing.
    infile = open(input_file_name, "r+")
    print()
    print("---------------------------------------")
    print("THE FILE CONTENTS ARE")
    print("---------------------------------------")
    print_file(infile)
    print("---------------------------------------")
    count_lines(infile)
    print("---------------------------------------")
    input_string = input("Please enter the word or string of words you want to remove from the text file: ")
    print("---------------------------------------")
    char_count(infile, input_string)
    print("---------------------------------------")
    print("THE NEW FILE CONTENTS ARE")
    print_output(infile)
    print("---------------------------------------")
    infile.close()


def print_file(infile):
    """Print every line of ``infile`` with trailing whitespace removed."""
    infile.seek(0)
    allLines = infile.readlines()
    for line in allLines:
        text = line.rstrip()
        print(text)


def count_lines(infile):
    """Print the number of lines in ``infile``."""
    infile.seek(0)
    allLines = infile.readlines()
    count = 0
    char = " "
    for line in allLines :
        text = line.rstrip()
        # readlines() already consumed the file, so read(1) returns "" at once
        # and this inner loop never does any real work.
        while char != "":
            char = infile.read(1)
        count = count + 1
    print("THE NUMBER OF LINES IS: %d " % count)


def char_count(infile, input_string) :
    """Remove ``input_string`` from each line and write the lines back to ``infile``."""
    count = 0
    infile.seek(0)
    allLines = infile.readlines()
    infile.seek(0)
    for line in allLines:
        while input_string in line:
            line = line.replace(input_string, "")
            count = count + 1
        text = line.rstrip()
        # NOTE: this is the behaviour the question asks about. The stripped
        # lines are written over the start of the old contents without a
        # newline and without truncating the file, so whatever lies beyond
        # the last written character is left behind.
        infile.write(text)
    print("NUMBER OF OCCURRENCES REMOVED IS: %d" % count)


def print_output(infile):
    """Print the current contents of ``infile`` line by line."""
    infile.seek(0)
    allLines = infile.readlines()
    for line in allLines:
        text = line.rstrip()
        print(text)


if __name__ == "__main__":
    main()
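You have to truncate the file before rewriting it; otherwise the old contents that extend past the newly written text are left in place. Each line also needs its newline written back: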
def main():
    """Display a file, count its lines, remove a user-supplied string and show the result.

    The same file is used for input and output, so it is opened in "r+" mode.
    """
    input_file_name = input("Please Enter the name of your text file: ")
    # The ``with`` block closes the file even if one of the steps raises.
    with open(input_file_name, "r+") as infile:
        print()
        print("---------------------------------------")
        print("THE FILE CONTENTS ARE")
        print("---------------------------------------")
        print_file(infile)
        print("---------------------------------------")
        count_lines(infile)
        print("---------------------------------------")
        input_string = input("Please enter the word or string of words you want to remove from the text file: ")
        print("---------------------------------------")
        char_count(infile, input_string)
        print("---------------------------------------")
        print("THE NEW FILE CONTENTS ARE")
        print_output(infile)
        print("---------------------------------------")


def print_file(infile):
    """Print every line of ``infile`` with trailing whitespace removed."""
    infile.seek(0)
    for line in infile.readlines():
        print(line.rstrip())


def count_lines(infile):
    """Print the number of lines in ``infile``."""
    infile.seek(0)
    count = len(infile.readlines())
    print("THE NUMBER OF LINES IS: %d " % count)


def char_count(infile, input_string):
    """Remove every occurrence of ``input_string`` from ``infile`` in place.

    Lines that end up empty are dropped. The number of removed occurrences is
    printed. An empty ``input_string`` removes nothing.
    """
    infile.seek(0)
    all_lines = infile.readlines()

    count = 0
    kept = []
    for line in all_lines:
        # Loop because removing a match can join the text around it into a
        # new match. The truthiness check stops an empty search string
        # (which is "in" every line) from looping forever.
        while input_string and input_string in line:
            # replace() removes all matches at once, so count them all.
            count += line.count(input_string)
            line = line.replace(input_string, "")
        text = line.rstrip()
        if text:
            kept.append(text + "\n")

    # Empty the file first so no old text survives past the new contents.
    infile.seek(0)
    infile.truncate()
    infile.writelines(kept)
    print("NUMBER OF OCCURRENCES REMOVED IS: %d" % count)


def print_output(infile):
    """Print the current contents of ``infile`` line by line."""
    infile.seek(0)
    for line in infile.readlines():
        print(line.rstrip())


if __name__ == "__main__":
    main()
Related
I have a very large file of about 900k values. It is a repetition of values like
/begin throw
COLOR red
DESCRIPTION
"cashmere sofa throw"
10
10
156876
DIMENSION
140
200
STORE_ADDRESS 59110
/end throw
The values keep changing, but I need it like below:
/begin throw
STORE_ADDRESS 59110
COLOR red
DESCRIPTION "cashmere sofa throw" 10 10 156876
DIMENSION 140 200
/end throw
Currently, my approach is removing the new line and including space in them:
The store address is constant throughout the file, so I thought of removing it from its current position and inserting it before the description:
# Load the whole file, then patch the list of lines in place, one
# "/begin ... /end" structure at a time.
text_file = open(filename, 'r')
filedata = text_file.readlines()
for num,line in enumerate(filedata,0):
    if '/begin' in line:
        # Scan forward from the start of this structure.
        for index in range(num, len(filedata)):
            # NOTE(review): the sample data spells this "STORE_ADDRESS" in
            # upper case with no trailing space, so this test looks like it
            # never matches and the scan runs to the end of the file for
            # every structure.
            if "store_address 59110 " in filedata[index]:
                filedata.remove(filedata[index])
                # NOTE(review): list.insert() needs (index, object); this
                # call would raise TypeError if it were ever reached.
                filedata.insert(filedata[index-7])
                break
            if "DESCRIPTION" in filedata[index]:
                try:
                    # Fold the following lines onto the keyword line by
                    # turning each newline into a space.
                    filedata[index] = filedata[index].replace("\n", " ")
                    filedata[index+1] = filedata[index+1].replace(" ","").replace("\n", " ")
                    filedata[index+2] = filedata[index+2].replace(" ","").replace("\n", " ")
                    filedata[index+3] = filedata[index+3].replace(" ","").replace("\n", " ")
                    filedata[index+4] = filedata[index+4].replace(" ","").replace("\n", " ")
                    filedata[index+5] = filedata[index+5].replace(" ","").replace("\n", " ")
                    filedata[index+6] = filedata[index+6].replace(" ","").replace("\n", " ")
                    filedata[index+7] = filedata[index+7].replace(" ","").replace("\n", " ")
                    filedata[index+8] = filedata[index+8].replace(" ","")
                except IndexError:
                    print("Error Index DESCRIPTION:", index, num)
            if "DIMENSION" in filedata[index]:
                try:
                    filedata[index] = filedata[index].replace("\n", " ")
                    filedata[index+1] = filedata[index+1].replace(" ","").replace("\n", " ")
                    filedata[index+2] = filedata[index+2].replace(" ","").replace("\n", " ")
                    filedata[index+3] = filedata[index+3].replace(" ","")
                except IndexError:
                    print("Error Index DIMENSION:", index, num)
After which I write filedata into another file.
This approach takes too long to run (almost an hour and a half) because, as mentioned earlier, the file is very large.
I was wondering if there was a faster approach to this issue
You can read the file structure by structure so that you don't have to store the whole content in memory and manipulate it there. By structure, I mean all the values between and including /begin throw and /end throw. This should be much faster.
def rearrange_structure_and_write_into_file(
    structure,
    output_file,
    field_names=("STORE_ADDRESS", "COLOR", "DESCRIPTION", "DIMENSION"),
    first_field="STORE_ADDRESS",
):
    """Write one ``/begin`` ... ``/end`` structure in the compact layout.

    Each field is put on a single line (keyword followed by its values,
    separated by spaces) and the ``first_field`` entry is moved directly
    after the ``/begin`` line. All other fields keep their original order.

    Args:
        structure: Text of one structure, including its ``/begin`` and
            ``/end`` lines.
        output_file: Writable text file object that receives the result.
        field_names: Keywords that start a new field. Any other line is
            treated as a value of the field before it.
        first_field: Keyword of the field to emit first.
    """
    header = None
    footer = None
    fields = []  # one list of text fragments per field, keyword line first
    for raw_line in structure.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("/begin"):
            header = line
        elif line.startswith("/end"):
            footer = line
        elif line.split(None, 1)[0] in field_names or not fields:
            fields.append([line])
        else:
            fields[-1].append(line)

    # list.sort is stable, so only the first_field entries move to the front.
    fields.sort(key=lambda parts: parts[0].split(None, 1)[0] != first_field)

    out_lines = []
    if header is not None:
        out_lines.append(header)
    out_lines.extend(" ".join(parts) for parts in fields)
    if footer is not None:
        out_lines.append(footer)
    output_file.writelines(out_line + "\n" for out_line in out_lines)
# Stream the input one structure at a time so the whole file never has to be
# held in memory.
with open(filename, 'r') as original_file, open(output_filename, 'w') as output_file:
    structure_lines = []
    for line in original_file:
        structure_lines.append(line)
        if "/end throw" in line:
            # A complete structure has been read: hand it over and start afresh.
            rearrange_structure_and_write_into_file("".join(structure_lines), output_file)
            structure_lines = []
The insertion and removal of values from a long list is likely to make this code slower than it needs to be, and also makes it vulnerable to any errors and difficult to reason about. If there are any entries without store_address then the code would not work correctly and would search through the remaining entries until it finds a store address.
A better approach would be to break down the code into functions that parse each entry and output it:
# Field keywords, in the order they are written to the new format.
KEYWORDS = ["STORE_ADDRESS", "COLOR", "DESCRIPTION", "DIMENSION"]


def parse_lines(lines):
    """Parse the lines of one throw entry in the old format.

    Args:
        lines: The lines between ``/begin throw`` and ``/end throw``.

    Returns:
        A dict keyed by keyword. A keyword with its value on the same line
        maps to that value as a string. A keyword on a line of its own maps
        to the list of value lines that follow it.

    Raises:
        ValueError: If a value line appears before any keyword line.
    """
    current_section = None
    r = {}
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        keyword, _, inline_value = stripped.partition(" ")
        if keyword in KEYWORDS:
            if inline_value:
                # Keep the whole remainder, not just its first word.
                r[keyword] = inline_value.strip()
            else:
                current_section = r[keyword] = []
        elif current_section is None:
            raise ValueError("value line before any keyword: %r" % stripped)
        else:
            current_section.append(stripped)
    return r


def output_throw(throw):
    """Yield the lines of a parsed throw entry in the new format."""
    yield "/begin throw"
    for keyword in KEYWORDS:
        if keyword in throw:
            value = throw[keyword]
            if isinstance(value, list):
                value = " ".join(value)
            yield f"{keyword} {value}"
    yield "/end throw"
# Collect the lines of each entry, then parse and write the entry when its
# "/end throw" marker is reached.
with open(filename) as in_file, open("output.txt", "w") as out_file:
    entry = []
    for raw_line in in_file:
        line = raw_line.strip()
        if line == "/begin throw":
            entry = []
        elif line == "/end throw":
            throw = parse_lines(entry)
            # A separate name keeps the outer loop variable intact.
            out_file.writelines(out_line + "\n" for out_line in output_throw(throw))
        else:
            entry.append(line)
Or if you really need to maximize performance by removing all unnecessary operations you could read, parse and write in a single long condition, like this:
# Read, reformat and write in a single pass. KEYWORDS is the list of field
# keywords defined for the function-based version.
with open(filename) as in_file, open("output.txt", "w") as out_file:
    # Reformatted field lines of the current entry, held back until its
    # "/end throw" so that STORE_ADDRESS can be written ahead of them.
    entry = []
    # True only while the most recent keyword line had no inline value, i.e.
    # the lines that follow are its values. It starts False so a stray value
    # line can never index into an empty ``entry``.
    in_section = False

    def write(line):
        out_file.write(line + "\n")

    for line in in_file:
        line = line.strip()
        if not line:
            # Nothing to classify; line.split()[0] would raise IndexError.
            continue
        first = line.split()[0]
        if line == "/begin throw":
            in_section = False
            write(line)
            entry = []
        elif line == "/end throw":
            in_section = False
            for line_ in entry:
                write(line_)
            write(line)
        elif first == "STORE_ADDRESS":
            # Written immediately, so it lands right after "/begin throw".
            in_section = False
            write(line)
        elif line in KEYWORDS:
            in_section = True
            entry.append(line)
        elif first in KEYWORDS:
            in_section = False
            entry.append(line)
        elif in_section:
            entry[-1] += " " + line
I used this code to delete a word from a text file.
# Remove each listed word from every line, then rewrite the file.
a = ['word1','word2','word3']
with open('./test.txt','r') as f:
    lst = []
    for line in f:
        for word in a:
            # replace() returns the line unchanged when the word is absent.
            line = line.replace(word,'')
        lst.append(line)
with open('./test.txt','w') as f:
    f.writelines(lst)
But for some reason if the words have the same characters, all those characters get deleted. So for e.g
in my code:
# Ask for a reference ID and compare it with the comma-separated values on
# the first line of refID.txt.
# NOTE(review): the indentation of this listing was lost, so the exact
# nesting of the branches below cannot be confirmed from the text alone.
def cancel():
global refID
# Only the first line of the file is read, and f1 is never closed here.
f1=open("refID.txt","r")
line=f1.readline()
flag = 0
while flag==0:
refID=input("Enter the reference ID or type 'q' to quit: ")
for i in line.split(','):
if refID == i:
flag=1
if flag ==1:
print("reference ID found")
cancelsub()
elif (len(refID))<1:
print("Reference ID not found, please re-enter your reference ID\n")
# Re-prompts by calling itself again.
cancel()
elif refID=="q":
flag=1
else:
print("reference ID not found\n")
menu()
# Collect the second column of flightbooking.csv into refIDarr, record the
# position of refID in it, and ask the user to confirm the cancellation.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the trailing `exit` / `break` lines cannot be confirmed.
def cancelsub():
global refIDarr, index
refIDarr=[]
index=0
# f is never closed in this function.
f = open('flightbooking.csv')
csv_f = csv.reader(f)
for row in csv_f:
refIDarr.append(row[1])
for i in range (len(refIDarr)):
if refID==refIDarr[i]:
index=i
print(index)
while True:
proceed=input("You are about to cancel your flight booking, are you sure you would like to proceed? y/n?: ")
# Keep asking while the answer compares outside "n" and "y".
while proceed>"y" or proceed<"n" or (proceed>"n" and proceed<"y") :
proceed=input("Invalid entry. \nPlease enter y or n: ")
if proceed=="y":
Continue()
break
elif proceed=="n":
# NOTE(review): `main_menu` and `exit` below are bare names, not calls.
main_menu
break
exit
break
# Rewrite flightbooking.csv without the rows that contain refID, then strip
# the reference ID from refID.txt.
def Continue():
lines = list()
with open('flightbooking.csv', 'r') as readFile:
reader = csv.reader(readFile)
for row in reader:
lines.append(row)
for field in row:
if field ==refID:
lines.remove(row)
break
with open('flightbooking.csv', 'w') as writeFile:
writer = csv.writer(writeFile)
writer.writerows(lines)
f = open('refID.txt','r')
# a is a single entry of refIDarr, i.e. one string.
a=refIDarr[index]
print(a)
lst = []
for line in f:
# Iterating over a string yields its characters, so `word` is one character
# of the reference ID on each pass, and each character is removed from the
# line wherever it occurs.
for word in a:
if word in line:
line = line.replace(word,'')
lst.append(line)
print(lst)
f.close()
f = open('refID.txt','w')
for line in lst:
f.write(line)
f.close()
print("Booking successfully cancelled")
menu()
When the code is run, the refID variable holds a single word, and the program should replace just that word with an empty string. Instead it takes that word, e.g. 'AB123', finds every other word that contains an 'A', a 'B' or any of those digits, and removes those characters as well. How do I make it delete only the whole word?
Text file before running code:
AD123,AB123
Expected Output in the text file:
AD123,
Output in text file:
D,
Edit: I have added the entire code, and maybe you can help now after seeing that the array is being appended to and then being used to delete from a text file.
here's my opinion.
refIDarr = ["AB123"]
a = refIDarr[0] => a = "AB123"
strings in python are iterable, so when you do for word in a, you're getting 5 loops where each word is actually a letter.
Something like the following is being executed.
# What the loop over the characters of "AB123" effectively does to each line:
if "A" in line:
    line = line.replace("A","")
if "B" in line:
    line = line.replace("B","")
if "1" in line:
    line = line.replace("1","")
if "2" in line:
    line = line.replace("2","")
if "3" in line:
    line = line.replace("3","")
The correct way to do this is to loop over refIDarr, so that each item is a whole word:
# Each item of refIDarr is a whole word, so only whole words are removed.
for word in refIDarr:
    line = line.replace(word,'')
NOTE: You don't need the if statement, since if the word is not in the line it will return the same line as it was.
"abc".replace("bananan", "") => "abc"
Here's a working example:
refIDarr = ["hello", "world", "lol"]

# Read everything first; the file is reopened for writing below.
with open('mytext.txt', "r") as f:
    data = f.readlines()

# Remove one word at a time from every line.
for word in refIDarr:
    data = [line.replace(word, "") for line in data]

with open("mytext.txt", "w") as newf:
    newf.writelines(data)
The problem is here:
a=refIDarr[index]
If refIDarr is a list of words, indexing it makes a a single word. Later, when you iterate over a (for word in a:), word is a single letter rather than a word as you expect, so you end up replacing the characters of the word instead of the word itself in your file.
To avoid that, remove a=refIDarr[index] and change your loop to be:
for line in f:
    # Iterate over the list of IDs, not over the characters of one ID.
    for word in refIDarr:
        if word in line:
            line = line.replace(word,'')
I'm fairly new to python and found a personal project for myself. I am trying to parse through a large text file to find word occurrences, but I cannot seem to get the code to work. It starts like this:
# NOTE(review): indentation reconstructed; the nesting of the last three
# statements is a best guess.
file = 'randomfile.txt'
with open(file) as f:
    word = input("Enter a word: ")
    line = f.readline()
    num_line = 1
    # `line` is never re-read inside the loop, so the condition never changes.
    while line:
        if word in line:
            print("line {}: {}".format(num_line, line.strip()))
    print("Here are ___ I found with the word" + word)
    num_line += 1
f.close()
This code runs, but it produces no output for the search words, and I can't see why, unless Python is not reading the text file at that path or the fourth line of code is not being executed properly. How can I go about fixing this?
This will work:
word = input("Enter a word: ")
matches = 0
with open("randomfile.txt", 'r') as f:
    # Walk through every line of the file, not just the first one.
    for num_line, line in enumerate(f, start=1):
        # Report the line once if any whitespace-separated token contains the word.
        if any(word in words for words in line.split()):
            print("line {}: {}".format(num_line, line.strip()))
            matches += 1
print("Here are {} lines I found with the word {}".format(matches, word))
The problem with your code is that you only read the first line and never loop through the rest of the file.
with open(file) as f:
    word = input("Enter a word: ")
    line = f.readline()  # this is the only place you read a line
    num_line = 1
    while line:
        if word in line:
            print("line {}: {}".format(num_line, line.strip()))
    print("Here are ___ I found with the word" + word)
    num_line += 1
You should read the next line in the while loop, like this:
with open(file) as f:
    word = input("Enter a word: ")
    line = f.readline()
    num_line = 1
    while line:
        if word in line:
            print("line {}: {}".format(num_line, line.strip()))
        line = f.readline()  # <-- read the next line
    print("Here are ___ I found with the word" + word)
    num_line += 1  # still outside the loop, so it runs only once
Now you are reading through the file. The next problem is you only increment the line number num_line += 1 once the while loop is over, you need to move this to within the loop so it tracks how many lines have been processed.
with open(file) as f:
    word = input("Enter a word: ")
    line = f.readline()
    num_line = 1
    while line:
        if word in line:
            print("line {}: {}".format(num_line, line.strip()))
        line = f.readline()  # <-- read the next line
        num_line +=1  # <-- increase the counter in the loop
    print("Here are ___ I found with the word" + word)
You don't need f.close(); the with statement closes the file automatically. You can also loop directly over the file object f to read each line, like this:
file = 'randomfile.txt'
with open(file) as f:
    word = input("Enter a word: ")
    num_line = 1
    for line in f:  # step through each line of the file
        if word in line:
            print("line {}: {}".format(num_line, line.strip()))
        num_line +=1  # <-- increase the counter in the loop
    print("Here are ___ I found with the word" + word)
# f.close() -- not needed, the with block closes the file
I will leave it for you to fix the print statement.
You can do something like this:
word = input("Enter a word: ")
with open(file) as f:
    # start=1 so the reported numbers match 1-based line numbers.
    for idx, line in enumerate(f, start=1):
        # lower() makes the match case-insensitive, and split() restricts it
        # to whole whitespace-separated words, so 'aaa' does not match inside
        # 'AAAaaa'.
        if word.lower() in line.lower().split():
            print("line {}: {}".format(idx, line.strip()))
def main():
    read()


def read():
    """Print the lines of a user-chosen file with a running number."""
    fileName=input("Enter the file you want to count: ")
    infile=open(fileName , "r")
    text=infile.readline()
    count=0
    while text != "":
        # The line that was just read is overwritten here before it is printed...
        text=str(count)
        count+=1
        # ...and again here, so the first line of the file is never shown.
        text=infile.readline()
        print(str(count)+ ": " + text)
    infile.close()


main()
-the referenced .txt file has only two elements
44
33
-the output of this code should look like
1: 44
2: 33
-my output is
1: 33
2:
I'm not sure why the program is not picking up the first line of the referenced .txt file. The line numbers are correct, but 33 should come second, after 44.
The reason is explained in the comments:
def main():
    read()


def read():
    fileName=input("Enter the file you want to count: ")
    infile=open(fileName , "r")
    text=infile.readline()  # Reads the first line here but does not print it
    count=0
    while text != "":
        text=str(count)
        count+=1
        text=infile.readline()  # Reads the 2nd line here
        print(str(count)+ ": " + text)  # Prints the 2nd line; the first is missed
    infile.close()


main()
Modify the program as:
def main():
    """Entry point: number the lines of a file chosen by the user."""
    read()


def read(fileName=None):
    """Print each line of a text file prefixed with its 1-based line number.

    Args:
        fileName: Path of the file to print. When omitted, the user is
            prompted for it.
    """
    if fileName is None:
        fileName = input("Enter the file you want to count: ")
    # The with block closes the file even if printing fails.
    with open(fileName, "r") as infile:
        for count, text in enumerate(infile, start=1):
            # Drop the line's own newline; print() adds one already.
            print(str(count) + ": " + text.rstrip("\n"))


if __name__ == "__main__":
    main()
def main():
    """Entry point: number the lines of a file chosen by the user."""
    read()


def read(fileName=None):
    """Print each line of a text file as ``<number>: <text>``.

    Args:
        fileName: Path of the file to print. When omitted, the user is
            prompted for it.
    """
    if fileName is None:
        fileName = input("Enter the file you want to count: ")
    with open(fileName, 'r') as f:
        # ": " matches the "1: 44" format asked for in the question.
        numbered = [': '.join([str(i), v.rstrip()]) for i, v in enumerate(f, 1)]
    print('\n'.join(numbered))


if __name__ == "__main__":
    main()
I'm very confused by your read function. You start by reading the first line into text:
text=infile.readline()
Presumably, at this point text contains 44.
You then immediately demolish this value before you've done anything with it by overwriting it with:
text = str(count)
ie you read two lines before printing anything at all.
You should print the value of text before you overwrite it with the next readline.
Simply move the print statement before readline:
while text != "":
    count+=1
    # Print the current line before it is replaced by the next one.
    print(str(count)+ ": " + text)
    text=infile.readline()
I have a text file that contains the contents of a book. I want to take this file and build an index which allows the user to search through the file to make searches.
The search would consist of entering a word. Then, the program would return the following:
Every chapter which includes that word.
The line number of the line
which contains the word.
The entire line the word is on.
I tried the following code:
# Attempt at building a word -> line-number index.
# NOTE(review): as posted this is not valid Python: the `for line in infile`
# line has no trailing colon, `wordList` and `count` are not defined in the
# snippet, and `return` appears outside any visible function.
infile = open(file)
Dict = {}
word = input("Enter a word to search: ")
linenum = 0
line = infile.readline()
for line in infile
linenum += 1
# This loop variable reuses the name `word`, replacing the value typed above.
for word in wordList:
if word in line:
Dict[word] = Dict.setdefault(word, []) + [linenum]
print(count, word)
line = infile.readline()
return Dict
Something like this does not work and seems too awkward for handling the other modules which would require:
An "or" operator to search for one word or another
An "and" operator to search for one word and another in the same chapter
Any suggestions would be great.
def classify_lines_on_chapter(book_contents):
    """Return, for every line, the chapter heading it falls under.

    A line counts as a chapter heading when ``str.isupper()`` is true for it.

    Args:
        book_contents: The lines of the book.

    Returns:
        A list parallel to ``book_contents`` holding the stripped text of the
        most recent heading, or ``None`` for lines before the first heading.
    """
    lines_vs_chapter = []
    # Start with None so text before the first heading does not raise
    # UnboundLocalError.
    current_chapter = None
    for line in book_contents:
        if line.isupper():
            current_chapter = line.strip()
        lines_vs_chapter.append(current_chapter)
    return lines_vs_chapter
def classify_words_on_lines(book_contents):
    """Map every word to the indices of the lines it appears on.

    Words are split on whitespace and stripped of leading and trailing
    punctuation. Matching is case-sensitive.

    Args:
        book_contents: The lines of the book.

    Returns:
        A dict from word to a list of 0-based line indices in ascending
        order, each index listed once per word.
    """
    # Imported here so the function does not depend on a module-level import.
    import string

    words_vs_lines = {}
    for i, line in enumerate(book_contents):
        # A set, so a word repeated on one line is recorded once.
        words_on_line = {token.strip(string.punctuation) for token in line.split()}
        for word in words_on_line:
            if word:
                words_vs_lines.setdefault(word, []).append(i)
    return words_vs_lines
def main():
    """Index book.txt, then answer word searches until a blank line is entered."""
    # Number of leading lines dropped before indexing.
    # NOTE(review): presumably the front matter of the specific book; confirm
    # for other input files.
    skip_lines = 93
    with open('book.txt') as book:
        book_contents = book.readlines()[skip_lines:]
    lines_vs_chapter = classify_lines_on_chapter(book_contents)
    words_vs_lines = classify_words_on_lines(book_contents)
    while True:
        word = input("Enter word to search - ")
        # Enter a blank input to exit
        if not word:
            break
        line_numbers = words_vs_lines.get(word, None)
        if not line_numbers:
            print("Word not found!!\n")
            continue
        for line_number in line_numbers:
            line = book_contents[line_number]
            chapter = lines_vs_chapter[line_number]
            # Convert the 0-based index into a 1-based line number of the file.
            print("Line " + str(line_number + 1 + skip_lines))
            print("Chapter '" + str(chapter) + "'")
            print(line)


if __name__ == '__main__':
    main()
Try it on this input file. Rename it as book.txt before running it.