While I was learning how to work with files in Python, I had a question: How can you delete a line from a file that contains a specific word. I wrote the following code:
arr = []
try:
with open("test.txt") as file:
arr = file.readlines()
except FileNotFoundError:
print("File not found!")
word = "five"
try:
with open("test.txt", "w") as file:
for row in arr:
if word not in row:
file.write(row)
except FileNotFoundError:
print("File not found!")
But I would like to know if it is possible to do this without writing all the lines in one array, because the file can sometimes be very large and there can be a lack of memory.
You just need to do the following:
open it in read and write mode.
Read it and filter the line out
move the cursor to the begin of the file
write it back
remove anything than is after it
with open("test.txt", "r+") as f:
output = filter(lambda line: word not in line, f.readlines())
f.seek(0)
f.write(''.join(output))
f.truncate()
To improve memory usage, just read the file line by line using f.readline() and adjust the seek dinamically.
with open("test.txt", "r+") as f:
latest_written_position = f.tell()
line = f.readline()
reading_position = f.tell()
while line:
if word not in line:
f.seek(latest_written_position)
f.write(line)
latest_written_position = f.tell()
f.seek(reading_position)
line = f.readline()
reading_position = f.tell()
f.seek(latest_written_position)
f.truncate()
reading a file line by line and writing it to the same file, without storing all the lines in memory:
word = "five"
try:
with open("test.txt", "r") as file_in:
with open("test.txt", "w") as file_out:
for line in file_in:
if word not in line:
file_out.write(line)
except FileNotFoundError:
print("File not found!")
This should do the trick:
try:
with open("test.txt") as file:
arr = file.readlines()
except IOError:
from sys import exit as exit_the_progrem
try:
exit_the_progrem(__status=-1)
except:
import sys
exiter = sys
del sys
exiter.exit()
finally:
try:
condtion = True
arr = ['hello', 'world'] ## or comment?
locl_i = len(arr)
for i, elem in enumerate(reversed(arr), start=1):
locl_i -= i
if condtion: # example
del i
if not locl_i:
del locl_i
del elem
del arr[locl_i]
print(arr)
with open('out_f.xtxt', 'w') as of:
of.write(arr)
except Exception as __this_thing_went_wrong:
print(__this_thing_went_wrong)
from sys import exit as exit_the_progrem
success = 123
exit_the_progrem(~success)
You can write as you read instead of storing
Consider the following:
try:
with open("test.txt") as file:
with open("new_test.txt", "w") as new_file:
for row in file:
if word not in row:
new_file.write(row)
except FileNotFoundError:
print("File not found!")
Related
The function reads the last line of the file at the specified file path. The function returns the last line of the file as a string, if the file is empty it will return an empty string ("").
I tried writing my code like this but it won't work, it's pretty messy and I'm a beginner
def read_last_line(file_path):
with open(file_path, 'r') as file:
size_file = os.path.getsize(file_path)
return_file_empty = " "
last_line = (list(file)[-1])
print(last_line)
if size_file == 0:
return return_file_empty
else:
return last_line
you can use:
def read_last_line(file_path):
with open(file_path) as f:
lines = f.readlines()
return lines[-1] if lines else ''
for big files you may use:
def read_last_line(file_path):
with open(file_path, 'r') as f:
last_line = ''
for line in f:
last_line = line
return last_line
This opens the file and moves though it until there is no more file (raises StopIteration) and returns the last line.
def read_last_line(filename):
line = ""
with open(filename) as fh:
while True:
try:
line = next(fh)
except StopIteration:
return line
You can use a collections.deque to get it like the following. Unlike the currently accepted answer, doesn't require storing the entire file in memory:
from collections import deque
def get_last_line(filename):
with open(filename, 'r') as f:
try:
lastline = deque(f, 1)[0]
except IndexError: # Empty file.
lastline = None
return lastline
print('last line: {}'.format(get_last_line(filename)))
If I've understood the question correctly, something like this maybe?
def get_last_line(file_path):
with open(file_path, "r") as file:
return next(line for line in reversed(file.read().splitlines()) if line)
I have key words to be search in one file let say abc.txt and in another file I have my data, def.txt.
I want a code in python to find key words written in abc.txt, in def.txt and if present, print those line in a new file.
Thank you.
I tried writing a code but it didn't work.
following is the code I write.
f = open('/home/vivek/Documents/abc.txt')
f1 = open('output.txt', 'a')
f2 = open('/home/vivek/Documents/def.txt', 'r')
# doIHaveToCopyTheLine=False
for line in f.readlines():
if f2 in line:
f1.write(line)
f1.close()
f.close()
f2.close()
Load the keywords into a list then you can check the other file line-by-line, and write to outfile as you find keywords in the line.
with open('/path/to/keywords.txt') as f:
keywords = set(line.strip() for line in f) # assuming words are separated by line
with open('/path/to/search_me.txt') as f, open('/path/to/outfile.txt', 'w') as outfile:
for line in f:
if any(kw in line for kw in keywords):
outfile.write(line)
You should record all the words in abc.txt use a set and then search them in def.txt
word_set = set()
with open('/home/vivek/Documents/abc.txt') as f:
for line in f:
word_set.add(line.strip())
f1 = open('output.txt', 'a')
with open('/home/vivek/Documents/def.txt') as f:
for line in f:
find = False
for word in word_set:
if word in line:
find = True
break
if find:
f1.write(line)
f1.close()
You can try this code:
with open("keyword.txt", "r") as keyword_file:
keywords = keyword_file.read().strip()
keywords = keywords.split()
with open("data.txt", "r") as data_file, open("output.txt", "w") as output_file:
for line in data_file.readlines():
line = line.strip()
for word in keywords:
if line.find(word) != -1:
print line
output_file.writelines(line + '\n')
break
In addition to sytech's answer you may try this:
with open('def.txt') as kw_obj, open('abc.txt') as in_obj:
keywords = set(kw_obj.read().split())
in_lines = in_obj.readlines()
match_lines = [line for keyword in keywords for line in in_lines if keyword in line]
if match_lines:
with open('out.txt', 'w') as out:
out.write(''.join(match_lines))
Lets say I have a Text file with the below content
fdsjhgjhg
fdshkjhk
Start
Good Morning
Hello World
End
dashjkhjk
dsfjkhk
Now I need to write a Python code which will read the text file and copy the contents between Start and end to another file.
I wrote the following code.
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer = []
keepCurrentSet = True
for line in inFile:
buffer.append(line)
if line.startswith("Start"):
#---- starts a new data set
if keepCurrentSet:
outFile.write("".join(buffer))
#now reset our state
keepCurrentSet = False
buffer = []
elif line.startswith("End"):
keepCurrentSet = True
inFile.close()
outFile.close()
I'm not getting the desired output as expected
I'm just getting Start
What I want to get is all the lines between Start and End.
Excluding Start & End.
Just in case you have multiple "Start"s and "End"s in your text file, this will import all the data together, excluding all the "Start"s and "End"s.
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
copy = False
for line in infile:
if line.strip() == "Start":
copy = True
continue
elif line.strip() == "End":
copy = False
continue
elif copy:
outfile.write(line)
If the text files aren't necessarily large, you can get the whole content of the file then use regular expressions:
import re
with open('data.txt') as myfile:
content = myfile.read()
text = re.search(r'Start\n.*?End', content, re.DOTALL).group()
with open("result.txt", "w") as myfile2:
myfile2.write(text)
I'm not a Python expert, but this code should do the job.
inFile = open("data.txt")
outFile = open("result.txt", "w")
keepCurrentSet = False
for line in inFile:
if line.startswith("End"):
keepCurrentSet = False
if keepCurrentSet:
outFile.write(line)
if line.startswith("Start"):
keepCurrentSet = True
inFile.close()
outFile.close()
Using itertools.dropwhile, itertools.takewhile, itertools.islice:
import itertools
with open('data.txt') as f, open('result.txt', 'w') as fout:
it = itertools.dropwhile(lambda line: line.strip() != 'Start', f)
it = itertools.islice(it, 1, None)
it = itertools.takewhile(lambda line: line.strip() != 'End', it)
fout.writelines(it)
UPDATE: As inspectorG4dget commented, above code copies over the first block. To copy multiple blocks, use following:
import itertools
with open('data.txt', 'r') as f, open('result.txt', 'w') as fout:
while True:
it = itertools.dropwhile(lambda line: line.strip() != 'Start', f)
if next(it, None) is None: break
fout.writelines(itertools.takewhile(lambda line: line.strip() != 'End', it))
Move the outFile.write call into the 2nd if:
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer = []
for line in inFile:
if line.startswith("Start"):
buffer = ['']
elif line.startswith("End"):
outFile.write("".join(buffer))
buffer = []
elif buffer:
buffer.append(line)
inFile.close()
outFile.close()
import re
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer1 = ""
keepCurrentSet = True
for line in inFile:
buffer1=buffer1+(line)
buffer1=re.findall(r"(?<=Start) (.*?) (?=End)", buffer1)
outFile.write("".join(buffer1))
inFile.close()
outFile.close()
I would handle it like this :
inFile = open("data.txt")
outFile = open("result.txt", "w")
data = inFile.readlines()
outFile.write("".join(data[data.index('Start\n')+1:data.index('End\n')]))
inFile.close()
outFile.close()
if one wants to keep the start and end lines/keywords while extracting the lines between 2 strings.
Please find below the code snippet that I used to extract sql statements from a shell script
def process_lines(in_filename, out_filename, start_kw, end_kw):
try:
inp = open(in_filename, 'r', encoding='utf-8', errors='ignore')
out = open(out_filename, 'w+', encoding='utf-8', errors='ignore')
except FileNotFoundError as err:
print(f"File {in_filename} not found", err)
raise
except OSError as err:
print(f"OS error occurred trying to open {in_filename}", err)
raise
except Exception as err:
print(f"Unexpected error opening {in_filename} is", repr(err))
raise
else:
with inp, out:
copy = False
for line in inp:
# first IF block to handle if the start and end on same line
if line.lstrip().lower().startswith(start_kw) and line.rstrip().endswith(end_kw):
copy = True
if copy: # keep the starts with keyword
out.write(line)
copy = False
continue
elif line.lstrip().lower().startswith(start_kw):
copy = True
if copy: # keep the starts with keyword
out.write(line)
continue
elif line.rstrip().endswith(end_kw):
if copy: # keep the ends with keyword
out.write(line)
copy = False
continue
elif copy:
# write
out.write(line)
if __name__ == '__main__':
infile = "/Users/testuser/Downloads/testdir/BTEQ_TEST.sh"
outfile = f"{infile}.sql"
statement_start_list = ['database', 'create', 'insert', 'delete', 'update', 'merge', 'delete']
statement_end = ";"
process_lines(infile, outfile, tuple(statement_start_list), statement_end)
Files are iterators in Python, so this means you don't need to hold a "flag" variable to tell you what lines to write. You can simply use another loop when you reach the start line, and break it when you reach the end line:
with open("data.txt") as in_file, open("result.text", 'w') as out_file:
for line in in_file:
if line.strip() == "Start":
for line in in_file:
if line.strip() == "End":
break
out_file.write(line)
Lets say I have a Text file with the below content
fdsjhgjhg
fdshkjhk
Start
Good Morning
Hello World
End
dashjkhjk
dsfjkhk
Now I need to write a Python code which will read the text file and copy the contents between Start and end to another file.
I wrote the following code.
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer = []
keepCurrentSet = True
for line in inFile:
buffer.append(line)
if line.startswith("Start"):
#---- starts a new data set
if keepCurrentSet:
outFile.write("".join(buffer))
#now reset our state
keepCurrentSet = False
buffer = []
elif line.startswith("End"):
keepCurrentSet = True
inFile.close()
outFile.close()
I'm not getting the desired output as expected
I'm just getting Start
What I want to get is all the lines between Start and End.
Excluding Start & End.
Just in case you have multiple "Start"s and "End"s in your text file, this will import all the data together, excluding all the "Start"s and "End"s.
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
copy = False
for line in infile:
if line.strip() == "Start":
copy = True
continue
elif line.strip() == "End":
copy = False
continue
elif copy:
outfile.write(line)
If the text files aren't necessarily large, you can get the whole content of the file then use regular expressions:
import re
with open('data.txt') as myfile:
content = myfile.read()
text = re.search(r'Start\n.*?End', content, re.DOTALL).group()
with open("result.txt", "w") as myfile2:
myfile2.write(text)
I'm not a Python expert, but this code should do the job.
inFile = open("data.txt")
outFile = open("result.txt", "w")
keepCurrentSet = False
for line in inFile:
if line.startswith("End"):
keepCurrentSet = False
if keepCurrentSet:
outFile.write(line)
if line.startswith("Start"):
keepCurrentSet = True
inFile.close()
outFile.close()
Using itertools.dropwhile, itertools.takewhile, itertools.islice:
import itertools
with open('data.txt') as f, open('result.txt', 'w') as fout:
it = itertools.dropwhile(lambda line: line.strip() != 'Start', f)
it = itertools.islice(it, 1, None)
it = itertools.takewhile(lambda line: line.strip() != 'End', it)
fout.writelines(it)
UPDATE: As inspectorG4dget commented, above code copies over the first block. To copy multiple blocks, use following:
import itertools
with open('data.txt', 'r') as f, open('result.txt', 'w') as fout:
while True:
it = itertools.dropwhile(lambda line: line.strip() != 'Start', f)
if next(it, None) is None: break
fout.writelines(itertools.takewhile(lambda line: line.strip() != 'End', it))
Move the outFile.write call into the 2nd if:
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer = []
for line in inFile:
if line.startswith("Start"):
buffer = ['']
elif line.startswith("End"):
outFile.write("".join(buffer))
buffer = []
elif buffer:
buffer.append(line)
inFile.close()
outFile.close()
import re
inFile = open("data.txt")
outFile = open("result.txt", "w")
buffer1 = ""
keepCurrentSet = True
for line in inFile:
buffer1=buffer1+(line)
buffer1=re.findall(r"(?<=Start) (.*?) (?=End)", buffer1)
outFile.write("".join(buffer1))
inFile.close()
outFile.close()
I would handle it like this :
inFile = open("data.txt")
outFile = open("result.txt", "w")
data = inFile.readlines()
outFile.write("".join(data[data.index('Start\n')+1:data.index('End\n')]))
inFile.close()
outFile.close()
if one wants to keep the start and end lines/keywords while extracting the lines between 2 strings.
Please find below the code snippet that I used to extract sql statements from a shell script
def process_lines(in_filename, out_filename, start_kw, end_kw):
try:
inp = open(in_filename, 'r', encoding='utf-8', errors='ignore')
out = open(out_filename, 'w+', encoding='utf-8', errors='ignore')
except FileNotFoundError as err:
print(f"File {in_filename} not found", err)
raise
except OSError as err:
print(f"OS error occurred trying to open {in_filename}", err)
raise
except Exception as err:
print(f"Unexpected error opening {in_filename} is", repr(err))
raise
else:
with inp, out:
copy = False
for line in inp:
# first IF block to handle if the start and end on same line
if line.lstrip().lower().startswith(start_kw) and line.rstrip().endswith(end_kw):
copy = True
if copy: # keep the starts with keyword
out.write(line)
copy = False
continue
elif line.lstrip().lower().startswith(start_kw):
copy = True
if copy: # keep the starts with keyword
out.write(line)
continue
elif line.rstrip().endswith(end_kw):
if copy: # keep the ends with keyword
out.write(line)
copy = False
continue
elif copy:
# write
out.write(line)
if __name__ == '__main__':
infile = "/Users/testuser/Downloads/testdir/BTEQ_TEST.sh"
outfile = f"{infile}.sql"
statement_start_list = ['database', 'create', 'insert', 'delete', 'update', 'merge', 'delete']
statement_end = ";"
process_lines(infile, outfile, tuple(statement_start_list), statement_end)
Files are iterators in Python, so this means you don't need to hold a "flag" variable to tell you what lines to write. You can simply use another loop when you reach the start line, and break it when you reach the end line:
with open("data.txt") as in_file, open("result.text", 'w') as out_file:
for line in in_file:
if line.strip() == "Start":
for line in in_file:
if line.strip() == "End":
break
out_file.write(line)
I have a file that has one sentence per line. I am trying to read the file and search if the sentence is a question using regex and extract the wh-word from the sentences and save them back into another file according the order it appeared in the first file.
This is what I have so far..
def whWordExtractor(inputFile):
try:
openFileObject = open(inputFile, "r")
try:
whPattern = re.compile(r'(.*)who|what|how|where|when|why|which|whom|whose(\.*)', re.IGNORECASE)
with openFileObject as infile:
for line in infile:
whWord = whPattern.search(line)
print whWord
# Save the whWord extracted from inputFile into another whWord.txt file
# writeFileObject = open('whWord.txt','a')
# if not whWord:
# writeFileObject.write('None' + '\n')
# else:
# whQuestion = whWord
# writeFileObject.write(whQuestion+ '\n')
finally:
print 'Done. All WH-word extracted.'
openFileObject.close()
except IOError:
pass
The result after running the code above: set([])
Is there something I am doing wrong here? I would be grateful if someone can point it out to me.
Something like this:
def whWordExtractor(inputFile):
try:
with open(inputFile) as f1:
whPattern = re.compile(r'(.*)who|what|how|where|when|why|which|whom|whose(\.*)', re.IGNORECASE)
with open('whWord.txt','a') as f2: #open file only once, to reduce I/O operations
for line in f1:
whWord = whPattern.search(line)
print whWord
if not whWord:
f2.write('None' + '\n')
else:
#As re.search returns a sre.SRE_Match object not string, so you will have to use either
# whWord.group() or better use whPattern.findall(line)
whQuestion = whWord.group()
f2.write(whQuestion+ '\n')
print 'Done. All WH-word extracted.'
except IOError:
pass
Not sure if it's what you're looking for, but you could try something like this:
def whWordExtractor(inputFile):
try:
whPattern = re.compile(r'who|what|how|where|when|why|which|whom|whose', re.IGNORECASE)
with open(inputFile, "r") as infile:
for line in infile:
whMatch = whPattern.search(line)
if whMatch:
whWord = whMatch.group()
print whWord
# save to file
else:
# no match
except IOError:
pass
Change '(.*)who|what|how|where|when|why|which|whom|whose(\.*)' to
".*(?:who|what|how|where|when|why|which|whom|whose).*\."