open file depending on whether it's .gz or not - python

I'm trying to figure out what the best way of opening a python file is based on its type.
For example, I've got something basic like this but it just doesn't seem 'pythonic' to me and I feel like in some way it can be refactored and written more cleaner;
def openfile(filename):
if read_file_from_top:
if not filename.endswith('.gz'):
with open(filename, 'r') as infile:
for line in infile:
# do something
else:
with gzip.open(filename, 'r') as infile:
for line in infile:
# do something
elif read_file_from_bottom:
if not filename.endswith('.gz'):
with open(filename, 'r') as infile:
for line in infile:
# do something
else:
with gzip.open(filename, 'r') as infile:
for line in infile:
# do something
Would there be a better way to do this, maybe using a generator? Thanks.

You should separate the opening and the reading:
def openfile(filename, mode='r'):
if filename.endswith('.gz'):
return gzip.open(filename, mode)
else:
return open(filename, mode)
with openfile(filename, 'r') as infile:
for line in infile:
# do something

I think something like this is at least a little better:
import gzip
def file_line_gen(filename):
if filename.endswith('.gz'):
open_fn = gzip.open
else:
open_fn = open
with open_fn(filename, 'r') as f:
for line in f:
yield line
for line in file_line_gen('data.gz'):
# do something here
print repr(line)

Short solution using a predefined list of crucial functions:
def processFile(filepath):
with [open, gzip.open][0 if not filepath.endswith('.gz') else 1](filepath, 'r') as fh:
if read_file_from_top:
# do something
elif read_file_from_bottom:
# do something

Related

Function that reads last line of file?

The function reads the last line of the file at the specified file path. The function returns the last line of the file as a string, if the file is empty it will return an empty string ("").
I tried writing my code like this but it won't work, it's pretty messy and I'm a beginner
def read_last_line(file_path):
with open(file_path, 'r') as file:
size_file = os.path.getsize(file_path)
return_file_empty = " "
last_line = (list(file)[-1])
print(last_line)
if size_file == 0:
return return_file_empty
else:
return last_line
you can use:
def read_last_line(file_path):
with open(file_path) as f:
lines = f.readlines()
return lines[-1] if lines else ''
for big files you may use:
def read_last_line(file_path):
with open(file_path, 'r') as f:
last_line = ''
for line in f:
last_line = line
return last_line
This opens the file and moves though it until there is no more file (raises StopIteration) and returns the last line.
def read_last_line(filename):
line = ""
with open(filename) as fh:
while True:
try:
line = next(fh)
except StopIteration:
return line
You can use a collections.deque to get it like the following. Unlike the currently accepted answer, doesn't require storing the entire file in memory:
from collections import deque
def get_last_line(filename):
with open(filename, 'r') as f:
try:
lastline = deque(f, 1)[0]
except IndexError: # Empty file.
lastline = None
return lastline
print('last line: {}'.format(get_last_line(filename)))
If I've understood the question correctly, something like this maybe?
def get_last_line(file_path):
with open(file_path, "r") as file:
return next(line for line in reversed(file.read().splitlines()) if line)

CSV file to list of lines?

I have a txt file and I want to save each line as a list in a new file with fname as the new file name. But the output is not being saved. What am I missing?
import csv
with open('file.txt', 'rU') as csvfile:
reader = csv.reader(csvfile, delimiter='\t')
i = 1
for line in reader:
fname = line[0] + line[1]
#print fname
with open(fname, 'w') as out:
out.write(line)
i +=1
To do what you want, you need to fix two things, one is to open the output files in "append" mode so their previous contents aren't wiped-out everytime something additional is written to them.
Secondly you need some way to know the raw data from the file for each csv row it reads. This can be difficult when you use an extension like the csv module and don't know the internals (which you shouldn't use anyway).
To work around that in this case, you can pass a custom csvfile argument to the csv.reader that will give you the information needed. Basically a small preprocessor of the data being read. Here's what I mean:
import csv
def pre_reader(file):
"""Generator that remembers last line read."""
for line in file:
pre_reader.lastline = line
yield line
with open('test_gen.csv', 'rU') as csvfile:
reader = csv.reader(pre_reader(csvfile), delimiter='\t')
i = 1
for line in reader:
fname = line[0] + line[1]
#print fname
with open(fname, 'a') as out:
out.write(pre_reader.lastline)
i +=1
Change:
with open(fname, 'w') as out:
out.write(line)
To:
with open(fname, 'a') as out:
out.write(line)
w Opens a file for writing only. Overwrites the file if the file exists. If the file does not exist, creates a new file for writing.
a Opens a file for appending. The file pointer is at the end of the file if the file exists. That is, the file is in the append mode. If the file does not exist, it creates a new file for writing.
Better way:
import csv
with open('file.txt', 'rU') as csvfile, open(fname, 'w') as out:
reader = csv.reader(csvfile, delimiter='\t')
i = 1
for line in reader:
fname = line[0] + line[1]
out.write(line)
You cannot write a list so change penultimate line to **out.write(str(line))**
import csv
with open('file.txt', 'rU') as csvfile:
reader = csv.reader(csvfile, delimiter='\t')
i = 1
for line in reader:
fname = line[0] + line[1]
#print fname
with open(fname, 'w') as out:
------> out.write(str(line))
i +=1

How can I read individual lines from a CSV file?

At the moment I've got this function:
def writer(file_name)
open_file = open(file_name,"r+", newline='')
csv_output = csv.writer(open_file)
csv_output.writerow(student)
open_file.close()
where student is:
"string_1","string_2",int
I'm looking to read through the file first and check if the "string_1" that I'm writing matches any of the "string_1"s already written, but I can't find a built-in function that lets me read each line and store it as a list.
First, you have to open the file for reading, go through the file line by line and return, if "string_1" is found:
def append_student(file_name, student)
with open(file_name, "r") as f:
for line in csv.reader(f):
if line[0] == student[0]:
return
with open(file_name, "a") as f:
csv.writer(f).writerow(student)

Python: Filter in File I/O

I'm trying to take out the strings "a" and "b" from a document. Here's what I am doing but it's not working because I can't use replace in a list.
def filter_ab(filename):
fileRef=open(filename)
file_list=fileRef.readlines()
filter="ab"
for k in file_list:
for j in k:
if j in filter:
file_list=file_list.replace(j,"")
Can you use something like this:
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
for line in f1:
f2.write(line.replace('a', '').replace('b', ''))
f1.close()
f2.close()
Just use this:
def filter_ab(filename):
lines = []
with open(filename, "r") as fh:
for line in fh.readlines():
line = line.replace("a", "")
line = line.replace("b", "")
lines.append(line)
with open(filename, "w") as fh:
for line in lines:
fh.write(line)
str.translate is actually quite convenient for something like this
with open('file1.txt', 'r') as f1, open('file2.txt', 'w') as f2:
for line in f1:
f2.write(line.translate(None, 'ab'))

Read and overwrite a file in Python

Currently I'm using this:
f = open(filename, 'r+')
text = f.read()
text = re.sub('foobar', 'bar', text)
f.seek(0)
f.write(text)
f.close()
But the problem is that the old file is larger than the new file. So I end up with a new file that has a part of the old file on the end of it.
If you don't want to close and reopen the file, to avoid race conditions, you could truncate it:
f = open(filename, 'r+')
text = f.read()
text = re.sub('foobar', 'bar', text)
f.seek(0)
f.write(text)
f.truncate()
f.close()
The functionality will likely also be cleaner and safer using open as a context manager, which will close the file handler, even if an error occurs!
with open(filename, 'r+') as f:
text = f.read()
text = re.sub('foobar', 'bar', text)
f.seek(0)
f.write(text)
f.truncate()
The fileinput module has an inplace mode for writing changes to the file you are processing without using temporary files etc. The module nicely encapsulates the common operation of looping over the lines in a list of files, via an object which transparently keeps track of the file name, line number etc if you should want to inspect them inside the loop.
from fileinput import FileInput
for line in FileInput("file", inplace=1):
line = line.replace("foobar", "bar")
print(line)
Probably it would be easier and neater to close the file after text = re.sub('foobar', 'bar', text), re-open it for writing (thus clearing old contents), and write your updated text to it.
I find it easier to remember to just read it and then write it.
For example:
with open('file') as f:
data = f.read()
with open('file', 'w') as f:
f.write('hello')
To anyone who wants to read and overwrite by line, refer to this answer.
https://stackoverflow.com/a/71285415/11442980
filename = input("Enter filename: ")
with open(filename, 'r+') as file:
lines = file.readlines()
file.seek(0)
for line in lines:
value = int(line)
file.write(str(value + 1))
file.truncate()
Honestly you can take a look at this class that I built which does basic file operations. The write method overwrites and append keeps old data.
class IO:
def read(self, filename):
toRead = open(filename, "rb")
out = toRead.read()
toRead.close()
return out
def write(self, filename, data):
toWrite = open(filename, "wb")
out = toWrite.write(data)
toWrite.close()
def append(self, filename, data):
append = self.read(filename)
self.write(filename, append+data)
Try writing it in a new file..
f = open(filename, 'r+')
f2= open(filename2,'a+')
text = f.read()
text = re.sub('foobar', 'bar', text)
f.seek(0)
f.close()
f2.write(text)
fw.close()

Categories

Resources