def test(file_name):
if file_name.lower().endswith('.gz'):
with gzip.open(file_name) as f:
f_csv = csv.reader(i.TextIOWrapper(f))
#### Same Code
if file_name.lower().endswith('.csv'):
with open(file_name) as f:
f_csv = csv.reader(i.TextIOWrapper(f))
#### Same Code
Question> Is there a better way to combine the above code without duplicating the 'Same Code' section? The function test uses gzip.open if file_name is a .gz file; otherwise it opens the file with the regular open.
One way would be:
def test(file_name):
    """Build a CSV reader over a ``.gz`` or ``.csv`` file.

    The opener is chosen from the file extension (case-insensitive).
    Files with any other extension are ignored and nothing is opened.
    """
    lowered = file_name.lower()
    if lowered.endswith('.gz'):
        loader = gzip.open
    elif lowered.endswith('.csv'):
        loader = open
    else:
        return
    # Open both kinds of file directly in text mode instead of wrapping the
    # handle in TextIOWrapper afterwards: the built-in open() already returns
    # text, and wrapping it a second time breaks on the first read.
    # newline='' is the mode the csv module asks for.
    with loader(file_name, 'rt', newline='') as f:
        f_csv = csv.reader(f)
        #### Same Code
def test(file_name):
    """Build a CSV reader over a ``.gz`` or ``.csv`` file.

    The file is opened according to its extension (case-insensitive) and
    is always closed again, even if the shared processing code raises.
    Files with any other extension are ignored.
    """
    f = None
    lowered = file_name.lower()
    # Text mode for both openers, so no TextIOWrapper is needed afterwards;
    # newline='' is the mode the csv module asks for.
    if lowered.endswith('.gz'):
        f = gzip.open(file_name, 'rt', newline='')
    elif lowered.endswith('.csv'):
        f = open(file_name, newline='')
    if f is not None:
        try:
            f_csv = csv.reader(f)
            #### Same Code
        finally:
            f.close()
Related
I have my code here that reads my config file:
def getConfig(env):
    """Load ``config_<env>_dataset2.json`` from the current working directory.

    ``env`` is lower-cased before it is inserted into the file name.
    Returns the parsed JSON content.
    """
    pwd = os.getcwd()
    # Compare the platform name exactly: a substring test for "win" would
    # also match "Darwin" (macOS) and pick the backslash separator there.
    if platform.system().lower() == "windows":
        path = pwd + "\\config_" + env.lower() + "_dataset2.json"
    else:
        path = pwd + "/config_" + env.lower() + "_dataset2.json"
    # The with-block closes the file even if json.load raises.
    with open(path) as f:
        return json.load(f)
The thing is, I want it to read my other config file as well but I don't know how to incorporate it in the original code above. I know something like this won't work:
def getConfig(env):
pwd=os.getcwd()
if "win" in (platform.system().lower()):
f = open(pwd+"\config_"+env.lower()+"_dataset2.json")
else:
f = open(pwd+"/config_"+env.lower()+"_dataset2.json")
if "win" in (platform.system().lower()):
f = open(pwd+"\config_"+env.lower()+"_dataset1.json")
else:
f = open(pwd+"/config_"+env.lower()+"_dataset1.json")
config = json.load(f)
f.close()
return config
I'm stuck on having the option to run both config files at once, or just run dataset1.json individually, or just run dataset2.json individually.
Maybe something like:
dataset2=config_"+env.lower()+"_dataset2.json
dataset1=config_"+env.lower()+"_dataset1.json
if dataset2:
f = open(pwd+"\config_"+env.lower()+"_dataset2.json")....
#staticmethod
def getConfig(env):
pwd=os.getcwd()
env = env.lower()
with open(os.path.join(pwd, f"config_{env}_dataset2.json")) as f:
config2 = json.load(f)
with open(os.path.join(pwd, f"config_{env}_dataset1.json")) as f:
config1 = json.load(f)
return config2, config1
config2, config1 = getConfig(env)
TypeError: 'staticmethod' object is not callable
You can return multiple values from the function.
There's no need for the conditionals, use os.path.join() to combine pathnames with the OS-specific delimiter.
def getConfig(env):
    """Load both dataset configs for ``env`` from the current directory.

    ``env`` is lower-cased before it is inserted into the file names.
    Returns a ``(dataset1_config, dataset2_config)`` tuple, so it can be
    unpacked as ``dataset1, dataset2 = getConfig(env)``.
    """
    pwd = os.getcwd()
    env = env.lower()
    # config1 must come from the dataset1 file and config2 from dataset2;
    # otherwise the caller receives the two configurations swapped.
    with open(os.path.join(pwd, f"config_{env}_dataset1.json")) as f:
        config1 = json.load(f)
    with open(os.path.join(pwd, f"config_{env}_dataset2.json")) as f:
        config2 = json.load(f)
    return config1, config2
dataset1, dataset2 = getConfig(env)
You also don't really need to join with pwd, since relative pathnames are interpreted relative to the current directory.
I need to process two types of files in a directory - .txt and .gz.
There are two types of open statements for this purpose:
.gz files:
with gzip.open(file_name, 'rt', encoding='utf-8') as f:
line = next(f)
while line:
some code
.txt files:
with open(file_name, 'r', encoding='utf-8') as f:
line = next(f)
while line:
some code
Any further processing commands are absolutely identical. Now I see two options to process these two file types:
Option 1 - Use two identical functions that differ only by open statement. Sounds ugly...
Option 2 - Use if construction as following:
if ext == '.gz':
f = gzip.open(file_name, 'rt', encoding='utf-8')
elif ext == '.txt':
f = open(file_name, 'r', encoding='utf-8')
line = next(f)
while line:
some code
But it still looks awkward to me :/
Question: what is the Pythonic way in Python 3.x to use the open statement according to the file extension?
why not:
with (gzip.open if ext==".gz" else open)(file_name, 'rt', encoding='utf-8') as f:
The first argument of with is a conditional (ternary) expression, where you decide which function to use depending on the extension. I used 'rt' in both cases; it is the default for the standard open. This approach has the advantage of avoiding copy/paste while still letting you use a context manager.
Maybe a generic helper function could be created:
def myopen(file_name):
    """Open ``file_name`` as UTF-8 text, decompressing ``.gz`` files.

    A name whose extension is exactly ``.gz`` is opened with ``gzip.open``;
    anything else goes through the built-in ``open``. The returned file
    object can be used as a context manager.
    """
    opener = gzip.open if os.path.splitext(file_name)[1] == ".gz" else open
    return opener(file_name, 'rt', encoding='utf-8')
use like:
with myopen(file_name):
an alternative is to use a defaultdict with the extension as key
from collections import defaultdict
from pathlib import Path
# Map a file suffix to (opener, positional args, keyword args). Unknown
# suffixes fall back to the built-in open() in text mode.
open_functions = defaultdict(lambda: (open, ("r",), {"encoding": "utf-8"}))
# Path.suffix keeps the leading dot, so the key has to be ".gz", not "gz".
open_functions[".gz"] = (gzip.open, ("rt",), {"encoding": "utf-8"})
filename = Path(filename)
open_function, args, kwargs = open_functions[filename.suffix]
with open_function(filename, *args, **kwargs) as f:
    ...
I would like to suggest the following way:
#------------------------------------
import zipfile
#-----------------------Common Code-------------------------
def disp_line(filee):
    """Print every line of the open file object ``filee``."""
    # Iterate the file object directly rather than calling readlines(), so
    # the whole file is not loaded into memory before printing starts.
    for line in filee:
        print(line)
#-----------------------First File-----------------------
# The with-block closes the archive once every member has been printed.
with zipfile.ZipFile('D:\\DC-Data\\baby_names.zip', "r") as z:
    zinfo = z.namelist()
    for name in zinfo:
        # Each archive member is opened as its own file-like object.
        with z.open(name) as filee:
            disp_line(filee)
#-----------------------2nd File-------------------------
with open('D:\\DC-Data\\iris.txt', 'r') as filee:
disp_line(filee)
#------------------------End ----------------------
I would like to loop through the files in a directory, do something with each of them, and then write out the result for each file.
But my files can't be read because Python treats the file names as string objects and not as readable files.
Is there a way to avoid this?
import re
import os
def create_filename_for_fileout(f1):
    """Return the output path that corresponds to the input file name ``f1``.

    Every occurrence of "TT" in ``f1`` is replaced by "out", and the result
    is appended to the hard-coded output directory.
    """
    fileout_n = f1.replace("TT", "out")
    # str.replace already returns a str, so no extra str() call is needed.
    return "C:\\Users\\KP\\Desktop\\FSC_Treetag\\out\\" + fileout_n
for file_in in os.listdir('C:\\Users\\KP\\Desktop\\FSC_Treetag'):
filename = str(file_in)
file_out = create_filename_for_fileout(filename)
open(file_in, 'r')
open(file_out, 'w')
content_file = file_in.readlines()
for ln in content_file:
regex = re.compile('(.*\t(ADJ|ADV|NOM|VER:cond|VER:futu|VER:impe|VER:impf|VER:infi|VER:pper|VER:pres|VER:pres|VER:simp|VER:subi|VER:subp)\t(.*))')
res = regex.search(ln)
if res:
# categ = res.group(2)
lemme = res.group(3)
file_out.write(str(lemme)+"\n")
file_out.close()
file_in.close()
Result:
content_file = file_in.readlines()
AttributeError: 'str' object has no attribute 'readlines'
>>>
You're not assigning your open to any variable to use.
# Change
open(file_in, 'r')
open(file_out, 'w')
# to
input_file = open(file_in, 'r')
output_file = open(file_out, 'w')
for ln in input_file:
# do your processing
if res:
lemme = res.group(3)
output_file.write(str(lemme) + "\n")
You are not assigning the open functions to the respective handlers (open is returning an object of the file type).
filename = str(file_in)
file_out = create_filename_for_fileout(filename)
open(file_in, 'r')
open(file_out, 'w')
Should be:
file_out = open(create_filename_for_fileout(file_in), 'w')
file_in = open(file_in, 'r')
NOTE: for clarity's sake, it's a good idea to use a different name for the input file handle.
Check: https://docs.python.org/2/library/functions.html#open
open(name[, mode[, buffering]])
Open a file, returning an object of the file type described in section File Objects. If the file cannot be opened, IOError is raised.
I am trying to read an excel file, extract some data, and write it out as a csv. This is pretty new to me and I'm messing up somewhere: I keep getting an empty csv. I'm sure I'm missing something very basic, but darned if I can see it. Here is the code:
```
import xlrd
import os
import csv
from zipfile import ZipFile
import datetime
datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
data = None
outputlist = []
for col in range(1, sheet.ncols):
cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
header = sheet.cell_value(0,col)
maxval = max(cv)
maxpos = cv.index(maxval) + 1
maxtime = sheet.cell_value(maxpos, 0)
realtime = xlrd.xldate_as_tuple(maxtime, 0)
year = realtime[0]
month = realtime[1]
day = realtime[2]
hour = realtime[3]
data = [
'Region:', header,
'Year:', year,
'Month:', month,
'Day:', day,
'Hour:', hour,
maxpos,
maxtime,
realtime,
maxval,
]
path = "./2013_Max_Loads.csv"
return outputlist
def save_file(data, filename):
with open(filename, "wb") as f:
writer = csv.writer(f, delimiter='|')
for line in data:
writer.writerow(line)
parse_file(datafile)
save_file(parse_file(datafile),"2013_Max_Loads.csv")
You declare outfile but you don't use it
You aren't passing a directory (path) for the file to be saved in.
I also think that calling parse_file twice might be messing you up. Just pass the filename and call it from within the save_file function.
I also found that you were returning output list as a blank list.
So here, try this. I will assume your xlrd commands are correct, because I have not personally used the module.
import csv
import xlrd
def parse_file(datafile):
    """Return one summary row per data column of the workbook's first sheet.

    For every column after the first, the row holds the column header, the
    date parts of the row where the column reaches its maximum, the row
    position, the raw date cell, the decoded date tuple and the maximum
    value itself.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    outputlist = []
    for col in range(1, sheet.ncols):
        # Row 0 is the header, so the values start at row 1.
        cv = sheet.col_values(col, start_rowx=1, end_rowx=None)
        header = sheet.cell_value(0, col)
        maxval = max(cv)
        # cv starts at sheet row 1; add 1 to turn the list index back into
        # a sheet row number.
        maxpos = cv.index(maxval) + 1
        # Column 0 of that row is decoded as an Excel date below.
        maxtime = sheet.cell_value(maxpos, 0)
        # NOTE(review): datemode 0 is presumably the 1900-based date system;
        # confirm against the workbook.
        realtime = xlrd.xldate_as_tuple(maxtime, 0)
        year, month, day, hour = realtime[:4]
        outputlist.append([
            'Region:', header,
            'Year:', year,
            'Month:', month,
            'Day:', day,
            'Hour:', hour,
            maxpos,
            maxtime,
            realtime,
            maxval,
        ])
    return outputlist
def save_file(data, filename):
    """Parse the workbook at path ``data`` and write its rows to ``filename``.

    The rows returned by ``parse_file(data)`` are written as a
    pipe-delimited CSV file.
    """
    # Keep the parsed rows: iterating ``data`` itself would walk over the
    # characters of the path string instead of the extracted rows.
    rows = parse_file(data)
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; the binary 'wb' mode makes writerow() fail.
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='|')
        for line in rows:
            writer.writerow(line)
datafile = "./2013_ERCOT_Hourly_Load_Data.xls"
outfile = "./2013_Max_Loads.csv"
save_file(datafile, outfile)
UPDATE: Edited the code in function save_file() to implement @wwii's suggestion.
Try substituting the new save_file() below:
def save_file(data, filename):
    """Parse the workbook at path ``data`` and write its rows to ``filename``.

    The rows returned by ``parse_file(data)`` are written in one call as a
    pipe-delimited CSV file.
    """
    # Use the parsed rows, not ``data``: ``data`` is only the path of the
    # workbook, and writing it would emit the path string itself.
    rows = parse_file(data)
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; the binary 'wb' mode makes writerows() fail.
    with open(filename, 'w', newline='') as f:
        wr = csv.writer(f, delimiter='|')
        wr.writerows(rows)
Also, change the variable (you used writer) to something like wr. You really want to avoid any possible conflicts with having a variable with the same name as a method, a function, or class you are calling.
I have a block of code that reads and writes to csv.
The reader takes a file "x" compares it to file "y" and returns new file "z"
Now I wrote a GUI program using tkinter that returns a filepath to a textbox in the GUI program.
The filepath I get as follows:
def OnButtonClick1(self):
self.labelVariable.set( self.entryVariable.get())
self.entry.focus_set()
self.entry.selection_range(0, tkinter.END)
filename = askopenfilename()
with open(filename,'r') as f:
for file in f:
data = f.read()
self.entry.insert(0,filename)
How can I use this filepath above in my reader to represent "myfile" in the code below?
#Opening my enquiry list .cvs file
datafile = open('myfile', 'r')
datareader = csv.reader(datafile)
n1 = []
for row in datareader:
n1.append(row)
n = list(itertools.chain(*n1))
print()
Help much appreciated!
Maybe something like this
class gui:
...
def OnButtonClick1(self):
self.labelVariable.set( self.entryVariable.get())
self.entry.focus_set()
self.entry.selection_range(0, tkinter.END)
filename = askopenfilename()
self.filename = filename
with open(filename,'r') as f:
for file in f:
data = f.read()
self.entry.insert(0,filename)
def GetFilename(self):
return self.filename
...
gui_object = gui()
...
#Opening my enquiry list .cvs file
myfile = gui_object.GetFilename()
# The with-block closes the file once all rows have been read; newline=''
# is the mode the csv module documents for files it reads.
with open(myfile, 'r', newline='') as datafile:
    datareader = csv.reader(datafile)
    n1 = list(datareader)
# Flatten the list of rows into a single list of cell values.
n = list(itertools.chain.from_iterable(n1))