Transposing all csv files within a folder - python

I got help the last time I asked a question on this site regarding batch processing csv files within a folder using glob.glob() with Python. I am trying to use it this time to transpose all csv files within a folder. The script below only processes the last file and stops. What am I doing wrong?
import csv
import os
import glob
directory = raw_input ("INPUT Folder")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')
for in_file in glob.glob(in_files):
with open(in_file) as input_file:
reader = csv.reader(input_file)
cols = []
for row in reader:
cols.append(row)
filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
with open (os.path.join(output, filename), 'wb') as output_file:
writer = csv.writer(output_file)
for i in range(len(max(cols, key=len))):
writer.writerow ([(c[i] if i<len(c) else '') for c in cols])

You need to indent the "output" portion of the code so that it runs once for each iteration of the for in_file loop:
import csv
import os
import glob

# Python 2 script: transpose every CSV in the INPUT folder and write the
# result under the same file name in the OUTPUT folder.
directory = raw_input ("INPUT Folder")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')

for in_file in glob.glob(in_files):
    # Read the whole file; every parsed input row becomes one output column.
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)

    # "outdent" this code so it only needs to run once for each in_file
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'

    # Indent this to the same level as the rest of the "for in_file" loop!
    with open (os.path.join(output, filename), 'wb') as output_file:
        writer = csv.writer(output_file)
        # The widest input row decides how many output rows are written.
        # An empty input file produces an empty output file instead of
        # max() raising ValueError on an empty sequence.
        width = max(len(c) for c in cols) if cols else 0
        for i in range(width):
            # Short input rows are padded with '' so the output stays rectangular.
            writer.writerow ([(c[i] if i<len(c) else '') for c in cols])
In your version that code only runs once, after the for in_file loop has completed, and therefore only outputs cols data left over from the final iteration of that loop.
I have also "outdented" the filename = ... statement to the for in_file level, as this only needs to be done once for each in_file, not once for each row of each in_file.

You can get a lot of mileage with data manipulation using pandas:
import os
import pandas as pd

# Transpose every CSV in the current directory into "<name>_T.csv".
for filename in os.listdir('.'):
    # We save an augmented filename later,
    # so using splitext is useful for more
    # than just checking the extension.
    prefix, ext = os.path.splitext(filename)
    if ext.lower() != '.csv':
        continue
    # Load the data into a dataframe. DataFrame.from_csv has been removed
    # from pandas; read_csv with the same arguments is the replacement.
    # header=None keeps the first line as data so it is transposed too.
    df = pd.read_csv(filename,
                     header=None,
                     index_col=None,
                     parse_dates=False)
    # Transpose is easy, but you could do TONS
    # of data processing here. pandas is awesome.
    df_transposed = df.T
    # Save to a new file with an augmented name.
    # NOTE: header=True writes the integer column labels (the original row
    # numbers) as the first output line; pass header=False if the output
    # should contain the transposed data only.
    df_transposed.to_csv(prefix+'_T'+ext, header=True, index=False)
The os.walk version is not much different, if you need to dig into subfolders as well.

Here is a working one:
had to google for an hour, but works and tested on python33
import csv
import os
import glob
from itertools import zip_longest


def transpose_rows(rows):
    """Return *rows* transposed, padding short rows with '' to keep it rectangular.

    An empty input yields an empty list.
    """
    return [list(column) for column in zip_longest(*rows, fillvalue='')]


# Raw strings: '\P' and '\c' are not valid escape sequences, so the plain
# literals only work by accident and trigger a warning on current Python.
directory = r'C:\Python33\csv'
output = r'C:\Python33\csv2'
in_files = os.path.join(directory, '*.csv')

for in_file in glob.glob(in_files):
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires; without it Windows output gets blank rows.
    with open(in_file, newline='') as input_file:
        cols = list(csv.reader(input_file))

    # Same base name as the input, written into the output folder.
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'

    with open(os.path.join(output, filename), 'w', newline='') as output_file:
        csv.writer(output_file).writerows(transpose_rows(cols))

in_files will only return a single result in that format. Try returning a list:
in_files = [f for f in os.listdir(directory) if f.endswith('.csv')]

Related

Merge csv files, add original file name to each row in output file

I have multiple csv files in a folder with the same data structure,
0.00;1.05;10.5
0.01;2.05;15.5
0.02;3.05;20.5
...
I want to merge all the csv files into one summary file and add a column with the file name to each line, depending on the original data source.
0.00;1.05;10.5;csv1.csv
0.01;2.05;15.5;csv1.csv
0.02;3.05;20.5;csv1.csv
0.00;5.05;0.05;csv2.csv
0.01;6.05;1.05;csv2.csv
0.02;7.05;2.05;csv2.csv
...
I managed to merge the files, but can't find a way to add the file names.
files = []
for file in os.listdir(folder):
if file.endswith('.csv'):
files.append(file)
with open('results.csv', 'w', newline='') as fw:
cw = csv.writer(fw)
for file in files:
with open(file, newline='') as f:
cr = csv.reader(islice(f,13,None)
cw.writerows(cr)
I dont want to use pandas concat due to ram limitations.
Thanks you.
You don't need to parse the input csv files, just append a delimiter and then the current file name to each line. You can use the fileinput module:
import fileinput
import os
from pathlib import Path

folder = '.' # set accordingly, assume current directory
path = Path(folder)
output_name = 'results.csv'

# Collect the inputs before the output file is opened. A results.csv left
# over from an earlier run is excluded so it is not read while being written.
output_path = Path(output_name).resolve()
inputs = sorted(str(p) for p in path.glob('*.csv') if p.resolve() != output_path)

with open(output_name, 'w') as outfile:
    # fileinput falls back to reading stdin when given no files, so only
    # start it when there is something to merge.
    if inputs:
        with fileinput.input(files=inputs) as f:
            for line in f:
                # f.filename() is the path of the file currently being read;
                # only its base name is appended to the line.
                print(';'.join([line.rstrip('\n'), os.path.basename(f.filename())]), file=outfile)
Regarding your code, you can fix it like this:
import os
import csv

folder = '.'
output_path = os.path.abspath('results.csv')

# List the inputs in a stable order. A results.csv from an earlier run is
# skipped so the output is never merged into itself.
files = []
for file in sorted(os.listdir(folder)):
    if not file.endswith('.csv'):
        continue
    if os.path.abspath(os.path.join(folder, file)) == output_path:
        continue
    files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw, delimiter=';')
    for file in files:
        # os.listdir returns bare names, so join with the folder; otherwise
        # this only works when folder is the current directory.
        with open(os.path.join(folder, file), newline='') as f:
            for row in csv.reader(f, delimiter=';'):
                row.append(file)
                cw.writerow(row)
Here the delimiter argument is set to semi-colon because the default delimiter is comma and your files are using ;. That will fix the proper parsing of the input csv files, and use ; for the output file. Then each input file is processed by reading each line and appending the filename to the row list. Finally the new row is written to the output CSV file.
You can use os and pandas:
import os
import pandas as pd
basedir = <path of your base folder>
all_dfs = []
for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]):
curr_df = pd.read_csv(os.path.join(basedir, filename), sep=';', header=None)
curr_df['filename'] = filename
all_dfs.append(curr_df)
pd.concat(all_dfs, axis=0).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
Or if you prefer in only one line:
pd.concat([pd.concat((df, pd.DataFrame([f for _ in range(len(df))])), axis=1) for f, df in
((filename, pd.read_csv(os.path.join(basedir, filename), sep=';', header=None))
for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]))
]).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
files = []
for file in os.listdir(folder):
if file.endswith('.csv'):
files.append(file)
with open('results.csv', 'w', newline='') as fw:
cw = csv.writer(fw)
for file in files:
with open(file, newline='') as f:
fw.write(f"{file}\n") # just write the filename before the content :)
cr = csv.reader(islice(f,13,None)
cw.writerows(cr)

Separate csv files based on column values

There are few csv files in different folders and sub folders. I need to separate each csv file to incoming and outgoing traffic.
if source == ac:37:43:9b:92:24 && Receiver address ==
8c:15:c7:3a:d0:1a then those rows need to get written to .out.csv
files.
if Transmitter address == 8c:15:c7:3a:d0:1a && Destination==
ac:37:43:9b:92:24 then those rows need to get written into .in.csv
files.
The output files (files that got separated as incoming and outgoing) have to get the same name as input files (eg: if input file is aaa.csv then output files will be aaa.in.csv and aaa.out.csv).
And output files needs to get written into folders and sub folders as input files were.
I tried the below code, but not working.
I am new to programming, so not sure is this code correct or wrong. Any help is greatly appreciated. Thanks
import csv
import os
import subprocess
startdir = '.'
outdir = '.'
suffix = '.csv'
def decode_to_file(cmd, in_file, new_suffix):
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
fileName = outdir + '/' + in_file[len(startdir):-len(suffix)] + new_suffix
os.makedirs(os.path.dirname(fileName), exist_ok=True)
csv_writer = csv.writer(open(fileName, 'w'))
for line_bytes in proc.stdout:
line_str = line_bytes.decode('utf-8')
csv_writer.writerow(line_str.strip().split(','))
for root, dirs, files in os.walk(startdir):
for name in files:
if not name.endswith(suffix):
continue
in_file = os.path.join(root, name)
decode_to_file(
cmd= [if source== ac:37:43:9b:92:24 && Receiver address== 8c:15:c7:3a:d0:1a],
in_file=in_file,
new_suffix='.out.csv'
)
decode_to_file(
cmd= [if Transmitter address == 8c:15:c7:3a:d0:1a && Destination== ac:37:43:9b:92:24],
in_file=in_file,
new_suffix='.in.csv'
)
You could make use of Python's CSV library to process the rows and glob.glob could be used to walk over the files. os.path.splitext() can be used to help with changing the file extension. For example:
import csv
import glob
import os

# Files this script itself produces; they must not be treated as input
# when the script is run again over the same tree.
GENERATED_SUFFIXES = ('.in.csv', '.out.csv')

for filename in glob.glob('**/*.csv', recursive=True):
    if filename.endswith(GENERATED_SUFFIXES):
        continue

    basename, extension = os.path.splitext(filename)
    print(f"Processing - {filename}")

    # Outputs are written next to the input, so the folder layout is kept.
    with open(filename, newline='', encoding='utf-8') as f_input, \
         open(basename + '.in.csv', 'w', newline='', encoding='utf-8') as f_in, \
         open(basename + '.out.csv', 'w', newline='', encoding='utf-8') as f_out:

        csv_input = csv.reader(f_input)
        csv_in = csv.writer(f_in)
        csv_out = csv.writer(f_out)

        for row in csv_input:
            # The column positions (3/4 and 5/6) are an assumption about the
            # file layout; adjust them to match your CSV. The length checks
            # keep blank or short rows from raising IndexError.
            if len(row) > 4 and row[3] == 'ac:37:43:9b:92:24' and row[4] == '8c:15:c7:3a:d0:1a':
                csv_out.writerow(row)

            if len(row) > 6 and row[5] == '8c:15:c7:3a:d0:1a' and row[6] == 'ac:37:43:9b:92:24':
                csv_in.writerow(row)
This assumes that your CSV files are in a standard format, e.g. aaa,bbb,ccc,ddd. The csv.reader() will read each line of the file and convert it into a list of values, automatically split on the commas. So the first value in each row is row[0].

How to add a blank row at the end to multiple CSV files with Python

I'm using Python 3 & I am having trouble appending lines from multiple csv files into multiple rows for the Master_Total.csv file. I suspect that it is due to not having a "pre-existing" blank row for each csv file. If this is true how do I add a new blank row to each TOTAL.csv file?
TOTAL.csv file:
GND, 0.1V, 1.0V, REFKelvin,
0.000000, 0.100436, 1.003407, 150318.406250,
[no empty row]
enviro.csv file:
temp [C], pressure [kPa], humidity [%RH]
23.870001, 85.903000, 33.75244
[empty row]
When I run my script I get this:
Master_Total.csv
GND, 0.1V, 1.0V, REFKelvin,
0.000000, 0.100436, 1.003407, 150318.4062500.000000, 0.100764, 1.005011, 100.3399580.000019, 0.100252, 1.002642, 100.214996...
Master_enviro.csv
temp [C], pressure [kPa], humidity [%RH]
23.870001, 85.903000, 33.752441
23.760000, 85.914001, 32.997131
24.040001, 85.879997, 33.134460
...
Code:
import shutil, glob, csv, os, sys
path = r'directory'
Enviro_Files = glob.glob(path +"**/*enviro.csv")
Total_Files = glob.glob(path +"**/*TOTAL.csv")
with open('directory_desktop/Master_enviro.csv', 'wb') as outfile1:
for i, filename1 in enumerate(Enviro_Files):
with open(filename1, 'rb') as inputfile1:
if i != 0:
inputfile1.readline()
shutil.copyfileobj(inputfile1, outfile1)
print(filename1 + " has been imported.")
with open('directory_desktop/Master_TOTAL.csv', 'wb') as outfile2:
for h, filename2 in enumerate(Total_Files):
with open(filename2, 'rb') as inputfile2:
if h != 0:
inputfile2.readline()
shutil.copyfileobj(inputfile2, outfile2)
print(fname2 + " has been imported.")
If you make use of Python's CSV library, you can easily test that a given row has values in it. That way it does not matter whether there are empty lines or not; they will be skipped over when writing the master files:
import csv
import glob
def merge_csvs(target_filename, csv_list):
    """Merge several CSV files that share a header into one file.

    The first non-blank row of each input is treated as its header. The
    header is written once, taken from the first input that has one.
    Blank rows are dropped, so it does not matter whether an input ends
    with a newline or an empty line. Inputs that are empty or contain only
    blank lines are skipped.

    Args:
        target_filename: Path of the merged file to create (overwritten).
        csv_list: Iterable of input CSV paths, merged in the given order.
    """
    with open(target_filename, 'w', newline='') as f_master_target:
        csv_master_target = csv.writer(f_master_target)
        write_header = True

        for csv_filename in csv_list:
            with open(csv_filename, 'r', newline='') as f_single:
                # csv.reader yields [] for blank lines; filter them out up
                # front so they can never be mistaken for the header.
                rows = (row for row in csv.reader(f_single) if row)

                # A default avoids StopIteration on an empty input file.
                header = next(rows, None)
                if header is None:
                    continue

                if write_header:
                    csv_master_target.writerow(header)
                    write_header = False

                csv_master_target.writerows(rows)
path = 'directory'
Enviro_Files = glob.glob(path + "**/*enviro.csv")
Total_Files = glob.glob(path + "**/*TOTAL.csv")
merge_csvs('Master_enviro.csv', Enviro_Files)
merge_csvs('Master_TOTAL.csv', Total_Files)

Process multiple files in Python with one code

I'm having some trouble with my code. I want to do the following: I have about 30 csv files which I need to process and I want the result in one single csv file. So far I have the following code (sample):
import pandas as pd
import csv as csv
df = pd.read_csv ("file.csv",
delimiter=",",skiprows=1)
datamean10=df[61:240].mean()
datamean15=df[241:420].mean()
list10=[]
list15=[]
list10.append(datamean10.clip(0))
list15.append(datamean15.clip(0))
csvfile = "C:/Users/bla/bla/list10.csv"
with open(csvfile, 'w') as output:
writer = csv.writer(output, lineterminator='\n')
writer.writerows(list10)
csvfile = "C:/Users/bla/bla/list15.csv"
with open(csvfile, 'w') as output:
writer = csv.writer(output, lineterminator='\n')
writer.writerows(list15)
So this code does what I want for a single file. I wrote another script to go through all the files:
import sys, os
for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
for file in files:
if file.endswith('.csv'):
os.system ('averagelist.py {}'.format(root + '\\' + file))
Needless to say this script deletes the lists again and starts off with a blank list while I want to append the results to the same list. So that the resulting files have a row of average values for each file that is processed. Can somebody tell me how to do this? Thank you very much in advance.
This should be a working combination.
import csv
import os
import sys

import pandas as pd

# Folder that is searched (recursively) for the input CSV files.
INPUT_DIR = 'C:/Users/bla/bla/bla'

# One row of averages per processed input file ends up in each list.
list10 = []
list15 = []

for root, dirs, files in os.walk(INPUT_DIR):
    for file in files:
        if not file.endswith('.csv'):
            continue
        # os.walk yields bare file names; join with root to get a usable path.
        df = pd.read_csv(os.path.join(root, file),
                         delimiter=",", skiprows=1)
        # Column means over the two row ranges, with negatives clipped to 0.
        list10.append(df[61:240].mean().clip(0))
        list15.append(df[241:420].mean().clip(0))

# Write each result file once, after every input has been processed, so the
# rows accumulate and no input file is ever opened for writing.
for csvfile, rows in (("C:/Users/bla/bla/list10.csv", list10),
                      ("C:/Users/bla/bla/list15.csv", list15)):
    with open(csvfile, 'w') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(rows)
Just open your output files in append mode inside the inner script:
with open(csvfile, 'a') as output:
and truncate them only in the master script:
with open("C:/Users/bla/bla/list10.csv", 'w') as output:
pass
with open("C:/Users/bla/bla/list15.csv", 'w') as output:
pass
for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
...
But using system to call a Python script from another Python script is bad. It would be better to put the working part of the inner script in a function, and then use it from the outer script after importing it:
Inner script (averagelist.py)
import pandas as pd
import csv as csv
def average(file):
    """Process one input CSV and append its averages to the result file.

    This is a sketch: the ``...`` stands for the averaging code from the
    original single-file script, which is expected to define ``csvfile``
    and ``list15`` before the output is written.
    """
    df = pd.read_csv (file,
                      delimiter=",",skiprows=1)
    ...
    # Append mode: this function runs once per input file, and 'w' would
    # erase the rows written by earlier calls.
    with open(csvfile, 'a') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(list15)
outer script:
import os
import averagelist

# Start from empty result files; averagelist.average() is expected to append
# to them once per input file.
with open("C:/Users/bla/bla/list10.csv", 'w') as output:
    pass
with open("C:/Users/bla/bla/list15.csv", 'w') as output:
    pass

for root, dirs, files in os.walk('C:/Users/bla/bla/bla'):
    for file in files:
        if file.endswith('.csv'):
            # os.path.join instead of a hard-coded '\\' separator.
            averagelist.average(os.path.join(root, file))

copy and rename images based on a csv managing duplicates python

I'm working on a script in python to copy and rename a bunch of images based on a csv.
The image folder named "originals" is like:
Alpha_1.jpg
Beta_2.jpg
And the csv. contains:
Alfa_1.jpg 4474094_1
Beta_2.jpg 4474094_2
Beta_2.jpg 4474094_3
So the result should leave on a new folder named "newnames" the sames images like:
4474094_1.jpg
4474094_2.jpg
4474094_3.jpg
Where the 4474094_2.jpg and 4474094_3.jpg is the same picture as Beta_2.jpg
I have the following code which is not working, please any advice I would be most grateful!
import os
import csv
import sys
import shutil
def copy_rename():
os.chdir(r"C:\Transformer-SSBI\Original")
saved_path = os.getcwd()
file_list = os.listdir(saved_path)
src_dir= r"C:\Transformer-SSBI\Originals"
dst_dir= r"C:\Transformer-SSBI\Newnames"
IDs = {}
with open (r'transformer.csv','rb') as csvfile:
Reader = csv.reader(csvfile, delimiter = ';')
for row in Reader:
IDs[row[0]] = row[1]+'.jpg'
for row in IDs:
for file_name in file_list:
if file_name in row:
oldname = shutil.copy(file_name,dst_dir)
newname = IDs[file_name]
os.rename(oldname, newname)
copy_rename()
Since you store the mapping in a dict and Beta_2.jpg has to be copied to two new names, the second CSV row overwrites the first under the same key, so the file only ends up as 4474094_3.jpg and never as 4474094_2.jpg. You can avoid building the dict altogether and do the copy-and-rename while iterating over the CSV rows:
import os
import csv
import sys
import shutil
def copy_rename():
    """Copy images from the originals folder under the new names listed in a CSV.

    Reads ``transformer.csv`` from the destination folder. Each row is
    ``old_name,new_stem``; the source image is copied to
    ``<new_stem>.jpg``. Rows are handled one at a time, so an old name that
    appears on several rows is copied once per row. Rows whose source file
    does not exist are skipped.
    """
    src_dir= r"C:\Transformer-SSBI\Originals"
    dst_dir= r"C:\Transformer-SSBI\Newnames"
    # Work inside the destination folder: the CSV and the new files are
    # addressed relative to it.
    os.chdir(dst_dir)
    # Text mode with newline='' is what csv.reader needs on Python 3.
    with open (r'transformer.csv', newline='') as csvfile:
        Reader = csv.reader(csvfile, delimiter = ',')
        for row in Reader:
            if len(row) < 2:
                # Blank or incomplete line: nothing to copy.
                continue
            oldname=row[0]
            newname=row[1]+".jpg"
            oldpath = os.path.join(src_dir, oldname)
            if os.path.exists(oldpath):
                # Copy straight to the new name. A separate copy-then-rename
                # fails on Windows when the target name already exists.
                shutil.copy(oldpath, newname)
copy_rename()
This is like @Samuelliyi's answer, except it avoids any race conditions and is (slightly) more cross-platform by using os.path.join.
import os
import csv
import sys
import errno
import shutil
def copy_rename(src_dir, dst_dir, csv_path=None):
    """Copy files from *src_dir* into *dst_dir* under names listed in a CSV.

    Each CSV row is ``old_name,new_stem``. The file ``src_dir/old_name`` is
    copied to ``dst_dir/new_stem`` plus the extension of ``old_name``. An
    old name may appear on several rows and is copied once per row.

    Args:
        src_dir: Folder containing the original files.
        dst_dir: Folder that receives the renamed copies.
        csv_path: Path of the mapping CSV. Defaults to ``transformer.csv``
            inside *dst_dir*.

    Raises:
        OSError: If a copy fails for any reason other than a missing file
            or directory (``ENOENT``), which is silently skipped.
    """
    if csv_path is None:
        csv_path = os.path.join(dst_dir, 'transformer.csv')
    # Text mode with newline='' is what csv.reader needs on Python 3; the
    # former binary mode makes the reader raise on the first row.
    with open (csv_path, newline='') as csvfile:
        Reader = csv.reader(csvfile, delimiter = ',')
        for row in Reader:
            if len(row) < 2:
                # Blank or incomplete line: nothing to copy.
                continue
            oldname = row[0]
            newname = row[1] + os.path.splitext(oldname)[1]
            oldpath = os.path.join(src_dir, oldname)
            newpath = os.path.join(dst_dir, newname)
            try:
                # the rename is implicit in the copy operation
                shutil.copy(oldpath, newpath)
            except OSError as e:
                # only raise exception if it is something other than the file
                # not existing
                if e.errno != errno.ENOENT:
                    raise
src_dir= r"C:\Transformer-SSBI\Originals"
dst_dir= r"C:\Transformer-SSBI\Newnames"
copy_rename(src_dir, dst_dir)
Also, the function is now more general and can be used on any two directories that have the same structure (don't hardcode what you can pass in as a parameter).

Categories

Resources