Read all the rows of a CSV - python

Hey, I have this code that concatenates all the CSV files in a folder into a single CSV file. The problem is that the line if i != 0: prevents the first row of each CSV from being read, except in the first CSV. Any ideas on how to modify it so that it reads all the rows of every CSV?
#!/usr/bin/env python
import pandas as pd
import glob, os
import shutil

# import csv files from folder
path = r'/Users/sonia/Animalsmanage/output'
allFiles = glob.glob(path + "/*.csv")

with open('finaloutput.csv', 'wb') as outfile:
    for i, fname in enumerate(allFiles):
        with open(fname, 'rb') as infile:
            if i != 0:
                infile.readline()  # skips the first line of every file after the first
                outfile.write(bytearray(b'\n'))  # add an empty line
            shutil.copyfileobj(infile, outfile)
            print(fname + " has been imported.")

If you want to include all the rows from the input files and separate the output of each input file by a blank line, you can try this:
with open('finaloutput.csv', 'wb') as outfile:
    for i, fname in enumerate(allFiles):
        with open(fname, 'rb') as infile:
            shutil.copyfileobj(infile, outfile)
            outfile.write(bytearray(b'\n'))
            print(fname + " has been imported.")
So if I have 3 input files:
test1.csv:
name,id
dan,22
keith,23
test2.csv:
name,id
mike,33
phil,44
test3.csv:
name,id
john,5
peter,6
My output file will contain:
name,id
dan,22
keith,23
name,id
mike,33
phil,44
name,id
john,5
peter,6
EDIT:
If you don't want the blank line at the end, you can use the following instead:
numFiles = len(allFiles)
with open('finaloutput.csv', 'wb') as outfile:
    for i, fname in enumerate(allFiles):
        with open(fname, 'rb') as infile:
            shutil.copyfileobj(infile, outfile)
            if i < numFiles - 1:
                outfile.write(bytearray(b'\n'))
            print(fname + " has been imported.")
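Since your script already imports pandas, another option is to let pandas do the concatenation. This is a minimal sketch, assuming all files share the same header and fit in memory; note that it writes the header only once instead of repeating it for every file:
import glob
import pandas as pd

path = r'/Users/sonia/Animalsmanage/output'
allFiles = glob.glob(path + "/*.csv")

# read every row of every file; the first line of each file is treated as the header
frames = [pd.read_csv(f) for f in allFiles]

# stack all rows and write a single output file, without the pandas index column
pd.concat(frames, ignore_index=True).to_csv('finaloutput.csv', index=False)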

Related

Python - Read files from folder and Write CSV file in format

import glob
import os
import csv
from collections import OrderedDict

# Remove output file if it already exists, to resolve the append issue
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
if os.path.isfile(file_path):
    os.remove(file_path)

list_of_files = glob.glob('C:\\Users\\Desktop\\Cobol\\*.CBL')  # Input files in folder
Fields = ['Program Name', 'LinesofCode']  # to be displayed in output CSV file

# opening output csv file to write (Fields)
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile1:
    csvwriter = csv.writer(csvfile1)
    csvwriter.writerow(Fields)
    csvfile1.close()

def process_files_loc(list_of_files):
    for fileName in list_of_files:
        with open(fileName) as i:
            count = sum(1 for line in i)
            my_dict = {i: count}  # input filename and its lines of code
        ordered_dict = OrderedDict()  # using OrderedDict
        print(ordered_dict)
        # creating ordered dict from dict
        ordered_dict = OrderedDict(my_dict)
        print(ordered_dict)
        # writing records of Program name and LinesofCode to output csv file
        file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
        with open(file_path, 'a') as csvfile2:
            csvwriter = csv.writer(csvfile2)
            csvwriter.writerows(ordered_dict)
            csvfile2.close()

process_files_loc(list_of_files)
Output in Terminal (Error):
PS C:\Users\Python-1> & C:/Users/AppData/Local/Programs/Python/Python310/python.exe c:/Users/Python-1/one.py
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\ABCDEFGH.CBL' mode='r' encoding='cp1252'>, 191)])
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\IJKLMNOP.CBL' mode='r' encoding='cp1252'>, 195)])
Actual output file in the folder:
C:\Users\Desktop\Cobol\Outputs
Name        Date Modified    Type             Size
LOC3X.csv   9/15/2022 time   Comma Separated  1KB
Problem: the script executed, read the 2 CBL files in the folder, and created 1 CSV file in the output folder. The output CSV file should have:
Program Name LinesofCode
ABCDEFGH.CBL 191
IJKLMNOP.CBL 195
However, the actual output in the CSV file is only:
Program Name LinesofCode
Try something like this:
import glob
import csv
import os

def process_files_loc(files):
    res = []
    for file in files:
        with open(file) as f:
            line_count = len([line.strip("\n") for line in f if line != "\n"])
            res.append([os.path.basename(f.name), line_count])
    return res

if __name__ == '__main__':
    with open('C:\\Users\\Main\\Desktop\\test\\test.csv', 'w', newline='') as f:
        csvwriter = csv.writer(f)
        csvwriter.writerow(['Program Name', 'LinesofCode'])
        csvwriter.writerows(process_files_loc(glob.glob('C:\\Users\\Main\\Desktop\\test\\*.PY')))
Result: (screenshot of the resulting CSV omitted)
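For reference, csv.writer.writerows expects an iterable of rows, each row itself being a sequence of field values. The original code passed an OrderedDict, which iterates over its keys (here, file objects) rather than rows of values, so no usable data rows were written. A minimal illustration of the expected shape, using the line counts from the terminal output above:
import csv

rows = [['ABCDEFGH.CBL', 191], ['IJKLMNOP.CBL', 195]]
with open('LOC3X.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Program Name', 'LinesofCode'])  # header row
    writer.writerows(rows)                            # one list per data row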
Regards,

Merge csv files, add original file name to each row in output file

I have multiple csv files in a folder with the same data structure,
0.00;1.05;10.5
0.01;2.05;15.5
0.02;3.05;20.5
...
I want to merge all the csv files into 1 summary file and add a column with the file name to each line, depending on the original data source.
0.00;1.05;10.5;csv1.csv
0.01;2.05;15.5;csv1.csv
0.02;3.05;20.5;csv1.csv
0.00;5.05;0.05;csv2.csv
0.01;6.05;1.05;csv2.csv
0.02;7.05;2.05;csv2.csv
...
I managed to merge the files, but can't find a way to add the file names.
import os
import csv
from itertools import islice

files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw)
    for file in files:
        with open(file, newline='') as f:
            cr = csv.reader(islice(f, 13, None))
            cw.writerows(cr)
I don't want to use pandas concat due to RAM limitations.
Thank you.
You don't need to parse the input csv files, just append a delimiter and then the current file name to each line. You can use the fileinput module:
import fileinput
from pathlib import Path

folder = '.'  # set accordingly; assume current directory
path = Path(folder)

with fileinput.input(files=path.glob('*.csv')) as f, open('results.csv', 'w') as outfile:
    for line in f:
        # fileinput.filename() returns the Path of the file currently being read
        print(';'.join([line.rstrip('\n'), fileinput.filename().name]), file=outfile)
Regarding your code, you can fix it like this:
import os
import csv

folder = '.'

files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw, delimiter=';')
    for file in files:
        with open(file, newline='') as f:
            for row in csv.reader(f, delimiter=';'):
                row.append(file)
                cw.writerow(row)
Here the delimiter argument is set to a semicolon because the default delimiter is a comma and your files use ;. That fixes the parsing of the input csv files and uses ; for the output file as well. Each input file is then processed by reading each line and appending the filename to the row list, and the new row is written to the output CSV file.
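To see why the delimiter matters: with the default comma delimiter, a semicolon-separated line comes back as a single field. A small illustration:
import csv
import io

line = io.StringIO("0.00;1.05;10.5\n")
print(next(csv.reader(line)))                 # ['0.00;1.05;10.5'], one field
line.seek(0)
print(next(csv.reader(line, delimiter=';')))  # ['0.00', '1.05', '10.5']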
You can use os and pandas:
import os
import pandas as pd

basedir = <path of your base folder>

all_dfs = []
for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]):
    curr_df = pd.read_csv(os.path.join(basedir, filename), sep=';', header=None)
    curr_df['filename'] = filename
    all_dfs.append(curr_df)

pd.concat(all_dfs, axis=0).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
Or, if you prefer, in only one line:
pd.concat([pd.concat((df, pd.DataFrame([f for _ in range(len(df))])), axis=1) for f, df in
           ((filename, pd.read_csv(os.path.join(basedir, filename), sep=';', header=None))
            for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]))
           ]).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
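If the RAM limitation you mention is the concern with pandas, the chunksize parameter of read_csv keeps memory bounded by streaming each file in pieces. A sketch, assuming the same semicolon-delimited layout as above (the chunk size of 10000 is an arbitrary choice):
import os
import pandas as pd

basedir = '.'  # assumption: the csv files live in the current directory

with open('results.csv', 'w', newline='') as out:
    for filename in sorted(f for f in os.listdir(basedir)
                           if f.endswith('.csv') and f != 'results.csv'):
        # stream the file in bounded chunks instead of loading it whole
        for chunk in pd.read_csv(os.path.join(basedir, filename), sep=';',
                                 header=None, chunksize=10000):
            chunk['filename'] = filename  # add the source file name as a column
            chunk.to_csv(out, sep=';', header=False, index=False)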
import os
import csv
from itertools import islice

files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw)
    for file in files:
        with open(file, newline='') as f:
            fw.write(f"{file}\n")  # just write the filename before the content :)
            cr = csv.reader(islice(f, 13, None))
            cw.writerows(cr)

Delete blank rows in a csv file in Python

I have some csv files in a folder and I am trying to delete all blank rows and move the new files into a new folder.
Here is the code I have:
import csv
import glob
import os
import shutil

path = 'in_folder/*.csv'
files = glob.glob(path)

# Read every file in the directory
x = 0  # counter
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
        with open(filename, 'w') as fout:
            for line in fin.readlines():
                if ''.join(line.split(',')).strip() == '':
                    continue
                fout.write(line)
                x += 1

dir_src = "in_folder"
dir_dst = "out_folder"
for file in os.listdir(dir_src):
    if x > 0:
        src_file = os.path.join(dir_src, file)
        dst_file = os.path.join(dir_dst, file)
        shutil.move(src_file, dst_file)
What the code is doing right now is deleting everything from the files and then moving them to the new folder. I want the files to stay the same, but with the blank rows deleted.
You can just output every line to the new file, no need to do any moving afterwards:
dir_src = "in_folder/*.csv"
dir_dst = "out_folder"
files = glob.glob(dir_src)

# Read every file in the directory
x = 0  # counter
for filename in files:
    outfilename = os.path.join(dir_dst, os.path.basename(filename))
    with open(filename, 'r') as fin:
        with open(outfilename, 'w') as fout:
            for line in fin:
                if ''.join(line.split(',')).strip() == '':
                    continue
                fout.write(line)
                x += 1
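The same blank-row test can also be expressed with the csv module, which additionally copes with quoted fields. A sketch under the same folder layout as above:
import csv
import glob
import os

dir_dst = "out_folder"
for filename in glob.glob("in_folder/*.csv"):
    outfilename = os.path.join(dir_dst, os.path.basename(filename))
    with open(filename, newline='') as fin, open(outfilename, 'w', newline='') as fout:
        writer = csv.writer(fout)
        for row in csv.reader(fin):
            # keep the row only if at least one cell has non-whitespace content
            if any(cell.strip() for cell in row):
                writer.writerow(row)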
Try this: iterate over data, the lines you already read, instead of fin.readlines(), which returns nothing because fin.read() already consumed the file.
for filename in files:
    with open(filename, 'r') as fin:
        data = fin.read().splitlines(True)
    with open(filename, 'w') as fout:
        for line in data:
            if ''.join(line.split(',')).strip() == '':
                continue
            fout.write(line)
            x += 1

Not opening a specific text file in Python

I have a folder with a bunch of text files. I have the following code that opens all the text files in its directory when executed and throws them all together in a master text file, "result.txt".
import glob

read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
    for f in read_files:
        with open(f, "rb") as infile:
            outfile.write(infile.read())
I don't want this script to open "result.txt": it should read all text files except result.txt. How can I do this? I don't want it to duplicate result.txt by writing its contents into itself.
Use a filter function:
read_files = filter(lambda f: f != 'result.txt', glob.glob('*.txt'))
Well, you can filter result.txt when looping through all files:
import glob

read_files = glob.glob("*.txt")
with open("result.txt", "wb") as outfile:
    for f in (file for file in read_files if file != "result.txt"):
        with open(f, "rb") as infile:
            outfile.write(infile.read())
Alternatively, to prevent bugs in further uses of the read_files list, you could remove "result.txt" from it right after glob.glob:
read_files = glob.glob("*.txt")
try:
    read_files.remove("result.txt")
except ValueError:  # File result.txt does not exist yet
    pass
You could use continue to skip the file and start the next iteration of the loop:
for f in read_files:
    if f == "result.txt":
        continue
    ...
Alternatively, filter the list of files before you start looping:
read_files = [f for f in glob.glob("*.txt") if f != "result.txt"]
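One caveat for all of these: the comparison is against the bare filename, which works because glob was called with the bare "*.txt" pattern. If the pattern includes a directory, compare the basename instead; a sketch (some_dir is a stand-in directory):
import glob
import os

# glob returns paths like "some_dir/result.txt" here, so compare only the final component
read_files = [f for f in glob.glob("some_dir/*.txt")
              if os.path.basename(f) != "result.txt"]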

How to read multiple (more than 2) .txt files using Python

I have 2 or more .txt files, containing:
file1.txt:
India
File2.txt:
US
I want to write the output to a third file as India US.
Can anyone please tell me how to do it using Python?
import glob
import os

# glob.glob takes a single pattern, so join the directory and the wildcard
all_text_files = glob.glob(os.path.join('/path/to/dir', '*.txt'))
with open('output_file.txt', 'w') as fh:
    for text_file in all_text_files:
        with open(text_file, 'r') as data:
            fh.write(data.read())
glob.glob('*.txt') returns ALL the .txt files in the current directory.
If you want to read only a few files, you can specify them in a list
all_text_files = ['file1.txt', 'file2.txt', ....., 'filen.txt']
source_files = ['file1.txt', 'file2.txt']
with open('output.txt', 'w') as fh_out:
    for fname in source_files:
        with open(fname, 'r') as fh:
            fh_out.write(fh.read())
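If you literally want "India US" on a single line, as in the question, you can join the stripped file contents with a space. A small sketch with pathlib, using the same file names:
from pathlib import Path

source_files = ['file1.txt', 'file2.txt']
# read each file fully, drop surrounding whitespace/newlines, and join with a space
Path('output.txt').write_text(' '.join(Path(f).read_text().strip() for f in source_files))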
files = ['file1.txt', 'file2.txt']
# open the output once; reopening it with 'w+' inside the loop truncates it on every pass
with open('file3.txt', 'w') as file_put:
    for file in files:
        with open(file, 'r') as file_read:
            file_put.write(file_read.read())
