Python - Read files from folder and write CSV file in format

import glob
import os
import csv
from collections import OrderedDict

# Remove output file if it already exists. Resolve the append issue
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
if os.path.isfile(file_path):
    os.remove(file_path)

list_of_files = glob.glob('C:\\Users\\Desktop\\Cobol\\*.CBL')  # Input files in folder
Fields = ['Program Name', 'LinesofCode']  # to be displayed in output CSV file

# opening output csv file to write (Fields)
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile1:
    csvwriter = csv.writer(csvfile1)
    csvwriter.writerow(Fields)
csvfile1.close()

def process_files_loc(list_of_files):
    for fileName in list_of_files:
        with open(fileName) as i:
            count = sum(1 for line in i)
            my_dict = {i: count}  # input filename and its lines of code
        ordered_dict = OrderedDict()  # using OrderedDict
        print(ordered_dict)
        # creating ordered dict from dict
        ordered_dict = OrderedDict(my_dict)
        print(ordered_dict)
        # writing records of Program name and LinesofCode to output csv file
        file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
        with open(file_path, 'a') as csvfile2:
            csvwriter = csv.writer(csvfile2)
            csvwriter.writerows(ordered_dict)
        csvfile2.close()

process_files_loc(list_of_files)
Output in Terminal (Error):
PS C:\Users\Python-1> & C:/Users/AppData/Local/Programs/Python/Python310/python.exe c:/Users/Python-1/one.py
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\ABCDEFGH.CBL' mode='r' encoding='cp1252'>, 191)])
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\IJKLMNOP.CBL' mode='r' encoding='cp1252'>, 195)])
Actual output file in the folder:
C:\Users\Desktop\Cobol\Outputs

Name         Date Modified     Type               Size
LOC3X.csv    9/15/2022 time    Comma Separated    1KB
Problem: The script executed, read the 2 CBL files in the folder, and created 1 CSV file in the output folder. The output CSV file should contain:

Program Name     LinesofCode
ABCDEFGH.CBL     191
IJKLMNOP.CBL     195

However, the actual content of the CSV file is only:

Program Name     LinesofCode
Try something like this. In your original code, my_dict = {i : count} stores the open file object i as the key instead of the file name, and csvwriter.writerows(ordered_dict) iterates over the dict's keys (the already-exhausted file objects) rather than over (name, count) rows, so no data rows ever reach the CSV:
import glob
import csv
import os

def process_files_loc(files):
    res = []
    for file in files:
        with open(file) as f:
            line_count = len([line.strip("\n") for line in f if line != "\n"])
            res.append([os.path.basename(f.name), line_count])
    return res

if __name__ == '__main__':
    with open('C:\\Users\\Main\\Desktop\\test\\test.csv', 'w', newline='') as f:
        csvwriter = csv.writer(f)
        csvwriter.writerow(['Program Name', 'LinesofCode'])
        csvwriter.writerows(process_files_loc(glob.glob('C:\\Users\\Main\\Desktop\\test\\*.PY')))
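Note that the list comprehension above skips blank lines when counting. If you want a raw count of every line instead (matching the 191 and 195 figures from the question), a minimal alternative for the loop body, a sketch rather than part of the original answer, is:

with open(file) as f:
    line_count = sum(1 for _ in f)  # counts every line, blank or not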
Result:
[screenshot of the resulting test.csv]
Regards,

Related

Python: iterate folder of csv and convert to json

I am an amateur at Python, but I have the task of converting a folder of CSV files to JSON files. I have this script working with a specified CSV file, but I have no idea how to make the script iterate through a folder of CSVs and convert all of them to JSON. The original script:
import csv
import json

file = '/users/krzysztofpaszta/CSVtoGD/build-a-bridge.csv'
json_file = '/users/krzysztofpaszta/CSVtoGD/build-a-bridge.json'

# Read the CSV file
def read_CSV(file, json_file):
    csv_rows = []
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        field = reader.fieldnames
        for row in reader:
            csv_rows.extend([{field[i]: row[field[i]] for i in range(len(field))}])
    convert_write_json(csv_rows, json_file)

# Convert CSV to JSON
def convert_write_json(data, json_file):
    with open(json_file, "w") as f:
        f.write(json.dumps(data, sort_keys=False, indent=4, separators=(',', ': ')))

read_CSV(file, json_file)
Can someone give me a hint?
You can use os functions, particularly os.listdir() to iterate over files in the directory, and safely generate new names with os.path.splitext():
import os

DIRECTORY = "/path/to/dir"
for f in os.listdir(os.fsencode(DIRECTORY)):
    fn = os.fsdecode(f)
    pre, ext = os.path.splitext(fn)
    if ext == ".csv":
        read_CSV(os.path.join(DIRECTORY, fn), os.path.join(DIRECTORY, pre + '.json'))
A similar approach with pathlib would be:
from pathlib import Path

DIRECTORY = "/path/to/dir"
files = Path(DIRECTORY).glob('*.csv')    # to process files only in this dir
files = Path(DIRECTORY).rglob('*.csv')   # to process files in sub-directories recursively
for f in files:
    read_CSV(f, str(f.with_suffix('.json')))  # use .with_suffix() for safe name generation
You can list the csv files in a folder using pathlib:
from pathlib import Path
csv_files = Path().glob('*.csv')
Then loop over the files:
for csv_file in csv_files:
    csv_path = str(csv_file.absolute())
    json_path = csv_path.replace('.csv', '.json')
    read_CSV(csv_path, json_path)

Merge csv files, add original file name to each row in output file

I have multiple csv files in a folder with the same data structure,
0.00;1.05;10.5
0.01;2.05;15.5
0.02;3.05;20.5
...
I want to merge all the csv files into 1 summary file and add a column with the file name to each line, depending on the original data source.
0.00;1.05;10.5;csv1.csv
0.01;2.05;15.5;csv1.csv
0.02;3.05;20.5;csv1.csv
0.00;5.05;0.05;csv2.csv
0.01;6.05;1.05;csv2.csv
0.02;7.05;2.05;csv2.csv
...
I managed to merge the files, but can't find a way to add the file names.
import os
import csv
from itertools import islice

files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw)
    for file in files:
        with open(file, newline='') as f:
            cr = csv.reader(islice(f, 13, None))  # skip the first 13 lines of each file
            cw.writerows(cr)
I don't want to use pandas concat due to RAM limitations.
Thank you.
You don't need to parse the input csv files; just append a delimiter and then the current file name to each line. You can use the fileinput module:
import fileinput
from pathlib import Path

folder = '.'  # set accordingly; assume current directory
path = Path(folder)
with fileinput.input(files=path.glob('*.csv')) as f, open('results.csv', 'w') as outfile:
    for line in f:
        print(';'.join([line.rstrip('\n'), Path(fileinput.filename()).name]), file=outfile)
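One caveat, assuming results.csv is written into the same folder being globbed: the *.csv pattern can match the output file itself. A small guard (my addition, not part of the original answer) avoids that:

import fileinput
from pathlib import Path

folder = '.'
path = Path(folder)
inputs = [p for p in path.glob('*.csv') if p.name != 'results.csv']  # skip the output file
with fileinput.input(files=inputs) as f, open('results.csv', 'w') as outfile:
    for line in f:
        print(';'.join([line.rstrip('\n'), Path(fileinput.filename()).name]), file=outfile)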
Regarding your code, you can fix it like this:
import os
import csv

folder = '.'
files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw, delimiter=';')
    for file in files:
        with open(file, newline='') as f:
            for row in csv.reader(f, delimiter=';'):
                row.append(file)
                cw.writerow(row)
Here the delimiter argument is set to semi-colon because the default delimiter is a comma and your files use ;. That fixes the parsing of the input csv files and uses ; for the output file as well. Each input file is then processed by reading each row, appending the filename to the row list, and writing the new row to the output CSV file.
You can use os and pandas:
import os
import pandas as pd
basedir = <path of your base folder>
all_dfs = []
for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]):
    curr_df = pd.read_csv(os.path.join(basedir, filename), sep=';', header=None)
    curr_df['filename'] = filename
    all_dfs.append(curr_df)
pd.concat(all_dfs, axis=0).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
Or, if you prefer, in only one line:
pd.concat([pd.concat((df, pd.DataFrame([f for _ in range(len(df))])), axis=1) for f, df in
           ((filename, pd.read_csv(os.path.join(basedir, filename), sep=';', header=None))
            for filename in filter(lambda f: os.path.splitext(f)[1] == '.csv', next(os.walk(basedir))[2]))
           ]).to_csv('merged_cvs.csv', sep=';', header=False, index=False)
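Since the question mentions RAM limitations, a chunked variant (my sketch; the chunksize value is an arbitrary assumption) streams each file through pandas in pieces instead of concatenating everything in memory:

import os
import pandas as pd

basedir = '.'  # assumed location of the input files
with open('merged_cvs.csv', 'w', newline='') as out:
    for filename in sorted(f for f in os.listdir(basedir)
                           if f.endswith('.csv') and f != 'merged_cvs.csv'):
        for chunk in pd.read_csv(os.path.join(basedir, filename), sep=';', header=None, chunksize=10000):
            chunk['filename'] = filename  # add the source file name as a column
            chunk.to_csv(out, sep=';', header=False, index=False)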
import os
import csv
from itertools import islice

folder = '.'
files = []
for file in os.listdir(folder):
    if file.endswith('.csv'):
        files.append(file)

with open('results.csv', 'w', newline='') as fw:
    cw = csv.writer(fw)
    for file in files:
        with open(file, newline='') as f:
            fw.write(f"{file}\n")  # just write the filename before the content :)
            cr = csv.reader(islice(f, 13, None))
            cw.writerows(cr)

Converting a list of txt files into a list of csv files with python

I have the following code to convert a single .txt file into a single .csv file, but I need the code to iterate over a directory of .txt files and give out a directory of the same .txt files but in .csv format.
import csv

textfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.txt'
outfile = 'X:/general/DavidOrgEcon/GSTT/text to csv/Group.csv'

with open(textfile, 'r') as csvfile:
    In_text = csv.reader(csvfile, delimiter=':')
    all_rows = []
    row_dict = {}
    count_row = 1
    for row in In_text:
        if len(row) > 0:
            row_dict[row[0].strip()] = row[1].strip()
            if count_row % 4 == 0:
                all_rows.append(row_dict)
                row_dict = {}
            count_row += 1
print(all_rows)
keys = all_rows[0].keys()
print(keys)

with open(outfile, 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(all_rows)
So assuming you have your existing function
def text_to_csv(infilepath, outfilepath):
    ...
which can read a text file from infilepath and output the csv to outfilepath, then you can make a new function that takes two directories and calls it on every text file in the first:
import os

def convert_directory(in_dir, out_dir):
    # Loop through every file in the directory
    for filename in os.listdir(in_dir):
        # Split the file name into a base portion and an extension
        # e.g. "file.txt" -> ("file", ".txt")
        base_name, extension = os.path.splitext(filename)
        # If it is a text file, do the transformation
        if extension == ".txt":
            # Construct the name of the csv file to create
            csv_filename = f"{base_name}.csv"
            # Call your function with the full filepaths
            text_to_csv(
                os.path.join(in_dir, filename),
                os.path.join(out_dir, csv_filename)
            )

convert_directory(
    "X:/general/DavidOrgEcon/GSTT/text to csv/input_dir",
    "X:/general/DavidOrgEcon/GSTT/text to csv/output_dir",
)

Script to read multiple input files and write a separate output file for each input file?

I have multiple input files in the format below which have to be processed.
Input file path: /tmp/input.
1.1.1.txt
1.1.2.txt
1.1.3.txt
But I want an output file for each input file in another folder, say /tmp/outputsmgr, like below:
1.1.1_output.csv
1.1.2_output.csv
1.1.3_output.csv
The issues are:
Firstly, I am not able to write the output files to another/different folder.
Secondly, after processing, the data from all input files gets merged into one file in the input folder, like below, instead of a separate output file for each input file.
All the files below contain the same data; instead, the 1.1.1.txt data should be in 1.1.1_output.csv and the 1.1.2.txt data should be in 1.1.2_output.csv.
1.1.1.txt_output.csv
1.1.2.txt_output.csv
1.1.3.txt_output.csv
How can I modify the below code to get the desired result?
import os
import csv
import re

def parseFile(fileName):
    # We are using a dictionary to store info for each file
    data = list()
    # data = dict()
    fh = open(fileName, "r")
    lines = fh.readlines()[1:]
    for line in lines:
        line = line.rstrip("\n")
        if re.search("sessmgr", line):
            splitted = line.split()
            temp = dict()
            temp["CPU"] = splitted[0]
            temp["facility"] = splitted[1]
            temp["instance"] = splitted[2]
            temp["cpu-used"] = splitted[3]
            temp["cpu-allc"] = splitted[4]
            temp["mem-used"] = splitted[5]
            temp["mem-allc"] = splitted[6]
            temp["files-used"] = splitted[7]
            temp["files-allc"] = splitted[8]
            temp["sessions-used"] = splitted[9]
            temp["sessions-allc"] = splitted[10]
            # print(splitted[2])
            data.append(temp)
            # continue
    # print(data)
    return data

if __name__ == "__main__":
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    path = os.path.abspath(inputsDirectory)
    pathout = os.path.abspath(outputDirectory)
    fileLists = ["{0}/{1}".format(path, x) for x in os.listdir(outputDirectory)]
    fileList = ["{0}/{1}".format(path, x) for x in os.listdir(inputsDirectory)]
    # print(fileList)
    csvRows = []
    for file in fileList:
        newRow = parseFile(file)
        csvRows.append(newRow)
    # print(csvRows)
    for files in fileList:
        outputFile = "output.csv"
        csvfile = open(os.path.join(files + "_" + outputFile), 'w')
        fieldnames = ["CPU",
                      "facility",
                      "instance",
                      "cpu-used",
                      "cpu-allc",
                      "mem-used",
                      "mem-allc",
                      "files-used",
                      "files-allc",
                      "sessions-used",
                      "sessions-allc"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        headers = {}
        for n in writer.fieldnames:
            headers[n] = n
        writer.writerow(headers)
        # writer.writeheader()
        for row in csvRows:
            for obj in row:
                print(obj)
                writer.writerow(obj)
I think the code below will do what you want. It processes the files in the input directory sequentially and the results returned from the parseFile() function get written to the corresponding output file in the output directory. It's important to get a new set of csvRows from each input file and write (just) those to each output file.
The code assumes the outputDirectory already exists, but if that's not the case, then you'll need to add code to create it before processing any of the files. Hint: use os.path.exists() and os.path.isdir() in conjunction with os.makedirs().
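For example, that check can be as small as this sketch of the hint above (os.makedirs() with exist_ok=True folds the existence test into a single call):

import os

if not os.path.isdir(outputDirectory):
    os.makedirs(outputDirectory)  # create the output directory if it doesn't exist
# or, equivalently, in one call:
os.makedirs(outputDirectory, exist_ok=True)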
import csv
import os
import re

def parseFile(filePath, fieldnames, target_re=r"sessmgr"):
    """ Yield lines of file matching target regex. """
    with open(filePath, "r") as file:
        next(file)  # Skip/ignore first line.
        for line in file:
            if re.search(target_re, line):
                yield dict(zip(fieldnames, line.split()))

if __name__ == "__main__":
    OUTPUT_FILE_SUFFIX = "output.csv"
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    fieldnames = ("CPU", "facility", "instance", "cpu-used", "cpu-allc", "mem-used",
                  "mem-allc", "files-used", "files-allc", "sessions-used",
                  "sessions-allc")
    input_dir = os.path.abspath(inputsDirectory)
    output_dir = os.path.abspath(outputDirectory)
    for in_filename in os.listdir(input_dir):
        in_filepath = os.path.join(input_dir, in_filename)
        print('in_filepath: "{}"'.format(in_filepath))
        in_rootname = os.path.splitext(in_filename)[0]
        out_filename = in_rootname + "_" + OUTPUT_FILE_SUFFIX
        out_filepath = os.path.join(output_dir, out_filename)
        print('out_filepath: "{}"'.format(out_filepath))
        with open(out_filepath, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(parseFile(in_filepath, fieldnames))

Process multiple files in Python with one code

I'm having some trouble with my code. I want to do the following: I have about 30 csv files which I need to process, and I want the result in one single csv file. So far I have the following code (sample):
import pandas as pd
import csv

df = pd.read_csv("file.csv", delimiter=",", skiprows=1)

datamean10 = df[61:240].mean()
datamean15 = df[241:420].mean()

list10 = []
list15 = []
list10.append(datamean10.clip(0))
list15.append(datamean15.clip(0))

csvfile = "C:/Users/bla/bla/list10.csv"
with open(csvfile, 'w') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerows(list10)

csvfile = "C:/Users/bla/bla/list15.csv"
with open(csvfile, 'w') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerows(list15)
So this code does what I want for a single file. I wrote another script to go through all the files:
import sys, os

for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
    for file in files:
        if file.endswith('.csv'):
            os.system('averagelist.py {}'.format(root + '\\' + file))
Needless to say, this script deletes the lists again and starts off with a blank list, while I want to append the results to the same list, so that the resulting files have a row of average values for each file that is processed. Can somebody tell me how to do this? Thank you very much in advance.
This should be a working combination.
import pandas as pd
import csv

df = pd.read_csv("file.csv", delimiter=",", skiprows=1)

datamean10 = df[61:240].mean()
datamean15 = df[241:420].mean()

list10 = []
list15 = []
list10.append(datamean10.clip(0))
list15.append(datamean15.clip(0))

csvfile = "C:/Users/bla/bla/list10.csv"
with open(csvfile, 'w') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerows(list10)

import sys, os

for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
    for file in files:
        if file.endswith('.csv'):
            # csvfile = "C:/Users/bla/bla/list15.csv"
            with open(file, 'w') as output:
                writer = csv.writer(output, lineterminator='\n')
                writer.writerows(list15)
Just open your output files in append mode inside the inner script:
with open(csvfile, 'a') as output:
and truncate them only in the master script:
with open("C:/Users/bla/bla/list10.csv", 'w') as output:
    pass
with open("C:/Users/bla/bla/list15.csv", 'w') as output:
    pass
for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
    ...
But using system to call a Python script from another Python script is bad. It would be better to write the working part of the inner script in a function, and then use it from the outer script after importing it:
Inner script (averagelist.py):
import pandas as pd
import csv

def average(file):
    df = pd.read_csv(file, delimiter=",", skiprows=1)
    ...
    with open(csvfile, 'a') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(list15)
Outer script:
import os
import averagelist

with open("C:/Users/bla/bla/list10.csv", 'w') as output:
    pass
with open("C:/Users/bla/bla/list15.csv", 'w') as output:
    pass
for root, dir, files in os.walk('C:/Users/bla/bla/bla'):
    for file in files:
        if file.endswith('.csv'):
            averagelist.average(root + '\\' + file)
