Hello, I have multiple JSON files in a folder and I want to convert each of them to a separate CSV file. Here is what I have tried so far, which only converts a single JSON file to one CSV file:
import csv
import json

data = []
with open('/Users/hh/MyDataSet/traceJSON-663-661-A0-25449-7.json') as f:
    for line in f:
        data.append(json.loads(line))

csv_file = open('/Users/hh/MyDataSet/GTruth/traceJSON-663-661-A0-25449-7.csv', 'w')
write = csv.writer(csv_file)
write.writerow(["row number", "type", "rcvTime", "sender", "pos_x", "pos_y", "pos_z",
                "spd_x", "spd_y", "spd_z", "acl_x", "acl_y", "acl_z",
                "hed_x", "hed_y", "hed_z"])
for elem in range(len(data)):
    if data[elem]['type'] == 2:
        # type-2 records carry no sender, so the first number from the filename is hard-coded
        write.writerow([elem, data[elem]['type'], round(data[elem]['rcvTime'], 2), '663',
                        round(data[elem]['pos'][0], 2), round(data[elem]['pos'][1], 2), round(data[elem]['pos'][2], 2),
                        round(data[elem]['spd'][0], 2), round(data[elem]['spd'][1], 2), round(data[elem]['spd'][2], 2),
                        round(data[elem]['acl'][0], 2), round(data[elem]['acl'][1], 2), round(data[elem]['acl'][2], 2),
                        round(data[elem]['hed'][0], 2), round(data[elem]['hed'][1], 2), round(data[elem]['hed'][2], 2)])
    elif data[elem]['type'] == 3:
        write.writerow([elem, data[elem]['type'], round(data[elem]['rcvTime'], 2), round(data[elem]['sender'], 2),
                        round(data[elem]['pos'][0], 2), round(data[elem]['pos'][1], 2), round(data[elem]['pos'][2], 2),
                        round(data[elem]['spd'][0], 2), round(data[elem]['spd'][1], 2), round(data[elem]['spd'][2], 2),
                        round(data[elem]['acl'][0], 2), round(data[elem]['acl'][1], 2), round(data[elem]['acl'][2], 2),
                        round(data[elem]['hed'][0], 2), round(data[elem]['hed'][1], 2), round(data[elem]['hed'][2], 2)])
print('done!')
csv_file.close()
I'd appreciate any help with this. Also, in each JSON file name like "traceJSON-663-661-A0-25449-7", the first number (663, as in the code above) should be written to the CSV when the type is 2:

write.writerow([elem, data[elem]['type'], round(data[elem]['rcvTime'], 2), '663', ....

My JSON file names look like traceJSON-51-49-A16-25217-7, traceJSON-57-55-A0-25223-7, ....
I suggest using pandas for this:

from glob import glob
import os

import pandas as pd

filepaths = glob('/Users/hh/MyDataSet/*.json')  # list of json files in the folder
for f in filepaths:
    filename = os.path.basename(f).rsplit('.', 1)[0]  # filename without extension
    nr = int(filename.split('-')[1])  # number from the filename - assuming all filenames are formatted similarly, use a regex otherwise
    df = pd.read_json(f, lines=True)  # lines=True because each line holds one JSON object, as in your code
    df.loc[df['type'] == 2, 'sender'] = nr  # type-2 rows carry no sender, so fill in the filename number
    df.to_csv(f'{filename}.csv')  # save as csv
If you want to round columns, you can also do this with pandas.
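For example, DataFrame.round can round every numeric column at once, or just selected ones via a dict; a minimal sketch reusing the file from the question (lines=True again assumes one JSON object per line, as in your loop):

import pandas as pd

df = pd.read_json('/Users/hh/MyDataSet/traceJSON-663-661-A0-25449-7.json', lines=True)
df = df.round(2)               # round all numeric columns to 2 decimal places
df = df.round({'rcvTime': 2})  # or round only specific columns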
import csv
import glob
import json
import os.path

for src_path in glob.glob('/Users/hh/MyDataSet/*.json'):
    src_name = os.path.splitext(os.path.basename(src_path))[0]
    sender = src_name.split('-')[1]  # first number in the filename, e.g. 663

    data = []
    with open(src_path) as f:
        for line in f:
            data.append(json.loads(line))

    dest_path = '/Users/hh/MyDataSet/GTruth/' + src_name + '.csv'
    csv_file = open(dest_path, 'w')
    write = csv.writer(csv_file)
    write.writerow(["row number", "type", "rcvTime", "sender", "pos_x", "pos_y", "pos_z",
                    "spd_x", "spd_y", "spd_z", "acl_x", "acl_y", "acl_z",
                    "hed_x", "hed_y", "hed_z"])
    for elem in range(len(data)):
        if data[elem]['type'] == 2:
            write.writerow([elem, data[elem]['type'], round(data[elem]['rcvTime'], 2), sender,
                            round(data[elem]['pos'][0], 2), round(data[elem]['pos'][1], 2), round(data[elem]['pos'][2], 2),
                            round(data[elem]['spd'][0], 2), round(data[elem]['spd'][1], 2), round(data[elem]['spd'][2], 2),
                            round(data[elem]['acl'][0], 2), round(data[elem]['acl'][1], 2), round(data[elem]['acl'][2], 2),
                            round(data[elem]['hed'][0], 2), round(data[elem]['hed'][1], 2), round(data[elem]['hed'][2], 2)])
        elif data[elem]['type'] == 3:
            write.writerow([elem, data[elem]['type'], round(data[elem]['rcvTime'], 2), round(data[elem]['sender'], 2),
                            round(data[elem]['pos'][0], 2), round(data[elem]['pos'][1], 2), round(data[elem]['pos'][2], 2),
                            round(data[elem]['spd'][0], 2), round(data[elem]['spd'][1], 2), round(data[elem]['spd'][2], 2),
                            round(data[elem]['acl'][0], 2), round(data[elem]['acl'][1], 2), round(data[elem]['acl'][2], 2),
                            round(data[elem]['hed'][0], 2), round(data[elem]['hed'][1], 2), round(data[elem]['hed'][2], 2)])
    csv_file.close()
print('done!')
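One caveat: open(dest_path, 'w') fails if the GTruth folder doesn't exist yet, so you may want to create it once before the loop (a small addition of mine, not part of the original answer):

import os

os.makedirs('/Users/hh/MyDataSet/GTruth', exist_ok=True)  # does nothing if the folder is already there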
I have 30 Excel files and want to combine them into a single Excel file (using Python), with only one header row at the top (not repeated 30 times).
I don't know how to write this in Python.
Please help.
Thank you so much.
This is my code snippet for merging 13 CSV exports into one file, keeping only the first header:

fout = open("1300 restaurant data.csv", "a", encoding="utf8")
# now the rest:
for num in range(1, 14):  # 1.csv .. 13.csv (range(1, 13) would stop at 12)
    f = open(str(num) + ".csv", encoding="utf8")
    if num > 1:
        next(f)  # skip the header row in every file after the first
    for line in f:
        fout.write(line)
    f.close()
fout.close()
Try using the code below to merge a list of Excel files into a single file:

import glob

import pandas as pd

path = "C:/documents"
file_list = glob.glob(path + "/*.xlsx")

excel_list = []
for file in file_list:  # iterate over the globbed files (the original looped over the still-empty result list)
    excel_list.append(pd.read_excel(file))

# DataFrame.append was removed in pandas 2.0; pd.concat merges all the frames in one call
excel_merged = pd.concat(excel_list, ignore_index=True)
excel_merged.to_excel('mergedFile.xlsx', index=False)
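If it helps, the reading and merging can also be collapsed into a single pd.concat call (same path and file pattern assumed as above):

import glob

import pandas as pd

excel_merged = pd.concat((pd.read_excel(f) for f in glob.glob("C:/documents/*.xlsx")),
                         ignore_index=True)
excel_merged.to_excel('mergedFile.xlsx', index=False)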
Hello!
I would like to combine many CSV files horizontally (the total number will be somewhere around 120-150) into one CSV file by taking one column from each file (in this case the column called "grid"). All the files have the same columns and number of rows (they are constructed the same way) and are stored in the same directory. I've tried the csv module and pandas, but I don't want to list all 120 files by hand; I need a script that does it automatically. I'm stuck and out of ideas...
Some input CSV files (data) and the merged CSV file I would like to get:
https://www.dropbox.com/transfer/AAAAAHClI5b6TPzcmW2dmuUBaX9zoSKYD1ZrFV87cFQIn3PARD9oiXQ
This is how my code looks when I use the csv module:
import os
import glob
import csv

os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')

with open(out_merg, 'wt') as out:
    writer = csv.writer(out)
    for file in files:
        with open(file) as csvfile:
            data = csv.reader(csvfile, delimiter=';')
            result = []
            for row in data:
                a = row[3]  # column which I need
                result.append(a)
Using this code I only receive values from the last CSV; the rest are missing. As a result I would like to have one specific column from each CSV file in the directory.
And with pandas:
import os
import glob
import pandas as pd
import csv
os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')
in_names = [pd.read_csv(f, delimiter=';', usecols = ['grid']) for f in files]
Using pandas I receive the data from all the CSVs as a list of dataframes that can be indexed, e.g. in_names[1].
I confess this is my first try with pandas, and I have no idea what my next step should be.
I will really appreciate any help!
Thanks in advance,
Mateusz
For the csv part, I think you need another list defined OUTSIDE the loop.
Something like:
import os
import glob
import csv

# paths are resolved relative to the script's own folder
dirname = os.path.dirname(os.path.realpath('__file__'))

extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('merged_csv_file_direction')

result = []
with open(out_merg, 'wt') as out:
    writer = csv.writer(out)
    for file in files:
        with open(file) as csvfile:
            data = csv.reader(csvfile, delimiter=';')
            col = []
            for row in data:
                a = row[3]  # column which I need
                col.append(a)
        result.append(col)
    writer.writerows(zip(*result))  # transpose, so each input file becomes one column in the output
NOTE: I have also changed the way the folder is located. You can now run the file directly from the folder that contains the two folders (one holding the input data and the other for saving the output).
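For instance, dirname can be used to build both paths explicitly; this is a sketch of mine, and the folder names are just the placeholders used elsewhere in this thread:

import os

dirname = os.path.dirname(os.path.realpath('__file__'))
data_dir = os.path.join(dirname, 'csv_files_direction')        # folder holding the input CSVs
out_path = os.path.join(dirname, 'merged_csv_file_direction')  # merged output file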
Regarding the pandas part:
you can loop again, but this time you need to CONCAT the dataframes that you created with in_names = [pd.read_csv(f, delimiter=';', usecols=['grid']) for f in files].
I think you can use:
import os
import glob
import pandas as pd

os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')

in_names = [pd.read_csv(f, delimiter=';', usecols=['grid']) for f in files]
result = pd.concat(in_names, axis=1)  # axis=1 puts the columns side by side instead of stacking them
result.to_csv(out_merg, index=False, sep=';')  # match the input delimiter
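One caveat: since every dataframe contributes a column named 'grid', all 120+ merged columns will share the same header. If you need to tell them apart, you could rename each column after its source file before concatenating (a sketch of mine; the naming scheme is just an assumption):

import os

renamed = [df.rename(columns={'grid': os.path.splitext(f)[0]})
           for f, df in zip(files, in_names)]
result = pd.concat(renamed, axis=1)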
Tell me if it works
I'm using the following code to break up a large CSV file, and I want the original CSV header to be written to each smaller CSV file. The problem is that the current code seems to skip one line of data for each smaller file: in the example below, line 51 wouldn't be written to the smaller file (code modified from http://code.activestate.com/recipes/578045-split-up-text-file-by-line-count/). It seems to skip that line, or perhaps it's being overwritten by the header:
import os

filepath = 'test.csv'
lines_per_file = 50
lpf = lines_per_file
path, filename = os.path.split(filepath)
with open(filepath, 'r') as r:
    name, ext = os.path.splitext(filename)
    try:
        w = open(os.path.join(path, '{}_{}{}'.format(name, 0, ext)), 'w')
        header = r.readline()
        for i, line in enumerate(r):
            if not i % lpf:
                # possible enhancement: don't check modulo lpf on each pass;
                # keep a counter variable, and reset on each checkpoint lpf.
                w.close()
                filename = os.path.join(path, '{}_{}{}'.format(name, i, ext))
                w = open(filename, 'w')
                w.write(header)
            w.write(line)
    finally:
        w.close()
Consider using pandas to split the large CSV file.
Let's first create a CSV file with 500 rows and four columns using pandas:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(500,4), columns=['a','b','c','d'])
df.to_csv('large_data.csv', index=False)
Let's split large_data.csv into multiple CSV files of 50 rows each:
import pandas as pd

reader = pd.read_csv('large_data.csv', chunksize=50)  # returns an iterator over 50-row chunks
i = 1
for chunk in reader:
    chunk.to_csv('split_data_' + str(i) + '.csv', index=False)
    i = i + 1
This produces ten files, split_data_1.csv through split_data_10.csv.
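Note that to_csv writes the column names by default, so every split file keeps the original header, which is what the question asked for. As a small stylistic variant (my suggestion, not part of the original answer), enumerate can replace the manual counter:

import pandas as pd

for i, chunk in enumerate(pd.read_csv('large_data.csv', chunksize=50), start=1):
    chunk.to_csv('split_data_{}.csv'.format(i), index=False)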