Hello I have multiple json files in a path and I want to convert all of them to csv files separately. Here is what I have tried so far which just convert one json file to a csv file.
with open('/Users/hh/MyDataSet/traceJSON-663-661-A0-25449-7.json') as f:
for line in f:
data.append(json.loads(line))
csv_file=open('/Users/hh/MyDataSet/GTruth/traceJSON-663-661-A0-25449-7.csv','w')
write=csv.writer(csv_file)
# write.writerow(["row number","type","rcvTime","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
# ,"hed_x","hed_y","hed_z"])
write.writerow(["row number","type","rcvTime","sender","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
,"hed_x","hed_y","hed_z"])
for elem in range(len(data)):
if data[elem]['type']==2:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),'663',round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
elif data[elem]['type']==3:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),round(data[elem]['sender'],2),round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
# json_file.close()
print('done!')
csv_file.close()
I appreciate if anyone can help me how can I do it. Also in each json file name "traceJSON-663-661-A0-25449-7", the first number like in the above code (663) should be written in csv file like the following code,if the type is 2:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),'663',....
My json file names are like traceJSON-51-49-A16-25217-7, traceJSON-57-55-A0-25223-7, ....
I suggest using pandas for this:
from glob import glob
import pandas as pd
import os
filepaths = glob('/Users/hh/MyDataSet/*.json') # get list of json files in folder
for f in filepaths:
filename = os.path.basename(f).rsplit('.', 1)[0] # extract filename without extension
nr = int(filename.split('-')[1]) # extract the number from the filename - assuming that all filenames are formatted similarly, use regex otherwise
df = pd.read_json(f) # read the json file as a pandas dataframe, assuming the json file isn't nested
df['type'] = df['type'].replace(2, nr) # replace 2 in 'type' column with the number in the filename
df.to_csv(f'{filename}.csv') # save as csv
If you want to round columns, you can also do this with pandas
import csv
import glob
import json
import os.path
for src_path in glob.glob('/Users/hh/MyDataSet/*.json'):
src_name = os.path.splitext(os.path.basename(src_path))[0]
data = []
with open(src_path) as f:
for line in f:
data.append(json.loads(line))
dest_path = '/Users/hh/MyDataSet/GTruth/' + src_name + '.csv'
csv_file=open(dest_path,'w')
write=csv.writer(csv_file)
write.writerow(["row number","type","rcvTime","sender","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
,"hed_x","hed_y","hed_z"])
for elem in range(len(data)):
if data[elem]['type']==2:
sender = src_name.split('-')[1]
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),sender,round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
elif data[elem]['type']==3:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),round(data[elem]['sender'],2),round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
csv_file.close()
print('done!')
So I have a CSV file with a column called content. However, the contents in column look like it is based on JSON, and, therefore, house more columns. I would like to split these contents into multiple columns or extract the final part of it after "value". See picture below to see an example of the file. Any ideas how to get this? I would prefer using Python. I don't have any experience with JSON.
Using pandas you could do in a simpler way.
EDIT updated to handle the single quotes:
import pandas as pd
import json
data = pd.read_csv('test.csv', delimiter="\n")["content"]
res = [json.loads(row.replace("'", '"')) for row in data]
result = pd.DataFrame(res)
result.head()
# Export result to CSV
result.to_csv("result.csv")
my csv:
result:
This script will create a new csv file with the 'value' added to the csv as an additional column
(make sure that the input_csv and output_csv are different filenames)
import csv
import json
input_csv = "data.csv"
output_csv = "data_updated.csv"
values = []
with open(input_csv) as f_in:
dr = csv.DictReader(f_in)
for row in dr:
value = json.loads(row["content"].replace("'", '"'))["value"]
values.append(value)
with open(input_csv) as f_in:
with open(output_csv, "w+") as f_out:
w = csv.writer(f_out, lineterminator="\n")
r = csv.reader(f_in)
all = []
row = next(r)
row.append("value")
all.append(row)
i = 0
for row in r:
row.append(values[i])
all.append(row)
i += 1
w.writerows(all)
I need to generate a .json file which has data in the following format:
{"cnt":[1950,1600,400,1250,995],
"dt":["2020-01","2020-02","2020-03","2020-04","2020-05"]}
I would prefer it getting generated by querying a table or using a CSV to JSON conversion. The format data I will have after querying or in my CSV file will be:
How to do this?
import csv
import json
with open('csv_file_path') as f:
dict_reader = csv.DictReader(f)
dicts = [dict(i) for i in dict_reader]
field_names = dict_reader.fieldnames #get the column headings # CNT,DT etc..
output_dict = {}
for item in field_names:
output_dict.setdefault(item,[])
for d in dicts:
for key in d:
output_dict[key].append(d[key])
with open('josn_file_path', 'w+') as f:
f.write(json.dumps(output_dict, indent=4))
Tested and works fine.
This example will turn your row-based data into a dict-based version.
Please keep in mind that I didn't test this - but it should work fine.
In essence this is what's happening:
Read the source data
Determine the headings you need for the dict
Fill the new data-format from your source_data
Dump this new format to a json file.
Code:
import csv
import json
# Read the source data
with open('path_to_csv_file') as f:
source_data = [i for i in csv.DictReader(f)]
# Discover headers, and prep dict framework.
target_data = {key: [] for key in source_data[0].keys()}
# Iterate over the source_data and append the values to the righ key in target_data
for row in source_data:
for k, v in row.items():
target_data[k].append(v)
# Write target data to json file
with open('path_to_json_file', 'w') as f:
json.dump(data, f)
I need to convert a .dat file that's in a specific format into a .csv file.
The .dat file has multiple rows with a repeating structure. The data is held in brackets and have tags. Below is the sample data; it repeats throughout the data file:
{"name":"ABSDSDSRF","ID":"AFJDKGFGHF","lat":37,"lng":-122,"type":0,"HAC":5,"verticalAccuracy":4,"course":266.8359375,"area":"san_francisco"}
Can anyone provide a starting point for the script?
This will create a csv assuming each line in your .DAT is json. Just order the header list to your liking
import csv, json
header = ['ID', 'name', 'type', 'area', 'HAC', 'verticalAccuracy', 'course', 'lat', 'lng']
with open('file.DAT') as datfile:
with open('output.csv', 'wb') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=header)
writer.writeheader()
for line in datfile:
writer.writerow(json.loads(line))
Your row is in json format. So, you can use:
import json
data = json.loads('{"name":"ABSDSDSRF","ID":"AFJDKGFGHF","lat":37,"lng":-122,"type":0,"HAC":5,"verticalAccuracy":4,"course":266.8359375,"area":"san_francisco"}')
print data.get('name')
print data.get('ID')
This is only a start point. You have to iter all the .dat file. At the end, you have to write an exporter to save the data into the csv file.
Use a regex to find all of the data items. Use ast.literal_eval to convert each data item into a dictionary. Collect the items in a list.
import re, ast
result = []
s = '''{"name":"ABSDSDSRF","ID":"AFJDKGFGHF","lat":37,"lng":-122,"type":0,"HAC":5,"verticalAccuracy":4,"course":266.8359375,"area":"san_francisco"}'''
item = re.compile(r'{[^}]*?}')
for match in item.finditer(s):
d = ast.literal_eval(match.group())
result.append(d)
If each data item is on a separate line in the file You don't need the regex - you can just iterate over the file.
with open('file.dat') as f:
for line in f:
line = line.strip()
line = ast.literal_eval(line)
result.append(line)
Use json.load:
import json
with open (filename) as fh:
data = json.load (fh)
...