Python: write multiple array values into CSV - python

With my code, I read the values from JSON data and insert them into an array:
def retrive_json():
    """Read ``t_v1.json`` and collect the coordinates of every ride.

    Returns:
        A list holding one ``[origin_lat, origin_lng, destination_lat,
        destination_lng]`` list per entry of the file's ``"ride"`` array.
    """
    with open('t_v1.json') as json_data:
        d = json.load(json_data)

    array = []
    for i in d['ride']:
        origin_lat = i['origin']['lat']
        origin_lng = i['origin']['lng']
        destination_lat = i['destination']['lat']
        destination_lng = i['destination']['lng']
        array.append([origin_lat, origin_lng, destination_lat, destination_lng])
    return array
the result array is this :
[[39.72417, -104.99984, 39.77446, -104.9379], [39.77481, -104.93618, 39.6984, -104.9652]]
How can I write each element of each array into a specific field in the CSV?
I have tried it this way:
# Attempt at writing the collected rows to a comma-separated file.
# NOTE(review): the opening quote of the file name is missing in open(t_.csv', ...).
wrt = csv.writer(open(t_.csv', 'w'), delimiter=',',lineterminator='\n')
# presumably jjson holds the list of lists returned by retrive_json() -- confirm
for x in jjson:
# [x] wraps the whole inner list in a one-element row, so it is written as a single field.
wrt.writerow([x])
But the values of each array are all stored in one field.
How can I solve this and write each value into its own field?
This is my JSON file:
{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}

Let's say we have this:
# Sample payload as JSON text: a "ride" array with two entries, each holding
# an "origin" and a "destination" object with lat/lng coordinates.
jsonstring = """{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}"""
Here is a pandas solution:
import json

import pandas as pd

# Parse the JSON text and build one DataFrame row per ride.
rides = json.loads(jsonstring)["ride"]
df = pd.DataFrame(rides)

# Pull each coordinate out of its nested dict into a flat column.
for column, source, field in (
    ("o1", "origin", "lat"),
    ("o2", "origin", "lng"),
    ("d1", "destination", "lat"),
    ("d2", "destination", "lng"),
):
    # field=field binds the current value; a plain closure would see the last one.
    df[column] = df[source].apply(lambda point, field=field: point[field])

# Skip the first two columns (by position) and print the rest as CSV text.
print(df.iloc[:, 2:].to_csv(index=False, header=True))
# To write a file instead of printing:
# df.iloc[:, 2:].to_csv("output.csv", index=False, header=True)
Returns:
o1,o2,d1,d2
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
Condensed answer:
import json

import pandas as pd

# Load the ride list from disk.
with open('data.json') as json_data:
    d = json.load(json_data)

df = pd.DataFrame(d["ride"])

# Turn every nested point into a (lat, lng) tuple, then unzip the tuples
# into two flat columns.
df["o1"], df["o2"] = zip(*df["origin"].apply(lambda x: (x["lat"], x["lng"])))
df["d1"], df["d2"] = zip(*df["destination"].apply(lambda x: (x["lat"], x["lng"])))

# Select the new columns by name rather than by position, so extra keys in
# the ride objects cannot leak into the CSV.
df[["o1", "o2", "d1", "d2"]].to_csv("t_.csv", index=False, header=False)
Or, maybe the most readable solution:
import json

import pandas as pd

# Load the ride list from disk.
with open('data.json') as json_data:
    d = json.load(json_data)

# json_normalize flattens nested objects into dotted column names
# such as "origin.lat".
df = pd.json_normalize(d["ride"])
cols = ["origin.lat", "origin.lng", "destination.lat", "destination.lng"]
df[cols].to_csv("output.csv", index=False, header=False)

This might help:
import json
import csv
def retrive_json():
    """Read ``data.json`` and collect the coordinates of every ride.

    Returns:
        A list holding one ``[origin_lat, origin_lng, destination_lat,
        destination_lng]`` list per entry of the file's ``"ride"`` array.
    """
    with open('data.json') as json_data:
        d = json.load(json_data)

    array = []
    for i in d['ride']:
        origin_lat = i['origin']['lat']
        origin_lng = i['origin']['lng']
        destination_lat = i['destination']['lat']
        destination_lng = i['destination']['lng']
        array.append([origin_lat, origin_lng, destination_lat, destination_lng])
    return array
res = retrive_json()
csv_cols = ["orgin_lat", "origin_lng", "dest_lat", "dest_lng"]

# newline='' hands line-ending control to the csv module; without it,
# platforms that translate "\n" end up with blank lines between rows.
with open("output_csv.csv", 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=csv_cols)
    writer.writeheader()
    for each_list in res:
        # Pair every value with its column name for this row.
        writer.writerow(dict(zip(csv_cols, each_list)))
Output csv generated is:
orgin_lat,origin_lng,dest_lat,dest_lng
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652

To me it looks like you've got an array of arrays and you want the individual elements, so you'll want to use a nested for loop. Your current for loop gets each array; to then split each array into its elements, you'll want to loop through those as well. I'd suggest something like this:
# Walk every inner list, then every value inside it.
for x in jjson:
for y in x:
# NOTE(review): writerow([y]) emits a one-value row per element, so each value
# lands on its own line rather than in its own field of a shared row.
# wrt.writerow(x) would write one row per inner list -- confirm which layout is wanted.
wrt.writerow([y])
Obviously you might want to update your bracketing etc this is just me giving you an idea of how to solve your issue.
Let me know how it goes!

Why the csv-Library?
array = [[1, 2, 3, 4], [5, 6, 7, 8]]

with open('test.csv', 'w') as csv_file:
    # The header might be optional
    csv_file.write("# Header Info\n"
                   "# Value1, Value2, Value3, Value4\n")
    for row in array:
        # str.join accepts only strings, so convert the numbers first.
        csv_file.write(",".join(map(str, row)) + "\n")

Related

JSON or CSV from list in python

So I have a code that converts my category tree to a list and I wanted to convert it to CSV/json. Each item on list can have more ids as shown below.
def paths(tree):
    """Yield every root-to-leaf path of ``tree`` as a tuple of node ids.

    Args:
        tree: A dict with an ``'id'`` key; inner nodes also carry a
            ``'children'`` list of dicts of the same shape.

    Yields:
        One tuple per leaf, listing the ids from ``tree`` down to that leaf.
    """
    # Test for the key itself instead of looking only at the first key, so
    # the result does not depend on the order of keys in the dict.
    if 'children' in tree:
        for child in tree['children']:
            for descendant in paths(child):
                yield (tree['id'],) + descendant
    else:
        yield (tree['id'],)
pprint.pprint(list(paths(tree)))
Output
[(461123, 1010022280, 10222044, 2222871,2222890),
(461123, 129893, 119894, 1110100250),
(461123, 98943, 944894, 9893445),
(461123, 9844495)]
Is there any way I can improve my code or have another code that converts list to json that looks below output?
Output should look like this
{
{
"column1": "462312",
"column2": "1010022280",
"column3": "10222044",
"column4": "2222871",
"column5": "2222890"
},
{
"column1": "461123",
"column2": "129893",
"column3": "119894",
"column4": "1110100250"
}
and so on...
}
If CSV, it should look like this (there can be up to 10 columns):
column1
column2
column3
column4
461123
129893
119894
1110100250
461123
129893
119894
The following code converts a list of tuples to a list of dicts, which you can then convert to JSON; the second function writes the data to a CSV file.
# Sample input: one tuple of ids per path; the tuples differ in length.
data = [(461123, 1010022280, 10222044, 2222871,2222890),
(461123, 129893, 119894, 1110100250),
(461123, 98943, 944894, 9893445),
(461123, 9844495)]
def convert_to_list_of_dicts(data):
    """Turn each tuple in ``data`` into a dict keyed ``column1``, ``column2``, ...

    Args:
        data: An iterable of tuples (or other sequences) of values.

    Returns:
        A list with one dict per input tuple; the value at position ``k``
        (0-based) is stored under the key ``"column<k+1>"``.
    """
    return [
        {"column" + str(count): value for count, value in enumerate(row, start=1)}
        for row in data
    ]
# print(convert_to_list_of_dicts(data))
def convert_to_csv(data):
    """Write ``data`` to ``output.csv`` under a ``column1..columnN`` header.

    ``N`` is the length of the longest tuple in ``data``; shorter tuples
    produce correspondingly shorter rows.

    Args:
        data: A sequence of tuples of values (it is iterated twice).
    """
    import csv  # local import: the snippet never imports csv at module level

    max_column_num = max((len(row) for row in data), default=0)
    columns = ["column" + str(i + 1) for i in range(max_column_num)]

    # newline='' hands line-ending control to the csv module.
    with open('output.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows(data)
# convert_to_csv(data)

Json file not formatted correctly when writing json differences with pandas and numpy

I am trying to compare two JSON files and then write another JSON file containing the column names and whether the values differ ("Yes" or "No"). I am using pandas and numpy.
Below are the sample files I am working with. In reality these JSON files are dynamic, which means we don't know up front how many keys there will be.
Input files:
fut.json
[
{
"AlarmName": "test",
"StateValue": "OK"
}
]
Curr.json:
[
{
"AlarmName": "test",
"StateValue": "OK"
}
]
Below code I have tried:
# Compare two JSON files column by column and write a per-column report.
# NOTE(review): json.load is used below, but json is not imported in this snippet.
import pandas as pd
import numpy as np
with open(r"c:\csv\fut.json", 'r+') as f:
data_b = json.load(f)
with open(r"c:\csv\curr.json", 'r+') as f:
data_a = json.load(f)
df_a = pd.json_normalize(data_a)
df_b = pd.json_normalize(data_b)
# Align the frames in both directions, filling missing entries with NaN.
_, df_a = df_b.align(df_a, fill_value=np.NaN)
_, df_b = df_a.align(df_b, fill_value=np.NaN)
with open(r"c:\csv\report.json", 'w') as _file:
for col in df_a.columns:
# A fresh three-column frame per source column: current, future, and a Yes/No diff flag.
df_temp = pd.DataFrame()
df_temp[col + '_curr'], df_temp[col + '_fut'], df_temp[col + '_diff'] = df_a[col], df_b[col], np.where((df_a[col] == df_b[col]), 'No', 'Yes')
#[df_temp.rename(columns={c:'Missing'}, inplace=True) for c in df_temp.columns if df_temp[c].isnull().all()]
df_temp.fillna('Missing', inplace=True)
with pd.option_context('display.max_colwidth', -1):
# NOTE(review): presumably this write runs once per column (indentation was lost);
# each to_json(orient='records') call emits its own [...] array, so the file
# holds several arrays back to back instead of a single JSON document.
_file.write(df_temp.to_json(orient='records'))
Expected output:
[
{
"AlarmName_curr": "test",
"AlarmName_fut": "test",
"AlarmName_diff": "No"
},
{
"StateValue_curr": "OK",
"StateValue_fut": "OK",
"StateValue_diff": "No"
}
]
Actual output: it cannot be parsed by a JSON validator. The problem is shown below: the "][" between the two objects should be replaced by "," to get valid JSON, and I don't know why it is printed like that.
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}][{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Edit1:
Tried below as well
_file.write(df_temp.to_json(orient='records',lines=True))
now i get json which is again not parsable, ',' is missing and unless i add , between two dic and [ ] at beginning and end manually , its not parsing..
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Honestly pandas is overkill for this... however
load dataframes as you did
concat them as columns. rename columns
do calcs and map boolean to desired Yes/No
to_json() returns a string so json.loads() to get it back into a list/dict. Filter columns to get to your required format
import json

import pandas as pd

data_b = [
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]
data_a = [
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]

df_a = pd.json_normalize(data_a)  # current values
df_b = pd.json_normalize(data_b)  # future values

# Build one record set per column so that any number of keys is handled;
# both inputs are assumed to share the same keys.
js = []
for col in df_a.columns:
    part = pd.DataFrame({
        col + "_curr": df_a[col],
        col + "_fut": df_b[col],
    })
    # "Yes" means the two values differ, "No" means they are equal.
    differs = part[col + "_curr"] != part[col + "_fut"]
    part[col + "_diff"] = differs.map({True: "Yes", False: "No"})
    # to_json() returns a string, so json.loads() turns it back into a list of dicts.
    js += json.loads(part.to_json(orient="records"))
js
# output:
# [{'AlarmName_curr': 'test', 'AlarmName_fut': 'test', 'AlarmName_diff': 'No'},
#  {'StateValue_curr': 'OK', 'StateValue_fut': 'OK', 'StateValue_diff': 'No'}]

Not getting expected output in python when converting a csv to json

I have an excel file in which data is saved in csv format in such a way.This data is present in the excel file as shown below,under column A (The CSV File is generated by LabView Software code which i have written to generate data).I have also attached an image of the csv file for reference at the end of my question.
RPM,Load Current,Battery Output,Power Capacity
1200,30,12,37
1600,88,18,55
I want to create a Json file in such format
{
"power_capacity_data" :
{
"rpm" : ["1200","1600"],
"load_curr" : ["30","88"],
"batt_output" : ["12","18"],
"power_cap" : ["37","55"]
}
}
This is my code
import csv
import json
# Read power1.lvm with the csv module and dump the collected values as JSON.
def main():
#created a dictionary so that i can append data to it afterwards
power_data = {"rpm":[],"load_curr":[],"batt_output":[],"power_cap":[]}
with open('power1.lvm') as f:
reader = csv.reader(f)
#trying to append the data of column "RPM" to dictionary
rowcount = 0
for row in reader:
if rowcount == 0:
#trying to skip the first row
rowcount = rowcount + 1
else:
# Only row[0] is stored, and only under 'rpm'; the other three lists are never filled.
power_data['rpm'].append(row[0])
print(row)
json_report = {}
json_report['pwr_capacity_data'] = power_data
with open('LVMJSON', "w") as f1:
# NOTE(review): json.dumps accepts an encoding argument only on Python 2 -- confirm the target version.
f1.write(json.dumps(json_report, sort_keys=False, indent=4, separators=(',', ': '),encoding="utf-8",ensure_ascii=False))
# NOTE(review): presumably inside the with block (indentation was lost), where the explicit close is redundant.
f1.close()
if __name__ == "__main__":
main()
The output json file that i am getting is this:(please ignore the print(row) statement in my code)
{
"pwr_capacity_data":
{
"load_curr": [],
"rpm": [
"1200,30,12.62,37.88",
"1600,88,18.62,55.88"
],
"batt_output": [],
"power_cap": []
}
}
The whole row is getting saved in the list,but I just want the values under the column RPM to be saved .Can someone help me out with what I may be doing wrong.Thanks in advance.I have attached an image of csv file to just in case it helps
You could use Python's defaultdict to make it a bit easier. Also a dictionary to map all your header values.
from collections import defaultdict
import csv
import json
# Each list is created on first append, one per output key.
power_data = defaultdict(list)

# CSV header text -> key used in the JSON report.
header_mappings = {
    'RPM': 'rpm',
    'Load Current': 'load_curr',
    'Battery Output': 'batt_output',
    'Power Capacity': 'power_cap',
}

with open('power1.lvm', newline='') as f_input:
    csv_input = csv.DictReader(f_input)
    for row in csv_input:
        # Append every cell to the list that belongs to its column.
        for key, value in row.items():
            power_data[header_mappings[key]].append(value)

with open('LVMJSON.json', 'w') as f_output:
    json.dump({'power_capacity_data': power_data}, f_output, indent=2)
Giving you an output JSON file looking like:
{
"power_capacity_data": {
"batt_output": [
"12",
"18"
],
"power_cap": [
"37",
"55"
],
"load_curr": [
"30",
"88"
],
"rpm": [
"1200",
"1600"
]
}
}

How can I put excel data to the dictionary?

I want to put Excel data into a dictionary.
Excel is
views.py is
#coding:utf-8
from django.shortcuts import render
import xlrd
# Open the workbook and take its first sheet.
book3 = xlrd.open_workbook('./data/excel.xlsx')
sheet3 = book3.sheet_by_index(0)
large_item = None
data_dict = {}
# Start at 1, skipping row 0 (presumably the header row -- confirm).
for row_index in range(1,sheet3.nrows):
rows3 = sheet3.row_values(row_index)
# Keep the previous large_item when the second cell of this row is empty/falsy.
large_item = rows3[1] or large_item
# Rebinds data_dict to the current row's list on every pass, so only the last row survives.
data_dict = rows3
Now, when I ran print(data_dict), ['', '4', '10', 'Karen', ''] was shown. Before that, I wrote data_dict.extend(rows3) in place of data_dict = rows3, but then an error saying that dict has no extend occurred. My ideal output is
data_dict = {
1: {
user_id: 1,
name_id: 1,
name: Blear,
age: 40,
man: false,
employee: leader,
},
2: {
user_id: 2,
name_id: 5,
・
       ・
       ・
},
・
       ・
       ・
}
How should I write to achieve my goal?
Your problem is :
data_dict = rows3
This doesn't add rows3 to data_dict; it replaces data_dict with rows3. So in the end data_dict is equal to the last row.
To add element to a dict you need to do this:
data_dict[KEY] = VALUE
Your key will be the row index.
Now, you want another dict like VALUE
{
user_id: 1,
name_id: 1,
name: Blear,
age: 40,
man: false,
employee: leader,
}
So for each row you need to construct this dict, use headers and cell value to do it.
I haven't tested this code; it's just to give you an idea of how to do it.
#coding:utf-8
from django.shortcuts import render
import xlrd
book3 = xlrd.open_workbook('./data/excel.xlsx')
sheet3 = book3.sheet_by_index(0)

# Row 0 supplies the dictionary keys for every following row.
headers = sheet3.row_values(0)

large_item = None
data_dict = {}
for row_index in range(1, sheet3.nrows):
    rows3 = sheet3.row_values(row_index)
    large_item = rows3[1] or large_item

    # Create dict with headers and row values
    row_data = {}
    for idx_col, value in enumerate(rows3):
        header_value = headers[idx_col]
        # Skip columns whose header cell is empty.
        if header_value:
            row_data[header_value] = value

    # Store the row under its row index.
    data_dict[row_index] = row_data
You can use python's library pandas for an easy solution:
import pandas as pd

# Open the workbook and parse its first sheet into a DataFrame.
xls = pd.ExcelFile('your_excel_file.xls')
df = xls.parse(xls.sheet_names[0])
# Convert the DataFrame to a plain dict.
df.to_dict()

JSON Parsing help in Python

I have below data in JSON format, I have started with code below which throws a KEY ERROR.
Not sure how to get all data listed in headers section.
I know I am not doing it right in json_obj['offers'][0]['pkg']['Info']: but not sure how to do it correctly.
how can I get to different nodes like info,PricingInfo,Flt_Info etc?
{
"offerInfo":{
"siteID":"1",
"language":"en_US",
"currency":"USD"
},
"offers":{
"pkg":[
{
"offerDateRange":{
"StartDate":[
2015,
11,
8
],
"EndDate":[
2015,
11,
14
]
},
"Info":{
"Id":"111"
},
"PricingInfo":{
"BaseRate":1932.6
},
"flt_Info":{
"Carrier":"AA"
}
}
]
}
}
import os
import json
import csv
# Write a '~'-delimited CSV with one header row, then one row per package.
# NOTE(review): f is never closed in this snippet.
f = open('api.csv','w')
writer = csv.writer(f,delimiter = '~')
headers = ['Id' , 'StartDate', 'EndDate', 'Id', 'BaseRate', 'Carrier']
default = ''
writer.writerow(headers)
# NOTE(review): calling .decode() on the result of read() looks like Python 2 -- confirm the target version.
string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)
# In the sample data 'offers' is an object and 'pkg' is the list inside it,
# so [0] here is looked up as a dictionary key.
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)
It looks like you're accessing the json incorrectly. After you have accessed json_obj['offers'], you accessed [0], but there is no array there. json_obj['offers'] gives you another dictionary.
For example, to get PricingInfo like you asked, access like this:
json_obj['offers']['pkg'][0]['PricingInfo']
or 11 from the StartDate like this:
json_obj['offers']['pkg'][0]['offerDateRange']['StartDate'][1]
And I believe you get the KEY ERROR because you access [0] in the dictionary, which since that isn't a key, you get the error.
try to substitute this piece of code:
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)
With this:
for pkg in json_obj['offers']['pkg']:
    date_range = pkg['offerDateRange']
    # StartDate and EndDate are [year, month, day] lists.
    StartDate = "%d-%d-%d" % tuple(date_range['StartDate'][:3])
    EndDate = "%d-%d-%d" % tuple(date_range['EndDate'][:3])
    print(StartDate)

    # Start a fresh row for every package, in the order of the header row:
    # Id, StartDate, EndDate, Id, BaseRate, Carrier.
    row = [
        pkg['Info']['Id'],
        StartDate,
        EndDate,
        pkg['Info']['Id'],
        pkg['PricingInfo']['BaseRate'],
        pkg['flt_Info']['Carrier'],
    ]
    writer.writerow(row)
Try this
import os
import json
import csv
# Read and parse the file; the with block closes it again.
with open('data.json', encoding='utf-8') as json_file:
    json_obj = json.load(json_file)

# Everything printed below comes from the first package.
pkg = json_obj["offers"]["pkg"][0]
date_range = pkg["offerDateRange"]

print(pkg["Info"]["Id"])
# Dates are [year, month, day] lists; print them as year-month-day.
print('-'.join(str(part) for part in date_range["StartDate"][:3]))
print('-'.join(str(part) for part in date_range["EndDate"][:3]))
print(pkg["Info"]["Id"])
print(pkg["PricingInfo"]["BaseRate"])
print(pkg["flt_Info"]["Carrier"])

Categories

Resources