Python Append Data from Loop into DataFrame

I wrote this code and I am able to pull the data I want, but I can't get it arranged the way it should be. I'm guessing it has to do with the way I'm appending each item with ignore_index, but I can't find my way around it.
This is my code:
import json
import pandas as pd

#load json object
with open("c:\Sample.json", "r", encoding='utf-8') as file:
    data = file.read()
data2 = json.loads(data)
print("Type:", type(data2))

cls = ['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns=cls)

for d in data2['mydata']:
    for k, v in d.items():
        #print(k)
        if k == 'attachments':
            #print(d.get('attachments')[0]['id'])
            image = d.get('attachments')[0]['id']
            df = df.append({'Image': image}, ignore_index=True)
            #df['Message'] = image
        if k == 'author_user_email':
            #print(d.get('author_user_email'))
            email = d.get('author_user_email')
            df = df.append({'Email': email}, ignore_index=True)
            #df['Email'] = email
        if k == 'author_user_name':
            #print(d.get('author_user_name'))
            user = d.get('author_user_name')
            df = df.append({'User': user}, ignore_index=True)
            #df['User'] = user
        if k == 'room_name':
            #print(d.get('room_name'))
            members = d.get('room_name')
            df = df.append({'Members': members}, ignore_index=True)
            #df['Members'] = members
        if k == 'ts_iso':
            #print(d.get('ts_iso'))
            time = d.get('ts_iso')
            df = df.append({'Time': time}, ignore_index=True)
            #df['Time'] = time

df
print('Finished getting Data')
df1 = df.head()
print(df)
print(df.head())
df.to_csv(r'c:\sample.csv', encoding='utf-8')
The code gives me this as the result
I am looking to get this
The data in the file is this:
{
    "mydata": [
        {
            "attachments": [
                {
                    "filename": "image.png",
                    "id": "888888888"
                }
            ],
            "author_user_email": "email#email.com",
            "author_user_id": "91",
            "author_user_name": "Marlone",
            "message": "",
            "room_id": "999",
            "room_members": [
                {
                    "room_member_id": "91",
                    "room_member_name": "Marlone"
                },
                {
                    "room_member_id": "9191",
                    "room_member_name": " +16309438985"
                }
            ],
            "room_name": "SMS [Marlone] [ +7777777777]",
            "room_type": "sms",
            "ts": 55,
            "ts_iso": "2021-06-13T18:17:32.877369+00:00"
        },
        {
            "author_user_email": "email#email.com",
            "author_user_id": "21",
            "author_user_name": "Chris",
            "message": "Hi",
            "room_id": "100",
            "room_members": [
                {
                    "room_member_id": "21",
                    "room_member_name": "Joe"
                },
                {
                    "room_member_id": "21",
                    "room_member_name": "Chris"
                }
            ],
            "room_name": "Direct [Chris] [Joe]",
            "room_type": "direct",
            "ts": 12345678910,
            "ts_iso": "2021-06-14T14:42:07.572479+00:00"
        }
    ]
}
Any help would be appreciated. I am new to Python and am learning on my own.

Try:
import json
import pandas as pd

with open("your_data.json", "r") as f_in:
    data = json.load(f_in)

tmp = []
for d in data["mydata"]:
    image = d.get("attachments", [{"id": None}])[0]["id"]
    email = d.get("author_user_email")
    user = d.get("author_user_name")
    members = d.get("room_name")
    time = d.get("ts_iso")
    tmp.append((image, email, user, members, time))

df = pd.DataFrame(tmp, columns=["Image", "Email", "User", "Members", "Time"])
print(df)
Prints:
Image Email User Members Time
0 888888888 email#email.com Marlone SMS [Marlone] [ +7777777777] 2021-06-13T18:17:32.877369+00:00
1 None email#email.com Chris Direct [Chris] [Joe] 2021-06-14T14:42:07.572479+00:00
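As a side note, DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so collecting the rows in a plain list and building the frame once, as above, is also the forward-compatible way to do this.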

Although the other answer does work, pandas has a built-in reader for JSON files, pd.read_json: https://pandas.pydata.org/pandas-docs/version/1.1.3/reference/api/pandas.read_json.html
It has the benefit of being able to handle very large datasets via chunking, as well as supporting quite a few different formats. The other answer would not be performant for a large dataset.
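For reference, pd.read_json only supports chunked reading together with lines=True (line-delimited JSON, one object per line). A minimal sketch, assuming a hypothetical line-delimited file records.jsonl:

import pandas as pd

# chunksize requires lines=True; each chunk arrives as its own DataFrame
reader = pd.read_json("records.jsonl", lines=True, chunksize=10_000)
for chunk in reader:
    print(len(chunk))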
This would get you started:
import pandas as pd

df = pd.read_json(r"c:\Sample.json")
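On this particular Sample.json, read_json would load mydata as a single column of dicts, so the records would still need flattening; pd.json_normalize can do that. A minimal sketch (the selected columns are just illustrative):

import json
import pandas as pd

with open(r"c:\Sample.json", encoding="utf-8") as f:
    raw = json.load(f)

# json_normalize spreads each record's top-level keys into columns;
# list-valued keys such as "attachments" remain list columns
df = pd.json_normalize(raw["mydata"])
print(df[["author_user_email", "author_user_name", "room_name", "ts_iso"]])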

The problem is that append() adds a new row. So you have to use at[] (https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html), specifying the index/row. See below. Some print/debug messages were left in, and the paths to the input and output files were changed a little because I'm on Linux.
import json
import pandas as pd
import pprint as pp

#load json object
with open("Sample.json", "r", encoding='utf-8') as file:
    data = file.read()
data2 = json.loads(data)
#pp.pprint(data2)

cls = ['Image', 'Email', 'User', 'Members', 'Time']
df = pd.DataFrame(columns=cls)
pp.pprint(df)

index = 0
for d in data2['mydata']:
    for k, v in d.items():
        #print(k)
        if k == 'attachments':
            #print(d.get('attachments')[0]['id'])
            image = d.get('attachments')[0]['id']
            df.at[index, 'Image'] = image
            #df['Message'] = image
        if k == 'author_user_email':
            #print(d.get('author_user_email'))
            email = d.get('author_user_email')
            df.at[index, 'Email'] = email
            #df['Email'] = email
        if k == 'author_user_name':
            #print(d.get('author_user_name'))
            user = d.get('author_user_name')
            df.at[index, 'User'] = user
            #df['User'] = user
        if k == 'room_name':
            #print(d.get('room_name'))
            members = d.get('room_name')
            df.at[index, 'Members'] = members
            #df['Members'] = members
        if k == 'ts_iso':
            #print(d.get('ts_iso'))
            time = d.get('ts_iso')
            df.at[index, 'Time'] = time
            #df['Time'] = time
    index += 1

# start indexing from 0 (reset_index returns a new frame, so assign it back)
df = df.reset_index(drop=True)
# replace empty cells with the string 'None'
df.fillna('None', inplace=True)
pp.pprint(df)

print('Finished getting Data')
df1 = df.head()
print(df)
print(df.head())
df.to_csv(r'sample.csv', encoding='utf-8')

Related

How do I only return a specific value in a set of values in an API JSON response?

I'm trying to return only a specific value from the "data" key in this response that I'm currently working with:
{
    "dataset": {
        "id": 49333506,
        "dataset_code": "YMAB",
        "database_code": "QOR",
        "name": "Y-mAbs Therapeutics Inc. (YMAB) Option Earnings Crush, Liquidity, and Volatility Ratings",
        "description": "Option Earnings Crush, Liquidity, and Volatility Ratings for Y-mAbs Therapeutics Inc. (YMAB). All time periods are measured in calendar days. See documentation for methodology.",
        "refreshed_at": "2022-08-05 21:20:34 UTC",
        "newest_available_date": "2022-08-05",
        "oldest_available_date": "2020-02-12",
        "column_names": [
            "Date",
            "EarningsCrushRate",
            "CalendarDaysUntilEarnings",
            "TradingDaysUntilEarnings",
            "LiquidityRating",
            "HasLeapOptions",
            "HasWeeklyOptions",
            "Iv30Rank",
            "Iv30Percentile",
            "Iv30Rating",
            "Iv60Rank",
            "Iv60Percentile",
            "Iv60Rating",
            "Iv90Rank",
            "Iv90Percentile",
            "Iv90Rating",
            "Iv360Rank",
            "Iv360Percentile",
            "Iv360Rating"
        ],
        "frequency": "daily",
        "type": "Time Series",
        "premium": true,
        "limit": null,
        "transform": null,
        "column_index": null,
        "start_date": "2020-02-12",
        "end_date": "2022-08-05",
        "data": [
            [
                "2022-08-05",
                null,
                null,
                null,
                2.0,
                0.0,
                0.0,
                0.1437,
                0.4286,
                0.3706,
                0.1686,
                0.4762,
                0.3936,
                0.1379,
                0.4502,
                0.4129,
                0.107,
                0.5152,
                0.4657
            ],
I only want to return the date, and a single value at a time, from the "data" key that's within "dataset".
Here's the code I have so far, but I'm stuck on how to make this happen:
r = requests.get(url=f"https://data.nasdaq.com/api/v3/datasets/QOR/{symbol}/data.json?api_key={apikey}")
d = r.json()
dataset = d['dataset_data']
data = dataset['data']
column_names = dataset['column_names']
date = column_names[0]
ercrush = column_names[1]
calendar = column_names[2]
tradingdays = column_names[3]
liquidity = column_names[4]
leaps = column_names[5]
weeklies = column_names[6]
ivrank30 = column_names[7]
ivper30 = column_names[8]
ivrate30 = column_names[9]
ivrank60 = column_names[10]
ivper60 = column_names[11]
ivrate60 = column_names[12]
ivrank90 = column_names[13]
ivper90 = column_names[14]
ivrate90 = column_names[15]
ivrank360 = column_names[16]
ivper360 = column_names[17]
ivrate360 = column_names[18]
values = data[0]
For example, I'm only trying to return the Date, defined as column_names[0], paired with the value "2022-08-05" that's within "data": [, etc.
How would I go about doing this?
Thanks so much for any help.
I figured out the issue!
I created another variable called results = values and now I can pick the values I want and easily match them with the column_names!
Awesome!
The finished code that works:
r = requests.get(url=f"https://data.nasdaq.com/api/v3/datasets/QOR/{symbol}/data.json?api_key=KyVWdRX_o26L5XNUkgqN")
d = r.json()
dataset = d['dataset_data']
data = dataset['data']
column_names = dataset['column_names']
Date = column_names[0]
ercrush = column_names[1]
calendar = column_names[2]
tradingdays = column_names[3]
liquidity = column_names[4]
leaps = column_names[5]
weeklies = column_names[6]
ivrank30 = column_names[7]
ivper30 = column_names[8]
ivrate30 = column_names[9]
ivrank60 = column_names[10]
ivper60 = column_names[11]
ivrate60 = column_names[12]
ivrank90 = column_names[13]
ivper90 = column_names[14]
ivrate90 = column_names[15]
ivrank360 = column_names[16]
ivper360 = column_names[17]
ivrate360 = column_names[18]
values = data[0]
results = values[2] #the correction
print(results)
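A tidier variant of the same idea, continuing from the variables already defined above, is to zip column_names with a row of values so each field can be looked up by name rather than by position:

values = data[0]                       # first row of "data"
row = dict(zip(column_names, values))  # e.g. {"Date": "2022-08-05", ...}
print(row["Date"], row["Iv30Rank"])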

JSON file not formatted correctly when writing JSON differences with pandas and numpy

I am trying to compare two JSON files and then write another JSON with the column names and the differences marked as yes or no. I am using pandas and numpy.
Below are sample files. In reality these JSON files are dynamic, meaning we don't know upfront how many keys there will be.
Input files:
fut.json
[
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]
Curr.json:
[
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]
Below is the code I have tried:
import json
import pandas as pd
import numpy as np

with open(r"c:\csv\fut.json", 'r+') as f:
    data_b = json.load(f)
with open(r"c:\csv\curr.json", 'r+') as f:
    data_a = json.load(f)

df_a = pd.json_normalize(data_a)
df_b = pd.json_normalize(data_b)
_, df_a = df_b.align(df_a, fill_value=np.NaN)
_, df_b = df_a.align(df_b, fill_value=np.NaN)

with open(r"c:\csv\report.json", 'w') as _file:
    for col in df_a.columns:
        df_temp = pd.DataFrame()
        df_temp[col + '_curr'], df_temp[col + '_fut'], df_temp[col + '_diff'] = df_a[col], df_b[col], np.where((df_a[col] == df_b[col]), 'No', 'Yes')
        #[df_temp.rename(columns={c:'Missing'}, inplace=True) for c in df_temp.columns if df_temp[c].isnull().all()]
        df_temp.fillna('Missing', inplace=True)
        with pd.option_context('display.max_colwidth', -1):
            _file.write(df_temp.to_json(orient='records'))
Expected output:
[
    {
        "AlarmName_curr": "test",
        "AlarmName_fut": "test",
        "AlarmName_diff": "No"
    },
    {
        "StateValue_curr": "OK",
        "StateValue_fut": "OK",
        "StateValue_diff": "No"
    }
]
Coming output: I'm not able to parse it in a JSON validator. Below is the problem: those ][ brackets between the records should be replaced by ',' to get valid JSON, and I don't know why it's printing like that.
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}][{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Edit 1:
I tried the below as well:
_file.write(df_temp.to_json(orient='records', lines=True))
Now I get JSON which is again not parsable: the ',' is missing, and unless I manually add ',' between the two dicts and '[' and ']' at the beginning and end, it doesn't parse:
[{"AlarmName_curr":"test","AlarmName_fut":"test","AlarmName_diff":"No"}{"StateValue_curr":"OK","StateValue_fut":"OK","StateValue_diff":"No"}]
Honestly, pandas is overkill for this... however:
- load the dataframes as you did
- concat them as columns; rename the columns
- do the calcs and map the booleans to the desired Yes/No
- to_json() returns a string, so use json.loads() to get it back into a list/dict; filter the columns to get to your required format
import json
import pandas as pd

data_b = [
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]
data_a = [
    {
        "AlarmName": "test",
        "StateValue": "OK"
    }
]

df_a = pd.json_normalize(data_a)
df_b = pd.json_normalize(data_b)

df = pd.concat([df_a, df_b], axis=1)
df.columns = [c + "_curr" for c in df_a.columns] + [c + "_fut" for c in df_a.columns]
# a difference exists where the current and future values are not equal
df["AlarmName_diff"] = df["AlarmName_curr"] != df["AlarmName_fut"]
df["StateValue_diff"] = df["StateValue_curr"] != df["StateValue_fut"]
df = df.replace({True: "Yes", False: "No"})

js = json.loads(df.loc[:, [c for c in df.columns if c.startswith("Alarm")]].to_json(orient="records"))
js += json.loads(df.loc[:, [c for c in df.columns if c.startswith("State")]].to_json(orient="records"))
js
output
[{'AlarmName_curr': 'test', 'AlarmName_fut': 'test', 'AlarmName_diff': 'No'},
 {'StateValue_curr': 'OK', 'StateValue_fut': 'OK', 'StateValue_diff': 'No'}]
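Since the original goal was a report file, the combined list can then be written out in a single call so the result stays valid JSON; a minimal sketch (report.json is a placeholder path):

with open("report.json", "w") as f:
    json.dump(js, f, indent=2)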

Facebook API offline conversion event param data must be an array error

I am trying to upload events to the offline conversion dataset in Facebook with some custom fields, but I am receiving the following error:
Status: 400
Response:
{
    "error": {
        "message": "(#100) param data must be an array.",
        "type": "OAuthException",
        "code": 100,
        "fbtrace_id": "A5qsezd_MfvKEYYTVfPcu29"
    }
}
I am referring to this page to upload offline events.
https://developers.facebook.com/docs/marketing-api/offline-conversions/
CSV structure
email,event_name,event_time,value,dept,brand,tx_type,cust_type,cust_trend
79FBB38FC843911533020FD8DE5B29CBA9958F,Purchase,2020-06-15T07:42:47Z,100.25, RENTAL,NAN,PA,Active,Growth (+15% to LY)
8EF89542E99BF7D8C0D4AA9F218,Purchase,2020-06-15T17:46:13Z,50,DEPOSITS, NAN,Other,Active,Declined (-15% to LY)
4C83B542E9C9566AA8D6A5279839115E7C0C454A1,Purchase,2020-06-15T09:55:01Z,150,DEPOSITS, NAN,PA,Active,Declined (-15% to LY)
361604C2B8FC67,Purchase,2020-06-15T15:41:18Z,50,DEPOSITS, NAN,OtherNew (Less than 3 Months),Did Not Shop LY
09133B0CDFA527BA9013CA8F1A0382D76F9,Purchase,2020-06-15T08:44:47Z,1,DEPOSITS, NAN,PX,Active,Growth (+15% to LY)
50cff131E2B3042C6E533ss225146C37994E2C2,Purchase,2020-06-15T07:35:50Z,300,DEPOSITS, NAN,Other,ActiveGrowth (+10% to LY)
ECD35DBB79FF37B0FC95E131,Purchase,2020-06-15T16:13:28Z,50,DEPOSITS, NAN,PX,Active,Decline (-12% to LY)
Code:
def upload_offline_conversion(**args):
    from facebook_business.adobjects.offlineconversiondataset import OfflineConversionDataSet
    from facebook_business.api import FacebookAdsApi
    import pandas as pd
    #import gcsfs
    import json

    access_token = access_token
    FacebookAdsApi.init(app_id=app_id, access_token=access_token)
    offline_dataset = OfflineConversionDataSet(dataset_id)

    df = pd.read_csv('UPLOADS.csv', sep=',')
    df['event_time'] = (pd.to_datetime(df['event_time']).astype(int) / 10 ** 9).astype(int).astype(str)
    df['match_keys'] = df.apply(lambda row: json.dumps({k: [row[k]] if k in ['email'] else row[k] for k in ['email'] if pd.notnull(row[k])}), axis=1)
    del df['email']  # deleting match_keys single columns since they are now useless
    df["currency"] = 'CAD'

    data = (df.groupby(['event_name', 'event_time', 'match_keys', 'value', 'currency'], as_index=False)
              .apply(lambda x: x[['dept', 'brand', 'tx_type', 'cust_type', 'cust_trend']].to_dict('r'))
              .reset_index()
              .rename(columns={0: 'custom_data'}).to_json(orient='records'))
    print(data)

    batch_limit = 2000  # Maximum number of events permitted in a single call
    for i in range(0, len(data), batch_limit):
        params = {
            'upload_tag': 'upload_test',
            'data': data[i:i+batch_limit],
        }
        # print(params)
        #offline_dataset.create_event(params=params)
Expected output:
data = [
    {
        match_keys: {"email": ['79FBB38FC843911533020FD8DE5B29CBA9958F']},
        currency: "CAD",
        value: 100.25,
        event_name: "Purchase",
        event_time: 1592206967,
        custom_data: {
            dept: "RENTAL",
            brand: "NAN",
            tx_type: "PA",
            cust_type: "ACTIVE",
            cust_trend: "Growth (+15% to LY)"
        },
    },
    {
        match_keys: {"email": ["8EF89542E99BF7D8C0D4AA9F218"]},
        currency: "CAD",
        value: 50,
        event_name: "Purchase",
        event_time: 1592243173,
        custom_data: {
            dept: "RENTAL",
            brand: "NAN",
            tx_type: "PA",
            cust_type: "ACTIVE",
            cust_trend: "Growth (+15% to LY)"
        },
    },
    #and so on...................
]
My sample output:
{'upload_tag': 'sales_upload_test_final',
'data': '[
{"event_name":"Purchase",
"event_time":"1592243173",
"match_keys":"{"\\email\\": [\\"8EF89542E99BF7D8C0D4AA9F218"\\]}",
"value":"50",
"currency":"CAD",
"custom_data":[{"dept":"DEPOSITS","brand":" NAN","tx_type":"Other","cust_type":"Active","cust_trend":"Declined (-15% to LY)"}]}]}
You need to specify LDU (Limited Data Use) as of July 1st, 2020.
Code:
def upload_offline_conversion(**args):
    from facebook_business.adobjects.offlineconversiondataset import OfflineConversionDataSet
    from facebook_business.api import FacebookAdsApi
    import pandas as pd
    #import gcsfs
    import json

    access_token = access_token
    FacebookAdsApi.init(app_id=app_id, access_token=access_token)
    offline_dataset = OfflineConversionDataSet(dataset_id)

    df = pd.read_csv('UPLOADS.csv', sep=',')
    df['event_time'] = (pd.to_datetime(df['event_time']).astype(int) / 10 ** 9).astype(int).astype(str)
    df['match_keys'] = df.apply(lambda row: json.dumps({k: [row[k]] if k in ['email'] else row[k] for k in ['email'] if pd.notnull(row[k])}), axis=1)
    del df['email']  # deleting match_keys single columns since they are now useless
    df["currency"] = 'CAD'

    data = (df.groupby(['event_name', 'event_time', 'match_keys', 'value', 'currency'], as_index=False)
              .apply(lambda x: x[['dept', 'brand', 'tx_type', 'cust_type', 'cust_trend']].to_dict('r'))
              .reset_index()
              .rename(columns={0: 'custom_data'}).to_dict(orient='records'))

    df = pd.DataFrame(data)
    df["data_processing_options"] = [[]] * df.shape[0]  # value either [] or ["LDU"]
    data = df.to_dict(orient="records")

    batch_limit = 2000  # Maximum number of events permitted in a single call
    for i in range(0, len(data), batch_limit):
        params = {
            'upload_tag': 'upload_test',
            'data': data[i:i+batch_limit],
        }
        # print(params)
        #offline_dataset.create_event(params=params)
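The key change from the original code is to_dict(orient='records') in place of to_json (plus the data_processing_options column): data is now a Python list of event dicts rather than one big JSON string, which is exactly the array shape the "param data must be an array" error is asking for.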

CSV to Elasticsearch with Python: SerializationError

When I try to send the bulk_data to the local Elasticsearch, my data isn't loaded because of a SerializationError.
I already tried to fill the empty cells in the CSV file, but that wasn't the solution.
from elasticsearch import Elasticsearch

bulk_data = []
header = []
count = 0
for row in csv_file_object:
    if count > 0:
        data_dict = {}
        for i in range(len(row)):
            row = row.rstrip()
            data_dict[header[i]] = row[i]
        op_dict = {
            "index": {
                "_index": INDEX_NAME,
                "_type": TYPE_NAME,
            }
        }
        bulk_data.append(op_dict)
        bulk_data.append(data_dict)
    else:
        header = row
    count = count + 1

# create ES client, create index
es = Elasticsearch(hosts=[ES_HOST])
if es.indices.exists(INDEX_NAME):
    print("deleting '%s' index..." % (INDEX_NAME))
    res = es.indices.delete(index=INDEX_NAME)
res = es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
See image for the SerializationError and bulk_data values:
Please note: the \n is added by the serialization process itself.
I'd like to respond, but there's one thing I can't understand: how do you retrieve your field names from the data? In your code I see that you retrieve them from a list called header, which is empty, so I can't understand where that value comes from. Check my answer; I don't know if I've understood you well.
from elasticsearch import Elasticsearch
from elasticsearch import helpers

index_name = "your_index_name"
doc_type = "your_doc_type"
esConnector = Elasticsearch(["http://192.168.1.1:9200/"])
# change your ip here
count = 0

def generate_data(csv_file_object):
    global count
    with open(csv_file_object, "r") as f:
        for line in f:
            fields = line.rstrip().split(",")
            data_dict = {header[count]: fields}
            obj = {
                '_op_type': 'index',
                '_index': index_name,
                '_type': doc_type,
                '_id': count + 1,
                '_source': data_dict
            }
            count += 1
            yield obj

for success, info in helpers.parallel_bulk(client=esConnector, actions=generate_data(csv_file_object), thread_count=4):
    if not success:
        print('Doc failed', info)
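If the header handling is the sticking point, csv.DictReader reads the field names from the first row of the file automatically, so there is no need to track the header by hand. A minimal sketch under the same assumptions (local cluster; the file and index names are placeholders):

import csv
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch(["http://localhost:9200"])

def generate_actions(path, index_name):
    with open(path, newline="") as f:
        # DictReader uses the first CSV row as the field names
        for i, row in enumerate(csv.DictReader(f)):
            yield {
                "_op_type": "index",
                "_index": index_name,
                "_id": i + 1,
                "_source": row,
            }

helpers.bulk(es, generate_actions("data.csv", "your_index_name"))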

Python write multiple array values into CSV

With my code, I read the values of JSON data and insert them into an array:
import json

def retrive_json():
    with open('t_v1.json') as json_data:
        d = json.load(json_data)
    array = []
    for i in d['ride']:
        origin_lat = i['origin']['lat']
        origin_lng = i['origin']['lng']
        destination_lat = i['destination']['lat']
        destination_lng = i['destination']['lng']
        array.append([origin_lat, origin_lng, destination_lat, destination_lng])
    return array
The resulting array is this:
[[39.72417, -104.99984, 39.77446, -104.9379], [39.77481, -104.93618, 39.6984, -104.9652]]
How can I write each element of each array into a specific field in the CSV?
I have tried this way:
wrt = csv.writer(open('t_.csv', 'w'), delimiter=',', lineterminator='\n')
for x in jjson:
    wrt.writerow([x])
But the values of each array are all stored in one field. How can I solve this and write each value to its own field?
This is my JSON file:
{
    "ride": [
        {
            "origin": {
                "lat": 39.72417,
                "lng": -104.99984,
                "eta_seconds": null,
                "address": ""
            },
            "destination": {
                "lat": 39.77446,
                "lng": -104.9379,
                "eta_seconds": null,
                "address": null
            }
        },
        {
            "origin": {
                "lat": 39.77481,
                "lng": -104.93618,
                "eta_seconds": null,
                "address": "10 Albion Street"
            },
            "destination": {
                "lat": 39.6984,
                "lng": -104.9652,
                "eta_seconds": null,
                "address": null
            }
        }
    ]
}
Let's say we have this:
jsonstring = """{
    "ride": [
        {
            "origin": {
                "lat": 39.72417,
                "lng": -104.99984,
                "eta_seconds": null,
                "address": ""
            },
            "destination": {
                "lat": 39.77446,
                "lng": -104.9379,
                "eta_seconds": null,
                "address": null
            }
        },
        {
            "origin": {
                "lat": 39.77481,
                "lng": -104.93618,
                "eta_seconds": null,
                "address": "10 Albion Street"
            },
            "destination": {
                "lat": 39.6984,
                "lng": -104.9652,
                "eta_seconds": null,
                "address": null
            }
        }
    ]
}"""
Here is a pandas solution:
import pandas as pd
import json
# Load json to dataframe
df = pd.DataFrame(json.loads(jsonstring)["ride"])
# Create the new columns
df["o1"] = df["origin"].apply(lambda x: x["lat"])
df["o2"] = df["origin"].apply(lambda x: x["lng"])
df["d1"] = df["destination"].apply(lambda x: x["lat"])
df["d2"] = df["destination"].apply(lambda x: x["lng"])
#export
print(df.iloc[:,2:].to_csv(index=False, header=True))
#use below for file
#df.iloc[:,2:].to_csv("output.csv", index=False, header=True)
Returns:
o1,o2,d1,d2
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
Condensed answer:
import pandas as pd
import json
with open('data.json') as json_data:
d = json.load(json_data)
df = pd.DataFrame(d["ride"])
df["o1"],df["o2"] = zip(*df["origin"].apply(lambda x: (x["lat"],x["lng"])))
df["d1"],df["d2"] = zip(*df["destination"].apply(lambda x: (x["lat"],x["lng"])))
df.iloc[:,2:].to_csv("t_.csv",index=False,header=False)
Or, maybe the most readable solution:
import json
from pandas.io.json import json_normalize

with open('data.json') as json_data:
    d = json.load(json_data)

df = json_normalize(d["ride"])
cols = ["origin.lat", "origin.lng", "destination.lat", "destination.lng"]
df[cols].to_csv("output.csv", index=False, header=False)
This might help:
import json
import csv

def retrive_json():
    with open('data.json') as json_data:
        d = json.load(json_data)
    array = []
    for i in d['ride']:
        origin_lat = i['origin']['lat']
        origin_lng = i['origin']['lng']
        destination_lat = i['destination']['lat']
        destination_lng = i['destination']['lng']
        array.append([origin_lat, origin_lng, destination_lat, destination_lng])
    return array

res = retrive_json()
csv_cols = ["origin_lat", "origin_lng", "dest_lat", "dest_lng"]
with open("output_csv.csv", 'w') as out:
    writer = csv.DictWriter(out, fieldnames=csv_cols)
    writer.writeheader()
    for each_list in res:
        d = dict(zip(csv_cols, each_list))
        writer.writerow(d)
Output csv generated is:
origin_lat,origin_lng,dest_lat,dest_lng
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
To me it looks like you've got an array of arrays and you want the individual elements. Therefore you'll want to use a nested for loop. Your current for loop is getting each array; to then split up each array into its elements, you'll want to loop through those. I'd suggest something like this:
for x in jjson:
    for y in x:
        wrt.writerow([y])
Obviously you might want to update your bracketing etc.; this is just to give you an idea of how to solve your issue.
Let me know how it goes!
Why the csv library?
array = [[1, 2, 3, 4], [5, 6, 7, 8]]
with open('test.csv', 'w') as csv_file:
    csv_file.write("# Header Info\n"
                   "# Value1, Value2, Value3, Value4\n")  # the header might be optional
    for row in array:
        # join needs strings, so convert the numbers first
        csv_file.write(",".join(map(str, row)) + "\n")
