JSON Parsing help in Python - python

I have the data below in JSON format. I started with the code below, which throws a KeyError.
I am not sure how to get all the data listed in the headers section.
I know I am not doing it right in json_obj['offers'][0]['pkg']['Info'], but I am not sure how to do it correctly.
How can I get to the different nodes such as Info, PricingInfo, flt_Info, etc.?
{
"offerInfo":{
"siteID":"1",
"language":"en_US",
"currency":"USD"
},
"offers":{
"pkg":[
{
"offerDateRange":{
"StartDate":[
2015,
11,
8
],
"EndDate":[
2015,
11,
14
]
},
"Info":{
"Id":"111"
},
"PricingInfo":{
"BaseRate":1932.6
},
"flt_Info":{
"Carrier":"AA"
}
}
]
}
}
import os
import json
import csv
f = open('api.csv','w')
writer = csv.writer(f,delimiter = '~')
headers = ['Id' , 'StartDate', 'EndDate', 'Id', 'BaseRate', 'Carrier']
default = ''
writer.writerow(headers)
string = open('data.json').read().decode('utf-8')
json_obj = json.loads(string)
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)

It looks like you're accessing the json incorrectly. After you have accessed json_obj['offers'], you accessed [0], but there is no array there. json_obj['offers'] gives you another dictionary.
For example, to get PricingInfo like you asked, access like this:
json_obj['offers']['pkg'][0]['PricingInfo']
or 11 from the StartDate like this:
json_obj['offers']['pkg'][0]['offerDateRange']['StartDate'][1]
I believe you get the KeyError because json_obj['offers'] is a dictionary and 0 is not one of its keys, so indexing it with [0] fails.

try to substitute this piece of code:
for pkg in json_obj['offers'][0]['pkg']['Info']:
row = []
row.append(json_obj['id']) # just to test,but I need column values listed in header section
writer.writerow(row)
With this:
# Write one CSV row per package, following the column order of `headers`.
for pkg in json_obj['offers']['pkg']:
    date_range = pkg['offerDateRange']
    # Each date is stored in the JSON as a [year, month, day] list.
    start_date = "%d-%d-%d" % tuple(date_range['StartDate'])
    end_date = "%d-%d-%d" % tuple(date_range['EndDate'])
    # A fresh list per package keeps values from leaking between rows.
    row = [
        pkg['Info']['Id'],
        start_date,
        end_date,
        pkg['Info']['Id'],
        pkg['PricingInfo']['BaseRate'],
        pkg['flt_Info']['Carrier'],
    ]
    writer.writerow(row)

Try this
import io
import os
import json
import csv

# io.open accepts an encoding on both Python 2 and 3, and the `with` block
# closes the file as soon as the document has been parsed.
with io.open('data.json', encoding='utf-8') as f:
    json_obj = json.load(f)

# "offers" is a dict; the list of packages lives under its "pkg" key.
pkg = json_obj["offers"]["pkg"][0]
date_range = pkg["offerDateRange"]

# print(...) with a single argument behaves the same on Python 2 and 3.
print(pkg["Info"]["Id"])
# Dates are stored as [year, month, day]; show them as year-month-day.
print('-'.join(str(part) for part in date_range["StartDate"][:3]))
print('-'.join(str(part) for part in date_range["EndDate"][:3]))
print(pkg["Info"]["Id"])
print(pkg["PricingInfo"]["BaseRate"])
print(pkg["flt_Info"]["Carrier"])

Related

python get data json value max

How can I extract the T3 Period, Year and maximum value?
file.json
[
{"Fecha":"2022-08-01T00:00:00.000+02:00", "T3_TipoDato":"Avance", "T3_Periodo":"M08", "Anyo":2022, "value":10.4},
{"Fecha":"2022-07-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M07", "Anyo":2022, "value":10.8},
{"Fecha":"2022-06-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M06", "Anyo":2022, "value":10.2}
]
My code:
import json
with open("file.json") as f:
distros_dict = json.load(f)
print (distros_dict)
Here is my suggestion.
Load the data from the file into a list.
Loop through every dict in the list to edit it.
(In my example, I deleted two keys from every dict in the list.)
import json

# Load the list of records from the file.
with open('file.json', "r", encoding='utf-8') as f:
    distros_dict = list(json.load(f))

# Drop the keys that are not wanted in the result.
for item in distros_dict:
    item.pop('Fecha')
    item.pop('T3_TipoDato')

# Keep only the record with the largest "value"; max() avoids sorting the
# whole list just to take its first element.
distros_dict = max(distros_dict, key=lambda item: item['value'])
Try this:
from json import load

# Parse the whole file first, then pick the record with the highest "value".
with open("file.json") as f:
    records = load(f)

dictionary_max = max(records, key=lambda record: record["value"])

# Keep only the three fields of interest, in this order.
result = {
    field: dictionary_max[field]
    for field in ("T3_Periodo", "Anyo", "value")
}
print(result)
output:
{'T3_Periodo': 'M07', 'Anyo': 2022, 'value': 10.8}

Creating multiple dataframe using loop or function

I'm trying to extract the hash rate for 3 cryptocurrencies, and I have attached my code below. I want to pass three URLs and get back three different dictionaries holding the values. I'm stuck and don't understand how I should go about it. I have tried using loops, but it is not working for me.
url = {'Bitcoin' : 'https://bitinfocharts.com/comparison/bitcoin-hashrate.html#3y',
'Ethereum': 'https://bitinfocharts.com/comparison/ethereum-hashrate.html#3y',
'Litecoin': 'https://bitinfocharts.com/comparison/litecoin-hashrate.html'}
for ele in url:
#### requesting the page and extracting the script which has date and values
session = requests.Session()
page = session.get(ele[i])
soup = BeautifulSoup(page.content, 'html.parser')
values = str(soup.find_all('script')[4])
values = values.split('d = new Dygraph(document.getElementById("container"),')[1]
#create an empty dict to append date and hashrates
dict([("crypto_1 %s" % i,[]) for i in range(len(url))])
#run a loop over all the dates and adding to dictionary
for i in range(values.count('new Date')):
date = values.split('new Date("')[i+1].split('"')[0]
value = values.split('"),')[i+1].split(']')[0]
dict([("crypto_1 %s" % i)[date] = value
You can use the following example to get data from all 3 URLs and create a dataframe/dictionary from it:
import re
import requests
import pandas as pd

url = {
    "Bitcoin": "https://bitinfocharts.com/comparison/bitcoin-hashrate.html#3y",
    "Ethereum": "https://bitinfocharts.com/comparison/ethereum-hashrate.html#3y",
    "Litecoin": "https://bitinfocharts.com/comparison/litecoin-hashrate.html",
}


def _to_float(raw):
    """Convert a scraped value to float; the literal "null" becomes NaN."""
    return float("nan") if raw == "null" else float(raw)


data = []
for name, u in url.items():
    html_doc = requests.get(u).text
    # The regex captures the date string and the value of every
    # [new Date("<date>"),<value>] entry found in the page.
    points = re.findall(r'\[new Date\("(.*?)"\),(.*?)\]', html_doc)
    data.extend(
        {"Name": name, "Date": date, "Hash Rate": _to_float(hash_rate)}
        for date, hash_rate in points
    )

df = pd.DataFrame(data)
df["Date"] = pd.to_datetime(df["Date"])

# here save df to CSV

# this will create a dictionary, where the keys are crypto names and values
# are dicts with keys Date/HashRate:
out = {
    name: g[["Date", "Hash Rate"]].to_dict(orient="list")
    for name, g in df.groupby("Name")
}
print(out)
Prints:
{
"Bitcoin": {
"Date": [
Timestamp("2009-01-03 00:00:00"),
Timestamp("2009-01-04 00:00:00"),
Timestamp("2009-01-05 00:00:00"),
...

Not getting expected output in python when converting a csv to json

I have an Excel file in which data is saved in CSV format as shown below, all under column A. (The CSV file is generated by LabVIEW code that I wrote to produce the data.) I have also attached an image of the CSV file for reference at the end of my question.
RPM,Load Current,Battery Output,Power Capacity
1200,30,12,37
1600,88,18,55
I want to create a Json file in such format
{
"power_capacity_data" :
{
"rpm" : ["1200","1600"],
"load_curr" : ["30","88"],
"batt_output" : ["12","18"],
"power_cap" : ["37","55"]
}
}
This is my code
import csv
import json
def main():
#created a dictionary so that i can append data to it afterwards
power_data = {"rpm":[],"load_curr":[],"batt_output":[],"power_cap":[]}
with open('power1.lvm') as f:
reader = csv.reader(f)
#trying to append the data of column "RPM" to dictionary
rowcount = 0
for row in reader:
if rowcount == 0:
#trying to skip the first row
rowcount = rowcount + 1
else:
power_data['rpm'].append(row[0])
print(row)
json_report = {}
json_report['pwr_capacity_data'] = power_data
with open('LVMJSON', "w") as f1:
f1.write(json.dumps(json_report, sort_keys=False, indent=4, separators=(',', ': '),encoding="utf-8",ensure_ascii=False))
f1.close()
if __name__ == "__main__":
main()
The output json file that i am getting is this:(please ignore the print(row) statement in my code)
{
"pwr_capacity_data":
{
"load_curr": [],
"rpm": [
"1200,30,12.62,37.88",
"1600,88,18.62,55.88"
],
"batt_output": [],
"power_cap": []
}
}
The whole row is getting saved in the list, but I just want the values under the RPM column to be saved. Can someone help me figure out what I may be doing wrong? Thanks in advance. I have attached an image of the CSV file in case it helps.
You could use Python's defaultdict to make it a bit easier. Also a dictionary to map all your header values.
from collections import defaultdict
import csv
import json

# Maps each CSV column header to the key used in the JSON output.
header_mappings = {
    'RPM': 'rpm',
    'Load Current': 'load_curr',
    'Battery Output': 'batt_output',
    'Power Capacity': 'power_cap',
}

# Missing keys start out as empty lists, so values can be appended directly.
power_data = defaultdict(list)

with open('power1.lvm', newline='') as f_input:
    for row in csv.DictReader(f_input):
        for header, cell in row.items():
            power_data[header_mappings[header]].append(cell)

with open('LVMJSON.json', 'w') as f_output:
    json.dump({'power_capacity_data': power_data}, f_output, indent=2)
Giving you an output JSON file looking like:
{
"power_capacity_data": {
"batt_output": [
"12",
"18"
],
"power_cap": [
"37",
"55"
],
"load_curr": [
"30",
"88"
],
"rpm": [
"1200",
"1600"
]
}
}

Python write mutiple array value into csv

with my code, i read the values of JSON data and insert into array
def retrive_json():
with open('t_v1.json') as json_data:
d = json.load(json_data)
array = []
for i in d['ride']:
origin_lat = i['origin']['lat']
origin_lng = i['origin']['lng']
destination_lat = i['destination']['lat']
destination_lng = i['destination']['lng']
array.append([origin_lat,origin_lng,destination_lat,destination_lng])
return array
the result array is this :
[[39.72417, -104.99984, 39.77446, -104.9379], [39.77481, -104.93618, 39.6984, -104.9652]]
How can I write each element of each array into a specific field in the CSV?
I have tried it this way:
wrt = csv.writer(open(t_.csv', 'w'), delimiter=',',lineterminator='\n')
for x in jjson:
wrt.writerow([x])
but the values of each array are all stored in one field.
How can I solve this and write each value into its own field?
this is my json file:
{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}
Let's say we have this:
jsonstring = """{
"ride":[
{
"origin":{
"lat":39.72417,
"lng":-104.99984,
"eta_seconds":null,
"address":""
},
"destination":{
"lat":39.77446,
"lng":-104.9379,
"eta_seconds":null,
"address":null
}
},
{
"origin":{
"lat":39.77481,
"lng":-104.93618,
"eta_seconds":null,
"address":"10 Albion Street"
},
"destination":{
"lat":39.6984,
"lng":-104.9652,
"eta_seconds":null,
"address":null
}
}
]
}"""
Here is a pandas solution:
import pandas as pd
import json

# Load json to dataframe
rides = json.loads(jsonstring)["ride"]
df = pd.DataFrame(rides)

# Create the new columns: (new column, source column, key inside the nested dict)
new_columns = (
    ("o1", "origin", "lat"),
    ("o2", "origin", "lng"),
    ("d1", "destination", "lat"),
    ("d2", "destination", "lng"),
)
for column, source, key in new_columns:
    # key=key binds the current key to this lambda.
    df[column] = df[source].apply(lambda x, key=key: x[key])

# export: column positions 2 onward are the four columns added above
csv_text = df.iloc[:, 2:].to_csv(index=False, header=True)
print(csv_text)
# use below for file
# df.iloc[:, 2:].to_csv("output.csv", index=False, header=True)
Returns:
o1,o2,d1,d2
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
Condensed answer:
import pandas as pd
import json

with open('data.json') as json_data:
    d = json.load(json_data)

df = pd.DataFrame(d["ride"])

# For each nested column, split its (lat, lng) pairs into two flat columns.
for prefix, source in (("o", "origin"), ("d", "destination")):
    pairs = df[source].apply(lambda x: (x["lat"], x["lng"]))
    df[prefix + "1"], df[prefix + "2"] = zip(*pairs)

# Column positions 2 onward are the four columns added above.
df.iloc[:, 2:].to_csv("t_.csv", index=False, header=False)
Or, maybe the most readable solution:
import json

import pandas as pd

# The `with` keyword is required here: it makes the statement valid and
# closes the file once the JSON has been parsed.
with open('data.json') as json_data:
    d = json.load(json_data)

# pd.json_normalize (pandas >= 1.0) replaces the deprecated
# pandas.io.json.json_normalize import. It flattens nested dicts into
# dotted column names such as "origin.lat".
df = pd.json_normalize(d["ride"])
cols = ["origin.lat", "origin.lng", "destination.lat", "destination.lng"]
df[cols].to_csv("output.csv", index=False, header=False)
This might help:
import json
import csv
def retrive_json():
    """Read ``data.json`` and return the coordinates of every ride.

    Returns:
        A list with one ``[origin_lat, origin_lng, destination_lat,
        destination_lng]`` entry per item of the top-level ``"ride"`` list.

    Raises:
        KeyError: if ``"ride"`` or any of the nested keys is missing.
    """
    with open('data.json') as json_data:
        d = json.load(json_data)
    # The file is only needed for parsing, so the rows are built after it
    # has been closed.
    return [
        [
            ride['origin']['lat'],
            ride['origin']['lng'],
            ride['destination']['lat'],
            ride['destination']['lng'],
        ]
        for ride in d['ride']
    ]
res = retrive_json()
csv_cols = ["orgin_lat", "origin_lng", "dest_lat", "dest_lng"]
# newline='' lets the csv module control line endings itself; without it
# extra blank rows can appear on platforms that translate "\n".
with open("output_csv.csv", 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=csv_cols)
    writer.writeheader()
    # Pair every value with its column name so DictWriter can place it.
    writer.writerows(dict(zip(csv_cols, each_list)) for each_list in res)
Output csv generated is:
orgin_lat,origin_lng,dest_lat,dest_lng
39.72417,-104.99984,39.77446,-104.9379
39.77481,-104.93618,39.6984,-104.9652
To me it looks like you've got an array of arrays and you want the individual elements. Therefore you'll want to use a nested for loop. Your current for loop gets each array; to then split each array into its elements, you'll want to loop through those as well. I'd suggest something like this:
for x in jjson:
for y in x:
wrt.writerow([y])
Obviously you might want to update your bracketing etc this is just me giving you an idea of how to solve your issue.
Let me know how it goes!
Why the csv-Library?
array = [[1, 2, 3, 4], [5, 6, 7, 8]]

with open('test.csv', 'w') as csv_file:
    # The header might be optional
    csv_file.write("# Header Info\n"
                   "# Value1, Value2, Value3, Value4\n")
    for row in array:
        # str.join only accepts strings, so the numbers are converted first.
        # NOTE: values are written verbatim (no quoting or escaping), which
        # is fine for plain numbers like these.
        csv_file.write(",".join(map(str, row)) + "\n")

Python : Normalize Json response (array)

I am new to JSON and Python,I am trying to achieve below
Need to parse below JSON
{
"id": "12345abc",
"codes": [
"BSVN1FKW3JKKNNMN",
"HJYYUKJJL999OJR",
"DFTTHJJJJ0099JUU",
"FGUUKHKJHJGJJYGJ"
],
"ctr": {
"source": "xyz",
"user_id": "1234"
}
}
Expected output:Normalized on "codes" value
ID~CODES~USER_ID
12345abc~BSVN1FKW3JKKNNMN~1234
12345abc~HJYYUKJJL999OJR~1234
12345abc~DFTTHJJJJ0099JUU~1234
12345abc~FGUUKHKJHJGJJYGJ~1234
Started with below ,but need help to get to my desired output.
The "codes" block can have n number of values separated by comma.
The below code is throwing an error "TypeError: string indices must be integers"
#!/usr/bin/python
import os
import json
import csv
f = open('rspns.csv','w')
writer = csv.writer(f,delimiter = '~')
headers = [‘ID’,’CODES’,’USER_ID’]
default = ''
writer.writerow(headers)
string = open('sample.json').read().decode('utf-8')
json_obj = json.loads(string)
#print json_obj['id']
#print json_obj['codes']
#print json_obj['codes'][0]
#print json_obj['codes'][1]
#print json_obj['codes’][2]
#print json_obj['codes’][3]
#print json_obj['ctr’][‘user_id']
for keyword in json_obj:
row = []
row.append(str(keyword['id']))
row.append(str(keyword['codes']))
row.append(str(keyword['ctr’][‘user_id']))
writer.writerow(row)
If your json_obj looks exactly like that , that is it is a dictionary, then the issue is that when you do -
for keyword in json_obj:
You are iterating over keys in json_obj, then if you try to access ['id'] for that key it should error out saying string indices must be integers .
You should first get the id and user_id before looping and then loop over json_obj['codes'] and then add the previously computed id and user_id along with the current value from codes list to the writer csv as a row.
Example -
import io
import json
import csv

# io.open accepts an encoding on both Python 2 and 3, and the `with` block
# closes the input file once it has been parsed.
with io.open('sample.json', encoding='utf-8') as source:
    json_obj = json.load(source)

with open('rspns.csv', 'w') as f:
    writer = csv.writer(f, delimiter='~')
    headers = ['ID', 'CODES', 'USER_ID']
    writer.writerow(headers)
    # These two values repeat on every row, so look them up once.
    # (Named record_id to avoid shadowing the builtin id().)
    record_id = json_obj['id']
    user_id = json_obj['ctr']['user_id']
    # One output row per entry of the "codes" list.
    for code in json_obj['codes']:
        writer.writerow([record_id, code, user_id])
You don't want to iterate through json_obj, as it is a dictionary and iterating over it yields its keys. The TypeError is caused by trying to index into those keys ('id', 'codes', and 'ctr') -- which are strings -- as if they were dictionaries.
Instead, you want a separate row for each code in json_obj['codes'] and to use the json_obj dictionary for your lookups:
# One output row per code; the id and user_id repeat on every row.
for code in json_obj['codes']:
    row = [
        json_obj['id'],
        code,
        # Plain ASCII quotes are required here; typographic quotes are a syntax error.
        json_obj['ctr']['user_id'],
    ]
    writer.writerow(row)

Categories

Resources