I am try to use python to extract value, but I found a weird result.
Following is part of my json variable
"weatherElement": [
{
"elementName": "ELEV",
"elementValue": {
"value": "20.0"
}
},
{
"elementName": "TEMP",
"elementValue": {
"value": "25.0"
}
},
{
"elementName": "D_TNT",
"elementValue": {
"value": "2019-11-22T02:10:00+08:00"
}
}
],
and following code is correct for getting value 25.0 which is temperature
for unit in data['records']['location']:
# print(type(unit)) <---- output <dict>
if unit['stationId'] == 'C0V490':
for wea_unit in unit['weatherElement']: # unit['weatherElement'] is list
if wea_unit['elementName'] == 'TEMP':
print(type(wea_unit['elementValue'])) # is str
return wea_unit['elementValue']
My question is why type(wea_unit['elementValue']) is str?
I think it should be dict and I should use wea_unit['elementValue']['value'] to get '25.0', but it is wrong. Are there anyone know what mistake I made? Thanks!
edit:
following is example code which can run directly
import json
def parse_json_to_dataframe(data):
for unit in data['records']['location']:
# print(type(unit))
if unit['stationId'] == 'C0A560':
for wea_unit in unit['weatherElement']: # unit['weatherElement'] is list
if wea_unit['elementName'] == 'TEMP':
print(type(wea_unit['elementValue']))
return wea_unit['elementValue']
v = {"success":"true",
"result": {"resource_id": "O-A0001-001",
"fields": [{"id": "lat", "type": "Double"},
{"id": "lon", "type": "Double"},
{"id": "locationName", "type": "String"},
{"id": "stationId", "type": "String"},
{"id": "description", "type": "String"},
{"id": "elementName", "type": "String"},
{"id": "elementValue", "type": "Double"},
{"id": "parameterName", "type": "String"},
{"id": "parameterValue", "type": "String"}]}, # result end
"records": {"location": [{"lat": "24.778333",
"lon": "121.494583",
"locationName": "福山",
"stationId": "C0A560",
"time": {"obsTime": "2019-11-22 22:00:00"},
"weatherElement": [
{"elementName": "ELEV", "elementValue": "405.0"},
{"elementName": "WDIR", "elementValue": "0"},
{"elementName": "WDSD", "elementValue": "0.0"},
{"elementName": "TEMP", "elementValue": "19.6"}],
"parameter": [
{"parameterName": "CITY_SN", "parameterValue": "06"},
{"parameterName": "TOWN_SN", "parameterValue": "061"}]}]}}
temp = parse_json_to_dataframe(v)
print(temp)
Related
I need to "Semi-Flatten" a JSON object where i have a JSON object with nested items. I have tried to use the flat_json with pandas and other "flatten_json" and json_normalize code in stackoverflow but i end up with completely flatten JSON (something i do not need).
Here is the JSON structure:
[{
"Stat": {
"event": "03458188-abf9-431c-8144-ad49c1d069ed",
"id": "102e1bb1f28ca44b70d02d33380b13",
"number": "1121",
"source": "",
"datetime": "2023-01-13T00:00:00Z",
"status": "ok"
},
"Goal": {
"name": "goalname"
},
"Fordel": {
"company": "companyname"
},
"Land": {
"name": "landname"
}
}, {
"Stat": {
"event": "22222",
"id": "44444",
"number": "5555",
"source": "",
"datetime": "2023-01-13T00:00:00Z",
"status": "ok"
},
"Goal": {
"name": "goalname2"
},
"Fordel": {
"company": "companyname2"
},
"Land": {
"name_land": "landname2"
}
}]
The result i need is this:
[{
"event": "03458188-abf9-431c-8144-ad49c1d069ed",
"id": "102e1bb1f28ca44b70d02d33380b13",
"number": "1121",
"source": "",
"datetime": "2023-01-13T00:00:00Z",
"status": "ok",
"name": "goalname",
"company": "companyname",
"name_land": "landname"
}, {
"event": "22222",
"id": "44444",
"number": "5555",
"source": "",
"datetime": "2023-01-13T00:00:00Z",
"status": "ok",
"name": "goalname2",
"company": "companyname2",
"name_land": "landname2"
}]
If this can be used with pandas or other json packages it would be great.
Coded i have tried: (copy/paste from another question/answer)
def flatten_data(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out
That gives me:
{
"0_event": "03458188-abf9-431c-8144-ad49c1d069ed",
"0_id": "102e1bb1f28ca44b70d02d33380b13",
......
"1_event": "102e1bb1f28ca44b70d02d33380b13",
"1_id": "102e1bb1f28ca44b70d02d33380b13",
etc...
}
Map the flatten_data function over the list instead of flattening the entire list.
result = list(map(flatten_data, source)))
I have some JSON data like:
[
{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "post_engagement",
"value": "1"
},
{
"action_type": "page_engagement",
"value": "1"
}
],
"date_start": "2022-10-06",
},
]
In the actual data, this array would contain around 30 or 40 objects; I have shown just one for reference.
How can I flatten everything inside the "actions" array, so that it looks like this instead?
[
{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"post_reaction" : "1",
"post_engagement" : "1",
"page_engagement" : "1",
"date_start": "2022-10-06",
},
]
I quickly wrote this. Modify it as per how you ingest the data:
import json
final_result = []
with open('data.json') as f:
api_data_dict = json.load(f)
for api_data_obj in api_data_dict:
actions = []
values = []
updated_actions = {}
#assuming there are objects with action_type and value present inside this dict
for action in api_data_obj['actions']:
actions.append(action['action_type'])
values.append(action['value'])
for action, value in zip(actions, values):
updated_actions[action] = value
api_data_obj.pop('actions', None)
final_result.append({**api_data_obj, **updated_actions})
print(final_result)
data.json contents:
[{
"adset_id": "23851149362570451",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "post_engagement",
"value": "1"
},
{
"action_type": "page_engagement",
"value": "1"
}
],
"date_start": "2022-10-06"
},
{
"adset_id": "234543535643543",
"reach": "862",
"clicks": "1",
"actions": [
{
"action_type": "post_reaction",
"value": "2"
},
{
"action_type": "post_engagement",
"value": "2"
},
{
"action_type": "page_engagement",
"value": "2"
}
],
"date_start": "2022-10-06"
}]
Result:
[
{
"adset_id":"23851149362570451",
"reach":"862",
"clicks":"1",
"date_start":"2022-10-06",
"post_reaction":"1",
"post_engagement":"1",
"page_engagement":"1"
},
{
"adset_id":"234543535643543",
"reach":"862",
"clicks":"1",
"date_start":"2022-10-06",
"post_reaction":"2",
"post_engagement":"2",
"page_engagement":"2"
}
]
How to convert a list of dictionaries to a list?
Here is what I have:
{
"sources": [
{
"ID": "6953",
"VALUE": "https://address-jbr.ofp.ae"
},
{
"ID": "6967",
"VALUE": "https://plots.ae"
},
{
"ID": "6970",
"VALUE": "https://dubai-creek-harbour.ofp.ae"
}]}
Here is what I want it to look like:
({'6953':'https://address-jbr.ofp.ae','6967':'https://plots.ae','6970':'https://dubai-creek-harbour.ofp.ae'})
This is indeed very straightforward:
data = {
"sources": [
{
"ID": "6953",
"VALUE": "https://address-jbr.ofp.ae"
},
{
"ID": "6967",
"VALUE": "https://plots.ae"
},
{
"ID": "6970",
"VALUE": "https://dubai-creek-harbour.ofp.ae"
}]
}
Then:
data_list = [{x["ID"]: x["VALUE"]} for x in data["sources"]]
Which is the same as:
data_list = []
for x in data["sources"]:
data_list.append({
x["ID"]: x["VALUE"]
})
EDIT: You said convert to a "list" in the question and that confused me. Then this is what you want:
data_dict = {x["ID"]: x["VALUE"] for x in data["sources"]}
Which is the same as:
data_dict = {}
for x in data["sources"]:
data_dict[x["ID"]] = x["VALUE"]
P.S. Seems like you're asking for answers to your course assignments or something here, which is not what this place is for.
A solution using pandas
import pandas as pd
data = {
"sources": [
{"ID": "6953", "VALUE": "https://address-jbr.ofp.ae"},
{"ID": "6967", "VALUE": "https://plots.ae"},
{"ID": "6970", "VALUE": "https://dubai-creek-harbour.ofp.ae"},
]
}
a = pd.DataFrame.from_dict(data["sources"])
print(a.set_index("ID").T.to_dict(orient="records"))
outputs to:
[{'6953': 'https://address-jbr.ofp.ae', '6967': 'https://plots.ae', '6970': 'https://dubai-creek-harbour.ofp.ae'}]
this should work.
Dict = {
"sources": [
{
"ID": "6953",
"VALUE": "https://address-jbr.ofp.ae"
},
{
"ID": "6967",
"VALUE": "https://plots.ae"
},
{
"ID": "6970",
"VALUE": "https://dubai-creek-harbour.ofp.ae"
}]}
# Store all the keys here
value_LIST = []
for item_of_list in Dict["sources"]:
for key, value in item_of_list.items():
value_LIST.append(value)
A bit new to avro & python.
I am trying to do a simple conversion to avro using the fastavro library, as the speed of the native apache avro library is just a bit too slow.
I want to:
1.Take a json file
2. Convert the data to avro.
My problem is that it seems like my json isn't in the correct 'record' format to be converted to avro. I even tried putting my json into a string var and making it looks similar to the syntax they have on the site # https://fastavro.readthedocs.io/en/latest/writer.html:
{u'station': u'011990-99999', u'temp': 22, u'time': 1433270389},
{u'station': u'011990-99999', u'temp': -11, u'time': 1433273379},
{u'station': u'012650-99999', u'temp': 111, u'time': 1433275478},
Here is my code:
from fastavro import json_writer, parse_schema, writer
import json
key = "test.json"
schemaFileName = "test_schema.avsc"
with open(r'C:/Path/to/file' + schemaFileName) as sc:
w = json.load(sc)
schema = parse_schema(w)
with open(r'C:/Path/to/file/' + key) as js:
x=json.load(js)
with open('C:/Path/to/file/output.avro', 'wb') as out:
writer(out, schema,x, codec='deflate')
Here is what I get as output:
File "avropython.py", line 26, in <module>
writer(out, schema,x, codec='deflate')
File "fastavro\_write.pyx", line 608, in fastavro._write.writer
ValueError: "records" argument should be an iterable, not dict
My json file and schema, rspectively:
"joined": false,
"toward": {
"selection": "dress",
"near": true,
"shoulder": false,
"fine": -109780201.3804388,
"pet": {
"stood": "saddle",
"live": false,
"leather": false,
"tube": false,
"over": false,
"impossible": true
},
"higher": false
},
"wear": true,
"asleep": "door",
"connected": true,
"stairs": -1195512399.5000324
}
{
"name": "MyClass",
"type": "record",
"namespace": "com.acme.avro",
"fields": [
{
"name": "joined",
"type": "boolean"
},
{
"name": "toward",
"type": {
"name": "toward",
"type": "record",
"fields": [
{
"name": "selection",
"type": "string"
},
{
"name": "near",
"type": "boolean"
},
{
"name": "shoulder",
"type": "boolean"
},
{
"name": "fine",
"type": "float"
},
{
"name": "pet",
"type": {
"name": "pet",
"type": "record",
"fields": [
{
"name": "stood",
"type": "string"
},
{
"name": "live",
"type": "boolean"
},
{
"name": "leather",
"type": "boolean"
},
{
"name": "tube",
"type": "boolean"
},
{
"name": "over",
"type": "boolean"
},
{
"name": "impossible",
"type": "boolean"
}
]
}
},
{
"name": "higher",
"type": "boolean"
}
]
}
},
{
"name": "wear",
"type": "boolean"
},
{
"name": "asleep",
"type": "string"
},
{
"name": "connected",
"type": "boolean"
},
{
"name": "stairs",
"type": "float"
}
]
}
If anyone could help me out, it would be greatly appreciated!!
As mentioned in the error ValueError: "records" argument should be an iterable, not dict, the problem is that when you call writer, the argument for the records needs to be an iterable. One way to solve this is to change your last line to writer(out, schema, [x], codec='deflate')
Alternatively, there is a schemaless_writer that can be used to just write a single record: https://fastavro.readthedocs.io/en/latest/writer.html#fastavro._write_py.schemaless_writer
I have a project in which i have to convert a json file into a CSV file.
The Json sample :
{
"P_Portfolio Group": {
"depth": 1,
"dataType": "PortfolioOverview",
"levelId": "P_Portfolio Group",
"path": [
{
"label": "Portfolio Group",
"levelId": "P_Portfolio Group"
}
],
"label": "Portfolio Group",
"header": [
{
"id": "Label",
"label": "Security name",
"type": "text",
"contentType": "text"
},
{
"id": "SecurityValue",
"label": "MioCHF",
"type": "text",
"contentType": "number"
},
{
"id": "SecurityValuePct",
"label": "%",
"type": "text",
"contentType": "pct"
}
],
"data": [
{
"dataValues": [
{
"value": "Client1",
"type": "text"
},
{
"value": 2068.73,
"type": "number"
},
{
"value": 14.0584,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client2",
"type": "text"
},
{
"value": 1511.9,
"type": "number"
},
{
"value": 10.2744,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client3",
"type": "text"
},
{
"value": 1354.74,
"type": "number"
},
{
"value": 9.2064,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client4",
"type": "text"
},
{
"value": 1225.78,
"type": "number"
},
{
"value": 8.33,
"type": "pct"
}
]
}
],
"summary": [
{
"value": "Total",
"type": "text"
},
{
"value": 11954.07,
"type": "number"
},
{
"value": 81.236,
"type": "pct"
}
]
}
}
And i want o obtain something like:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,871.15,5.92
Client4,11954.07,81.236
Can you please give me a hint.
import csv
import json
with open("C:\Users\SVC\Desktop\test.json") as file:
x = json.load(file)
f = csv.writer(open("C:\Users\SVC\Desktop\test.csv", "wb+"))
for x in x:
f.writerow(x["P_Portfolio Group"]["data"]["dataValues"]["value"])
but it doesn't work.
Can you please give me a hint.
import csv
import json
with open('C:\Users\SVC\Desktop\test.json') as json_file:
portfolio_group = json.load(json_file)
with open('C:\Users\SVC\Desktop\test.csv', 'w') as csv_file:
csv_obj = csv.writer(csv_file)
for data in portfolio_group['P_Portfolio Group']['data']:
csv_obj.writerow([d['value'] for d in data['dataValues']])
This results in the following C:\Users\SVC\Desktop\test.csv content:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
Use the pandas library:
import pandas as pd
data = pd.read_csv("C:\Users\SVC\Desktop\test.json")
data.to_csv('test.csv')
done