Parse nested JSON response from API service into csv in python

Parse nested JSON response from API service into csv in python - python

I'm trying with no luck to save the output of an API response into a CSV file in a clear and ordered way, this is the script to retrieve API data:
import json
import requests
import csv
# List of keywords to be checked
keywords = open("/test.txt", encoding="ISO-8859-1")
keywords_to_check = []
try:
for keyword in keywords:
keyword = keyword.replace("\n", "")
keywords_to_check.append(keyword)
except Exception:
print("An error occurred. I will try again!")
pass
apikey = # my api key
apiurl = # api url
apiparams = {
'apikey': apikey,
'keyword': json.dumps(keywords_to_check),
'metrics_location': '2840',
'metrics_language': 'en',
'metrics_network': 'googlesearchnetwork',
'metrics_currency': 'USD',
'output': 'csv'
}
response = requests.post(apiurl, data=apiparams)
jsonize = json.dumps(response.json(), indent=4, sort_keys=True)
if response.status_code == 200:
print(json.dumps(response.json(), indent=4, sort_keys=True))
The output I get is the following:
{
"results": {
"bin": {
"cmp": 0.795286539,
"cpc": 3.645033,
"m1": 110000,
"m10": 90500,
"m10_month": 2,
"m10_year": 2019,
"m11": 135000,
"m11_month": 1,
"m11_year": 2019,
"m12": 135000,
"m12_month": 12,
"m12_year": 2018,
"m1_month": 11,
"m1_year": 2019,
"m2": 110000,
"m2_month": 10,
"m2_year": 2019,
"m3": 110000,
"m3_month": 9,
"m3_year": 2019,
"m4": 135000,
"m4_month": 8,
"m4_year": 2019,
"m5": 135000,
"m5_month": 7,
"m5_year": 2019,
"m6": 110000,
"m6_month": 6,
"m6_year": 2019,
"m7": 110000,
"m7_month": 5,
"m7_year": 2019,
"m8": 90500,
"m8_month": 4,
"m8_year": 2019,
"m9": 90500,
"m9_month": 3,
"m9_year": 2019,
"string": "bin",
"volume": 110000
},
"chair": {
"cmp": 1,
"cpc": 1.751945,
"m1": 1000000,
"m10": 823000,
"m10_month": 2,
"m10_year": 2019,
"m11": 1500000,
"m11_month": 1,
"m11_year": 2019,
"m12": 1500000,
"m12_month": 12,
"m12_year": 2018,
"m1_month": 11,
"m1_year": 2019,
"m2": 1000000,
"m2_month": 10,
"m2_year": 2019,
"m3": 1000000,
"m3_month": 9,
"m3_year": 2019,
"m4": 1220000,
"m4_month": 8,
"m4_year": 2019,
"m5": 1220000,
"m5_month": 7,
"m5_year": 2019,
"m6": 1000000,
"m6_month": 6,
"m6_year": 2019,
"m7": 1000000,
"m7_month": 5,
"m7_year": 2019,
"m8": 1000000,
"m8_month": 4,
"m8_year": 2019,
"m9": 1000000,
"m9_month": 3,
"m9_year": 2019,
"string": "chair",
"volume": 1220000
}, ....
What I'd like to achieve is a csv file showing the following info and ordering, with the columns being string, cmp, cpc and volume:
string;cmp;cpc;volume
bin;0.795286539;3.645033;110000
chair;1;1.751945;1220000
Following Sidous' suggestion I've come to the following:
import pandas as pd
data = response.json()
df = pd.DataFrame.from_dict(data)
df.head()
Which game me the following output:
results
bin {'string': 'bin', 'volume': 110000, 'm1': 1100...
chair {'string': 'chair', 'volume': 1220000, 'm1': 1...
flower {'string': 'flower', 'volume': 1830000, 'm1': ...
table {'string': 'table', 'volume': 673000, 'm1': 82...
water {'string': 'water', 'volume': 673000, 'm1': 67...
Close, but still how can I show "string", "volume" etc as columns and avoid displaying the {'s of the dictioary?
Thanks a lot to whoever can help me sort this out :)
Askew

I propose to save the response in pandas data frame, then store it by pandas (you know csv file are easily handled by pandas).
import pandas as pd
# receiving results in a dictionary
dic = response.json()
# remove the results key from the dictionary
dic = dic.pop("results", None)
# convert dictionary to dataframe
data = pd.DataFrame.from_dict(dic, orient='index')
# string;cmp;cpc;volume
new_data = pd.concat([data['string'], data['cmp'], data['cpc'], data['volume']], axis=1)
# removing the default index (bin and chair keys)
new_data.reset_index(drop=True, inplace=True)
print(new_data)
# saving new_data into a csv file
new_data.to_csv('name_of_file.csv')
You find the csv file in the same directory of the python file (otherwise you can specify it in the .to_csv() method).
You can see the final result in screen shot below.

Open a text file using with open command and further write the data down by iterating through the whole dict
with open("text.csv", "w+") as f:
f.write('string;cmp;cpc;volume\n')
for res in response.values(): #This is after I assumed that `response` is of type dict
for r in res.values():
f.write(r['string']+';'+str(r['cmp'])+';'+str(r['cpc'])+';'+str(r['volume'])+'\n')

try this:
import pandas as pd
data = response.json()
cleaned_data = []
for key, val in data["results"].items():
cleaned_data.append(val)
df = pd.DataFrame.from_dict(cleaned_data)
df1 = df[["string","cmp","cpc","volume"]]
df1.head()
df1.to_csv("output.csv")

What about using a csv.DictWriter, since your data are almost what it needs to function?
import csv
if __name__ is "__main__":
results = {"chair": {"cmp": 1, "cpc": 3.64}, "bin": {"cmp": 0.5, "cpc": 1.75}} # two rows will do for the example
# Now let's get the data structure we really want: a list of rows
rows = []
for key, value in results:
rows.append(results)
# And, while we're at it, set the string part
rows[-1]["string"] = key
# Create the header
fieldnames = set()
for row in rows:
for fname in row:
fieldnames.add(fname)
# Write to the file
with open("mycsv.csv", "w", newline="") as file_:
writer = csv.DictWriter(file_, fieldnames=fieldnames)
writer.writeheader()
for row in rows:
writer.writerow(row)
You should be good with that kind of stuff, without using any other lib

Related

Python JSON multiple requests get response save to dataframe

Input:
I send multiple get requests with two parameters as lists (Year and Month) using API to import data
My sample code:
import grequests
import json
import requests
import io
import pandas as pd
from pandas import json_normalize
api_key = <api_key>
url = <url>
headersAuth = {
'Authorization': 'Bearer ' + api_key,
}
years_list = ['2020', '2021']
month_list = ['1','2']
#Create an array of urls
urls = [url + "Month=" + str(i) + "&Year=" + str(j) for i in month_list for j in years_list]
#Preapare requests
rs = (grequests.get(u, headers={'Authorization': 'Bearer ' + api_key}) for u in urls)
response = grequests.map(rs)
json_ = [r.json() for r in response]
But after json_ step I stuck, because I'm not sure how to parse this further in the right way
After running my script I get a format that looks like this
[
{'ID': 3473, 'Month': 1, 'Year': 2020, 'Sold': 1234},
{'ID': 3488, 'Month': 1, 'Year': 2020, 'Sold': 1789}]
... etc.
Output:
I would like to export it to pandas dataframe and then to xlsx or csv file as a normal column view
I'm probably missing some basics here, but can't process it further
I tried several things after that:
Use json.dumps jsonStr = json.dumps(json_)
Then convert it to listOfDictionaries = json.loads(jsonStr)
Then data_tuples = list(zip(listOfDictionaries))
df = pd.DataFrame(data_tuples)
But I couldn't get a desired output.
I would appreciate your help on this.

pandas.DataFrame.from_records
import pandas as pd
data = [
{'ID': 3473, 'Month': 1, 'Year': 2020, 'Sold': 1234},
{'ID': 3488, 'Month': 1, 'Year': 2020, 'Sold': 1789}]
pd.DataFrame.from_records(data)
ID Month Year Sold
0 3473 1 2020 1234
1 3488 1 2020 1789

batch_write_item using dynamodb.client() to write large csv to dynamodb table in python

I am trying to insert a large csv file (5M records) to dynamodb using dynamodb_client.batch_write_item().
When I insert using dynamodb_client.put_item(), it works fine but I need to be able to use it with batch_write_item() too.
Here is my code snippet for few records (more than 1):
import json
import boto3
import csv
import pandas as pd
from datetime import datetime
roleARN = 'arn:aws:iam::123:role/xyz_role'
boto3.setup_default_session(profile_name='test_profile')
client = boto3.client('sts')
response = client.assume_role(RoleArn=roleARN,
RoleSessionName='RoleSessionName',
DurationSeconds=1800)
dynamodb_client = boto3.client('dynamodb', region_name='ap-south-1',
aws_access_key_id=response['Credentials']['AccessKeyId'],
aws_secret_access_key=response['Credentials']['SecretAccessKey'],
aws_session_token = response['Credentials']['SessionToken'])
#Fetching time for population
current_time = datetime.utcnow().isoformat()[:-3] + 'Z'
def convert_csv_to_json_list(file):
items = []
with open(file) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
data = {}
data['col1'] = row['col1']
data['col2'] = int(row['col2'])
data['col3'] = int(row['col3'])
data['Row_Created'] = current_time
data['col4'] = row['col4']
data['col5'] = int(row['col5'])
data['Row_Updated'] = current_time
items.append(data)
return items
def batch_write(items):
table = "sample_table"
#writing batch
try:
print(type(items))
dynamodb_client.batch_write_item(RequestItems = {
table: [{'PutRequest':
{
'Item' : items
}}]
})
print(f'resource, specify all types : write succeeded.')
except Exception as e:
print(f'resource, specify all types : write failed: {e}')
inp_file = "sample_mapping.csv"
json_data = convert_csv_to_json_list(inp_file)
batch_write(json_data)
I keep getting :
<class 'list'>
resource, specify all types : write failed: Parameter validation failed:
Invalid type for parameter RequestItems.sample_table[0][{'col1': 'abc', 'col2': 59, 'col3': 0
, 'Row_Created': '2021-10-08T04:36:04.787Z', 'col4': 'dfrwfr', 'col5': 1, 'Row_Updated': '2021-10-08T04:36:04.787Z'}, {'col1': 'sffr', 'col2': 45, 'col3': 0
, 'Row_Created': '2021-10-08T04:36:04.787Z', 'col4': 'gty7u', 'col5': 1, 'Row_Updated': '2021-10-08T04:36:04.787Z'}], type: <class 'list'>, valid types: <class
'dict'>
Can someone help me where I am going wrong with batch insertion, tried looking up the documentation too.

Each item should be in a separate PutRequest key.
RequestItems = {
table: [
{'PutRequest': {'Item': {}}},
{'PutRequest': {'Item': {}}}
]
}
There are certain limitations with using batch_write_item, such as there cannot be more than 25 items in the request.

Convert HTML dictionary to PYTHON dictionary

i need to get values only for Czechia country from this website list "https://coronavirus-19-api.herokuapp.com/countries" and store like a variable dictionary in ptyhon.
Like this:
Czechia = {"cases":434,"todayCases":0,"deaths":0,"todayDeaths":0,"recovered":3,"active":431,"critical":2}

You could use requests to GET the JSON data from your server URL, then construct a new dictionary with country as the key:
from requests import get
URL = "https://coronavirus-19-api.herokuapp.com/countries"
req = get(URL).json()
result = {obj['country']: {k: v for k, v in obj.items() if k != 'country'} for obj in req}
print(result)
Output:
{'China': {'cases': 80894, 'todayCases': 13, 'deaths': 3237, 'todayDeaths': 11, 'recovered': 69614, 'active': 8043, 'critical': 2622}, 'Italy': {'cases': 31506, 'todayCases': 0, 'deaths': 2503, 'todayDeaths': 0, 'recovered': 2941, 'active': 26062, 'critical': 2060}...
Now you can access your data in O(1) time instead of doing a O(N) linear scan:
print(result["Czechia"])
# {'cases': 464, 'todayCases': 30, 'deaths': 0, 'todayDeaths': 0, 'recovered': 3, 'active': 461, 'critical': 2}
Note: Its probably also safe to ensure req.status_code is 200 OK or whatever else you expect to receive from the server.

In [1]: import requests
...: import json
...:
...: data = requests.get('https://coronavirus-19-api.herokuapp.com/countries').json()
...: result = next(item for item in data if item["country"] == "Czechia")
...: print(json.dumps(result, indent=4))
{
"country": "Czechia",
"cases": 464,
"todayCases": 30,
"deaths": 0,
"todayDeaths": 0,
"recovered": 3,
"active": 461,
"critical": 2
}
In [2]:

While the RoadRunner 's answers solves your problem, I am just giving you the one other way of doing it using python's urllib module.
from urllib.request import urlopen
##import ast
import json
def Corona_Tracker():
res = urlopen('https://coronavirus-19-api.herokuapp.com/countries')
result = res.read().strip()
result_str = json.loads(result)
return result_str
if __name__ == "__main__":
result_str=Corona_Tracker()
while True:
for data in result_str:
if data['country'] == "India":
print(data)
Just replace India with your country , it gives the below output
>>> {'country': 'India', 'cases': 148, 'todayCases': 5, 'deaths': 3, 'todayDeaths': 0, 'recovered': 14, 'active': 131, 'critical': 0}

Read Dictionary and write to a text file

I have a dictionary now:
data = [{'position': 1, 'name':'player1:', 'number': 524}, {'position':2, 'name': 'player2:','number': 333}]
(just list two group of number first to simplify the problem)
I want to read and print it in the order of positions: "position 1", "position 2" ... "position n" in a text or csv file.
something like:
position name number
1 player1 524
2 player2 333
I tried:
data = [{'position': 1, 'name':'player1', 'number': 524}, {'position':2, 'name': 'player2:','number': 333}]
keys = data[0].keys()
with open(output.csv", 'r') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(data)
Seems like I should create a csv instead of open it first. Also, is there any better ways? Thanks.

The easiest thing to do would probably be to read it into a pandas Dataframe and then write it to a csv.
import pandas as pd
data = [
{
'position': 1,
'name':'player1',
'number': 524
}, {
'position': 2,
'name': 'player2',
'number': 333
}
]
df = pd.DataFrame.from_records(data, columns=['position', 'name', 'number'])
df = df.sort_values('position')
df.to_csv('data.csv')

use pandas
import pandas as pd
data = [
{
'position': 1,
'name':'player1:',
'number': 524
}, {
'position':2,
'name':'player2:',
'number': 333
}
]
df = pd.DataFrame.from_records(data, columns=['position', 'name', 'number'])
df = df.sort_values('position')
df.head()

recursively collect string blocks in python

I have a custom data file formatted like this:
{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}
I want to collect the blocks of data, meaning string between each set of {} while maintaining a hierarhcy. This data is not a typical json format so that is not a possible solution.
My idea was to create a class object like so
class Block:
def __init__(self, header, children):
self.header = header
self.children = children
Where i would then loop through the data line by line 'somehow' collecting the necessary data so my resulting output would like something like this...
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short I'm looking for help on ways I can serialize this into useful data I can then easily manipulate. So my approach is to break into objects by using the {} as dividers.
If anyone has suggestions on ways to better approach this I'm all up for ideas. Thank you again.
So far I've just implemented the basic snippets of code
class Block:
def __init__(self, content, children):
self.content = content
self.children = children
def GetBlock(strArr=[]):
print len(strArr)
# blocks = []
blockStart = "{"
blockEnd = "}"
with open(filepath, 'r') as file:
data = file.readlines()
blocks = GetBlock(strArr=data)

You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
block = {}
for line in lines:
if line.strip().endswith(("}", "},")):
break
key, value = map(str.strip, line.split(" = "))
if value.endswith("{"):
value = to_block(lines)
block[key] = value
return block
When calling it, you have to strip the first line, though. Also, evaluating the "leafs" to e.g. numbers or strings is left as an excercise to the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
next(f) # skip first line
res = to_block(f)
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values into "" (and replace the original " with ' before that), otherwise there is too much risk to accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}

You can also do with ast or json with the help of regex substitutions.
import re
a = """{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}"""
#with ast
a = re.sub("(\w+)\s*=\s*", '"\\1":', a)
a = re.sub(":\s*((?:\d+)(?: \d+)+)", lambda x:':[' + x.group(1).replace(" ", ",") + "]", a)
import ast
print ast.literal_eval(a)
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'car': 'ford', 'bike': 'trek', 'van': 'honda'}, 'presets': {'travelers': False, 'location': 'italy', 'size': 10}, 'version': 1}}}
#with json
import json
a = re.sub(",(\s*\})", "\\1", a)
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print json.loads(a)
#{u'data': {u'friends': {u'max': [0, 0], u'min': [0, 0]}, u'family': {u'cars': {u'car': u'ford', u'bike': u'trek', u'van': u'honda'}, u'presets': {u'travelers': False, u'location': u'italy', u'size': 10}, u'version': 1}}}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parse nested JSON response from API service into csv in python - python

try this: import pandas as pd data = response.json() cleaned_data = [] for key, val in data["results"].items(): cleaned_data.append(val) df = pd.DataFrame.from_dict(cleaned_data) df1 = df[["string","cmp","cpc","volume"]] df1.head() df1.to_csv("output.csv")

Related

Python JSON multiple requests get response save to dataframe

batch_write_item using dynamodb.client() to write large csv to dynamodb table in python

Convert HTML dictionary to PYTHON dictionary

Read Dictionary and write to a text file

recursively collect string blocks in python

Categories

Resources