Listing nested json values by respective headers : Python Pandas

Listing nested json values by respective headers : Python Pandas - python

I am trying to normalize a nested json response from this URL. As it is quite nested, i am not able to achieve the following format, can anyone help me in right direction?
I am using this approach to normalize:
from urllib.request import urlopen
import json
from pandas.io.json import json_normalize
import pandas as pd
class jsonResp():
def __init__(self):
global data
global data1
global path
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
requestURL = ("http://data.corkcity.ie/api/3/action/datastore_search?id=6cc1028e-7388-4bc5-95b7-667a59aa76dc") #Request urls for json
responseOpen = urlopen(requestURL)
elevations = responseOpen.read() #Reads the response
data = json.loads(elevations) #Loads the json file for normalization and parsing
df = pd.DataFrame.from_dict(json_normalize(data), orient='columns')
print(df)
if __name__ == '__main__':
obj = jsonResp()
Thanks

Start by navigating to the records and then use json_normalize():
import requests
json_data = requests.get("http://data.corkcity.ie/api/3/action/datastore_search?id=6cc1028e-7388-4bc5-95b7-667a59aa76dc").json()
from pandas.io.json import json_normalize
df = json_normalize(json_data["result"]["records"])

Related

Parsing JSON output file

Hi a learner in python I have written code the extracts a son file from a sports website.
The code is
from bs4 import BeautifulSoup
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
print(response.text)
and the output looks like this (small portion)
sectionaltimes_callback({"Horses":[{"Comment":"Slow Out 1 Lengths , got back 2nd last off tardy start 8 Lengths 800m, still mile off them getting widest from the corner, charged home last 200m for eye catching second spot # powered home widest","FinalPosition":2,"FinalPositionAbbreviation":"2nd","FullName":"Ameerati","SaddleNumber":12,"HorseUrl":"/horses/ameerati","SilkUrl":"//cdn.silks.racing.com/bb/114031.png","Trainer":"Robbie Griffiths & Mathew de Kock","TrainerUrl":"/trainers/robbie-griffiths","Jockey":"P.M.Moloney","JockeyUrl":"/jockeys/patrick-moloney","SectionalTimes":[{"Distance":"1200m","Position":11,"Time":"1:11.43","AvgSpeed":0.0},{"Distance":"1000m","Position":11,"Time":"59.29","AvgSpeed":0.0},{"Distance":"800m","Position":11,"Time":"46.95","AvgSpeed":0.0},{"Distance":"600m","Position":11,"Time":"34.77","AvgSpeed":0.0},{"Distance":"400m","Position":11,"Time":"22.71","AvgSpeed":0.0},{"Distance":"200m","Position":4,"Time":"11.45","AvgSpeed":0.0},{"Distance":"Finish","Position":2,"Time":"","AvgSpeed":0.0}],"SplitTimes":[{"Distance":"1200m-1000m","Position":11,"Time":"12.14","AvgSpeed":0.0},{"Distance":"1000m-800m","Position":11,"Time":"12.34","AvgSpeed":0.0},{"Distance":"800m-600m","Position":11,"Time":"12.18","AvgSpeed":0.0},{"Distance":"600m-400m","Position":11,"Time":"12.06","AvgSpeed":0.0},{"Distance":"400m-200m","Position":11,"Time":"11.26","AvgSpeed":0.0},{"Distance":"200m-Finish","Position":4,"Time":"11.45","AvgSpeed":0.0}],"StartPosition":0,"BarrierNumber":12,"RaceTime":"","TimeVarToWinner":0.0,"BeatenMargin":0.0,"DistanceRun":0,"DistanceVarToWinner":"","SixHundredMetresTime":"34.77","TwoHundredMetresTime":"11.45","Early":0.0,"Mid":0.0,"Late":0.0,"OverallPeakSpeed":0.0,"PeakSpeedLocation":null,"OverallAvgSpeed":0.0,"DistanceFromRail":0.0},
The help I would appreciate now is what do I do to put this in a format that I can open in excel

import pandas as pd
import requests
import json
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.utils.dataframe import dataframe_to_rows
def race_data_to_xslxs(url, fname):
# get data
data = json.loads(requests.get(url).text[24:-1])
# create dataframes
dfs = {}
singulars = pd.DataFrame()
for k, v in data.items():
if isinstance(v, list):
dfs[k] = pd.DataFrame(v)
else:
singulars[k] = [v]
dfs = {'summary': singulars, **dfs}
# create workbook
wb = Workbook()
for k, df in dfs.items():
# create sheet
wsx = wb.create_sheet(title=k)
rows = dataframe_to_rows(df)
for r_idx, row in enumerate(rows, 1):
for c_idx, value in enumerate(row, 1):
wsx.cell(row=r_idx, column=c_idx, value=str(value))
del wb['Sheet']
# write excel file
wb.save(filename=fname)
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
path = 'fname.xlsx'
race_data_to_xslxs(url=url, fname=path)

The API is returning JSONP, not JSON. This is JSON wrapped in a call to a callback function, which can be used by browsers without violating the same-origin rule. You need to remove that function call before parsing it as JSON.
import re
import json
response = requests.request("GET", url, headers=headers, data=payload)
json_string = re.sub(r'^sectionaltimes_callback\((.*)\)$', r'\1', response)
data = json.loads(json_string)

You can try this -
import requests
import json
url = "https://s3-ap-southeast-2.amazonaws.com/racevic.static/2022-08-01/sportsbet-pakenham-synthetic/sectionaltimes/race-2.json?"
response = requests.get(url)
json.loads(response.text.split("(", 1)[1].strip(")"))

How can we read JSON data from URL, convert to dataframe, and save as CSV

I'm playing around with some code to read JSON encoded data from a URL, push it into a data frame and save the results to a CSV. The code that I attempted to run is shown below. I think this is pretty close, but something is wrong, because nothing gets downloaded.
import urllib
from urllib.request import urlopen
import json
import pandas as pd
from pandas.io.json import json_normalize
all_links = ['https://www.baptisthealthsystem.com/docs/global/standard-charges/474131755_abrazomaranahospital_standardcharges.json?sfvrsn=9a27928_2',
'https://www.baptisthealthsystem.com/docs/global/standard-charges/621861138_abrazocavecreekhospital_standardcharges.json?sfvrsn=674fd6f_2',
'https://www.baptisthealthsystem.com/docs/global/standard-charges/621809851_abrazomesahospital_standardcharges.json?sfvrsn=13953222_2',
'https://www.baptisthealthsystem.com/docs/global/standard-charges/621811285_abrazosurprisehospital_standardcharges.json?sfvrsn=c8113dcf_2']
for item in all_links:
#print(item)
try:
length = len(item)
first_under = item.find('_') + 1
last_under = item.rfind('?') - 21
file_name = item[first_under:last_under]
print(file_name)
# store the response of URL
response = urlopen(item)
data = json.loads(response.read())
#print(type(data))
data = json.loads(item.read().decode())
df = pd.DataFrame(json_normalize(data, 'metrics'), encoding='mac_roman')
DOWNLOAD_PATH = 'C:\\Users\\ryans\\Desktop\\hospital_data\\' + file_name + '.csv'
urllib.request.urlretrieve(df,DOWNLOAD_PATH)
except Exception as e: print(e)
Any thoughts on what could be wrong here?

Export JSON results to CSV using Pandas package

I am trying to modify the following code (I am newbie at python, so try to teach me step by step)
import requests, json
import pandas as pd
class AjaxScraper():
results = []
def fetch(self, url):
return requests.get(url)
def parse(self, content):
self.results = content['data']
for entry in self.results:
del entry['_id']
def to_csv(self):
df = pd.DataFrame(self.results)
pd.to_csv('Table.csv', sep=',', encoding='utf-8',index = False)
def start_me(self):
response = self.fetch('https://scrapingkungfu.herokuapp.com/api?_=1576384789999')
self.parse(response.json())
self.to_csv()
if __name__ == '__main__':
scraper = AjaxScraper()
scraper.start_me()
I have got errors like that
File "demo.py", line 24, in start_me
self.to_csv()
File "demo.py", line 19, in to_csv
pd.to_csv('Table.csv', sep=',', encoding='utf-8',index = False)
AttributeError: module 'pandas' has no attribute 'to_csv'
I wonder why this error appears although I saw many codes that has to_csv in pandas package..!!
** This is a simple dataframe that I need to learn how to reorder the columns using the index of columns
import pandas as pd
name_dict = {
'Name': ['a','b','c','d'],
'Score': [90,80,95,20]
}
df = pd.DataFrame(name_dict)
print (df)

to_csv is a method of a DataFrame object, not of the pandas module.
You need to create a dataframe
Reordering the Dataframe with your example
import pandas as pd
name_dict = {
'Name': ['a','b','c','d'],
'Score': [90,80,95,20]
}
df = pd.DataFrame(name_dict)
print (df)
The solution is creating a new data frame with our desired order
df = df[['Score', 'Name']]

looping Row and scraping data taking input from excel file

i want to scrape web data using input values from excel and scraping web for each row_value taken and save the output to same excel file.
from bs4 import BeautifulSoup
import requests
from urllib import request
import os
import pandas as pd
ciks = pd.read_csv("ciks.csv")
ciks.head()
output
CIK
0 1557822
1 1598429
2 1544670
3 1574448
4 1592290
then
for x in ciks:
url="https://www.sec.gov/cgi-bin/browse-edgar?CIK=" + x +"&owner=exclude&action=getcompany"
r = request.urlopen(url)
bytecode = r.read()
htmlstr = bytecode.decode()
soup = BeautifulSoup(bytecode)
t = soup.find('span',{'class':'companyName'})
print(t.text)
i got an erorr :
----> 9 print (t.text)
AttributeError: 'NoneType' object has no attribute 'text'
here, i want to scrape web data taking each row value as input from the CSV file.

It would be easier to convert the column values as list and then use it in the for loop - see solution below,
from bs4 import BeautifulSoup
import requests
from urllib import request
import os
import pandas as pd
#ciks = pd.read_csv("ciks.csv")
df = pd.read_csv("ciks.csv")
mylist = df['CIK'].tolist()# CIK is the column name
company =[]
for item in mylist:
print(item)
url="https://www.sec.gov/cgi-bin/browse-edgar?CIK=" + str(item) +"&owner=exclude&action=getcompany"
r = request.urlopen(url)
bytecode = r.read()
htmlstr = bytecode.decode()
soup = BeautifulSoup(bytecode,features="lxml")
t = soup.find('span',{'class':'companyName'})
company.append(t.text)
print(t.text)
df.assign(company= company)
print(df)
df.to_csv("ciks.csv")

Python - CSV file blank after using CSV writer

I'm very new at this and I'm having trouble writing a CSV file from a webscraping. Can anyone help? Thank you!
import sys
import urllib2
import csv
import requests
from bs4 import BeautifulSoup
#web scraping wind data
r_wind = requests.get('http://w1.weather.gov/data/obhistory/KCQX.html')
html_wind = r_wind.text
soup = BeautifulSoup(html_wind, "html.parser")
table = soup.find('table')
rows_wind = table.findAll('tr')
rows_wind = rows_wind[1:]
#writing to a csv file
csvfile_wind = open("wind.csv","wb")
output_wind = csv.writer(csvfile_wind, delimiter=',',quotechar='"',quoting=csv.QUOTE_MINIMAL)
for row in rows_wind:
Date = cells[0].text.strip()
Time = cells[1].text.strip()
Wind = cells[2].text.strip()
output.writerow([Date,Time,Wind])
csvfile_wind.close()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Listing nested json values by respective headers : Python Pandas - python

Related

Parsing JSON output file

How can we read JSON data from URL, convert to dataframe, and save as CSV

Export JSON results to CSV using Pandas package

looping Row and scraping data taking input from excel file

Python - CSV file blank after using CSV writer

Categories

Resources