Issue using Pandas DataFrame - python

I am trying to add data to a pandas DataFrame. I am having issues getting to the nested sub-dictionaries. Ideally I would like all of the ratio names like "priceBookValueRatio", "priceToBookRatio", etc. in the first column, with the dates on the top row going left to right, and the appropriate values under each date. Can anyone help?
Here is my code
def get_jsonparsed_data(ticker):
    """Download the financial ratios for *ticker* and load them into a DataFrame.

    Args:
        ticker: Stock symbol appended to the API URL, e.g. "AAPL".

    Returns:
        The DataFrame built directly from the response's "ratios" list.
        The nested ratio groups inside each entry are not flattened here.
    """
    url = "https://financialmodelingprep.com/api/v3/financial-ratios/" + ticker
    response = urlopen(url)
    try:
        data = response.read().decode("utf-8")
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    value = json.loads(data)
    data = value["ratios"]
    df = pd.DataFrame(data)
    # print(...) with parentheses is valid in both Python 2 and Python 3.
    print(df)
    return df
And here is what my data looks like
{
"symbol" : "AAPL",
"ratios" : [ {
"date" : "2019-09-28",
"investmentValuationRatios" : {
"priceBookValueRatio" : "11.1154",
"priceToBookRatio" : "11.1154",
"priceToSalesRatio" : "3.8903",
"priceEarningsRatio" : "18.7109",
"receivablesTurnover" : "5.489",
"priceToFreeCashFlowsRatio" : "17.5607",
"priceToOperatingCashFlowsRatio" : "14.5863",
"priceCashFlowRatio" : "0",
"priceEarningsToGrowthRatio" : "0",
"priceSalesRatio" : "0",
"dividendYield" : "",
"enterpriseValueMultiple" : "1.7966762045884",
"priceFairValue" : "0"
},
"profitabilityIndicatorRatios" : {
"niperEBT" : "0.84056163195765",
"ebtperEBIT" : "1",
"ebitperRevenue" : "0.25266552384174",
"grossProfitMargin" : "0.37817768109035",
"operatingProfitMargin" : "1",
"pretaxProfitMargin" : "0.24572017188497",
"netProfitMargin" : "0.21238094505984",
"effectiveTaxRate" : "0.15943836804235",
"returnOnAssets" : "0.5848",
"returnOnEquity" : "0.6106",
"returnOnCapitalEmployed" : "0.2691",
"nIperEBT" : "0.84056163195765",
"eBTperEBIT" : "1",
"eBITperRevenue" : "0.25266552384174"
},
"operatingPerformanceRatios" : {
"receivablesTurnover" : "5.489",
"payablesTurnover" : "1.2542",
"inventoryTurnover" : "64.5433",
"fixedAssetTurnover" : "6.9606185456686",
"assetTurnover" : "0.76857223883066"
},
"liquidityMeasurementRatios" : {
"currentRatio" : "1.54",
"quickRatio" : "1.3844473032029",
"cashRatio" : "0.46202160464632",
"daysOfSalesOutstanding" : "-9.2636",
"daysOfInventoryOutstanding" : "64.2588",
"operatingCycle" : "",
"daysOfPayablesOutstanding" : "64.8648",
"cashConversionCycle" : ""
},
"debtRatios" : {
"debtRatio" : "0.3192",
"debtEquityRatio" : "1.194",
"longtermDebtToCapitalization" : "0.50361776241806",
"totalDebtToCapitalization" : "0.54422142191553",
"interestCoverage" : "0.0",
"cashFlowToDebtRatio" : "0.64222977037771",
"companyEquityMultiplier" : "3.7410043320661"
},
"cashFlowIndicatorRatios" : {
"operatingCashFlowPerShare" : "15.0267",
"freeCashFlowPerShare" : "12.94",
"cashPerShare" : "10.5773",
"payoutRatio" : "0.251",
"receivablesTurnover" : "5.489",
"operatingCashFlowSalesRatio" : "0.26670997101939",
"freeCashFlowOperatingCashFlowRatio" : "0.84875560231154",
"cashFlowCoverageRatios" : "0.64222977037771",
"shortTermCoverageRatios" : "4.2728448275862",
"capitalExpenditureCoverageRatios" : "6.6118151500715",
"dividendpaidAndCapexCoverageRatios" : "2.8191679531974",
"dividendPayoutRatio" : "0.25551976255972"
}
},
{
"date" : "2018-09-29",
"investmentValuationRatios" : {
"priceBookValueRatio" : "10.1842",
"priceToBookRatio" : "10.1842",
"priceToSalesRatio" : "4.1328",
"priceEarningsRatio" : "18.9226",
"receivablesTurnover" : "6.2738",
"priceToFreeCashFlowsRatio" : "17.563",
"priceToOperatingCashFlowsRatio" : "14.1753",
"priceCashFlowRatio" : "14.375642493446",
"priceEarningsToGrowthRatio" : "18.698887988401",
"priceSalesRatio" : "4.1912065394209",
"dividendYield" : "0.012318046710734",
"enterpriseValueMultiple" : "14.710301181747",
"priceFairValue" : "10.389124295011"
},
"profitabilityIndicatorRatios" : {
"niperEBT" : "0.81657819294131",
"ebtperEBIT" : "1",
"ebitperRevenue" : "0.27448935409176",
"grossProfitMargin" : "0.38343718820008",
"operatingProfitMargin" : "1",
"pretaxProfitMargin" : "0.26694026619477",
"netProfitMargin" : "0.22414202074587",
"effectiveTaxRate" : "0.18342180705869",
"returnOnAssets" : "1.0497",
"returnOnEquity" : "0.5556",
"returnOnCapitalEmployed" : "0.217",
"nIperEBT" : "0.81657819294131",
"eBTperEBIT" : "1",
"eBITperRevenue" : "0.27448935409176"
},
"operatingPerformanceRatios" : {
"receivablesTurnover" : "6.2738",
"payablesTurnover" : "1.2564",
"inventoryTurnover" : "60.2871",
"fixedAssetTurnover" : "6.4302488863064",
"assetTurnover" : "0.72621505229339"
},
"liquidityMeasurementRatios" : {
"currentRatio" : "1.133",
"quickRatio" : "0.99453976140569",
"cashRatio" : "0.22352474359306",
"daysOfSalesOutstanding" : "-8.8176",
"daysOfInventoryOutstanding" : "67.3325",
"operatingCycle" : "",
"daysOfPayablesOutstanding" : "76.8054",
"cashConversionCycle" : ""
},
"debtRatios" : {
"debtRatio" : "0.313",
"debtEquityRatio" : "1.0685",
"longtermDebtToCapitalization" : "0.46661721806832",
"totalDebtToCapitalization" : "0.51655010603258",
"interestCoverage" : "0.0",
"cashFlowToDebtRatio" : "0.67637989919901",
"companyEquityMultiplier" : "3.4133013523477"
},
"cashFlowIndicatorRatios" : {
"operatingCashFlowPerShare" : "15.6263",
"freeCashFlowPerShare" : "9.924",
"cashPerShare" : "5.2293",
"payoutRatio" : "0.226",
"receivablesTurnover" : "6.2738",
"operatingCashFlowSalesRatio" : "0.29154916319961",
"freeCashFlowOperatingCashFlowRatio" : "0.8280729395356",
"cashFlowCoverageRatios" : "0.67637989919901",
"shortTermCoverageRatios" : "3.7321187584345",
"capitalExpenditureCoverageRatios" : "5.8164200405619",
"dividendpaidAndCapexCoverageRatios" : "2.8652728954672",
"dividendPayoutRatio" : "0.2303337756799"
}
},
{
"date" : "2017-09-30",
"investmentValuationRatios" : {
"priceBookValueRatio" : "5.9086",
"priceToBookRatio" : "5.9086",
"priceToSalesRatio" : "3.4657",
"priceEarningsRatio" : "16.5922",
"receivablesTurnover" : "7.0564",
"priceToFreeCashFlowsRatio" : "15.4994",
"priceToOperatingCashFlowsRatio" : "12.37",
"priceCashFlowRatio" : "12.166429629599",
"priceEarningsToGrowthRatio" : "16.160760748713",
"priceSalesRatio" : "3.4086956688842",
"dividendYield" : "0.016341413728755",
"enterpriseValueMultiple" : "12.106846738693",
"priceFairValue" : "5.8292161925369"
},
"profitabilityIndicatorRatios" : {
"niperEBT" : "0.75443523849647",
"ebtperEBIT" : "1",
"ebitperRevenue" : "0.27957894553164",
"grossProfitMargin" : "0.38469860491899",
"operatingProfitMargin" : "1",
"pretaxProfitMargin" : "0.2676042820873",
"netProfitMargin" : "0.21092420845075",
"effectiveTaxRate" : "0.24556476150353",
"returnOnAssets" : "0.7847",
"returnOnEquity" : "0.3607",
"returnOnCapitalEmployed" : "0.1752",
"nIperEBT" : "0.75443523849647",
"eBTperEBIT" : "1",
"eBITperRevenue" : "0.27957894553164"
},
"operatingPerformanceRatios" : {
"receivablesTurnover" : "7.0564",
"payablesTurnover" : "1.2897",
"inventoryTurnover" : "65.6173",
"fixedAssetTurnover" : "6.7854838232247",
"assetTurnover" : "0.61077110404749"
},
"liquidityMeasurementRatios" : {
"currentRatio" : "1.276",
"quickRatio" : "1.089670085504",
"cashRatio" : "0.20125181026445",
"daysOfSalesOutstanding" : "-12.5636",
"daysOfInventoryOutstanding" : "56.8007",
"operatingCycle" : "",
"daysOfPayablesOutstanding" : "70.4447",
"cashConversionCycle" : ""
},
"debtRatios" : {
"debtRatio" : "0.3082",
"debtEquityRatio" : "0.863",
"longtermDebtToCapitalization" : "0.42034732372197",
"totalDebtToCapitalization" : "0.46322584262014",
"interestCoverage" : "0.0",
"cashFlowToDebtRatio" : "0.55519536652835",
"companyEquityMultiplier" : "2.7999060031183"
},
"cashFlowIndicatorRatios" : {
"operatingCashFlowPerShare" : "12.3101",
"freeCashFlowPerShare" : "9.779",
"cashPerShare" : "3.8888",
"payoutRatio" : "0.259",
"receivablesTurnover" : "7.0564",
"operatingCashFlowSalesRatio" : "0.28017222576058",
"freeCashFlowOperatingCashFlowRatio" : "0.80613468275594",
"cashFlowCoverageRatios" : "0.55519536652835",
"shortTermCoverageRatios" : "3.476695718075",
"capitalExpenditureCoverageRatios" : "5.1582202232752",
"dividendpaidAndCapexCoverageRatios" : "2.5465900079302",
"dividendPayoutRatio" : "0.26408967756613"
}
},

pandas can't construct a DataFrame from an arbitrarily nested dict. You need to pass the data in a standard format it can parse.
One way to do this is to create a list of single-level dicts (plain keys and values) and then construct the DataFrame from that list:
def clean_data(d):
    """Flatten one nested ratios record into a single-level dict.

    Args:
        d: A record with a "date" entry plus any number of named groups,
            each group being a dict of ratio name -> value.

    Returns:
        A new dict holding "date" followed by every ratio from every group.
        When the same ratio name occurs in several groups, the value from
        the last group wins. Top-level entries that are not dicts are
        copied through unchanged.

    Raises:
        KeyError: If the record has no "date" entry.
    """
    ret = {'date': d['date']}
    for outer_key, rec in d.items():
        if outer_key == 'date':
            continue
        if isinstance(rec, dict):
            ret.update(rec)
        else:
            # Scalar field (not a ratio group): keep it rather than
            # failing on the missing .items() method.
            ret[outer_key] = rec
    return ret
# Flatten every record of the "ratios" list into a single-level dict.
# NOTE(review): assumes `data` is the full parsed JSON response (the dict
# that holds the "ratios" list) - confirm against the caller.
cleaned_data = [clean_data(d) for d in data['ratios']]
# One row per record, with the record's "date" field used as the row index.
df = pd.DataFrame.from_records(cleaned_data, index='date')
# Swap the axes so ratio names become the rows and dates become the columns.
df = df.transpose()
will give you

Related

Python Write Json file from url, python 3 adding \n and b'

I am upgrading from Python 2 to 3. This code works in Python 2 but not in 3. When I run it in Python 3, it does not seem to write the data in actual JSON format, and it adds \n and b' to the output.
I believe my python 3 is writing my json file wrongly.
Code to extract json from web url:
# Fetch the JSON served for Source_Id over HTTP basic auth and write it to
# "<dst_path><Source_Id>.json".
def WebService_As_Source(Source_Id):
dst_path = SOURCECONFIG.GLOBAL_WorkPath
# NOTE(review): bdate is not used again in this snippet.
bdate = SOURCECONFIG.GLOBAL_DATE
print ("Extracting from Web Service...\t\t" + str(datetime.datetime.now()))
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
uid = 'stack' # LAN user id (placeholder) - enter UserId
pwd = 'overflow'# LAN password (placeholder) - enter Password
top_level_url = SOURCECONFIG.WebServices_URL(Source_Id)
# Realm None: register the credentials for this URL under the default realm.
password_mgr.add_password(None, top_level_url, uid, pwd)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)
# use the opener to fetch a URL (the response of this first call is discarded)
opener.open(top_level_url)
# Install the opener globally so later urllib.request.urlopen calls use it.
urllib.request.install_opener(opener)
req = urllib.request.Request(top_level_url)
response = urllib.request.urlopen(req)
data = response.readlines()
otf = open(os.path.join(dst_path + Source_Id+".json"), "w+") # open text file
rowcount = 0
for line in data:
rowcount = rowcount + 1
# NOTE(review): on Python 3 each line is a bytes object, so str(line) writes
# its "b'...'" repr - this is the behaviour the question describes.
otf.write(str(line))
otf.close()
print(Source_Id +" json extracted.\t\t"+ str(datetime.datetime.now()))
Sample of my actual Json file python 3 produces:
b'[ {\n'b' "filterFlag" : "",\n'b' "lookup" : "",\n'b' "rule" : "",\n'b' "prefix" : "",\n'b' "validBDRAppName" : "",\n'b' "vendor" : {\n'b' "bookId" : "40302539",\n'b' "bookName" : "NYC",\n'b' "bookStatus" : "ACTIVE",\n'b' "commProductType" : "",\n'b' "businessDate" : "2019-08-05",\n'b' "endOfDay" : null,\n'b' "excludeFromAggregation" : "FALSE",\n'b' "geoLocation" : "",\n'b' "isHoliday" : "",\n'b' "isOSFIBook" : false,\n'b' "legalEntity" : "",\n'b' "location" : "",\n'b' "logicalDate" : "",\n'b' "regulatoryType" : "Trading",\n'b' "reportingLineBookName" : "NYC",\n'b' "reportingLinePathName" : "super/user",\n'b' "riskFilterType" : "USA",\n'b' "statusId" : "",\n'b' "transit" : "",\n'b' "l8n" : ""\n'b' },\n'b' "bdr" : {\n'b' "bookId" : "7447",\n'b' "bookName" : "NY",\n'b' "bookTransit" : "92218",\n'b' "bookStatus" : "ACTIVE",\n'b' "owner" : "",\n'b' "empId" : "",\n'b' "purpose" : "Trading",\n'b' "appName" : "STRATEGY",\n'b' "appCode" : "STRATEGY",\n'b' "transitDesc" : "TOR",\n'b' "appCategory" : "Front Office",\n'b' "bookAppId" : "49512",\n'b' "bookAppName" : "NY",\n'b' "deskName" : "USA",\n'b' "product" : "",\n'b' "asOfDate" : "2019-08-05",\n'b' "legalEntity" : "CANADA",\n'b' "bookAppSecondaryName" : "NY",\n'b' "strategy" : "NY",\n'b' "lhu" : "FCC3",\n'b' "masterBookName" : "NY"\n'b' }\n'b'}, {\n'b' "filterFlag" : "",\n'b' "lookup" : "",\n'b' "rule" : "",\n'b' "prefix" : "",\n'b' "validBDRAppName" : "",\n'b' "vendor" : {\n'b' "bookId" : "40296540",\n'b' "bookName" : "LDN",\n'b' "bookStatus" : "ACTIVE",\n'b' "commProductType" : "",\n'b' "businessDate" : "2019-08-05",\n'b' "endOfDay" : null,\n'b' "excludeFromAggregation" : "FALSE",\n'b' "geoLocation" : "",\n'b' "isHoliday" : "",\n'b' "isOSFIBook" : false,\n'b' "legalEntity" : "",\n'b' "location" : "",\n'b' "logicalDate" : "",\n'b' "regulatoryType" : "Trading",\n'b' "reportingLineBookName" : "LDN",\n'b' "reportingLinePathName" : "stack/overflow",\n'b' "riskFilterType" : "NONE",\n'b' "statusId" : "",\n'b' "transit" : 
"",\n'b' "l8n" : ""\n'b' },\n'b'
Sample of the JSON file Python 2 produces:
I ran my code to grab the JSON from the URL in Python 2, and it gives me the data in actual JSON format and doesn't add the b''s and \n's.
[ {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40302539",
"bookName" : "NYC",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "NYC",
"reportingLinePathName" : "super/user",
"riskFilterType" : "USA",
"statusId" : "",
"transit" : "",
"l8n" : ""
},
"bdr" : {
"bookId" : "7447",
"bookName" : "NY",
"bookTransit" : "92218",
"bookStatus" : "ACTIVE",
"owner" : "",
"empId" : "",
"purpose" : "Trading",
"appName" : "STRATEGY",
"appCode" : "STRATEGY",
"transitDesc" : "TOR",
"appCategory" : "Front Office",
"bookAppId" : "49512",
"bookAppName" : "NY",
"deskName" : "USA",
"product" : "",
"asOfDate" : "2019-08-06",
"legalEntity" : "CANADA",
"bookAppSecondaryName" : "NY",
"strategy" : "NY",
"lhu" : "FCC3",
"masterBookName" : "NY"
}
}, {
"filterFlag" : "",
"lookup" : "",
"rule" : "",
"prefix" : "",
"validBDRAppName" : "",
"vendor" : {
"bookId" : "40296540",
"bookName" : "LDN",
"bookStatus" : "ACTIVE",
"commProductType" : "",
"businessDate" : "2019-08-06",
"endOfDay" : null,
"excludeFromAggregation" : "FALSE",
"geoLocation" : "",
"isHoliday" : "",
"isOSFIBook" : false,
"legalEntity" : "",
"location" : "",
"logicalDate" : "",
"regulatoryType" : "Trading",
"reportingLineBookName" : "LDN",
"reportingLinePathName" : "stack/overflow",
"riskFilterType" : "NONE",
"statusId" : "",
"transit" : "",
"l8n" : ""
can anyone help with this?
The problem is that response.readlines() (where response = urllib.request.urlopen(url)) returns a list of bytes. In python 2 bytes and str are the same thing, but on python 3 this is no longer true. So when you did
otf.write(str(line))
the str() call was a no-op on python 2, but on python 3 you called str on a bytes object. This is never what you want to do:
>>> import urllib
... resp = urllib.request.urlopen('https://stackoverflow.com')
... dat = resp.readlines()
... first_line = dat[0]
... print(type(first_line))
... print(repr(first_line))
... print(repr(str(first_line)))
<class 'bytes'>
b'<!DOCTYPE html>\r\n'
"b'<!DOCTYPE html>\\r\\n'"
As you can see, the first line is a bytes object, and str(first_line) is a string that literally starts with a b and some single quotes.
Instead what you have to do is decode your bytes according to its corresponding encoding. I'm not very familiar with web things so I don't know what the best way is to correctly guess the encoding used by the website you're making requests to, but I do know that the third-party requests library can give you a usually correctly decoded json directly from the response.
If with urllib you have to do the decoding manually you need something like
otf.write(line.decode('utf8'))
Python 2 didn't differentiate between byte strings and unicode strings. Python 3 does, which is what the b'' is denoting.
This line
data = response.readlines()
could be
# Decode with the charset declared in the Content-Type header, falling back to
# UTF-8 when none is declared; splitlines(True) keeps the line endings, which
# matches what readlines() returned.
data = response.read().decode(response.headers.get_content_charset() or 'utf-8').splitlines(True)
which should figure out the proper encoding, as per this answer

Print only specific parts of json file

I am wondering what I am doing wrong when trying to print the data of name of the following code in python.
import urllib.request, json
# Download the API response and parse its JSON body.
with urllib.request.urlopen("<THIS IS A URL IN THE ORIGINAL SCRIPT>") as url:
data = json.loads(url.read().decode())
# NOTE(review): in the sample response shown below, "Departure" holds a list,
# so indexing it with a string key is what fails in the next two lines.
print (data['Departure']['Product']['name'])
print (data['Departure']['Stops']['Stop'][0]['depTime'])
And this is the api I am fetching the data from:
{
"Departure" : [ {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"Stop" : [ {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}
data["Departure"] is a list, and you are indexing into it like it's a dictionary.
You wrote the dictionary sample confusingly. Here's how I think it looks:
d = {
"Departure" : [ {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"Stop" : [ {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}]}}]}
And here's how you can print depTime
print(d["Departure"][0]["Stops"]["Stop"][0]["depTime"])
The important part you missed is d["Departure"][0] because d["Departure"] is a list.
As Kyle said in the previous answer, data["Departure"] is a list, but you're trying to use it as a dictionary. There are 2 possible solutions.
Change data["Departure"]["Stops"]["Stop"] etc. to data["Departure"][0]["Stops"]["Stop"] etc.
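Change the JSON file to make "Departure" into a dictionary, which would allow you to keep your original indexing of data['Departure']. Note that the snippet below also flattens "Stops" into a single dictionary, so with it the stop fields are read as data['Departure']['Stops']['depTime'] rather than through ['Stop'][0]. This would make the final JSON snippet look like this: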
"Departure" : {
"Product" : {
"name" : "Länstrafik - Buss 201",
"num" : "201",
"catCode" : "7",
"catOutS" : "BLT",
"catOutL" : "Länstrafik - Buss",
"operatorCode" : "254",
"operator" : "JLT",
"operatorUrl" : "http://www.jlt.se"
},
"Stops" : {
"name" : "Gislaved Lundåkerskolan",
"id" : "740040260",
"extId" : "740040260",
"routeIdx" : 12,
"lon" : 13.530096,
"lat" : 57.298178,
"depTime" : "20:55:00",
"depDate" : "2019-03-05"
}
}

How to get for loop in a function to return (not print) in single one line?

I'm trying to rename text/num ids (eg. ABC123) to just num ids (eg. 123123) by matching text to a dictionary code. (just learning how to code, so if better idea, please do tell).
I tried searching for solutions but many are for using print. I need to pass the results to another function, so can't use print.
def convertid(old_id):
# Lookup table: letters A-Z map to "1".."26"; digit characters map to themselves.
code = {'A' : '1','B' : '2','C' : '3','D' : '4','E' : '5','F' :
'6','G' : '7','H' : '8','I' : '9','J' : '10','K' : '11','L' :
'12','M' : '13','N' : '14','O' : '15','P' : '16','Q' : '17','R' :
'18','S' : '19','T' : '20','U' : '21','V' : '22','W' : '23','X' :
'24','Y' : '25','Z' : '26', '1' : '1','2' : '2','3' : '3','4' :
'4','5' : '5','6' : '6','7' : '7','8' : '8','9' : '9','0' : '0'}
for x in old_id:
# NOTE(review): this is the line the question asks about; `end = ""` is a
# print() argument and does not accumulate the converted characters here.
new_id = code[x],end = ""
return new_id
I've also tried new_id = "".join(code[x]),and new_id += code[x], but none of them work.
I was hoping to get "123123" as the new id in a single line, instead of:
1
2
3
1
2
3
on multiple lines (sorry, don't know why the preview is showing the numbers on double space lines. The result I got were in single space lines.).
Just join on an empty string:
# Lookup table: letters A-Z become "1".."26"; digits map to themselves.
code = {
    'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5', 'F': '6', 'G': '7',
    'H': '8', 'I': '9', 'J': '10', 'K': '11', 'L': '12', 'M': '13', 'N': '14',
    'O': '15', 'P': '16', 'Q': '17', 'R': '18', 'S': '19', 'T': '20', 'U': '21',
    'V': '22', 'W': '23', 'X': '24', 'Y': '25', 'Z': '26',
    '1': '1', '2': '2', '3': '3', '4': '4', '5': '5',
    '6': '6', '7': '7', '8': '8', '9': '9', '0': '0',
}
i = 'A4BC'
# Translate each character, then glue the pieces together with no separator.
n = "".join(code[ch] for ch in i)
print(n)
# 1423
You can do it with new_id += code[x] like below
def convertid(old_id):
    """Translate an alphanumeric id into a digits-only id.

    Each letter A-Z is replaced by its 1-based alphabet position
    ("A" -> "1", "Z" -> "26") and each digit is kept as is, so
    "ABC123" becomes "123123".

    Args:
        old_id: String made of upper-case letters and digits.

    Returns:
        The converted id as a single string ("" for an empty old_id).

    Raises:
        KeyError: If old_id contains a character missing from the table.
    """
    code = {
        'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5', 'F': '6', 'G': '7',
        'H': '8', 'I': '9', 'J': '10', 'K': '11', 'L': '12', 'M': '13',
        'N': '14', 'O': '15', 'P': '16', 'Q': '17', 'R': '18', 'S': '19',
        'T': '20', 'U': '21', 'V': '22', 'W': '23', 'X': '24', 'Y': '25',
        'Z': '26',
        '1': '1', '2': '2', '3': '3', '4': '4', '5': '5',
        '6': '6', '7': '7', '8': '8', '9': '9', '0': '0',
    }
    # Start from an empty string so the first += has something to extend.
    new_id = ''
    for x in old_id:
        new_id += code[x]
    return new_id
print(convertid('ABC123')) # 123123
Alternatively, use get, which handles the case where a key is not present by falling back to the default value '' for it:
# Lookup table: letters A-Z become "1".."26"; digits map to themselves.
code = {
    'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5', 'F': '6', 'G': '7',
    'H': '8', 'I': '9', 'J': '10', 'K': '11', 'L': '12', 'M': '13', 'N': '14',
    'O': '15', 'P': '16', 'Q': '17', 'R': '18', 'S': '19', 'T': '20', 'U': '21',
    'V': '22', 'W': '23', 'X': '24', 'Y': '25', 'Z': '26',
    '1': '1', '2': '2', '3': '3', '4': '4', '5': '5',
    '6': '6', '7': '7', '8': '8', '9': '9', '0': '0',
}
i = 'A4BC'
# Characters missing from the table contribute '' instead of raising KeyError.
print(''.join(code.get(ch, '') for ch in i))

How to extract data from json into a string

I am not able to extract the "Data" "12639735;7490484;3469776;9164745;650;0"
from this file using python:
In php it's piece of cake for me but I cannot master it in python.
Other answers from Stackexchange didn't give me the answer.
Here is the contents of the file test.json:
{
"ActTime" : 1494535483,
"ServerTime" : "2017-05-11 22:44:43",
"Sunrise" : "05:44",
"Sunset" : "21:14",
"result" : [
{
"AddjMulti" : 1.0,
"AddjMulti2" : 1.0,
"AddjValue" : 0.0,
"AddjValue2" : 0.0,
"BatteryLevel" : 255,
"Counter" : "20130.221",
"CounterDeliv" : "12634.521",
"CounterDelivToday" : "0.607 kWh",
"CounterToday" : "1.623 kWh",
"CustomImage" : 0,
"Data" : "12639735;7490484;3469776;9164745;650;0",
"Description" : "",
"Favorite" : 1,
"HardwareID" : 3,
"HardwareName" : "Slimme Meter",
"HardwareType" : "P1 Smart Meter USB",
"HardwareTypeVal" : 4,
"HaveTimeout" : false,
"ID" : "1",
"LastUpdate" : "2017-05-11 22:44:39",
"Name" : "Elektriciteitsmeter",
"Notifications" : "false",
"PlanID" : "0",
"PlanIDs" : [ 0 ],
"Protected" : false,
"ShowNotifications" : true,
"SignalLevel" : "-",
"SubType" : "Energy",
"SwitchTypeVal" : 0,
"Timers" : "false",
"Type" : "P1 Smart Meter",
"TypeImg" : "counter",
"Unit" : 1,
"Usage" : "650 Watt",
"UsageDeliv" : "0 Watt",
"Used" : 1,
"XOffset" : "0",
"YOffset" : "0",
"idx" : "1"
}
],
"status" : "OK",
"title" : "Devices"
}
This should work
import json
# Parse the whole file; the with-block closes it as soon as loading is done.
with open('test.json') as f:
    contents = json.load(f)

# "result" is a list of device records; read "Data" from the first one.
print(contents['result'][0]['Data'])
Similar questions have been asked before: Parsing values from a JSON file using Python?
Got it.
# Fetch the device list from the local JSON endpoint and parse the body.
url = "http://192.168.2.1:8080/json.htm?type=devices&rid=1"
response = urllib.urlopen(url)
# NOTE(review): the name `str` shadows the built-in str type from here on.
str = json.loads(response.read())
for i in str["result"]:
datastring = i["Data"]
# "Data" is a ';'-separated string; split it into its individual fields.
elementstring = i["Data"].split(';')
counter = 0
for j in elementstring:
# Presumably keeps the field at index 4 (the indentation was lost in this
# paste, so the exact nesting of the counter update is unconfirmed).
if counter == 4:
usage = j
counter += 1
# NOTE(review): get_num is not defined in this snippet - presumably it pulls
# the number out of a value such as "0 Watt"; TODO confirm.
delivery = get_num(i["UsageDeliv"])

Process json data using Pyspark

I am building a python script which will be executed through Apache spark in which I am generating a RDD from json file stored on S3 bucket.
I need to filter that JSON RDD according to some data in each JSON document and thereby generate a new JSON file which consists of the filtered JSON documents. That JSON file needs to be uploaded to the S3 bucket.
So please suggest an appropriate solution for implementing this with PySpark.
Input json
{
"_id" : ObjectId("55a787ee9efccaeb288b457f"),
"data" : {
"N◦ CATEGORIA" : 102.0,
"NOMBRE CATEGORIA" : "GASEOSAS",
"VARIABLE" : "TOP OF HEART",
"VAR." : "TOH",
"MARCA" : "COCA COLA ZERO",
"MES" : "ENERO",
"MES_N" : 1.0,
"AÑO" : 2014.0,
"UNIVERSO_TOTAL" : 1.0433982E7,
"UNIVERSO_FEMENINO" : 5529024.0,
"UNIVERSO_MASCULINO" : 4904958.0,
"PORCENTAJE_TOTAL" : 0.0066,
"PORCENTAJE_FEMENINO" : 0.0125,
"PORCENTAJE_MASCULINO" : null
},
"app_id" : ObjectId("5376349e11bc073138c33163"),
"category" : "excel_RAC",
"subcategory" : "RAC",
"created_time" : NumberLong(1437042670),
"instance_id" : null,
"metric_date" : NumberLong(1437042670),
"campaign_id" : ObjectId("5386602ba102b6cd4528ed93"),
"datasource_id" : ObjectId("559f5c8f9efccacf0a3c9875"),
"duplicate_id" : "695a3f5f562d0a02f1820fe5d91642a5"
}
The input JSON data needs to be filtered according to VARIABLE : "TOP OF HEART", thereby generating output JSON like the following:
Output Json
{
"_id" : ObjectId("55b5d19e9efcca86118b45a2"),
"widget_type" : "rac_toh_excel",
"campaign_id" : ObjectId("558554b29efccab00a3c987c"),
"datasource_id" : ObjectId("55b5d18f9efcca770b3c986a"),
"date_time" : NumberLong(1388530800),
"data" : {
"key" : "COCA COLA ZERO",
"values" : {
"x" : NumberLong(1388530800),
"y" : 1.0433982E7,
"data" : {
"id" : ObjectId("553a151e5c93ffe0408b46f9"),
"month" : 1.0,
"year" : 2014.0,
"total" : 1.0433982E7,
"variable" : "TOH",
"total_percentage" : 0.0066
}
}
},
"filter" : [
]
}

Categories

Resources