Convert HTML dictionary to PYTHON dictionary - python

I need to get the values for only the country Czechia from the list at "https://coronavirus-19-api.herokuapp.com/countries" and store them in a Python dictionary variable.
Like this:
Czechia = {"cases":434,"todayCases":0,"deaths":0,"todayDeaths":0,"recovered":3,"active":431,"critical":2}

You could use requests to GET the JSON data from your server URL, then construct a new dictionary with country as the key:
from requests import get

URL = "https://coronavirus-19-api.herokuapp.com/countries"

# Bound the wait and fail loudly on a non-2xx reply, so an error page is
# never handed to the JSON decoder.
response = get(URL, timeout=10)
response.raise_for_status()
req = response.json()

# Re-key the list of per-country records by country name. The 'country'
# field is dropped from each value because it has become the key.
result = {
    obj['country']: {k: v for k, v in obj.items() if k != 'country'}
    for obj in req
}
print(result)
Output:
{'China': {'cases': 80894, 'todayCases': 13, 'deaths': 3237, 'todayDeaths': 11, 'recovered': 69614, 'active': 8043, 'critical': 2622}, 'Italy': {'cases': 31506, 'todayCases': 0, 'deaths': 2503, 'todayDeaths': 0, 'recovered': 2941, 'active': 26062, 'critical': 2060}...
Now you can access your data in O(1) time instead of doing a O(N) linear scan:
print(result["Czechia"])
# {'cases': 464, 'todayCases': 30, 'deaths': 0, 'todayDeaths': 0, 'recovered': 3, 'active': 461, 'critical': 2}
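Note: It's probably also wise to check that the response's status_code is 200 OK (or whatever else you expect to receive from the server) before parsing it. In the snippet above, req already holds the parsed JSON, so keep the object returned by get(URL) around if you want to inspect its status.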

In [1]: import requests
...: import json
...:
...: data = requests.get('https://coronavirus-19-api.herokuapp.com/countries').json()
...: result = next(item for item in data if item["country"] == "Czechia")
...: print(json.dumps(result, indent=4))
{
"country": "Czechia",
"cases": 464,
"todayCases": 30,
"deaths": 0,
"todayDeaths": 0,
"recovered": 3,
"active": 461,
"critical": 2
}
In [2]:

While RoadRunner's answer solves your problem, here is one other way of doing it, using Python's urllib module.
from urllib.request import urlopen
##import ast
import json
def Corona_Tracker():
    """Download the per-country statistics and return them as parsed JSON.

    Returns:
        The decoded JSON payload. The calling code in this snippet iterates
        it as a sequence of dicts and reads each item's 'country' key.
    """
    # The context manager closes the HTTP response even if read() raises.
    with urlopen('https://coronavirus-19-api.herokuapp.com/countries') as res:
        payload = res.read().strip()
    return json.loads(payload)
if __name__ == "__main__":
    result_str = Corona_Tracker()
    # The data is downloaded a single time, so one pass over it is enough;
    # looping again would only print the same record endlessly.
    for data in result_str:
        if data['country'] == "India":
            print(data)
Just replace India with your country; it gives the output below:
>>> {'country': 'India', 'cases': 148, 'todayCases': 5, 'deaths': 3, 'todayDeaths': 0, 'recovered': 14, 'active': 131, 'critical': 0}

Related

How can I extract points from JSON response of influxDB API?

I am using the python requests module to query the influxdb on my localhost using the influx API. The code is as follows.
import requests

INFLUX_DATABASE_NAME = "mydb"
influx_write_url = f'http://localhost:8086/write?db={INFLUX_DATABASE_NAME}'
influx_query_url = f'http://localhost:8086/query?db={INFLUX_DATABASE_NAME}'

pid = "CTR2257"
time_range = "7d"
# The product id is compared as a single-quoted string literal in the query.
pid_mod = f"'{pid}'"

# Select every sensor field of the "stat" measurement for one product over
# the chosen time window.
query_string = (
    'SELECT "temp1"::field,"temp2"::field,"hum1"::field,"hum2"::field,'
    '"co2"::field,"light"::field,"fan"::field '
    f'FROM "stat" WHERE productID = {pid_mod} AND time > now() - {time_range}'
)
payload = {'q': query_string}
r = requests.get(influx_query_url, params=payload)

# Show the raw response body (JSON data as bytes).
print(r.content)
The print statement produces the below output. The output contains the desired dataset but the formatting is not preferred.
b'{"results":[{"statement_id":0,"series":[{"name":"stat","columns":["time","temp1","temp2","hum1","hum2","co2","light","fan"],"values":[["2020-11-04T22:09:26.4960419Z",32,34,65,68,9,true,true],["2020-11-04T23:31:33.8177588Z",30,32,90,85,5,true,true],["2020-11-04T23:31:46.5988965Z",30,32,90,85,5,true,true],["2020-11-04T23:31:49.9271554Z",30,32,90,85,5,true,true]]}]}]}\n'
When I parse this data using .json() method from requests module
request_data = r.json()
print(request_data)
I get the below python dictionary. Again the formatting is same and is not desired.
{'results': [{'statement_id': 0, 'series': [{'name': 'stat', 'columns': ['time', 'temp1', 'temp2', 'hum1', 'hum2', 'co2', 'light', 'fan'], 'values': [['2020-11-04T22:09:26.4960419Z', 32, 34, 65, 68, 9, True, True], ['2020-11-04T23:31:33.8177588Z', 30, 32, 90, 85, 5, True, True], ['2020-11-04T23:31:46.5988965Z', 30, 32, 90, 85, 5, True, True], ['2020-11-04T23:31:49.9271554Z', 30, 32, 90, 85, 5, True, True]]}]}]}
What I am looking for is to get the point data for each field in long format instead of wide format. For ex.
temp1 = [point1, point2, point3,....pointn]
temp2 = [point1, point2, point3,....pointn]
hum1 = [point1, point2, point3,....pointn]
hum2 = [point1, point2, point3,....pointn]
and so on.
Any ideas or help is highly appreciated.

Flask app returns json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

My Flask app keeps raising this error when I send the JSON request to an API, but the same code runs fine from my terminal. What could be wrong? I've looked through the website, but none of the solutions is applicable. This is my code:
import requests

key = "XpVcEz3pf5hXpJ7psgasaszhMng"
isbns = "2497573957X"

# Ask the review-counts endpoint about the given ISBN(s).
query = {"key": key, "isbns": isbns}
res = requests.get("https://www.goodreads.com/book/review_counts.json", params=query)

# Decode the body, then read the average rating of the first book returned.
goodreads = res.json()
first_book = goodreads["books"][0]
averageratings = first_book["average_rating"]
And this is the json result I get from my terminal where I ran the code:
{'books': [{
'id': 29207858,
'isbn': '1632168146',
'isbn13': '9781632168146',
'ratings_count': 0,
'reviews_count': 1,
'text_reviews_count': 0,
'work_ratings_count': 26,
'work_reviews_count': 113,
'work_text_reviews_count': 10,
'average_rating': '4.04'
}]
}

Can't fetch an item out of weird json content

I'm trying to get some items from json content. However, the structure of that json content is foreign to me and as a result I can't fetch the value of property out of it.
I've tried so far with:
import json
import requests
from bs4 import BeautifulSoup
link = 'https://www.zillow.com/homedetails/5958-SW-4th-St-Miami-FL-33144/43835884_zpid/'
def fetch_content(link):
    """Download the page at *link* and print the 'apiCache' entry of its preloaded JSON."""
    response = requests.get(link, headers={"User-Agent": "Mozilla/5.0"})
    page = BeautifulSoup(response.text, "lxml")
    # The data of interest sits in the <script id="hdpApolloPreloadedData"> tag.
    script_tag = page.select_one("script#hdpApolloPreloadedData")
    preloaded = json.loads(script_tag.text)
    print(preloaded['apiCache'])
if __name__ == '__main__':
fetch_content(link)
The result I get running the above script is:
{"VariantQuery{\"zpid\":43835884}":{"property":{"zpid":43835884,"streetAddress":"5958 SW 4th St",
Which I can't further process for that weird key in front.
Expected output:
{"zpid":43835884,"streetAddress":"5958 SW 4th St", ----
How can I get the value of that property?
You can get zpid and address by their mangled json with:
json.loads(json.loads(item.text)['apiCache'])['VariantQuery{"zpid":43835884}']['property']['zpid']
Out[1889]: 43835884
json.loads(json.loads(item.text)['apiCache'])['VariantQuery{"zpid":43835884}']['property']['streetAddress']
Out[1890]: '5958 SW 4th St'
I noticed you can always get the zpid like this:
link = 'https://www.zillow.com/homedetails/5958-SW-4th-St-Miami-FL-33144/43835884_zpid/'
content = requests.get(link,headers={"User-Agent":"Mozilla/5.0"})
soup = BeautifulSoup(content.text,"lxml")
item = soup.select_one("script#hdpApolloPreloadedData").text
print(json.loads(item)['zpid'])
Just modify your function to the following. I also added another function (process_fetched_content()) to give you some more freedom. You could simply run it and it will take care of situations even when you have multiple keys that start with 'VariantQuery{"zpid":'. The final output is a dict with the keys being your zpid and the values being what you are looking for.
If you have a lot of zpid values, then this will let you accumulate them all together and then process them. The benefit is the list of keys is then the list of zpids you have.
Here's how you could use this code.
results = process_fetched_content(raw_dictionary = fetch_content(link, verbose=False))
print(results)
output:
{'43835884': {'zpid': 43835884, 'streetAddress': '5958 SW 4th St', 'zipcode': '33144', 'city': 'Miami', 'state': 'FL', 'latitude': 25.76661, 'longitude': -80.292801, 'price': 340000, 'dateSold': 1576875600000, 'bathrooms': 2, 'bedrooms': 3, 'livingArea': 1757, 'yearBuilt': 1973, 'lotSize': 4331, 'homeType': 'SINGLE_FAMILY', 'homeStatus': 'RECENTLY_SOLD', 'photoCount': 19, 'imageLink': 'https://photos.zillowstatic.com/p_g/IS7yxihwtuqmlq1000000000.jpg', 'daysOnZillow': 0, 'isFeatured': False, 'shouldHighlight': False, 'brokerId': 0, 'zestimate': 341336, 'rentZestimate': 2200, 'listing_sub_type': {}, 'priceReduction': '', 'isUnmappable': False, 'rentalPetsFlags': 128, 'mediumImageLink': 'https://photos.zillowstatic.com/p_c/IS7yxihwtuqmlq1000000000.jpg', 'isPreforeclosureAuction': False, 'homeStatusForHDP': 'RECENTLY_SOLD', 'priceForHDP': 340000, 'festimate': 341336, 'isListingOwnedByCurrentSignedInAgent': False, 'isListingClaimedByCurrentSignedInUser': False, 'hiResImageLink': 'https://photos.zillowstatic.com/p_f/IS7yxihwtuqmlq1000000000.jpg', 'watchImageLink': 'https://photos.zillowstatic.com/p_j/IS7yxihwtuqmlq1000000000.jpg', 'tvImageLink': 'https://photos.zillowstatic.com/p_m/IS7yxihwtuqmlq1000000000.jpg', 'tvCollectionImageLink': 'https://photos.zillowstatic.com/p_l/IS7yxihwtuqmlq1000000000.jpg', 'tvHighResImageLink': 'https://photos.zillowstatic.com/p_n/IS7yxihwtuqmlq1000000000.jpg', 'zillowHasRightsToImages': True, 'desktopWebHdpImageLink': 'https://photos.zillowstatic.com/p_h/IS7yxihwtuqmlq1000000000.jpg', 'isNonOwnerOccupied': False, 'hideZestimate': False, 'isPremierBuilder': False, 'isZillowOwned': False, 'currency': 'USD', 'country': 'USA', 'taxAssessedValue': 224131, 'streetAddressOnly': '5958 SW 4th St', 'unit': ' '}}
Code
import json
import requests
from bs4 import BeautifulSoup
link = 'https://www.zillow.com/homedetails/5958-SW-4th-St-Miami-FL-33144/43835884_zpid/'
def fetch_content(link, verbose=False):
    """Download the page at *link* and return its decoded 'apiCache' mapping.

    Args:
        link: URL of the page to fetch.
        verbose: When true, print the decoded mapping before returning it.

    Returns:
        The object obtained by JSON-decoding the 'apiCache' entry of the
        page's preloaded data.
    """
    response = requests.get(link, headers={"User-Agent": "Mozilla/5.0"})
    markup = BeautifulSoup(response.text, "lxml")
    raw_script = markup.select_one("script#hdpApolloPreloadedData").text
    # 'apiCache' is itself a JSON-encoded string, hence the second decode.
    api_cache = json.loads(json.loads(raw_script)['apiCache'])
    if verbose:
        print(api_cache)
    return api_cache
def process_fetched_content(raw_dictionary=None):
    """Collect the 'property' payload of every VariantQuery entry, keyed by zpid.

    Args:
        raw_dictionary: Mapping whose keys may look like
            'VariantQuery{"zpid":<id>}' and whose values are dicts.

    Returns:
        A dict mapping each zpid (as a string) to that entry's 'property'
        value, or to None when the entry has no 'property' key. Returns
        None when *raw_dictionary* is None.
    """
    if raw_dictionary is None:
        return None
    prefix = 'VariantQuery{"zpid":'
    # Look the entries up in the argument itself rather than in a name from
    # some outer scope, so the function works on whatever it is given.
    return {
        # 'VariantQuery{"zpid":123}' -> '123'
        key.split(':')[-1].replace('}', ''): raw_dictionary[key].get('property')
        for key in raw_dictionary
        if key.startswith(prefix)
    }

How to create nested parameters for the requests library

I'm using the popular requests library to request data from an API. The following works:
import requests

# Flat query options understood by the plugin-info endpoint.
parameters = dict(action='query_plugins', per_page=10, browse=1, page=1)

response = requests.get('https://api.wordpress.org/plugins/info/1.1/', params=parameters)
results = response.json()
print(results)
But there are some parameters which are nested, for example, here is a URL I might query:
https://api.wordpress.org/plugins/info/1.1/?action=query_plugins&request[per_page]=10&request[browse]=1&request[page]=1&request[fields][description]=0&request[fields][sections]=0'
In the above case I need to pass the following nested parameters:
request[fields][description]
request[fields][sections]
How can this be accomplished?
If I read this issue correctly, you can simply use the bracketed name as the key for your parameters (see this comment):
import requests

parameters = dict(action='query_plugins', per_page=10, browse=1, page=1)
# Nested options are spelled out literally, brackets included, as plain keys.
parameters['request[fields][description]'] = 0
parameters['request[fields][sections]'] = 0

response = requests.get('https://api.wordpress.org/plugins/info/1.1/', params=parameters)
results = response.json()
You can use a recursive function to flatten the parameters from a dict of dicts to the key-value pairs in the square-bracketed format the API requires:
def params(name, parameters):
    """Flatten a dict of dicts into square-bracketed query parameter names.

    Args:
        name: Prefix placed in front of every bracketed path.
        parameters: Possibly nested dict; only ``dict`` values are descended
            into, every other value is treated as a leaf.

    Returns:
        A flat dict such as ``{'request[fields][description]': 0}``. An
        empty nested dict contributes no entries.
    """
    def flatten(d):
        # Yield (bracketed_path, leaf_value) pairs, depth first.
        for k, v in d.items():
            if isinstance(v, dict):
                for suffix, leaf in flatten(v):
                    yield f'[{k}]{suffix}', leaf
            else:
                # An f-string formats any key as one value; %-formatting
                # would try to unpack a tuple key as several arguments.
                yield f'[{k}]', v

    return {f'{name}{path}': value for path, value in flatten(parameters)}
# Options that belong under the 'request' prefix, nesting included.
parameters = dict(
    per_page=10,
    browse=1,
    page=1,
    fields=dict(description=0, sections=0),
)

# 'action' stays a plain key; everything else is flattened to request[...].
query = {'action': 'query_plugins'}
query.update(params('request', parameters))
response = requests.get('https://api.wordpress.org/plugins/info/1.1/', query)

recursively collect string blocks in python

I have a custom data file formatted like this:
{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}
I want to collect the blocks of data, meaning the string between each set of {}, while maintaining the hierarchy. This data is not in a typical JSON format, so parsing it as JSON is not a possible solution.
My idea was to create a class object like so
class Block:
    """A node of the parsed file: a header string plus its child blocks."""

    def __init__(self, header, children):
        # Store both constructor arguments unchanged.
        self.header, self.children = header, children
I would then loop through the data line by line, 'somehow' collecting the necessary data, so that my resulting output would look something like this...
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short I'm looking for help on ways I can serialize this into useful data I can then easily manipulate. So my approach is to break into objects by using the {} as dividers.
If anyone has suggestions on ways to better approach this I'm all up for ideas. Thank you again.
So far I've just implemented the basic snippets of code
class Block:
    """A node of the parsed file: its content string plus its child blocks."""

    def __init__(self, content, children):
        # Store both constructor arguments unchanged.
        self.content, self.children = content, children
def GetBlock(strArr=[]):
print len(strArr)
# blocks = []
blockStart = "{"
blockEnd = "}"
with open(filepath, 'r') as file:
data = file.readlines()
blocks = GetBlock(strArr=data)
You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
    """Recursively build a nested dict from ``key = value`` lines.

    Args:
        lines: An *iterator* over the lines of the file. It must be an
            iterator, not a list, because nested calls continue consuming
            the same stream from where the outer call stopped.

    Returns:
        A dict mapping each key to its raw value string (trailing commas
        and quotes are kept), or to a nested dict when the value opens a
        ``{`` block. Parsing stops at the line that closes the current block.
    """
    block = {}
    for line in lines:
        stripped = line.strip()
        if stripped.endswith(("}", "},")):
            # End of the current block: hand control back to the caller.
            break
        # Split on the first separator only, so a value that itself
        # contains " = " stays intact.
        key, sep, value = stripped.partition(" = ")
        if not sep:
            # Blank line or a line without "key = value": nothing to record.
            continue
        key, value = key.strip(), value.strip()
        if value.endswith("{"):
            value = to_block(lines)
        block[key] = value
    return block
When calling it, you have to strip the first line, though. Also, evaluating the "leaves" to e.g. numbers or strings is left as an exercise for the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
next(f) # skip first line
res = to_block(f)
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values into "" (and replace the original " with ' before that), otherwise there is too much risk to accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}
You can also do with ast or json with the help of regex substitutions.
import ast
import json
import re

a = """{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}"""

# with ast
# Quote every key and turn `key = ` into `"key":` so the text reads as a
# Python dict literal. Raw strings keep the regex escapes intact.
a = re.sub(r"(\w+)\s*=\s*", r'"\1":', a)
# A run of space-separated integers such as `0 0` becomes a list `[0,0]`.
a = re.sub(r":\s*((?:\d+)(?: \d+)+)", lambda x: ':[' + x.group(1).replace(" ", ",") + "]", a)
print(ast.literal_eval(a))
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'van': 'honda', 'car': 'ford', 'bike': 'trek'}, 'presets': {'location': 'italy', 'size': 10, 'travelers': False}, 'version': 1}}}

# with json
# JSON forbids the trailing comma before `}` and spells its literals in
# lower case, so both are normalised before decoding.
a = re.sub(r",(\s*\})", r"\1", a)
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print(json.loads(a))
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'van': 'honda', 'car': 'ford', 'bike': 'trek'}, 'presets': {'location': 'italy', 'size': 10, 'travelers': False}, 'version': 1}}}

Categories

Resources