extract array from web fetched json file python

extract array from web fetched json file python - python

i want to fetch live quotes from this page in python 3
link
the quotes here is stored in json file in array "JsonData"
i want to get the value stored in LTP inside the json file
from urllib.request import urlopen
import json
url = ("https://ewmw.edelweiss.in/api/Market/Process/GetFutureValue/BANKNIFTY/05%20Apr%202018")
response = urlopen(url)
data = response.read().decode("utf-8")
y = json.loads(data['JsonData'])
print(y)

Replace
y = json.loads(data['JsonData'])
with
y = json.loads(data.decode('string-escape').strip('"'))
print(y) #And then access the required variable.
print(y["JsonData"])
Note: Your data is escaped. that is why i have used .decode('string-escape').strip('"')

You can try ast :
But before using eval() read this post from NedBat
from urllib.request import urlopen
import json
import ast
url = ("https://ewmw.edelweiss.in/api/Market/Process/GetFutureValue/BANKNIFTY/05%20Apr%202018")
response = urlopen(url)
data = response.read().decode("utf-8")
load_data= ast.literal_eval(data)
convert_to = json.loads(load_data)
print(convert_to['JsonData'])
output:
{'LTP': '24339.3', 'ChgPer': '-0.68', 'ArticleUrl': '', 'Url': '/quotes/index-future/BANKNIFTY~2018-04-26', 'CoCode': 'BANKNIFTY', 'Date': '2018-04-26T00:00:00', 'Chg': '-165.55'}

You can use eval as suggested below, but that's not such a good thing to do, really. So you can go with this:
import requests, json
r = requests.get('https://ewmw.edelweiss.in/api/Market/Process/GetFutureValue/BANKNIFTY/05%20Apr%202018').json()
data = json.loads(r)
print(data['JsonData'])
Or alternatively, if you insist on using urllib, then just add another y = json.loads(y), it's not a good solution, so you want to change that later, but a bit better than eval. Complete code:
from urllib.request import urlopen
import json
url = ("https://ewmw.edelweiss.in/api/Market/Process/GetFutureValue/BANKNIFTY/05%20Apr%202018")
response = urlopen(url)
data = response.read().decode("utf-8")
y = json.loads(data)
y = json.loads(y)
print(y['JsonData'])
If it was me though, I'd go with the first one, cleaner, shorter, better.

Related

create list of tuples with download url + "href"

I'm trying to make a list of tuples, the first element being the download URL and the second being the file name from the URL string with below code:
import urllib
import requests
from bs4 import BeautifulSoup
import pandas as pd
import io
url = r"https://www.ers.usda.gov/data-products/livestock-meat-domestic-data"
my_bytes = urllib.request.urlopen(url)
my_bytes = my_bytes.read().decode("utf8")
parsed_html = BeautifulSoup(my_bytes, features = "lxml")
table_data = parsed_html.body.find('table', attrs = {'id':'data_table'})
download_url = "https://www.ers.usda.gov"
full_download_url = [tuple(download_url,i["href"]) for i in table_data.find_all('a')]
But I've been getting TypeError: must be str, not list all along and I'm not sure how to fix this, please help? Thanks!

This is what I needed:
import urllib
import requests
from bs4 import BeautifulSoup
import pandas as pd
import io
url = r"https://www.ers.usda.gov/data-products/livestock-meat-domestic-data"
my_bytes = urllib.request.urlopen(url)
my_bytes = my_bytes.read().decode("utf8")
parsed_html = BeautifulSoup(my_bytes, features = "lxml")
table_data = parsed_html.body.find('table', attrs = {'id':'data_table'})
download_url = "https://www.ers.usda.gov"
def convertTuple(tup):
str = ''
for item in tup:
str = str + item
return str
full_download_url = [convertTuple(tuple(download_url + i["href"])) for i in table_data.find_all('a')]
Thanks to Geeks for geeks and everyone trying to help :)

You are incorrectly accessing the download_url array index.
Python is interpreting your code as creating an array with one element [0] when i is
0 for example, and then trying to access the element ["href"] which is a string, not a valid index
If you specify download_url before accessing the indices it will work as expected
full_download_url = [(download_url, download_url[i]["href"]) for i in table_data.find_all('a')]

How i can get random object from list in Python

I have build a list which contains href from website and i wanna randomly select one of this link, how can i do that?
from bs4 import BeautifulSoup
import urllib
import requests
import re
import random
url = "https://www.formula1.com/en/latest.html"
articles = []
respone = urllib.request.urlopen(url)
soup = BeautifulSoup(respone,'lxml')
def getItems():
for a in soup.findAll('a',attrs={'href': re.compile("/en/latest/article.")}):
articles = a['href']
x = random.choice(articles)
print(x)
That code work, but selecting only random index from all of the objects

You're very close to the answer. You just need to do this:
from bs4 import BeautifulSoup
import urllib
import requests
import re
import random
url = "https://www.formula1.com/en/latest.html"
articles = []
respone = urllib.request.urlopen(url)
soup = BeautifulSoup(respone,'lxml')
def getItems():
for a in soup.findAll('a',attrs={'href': re.compile("/en/latest/article.")}):
articles.append(a['href'])
x = random.choice(articles)
print(x)
getItems()
The changes are:
We add each article to the articles array.
The random choice is now done after the loop, rather than inside the loop.

scrape blocks of data from webpage API

I try to collect block data which forms a small table from a webpage. Pls see my codes below.
`
import requests
import re
import json
import sys
import os
import time
from lxml import html,etree
from bs4 import BeautifulSoup
import pandas as pd
url = 'https://www.investing.com/instruments/OptionsDataAjax'
params = {'pair_id': 525, ## SPX
'date': 1536555600, ## 2018-9-4
'strike': 'all', ## all prices
'callspots': 'calls',#'call_andputs',
'type':'analysis', # webpage viewer
'bringData':'true',
}
headers = {'User-Agent': Chrome/39.0.2171.95 Safari/537.36'}
def R(text, end='\n'): print('\033[0;31m{}\033[0m'.format(text), end=end)
def G(text, end='\n'): print('\033[0;32m{}\033[0m'.format(text), end=end)
page = requests.get(url, params=params,headers = headers)
if page.status_code != 200:
R('ERROR CODE:{}'.format(page.status_code))
sys.exit
G('Problem in connection!')
else:
G('OK')
soup = BeautifulSoup(page.content,'lxml')
spdata = json.loads(soup.text)
print(spdata['data'])`
This result--spdata['data'] gives me a str, I just want to get following blocks in this str. There are many such data blocks in this str with the same format.
SymbolSPY180910C00250000
Delta0.9656
Imp Vol0.2431
Bid33.26
Gamma0.0039
Theoretical33.06
Ask33.41
Theta-0.0381
Intrinsic Value33.13
Volume0
Vega0.0617
Time Value-33.13
Open Interest0
Rho0.1969
Delta / Theta-25.3172
I use json and BeautifulSoup here, maybe regular expression will help but I don't know much about re. To get the result, any approach is appreciated. Thanks.

Add this after your code:
regex = r"((SymbolSPY[1-9]*):?\s*)(.*?)\n[^\S\n]*\n[^\S\n]*"
for match in re.finditer(regex, spdata['data'], re.MULTILINE | re.DOTALL):
for line in match.group().splitlines():
print (line.strip())
Outputs
OK
SymbolSPY180910C00245000
Delta0.9682
Imp Vol0.2779
Bid38.26
Gamma0.0032
Theoretical38.05
Ask38.42
Theta-0.0397
Intrinsic Value38.13
Volume0
Vega0.0579
Time Value-38.13
Open Interest0
Rho0.1934
Delta / Theta-24.3966
SymbolSPY180910P00245000
Delta-0.0262
Imp Vol0.2652
...

python-Using urllib to retrieve web content but got differet content from what I got using browser

I want to write an translation api using this site, which has many desirable features when deal with sentences with wildcards.
First I use F12 in chrome to see what request url is using to produce the result.
I checked that only salt and sigh changed when I use different inputs.
So I look the js source code to see how salt and sigh were produced.
Then I use python library urllib to send the request and get the response. But the response translation was not the same when I use the browser to get it. For example,
Input :"what album was #head_entity# released on?"
Output_browser: "#head_entity#发布了什么专辑?"
Output_python:"发布的专辑是什么# head_entity?#"
which is clearly different.
This is the code for producing my result:
import urllib.request
import urllib.parse
import json
import time
import random
import hashlib
def translator(content):
"""arg:content"""
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
data = {}
u = 'fanyideskweb'
d = content
f = str(int(time.time()*1000) + random.randint(1,10))
c = 'rY0D^0\'nM0}g5Mm1z%1G4'
sign = hashlib.md5((u + d + f + c).encode('utf-8')).hexdigest()
data['i'] = content
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = f
data['sign'] = sign
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CL1CKBUTTON'
data['typoResult'] = 'true'
data = urllib.parse.urlencode(data).encode('utf-8')
request = urllib.request.Request(url=url,data=data,method='POST')
response = urllib.request.urlopen(request)
d = json.loads(response.read().decode('utf-8'))
return d['translateResult'][0][0]['tgt']
translator('what album was #head_entity# released on?')
The only thing I think I changed to make the request different to the original page was the url argument in the code:
My_url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
Original_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule' which gave me an error {"errorCode":50}
I checked the header and data parameters one by one but still can't solve the problem. I have no idea why this happened. Any ideas?

Request a html file

This link lets me get a random item from database. However, I would like to automatically retrieve items using Python. Here's my code:
import sys
from urllib.parse import urlencode
from urllib.request import urlopen
# parameters
data = {}
data["query"] = "reviewd:yes+AND+organism:9606"
data["random"] = "yes"
url_values = urlencode(data)
url = "http://www.uniprot.org/uniprot/"
full_url = url + '?' + url_values
data = urlopen(full_url)
out = open("1.html", 'w')
out.write(str(data.read()))
However, I cannot get the desired page. Anyone knows what's wrong with my code? I'm using Python 3.x.

You have several issues:
reviewd is misspelled, it should be reviewed
The base url needs to have /uniprot/ at the end
You need to use space instead of + in your query string
Here is what that would look like:
import sys
from urllib.parse import urlencode
from urllib.request import urlopen
# parameters
data = {}
data["query"] = "reviewed:yes AND organism:9606"
data["random"] = "yes"
url_values = urlencode(data)
url = "http://www.uniprot.org/uniprot/"
full_url = url + '?' + url_values
data = urlopen(full_url)
out = open("1.html", 'w')
out.write(str(data.read()))
This produces the following URL:
http://www.uniprot.org/uniprot/?query=reviewed%3Ayes+AND+organism%3A9606&random=yes

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

extract array from web fetched json file python - python

Replace y = json.loads(data['JsonData']) with y = json.loads(data.decode('string-escape').strip('"')) print(y) #And then access the required variable. print(y["JsonData"]) Note: Your data is escaped. that is why i have used .decode('string-escape').strip('"')

Related

create list of tuples with download url + "href"

How i can get random object from list in Python

scrape blocks of data from webpage API

python-Using urllib to retrieve web content but got differet content from what I got using browser

Request a html file

Categories

Resources