How to request post to aspx site in python? - python

This is the url I'm trying to parse.
url = 'https://www.ncsl.org/research/energy/energy-legislation-tracking-database.aspx'
I'm trying to select "all state" and "all topic" option, and retrieve the search output.
def Generate_params(tn=22, sn=55, searchyear=2008, vr=vrstring):
    """Build the form payload for the NCSL energy-legislation search POST.

    Ticks the "all topics" and "all states" boxes plus every individual
    topic checkbox 0..tn and state checkbox 0..sn, then adds the view state,
    the search year, the search button and the view-state generator.

    Args:
        tn: Highest topic checkbox index to tick (inclusive).
        sn: Highest state checkbox index to tick (inclusive).
        searchyear: Value for the year drop-down; sent as a string.
        vr: Value posted as ``__VIEWSTATE``. Defaults to the module-level
            ``vrstring`` as it was when this function was defined.

    Returns:
        dict mapping form field names to the values to post.
    """
    topicstring = "dnn$ctr85406$StateNetDB$ckBxTopics$"
    statestring = 'dnn$ctr85406$StateNetDB$ckBxStates$'
    params = {
        'dnn$ctr85406$StateNetDB$ckBxAllTopics': 'on',
        'dnn$ctr85406$StateNetDB$ckBxAllStates': 'on',
    }
    # range(n + 1) so that checkbox index n itself is included.
    params.update((topicstring + "{}".format(t), 'on') for t in range(tn + 1))
    params.update((statestring + "{}".format(s), 'on') for s in range(sn + 1))
    # Use the argument rather than the global, so a caller-supplied view
    # state is actually sent.
    params['__VIEWSTATE'] = vr
    params['dnn$ctr85406$StateNetDB$ddlYear'] = str(searchyear)
    params['dnn$ctr85406$StateNetDB$btnSearch'] = 'Search'
    params['__VIEWSTATEGENERATOR'] = 'CA0B0334'
    return params
from requests import Request, Session
# Build the form payload using Generate_params' default arguments.
post_params=Generate_params()
session = Session()
# NOTE(review): `headers` is not defined anywhere in the visible code.
response = session.post(url, headers=headers,data=post_params)
But the returned response doesn't contain the search output. What did I do wrong?

Related

Extract drug class using rxcui code extracted from Rxnorm API

I wanted to extract drug class using Rxnorm API (RxNorm API) using NDC code. My python codes are:
#!/usr/bin/python
#pip install simplejson
import os
import sys
import requests
import simplejson as json
def connectionCheck():
    """Request the RxNorm REST ``version`` endpoint and describe the outcome.

    Returns:
        A message string that includes the HTTP status code: a success
        message when the status equals ``requests.codes.ok``, otherwise an
        error message.
    """
    reply = requests.get(
        'http://rxnav.nlm.nih.gov/REST/version',
        headers={'Accept': 'application/json'},
    )
    status = reply.status_code
    if status == requests.codes.ok:
        return "Connection check complete. RXNorm online. Response code: %s" % status
    return "RXNorm server response error. Response code: %s" % status
def rxNorm(ndc):
    """Resolve a list of NDC codes to the first matching RxNorm concept.

    Each NDC is looked up in turn through the RxNorm REST API. The first one
    that maps to an RXCUI wins, and its term type (TTY) and RxNorm name are
    fetched with two further requests.

    Args:
        ndc: Sequence of NDC code strings (the value comes from master.py).

    Returns:
        dict with keys ``rxcui``, ``rxtty`` and ``rxstring``. All three are
        empty strings when ``ndc`` is empty, when its first entry is None, or
        when none of the codes resolves to an RXCUI.

    Raises:
        SystemExit: with the message "RXNorm connection" when a request
            fails or a response cannot be decoded.
    """
    empty = {"rxcui": "", "rxtty": "", "rxstring": ""}
    # len() rather than truthiness so array-like inputs work as well.
    if len(ndc) == 0 or ndc[0] is None:
        return empty

    baseurl = 'http://rxnav.nlm.nih.gov/REST/'
    # Search by identifier to find RxNorm concepts, e.g.
    # http://rxnav.nlm.nih.gov/REST/rxcui?idtype=NDC&id=0591-2234-10
    ndcSearch = 'rxcui?idtype=NDC&id='
    # Property lookups for a concept: rxcui/<id>/property?propName=...
    rxPropSearch = 'rxcui/'
    rxttySearch = '/property?propName=TTY'
    rxstringSearch = '/property?propName=RxNorm%20Name'
    # Ask the RxNorm API to return JSON.
    header = {'Accept': 'application/json'}

    def getProperty(rxCUI, propSearch):
        # Query RxNorm again with the RXCUI and return the first property value.
        reply = requests.get(baseurl + rxPropSearch + rxCUI + propSearch, headers=header)
        propJSON = json.loads(reply.text, encoding="utf-8")
        return propJSON['propConceptGroup']['propConcept'][0]['propValue']

    # If the connection or a request fails, exit with a message pointing at
    # the connection.
    try:
        for item in ndc:
            getRXCUI = requests.get(baseurl + ndcSearch + item, headers=header)
            if getRXCUI.status_code != requests.codes.ok:
                print("RXNorm server response error. Response code: %s" % getRXCUI.status_code)
            rxcuiJSON = json.loads(getRXCUI.text, encoding="utf-8")
            try:
                rxnormIds = rxcuiJSON['idGroup']['rxnormId']
                if rxnormIds:
                    rxCUI = rxnormIds[0]
                    rxTTY = getProperty(rxCUI, rxttySearch)
                    rxSTRING = getProperty(rxCUI, rxstringSearch)
                    return {"rxcui": rxCUI, "rxtty": rxTTY, "rxstring": rxSTRING}
            except (KeyError, IndexError, TypeError):
                # This NDC has no usable RXCUI or property; try the next one.
                continue
    except Exception:
        sys.exit("RXNorm connection")
    # Nothing resolved. Returning here, instead of comparing each item with
    # ndc[-1], keeps duplicated values from ending the search early.
    return empty
Test using Toy NDC ID Code:
# Look up a single sample NDC code and print the record rxNorm returns.
dataTest=rxNorm(['69238131109'])
print(dataTest)
which gave me the following output:
{'rxcui': '483448', 'rxtty': 'SCD', 'rxstring': 'pregabalin 50 MG Oral Capsule'}
Now I am interested to get the drug class using 'rxcui': '483448' info using RxClass API. However, I couldn't make sense of this API. How can I use 'rxcui': '483448' info here to get the desired drug class. I appreciate your time. Thanks!

Api call using python and token_auth

"""
#Collects basic metrics from Matomo installation and returns a pandas dataframe
"""
token = os.getenv("token")
# Build url string
base_url = 'https://matomo.___.com/index.php?module=API'
site_num = '&idSite=1'
return_format = '&format=json'
period = '&period=day'
date_range = '&date=last30'
method = '&method=VisitsSummary.get'
token_string = "&token_auth=" + token
my_url = base_url + site_num + return_format + period + date_range + method + token_string
# send request for report
r = requests.get(my_url)
# parse and tidy collected data
data = pd.DataFrame(r.json()).T
data = data.reset_index()
data.columns = [
"date",
"uniq_visitors",
"users",
"visits",
"actions",
"visits_converted",
"bounces",
"sum_visit_length",
"max_actions",
"bounce_rate",
"actions_per_visit",
"avg_time_on_site",
]
return data
I am trying to get data from the Matomo API using a token_auth and parameters with the code above, but I am not able to access it: my URL does not seem to accept the token. Does anyone have an idea how I can solve this?
Given that you are using the requests library, parameters and headers can be passed using the following arguments in your get call:
r = requests.get(my_url, params=payload)
In the same way, an auth token is usually passed within headers:
r = requests.get(my_url, params=payload, headers=headers)
Using this format you can simply create a headers object which contains your token_auth and directly pass your parameters in a payload object:
headers = {'token_auth': token}
payload = {'module':'API', 'idSite':1, 'format':'json', 'period':'day', 'date':'last30', 'method':'VisitsSummary.get'}
Since you are now passing your parameters in you get request, there is no need to add them to the end of your url. Thus, your url should stay as https://matomo.___.com/index.php. These can then be used within your params and headers respectively. Please note that this assumes that the matomo API places the token_auth in its headers such as most APIs do. If this is not the case you could pass it directly within the params payload.
Here is a global overview:
# Read the API token from the environment variable named "token".
token = os.getenv("token")
# Get url, headers and params
my_url = 'https://matomo.___.com/index.php'
# Query-string parameters; requests encodes them onto the URL.
payload = {'module':'API', 'idSite':1, 'format':'json', 'period':'day', 'date':'last30', 'method':'VisitsSummary.get'}
# NOTE(review): this sends token_auth as an HTTP header. Matomo's Reporting API
# documents token_auth as a request parameter -- confirm, and if so add it to payload.
headers = {'token_auth': token}
# send request for report
r = requests.get(my_url, params=payload, headers=headers)
Note this answers your question specifically regarding the API call and not the processing after.

Wikipedia All-Pages API after 30 requests returns same pages titles

I want to extract all Wikipedia titles via the API. Each response contains a continue key, which is used to get the next logical batch, but after 30 requests the continue key starts to repeat, which means I am receiving the same pages.
I have tried the code below, following the Wikipedia documentation:
https://www.mediawiki.org/wiki/API:Allpages
def get_response(self, url):
    """Send a GET request to ``url`` and return the response body decoded as JSON."""
    return requests.get(url=url).json()
# Every apcontinue token received, in the order the API returned them.
appcontinue = []
url = 'https://en.wikipedia.org/w/api.php?action=query&list=allpages&format=json&aplimit=500'

# The first request carries no apcontinue; its token is appended to the URL.
json_resp = self.get_response(url)
next_batch = json_resp["continue"]["apcontinue"]
url += '&apcontinue=' + next_batch
appcontinue.append(next_batch)

# Each later request swaps the previous token in the URL for the newest one.
while True:
    json_resp = self.get_response(url)
    latest_token = json_resp["continue"]["apcontinue"]
    url = url.replace(next_batch, latest_token)
    next_batch = latest_token
    appcontinue.append(next_batch)
I expect to receive more than 10,000 unique continue keys, since one response can contain at most 500 titles.
Wikipedia has 5,673,237 articles in English.
Actual result: I made more than 600 requests and there are only 30 unique continue keys.
json_resp["continue"] contains two pairs of values, one is apcontinue and the other is continue. You should add them both to your query. See https://www.mediawiki.org/wiki/API:Query#Continuing_queries for more details.
Also, I think it'll be easier to use the params parameter of request.get instead of manually replacing the continue values. Perhaps something like this:
import requests
def get_response(url, params):
    """GET ``url`` with ``params`` as the query parameters and return the JSON body."""
    return requests.get(url, params).json()
url = 'https://en.wikipedia.org/w/api.php?action=query&list=allpages&format=json&aplimit=500'
# Continuation parameters for the next request; empty for the first batch.
params = {}
while True:
    json_resp = get_response(url, params)
    ...  # process this batch of results here
    # The final batch has no "continue" element: stop there instead of
    # raising KeyError.
    if "continue" not in json_resp:
        break
    # Send back every key/value pair of "continue" (both apcontinue and
    # continue) with the next request.
    params = json_resp["continue"]

Python login to chess.com with requests

I am trying to read out some of my stats from chess.com. I am struggling to log in with Python.
I am trying to follow the tutorial here
I have identified the following tags from the login-page. _username as the username, _password as the password and _token as the hidden token.
My code:
# Use one session so cookies set by the login page are sent with the POST.
session_requests = requests.session()
login_url = 'https://www.chess.com/login'
# Fetch the login form first to obtain the hidden token it embeds.
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
# XPath addresses attributes with "@": take the value attribute of the
# input element whose name attribute is "_token".
token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
payload = {
    "_username": "ChristianSloper",
    "_password": "mypasswordgoeshere",
    "_token": token,
}
# Post the credentials and token back, with the login page as referer.
result = session_requests.post(
    login_url,
    data=payload,
    headers=dict(referer=login_url),
    verify=True,
)
Unfortunately, I just get sent back to the login page. I am very new to front end /web and would be very pleased for any help.
Your payload isn't entirely correct (a few fields are missing), and it appears that you are sending the POST request to the wrong URL. Try this code:
# Use one session so cookies set by the login page are sent with the POST.
session_requests = requests.session()
login_url = 'https://www.chess.com/login'
# Fetch the login form first to obtain the hidden token it embeds.
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
# XPath addresses attributes with "@": take the value attribute of the
# input element whose name attribute is "_token".
token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
payload = {
    "_username": "ChristianSloper",
    "_password": "mypasswordgoeshere",
    "login": '',
    "_target_path": "https://www.chess.com/home",
    "_token": token,
}
# Set the referer on the session, then post to the login_check endpoint
# rather than to the login page itself.
session_requests.headers.update(dict(referer=login_url))
result = session_requests.post("https://www.chess.com:443/login_check", data=payload, verify=True)
Hope this helps!

Instagram get next_max_tag_id

I'm trying to do some analytics analysis on Instagram photos that are posted with a specified hashtag. So now I'm trying to store all the images in a temporary database that'll be used for the analysis.
I'm using python and I've a celery task to get all the images, but it is not working when I run with a next_max_tag_id, which is probably wrong.
Does someone know how to get the correct next_max_tag_id?
this is the code I'm using:
@task()
def get_latest_photos():
    """Fetch recent media for the tag, following pagination, and save the cursor.

    Starts from the stored ``next_max_tag_id`` option (0 when it is unset),
    requests the tag's recent-media URL, then keeps following
    ``pagination['next_url']`` until a response provides none. Every
    ``pagination['next_max_tag_id']`` received is stored with
    ``update_option``.
    """
    next_max_tag_id = get_option('next_max_tag_id')
    if not next_max_tag_id:
        next_max_tag_id = 0

    url = BASE + '/tags/{tag}/media/recent?client_id={cliend_id}' \
        '&max_tag_id={max_id}'.format(**{
            'tag': a_tag,
            'cliend_id': getattr(settings, 'INSTAGRAM_CLIENT_ID'),
            'max_id': next_max_tag_id,
        })

    while url:
        request = requests.get(url)
        if request.status_code != 200:
            pass  # TODO: error

        json_response = request.json()
        if json_response['meta']['code'] != 200:
            pass  # TODO: error

        # do something with json_response['data']:

        # Stop after this page unless the response names a next page.
        url = None
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        if 'pagination' in json_response:
            pagination = json_response['pagination']
            if 'next_url' in pagination:
                url = pagination['next_url']
            if 'next_max_tag_id' in pagination:
                next_max_tag_id = pagination['next_max_tag_id']
                update_option('next_max_tag_id', next_max_tag_id)
The flow is basically this:
get next_max_tag_id from the db (defaults to 0)
while we have a valid URL it fetches the data, the next url and the next_max_tag_id
updates the next_max_tag_id
The only thing that seems wrong to me is the next_max_tag_id, because every time I go to the API URL with the last next_max_tag_id I get the old images.
Yes. Here's how to use pagination correctly. You have to loop through the pages and reference the function you're in. You can update the script below that gets everyone you're following and query for next_max_id as well.
# Ids of every followed user collected so far, across all pages.
currently_following = set()


def parse_following(next_url=None):
    """Collect the ids of followed users into ``currently_following``.

    Requests one page of the "follows" listing, adds each user's ``id`` to
    ``currently_following`` and, when the response's ``pagination`` carries a
    ``next_url``, calls itself with that URL.

    Args:
        next_url: URL of the page to fetch. When None, the first page is
            requested using ``auth_token``.

    Any exception raised while fetching or parsing a page is printed and the
    traversal stops at that page.
    """
    if next_url is None:
        urlUserMedia = "https://api.instagram.com/v1/users/self/follows?access_token=%s" % (auth_token)
    else:
        urlUserMedia = next_url
    try:
        req = urllib2.Request(urlUserMedia, None, headers)
        response = urllib2.urlopen(req)
        try:
            result = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        dataObj = json.loads(result)
        next_url = None
        if dataObj.get('pagination') is not None:
            next_url = dataObj.get('pagination').get('next_url')
        currently_following.update(user['id'] for user in dataObj['data'])
        if next_url is not None:
            parse_following(next_url)
    except Exception as e:
        # Best effort: report the problem instead of raising.
        print(e)

Categories

Resources