I wanted to extract drug class using Rxnorm API (RxNorm API) using NDC code. My python codes are:
#!/usr/bin/python
#pip install simplejson
import os
import sys
import requests
import simplejson as json
def connectionCheck():
url = 'http://rxnav.nlm.nih.gov/REST/version'
header = {'Accept': 'application/json'}
getCheck = requests.get(url, headers=header)
if getCheck.status_code != requests.codes.ok:
response = "RXNorm server response error. Response code: %s" % getCheck.status_code
else:
response = "Connection check complete. RXNorm online. Response code: %s" % getCheck.status_code
return response
def rxNorm(ndc):
# ndc value coming from master.py
# ndc = [array of ndc values]
if ndc[0] is None:
return {"rxcui": "", "rxtty": "", "rxstring": ""}
else:
# if internet or request throws an error, print out to check connection and exit
try:
baseurl = 'http://rxnav.nlm.nih.gov/REST/'
# Searching RXNorm API, Search by identifier to find RxNorm concepts
# http://rxnav.nlm.nih.gov/REST/rxcui?idtype=NDC&id=0591-2234-10
# Set url parameters for searching RXNorm for SETID
ndcSearch = 'rxcui?idtype=NDC&id='
# Search RXNorm API, Return all properties for a concept
rxPropSearch = 'rxcui/'
rxttySearch = '/property?propName=TTY'
rxstringSearch = '/property?propName=RxNorm%20Name'
# Request RXNorm API to return json
header = {'Accept': 'application/json'}
def getTTY(rxCUI):
# Search RXNorm again using RXCUI to return RXTTY & RXSTRING
getTTY = requests.get(baseurl+rxPropSearch+rxCUI+rxttySearch, headers=header)
ttyJSON = json.loads(getTTY.text, encoding="utf-8")
return ttyJSON['propConceptGroup']['propConcept'][0]['propValue']
def getSTRING(rxCUI):
# Search RXNorm again using RXCUI to return RXTTY & RXSTRING
getString = requests.get(baseurl+rxPropSearch+rxCUI+rxstringSearch, headers=header)
stringJSON = json.loads(getString.text, encoding="utf-8")
return stringJSON['propConceptGroup']['propConcept'][0]['propValue']
# Search RXNorm using NDC code, return RXCUI id
# ndc = [ndc1, ndc2, ... ]
for item in ndc:
getRXCUI = requests.get(baseurl+ndcSearch+item, headers=header)
if getRXCUI.status_code != requests.codes.ok:
print ("RXNorm server response error. Response code: %s" % getRXCUI.status_code)
rxcuiJSON = json.loads(getRXCUI.text, encoding="utf-8")
# Check if first value in list returns a RXCUI, if not go to next value
try:
if rxcuiJSON['idGroup']['rxnormId']:
rxCUI = rxcuiJSON['idGroup']['rxnormId'][0]
rxTTY = getTTY(rxCUI)
rxSTRING = getSTRING(rxCUI)
return {"rxcui": rxCUI, "rxtty": rxTTY, "rxstring": rxSTRING}
except:
# if last item return null values
if item == ndc[-1]:
return {"rxcui": "", "rxtty": "", "rxstring": ""}
pass
except:
sys.exit("RXNorm connection")
Test using Toy NDC ID Code:
dataTest=rxNorm(['69238131109'])
print(dataTest)
which gave me the following output:
{'rxcui': '483448', 'rxtty': 'SCD', 'rxstring': 'pregabalin 50 MG Oral Capsule'}
Now I am interested to get the drug class using 'rxcui': '483448' info using RxClass API. However, I couldn't make sense of this API. How can I use 'rxcui': '483448' info here to get the desired drug class. I appreciate your time. Thanks!
Related
I have been trying to get spacfic pages extract from each pdf and then merge all the extracted pdf in once.
I have list of pdfs
I am using pdfrw this library but getting error while extracting the pages
from pdfrw import PdfReader, PdfWriter
import os
files = [f for f in os.listdir(
'.') if os.path.isfile(f) and f.endswith('.pdf')]
print(files)
for pdf in files:
pages = PdfReader(pdf).pages
parts = [(6, 7)]
for part in parts:
title = pdf.title().split('.')[0]
outdata = PdfWriter(f'{title}_{part[0]}_.pdf')
for pagenum in range(*part):
outdata.addpage(pages[pagenum-1])
outdata.write()
Please help if possible
raise PdfParseError('Invalid PDF header: %s' %
pdfrw.errors.PdfParseError: Invalid PDF header: '<!doctype html>'
Manas,
One way to achieve your requirement is to use API. For example, consider following code snippet where it splits PDF from uploaded file.
import os
import requests # pip install requests
# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
API_KEY = "*********************************"
# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"
# Source PDF file
SourceFile = ".\\sample.pdf"
# Comma-separated list of page numbers (or ranges) to process. Example: '1,3-5,7-'.
Pages = "1-2,3-"
def main(args = None):
uploadedFileUrl = uploadFile(SourceFile)
if (uploadedFileUrl != None):
splitPDF(uploadedFileUrl)
def splitPDF(uploadedFileUrl):
"""Split PDF using PDF.co Web API"""
# Prepare requests params as JSON
# See documentation: https://apidocs.pdf.co
parameters = {}
parameters["pages"] = Pages
parameters["url"] = uploadedFileUrl
# Prepare URL for 'Split PDF' API request
url = "{}/pdf/split".format(BASE_URL)
# Execute request and get response as JSON
response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY })
if (response.status_code == 200):
json = response.json()
if json["error"] == False:
# Download generated PNG files
part = 1
for resultFileUrl in json["urls"]:
# Download Result File
r = requests.get(resultFileUrl, stream=True)
localFileUrl = f"Page{part}.pdf"
if r.status_code == 200:
with open(localFileUrl, 'wb') as file:
for chunk in r:
file.write(chunk)
print(f"Result file saved as \"{localFileUrl}\" file.")
else:
print(f"Request error: {response.status_code} {response.reason}")
part = part + 1
else:
# Show service reported error
print(json["message"])
else:
print(f"Request error: {response.status_code} {response.reason}")
def uploadFile(fileName):
"""Uploads file to the cloud"""
# 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.
# Prepare URL for 'Get Presigned URL' API request
url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format(
BASE_URL, os.path.basename(fileName))
# Execute request and get response as JSON
response = requests.get(url, headers={ "x-api-key": API_KEY })
if (response.status_code == 200):
json = response.json()
if json["error"] == False:
# URL to use for file upload
uploadUrl = json["presignedUrl"]
# URL for future reference
uploadedFileUrl = json["url"]
# 2. UPLOAD FILE TO CLOUD.
with open(fileName, 'rb') as file:
requests.put(uploadUrl, data=file, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" })
return uploadedFileUrl
else:
# Show service reported error
print(json["message"])
else:
print(f"Request error: {response.status_code} {response.reason}")
return None
if __name__ == '__main__':
main()
Now, to merge PDF file you can use similar to following code snippet.
import os
import requests # pip install requests
# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
API_KEY = "**********************************"
# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"
# Source PDF files
SourceFile_1 = ".\\sample1.pdf"
SourceFile_2 = ".\\sample2.pdf"
# Destination PDF file name
DestinationFile = ".\\result.pdf"
def main(args = None):
UploadedFileUrl_1 = uploadFile(SourceFile_1)
UploadedFileUrl_2 = uploadFile(SourceFile_2)
if (UploadedFileUrl_1 != None and UploadedFileUrl_2!= None):
uploadedFileUrls = "{},{}".format(UploadedFileUrl_1, UploadedFileUrl_2)
mergeFiles(uploadedFileUrls, DestinationFile)
def mergeFiles(uploadedFileUrls, destinationFile):
"""Perform Merge using PDF.co Web API"""
# Prepare requests params as JSON
# See documentation: https://apidocs.pdf.co
parameters = {}
parameters["name"] = os.path.basename(destinationFile)
parameters["url"] = uploadedFileUrls
# Prepare URL for 'Merge PDF' API request
url = "{}/pdf/merge".format(BASE_URL)
# Execute request and get response as JSON
response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY })
if (response.status_code == 200):
json = response.json()
if json["error"] == False:
# Get URL of result file
resultFileUrl = json["url"]
# Download result file
r = requests.get(resultFileUrl, stream=True)
if (r.status_code == 200):
with open(destinationFile, 'wb') as file:
for chunk in r:
file.write(chunk)
print(f"Result file saved as \"{destinationFile}\" file.")
else:
print(f"Request error: {response.status_code} {response.reason}")
else:
# Show service reported error
print(json["message"])
else:
print(f"Request error: {response.status_code} {response.reason}")
def uploadFile(fileName):
"""Uploads file to the cloud"""
# 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.
# Prepare URL for 'Get Presigned URL' API request
url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format(
BASE_URL, os.path.basename(fileName))
# Execute request and get response as JSON
response = requests.get(url, headers={ "x-api-key": API_KEY })
if (response.status_code == 200):
json = response.json()
if json["error"] == False:
# URL to use for file upload
uploadUrl = json["presignedUrl"]
# URL for future reference
uploadedFileUrl = json["url"]
# 2. UPLOAD FILE TO CLOUD.
with open(fileName, 'rb') as file:
requests.put(uploadUrl, data=file, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" })
return uploadedFileUrl
else:
# Show service reported error
print(json["message"])
else:
print(f"Request error: {response.status_code} {response.reason}")
return None
if __name__ == '__main__':
main()
In this sample I am using pdf.co API. Refer to following links for more information.
https://apidocs.pdf.co/30-pdf-split, https://apidocs.pdf.co/31-pdf-merge
Thanks!
After a Post action on a certain Link I get the following answer
{"data":{"loginWithEmail":{"__typename":"LoginResponse","me":{"__typename":"User","username":"davishelenekb","displayname":"davishelenekb","avatar":"https://image.sitecdn.com/avatar/default11.png","partnerStatus":"NONE","role":"None","myChatBadges":[],"private":{"__typename":"UserPrivateInfo","accessToken":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtasdasdasdaslzaGVsZW5la2IiLCJkaXNwbGF5bmFtZSI6ImRhdmlzaGVsZW5la2IiLCJhdmF0YXIiOiJodHRwczovL2ltYWdlLmRsaXZlY2RuLmNvbS9hdmF0YXIvZGVmYXVsdDExLnBuZyIsInBhcnRuZXJfc3RhdHVzX3N0cmluZyI6Ik5PTkUiLCJpZCI6IiIsImxpZCI6MCwidHlwZSI6ImVtYWlsIiwicm9sZSI6Ik5vbmUiLCJvYXV0aF9hcHBpZCI6IiIsImV4cCI6MTYwOTE4NDQwNyadasdasdaNTkyNDA3LCJpc3MiOiJETGl2ZSJ9.cQXJFUEo7r4bQa2FPHvKAvjisEF1VKldhFdxOcZ3YTk","email":"email","emailVerified":true,"bttAddress":{"__typename":"MyBTTAddress","senderAddress":null}},"subCashbacked":true},"accessToken":"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6ImRhdmlzaGVsZW5la2IiLCJkaXNwbGF5bmFtZSI6ImRhdmlzaGVsZW5la2IiLCJhdmF0YXIiOiJodHRwczovL2ltYWdlLmRsaXZlY2RuLmNvbS9hdmF0YXIvZGVmYasdasdyIsInBhcnRuZXJfc3RhdHVzX3N0cmluZyI6Ik5PTkUiLCJpasdasdlwZSI6ImVtYWlsIiwicm9sZSI6Ik5vbmUiLCJvYXV0aF9hcHBpZCI6IiIsImV4cCI6MTYwOTE4NDQasd221DA3LCJpc3MiOiJETGl2ZSJ9.cQXJFUEo7r4bQa2FPHvKAvjisEF1VKldhFdxOcZ3YTk","twofactorToken":null,"err":null}}}
I just want to extract the key that is in
"accessToken":"KEY",
How can I do this?
My Code
import requests
import json
from fake_useragent import UserAgent
#Set Modules
ua = UserAgent()
url = 'site'
#Read TXT
accounts = 'accounts\\accounts.txt'
with open(accounts) as line:
login = line.readline()
line = login.split(",")
cnt = 1
email = line[0]
password = line[1]
#login
head = {
'.......': '.........',
}
data = {
..........
}
test = requests.post(url, json.dumps(data), headers=head)
if test.status_code == 200:
print('Loged!')
print(test.text)
else:
print('Error') ```
You can take the text of the response, parse it as JSON, and then access the "accessToken" property:
test = requests.post(url, json.dumps(data), headers=head)
if test.status_code == 200:
parsed = json.loads(test.text)
key = parsed['data']['loginWithEmail']['accessToken']
print(key)
Side note:
This snippet assumes that the format of the returned JSON is well known and no error occurs. In a real-world scenario, you may want to add a few validations to it.
You can achieve what you need like this:
response = json.loads(test.text)
print(response["data"]["loginWithEmail"]["me"]["private"]["accessToken"])
I want to get the URLs of the most recent posts of an Instagram user (not me, and I don't have an IG account so I can't use the API). The URLs should be in the style of https://www.instagram.com/p/BpnlsmWgqon/
I've tried making a request with response = requests.get(profile_url) and then parsing the HTML with soup = BeautifulSoup(html, 'html.parser').
After these and some other functions I get a big JSON file with data of the most recent pics (but not their URLs).
How can I get the URLs and extract just that?
Edit: This is what I've coded now. It's a mess, I've trying many approaches but none has worked.
#from subprocess import call
#from instagram.client import InstagramAPI
import requests
import json
from bs4 import BeautifulSoup
#from InstagramAPI.InstagramAPI import InstagramAPI
from instagram.client import InstagramAPI
from config import login, password
userid = "6194091573"
#url = "https://www.instagram.com/mercadona.novedades/?__a=1"
#pic_url =
#call('instalooter user mercadona.novedades ./pics -n 2')
#r = requests.get("https://www.instagram.com/mercadona.novedades")
#print(r.text)
def request_pic_url(profile_url):
response = requests.get(profile_url)
return response.text
def extract_json(html):
soup = BeautifulSoup(html, 'html.parser')
body = soup.find('body')
script_tag = body.find('script')
raw_string = script_tag.text.strip().replace('window._sharedData =', '').replace(';', '')
return json.loads(raw_string)
def get_recent_pics(profile_url):
results = []
response = request_pic_url(profile_url)
json_data = extract_json(response)
metrics = json_data['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']["edges"]
for node in metrics:
node = node.get('node')
if node and isinstance(node, dict):
results.append(node)
return results
def api_thing():
api = InstagramAPI(login, password)
recent_media, next_ = api.user_recent_media(userid, 2)
for media in recent_media:
print(media.caption.text)
def main():
userid = "6194091573"
api_thing()
if __name__ == "__main__":
main()
def get_large_pic(url):
return url + "/media/?size=l"
def get_media_id(url):
req = requests.get('https://api.instagram.com/oembed/?url={}'.format(url))
media_id = req.json()['media_id']
return media_id
i suggest you to use the following library:https://github.com/LevPasha/Instagram-API-python
api = InstagramAPI("username", "password")
api.login()
def get_lastposts(us_id):
api.getUserFeed(us_id)
if 'items' in api.LastJson:
info = api.LastJson['items']
posts=[]
for media in info:
if (media['caption']!=None):
#print(media['caption']['media_id'])
posts.append(media['caption']['media_id'])
return posts
get_lastposts('user_id')
I have written code for calling the AlchemyLanguage API of Bluemix in Python. I need the keywords and entities, but it is only showing the first keyword and first entity for the text file. Where am I going wrong?
import requests
import urllib
import urllib2
def call_alchemy_api(text, API_KEY):
payload = {'outputMode':'json','extract':'entities,keywords','sentiment':'1','maxRetrieve':'1', 'url':'https://www.ibm.com/us-en/'}
payload['apikey'] = API_KEY
encoded_text = urllib.quote_plus(text)
payload['text'] = text
data = urllib.urlencode(payload)
url = 'https://gateway-a.watsonplatform.net/calls/text/TextGetCombinedData'
req = urllib2.Request(url, data)
response = urllib2.urlopen(req)
return response
if __name__ == "__main__":
api_key = 'xxxxxxxxxxxxxxxxxxxxxmyapi'
f = open('in0.txt','r')
text = f.read()
print text
response = call_alchemy_api(text, api_key)
print response.read()
Change the maxRetrieve keyword's value.
Example:
payload = {'outputMode':'json','extract':'entities,keywords','sentiment':'1','maxRetrieve':'3', 'url':'https://www.ibm.com/us-en/'}
API Link:
http://www.ibm.com/watson/developercloud/alchemy-language/api/v1/
I'm trying to do some analytics analysis on Instagram photos that are posted with a specified hashtag. So now I'm trying to store all the images in a temporary database that'll be used for the analysis.
I'm using python and I've a celery task to get all the images, but it is not working when I run with a next_max_tag_id, which is probably wrong.
Does someone know how to get the correct next_max_tag_id?
this is the code I'm using:
#task()
def get_latest_photos():
next_max_tag_id = get_option('next_max_tag_id')
if not next_max_tag_id:
next_max_tag_id = 0
url = BASE + '/tags/{tag}/media/recent?client_id={cliend_id}' \
'&max_tag_id={max_id}'.format(**{
'tag': a_tag,
'cliend_id': getattr(settings, 'INSTAGRAM_CLIENT_ID'),
'max_id': next_max_tag_id
})
while url:
request = requests.get(url)
if request.status_code != 200:
pass #TODO: error
json_response = request.json()
if json_response['meta']['code'] != 200:
pass #TODO: error
# do something with json_response['data']:
url = None
if json_response.has_key('pagination'):
pagination = json_response['pagination']
if pagination.has_key('next_url'):
url = json_response['pagination']['next_url']
if pagination.has_key('next_max_tag_id'):
next_max_tag_id = pagination['next_max_tag_id']
update_option('next_max_tag_id', next_max_tag_id)
The flow is basically this:
get next_max_tag_id from the db (defaults to 0)
while we have a valid URL it fetches the data, the next url and the next_max_tag_id
updates the next_max_tag_id
The only thing that seems wrong to me is the next_max_tag_id, because every time I go to the API URL with the last next_max_tag_id I get the old images.
Yes. Here's how to use pagination correctly. You have to loop through the pages and reference the function you're in. You can update the script below that gets everyone you're following and query for next_max_id as well.
currently_following = set([])
def parse_following(next_url=None):
if next_url == None:
urlUserMedia = "https://api.instagram.com/v1/users/self/follows?access_token=%s" % (auth_token)
else:
urlUserMedia = next_url
values = {
'client_id' : client_id}
try:
data = urllib.urlencode(values)
req = urllib2.Request(urlUserMedia,None,headers)
response = urllib2.urlopen(req)
result = response.read()
dataObj = json.loads(result)
next_url = None
if dataObj.get('pagination') is not None:
next_url = dataObj.get('pagination').get('next_url')
currently_following.update(user['id'] for user in dataObj['data'])
if next_url is not None:
parse_following(next_url)
except Exception as e:
print e