I'm trying to score user input against a model hosted in Azure Databricks. For this I can only use urllib, which is part of Python's standard library. So far I have used Requests, and below is the code.
import numpy as np
import pandas as pd
import requests
def create_tf_serving_json(data):
    return {'inputs': {name: data[name].tolist() for name in data.keys()} if isinstance(data, dict) else data.tolist()}

def score_model(model_uri, databricks_token, data):
    headers = {
        "Authorization": f"Bearer {databricks_token}",
        "Content-Type": "application/json",
    }
    data_json = data.to_dict(orient='records') if isinstance(data, pd.DataFrame) else create_tf_serving_json(data)
    response = requests.request(method='POST', headers=headers, url=model_uri, json=data_json)
    if response.status_code != 200:
        raise Exception(f"Request failed with status {response.status_code}, {response.text}")
    return response.json()
# Scoring a model that accepts pandas DataFrames
data = pd.DataFrame([{
    "Pclass": 1,
    "Age": 22,
    "SibSp": 1,
    "Parch": 1,
    "Fare": 50
}])
score_model("My Model URL", 'Databricks Token', data)
Can someone help me do the same with urllib?
Thanks in advance!
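A minimal sketch of the same call using only the standard library (untested; it reuses the create_tf_serving_json helper above and assumes the serving endpoint accepts the same JSON payload):
import json
import urllib.request
import urllib.error
import pandas as pd

def score_model(model_uri, databricks_token, data):
    headers = {
        "Authorization": f"Bearer {databricks_token}",
        "Content-Type": "application/json",
    }
    # Same payload construction as in the requests version above
    data_json = data.to_dict(orient='records') if isinstance(data, pd.DataFrame) else create_tf_serving_json(data)
    body = json.dumps(data_json).encode("utf-8")
    req = urllib.request.Request(model_uri, data=body, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req) as response:
            return json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        raise Exception(f"Request failed with status {e.code}, {e.read().decode('utf-8')}")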
I am pretty new to Python and I am trying to create a script that will pull data from a ticketing platform.
I got the list of agents and their IDs, but when I try to pull the data it gives me this error:
KeyError: 'data'
Is there a way to have the "agents" parameter update automatically using the agent_id list?
Here is the code; I removed the links and the API key for privacy reasons:
import requests
import json
from openpyxl import Workbook
import openpyxl
from datetime import date
from datetime import timedelta
#Agents list
agents_list = ["Agent1", "Agent2", "Agent3"]
agent_id = []
agents_names = []
today = date.today()
yesterday = today - timedelta(days = 1)
start_date = str(yesterday)
end_date = str(yesterday)
def extragere_date_agenti():
    url = "https://x.gorgias.com/api/users?limit=100&order_by=name%3Aasc&roles=agent&roles=admin"
    headers = {
        "accept": "application/json",
        "authorization": "Basic"
    }
    response = requests.get(url, headers=headers)
    text_name_id = json.loads(response.text)
    for names in text_name_id["data"]:
        agent_name = names["firstname"]
        agents_id = names["id"]
        if agent_name in agents_list:
            agents_names.append(agent_name)
            agent_id.append(agents_id)
extragere_date_agenti()
def extragere_numere():
    url = "https://x.gorgias.com/api/stats/total-messages-sent"
    payload = {"filters": {
        "period": {
            "start_datetime": start_date + "T00:00:00-05:00",
            "end_datetime": end_date + "T23:59:59-05:00"
        },
        "agents": [agent_id],  # This is the value that I want to modify
        "channels": ["email"]
    }}
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic"
    }
    response = requests.post(url, json=payload, headers=headers)
    text_numere = json.loads(response.text)
    numere_finale = text_numere["data"]["data"]["value"]
    print(numere_finale)
I've tried to do a for loop but it's giving me the same error. Any suggestions?
First, add a condition to check the response status code.
Also, add another condition to prevent this type of key error:
if "data" in text_name_id:
Your error:
KeyError: 'data'
means that there is no key named "data" in text_name_id.
It is difficult to tell you how to fix it without more info...
Are you sure that the request returns a positive status? I see no error handling; if response.status_code == 200: should be enough to check.
Are you sure that the response JSON has a key named "data"? Try this to set a default if the key is missing:
text_name_id.get("data", [{"firstname": "error", "id": 0}])
--- Edit ---
Okay, is that the right one? I don't see an "id" or "firstname" key. But if it is the right JSON, then you can't iterate over the dict the way you did. To do so you would want to do this:
for key, value in text_name_id['data']['data'].items():
    ...
I am trying to extract the data in the table at https://www.ecoregistry.io/emit-certifications/ra/10
Using the Google developer tools > Network tab, I am able to get the JSON link where the data for this table is stored: https://api-front.ecoregistry.io/api/project/10/emitcertifications
I am able to manually copy this json data and extract the information using this code I've written:
import json
import pandas as pd
data = '''PASTE JSON DATA HERE'''
info = json.loads(data)
columns = ['# Certificate', 'Carbon offsets destination', 'Final user', 'Taxpayer subject','Date','Tons delivered']
dat = list()
for x in info['emitcertifications']:
    dat.append([x['consecutive'],x['reasonUsingCarbonOffsets'],x['userEnd'],x['passiveSubject'],x['date'],x['quantity']])
df = pd.DataFrame(dat,columns=columns)
df.to_csv('Data.csv')
I want to automate it such that I can extract the data from the json link: https://api-front.ecoregistry.io/api/project/10/emitcertifications directly instead of manually pasting json data in:
data = '''PASTE JSON DATA HERE'''
The link does not work in Python, or even directly in the browser:
import requests
import json
url = 'https://api-front.ecoregistry.io/api/project/10/emitcertifications'
response = requests.get(url)
info = response.json()
print(json.dumps(info, indent=4))
The error output I get is:
{'status': 0, 'codeMessages': [{'codeMessage': 'ERROR_401', 'param': 'invalid', 'message': 'No autorizado'}]}
When I download the data from the developer tools then this dictionary has 'status':1 and after that all the data is there.
Edit: I tried adding request headers to the request, but it still did not work:
import requests
import json
url = ('https://api-front.ecoregistry.io/api/project/10/emitcertifications')
hdrs = {"accept": "application/json","accept-language": "en-IN,en;q=0.9,hi-IN;q=0.8,hi;q=0.7,en-GB;q=0.6,en-US;q=0.5","authorization": "Bearer null", "content-type": "application/json","if-none-match": "W/\"1326f-t9xxnBEIbEANJdito3ai64aPjqA\"", "lng": "en", "platform": "ecoregistry","sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Google Chrome\";v=\"100\"", "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": "\"Windows\"", "sec-fetch-dest": "empty","sec-fetch-mode": "cors", "sec-fetch-site": "same-site" }
response = requests.get(url, headers = hdrs)
print(response)
info = response.json()
print(json.dumps(info, indent=4))
print(response) gives the output '<Response [304]>', while info = response.json() gives the traceback error 'Expecting value: line 1 column 1 (char 0)'.
Can someone please point me in the right direction?
Thanks in advance!
Posting comment as an answer:
The header required for that API in order to retrieve data is platform: ecoregistry.
import requests as req
import json
resp = req.get('https://api-front.ecoregistry.io/api/project/10/emitcertifications', headers={'platform': 'ecoregistry'})
data = json.loads(resp.text)
print(data.keys())
# dict_keys(['status', 'projectSerialYear', 'yearValidation', 'project', 'emitcertifications'])
print(data['emitcertifications'][0].keys())
# dict_keys(['id', 'auth', 'operation', 'typeRemoval', 'consecutive', 'serialInit', 'serialEnd', 'serial', 'passiveSubject', 'passiveSubjectNit', 'isPublicEndUser', 'isAccept', 'isCanceled', 'isCancelProccess', 'isUpdated', 'isKg', 'reasonUsingCarbonOffsetsId', 'reasonUsingCarbonOffsets', 'quantity', 'date', 'nitEnd', 'userEnd'])
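From there, the column mapping from the question can run directly on the live response instead of the pasted JSON; a short sketch reusing the same names:
import pandas as pd

columns = ['# Certificate', 'Carbon offsets destination', 'Final user', 'Taxpayer subject', 'Date', 'Tons delivered']
dat = [[x['consecutive'], x['reasonUsingCarbonOffsets'], x['userEnd'], x['passiveSubject'], x['date'], x['quantity']]
       for x in data['emitcertifications']]
pd.DataFrame(dat, columns=columns).to_csv('Data.csv')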
I am trying to set up an AWS API Gateway that can receive a POST request and upload a CSV file to S3. Ideally, I would like to make some transformations to the file before uploading it to S3 (renaming and formatting some columns to normalize their names across different uploads).
So far, I have set up my API Gateway to receive the request and send it to an AWS Lambda. I use Lambda proxy integration. The triggered lambda is as follows:
import logging
import pandas as pd
import boto3
logger = logging.getLogger()
logger.setLevel(logging.INFO)
s3 = boto3.client("s3")
def handler(event, context):
    logger.info(f"Event: {event}")
    df = pd.read_csv(event['body']['file'])
    logger.info(f"df1: {df}")
    # Provided parameters
    try:
        code = event['body']['code']
    except KeyError:
        logger.info('Code not provided')
        code = 'Code'
    try:
        date = event['body']['date']
    except KeyError:
        logger.info('Date not provided')
        date = 'Date'
    try:
        debit = event['body']['debit']
    except KeyError:
        logger.info('Debit not provided')
        debit = 'Debit'
    try:
        credit = event['body']['credit']
    except KeyError:
        logger.info('Credit not provided')
        credit = 'Credit'
    try:
        id = event['body']['id']
    except KeyError:
        logger.info('Id not provided')
        id = '001'
    df = df.rename(columns={code: 'Code', date: 'Date', credit: 'Credit', debit: 'Debit'})
    df.to_csv(f's3://bucket/{id}/file.csv', line_terminator='\n', sep=';', date_format='%Y-%m-%d %H:%M:%S')
    return {
        'statusCode': 200,
        'headers': {
            'Content-Type': 'text/csv',
            'Access-Control-Allow-Origin': '*'
        },
        'body': {
            'uploaded': True
        },
        'isBase64Encoded': False
    }
To test this API, I use the following function:
import requests
csv_file = open("file.csv", 'rb')
headers = {"x-api-key": "xxx", "Content-Type":"text/csv"}
url = "https://xxx.execute-api.xxx.amazonaws.com/xxx"
body = {
    "file": csv_file,
    "code": "my_code"
}
# files = {
# "file": ("file.csv", open('file.csv', 'r'), 'text/csv')
# }
r = requests.post(url=url, headers=headers, data=body)
print(r.text)
The output is {"message": "Internal server error"}, and if I look in CloudWatch logs, I see that the event is encoded this way:
'body': 'file=%EF%BB%BFCol1%3BCol2%3BCol3%3BCol4%0D%0A&file=11%3B12%3B13%3B14%3B%0D%0A&file=21%3B22%3B23%3B24%3B...'
It looks like the body is encoded and passed row by row into different "file" fields. For a file with about 5000 rows I get the error OSError: [Errno 36] File name too long when trying to read it.
Is there another way to proceed in order to get a full dataset that I can transform into a pandas dataframe?
I have also seen suggestions using multipart/form-data, passing files=files in the request, or using the csv library, but I keep getting similar errors.
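For reference, one possible direction is to send the CSV as the raw request body and pass the extra parameters in the query string, then rebuild the DataFrame inside the Lambda from event['body']. This is only a sketch under assumptions (the URL, API key and the ';' separator are placeholders taken from the question), not a verified fix:
# Client side: send the raw CSV bytes as the body, parameters in the query string
import requests

with open("file.csv", "rb") as f:
    r = requests.post(
        "https://xxx.execute-api.xxx.amazonaws.com/xxx?code=my_code",
        headers={"x-api-key": "xxx", "Content-Type": "text/csv"},
        data=f.read(),
    )

# Lambda side: with proxy integration the raw body arrives in event['body']
import base64
import io
import pandas as pd

def handler(event, context):
    body = event["body"]
    if event.get("isBase64Encoded"):
        body = base64.b64decode(body).decode("utf-8")
    code = (event.get("queryStringParameters") or {}).get("code", "Code")
    df = pd.read_csv(io.StringIO(body), sep=";")
    # ... rename columns and write to S3 as in the question ...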
Thank you
It shows the error {"success":false,"error":"Missing parameter market"}
import time
import hmac
from requests import Request
import requests
import json
api_key=''
api_secret=''
payload = json.dumps({
    "market": 'XRP/USDT',
    "side": 'BUY',
    "price": 0.7,
    "size": 1,
    "type": "limit",
    "reduceOnly": False,
    "ioc": False,
    "postOnly": False,
    "clientId": None
})
ts = int(time.time() * 1000)
request = Request('POST', 'https://ftx.com/api/orders')
prepared = request.prepare()
signature_payload = f'{ts}{prepared.method}{prepared.path_url}{payload}'.encode()
print(signature_payload)
signature = hmac.new(api_secret.encode(), signature_payload, 'sha256').hexdigest()
prepared.headers['FTX-KEY'] = api_key
prepared.headers['FTX-SIGN'] = signature
prepared.headers['FTX-TS'] = str(ts)
url='https://ftx.com/api/orders'
response = requests.request("POST", url,headers=prepared.headers,data=payload)
print(response.text)
Can you please recommend how to fix this? I have tried many ways but it doesn't work.
You may try
prepared.headers['Content-Type'] = 'application/json'
Without that header, the JSON string passed as data is likely not parsed as JSON by the server, so the API never sees the market field.
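In the question's code, that would mean setting the header on prepared.headers before the final request, for example:
prepared.headers['Content-Type'] = 'application/json'
response = requests.request("POST", url, headers=prepared.headers, data=payload)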
Hello, I know this question is a bit older, but maybe someone else needs an answer.
I sent the prepared request and it worked for me.
from requests import Request, Session
import hmac
import json
import time
from pprint import pprint
API_KEY = ""
API_SECRET = ""
SUBACCOUNT = ""
def place_order(market, side, price, size, order_type, reduceOnly, postOnly, ioc=False):
    endpoint = "https://ftx.com/api/orders"
    ts = int(time.time() * 1000)
    s = Session()
    data = json.dumps({
        "market": market,
        "side": side,
        "price": price,
        "type": order_type,
        "size": size,
        "reduceOnly": reduceOnly,
        "ioc": ioc,
        "postOnly": postOnly
    })
    request = Request("POST", endpoint, data=data)
    prepared = request.prepare()
    signature_payload = f"{ts}{prepared.method}{prepared.path_url}{data}".encode()
    signature = hmac.new(API_SECRET.encode(), signature_payload, "sha256").hexdigest()
    prepared.headers["FTX-KEY"] = API_KEY
    prepared.headers["FTX-SIGN"] = signature
    prepared.headers["FTX-TS"] = str(ts)
    prepared.headers["FTX-SUBACCOUNT"] = SUBACCOUNT
    response = s.send(prepared)
    data = response.json()
    pprint(data)
    return data
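A hypothetical call, using the order parameters from the question:
place_order("XRP/USDT", "buy", 0.7, 1, "limit", False, False)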
I have dynamic API URLs, and each URL returns JSON data in the response like the following.
{
"#type":"connection",
"id":"001ZOZ0B00000000006Z",
"orgId":"001ZOZ",
"name":"WWW3",
"description":"Test connection2",
"createTime":"2018-07-20T18:28:05.000Z",
"updateTime":"2018-07-20T18:28:53.000Z",
"createdBy":"xx.xx#xx.com.dev",
"updatedBy":"xx.xx#xx.com.dev",
"agentId":"001ZOZ08000000000007",
"runtimeEnvironmentId":"001ZOZ25000000000007",
"instanceName":"ShareConsumer",
"shortDescription":"Test connection2",
"type":"TOOLKIT",
"port":0,
"majorUpdateTime":"2018-07-20T18:28:05.000Z",
"timeout":60,
"connParams":{
"WSDL URL":"https://xxxservices1.work.com/xxx/service/xxport2/n5/Integration%20System/API__Data?wsdl",
"Must Understand":"true",
"DOMAIN":"n5",
"agentId":"001ZOZ0800XXX0007",
"agentGroupId":"001ZOZ25000XXX0007",
"AUTHENTICATION_TYPE":"Auto",
"HTTP Password":"********",
"Encrypt password":"false",
"orgId":"001Z9Z",
"PRIVATE_KEY_FILE":"",
"KEY_FILE_TYPE":"PEM",
"mode":"UPDATE",
"CERTIFICATE_FILE_PASSWORD":null,
"CERTIFICATE_FILE":null,
"TRUST_CERTIFICATES_FILE":null,
"Username":"xxx#xxx",
"CERTIFICATE_FILE_TYPE":"PEM",
"KEY_PASSWORD":null,
"TIMEOUT":"60",
"Endpoint URL":"https://wxxservices1.xx.com/xxx/service/xxport2/n5/Integration%20System/API__Data",
"connectionTypes":"NOAUTH",
"HTTP Username":"API#n5",
"Password":"********"
}
}
Now, the catch here is that I have close to 50 URLs that return this type of JSON data. I am iterating over them using the following code, but I am not able to store each URL's response in a pandas DataFrame; only the last response ends up stored.
I would also like to convert the whole DataFrame to CSV.
What is the best method to append each URL's response to the DataFrame and then convert it to CSV?
Python code is as follows:
import requests
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
import json
import pandas as pd
from pandas.io.json import json_normalize
import os
import csv
# CSV file from which we read the IDs; we iterate over it to fetch JSON data for each URL
ConnID_data_read=pd.read_csv('ConnID.csv', delimiter = ',')
df = pd.DataFrame(ConnID_data_read)
user_iics_loginURL='https://xx-us.xxx.com/ma/api/v2/user/login'
headers = {
    'Content-Type': "application/json",
    'Accept': "application/json",
    'cache-control': "no-cache"
}
payload = "{\r\n\"#type\": \"login\",\r\n\"username\": \"xx#xx.com.xx\",\r\n\"password\": \"xxxx\"\r\n}"
response = requests.request("POST", user_iics_loginURL, data=payload, headers=headers)
resp_obj = json.loads(response.text)
session_id = resp_obj['SessionId']
server_URL = resp_obj['serverUrl']
print(session_id)
Finaldf = pd.DataFrame()
for index, row in df.iterrows():
    api_ver = "/api/v2/connection/" + row['id']
    # https://xx-us.xxx.com/saas/api/v2/connection/001ZOZ0B000000000066
    conndetails_url = server_URL + api_ver
    print(conndetails_url)
    act_headers = {
        'icSessionId': session_id,
        'Content-Type': "application/json",
        'cache-control': "no-cache",
    }
    act_response = requests.get(conndetails_url.strip(), headers=act_headers)
    print(act_response.text)
    print("Creating Data Frame on this***********************")
    act_json_data = json.loads(act_response.text)
    flat_json = json_normalize(act_json_data)
    print(flat_json)
    Conndf = pd.DataFrame(flat_json)
    Finaldf.append(Conndf)
Finaldf.to_csv('NewTest.csv')
First thing I notice is:
flat_json = json_normalize(act_json_data)
print(flat_json)
Conndf = pd.DataFrame(flat_json)
When you do flat_json = json_normalize(act_json_data), flat_json is already a DataFrame, so Conndf = pd.DataFrame(flat_json) is unnecessary and redundant; it shouldn't cause a problem, it's just extra code you don't need.
Secondly, here's the issue: when you append the DataFrame, you need to set it equal to itself. So change:
Finaldf.append(Conndf)
to
Finaldf = Finaldf.append(Conndf)
I'd also reset the index, as that's a habit of mine when I append dataframes:
Finaldf = Finaldf.append(Conndf).reset_index(drop=True)
Other than that one line, it looks fine, and you should get the full DataFrame saved to CSV with Finaldf.to_csv('NewTest.csv').
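As a side note, DataFrame.append has been removed in recent pandas versions, so the same loop can also be written by collecting the flattened frames in a list and concatenating once at the end; a minimal sketch (conndetails_url and act_headers built exactly as in the question):
frames = []
for index, row in df.iterrows():
    # build conndetails_url and act_headers as in the question, then:
    act_response = requests.get(conndetails_url.strip(), headers=act_headers)
    frames.append(json_normalize(act_response.json()))
Finaldf = pd.concat(frames, ignore_index=True)
Finaldf.to_csv('NewTest.csv')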