post data using python-requests - python

I'm trying to post the following data, but I'm getting an error. Can you please take a look? Thanks a lot.
I'm posting the same data using Postman. And it works.
def _build_post_data(bike_instance):
    """Build the payload dict that is posted for a single model instance.

    The instance is passed through Django's ``python`` serializer and only
    its ``fields`` mapping is kept, so the result has this shape::

        {
            "apikey": "XXX",
            "data": {
                "created_at": "date_XX",
                "Price": "Decimal_XX"
            }
        }

    The result is a plain dict, not a JSON string; the caller is responsible
    for encoding it.
    """
    # serialize() takes an iterable, so wrap the single instance and unpack
    # the single serialized record that comes back.
    (record,) = serializers.serialize('python', [bike_instance])
    return {
        "apikey": XXX,  # redacted placeholder for the real API key
        "data": record['fields'],
    }
Posting data
bike = Bike.objects.get(pk=XXX)
data = _build_post_data(bike)
dump_data = json.dumps(data, cls=DjangoJSONEncoder)
requests.post(url, data=dump_data)
error
u'{"error":{"message":"422 Unprocessable Entity","errors":[["The data field is required."],["The apikey field is required."]],"status_code":422}}'
data and apikey are already in the dict, so why am I getting an error? Any idea?
Postman works

With Postman you are sending a multipart/form-data request, with requests you only send JSON (the value of the data field in Postman), and are not including the apikey field.
Use a dictionary with the JSON data as one of the values, and pass that in as the files argument. It probably also works as the data argument (sent as application/x-www-form-urlencoded):
# Send both values as plain multipart/form-data fields, as Postman does.
# A (None, value) tuple tells requests to omit the filename, so the server
# sees ordinary form fields instead of file uploads.
form_structure = {'apikey': (None, 'XXXX'), 'data': (None, dump_data)}
requests.post(url, files=form_structure)
# probably works too (sent as application/x-www-form-urlencoded):
# requests.post(url, data={'apikey': 'XXXX', 'data': dump_data})

Related

How to scrape data from sciencedirect

I want to scrape all data from sciencedirect by keyword.
I know that sciencedirect loads its results via AJAX,
so the data on the page can't be extracted directly via the
URL of the search result page.
The page I want to scrape
I've found the JSON data among the numerous requests in the Network tab; in my view, I should be able to get the JSON data from the URL of that request. But I get an error message and garbled output. Here is my code.
The request that contain json
import requests as res
import json
from bs4 import BeautifulSoup
keyword="digital game"
url = 'https://www.sciencedirect.com/search/api?'
payload = {
'tak': keyword,
't': 'ZNS1ixW4GGlMjTKbRHccgZ2dHuMVHqLqNBwYzIZayNb8FZvZFnVnLBYUCU%2FfHTxZMgwoaQmcp%2Foemth5%2FnqtM%2BGQW3NGOv%2FI0ng6yDADzynQO66j9EPEGT0aClusSwPFvKdDbfVcomCzYflUlyb3MA%3D%3D',
'hostname': 'www.sciencedirect.com'
}
r = res.get(url, params = payload)
print(r.content) # get garbled
r = r.json()
print(r) # get error msg
Garbled (not json data I expect)
Error msg (about .json())
Try setting the HTTP headers in the request such as user-agent to mimic a standard web browser. This will return query search results in JSON format.
import requests
keyword = "digital game"
url = 'https://www.sciencedirect.com/search/api?'
headers = {
'User-Agent': 'Mozilla/5.0',
'Accept': 'application/json'
}
payload = {
'tak': keyword,
't': 'ZNS1ixW4GGlMjTKbRHccgZ2dHuMVHqLqNBwYzIZayNb8FZvZFnVnLBYUCU%2FfHTxZMgwoaQmcp%2Foemth5%2FnqtM%2BGQW3NGOv%2FI0ng6yDADzynQO66j9EPEGT0aClusSwPFvKdDbfVcomCzYflUlyb3MA%3D%3D',
'hostname': 'www.sciencedirect.com'
}
r = requests.get(url, headers=headers, params=payload)
# need to check if the response output is JSON
# Default to "" so a response without a Content-Type header falls through to
# the text branch instead of raising TypeError on `"json" in None`.
if "json" in r.headers.get("Content-Type", ""):
    data = r.json()
else:
    # Not JSON: show the status code and keep the raw body for inspection.
    print(r.status_code)
    data = r.text
print(data)
Output:
{'searchResults': [{'abstTypes': ['author', 'author-highlights'], 'authors': [{'order': 1, 'name': 'Juliana Tay'},
..., 'resultsCount': 961}}
I've got the same problem. The point is that sciencedirect.com is using Cloudflare, which blocks access for scraping bots. I've tried different approaches such as cloudscraper, cfscrape, etc., without success. Then I made a small parser based on Selenium which allows me to take metadata from publications and put it into my own JSON file with the following schema:
schema = {
"doi_number": {
"metadata": {
"pub_type": "Review article" | "Research article" | "Short communication" | "Conference abstract" | "Case report",
"open_access": True | False,
"title": "title_name",
"journal": "journal_name",
"date": "publishing_date",
"volume": str,
"issue": str,
"pages": str,
"authors": [
"author1",
"author2",
"author3"
]
}
}
}
If you have any questions or ideas, feel free to contact me.

Print Specific Value from an API Request in Python

I am trying to print the values from an API Request. The JSON file returned is large(4,000 lines) so I am just trying to get specific values from the key value pair and automate a message.
Here is what I have so far:
import requests
import json
import urllib
url = "https://api.github.com/repos/<companyName>/<repoName>/issues" #url
payload = {}
headers = {
'Authorization': 'Bearer <masterToken>' #authorization works fine
}
name = (user.login) #pretty sure nothing is being looked out
url = (url)
print(hello %name, you have a pull request to view. See here %url for more information) # i want to print those keys here
The JSON file (exported from the API GET request) is as follows:
[
{
**"url": "https://github.com/<ompanyName>/<repo>/issues/1000",**
"repository_url": "https://github.com/<ompanyName>/<repo>",
"labels_url": "https://github.com/<ompanyName>/<repo>/issues/1000labels{/name}",
"comments_url": "https://github.com/<ompanyName>/<repo>/issues/1000",
"events_url": "https://github.com/<ompanyName>/<repo>/issues/1000",
"html_url": "https://github.com/<ompanyName>/<repo>/issues/1000",
"id": <id>,
"node_id": "<nodeID>",
"number": 702,
"title": "<titleName>",
"user": {
**"login": "<userName>",**
"id": <idNumber>,
"node_id": "nodeID",
"avatar_url": "https://avatars3.githubusercontent.com/u/urlName?v=4",
"gravatar_id": "",
"url": "https://api.github.com/users/<userName>",
"html_url": "https://github.com/<userName>",
"followers_url": "https://api.github.com/users/<userName>/followers",
"following_url": "https://api.github.com/users/<userName>/following{/other_user}",
"gists_url": "https://api.github.com/users/<userName>/gists{/gist_id}",
"starred_url": "https://api.github.com/users/<userName>/starred{/owner}{/repo}",
"subscriptions_url": "https://api.github.com/users/<userName>/subscriptions",
"organizations_url": "https://api.github.com/users/<userName>/orgs",
"repos_url": "https://api.github.com/users/<userName>/repos",
"events_url": "https://api.github.com/users/<userName>/events{/privacy}",
"received_events_url": "https://api.github.com/users/<userName>/received_events",
"type": "User",
"site_admin": false
},
]
(note this JSON file repeats a few hundred times)
From the API request, I am trying to get the nested "login" and the url.
What am I missing?
Thanks
Edit:
Solved:
import requests
import json
import urllib
url = "https://api.github.com/repos/<companyName>/<repoName>/issues"
payload = {}
headers = {
    'Authorization': 'Bearer <masterToken>'
}
# Pass the headers explicitly; otherwise the Authorization token defined above
# is never sent with the request.
response = requests.get(url, headers=headers).json()
# The endpoint returns a JSON array, so iterate over the issue objects.
for obj in response:
    name = obj['user']['login']
    # Use a separate name so the request URL above is not overwritten.
    issue_url = obj['url']
    print('Hello {0}, you have an outstanding ticket to review. For more information see here:{1}.'.format(name, issue_url))
Since it's a JSON array you have to loop over it. And JSON objects are converted to dictionaries, so you use ['key'] to access the elements.
for obj in response:
name = obj['user']['login']
url = obj['url']
print(f'hello {name}, you have a pull request to view. See here {url} for more information')
You can parse it into Python lists/dictionaries and then access it like any other Python object.
response = requests.get(...).json()
# Each array item is a dict; the login name is nested inside its "user" object.
login = response[0]['user']['login']
You can convert JSON formatted data to a Python dictionary like this:
https://www.w3schools.com/python/python_json.asp
json_data = ... # response from API
# The payload is a JSON array, so json.loads() yields a list of dicts here.
dict_data = json.loads(json_data)
# Index into the parsed result (not an undefined `response` variable).
login = dict_data[0]['user']['login']
url = dict_data[0]['url']

flask: how to make validation on Request JSON and JSON schema?

In flask-restplus API , I need to make validation on request JSON data where I already defined request body schema with api.model. Basically I want to pass input JSON data to API function where I have to validate input JSON data before using API function. To do so, I used RequestParser for doing this task, but the API function was expecting proper JSON data as parameters after request JSON is validated and parsed. To do request JSON validation, first I have to parse received input JSON data, parse its JSON body, validate each then reconstructs it as JSON object, and pass to the API function. Is there any easier way to do this?
input JSON data
{
"body": {
"gender": "M",
"PCT": {
"value": 12,
"device": "roche_cobas"
},
"IL6": {
"value": 12,
"device": "roche_cobas"
},
"CRP": {
"value": 12,
"device": "roche_cobas"
}
}
}
my current attempt in flask
from flask_restplus import Api, Namespace, Resource, fields, reqparse, inputs
from flask import Flask, request, make_response, Response, jsonify
app = Flask(__name__)
api = Api(app)
ns = Namespace('')
feature = api.model('feature', {
'value': fields.Integer(required=True),
'time': fields.Date(required=True)
})
features = api.model('featureList', {
'age': fields.String,
'gender': fields.String(required=True),
'time': fields.Date,
'features': fields.List(fields.Nested(feature, required=True))
})
#ns.route('/hello')
class helloResource(Resource):
#ns.expect(features)
def post(self):
json_dict = request.json ## get input JSON data
## need to parse json_dict to validate expected argument in JSON body
root_parser = reqparse.RequestParser()
root_parser.add_argument('body', type=dict)
root_args = root_parser.parse_args()
jsbody_parser = reqparse.RequestParser()
jsbody_parser.add_argument('age', type=dict, location = ('body',))
jsbody_parser.add_argument('gender', type=dict, location=('body',))
## IL6, CRP could be something else, how to use **kwargs here
jsbody_parser.add_argument('IL6', type=dict, location=('body',))
jsbody_parser.add_argument('PCT', type=dict, location=('body',))
jsbody_parser.add_argument('CRP', type=dict, location=('body',))
jsbody_parser = jsbody_parser.parse_args(req=root_args)
## after validate each argument on input JSON request body, all needs to be constructed as JSON data
json_data = json.dumps(jsonify(jsbody_parser)) ## how can I get JSON again from jsbody_parser
func_output = my_funcs(json_data)
rest = make_response(jsonify(str(func_output)), 200)
return rest
if __name__ == '__main__':
api.add_namespace(ns)
app.run(debug=True)
update: dummy api function
Here is dummy function that expecting json data after validation:
import json
def my_funcs(json_data):
    """Decode a JSON string, print its top-level items and return it as a response.

    Args:
        json_data: JSON text; the loop below expects it to decode to an
            object (dict).

    Returns:
        Whatever ``jsonify`` produces for the decoded data.
    """
    a = json.loads(json_data)
    # dict.items() and print() as a function: the Python 3 spellings of the
    # original iteritems() / print statement.
    for k, v in a.items():
        print(k, v)
    # NOTE(review): jsonify is not imported in this snippet; presumably it is
    # flask.jsonify - confirm.
    return jsonify(a)
current output of above attempt:
I have this on response body:
{
"errors": {
"gender": "'gender' is a required property"
},
"message": "Input payload validation failed"
}
obviously, request JSON input is not handled and not validated in my attempt. I think I have to pass json_dict to RequestParser object, but still can't validate request JSON here. how to make this happen?
I have to validate expected arguments from JSON body, after validation, I want to construct JSON body that gonna be used as a parameter for API function. How can I make this happen? any workaround to achieve this?
parsed JSON must pass to my_funcs
In my post, the request JSON data should be parsed: arguments such as age and gender should be validated as expected arguments in the request JSON, then the validated arguments should be turned back into JSON and passed to my_funcs. How can I make this happen easily in Flask?
I want to make sure flask should parse JSON body and add arguments as it expected, otherwise throw up error. for example:
{
"body": {
"car": "M",
"PCT": {
"value": 12,
"device": "roche_cobas"
},
"IL6": {
"device": "roche_cobas"
},
"CRP": {
"value": 12
}
}
}
if I give JSON data like above for making POST request to a server endpoint, it should give the error. How to make this happen? how to validate POST request JSON for flask API call?
As I tried to convey in our conversation, it appears you are after a serialization and deserialization tool. I have found Marshmallow to be an exceptional tool for this (it is not the only one). Here's a working example of using Marshmallow to validate a request body, converting the validated data back to a JSON string and passing it to a function for manipulation, and returning a response with JSON data:
from json import dumps, loads
from flask import Flask, jsonify, request
from marshmallow import Schema, fields, ValidationError
class BaseSchema(Schema):
    """Schema for the minimal request body: both fields are required."""

    age = fields.Integer(required=True)
    gender = fields.String(required=True)
class ExtendedSchema(BaseSchema):
    """``BaseSchema`` plus three optional string fields."""

    # have a look at the examples in the Marshmallow docs for more complex data structures, such as nested fields.
    IL6 = fields.String()
    PCT = fields.String()
    CRP = fields.String()
def my_func(json_str: str):
    """Your function that requires a JSON string.

    Args:
        json_str: JSON-encoded text.

    Returns:
        The Python object obtained by decoding ``json_str``.
    """
    return loads(json_str)
app = Flask(__name__)
@app.route('/base', methods=["POST"])
def base():
    """Validate the JSON request body against ``BaseSchema`` and echo it back.

    Returns:
        The data returned by ``my_func`` as JSON with status 200, or the
        validation messages as JSON with status 400.
    """
    # Get Request body from JSON
    request_data = request.json
    schema = BaseSchema()
    try:
        # Validate request body against schema data types
        result = schema.load(request_data)
    except ValidationError as err:
        # Return a nice message if validation fails
        return jsonify(err.messages), 400
    # Convert request body back to JSON str
    data_now_json_str = dumps(result)
    response_data = my_func(data_now_json_str)
    # Send data back as JSON
    return jsonify(response_data), 200
@app.route('/extended', methods=["POST"])
def extended():
    """Same as the base view, but validates against ``ExtendedSchema``.

    Returns:
        The data returned by ``my_func`` as JSON with status 200, or the
        validation messages as JSON with status 400.
    """
    request_data = request.json
    schema = ExtendedSchema()
    try:
        # Validate request body against the extended schema
        result = schema.load(request_data)
    except ValidationError as err:
        return jsonify(err.messages), 400
    # Convert the validated data back to a JSON string for my_func
    data_now_json_str = dumps(result)
    response_data = my_func(data_now_json_str)
    return jsonify(response_data), 200
Here's some quick tests to show validation, as well as extending the fields in your request body:
import requests
# Request fails validation
base_data = {
'age': 42,
}
r1 = requests.post('http://127.0.0.1:5000/base', json=base_data)
print(r1.content)
# Request passes validation
base_data = {
'age': 42,
'gender': 'hobbit'
}
r2 = requests.post('http://127.0.0.1:5000/base', json=base_data)
print(r2.content)
# Request with extended properties
extended_data = {
'age': 42,
'gender': 'hobbit',
'IL6': 'Five',
'PCT': 'Four',
'CRP': 'Three'}
r3 = requests.post('http://127.0.0.1:5000/extended', json=extended_data)
print(r3.content)
Hope this helps get you where you're going.

Using the POST Method for Batch Geocoding with ArcGIS Server REST API?

I'm trying to hit my geocoding server's REST API:
[https://locator.stanford.edu/arcgis/rest/services/geocode/USA_StreetAddress/GeocodeServer] (ArcGIS Server 10.6.1)
...using the POST method (which, BTW, could use an example or two, there only seems to be this VERY brief "note" on WHEN to use POST, not HOW: https://developers.arcgis.com/rest/geocode/api-reference/geocoding-geocode-addresses.htm#ESRI_SECTION1_351DE4FD98FE44958C8194EC5A7BEF7D).
I'm trying to use requests.post(), and I think I've managed to get the token accepted, etc..., but I keep getting a 400 error.
Based upon previous experience, this means something about the formatting of the data is bad, but I've cut-&-pasted directly from the Esri support site, this test pair.
# import the requests library
import requests
# Multiple address records
addresses={
"records": [
{
"attributes": {
"OBJECTID": 1,
"Street": "380 New York St.",
"City": "Redlands",
"Region": "CA",
"ZIP": "92373"
}
},
{
"attributes": {
"OBJECTID": 2,
"Street": "1 World Way",
"City": "Los Angeles",
"Region": "CA",
"ZIP": "90045"
}
}
]
}
# Parameters
# Geocoder endpoint
URL = 'https://locator.stanford.edu/arcgis/rest/services/geocode/USA_StreetAddress/GeocodeServer/geocodeAddresses?'
# token from locator.stanford.edu/arcgis/tokens
mytoken = <GeneratedToken>
# output spatial reference id
outsrid = 4326
# output format
format = 'pjson'
# params data to be sent to api
params ={'outSR':outsrid,'f':format,'token':mytoken}
# Use POST to batch geocode
r = requests.post(url=URL, data=addresses, params=params)
print(r.json())
print(r.text)
Here's what I consistently get:
{'error': {'code': 400, 'message': 'Unable to complete operation.', 'details': []}}
I had to play around with this for longer than I'd like to admit, but the trick (I guess) is to use the correct request header and convert the raw addresses to a JSON string using json.dumps().
import requests
import json
url = 'http://sampleserver6.arcgisonline.com/arcgis/rest/services/Locators/SanDiego/GeocodeServer/geocodeAddresses'
headers = { 'Content-Type': 'application/x-www-form-urlencoded' }
addresses = json.dumps({ 'records': [{ 'attributes': { 'OBJECTID': 1, 'SingleLine': '2920 Zoo Dr' }}] })
r = requests.post(url, headers = headers, data = { 'addresses': addresses, 'f':'json'})
print(r.text)

Convert Python dictionary to a JSON array

Here's my function which connects to an API:
def order_summary():
"""Get order summary for a specific order"""
# Oauth2 params
headerKey = api_login()
headers = {'Authorization': headerKey}
# Payload params
payloadOrderSum = {
"domainId": 15,
"domainName": "SGL",
"orderId": 3018361
}
# API response
orderSumResp = requests.post(url + "order/summary", data=payloadOrderSum, headers=headers)
print(orderSumResp.content)
The API expects a JSON array as Payload Params which essentially looks like that:
[
{
"domainId": 0,
"domainName": "string",
"orderId": 0
}
]
The other endpoints I coded for on this API didn't need for the params to be an array so I could just use them as is and send them as a dictionary and it worked.
I've tried a couple things using the JSON library but I can't seem to get it to work. I saw that the JSonEncoder converts lists and tuples to JSON arrays but I couldn't figure it out.
Not sure what other info I could provide but just ask if there are any.
Thanks!
Wrap payloadOrderSum into a list:
payloadOrderSum = {
"domainId": 15,
"domainName": "SGL",
"orderId": 3018361
}
orderSumResp = requests.post(url + "order/summary", json=[payloadOrderSum], headers=headers)
Note that I used json kwarg instead of data (added in version 2.4.2).
dump your dict with json.dumps requests-doc
r = requests.post(url, data=json.dumps(payload))
It could help if you specify what you tried with the JSON library.
However, you might wanna try this if you haven't already done so:
import json
# The API expects a JSON array, so wrap the single order dict in a list
# before serializing; dumping the bare dict would produce a JSON object.
payloadOrderSum = json.dumps(
    [
        {
            "domainId": 15,
            "domainName": "SGL",
            "orderId": 3018361
        }
    ]
)

Categories

Resources