how to fix response.json() no data available error? - python

Earlier today I was trying to web scrape some climate data from here: https://www.weather.gov/wrh/Climate?wfo=sew. I have since found a better way to get the data I need, but I'm still confused about the code issue.
I'm new to web scraping but have the following code. The session.post() request returns a status code 200, which I assume means the request succeeded, but req.json() returns "{'error': 'no data available'}". Does anyone know why this might be?
s = requests.session()
url_search = 'https://data.rcc-acis.org/StnData'
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'}
data = {"elems":[{"name":"maxt","add":"t"},{"name":"mint","add":"t"},{"name":"avgt","add":"t"},{"name":"avgt","normal":"departure91","add":"t"},{"name":"hdd","add":"t"},{"name":"cdd","add":"t"},{"name":"pcpn","add":"t"},{"name":"snow","add":"t"},{"name":"snwd","add":"t"}],"sid":"KRNT+5","sDate":"2022-12-01","eDate":"2022-12-31"}
req = s.post(url_search, headers = headers, json = data)
j = req.json()

Try to pass data in data= parameter, not in json= parameter. Also, try to remove the + part from the "sid" parameter:
import pandas as pd
import requests
api_url = "https://data.rcc-acis.org/StnData"
payload = {
"params": '{"elems":[{"name":"maxt","add":"t"},{"name":"mint","add":"t"},{"name":"avgt","add":"t"},{"name":"avgt","normal":"departure91","add":"t"},{"name":"hdd","add":"t"},{"name":"cdd","add":"t"},{"name":"pcpn","add":"t"},{"name":"snow","add":"t"},{"name":"snwd","add":"t"}],"sid":"OLMthr","sDate":"2022-12-01","eDate":"2022-12-31"}',
"output": "json",
}
data = requests.post(api_url, data=payload).json()
print(data)
Prints:
{
"meta": {"state": "WA", "sids": ["OLMthr 9"], "uid": 32815, "name": "Olympia Area"},
"data": [
[
"2022-12-01",
["36", 24],
["22", 24],
["29.0", 24],
["-11.2", 24],
["36", 24],
["0", 24],
["0.07", 24],
["M", -1],
["M", -1],
],
[
"2022-12-02",
["41", 24],
["27", 24],
["34.0", 24],
["-6.0", 24],
["31", 24],
["0", 24],
["0.43", 24],
["M", -1],
["M", -1],
],
...and so on.

Related

Python: Replace Python text - CURL Python

I need the value of the variable NUEMRODEDNI to be inserted into the request body (at the spot marked EDIT HERE). How is that done?
Excuse me I'm a newbie
I don't know how to use a variable defined in python, I need it to be replaced as shown in the image
import string
import requests
from requests.structures import CaseInsensitiveDict
url = "www.url.com:8022/SistemaIdentificacion/servlet/com.personas.consultapersona?030bf8cfcd4bfccbd543df61b1b43f67,gx-no-cache=1648440596691"
NUEMRODEDNI = "41087712"
headers = CaseInsensitiveDict()
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0"
headers["Accept"] = "*/*"
headers["Accept-Language"] = "es-AR,es;q=0.8,en-US;q=0.5,en;q=0.3"
headers["Accept-Encoding"] = "gzip, deflate"
headers["GxAjaxRequest"] = "1"
headers["Content-Type"] = "application/json"
headers["AJAX_SECURITY_TOKEN"] = "a6da9873adb..."
headers["X-GXAUTH-TOKEN"] = "eyJ0eXAiOiJ..."
headers["Origin"] = "http://www.url.com"
headers["Connection"] = "keep-alive"
headers["Referer"] = "www.url.com/SistemaIdentificacion/servlet/com.personas.consultapersona"
headers["Cookie"] = "GX_CLIENT_ID=0496f100-9e4e-4e36-a68d-ba3770ee2bff; GX_SESSION_ID=KUqyHU%2FZbpu96sYlj7Gry8bCYpV6CaSgVk0BLxVCpAU%3D; JSESSIONID=1812E6AC00940BDB325EF9592CB93FF8; GxTZOffset=America/Argentina/Buenos_Aires"
data = '{"MPage":false,"cmpCtx":"","parms":[EDIT HERE,{"s":"M","v":[["","(None)"],["F","Femenino"],["M","Masculino"]]},"M",{"User":"","CompanyCode":0,"Profile":"","UsrOneLoginID":"6647","Depid":1,"UsrLP":6488,"unidad":"","unidadid":"68","IMFParteCuerpo":"","denunciasid":0,"destino":"68","TipoPersona":"","NombreArchivo":"","denorigen":"","macdestinoscodorganigrama":""}],"hsh":[],"objClass":"consultapersona","pkgName":"com.personas","events":["ENTER"],"grids":{}}'
resp = requests.post(url, headers=headers, data=data)
print(resp.content)
Please provide your code typed out rather than a screenshot of it, so that we can simply copy & run it on our end.
Nonetheless, you should try:
# Document number to splice into the request body.
NUMERODNI = "41087712"

# Request-body template. The literal marker EDIT HERE occupies the first
# slot of the "parms" array and is swapped for the document number below.
_BODY_TEMPLATE = (
    '{"MPage":false,"cmpCtx":"","parms":['
    'EDIT HERE,'
    '{"s":"M","v":[["","(None)"],["F","Femenino"],["M","Masculino"]]},'
    '"M",'
    '{"User":"","CompanyCode":0,"Profile":"","UsrOneLoginID":"6647","Depid":1,'
    '"UsrLP":6488,"unidad":"","unidadid":"68","IMFParteCuerpo":"","denunciasid":0,'
    '"destino":"68","TipoPersona":"","NombreArchivo":"","denorigen":"",'
    '"macdestinoscodorganigrama":""}'
    '],"hsh":[],"objClass":"consultapersona","pkgName":"com.personas",'
    '"events":["ENTER"],"grids":{}}'
)

# Plain text substitution: the number is inserted unquoted, exactly as given.
data = _BODY_TEMPLATE.replace("EDIT HERE", NUMERODNI)
print(data)  # '{... "parms":[41087712, ...}'
This solution definitely delivers the desired string as a result.
If your code still does not do what you would like it to, then the actual issue has to be somewhere else.
Since you're POSTing JSON data, it's easier to just keep your data as a Python dict and tell Requests to JSON-ify it (requests.post(json=...)).
That way you don't need string substitution, just use the variable.
I also took the opportunity to make your headers construction shorter – it can just be a dict.
import requests
url = "www.url.com:8022/SistemaIdentificacion/servlet/com.personas.consultapersona?030bf8cfcd4bfccbd543df61b1b43f67,gx-no-cache=1648440596691"
NUEMRODEDNI = "41087712"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
"Accept": "*/*",
"Accept-Language": "es-AR,es;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate",
"GxAjaxRequest": "1",
"Content-Type": "application/json",
"AJAX_SECURITY_TOKEN": "a6da9873adb...",
"X-GXAUTH-TOKEN": "eyJ0eXAiOiJ...",
"Origin": "http://www.url.com",
"Connection": "keep-alive",
"Referer": "www.url.com/SistemaIdentificacion/servlet/com.personas.consultapersona",
"Cookie": "GX_CLIENT_ID=0496f100-9e4e-4e36-a68d-ba3770ee2bff; GX_SESSION_ID=KUqyHU%2FZbpu96sYlj7Gry8bCYpV6CaSgVk0BLxVCpAU%3D; JSESSIONID=1812E6AC00940BDB325EF9592CB93FF8; GxTZOffset=America/Argentina/Buenos_Aires",
}
data = {
"MPage": False,
"cmpCtx": "",
"parms": [
NUEMRODEDNI,
{"s": "M", "v": [["", "(None)"], ["F", "Femenino"], ["M", "Masculino"]]},
"M",
{
"User": "",
"CompanyCode": 0,
"Profile": "",
"UsrOneLoginID": "6647",
"Depid": 1,
"UsrLP": 6488,
"unidad": "",
"unidadid": "68",
"IMFParteCuerpo": "",
"denunciasid": 0,
"destino": "68",
"TipoPersona": "",
"NombreArchivo": "",
"denorigen": "",
"macdestinoscodorganigrama": "",
},
],
"hsh": [],
"objClass": "consultapersona",
"pkgName": "com.personas",
"events": ["ENTER"],
"grids": {},
}
resp = requests.post(url, headers=headers, json=data)
resp.raise_for_status()
print(resp.content)

Python requests POST method unexpected content in response

I am trying to get the EPG data at the web page https://www.meo.pt/tv/canais-programacao/guia-tv using Python requests. I use this module a lot, but mainly the GET method. This request however is using POST. Every time you scroll down the page, a request is sent to the API below using these params to load additional program data to the page:
import requests
#post request
url = 'https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels'
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
'Connection': 'keep-alive',
'Content-Length': '214',
'Content-type': 'application/json; charset=UTF-8',
'Host': 'www.meo.pt',
'Origin': 'https://www.meo.pt',
'Referer': 'https://www.meo.pt/tv/canais-programacao/guia-tv',
'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
'X-KL-Ajax-Request': 'Ajax_Request'
}
data = {"service":"channelsguide",
"channels":["LVTV","TOROTV","CAÇAP","CAÇAV","RTPACRS","CLUBB","MCM T","TRACE","24KITC","E!"],
"dateStart":"2021-04-20T23:00:00.000Z",
"dateEnd":"2021-04-21T23:00:00.000Z",
"accountID":""}
r = requests.post(url=url, headers=headers, data=data)
print(r.text)
I have tried this request both with and without the headers, as I don't know if they are needed for a POST request. However, neither option returns what I was expecting, which was a JSON object containing the program data for these channels.
What am I doing wrong?
Consider using the json argument instead of data in the request call. The json argument serializes your dictionary into a JSON request body, whereas data sends the dictionary form-encoded.
data = {"service":"channelsguide",
"channels":["LVTV","TOROTV","CAÇAP","CAÇAV","RTPACRS","CLUBB","MCM T","TRACE","24KITC","E!"],
"dateStart":"2021-04-20T23:00:00.000Z",
"dateEnd":"2021-04-21T23:00:00.000Z",
"accountID":""}
r = requests.post(url=url, headers=headers, json=data)
If you want to keep using the data argument, you should serialize the data dictionary to a JSON string (e.g. with json.dumps) to send the correct body format.
You can use this example how to POST json data to the API Url:
import json
import requests
url = "https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels"
payload = {
"accountID": "",
"channels": [
"SCPHD",
"EURHD",
"EURS2HD",
"DISNY",
"CART",
"BIGGS",
"SICK",
"NICKELO",
"DISNYJ",
"PANDA",
],
"dateEnd": "2021-04-21T22:00:00.000Z",
"dateStart": "2021-04-20T22:00:00.000Z",
"service": "channelsguide",
}
data = requests.post(url, json=payload).json()
# pretty print the data:
print(json.dumps(data, indent=4))
Prints:
{
"d": {
"__type": "Ptsi.Isites.GridTv.CanaisService.GridTV",
"ExtensionData": {},
"services": [],
"channels": [
{
"__type": "Ptsi.Isites.GridTv.CanaisService.Channels",
"ExtensionData": {},
"id": 36,
"name": "SPORTING TV HD",
"sigla": "SCPHD",
"friendlyUrlName": "Sporting_TV_HD",
"url": "https://meogo.meo.pt/direto?canalUrl=Sporting_TV_HD",
"meogo": true,
"logo": "https://www.meo.pt/PublishingImages/canais/sporting-tv-hd.png",
"isAdult": false,
"categories": [
{
"ExtensionData": {},
"id": 2,
"name": "Desporto"
}
],
...
If you want to keep using data argument you should parse data
dictionary to JSON to send the correct body format.
And you should set headers as:
headers = {
'Content-type': 'application/json'
}
complete code is:
import json
import requests
url = "https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels"
headers = {
'Content-type': 'application/json'
}
payload = {
"accountID": "",
"channels": [
"SCPHD",
"EURHD",
"EURS2HD",
"DISNY",
"CART",
"BIGGS",
"SICK",
"NICKELO",
"DISNYJ",
"PANDA",
],
"dateEnd": "2021-04-21T22:00:00.000Z",
"dateStart": "2021-04-20T22:00:00.000Z",
"service": "channelsguide",
}
resp = requests.post(url,headers=headers,data=json.dumps(payload))
print(resp.text)

JSON to Python List

Context: Using Visual Studio Code, trying to convert my JSON response into a Python list, so I can add it to a Google Sheet.
I'd like to convert my response into a List of JSON Lists (like the "working example" below)
Working Example
RandomJson = [
['hello',2],
["hi",3]
]
bla = sheet.values().update(spreadsheetId=SAMPLE_SPREADSHEET_ID, range='sheet1!A1', valueInputOption="USER_ENTERED", body={"values":RandomJson}).execute()
I've tried a number of ways, but I cannot get "My Data Set" into the "Desired Format"
Can anybody help please?
My Data Set
{
"data": {
"tokens": [
{
"name": "FMX Token",
"symbol": "FMXT"
},
{
"name": "HeavensGate",
"symbol": "HATE"
},
{
"name": "Shrimp Swap",
"symbol": "Shrimp"
}
]
}
}
Desired Format
RandomJson = [
["FMX Token","FMXT"],
["HeavensGate","HATE"],
["Shrimp Swap","Shrimp"]
]
Edit - Full Code
I have made a change suggested in the comments, and also added "j = json.loads(JsonData)"
I'm now getting an error:
"googleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets//values/sheet1%21A1?valueInputOption=USER_ENTERED&alt=json returned "Invalid JSON payload received. Unknown name "FMX Token" at 'data.values': Cannot find field."
import requests
import gspread
import json
import os.path
import pprint
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2 import service_account
SERVICE_ACCOUNT_FILE = 'CredEDs.json'
SCOPES = ["https://spreadsheets.google.com/feeds","https://www.googleapis.com/auth/spreadsheets","https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = None
creds = service_account.Credentials.from_service_account_file(
SERVICE_ACCOUNT_FILE, scopes=SCOPES)
SAMPLE_SPREADSHEET_ID = ''
service = build('sheets','v4',credentials=creds)
sheet = service.spreadsheets()
headers = {
'authority': 'api.thegraph.com',
'accept': '*/*',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'content-type': 'application/json',
'origin': 'https://info.uniswap.org',
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://info.uniswap.org',
'accept-language': 'en-GB,en;q=0.9,es-419;q=0.8,es;q=0.7,en-US;q=0.6',
}
data = rb'{"operationName":"tokens","variables":{"skip":500},"query":"query tokens($skip: Int\u0021) {\n tokens(first: 500, skip: $skip) {\n name\n symbol\n}\n}\n"}'
response = requests.post('https://api.thegraph.com/subgraphs/name/uniswap/uniswap-v2', headers=headers, data=data)
JsonData = response.text
j = json.loads(JsonData)
result = {token['name']: token['symbol'] for token in j['data']['tokens']}
bla = sheet.values().update(spreadsheetId=SAMPLE_SPREADSHEET_ID, range='sheet1!A1', valueInputOption="USER_ENTERED", body={"values":result}).execute()
It's as simple as:
# Build one [name, symbol] row per token from the parsed response `j`.
# `data` is the raw request body (bytes), so indexing it as data['data'] fails;
# the Sheets "values" field needs a list of row lists, not a dict.
result = [[token['name'], token['symbol']] for token in j['data']['tokens']]

Error 403 when sending a post request to a site with Python

So I am coding a simple script for a site that adds the product to the cart and then checks out. The script was working fine and I decided to rewrite it so the code was cleaner, but now I am facing a problem when making the post request to add the product to my cart which I was not facing before and the site hasn't changed anything. I think there might be a problem with my headers but I don't see anything else. This is my code.
import requests, re
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
'origin': 'https://www.colorskates.com',
'connection': 'keep-alive',
'cache-control': 'max-age=0',
'upgrade-insecure-requests': '1'
}
s = requests.Session()
product = input('What shoe would you like to run for: ')
size = input('What size would you like to run for: ')
sizes = {
'35': 37,
'36': 7,
'36.5': 24,
'37': 8,
'37.5': 9,
'38': 10,
'38.5': 22,
'39': 11,
'40': 12,
'40 2/3': 187,
'40.5': 84,
'41': 13,
'41 1/3': 188,
'42': 14,
'42 2/3': 189,
'42.5': 15,
'43': 16,
'43 1/3': 190,
'44': 17,
'44 2/3': 191,
'44.5': 21,
'45': 18,
'45 1/3': 192,
'45.5': 39,
'46': 19,
'46.5': 147,
'47': 47,
'47.5': 117,
'48': 48,
'48.5': 85,
'49.5': 177
}
product_atc = product + '?action=add_product'
product_ids = int(re.search(r'\d+', product).group(0))
atc = s.post(product_atc, data={'id[2]': sizes[size], 'quantity': 1, 'products_id': product_ids}, headers=headers)
if atc.status_code not in (302, 200):
print('Error adding item to cart ' + str(atc.status_code) + '..')
else:
print('ATC Successful..')
The second image is the headers and the form data that has to be in the post request which I am pretty sure I am passing them correctly.
Looks like you are receiving a 403 Forbidden error. This means that you are not authorized to access the resource you are requesting.
The HTTP 403 Forbidden client error status response code indicates that the server understood the request but refuses to authorize it
To fix it, you will have to reference and use the authorization scheme used by the API you are requesting from.
I found my mistake: I didn't have the correct headers. I just fixed them.
headers = {
'authority': 'www.colorskates.com',
'path': product_atc[27:len(product_atc)],
'cache-control': 'max-age=0',
'referer': product,
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
}

Python3: JSON POST Request WITHOUT requests library

I want to send JSON encoded data to a server using only native Python libraries. I love requests but I simply can't use it because I can't use it on the machine which runs the script. I need to do it without.
newConditions = {"con1":40, "con2":20, "con3":99, "con4":40, "password":"1234"}
params = urllib.parse.urlencode(newConditions)
params = params.encode('utf-8')
req = urllib.request.Request(conditionsSetURL, data=params)
urllib.request.urlopen(req)
My server is a local WAMP server. I always get an
urllib.error.HTTPError: HTTP Error 500: Internal Server Error
I am 100% sure that this is NOT a server issue, because the same data, with the same url, on the same machine, with the same server works with the requests library and Postman.
You are not posting JSON; you are posting an application/x-www-form-urlencoded request.
Encode to JSON and set the right headers:
import json
newConditions = {"con1":40, "con2":20, "con3":99, "con4":40, "password":"1234"}
params = json.dumps(newConditions).encode('utf8')
req = urllib.request.Request(conditionsSetURL, data=params,
headers={'content-type': 'application/json'})
response = urllib.request.urlopen(req)
Demo:
>>> import json
>>> import urllib.request
>>> conditionsSetURL = 'http://httpbin.org/post'
>>> newConditions = {"con1":40, "con2":20, "con3":99, "con4":40, "password":"1234"}
>>> params = json.dumps(newConditions).encode('utf8')
>>> req = urllib.request.Request(conditionsSetURL, data=params,
... headers={'content-type': 'application/json'})
>>> response = urllib.request.urlopen(req)
>>> print(response.read().decode('utf8'))
{
"args": {},
"data": "{\"con4\": 40, \"con2\": 20, \"con1\": 40, \"password\": \"1234\", \"con3\": 99}",
"files": {},
"form": {},
"headers": {
"Accept-Encoding": "identity",
"Connection": "close",
"Content-Length": "68",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "Python-urllib/3.4",
"X-Request-Id": "411fbb7c-1aa0-457e-95f9-1af15b77c2d8"
},
"json": {
"con1": 40,
"con2": 20,
"con3": 99,
"con4": 40,
"password": "1234"
},
"origin": "84.92.98.170",
"url": "http://httpbin.org/post"
}

Categories

Resources