Context: Using Visual Studio Code, trying to convert my JSON response into a Python list, so I can add it to a Google Sheet.
I'd like to convert my response into a List of JSON Lists (like the "working example" below)
Working Example
RandomJson = [
['hello',2],
["hi",3]
]
bla = sheet.values().update(spreadsheetId=SAMPLE_SPREADSHEET_ID, range='sheet1!A1', valueInputOption="USER_ENTERED", body={"values":RandomJson}).execute()
I've tried a number of ways, but I cannot get "My Data Set" into the "Desired Format"
Can anybody help please?
My Data Set
{
"data": {
"tokens": [
{
"name": "FMX Token",
"symbol": "FMXT"
},
{
"name": "HeavensGate",
"symbol": "HATE"
},
{
"name": "Shrimp Swap",
"symbol": "Shrimp"
}
]
}
}
Desired Format
RandomJson = [
["FMX Token","FMXT"],
["HeavensGate","HATE"],
["Shrimp Swap","Shrimp"]
]
Edit - Full Code
I have made a change suggested in the comments, and also added "j = json.loads(JsonData)"
I'm now getting an error:
"googleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets//values/sheet1%21A1?valueInputOption=USER_ENTERED&alt=json returned "Invalid JSON payload received. Unknown name "FMX Token" at 'data.values': Cannot find field."
import requests
import gspread
import json
import os.path
import pprint
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2 import service_account
SERVICE_ACCOUNT_FILE = 'CredEDs.json'
SCOPES = ["https://spreadsheets.google.com/feeds","https://www.googleapis.com/auth/spreadsheets","https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = None
creds = service_account.Credentials.from_service_account_file(
SERVICE_ACCOUNT_FILE, scopes=SCOPES)
SAMPLE_SPREADSHEET_ID = ''
service = build('sheets','v4',credentials=creds)
sheet = service.spreadsheets()
headers = {
'authority': 'api.thegraph.com',
'accept': '*/*',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'content-type': 'application/json',
'origin': 'https://info.uniswap.org',
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://info.uniswap.org',
'accept-language': 'en-GB,en;q=0.9,es-419;q=0.8,es;q=0.7,en-US;q=0.6',
}
data = rb'{"operationName":"tokens","variables":{"skip":500},"query":"query tokens($skip: Int\u0021) {\n tokens(first: 500, skip: $skip) {\n name\n symbol\n}\n}\n"}'
response = requests.post('https://api.thegraph.com/subgraphs/name/uniswap/uniswap-v2', headers=headers, data=data)
JsonData = response.text
j = json.loads(JsonData)
result = {token['name']: token['symbol'] for token in j['data']['tokens']}
bla = sheet.values().update(spreadsheetId=SAMPLE_SPREADSHEET_ID, range='sheet1!A1', valueInputOption="USER_ENTERED", body={"values":result}).execute()
It's as simple as a list comprehension over the parsed response (j in your full code):
result = [[token['name'], token['symbol']] for token in j['data']['tokens']]
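Applied to the full code above, that means passing this list (not the dict comprehension) as the values payload; the 400 error appears because a dict like {"FMX Token": "FMXT", ...} does not match the list-of-rows shape the Sheets API expects in body["values"]. A minimal sketch of the tail of the script, assuming the same variables as in the full code:
j = json.loads(JsonData)

# one [name, symbol] row per token
result = [[token['name'], token['symbol']] for token in j['data']['tokens']]

bla = sheet.values().update(
    spreadsheetId=SAMPLE_SPREADSHEET_ID,
    range='sheet1!A1',
    valueInputOption="USER_ENTERED",
    body={"values": result},
).execute()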
Related
Hello guys, I am making a bot with Python requests to register on a site and verify a phone number. However, when I want to extract the cookies, I only get them like this. This is one of the cookies: {'__cf_bm': 'Y0T3GzPQqXsWAnmYUSNWoaEogoto8_OELyA_wo_FUqQ-1662189807-0-Adw17pUjv2YaGcp3ynR1CX3CvGY/4AMBCJWqCn0GIk2qbGXQPifeKXlo5IN+iaDCsqeXrTN32gNXpdybETEYT6U='}
I want to extract them like this for each cookie:
{
"name": "__cf_bm",
"value": "je.AFZpQXtMPL7oZPdgCV6Bo4UxCb3BqMYdu2L7Eol0-1662189210-0-ARq9RgTbX7ItgRQ6dBXYjEdvQS8/fP7JiVibVrHYspk37sUiN3mQOlnHBfkJdMG033inhnAIS92vthv8bLdeYYQ=",
"domain": ".yemeksepeti.com",
"hostOnly": false,
"path": "/",
"secure": true,
"httpOnly": true,
"sameSite": "no_restriction",
"session": false,
"firstPartyDomain": "",
"partitionKey": null,
"expirationDate": 1662191010,
"storeId": "firefox-default",
"id": 1
}
This is a reproducible example that registers and then gets the cookies from the requests response:
import json
import time
import requests
import selenium.webdriver
import urllib3
import random
import string
import secrets
password_length = 13
password = secrets.token_urlsafe(password_length)
file1 = open("Original.txt", "w")
kacAdet = int(input('Kaç adet hesap kaydetmek istiyorsunuz?'))  # "How many accounts do you want to register?"
proxies = {
'http': '23.229.62.241:3128',
}
def random_char(char_num):
return ''.join(random.choice(string.ascii_letters) for _ in range(char_num))
email = random_char(4)
print(random_char(4))
for i in range(kacAdet):
cookies = {
'dhhPerseusGuestId': '1661866956.4391864905.SzayaOAaKf',
'_gcl_au': '1.1.2045638547.1656310028',
'_pxvid': '60553c6b-f5df-11ec-b676-644a4b4e486e',
'_ga_W5PM07D07L': 'GS1.1.1661866956.12.1.1661871822.0.0.0',
'_ga': 'GA1.2.302729117.1656310029',
'_tq_id.TV-276372-1.c7a5': 'b3e61a11d2f82260.1656310030.0.1661871797..',
'__ssid': '288a4fe9f40bd5024bd37c43ed7b18f',
'_fbp': 'fb.1.1656310031494.1660145579',
'device_token': 'eyJhbGciOiJSUzI1NiIsImtpZCI6ImtleW1ha2VyLXZvbG8tZGV2aWNlLXlzLXRyIiwidHlwIjoiSldUIn0.eyJpZCI6Ijk2MzI1ZThhLTVjYmQtNDk3NC1iZmYzLTgzYjdjMWZhMDdiZSIsImNsaWVudF9pZCI6InZvbG8iLCJ1c2VyX2lkIjoidHI4anBkOHMiLCJleHBpcmVzIjo0ODA5OTEwMDQ0LCJ0b2tlbl90eXBlIjoiYmVhcmVyIiwic2NvcGUiOiJERVZJQ0VfVE9LRU4ifQ.qlV25H4fL5mN9uHVHK-0VGpC6UZziOMfxpY5EbDXRCPeitQY-dfks5CA60Qj59alrbna3Ew5SS96HZ7elMOCDWDk9Al3RSBDgCoZNhn10jVexrmqqVBebRZyF27qh49tb1WoGE5FzWvX0MvlZA-1fvXCciPsHlRuKzr6oKPr4iZ2ilRBye9YwClEdpnu2ymg19moJhZn8oDmVotNAnEOC4tsRC64nXHT14rnU3zNx6yds1eeIjImOM-1u99lxOjmbTuBkQJWNEUxucimnA2RUe3VmZKhFronpM46ChdZr0f8xU7hvxGCZ2NxSVlN-uN79oHYThIgcxbaP0NjPajRcg',
'ab.storage.userId.f767a324-7168-4b7e-8687-652a3ac864bd': '%7B%22g%22%3A%22TR_44270161%22%2C%22c%22%3A1661869740576%2C%22l%22%3A1661869740576%7D',
'ab.storage.deviceId.f767a324-7168-4b7e-8687-652a3ac864bd': '%7B%22g%22%3A%224c13c6a1-fbd2-04ec-5d84-b972ec0ba576%22%2C%22c%22%3A1656310045145%2C%22l%22%3A1656310045145%7D',
'_hjSessionUser_1543571': 'eyJpZCI6ImE2MGI0MTQ2LTAzMGQtNWUwOS05YjhmLWZmMGRjNGE4YjRlOCIsImNyZWF0ZWQiOjE2NTYzMTA3MDg3NzIsImV4aXN0aW5nIjp0cnVlfQ==',
'AppVersion': '0ff5617',
'_gid': 'GA1.2.267011508.1661808541',
'__cf_bm': 'tjx3_.dOiiolQhAxWstqrkSP1CKvO_S1N4p.0EeINK4-1661871791-0-AUZhgLbPnAeoeXTbqYZ9zWdoMEV/paQVlVDqhYwfMhjTEpAzR9DL6w/R9PJ/UunzV1kS0E5eHo3I23DtfqQbu2g=',
'dhhPerseusSessionId': '1661866956.9452682616.Q3sF0bSpb0',
'dhhPerseusHitId': '1661871837551.356435850558823550.x7vphunw0q',
'ab.storage.sessionId.f767a324-7168-4b7e-8687-652a3ac864bd': '%7B%22g%22%3A%226aa2e303-cb5c-e1cd-d63c-ae74f0ef9a28%22%2C%22e%22%3A1661873591540%2C%22c%22%3A1661869740577%2C%22l%22%3A1661871791540%7D',
'hl': 'tr',
'_pxhd': '7s9aESLpFRxZZKv6tpZw1ARL/WCcfDvZ/5vEGc7NJzvNODk0RH90lYQB4Eua5Kqza/RouLT9ZEDCwApCgb6LWA==:fdJxKPXZt-rd6WPHmuK4PmpAXFudob0AhmJ-XL04XjMoHApgWh/7FNngAW72wqaDqlEmfZDx6D2EV-3kuOsLqWb15qT-85pfWFSmnayR70k=',
'_px3': 'e298931c318130994efbdc870fa364efa1eab482a8e5234e8667c760ad79fba0:6SPR0+3825cPBG3QzlECszOACYSdPXJLRT/ifAegNKTC7Ky1is4UYFhA01ROdi9t5FPC67v5CjaqHtWa7tzTuQ==:1000:DVJEiHg7S1XrIagRXrM6VXHwU2ozpb8RE2lXnVttxjQFilQ+GEHfG07sM+9BqFSgi69SNwFOgo8/C3CALs2K5rm1G7jyBJ432cNubNkJx+jEy2YeDw5fFuUlytWWtZDt+qcrwX9wwFJi/j6POWqnm9j1aZuVEztpuZnSLs0D0QZBZV0qaOPaQwxJw9vLCsigsgKcWGAbfbRMvVKXx0Cevw==',
'pxcts': 'deb1b68a-2874-11ed-9c26-6c6e70485242',
'tooltip-reorder': 'true',
'_dc_gtm_UA-86673819-11': '1',
'_dd_s': 'logs=1&id=38095de6-1b84-4b17-a32c-c539cfaf0dca&created=1661871796050&expire=1661872741919',
'_pxff_fp': '1',
}
headers = {
'Host': 'www.yemeksepeti.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
'Accept': 'application/json',
'Accept-Language': 'tr-TR,tr;q=0.8,en-US;q=0.5,en;q=0.3',
'Content-Type': 'multipart/form-data; boundary=---------------------------19743795514118722864129047497',
'X-Requested-With': 'XMLHttpRequest',
'X-Device-Fingerprint': 'db5f21349eff189ec4f7664477e01cd4',
'X-Device': 'eyJhbGciOiJSUzI1NiIsImtpZCI6ImtleW1ha2VyLXZvbG8tZGV2aWNlLXlzLXRyIiwidHlwIjoiSldUIn0.eyJpZCI6Ijk2MzI1ZThhLTVjYmQtNDk3NC1iZmYzLTgzYjdjMWZhMDdiZSIsImNsaWVudF9pZCI6InZvbG8iLCJ1c2VyX2lkIjoidHI4anBkOHMiLCJleHBpcmVzIjo0ODA5OTEwMDQ0LCJ0b2tlbl90eXBlIjoiYmVhcmVyIiwic2NvcGUiOiJERVZJQ0VfVE9LRU4ifQ.qlV25H4fL5mN9uHVHK-0VGpC6UZziOMfxpY5EbDXRCPeitQY-dfks5CA60Qj59alrbna3Ew5SS96HZ7elMOCDWDk9Al3RSBDgCoZNhn10jVexrmqqVBebRZyF27qh49tb1WoGE5FzWvX0MvlZA-1fvXCciPsHlRuKzr6oKPr4iZ2ilRBye9YwClEdpnu2ymg19moJhZn8oDmVotNAnEOC4tsRC64nXHT14rnU3zNx6yds1eeIjImOM-1u99lxOjmbTuBkQJWNEUxucimnA2RUe3VmZKhFronpM46ChdZr0f8xU7hvxGCZ2NxSVlN-uN79oHYThIgcxbaP0NjPajRcg',
'Origin': 'https://www.yemeksepeti.com',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'Referer': 'https://www.yemeksepeti.com/login/new?step=registration',
}
data = '-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="_target_path"\r\n\r\nhttps://www.yemeksepeti.com/\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[first_name]"\r\n\r\nmert\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[last_name]"\r\n\r\ndemir\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[email]"\r\n\r\nmertdemir' + email + str(i) + '#gmail.com\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[password]"\r\n\r\n'+str(password)+'\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[birthdate]"\r\n\r\n1999-10-01\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[terms_and_conditions_consent]"\r\n\r\nagreed\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[marketing_consent]"\r\n\r\nopt-in\r\n-----------------------------19743795514118722864129047497\r\nContent-Disposition: form-data; name="customer[marketing_sms_consent]"\r\n\r\nopt-in\r\n-----------------------------19743795514118722864129047497--\r\n'
response = requests.post('https://www.yemeksepeti.com/api/v1/customers/async_register', cookies=cookies,
headers=headers, data=data, verify=False)
my_cookies = requests.utils.dict_from_cookiejar(response.cookies)
print(my_cookies)
print(response.status_code)
Don't use dict_from_cookiejar; write your own code to convert it:
for item in response.cookies:
print('name :', item.name)
print('value :', item.value)
print('domain:', item.domain)
print('path :', item.path)
# ... other attributes ...
print('---')
EDIT:
Alternatively, you can use __dict__ to get all attributes as a dictionary.
for item in response.cookies:
print(item.__dict__)
You may also get the original data as a string and parse it on your own:
print( response.headers['Set-Cookie'] )
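If you go the parse-it-yourself route, the standard library's http.cookies.SimpleCookie can do the splitting. A minimal sketch (note that requests folds multiple Set-Cookie headers into a single comma-separated string, which SimpleCookie does not always split cleanly, so treat this as a starting point):
from http.cookies import SimpleCookie

raw = response.headers.get('Set-Cookie', '')
cookie = SimpleCookie()
cookie.load(raw)

for name, morsel in cookie.items():
    # each Morsel holds the cookie value plus the attributes the server sent
    print(name, morsel.value, dict(morsel))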
EDIT:
import json
all_items = []
for item in response.cookies:
    all_items.append(item.__dict__)
# --- write ---
text = json.dumps(all_items)
with open('data.json', 'w') as fh:
fh.write(text)
# --- read ---
with open('data.json') as fh:
text = fh.read()
all_items = json.loads(text)
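To get something close to the browser-export format shown at the top of the question, you can map the cookiejar attributes by hand. A minimal sketch, with the caveat that requests' cookie jar does not store every field from that export (hostOnly, sameSite, storeId and friends are browser-side data), so only the attributes it does expose are filled in:
import json

exported = []
for c in response.cookies:              # http.cookiejar.Cookie objects
    exported.append({
        "name": c.name,
        "value": c.value,
        "domain": c.domain,
        "path": c.path,
        "secure": c.secure,
        "session": c.expires is None,   # no expiry means a session cookie
        "expirationDate": c.expires,    # unix timestamp, or None
    })

print(json.dumps(exported, indent=4))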
I created a script that checks for the newest video and then comments on it as soon as it finds it. The problem is that the script sometimes gets the video 4 minutes after it is uploaded and sometimes 30 seconds after it is uploaded. I want to get the video as soon as it is uploaded. The code is below:
import time, requests, os
from Google import Create_Service
from subprocess import Popen
SECRET_FILE = 'client.json'
APINAME = 'youtube'
APIVERSION = 'v3'
SCOPE = ["https://www.googleapis.com/auth/youtube.force-ssl"]
service_y = Create_Service(SECRET_FILE, APINAME, APIVERSION, SCOPE)
channel = "https://www.youtube.com/user/UCBqIaQItMBrQjK6NOXB1eQQ"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
}
comment_text = "First comment. Congrats on 100 Mil Subs - I remember watching you count to 1 million."
vid = "sds"
while True:
try:
res = requests.get(channel + "/videos", headers=headers)
except:
script2 = open("script2.py","w")
script2.writelines(["from subprocess import Popen\n","import time,os\n","time.sleep(2)\n",f"Popen('python {os.path.basename(__file__)}')\n","exit(0)"])
Popen("python script2.py")
exit(0)
html = res.text
html = html.split("publishedTimeText")[1].split("}")[0]
print(html)
time.sleep(1)
print(vid)
if vid == html:
pass
else:
if vid == "sds":
vid = html
else:
request = service_y.search().list(part="id", channelId="UCBqIaQItMBrQjK6NOXB1eQQ", order="date", maxResults="1")
response = request.execute()
vid = response["items"][0]["id"]["videoId"]
request = service_y.commentThreads().insert(
part="snippet",
body={
"snippet": {
"videoId": vid,
"topLevelComment": {
"snippet": {
"textOriginal": comment_text
}
}
}
}
)
response = request.execute()
break
Thanks
I am trying to explore Zillow housing data for analysis, but I found that the data I scraped from Zillow is much less than the listing shows.
Here is one example:
I try to pull the house-for-sale listing for 35216:
https://www.zillow.com/birmingham-al-35216/?searchQueryState=%7B%22usersSearchTerm%22%3A%2235216%22%2C%22mapBounds%22%3A%7B%22west%22%3A-86.93997505787829%2C%22east%22%3A-86.62926796559313%2C%22south%22%3A33.33562772711966%2C%22north%22%3A33.51819716059094%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A73386%2C%22regionType%22%3A7%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%2C%22pagination%22%3A%7B%7D%7D
We can see there are 76 records, and if I use the Google Chrome extension Zillow-to-excel, all 76 houses in the listing can be scraped:
https://chrome.google.com/webstore/detail/zillow-to-excel/aecdekdgjlncaadbdiciepplaobhcjgi/related
But when I use Python with requests to scrape Zillow data, only 18-20 records can be scraped.
Here is my code:
import requests
import json
from bs4 import BeautifulSoup as soup
import pandas as pd
import numpy as np
cnt=0
stop_check=0
ele=[]
url='https://www.zillow.com/birmingham-al-35216/'
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6',
'upgrade-insecure-requests': '1',
'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
for i in range(1,2):
params = {
'searchQueryState':'{"pagination":{"currentPage":'+str(i)+'},"usersSearchTerm":"35216","mapBounds":{"west":-86.83314614582643,"east":-86.73781685417354,"south":33.32843303639682,"north":33.511017584543204},"regionSelection":[{"regionId":73386,"regionType":7}],"isMapVisible":true,"filterState":{"sort":{"value":"globalrelevanceex"},"ah":{"value":true}},"isListVisible":true,"mapZoom":13}'
}
page=requests.get(url, headers=headers,params=params,timeout=2)
sp=soup(page.content, 'lxml')
lst=sp.find_all('address',{'class':'list-card-addr'})
ele.extend(lst)
print(i, len(lst))
if len(lst)==0:
stop_check+=1
if stop_check>=3:
print('stop on three empty')
The headers and params come from the web page, captured with the Chrome developer tools. I also tried other searches and found I can only scrape the first 9-11 records on each page.
I know there is a Zillow API, but it could be used for a general search like all houses in a zip code, so I want to try web scraping.
May I have some suggestions on how to fix my code?
Thanks a lot!
You can try this:
import requests
import json
url = 'https://www.zillow.com/search/GetSearchPageState.htm'
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'upgrade-insecure-requests': '1',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}
houses = []
for page in range(1, 3):
params = {
"searchQueryState": json.dumps({
"pagination": {"currentPage": page},
"usersSearchTerm": "35216",
"mapBounds": {
"west": -86.97413567189196,
"east": -86.57244804982165,
"south": 33.346263857015515,
"north": 33.48754107532057
},
"mapZoom": 12,
"regionSelection": [
{
"regionId": 73386, "regionType": 7
}
],
"isMapVisible": True,
"filterState": {
"isAllHomes": {
"value": True
},
"sortSelection": {
"value": "globalrelevanceex"
}
},
"isListVisible": True
}),
"wants": json.dumps(
{
"cat1": ["listResults", "mapResults"],
"cat2": ["total"]
}
),
"requestId": 3
}
# send request
page = requests.get(url, headers=headers, params=params)
# get json data
json_data = page.json()
# loop via data
for house in json_data['cat1']['searchResults']['listResults']:
houses.append(house)
# show data
print('Total houses - {}'.format(len(houses)))
# show info in houses
for house in houses:
if 'brokerName' in house.keys():
print('{}: {}'.format(house['brokerName'], house['price']))
else:
print('No broker: {}'.format(house['price']))
Total houses - 76
RealtySouth-MB-Crestline: $424,900
eXp Realty, LLC Central: $259,900
ARC Realty Mountain Brook: $849,000
Ray & Poynor Properties: $499,900
Hinge Realty: $1,550,000
...
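Since the goal is analysis, the list of result dicts can go straight into pandas (which the question already imports). A small sketch; only brokerName and price are confirmed by the output above, so check df.columns for whatever else Zillow returns:
import pandas as pd

# one column per key that appears in the listResults dicts
df = pd.DataFrame(houses)

print(df[['brokerName', 'price']].head())
df.to_csv('zillow_35216.csv', index=False)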
I am trying to get the EPG data from the web page https://www.meo.pt/tv/canais-programacao/guia-tv using Python requests. I use this module a lot, but mainly with the GET method. This request, however, uses POST. Every time you scroll down the page, a request is sent to the API below with these params to load additional program data into the page:
import requests
#post request
url = 'https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels'
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
'Connection': 'keep-alive',
'Content-Length': '214',
'Content-type': 'application/json; charset=UTF-8',
'Host': 'www.meo.pt',
'Origin': 'https://www.meo.pt',
'Referer': 'https://www.meo.pt/tv/canais-programacao/guia-tv',
'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
'X-KL-Ajax-Request': 'Ajax_Request'
}
data = {"service":"channelsguide",
"channels":["LVTV","TOROTV","CAÇAP","CAÇAV","RTPACRS","CLUBB","MCM T","TRACE","24KITC","E!"],
"dateStart":"2021-04-20T23:00:00.000Z",
"dateEnd":"2021-04-21T23:00:00.000Z",
"accountID":""}
r = requests.post(url=url, headers=headers, data=data)
print(r.text)
I have tried this request both with and without the headers, as I don't know if they are needed for a POST request. However, neither option returns what I was expecting, which is a JSON object containing the program data for these channels.
What am I doing wrong?
Consider using the json argument instead of data in the request function. The json argument serializes your body to JSON, while with data you are sending a raw dictionary (form-encoded).
data = {"service":"channelsguide",
"channels":["LVTV","TOROTV","CAÇAP","CAÇAV","RTPACRS","CLUBB","MCM T","TRACE","24KITC","E!"],
"dateStart":"2021-04-20T23:00:00.000Z",
"dateEnd":"2021-04-21T23:00:00.000Z",
"accountID":""}
r = requests.post(url=url, headers=headers, json=data)
If you want to keep using the data argument, you should serialize the data dictionary to JSON yourself to send the correct body format.
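For example, a minimal sketch of the manually serialized variant, reusing the url and data dictionaries from the question but letting requests compute Content-Length itself (the complete version further below does the same thing):
import json

r = requests.post(url, headers={'Content-type': 'application/json'}, data=json.dumps(data))
print(r.json())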
You can use this example of how to POST JSON data to the API URL:
import json
import requests
url = "https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels"
payload = {
"accountID": "",
"channels": [
"SCPHD",
"EURHD",
"EURS2HD",
"DISNY",
"CART",
"BIGGS",
"SICK",
"NICKELO",
"DISNYJ",
"PANDA",
],
"dateEnd": "2021-04-21T22:00:00.000Z",
"dateStart": "2021-04-20T22:00:00.000Z",
"service": "channelsguide",
}
data = requests.post(url, json=payload).json()
# pretty print the data:
print(json.dumps(data, indent=4))
Prints:
{
"d": {
"__type": "Ptsi.Isites.GridTv.CanaisService.GridTV",
"ExtensionData": {},
"services": [],
"channels": [
{
"__type": "Ptsi.Isites.GridTv.CanaisService.Channels",
"ExtensionData": {},
"id": 36,
"name": "SPORTING TV HD",
"sigla": "SCPHD",
"friendlyUrlName": "Sporting_TV_HD",
"url": "https://meogo.meo.pt/direto?canalUrl=Sporting_TV_HD",
"meogo": true,
"logo": "https://www.meo.pt/PublishingImages/canais/sporting-tv-hd.png",
"isAdult": false,
"categories": [
{
"ExtensionData": {},
"id": 2,
"name": "Desporto"
}
],
...
If you want to keep using the data argument, you should serialize the data dictionary to JSON to send the correct body format.
And you should set the headers as:
headers = {
'Content-type': 'application/json'
}
The complete code is:
import json
import requests
url = "https://www.meo.pt/_layouts/15/Ptsi.Isites.GridTv/GridTvMng.asmx/getProgramsFromChannels"
headers = {
'Content-type': 'application/json'
}
payload = {
"accountID": "",
"channels": [
"SCPHD",
"EURHD",
"EURS2HD",
"DISNY",
"CART",
"BIGGS",
"SICK",
"NICKELO",
"DISNYJ",
"PANDA",
],
"dateEnd": "2021-04-21T22:00:00.000Z",
"dateStart": "2021-04-20T22:00:00.000Z",
"service": "channelsguide",
}
resp = requests.post(url,headers=headers,data=json.dumps(payload))
print(resp.text)
I'm trying to get a JSON response by issuing a POST HTTP request with the appropriate parameters to a webpage. When I run the script, I see that it gets stuck and doesn't produce any result. It doesn't throw any error either. This is the site link. I chose three options from the three dropdowns of the form on that site before hitting the Get times & tickets button.
I've tried with:
import requests
from bs4 import BeautifulSoup
url = 'https://www.thetrainline.com/'
link = 'https://www.thetrainline.com/api/journey-search/'
payload = {
    "passengers": [{"dateOfBirth": "1991-01-31"}],
    "isEurope": False,
    "cards": [],
    "transitDefinitions": [{
        "direction": "outward",
        "origin": "1f06fc66ccd7ea92ae4b0a550e4ddfd1",
        "destination": "7c25e933fd14386745a7f49423969308",
        "journeyDate": {"type": "departAfter", "time": "2021-02-11T22:45:00"}
    }],
    "type": "single",
    "maximumJourneys": 4,
    "includeRealtime": True,
    "applyFareDiscounts": True
}
with requests.Session() as s:
s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'
s.headers['content-type'] = 'application/json'
s.headers['accept'] = 'application/json'
r = s.post(link,json=payload)
print(r.status_code)
print(r.json())
How can I get a JSON response by issuing a POST request with parameters to that site?
You are missing the required headers: x-version and referer. The referer header refers to the search form URL, and you can build it. Before the journey-search request, you have to post an availability request.
import requests
from requests.models import PreparedRequest
headers = {
'authority': 'www.thetrainline.com',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'x-version': '2.0.18186',
'dnt': '1',
'accept-language': 'en-GB',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/88.0.4324.96 Safari/537.36',
'content-type': 'application/json',
'accept': 'application/json',
'origin': 'https://www.thetrainline.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
}
with requests.Session() as s:
origin = "6e2242b3f38bbbd8d8124e1d84d319e1"
destination = "15bcf02bc44ea754837c8cf14569f608"
localDateTime = "2021-02-03T19:30:00"
dateOfBirth = "1991-02-03"
passenger_type = "single"
req = PreparedRequest()
url = "http://www.neo4j.com"
params = {
"origin": origin,
"destination": destination,
"outwardDate": localDateTime,
"outwardDateType": "departAfter",
"journeySearchType": passenger_type,
"passengers[]": dateOfBirth
}
req.prepare_url("https://www.thetrainline.com/book/results", params)
headers.update({"referer": req.url})
s.headers = headers
payload_availability = {
"origin": origin,
"destination": destination,
"outwardDefinition": {
"localDateTime": localDateTime,
"searchMethod": "DEPARTAFTER"
},
"passengerBirthDates": [{
"id": "PASSENGER-0",
"dateOfBirth": dateOfBirth
}],
"maximumNumberOfJourneys": 4,
"discountCards": []
}
r = s.post('https://www.thetrainline.com/api/coaches/availability', json=payload_availability)
r.raise_for_status()
payload_search = {
"passengers": [{"dateOfBirth": "1991-02-03"}],
"isEurope": False,
"cards": [],
"transitDefinitions": [{
"direction": "outward",
"origin": origin,
"destination": destination,
"journeyDate": {
"type": "departAfter",
"time": localDateTime}
}],
"type": passenger_type,
"maximumJourneys": 4,
"includeRealtime": True,
"applyFareDiscounts": True
}
r = s.post('https://www.thetrainline.com/api/journey-search/', json=payload_search)
r.raise_for_status()
print(r.json())
As in Sers's reply, headers are missing.
When crawling websites, you have to keep anti-crawling mechanisms in mind: the website may block your requests based on your IP address, request headers, cookies, and various other factors.
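As a minimal illustration, the sketch below sets only the headers that Sers's answer identifies as required (x-version and referer, plus the usual user-agent and content negotiation ones); the x-version value is copied from that answer and may change over time, and the referer should point at the built search-results URL:
import requests

s = requests.Session()
s.headers.update({
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36',
    'accept': 'application/json',
    'content-type': 'application/json',
    'x-version': '2.0.18186',                                # value taken from the answer above
    'referer': 'https://www.thetrainline.com/book/results',  # should carry the search params, as built above
})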