I need the variable defined as NUEMRODEDNI to be substituted into the request payload. How is that done?
Excuse me, I'm a newbie.
I don't know how to use a variable defined in Python; I need it to be replaced as shown in the image.
import string
import requests
from requests.structures import CaseInsensitiveDict
url = "www.url.com:8022/SistemaIdentificacion/servlet/com.personas.consultapersona?030bf8cfcd4bfccbd543df61b1b43f67,gx-no-cache=1648440596691"
NUEMRODEDNI = "41087712"
headers = CaseInsensitiveDict()
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0"
headers["Accept"] = "*/*"
headers["Accept-Language"] = "es-AR,es;q=0.8,en-US;q=0.5,en;q=0.3"
headers["Accept-Encoding"] = "gzip, deflate"
headers["GxAjaxRequest"] = "1"
headers["Content-Type"] = "application/json"
headers["AJAX_SECURITY_TOKEN"] = "a6da9873adb..."
headers["X-GXAUTH-TOKEN"] = "eyJ0eXAiOiJ..."
headers["Origin"] = "http://www.url.com"
headers["Connection"] = "keep-alive"
headers["Referer"] = "www.url.com/SistemaIdentificacion/servlet/com.personas.consultapersona"
headers["Cookie"] = "GX_CLIENT_ID=0496f100-9e4e-4e36-a68d-ba3770ee2bff; GX_SESSION_ID=KUqyHU%2FZbpu96sYlj7Gry8bCYpV6CaSgVk0BLxVCpAU%3D; JSESSIONID=1812E6AC00940BDB325EF9592CB93FF8; GxTZOffset=America/Argentina/Buenos_Aires"
data = '{"MPage":false,"cmpCtx":"","parms":[EDIT HERE,{"s":"M","v":[["","(None)"],["F","Femenino"],["M","Masculino"]]},"M",{"User":"","CompanyCode":0,"Profile":"","UsrOneLoginID":"6647","Depid":1,"UsrLP":6488,"unidad":"","unidadid":"68","IMFParteCuerpo":"","denunciasid":0,"destino":"68","TipoPersona":"","NombreArchivo":"","denorigen":"","macdestinoscodorganigrama":""}],"hsh":[],"objClass":"consultapersona","pkgName":"com.personas","events":["ENTER"],"grids":{}}'
resp = requests.post(url, headers=headers, data=data)
print(resp.content)
Please provide your code typed out rather than a screenshot of it, so that we can simply copy and run it on our end.
Nonetheless, you should try (note the variable is renamed NUMERODNI here):
NUMERODNI = "41087712"
data = '{"MPage":false,"cmpCtx":"","parms":[EDIT HERE,{"s":"M","v":[["","(None)"],["F","Femenino"],["M","Masculino"]]},"M",{"User":"","CompanyCode":0,"Profile":"","UsrOneLoginID":"6647","Depid":1,"UsrLP":6488,"unidad":"","unidadid":"68","IMFParteCuerpo":"","denunciasid":0,"destino":"68","TipoPersona":"","NombreArchivo":"","denorigen":"","macdestinoscodorganigrama":""}],"hsh":[],"objClass":"consultapersona","pkgName":"com.personas","events":["ENTER"],"grids":{}}'
data = data.replace("EDIT HERE", NUMERODNI)
print(data) # '{... "parms":[41087712, ...}'
This solution definitely delivers the desired string as a result.
If your code still does not do what you want, then the actual issue must be somewhere else.
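If you prefer not to keep an EDIT HERE placeholder in the string, an f-string works too. A minimal sketch with a shortened payload (the full JSON string is the same as above):
NUMERODNI = "41087712"
# Doubled braces {{ }} produce literal { } in the output; {NUMERODNI} is interpolated.
data = f'{{"MPage":false,"parms":[{NUMERODNI}]}}'
print(data)  # {"MPage":false,"parms":[41087712]}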
Since you're POSTing JSON data, it's easier to keep your data as a Python dict and tell Requests to JSON-ify it (requests.post(json=...)). That way you don't need string substitution; you just use the variable.
I also took the opportunity to shorten your headers construction: it can just be a plain dict.
import requests
url = "www.url.com:8022/SistemaIdentificacion/servlet/com.personas.consultapersona?030bf8cfcd4bfccbd543df61b1b43f67,gx-no-cache=1648440596691"
NUEMRODEDNI = "41087712"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
"Accept": "*/*",
"Accept-Language": "es-AR,es;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate",
"GxAjaxRequest": "1",
"Content-Type": "application/json",
"AJAX_SECURITY_TOKEN": "a6da9873adb...",
"X-GXAUTH-TOKEN": "eyJ0eXAiOiJ...",
"Origin": "http://www.url.com",
"Connection": "keep-alive",
"Referer": "www.url.com/SistemaIdentificacion/servlet/com.personas.consultapersona",
"Cookie": "GX_CLIENT_ID=0496f100-9e4e-4e36-a68d-ba3770ee2bff; GX_SESSION_ID=KUqyHU%2FZbpu96sYlj7Gry8bCYpV6CaSgVk0BLxVCpAU%3D; JSESSIONID=1812E6AC00940BDB325EF9592CB93FF8; GxTZOffset=America/Argentina/Buenos_Aires",
}
data = {
"MPage": False,
"cmpCtx": "",
"parms": [
NUEMRODEDNI,
{"s": "M", "v": [["", "(None)"], ["F", "Femenino"], ["M", "Masculino"]]},
"M",
{
"User": "",
"CompanyCode": 0,
"Profile": "",
"UsrOneLoginID": "6647",
"Depid": 1,
"UsrLP": 6488,
"unidad": "",
"unidadid": "68",
"IMFParteCuerpo": "",
"denunciasid": 0,
"destino": "68",
"TipoPersona": "",
"NombreArchivo": "",
"denorigen": "",
"macdestinoscodorganigrama": "",
},
],
"hsh": [],
"objClass": "consultapersona",
"pkgName": "com.personas",
"events": ["ENTER"],
"grids": {},
}
resp = requests.post(url, headers=headers, json=data)
resp.raise_for_status()
print(resp.content)
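If you end up querying several DNIs, a small helper keeps the payload template intact. A sketch building on the data dict above:
import copy

def build_payload(numero_dni):
    # Deep-copy so repeated calls never mutate the shared template dict.
    payload = copy.deepcopy(data)
    payload["parms"][0] = numero_dni  # the DNI is the first entry in "parms"
    return payload

resp = requests.post(url, headers=headers, json=build_payload("41087712"))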
I was trying to scrape an option data table. The website has a drop-down menu to select the expiration.
I can see that the page is making API calls like the one below to fetch data. However, if I use the link to send a request from Python, I get nothing. Why is that, and how can I correct it?
import requests ##### to connect to web for data
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
#from io import BytesIO
#from zipfile import ZipFile
date=dt.datetime.today().strftime("%d%m%Y")
#date='09072021'
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/80.0.3987.132 Safari/537.36','Accept-Language': 'en-US,en;q=0.9','Accept-Encoding': 'gzip, deflate'}
x= True
url="https://www.barchart.com/proxies/core-api/v1/options/get?baseSymbol=%24SPX&fields=symbol%2CbaseSymbol%2CstrikePrice%2Cmoneyness%2CbidPrice%2Cmidpoint%2CaskPrice%2ClastPrice%2CpriceChange%2CpercentChange%2Cvolume%2CopenInterest%2CvolumeOpenInterestRatio%2Cvolatility%2CoptionType%2CdaysToExpiration%2CexpirationDate%2CtradeTime%2CweightedImpliedVolatility%2ChistoricVolatility20d%2CsymbolCode%2CsymbolType&groupBy=optionType&expirationDate=nearest&meta=field.shortName%2Cexpirations%2Cfield.description&orderBy=strikePrice&orderDir=asc&raw=1"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0'}
with requests.Session() as req:
req.headers.update(headers)
response = req.get(url).json()
You can try this code:
import requests
import json
from urllib.parse import unquote
# create session
session = requests.Session()
# set user agent to avoid cloudflare
session.headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'
# create url
url = 'https://www.barchart.com/proxies/core-api/v1/options/get'
# create params for start page
params = {
'view': 'stacked',
'expiration': '2021-07-26-w'
}
# load first page
session.get(
'https://www.barchart.com/stocks/quotes/$SPX/options', params=params
)
# create params for AJAX
params_ajax = {
'baseSymbol': '$SPX',
'fields': ','.join(
[
'symbol', 'baseSymbol', 'strikePrice', 'moneyness', 'bidPrice',
'midpoint', 'askPrice', 'lastPrice', 'priceChange',
'percentChange', 'volume', 'openInterest',
'volumeOpenInterestRatio', 'volatility', 'optionType',
'daysToExpiration', 'expirationDate', 'tradeTime',
'weightedImpliedVolatility', 'historicVolatility20d',
'symbolCode', 'symbolType'
]
),
'groupBy': 'optionType',
'expirationDate': '2021-07-28',
'meta': ','.join(['field.shortName', 'expirations', 'field.description']),
'orderBy': 'strikePrice',
'orderDir': 'asc',
'expirationType': 'weekly',
'raw': 1
}
# change headers
session.headers['Accept'] = 'application/json'
session.headers['X-XSRF-TOKEN'] = unquote(session.cookies['XSRF-TOKEN'])
# get result
result = session.get(url, params=params_ajax).json()
print(json.dumps(result, indent=4))
{
"count": 2,
"total": 458,
"data": {
"Call": [
{
"symbol": "$SPX|20210726|1200.00WC",
"baseSymbol": "$SPX",
"strikePrice": "1,200.00",
"moneyness": "+72.24%",
"bidPrice": "3,125.50",
"midpoint": "3,126.95",
"askPrice": "3,128.40",
"lastPrice": "0.00",
"priceChange": "0.00",
"percentChange": "unch",
"volume": "0",
"openInterest": "0",
"volumeOpenInterestRatio": "0.00",
"volatility": "438.24%",
"optionType": "Call",
"daysToExpiration": "6",
"expirationDate": "07\/26\/21",
"tradeTime": "N\/A",
"weightedImpliedVolatility": "14.66%",
"historicVolatility20d": "10.79%",
"symbolType": "Call",
...
P.S. If this helped you, please mark the answer as correct.
I am currently trying to add a regex (re) to my code for McDonald's locations across Canada. The goal is to add a column to my CSV that states whether the location is inside a Walmart. All the addresses in "address1" that are in a Walmart have a tag that says (walmart). I am hoping to separate that out; if anyone can help with this, that would be great! If there is a way to do it in Excel instead, that would be just as good.
import csv
import json
import requests
import re
url = "https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do?method=searchLocation&latitude=43.6936965&longitude=-79.2969938&radius=1000000&maxResults=1700&country=ca&language=en-ca&showClosed=&hours24Text=Open%2024%20hr"
payload = {}
files = {}
headers = {
"authority": "www.mcdonalds.com",
"sec-ch-ua": '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
"accept": "*/*",
"x-requested-with": "XMLHttpRequest",
"sec-ch-ua-mobile": "?0",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
"sec-fetch-site": "same-origin",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
"referer": "https://www.mcdonalds.com/ca/en-ca/restaurant-locator.html",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cookie": "bm_sz=C04645E7F7A956C5F9D9C5A20DEAEC97~YAAQ1Cv2SEtfMBN6AQAAItxfEwwTVV2V2Tr7UWpPt1Ps7gl84FzQlmbWIm4kBBh5dxlK3w8RenwiEiKtvERE6dLmrwPwJUuy+14gU/LeEZvP+uxzyBr04oQXdcSEQuiOgdkAGasqnBrTw1mp5E5iehnRpvHBDdSqh8wRSgJV0eG4f8YwSz66BfntCBALtQNCAFK2; _abck=F05779F2345218EA4989FF467D897C5A~0~YAAQ1Cv2SExfMBN6AQAAItxfEwaIwCrBeP25JBhBb7TX+HmnLQgrj1TkosrB+oHSv9ctrxRukqEDUaHPL1KkjpqjY1XY1yyulQ0ZRhsEfhY968YVsTOqfiosAu3kykd3pJG/bQ37XHwWs5qXpIdhMXRwJwXmkYtl3ETG8kXK2iZ22Q31COaSjNVACLaa7s9tCk9ItgLvUj5x9Nldjnd8AdXR0pXicrQY1IaruJyNqwMcJv42AUHW7iH4Ex9ZOSYsgEjLMNd44mS525X/gSNUTSOzoqoWsnH4MU59vfgLTwc2hVncAv67LBViTLxbWw4eVAvz7Z5phQfCmvoIy0PD8gy5iwPDMaD3GASrK9xScDPAPUI2wquxmSJ+f2cQaxZQKhvJCeH9cz14OZfx8ksA2ss53E0l0kDvgmnw~-1~-1~-1; ak_bmsc=BA4817D8DEE20E92C1E6251C54FC124348F62BD48F5F00005F91C9608B679D5F~plUkbYfsvYr5dCayJ9dMGEJ3QDgkmkv2mLpE7pCY9vW0xrdawvmyxfSnupw/4F7C48Akdn8PKsBniqz+7F+RZb8v4AkvH3c0RuvnynqJoni+kJcDYtPOxdMvdtGdTlZGIkSQNfpcxHNQDVlzojdSBX0vyBh/8seKQv10U67M7m787olYzg9jnsUwk3/VHBrnMDogiWJT8rNV7saSXunN0pAgucZWo/XhCpTJL+tI9urt0=; MCDCountry_code=US; bm_mi=BEE06312635FD442995BC0237BAFDA7C~f/RxgMW/JJSUc/wB9ZRg9fPD/76+wq/TaoWEZR1/ttrAiVTO256xhDTsVYc/kdHIjWkxvfO4XDcBjqe4hQ4qXt8Anpfi09vna/zcC7l6OVWpWeRSoZNztl7h5VF407L3XG+9CpzjSHNcaqAPRk5d0J5gLMtL/KmR8XBkAC0Syim7ST97nxNrPfLdlkSPMGm4Oy86xvY5PH5Nu47zS/gwhanBFg69tAdrQdaZewE2eGuzoJPsZit3UsihTzhXc4LY92hfSdh3/kZRId+NE8Jp0w==; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN4a9yst3R170rBCm1egzGvCBmB1jq9aCwQm5VgIJgloPOdpiIPfD3kDxFbKhqMuS5U=; JSESSIONID=64PZkBXhhpvNjM4NganzSZ0r1npIIaM7Fo84EsxN.eap7node7; _abck=F05779F2345218EA4989FF467D897C5A~-1~YAAQ1Cv2SExyMBN6AQAA5Et0EwZueCejZbKz1VDGCq2sB43Yx4dq0SiiGeUS6gVpXRIdw3rA3OdpNGHq7tVzQ+IvPpEKwLML9736x1qB5SQxV3jai89y2B2QF6K8nKtyrDAes0qbeTyIrHu0Rh1HLs7CjNxiLi0wswbCZfSsPI6fJZiEt+Itre3lfmua/HkhIRwpVTKqlVN5eQ8XIX+s1jJbINx/jUmMTW+jB5k4A5NARGChYH7rJQGYIT/oyZYpSbS3Yweqa4FRgGMW4gYZBN39+t2xSfewADLdpihfOnoZtakw9VhcvAKaf4mEzjB7WEfNJIZSjSE8DzvbJNIF41MGuAhhrnEBwBE8uVCZsA+2qjVPSADVp2Nn8JanJXCbucnLFOLsmPz3oVtGzentht1cHog4+eYOUlmw~0~-1~-1; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN5ZCTzA250oKEeVeXaa6j4gEGJ9RRtrTXQdYXzzSx6fM9aLwif+We2vtIc1yLQgTt4=",
"dnt": "1",
}
response = requests.request(
"GET", url, headers=headers, data=payload, files=files
)
stores = json.loads(response.text)
with open("McDonaldworkshop
\.csv", mode="w") as CSVFile:
writer = csv.writer(
CSVFile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
)
writer.writerow(
[
"address",
"postcode",
"telephone",
]
)
for store in stores["features"]:
address = store["properties"]["addressLine1"]
post_code = store["properties"]["postcode"]
telephone = store["properties"].get("telephone", "N/A")
writer.writerow([address,post_code, telephone])
Try:
import csv
import json
import requests
url = "https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do?method=searchLocation&latitude=43.6936965&longitude=-79.2969938&radius=1000000&maxResults=1700&country=ca&language=en-ca&showClosed=&hours24Text=Open%2024%20hr"
payload = {}
files = {}
headers = {
"authority": "www.mcdonalds.com",
"sec-ch-ua": '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
"accept": "*/*",
"x-requested-with": "XMLHttpRequest",
"sec-ch-ua-mobile": "?0",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
"sec-fetch-site": "same-origin",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
"referer": "https://www.mcdonalds.com/ca/en-ca/restaurant-locator.html",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cookie": "bm_sz=C04645E7F7A956C5F9D9C5A20DEAEC97~YAAQ1Cv2SEtfMBN6AQAAItxfEwwTVV2V2Tr7UWpPt1Ps7gl84FzQlmbWIm4kBBh5dxlK3w8RenwiEiKtvERE6dLmrwPwJUuy+14gU/LeEZvP+uxzyBr04oQXdcSEQuiOgdkAGasqnBrTw1mp5E5iehnRpvHBDdSqh8wRSgJV0eG4f8YwSz66BfntCBALtQNCAFK2; _abck=F05779F2345218EA4989FF467D897C5A~0~YAAQ1Cv2SExfMBN6AQAAItxfEwaIwCrBeP25JBhBb7TX+HmnLQgrj1TkosrB+oHSv9ctrxRukqEDUaHPL1KkjpqjY1XY1yyulQ0ZRhsEfhY968YVsTOqfiosAu3kykd3pJG/bQ37XHwWs5qXpIdhMXRwJwXmkYtl3ETG8kXK2iZ22Q31COaSjNVACLaa7s9tCk9ItgLvUj5x9Nldjnd8AdXR0pXicrQY1IaruJyNqwMcJv42AUHW7iH4Ex9ZOSYsgEjLMNd44mS525X/gSNUTSOzoqoWsnH4MU59vfgLTwc2hVncAv67LBViTLxbWw4eVAvz7Z5phQfCmvoIy0PD8gy5iwPDMaD3GASrK9xScDPAPUI2wquxmSJ+f2cQaxZQKhvJCeH9cz14OZfx8ksA2ss53E0l0kDvgmnw~-1~-1~-1; ak_bmsc=BA4817D8DEE20E92C1E6251C54FC124348F62BD48F5F00005F91C9608B679D5F~plUkbYfsvYr5dCayJ9dMGEJ3QDgkmkv2mLpE7pCY9vW0xrdawvmyxfSnupw/4F7C48Akdn8PKsBniqz+7F+RZb8v4AkvH3c0RuvnynqJoni+kJcDYtPOxdMvdtGdTlZGIkSQNfpcxHNQDVlzojdSBX0vyBh/8seKQv10U67M7m787olYzg9jnsUwk3/VHBrnMDogiWJT8rNV7saSXunN0pAgucZWo/XhCpTJL+tI9urt0=; MCDCountry_code=US; bm_mi=BEE06312635FD442995BC0237BAFDA7C~f/RxgMW/JJSUc/wB9ZRg9fPD/76+wq/TaoWEZR1/ttrAiVTO256xhDTsVYc/kdHIjWkxvfO4XDcBjqe4hQ4qXt8Anpfi09vna/zcC7l6OVWpWeRSoZNztl7h5VF407L3XG+9CpzjSHNcaqAPRk5d0J5gLMtL/KmR8XBkAC0Syim7ST97nxNrPfLdlkSPMGm4Oy86xvY5PH5Nu47zS/gwhanBFg69tAdrQdaZewE2eGuzoJPsZit3UsihTzhXc4LY92hfSdh3/kZRId+NE8Jp0w==; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN4a9yst3R170rBCm1egzGvCBmB1jq9aCwQm5VgIJgloPOdpiIPfD3kDxFbKhqMuS5U=; JSESSIONID=64PZkBXhhpvNjM4NganzSZ0r1npIIaM7Fo84EsxN.eap7node7; _abck=F05779F2345218EA4989FF467D897C5A~-1~YAAQ1Cv2SExyMBN6AQAA5Et0EwZueCejZbKz1VDGCq2sB43Yx4dq0SiiGeUS6gVpXRIdw3rA3OdpNGHq7tVzQ+IvPpEKwLML9736x1qB5SQxV3jai89y2B2QF6K8nKtyrDAes0qbeTyIrHu0Rh1HLs7CjNxiLi0wswbCZfSsPI6fJZiEt+Itre3lfmua/HkhIRwpVTKqlVN5eQ8XIX+s1jJbINx/jUmMTW+jB5k4A5NARGChYH7rJQGYIT/oyZYpSbS3Yweqa4FRgGMW4gYZBN39+t2xSfewADLdpihfOnoZtakw9VhcvAKaf4mEzjB7WEfNJIZSjSE8DzvbJNIF41MGuAhhrnEBwBE8uVCZsA+2qjVPSADVp2Nn8JanJXCbucnLFOLsmPz3oVtGzentht1cHog4+eYOUlmw~0~-1~-1; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN5ZCTzA250oKEeVeXaa6j4gEGJ9RRtrTXQdYXzzSx6fM9aLwif+We2vtIc1yLQgTt4=",
"dnt": "1",
}
response = requests.request(
"GET", url, headers=headers, data=payload, files=files
)
stores = json.loads(response.text)
with open("data.csv", mode="w") as CSVFile:
writer = csv.writer(
CSVFile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
)
writer.writerow(["address", "postcode", "telephone", "has walmart"])
for store in stores["features"]:
has_walmart = "(Wal-Mart)" in store["properties"]["addressLine1"]
address = store["properties"]["addressLine1"].replace("(Wal-Mart)", "")
post_code = store["properties"]["postcode"]
telephone = store["properties"].get("telephone", "N/A")
writer.writerow(
[address, post_code, telephone, "X" if has_walmart else ""]
)
This will create a new column, has walmart, which contains "X" if the address contains (Wal-Mart). It also removes (Wal-Mart) from the address.
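Since the question mentions re, here is an equivalent sketch using a case-insensitive regular expression, in case the tag's capitalization varies (the exact tag text is an assumption):
import re

# Matches "(walmart)", "(Wal-Mart)", etc., with optional surrounding spaces.
WALMART_TAG = re.compile(r"\s*\(wal-?mart\)\s*", re.IGNORECASE)

def split_walmart(address):
    # Return (cleaned_address, has_walmart) for one addressLine1 value.
    cleaned, n_subs = WALMART_TAG.subn(" ", address)
    return cleaned.strip(), n_subs > 0

print(split_walmart("123 Main St (Wal-Mart)"))  # ('123 Main St', True)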
Search URL: http://aptaapps.apta.org/findapt/Default.aspx?UniqueKey=
I need to get data for the ZIP code 10017.
I am sending POST requests, but I receive the search page (the response from the search URL) rather than the page with results.
My code:
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup, element
search_url = "http://aptaapps.apta.org/findapt/Default.aspx?UniqueKey="
session = requests.Session()
r = session.get(search_url)
post_page = BeautifulSoup(r.text, "lxml")
def hidden_value(page, input_id):
    # ASP.NET keeps its state in hidden inputs; missing fields default to "".
    tag = page.find("input", id=input_id)
    return tag["value"] if tag and tag.has_attr("value") else ""

target_value = hidden_value(post_page, "__EVENTTARGET")
arg_value = hidden_value(post_page, "__EVENTARGUMENT")
state_value = hidden_value(post_page, "__VIEWSTATE")
generator_value = hidden_value(post_page, "__VIEWSTATEGENERATOR")
validation_value = hidden_value(post_page, "__EVENTVALIDATION")
post_data = {
"__EVENTTARGET": target_value,
"__EVENTARGUMENT": arg_value,
"__VIEWSTATE": state_value,
"__VIEWSTATEGENERATOR": generator_value,
"__EVENTVALIDATION": validation_value,
"ctl00$SearchTerms2": "",
"ctl00$maincontent$txtZIP": "10017",
"ctl00$maincontent$txtCity": "",
"ctl00$maincontent$lstStateProvince": "",
"ctl00$maincontent$radDist": "1",
"ctl00$maincontent$btnSearch": "Find a Physical Therapist"
}
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4",
"Cache-Control": "max-age=0",
"Content-Length": "3025",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "aptaapps.apta.org",
"Origin": "http://aptaapps.apta.org",
"Proxy-Connection": "keep-alive",
"Referer": "http://aptaapps.apta.org/findapt/default.aspx?UniqueKey=",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
}
post_r = session.post(search_url, data=post_data, headers=headers)
print(post_r.text)
Short Answer:
try replacing:
post_r = session.post(search_url, data=post_data, headers=headers)
with:
post_r = session.post(search_url, json=post_data, headers=headers)
Long Answer:
A POST request can carry several kinds of payloads, such as form-data, x-www-form-urlencoded, application/json, files, etc.
You should find out what type the POST data is. Postman is a great tool for this: you can use it to try different data types and find the correct one.
Once you know the type, use the matching parameter in requests.post: data is for form-data and x-www-form-urlencoded, while json is for the JSON format. See the Requests documentation for more about these parameters.
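To make the difference concrete, here is a minimal sketch (httpbin.org is used only as an echo service):
import requests

payload = {"zip": "10017"}

# data= sends application/x-www-form-urlencoded, i.e. zip=10017
r1 = requests.post("https://httpbin.org/post", data=payload)
print(r1.json()["form"])  # {'zip': '10017'}

# json= serializes the dict and sets Content-Type: application/json
r2 = requests.post("https://httpbin.org/post", json=payload)
print(r2.json()["json"])  # {'zip': '10017'}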
I'm trying to scrape WizzAir for personal use. I can't understand what's wrong with my code. Could it be an incorrect payload object or the cookies?
import requests
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36",
"Accept": "application/json, text/plain, */*",
"Accept-Encoding": "gzip, deflate, sdch, br",
"Accept-Language": "en-US,en;q=0.8,lt;q=0.6,ru;q=0.4",
"Origin": "https://wizzair.com",
"Referer": "https://wizzair.com/"
}
search_url = "https://wizzair.com/lt-LT/FlightSearch"
session = requests.Session()
r = session.get("https://be.wizzair.com/3.8.2/Api/asset/yellowRibbon", headers=headers, allow_redirects=False)
session_id = r.cookies["ASP.NET_SessionId"]
cookies = {
"ASP.NET_SessionId": session_id,
"HomePageSelector": "FlightSearch",
}
# wizz_url = "https://be.wizzair.com/3.8.2/Api/search/search"
wizz_url = "https://be.wizzair.com/3.8.2/Api/asset/farechart"
payload = {"flightList":[{"departureStation":"VNO","arrivalStation":"FCO","departureDate":"2017-02-20"}],"adultCount":1,"childCount":0,"infantCount":0,"wdc":True, "dayInterval":3}
r = session.post(url=wizz_url,data=payload,headers=headers, cookies=cookies)
print(r.content)
>>> {"validationCodes":["FlightCount_MustBe_OneOrTwo"]}
I ran this, even without the session and cookies, and got some data.
You have to send it as JSON, using json=payload:
import requests
payload = {
"flightList":[
{
"departureStation": "VNO",
"arrivalStation": "FCO",
"departureDate": "2017-02-20"
}
],
"adultCount": 1,
"childCount": 0,
"infantCount": 0,
"wdc": True,
"dayInterval": 3
}
url = 'https://be.wizzair.com/3.8.2/Api/search/search'
r = requests.post(url, json=payload)
print(r.text)
data = r.json()
print(data['outboundFlights'][0]['flightNumber'])
If you do have to use cookies and headers, use a Session; then you don't have to copy cookies and headers from one request to another.
import requests
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36",
#"Accept": "application/json, text/plain, */*",
#"Accept-Encoding": "gzip, deflate, sdch, br",
#"Accept-Language": "en-US,en;q=0.8,lt;q=0.6,ru;q=0.4",
}
s = requests.Session()
s.headers.update(headers)
# to get cookies
r = s.get("https://www.wizzair.com/")
payload = {
"flightList":[
{
"departureStation": "VNO",
"arrivalStation": "FCO",
"departureDate": "2017-02-20"
}
],
"adultCount": 1,
"childCount": 0,
"infantCount": 0,
"wdc": True,
"dayInterval": 3
}
url = 'https://be.wizzair.com/3.8.2/Api/search/search'
r = s.post(url, json=payload)
print(r.text)
data = r.json()
print(data['outboundFlights'][0]['flightNumber'])
import urllib.request
url = ""
http_header = {
"User-Agent": "Mozilla/5.0(compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0) like Gecko",
"Accept": "text/html, application/xhtml+xml, */*",
"Accept-Language": "ko-KR",
"Content-type": "application/x-www-form-urlencoded",
"Host": ""
}
params = {
'id': 'asdgasd',
'call_flag': 'idPoolChk',
'reg_type': 'NY'
}
data = urllib.parse.urlencode(params).encode()
req = urllib.request.Request(url, data)
response = urllib.request.urlopen(req)
the_page = response.read()
print(the_page)
When I run this program, I get:
\xec\x95\x84\xec\x9d\xb4\xeb\x94\x94\xea\xb0\x80 \xec\xa4\x91\xeb\xb3\xb5\xeb\x90\xa9\xeb\x8b\x88\xeb\x8b\xa4
How can I transform this to Korean?
It's UTF-8.
print(the_page.decode('utf-8'))
assuming your console will handle those characters.
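If you'd rather not hard-code 'utf-8', you can decode with whatever charset the server declares in its Content-Type header. A small sketch reusing the req object from the question, falling back to UTF-8 when no charset is declared:
import urllib.request

response = urllib.request.urlopen(req)
# response.headers is an email.message.Message; get_content_charset()
# reads the charset parameter of the Content-Type header, or returns None.
charset = response.headers.get_content_charset() or "utf-8"
print(response.read().decode(charset))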