Read URLs from external file - python

I found the following TikTok Downloader which is working fine.
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloader:
    """Download a single TikTok video given the URL of its page.

    The page is fetched with the ``tt_webid`` cookies, the direct media URL is
    read from the ``"playAddr"`` field embedded in the page source, and the
    media is then requested and written to disk.
    """

    # Browser-like headers sent with both the page request and the media request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url: str, web_id: str):
        """Remember the page URL and build the cookies sent with every request.

        Args:
            url: URL of the TikTok video page.
            web_id: Value used for both the ``tt_webid`` and ``tt_webid_v2`` cookies.
        """
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    @staticmethod
    def _extract_play_addr(page_source: str) -> str:
        """Return the media URL stored in the ``"playAddr"`` field of *page_source*.

        Raises:
            ValueError: If *page_source* contains no ``"playAddr"`` field.
        """
        _, marker, tail = page_source.partition('"playAddr":"')
        if not marker:
            raise ValueError('No "playAddr" field found in the page source; '
                             'check the URL and the web id.')
        # The value is embedded in JSON, where '&' is escaped as \u0026.
        return tail.split('"')[0].replace(r'\u0026', '&')

    def __get_video_url(self) -> str:
        """Fetch the video page and return the direct media URL found in it.

        Raises:
            requests.HTTPError: If the page request returns an error status.
            ValueError: If the page contains no ``"playAddr"`` field.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloader.HEADERS)
        # Fail on an error page instead of trying to parse it.
        response.raise_for_status()
        return self._extract_play_addr(response.text)

    def download(self, file_path: str):
        """Download the video and write it to *file_path*.

        If *file_path* already exists the user is asked on stdin whether to
        overwrite it; any answer other than ``y``/``Y`` leaves the file alone.

        Args:
            file_path: Destination path of the video file.

        Raises:
            requests.HTTPError: If the page or media request returns an error status.
            ValueError: If the media URL cannot be found in the page.
        """
        target = os.path.abspath(file_path)
        # Ask before any network work so a declined overwrite costs nothing.
        if os.path.exists(target):
            choice = input('File already exists. Overwrite? (Y/N): ')
            if choice.strip().lower() != 'y':
                return

        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # Split the query string off so requests re-encodes it consistently.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme, url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloader.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The context manager releases the session's connections when done.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
            response.raise_for_status()

        with open(target, 'wb') as output_file:
            output_file.write(response.content)
def build_parser() -> ArgumentParser:
    """Build the command-line parser for the downloader script.

    Returns:
        A parser with ``--web-id``, ``-o/--output`` (default ``download.mp4``)
        and the positional ``url`` argument.
    """
    parser = ArgumentParser()
    parser.add_argument('--web-id', help='Value of tt_webid or tt_webid_v2 cookie (they are the same).')
    parser.add_argument('-o', '--output', default='download.mp4', help='Full output path.')
    # TikTok profile paths start with '@' (the example previously showed '#').
    parser.add_argument('url', help='Video url (https://www.tiktok.com/@username/video/1234567890123456789 or https://vm.tiktok.com/a1b2c3/).')
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    downloader = TikTokDownloader(args.url, args.web_id)
    downloader.download(args.output)
The issue is that I have to run this command to download each video:
python3 ./tiktok.py --web-id 1234567890123 -o ./file.mp4 https://vm.tiktok.com/...
And I have 1000 links to download. All the links are in a text file, one per line, without commas. Like:
Https://tiktok.com/1
Https://tiktok.com/2
Https://tiktok.com/3
So I'm looking for a way to read the text file and automatically substitute each link into the command that I have to run. Or should I change the actual script?

Use my code please, I have just defined a function that will help you to download all those videos by just entering the path where the file with a thousand links is located, preferably save this python script in the same directory where your file with a thousand links is located:
Use the function
A_thousand_links_jbsidis("my_file_with_1000_links.txt")
This is going to put automatic names to each video based on date and time, I tested it and it works!
Here is the code by jbsidis:
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloaderjbsidis:
    """Non-interactive TikTok video downloader intended for batch use.

    The page is fetched with the ``tt_webid`` cookies, the direct media URL is
    read from the ``"playAddr"`` field embedded in the page source, and the
    media is written to disk. An existing destination file is overwritten
    without asking.
    """

    # Browser-like headers sent with both the page request and the media request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url, web_id):
        """Remember the page URL and build the cookies sent with every request.

        Args:
            url: URL of the TikTok video page.
            web_id: Value used for both the ``tt_webid`` and ``tt_webid_v2`` cookies.
        """
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    @staticmethod
    def _extract_play_addr(page_source: str) -> str:
        """Return the media URL stored in the ``"playAddr"`` field of *page_source*.

        Raises:
            ValueError: If *page_source* contains no ``"playAddr"`` field.
        """
        _, marker, tail = page_source.partition('"playAddr":"')
        if not marker:
            raise ValueError('No "playAddr" field found in the page source; '
                             'check the URL and the web id.')
        # The value is embedded in JSON, where '&' is escaped as \u0026.
        return tail.split('"')[0].replace(r'\u0026', '&')

    def __get_video_url(self) -> str:
        """Fetch the video page and return the direct media URL found in it.

        Raises:
            requests.HTTPError: If the page request returns an error status.
            ValueError: If the page contains no ``"playAddr"`` field.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloaderjbsidis.HEADERS)
        # Fail on an error page instead of trying to parse it.
        response.raise_for_status()
        return self._extract_play_addr(response.text)

    def download(self, file_path: str):
        """Download the video and write it to *file_path*, replacing any existing file.

        Args:
            file_path: Destination path of the video file.

        Raises:
            requests.HTTPError: If the page or media request returns an error status.
            ValueError: If the media URL cannot be found in the page.
        """
        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # Split the query string off so requests re-encodes it consistently.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme, url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloaderjbsidis.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The context manager releases the session's connections when done.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
            response.raise_for_status()

        # No overwrite prompt here: batch runs must not block on user input,
        # so opening with 'wb' simply replaces an existing file.
        print("Downloading jbsidis == "+str(file_path))
        with open(os.path.abspath(file_path), 'wb') as output_file:
            output_file.write(response.content)
import time
import random
def A_thousand_links_jbsidis(file_with_a_thousand_links, web_id="1234567890123", delay=10):
    """Download every TikTok link listed in a text file, one link per line.

    Each video is saved in the current directory under an automatic name made
    of the line number and the current date and time. Blank lines are skipped,
    and a link that fails to download is reported and skipped so that the rest
    of the batch still runs.

    Args:
        file_with_a_thousand_links: Path of the text file holding the links.
        web_id: Value of the ``tt_webid`` cookie; put the id that works for you.
        delay: Seconds to wait after each download attempt.
    """
    # Read everything up front so the file handle is closed before downloading.
    with open(file_with_a_thousand_links) as links_file:
        lines = links_file.read().splitlines()

    for c, new_url in enumerate(lines, start=1):
        # Drop any whitespace that sneaked into the line; URLs contain none.
        clean_url = "".join(new_url.split())
        if not clean_url:
            continue
        # Guessing the videos are mp4.
        new_auto_file_name = str(c) + " - " + time.strftime("_%Y%m%d_%H%M%S_") + "_video_" + ".mp4"
        try:
            TikTokDownloaderjbsidis(clean_url, web_id).download(new_auto_file_name)
        except (requests.RequestException, LookupError, ValueError) as error:
            # One dead or unparsable link must not abort the remaining downloads.
            print("Skipping " + clean_url + ": " + str(error))
        # Pause between links in case the connection is slow.
        time.sleep(delay)
if __name__ == "__main__":
    # Start the batch only when run as a script, not when the module is imported.
    A_thousand_links_jbsidis("my_file_with_1000_links.txt")
And here is the image, I don't know why sometimes we answer questions without giving a real solution, greetings from El Salvador.
jbsidis

Related

How to send a POST request in Python to switch to another YouTube account?

I'm trying to send a post request to change my YouTube account to another one, but I can't do it.
Below I have attached the code that I wrote, it gives me an error, I can't figure out what the problem is. I also know that if the account is successfully changed, there should be a 303 response.
At the very end of the code there are two cookie entries, __Secure-1PSID and __Secure-1PAPISID; I replaced their values with "-" before posting here because they are private. You can take your own values from Chrome's cookies.
Could you help me?
import requests
import hashlib
import re
import time
class YouTube(object):
    """Small wrapper around a ``requests.Session`` preloaded with YouTube cookies."""

    # Headers installed on the session for every request.
    base_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
    }
    origin = 'https://youtube.com'
    referer = 'https://youtube.com'

    def __init__(self, cookies: dict):
        """Create the session and load *cookies* (name -> value) into it."""
        self.__CHANNEL_ID = None
        self.__API_KEY = None
        self.__SAPISIDHASH = None
        self.cookies = cookies
        self.session = requests.Session()
        self.session.headers.update(self.base_headers)
        self.load_cookies()

    def load_cookies(self) -> None:
        """Copy every entry of ``self.cookies`` into the session's cookie jar."""
        for key, value in self.cookies.items():
            self.session.cookies.set(key, value)

    def set_data(self) -> None:
        """Request the YouTube home page through the session and print the API key.

        NOTE(review): nothing in this class assigns the API key after
        ``__init__``, so this currently prints ``None``.
        """
        self.session.get(self.referer)
        print(self.__API_KEY)

    def get_data_of_monetization(self) -> dict:
        """Request the account-switch ``/signin`` URL and return the decoded JSON body.

        The response text is printed before decoding.

        NOTE(review): ``r.json()`` raises if the body is not JSON; confirm that
        this endpoint really answers with JSON rather than an HTML page or a
        redirect.
        """
        self.session.headers.update(
            {'accept-encoding': 'gzip, deflate, br', 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
             'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
             'sec-ch-ua-arch': '"x86"', 'sec-ch-ua-bitness': '"64"', 'sec-ch-ua-full-version': '"103.0.5060.134"',
             'sec-ch-ua-full-version-list': '".Not/A)Brand";v="99.0.0.0", "Google Chrome";v="103.0.5060.134", "Chromium";v="103.0.5060.134"',
             'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Windows"', 'sec-ch-ua-platform-version': '"10.0.0"',
             'sec-fetch-user': '?1', 'upgrade-insecure-requests': '1',
             'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
             'x-client-data': 'CKq1yQEIkbbJAQiltskBCMG2yQEIqZ3KAQio68oBCJahywEI2+/LAQjmucwBCLS6zAEIibvMAQj2u8wBCJi9zAEI8sDMAQiawcwBCLLBzAEIxMHMAQjXwcwBCN/EzAEYq6nKAQ=='})
        r = self.session.get(
            'https://www.youtube.com/signin',
            params={
                'action_handle_signin': 'true',
                'authuser': '1',
                # requests percent-encodes parameter values itself; passing an
                # already-encoded value would turn every '%' into '%25'.
                'next': 'https://studio.youtube.com/',
                'feature': 'masthead_switcher',
                'skip_identity_prompt': 'true',
            },
            headers={
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'cookie': f'__Secure-1PSID={self.cookies["__Secure-1PSID"]}; __Secure-1PAPISID={self.cookies["__Secure-1PAPISID"]}',
                'referer': 'https://studio.youtube.com/', 'sec-ch-ua-model': '""', 'sec-fetch-dest': 'document',
                'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'same-site',
                'service-worker-navigation-preload': 'true'},
        )
        print(r.text)
        return r.json()

    @property
    def get_channel_id(self) -> str:
        """Channel id stored on the instance (``None`` until something sets it)."""
        return self.__CHANNEL_ID

    @property
    def get_api_key(self) -> str:
        """API key stored on the instance (``None`` until something sets it)."""
        return self.__API_KEY

    @property
    def get_sapisidhash(self) -> str:
        """SAPISIDHASH value stored on the instance (``None`` until something sets it)."""
        return self.__SAPISIDHASH
if __name__ == '__main__':
    # Placeholder cookie values; substitute the real ones from the browser.
    placeholder_cookies = dict.fromkeys(('__Secure-1PSID', '__Secure-1PAPISID'), '-')
    client = YouTube(placeholder_cookies)
    client.set_data()
    response = client.get_data_of_monetization()

Scraping Data from booking with python

hope you're doing well !
So I'm trying to scrape data from Booking (hotel name, room, ...). The code runs without errors, but I don't get any data in the Excel file; the data file is empty!
This is my code :
# Build the Extractor from the selectors described in the YAML file
e = Extractor.from_yaml_file('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/booking.yml')


def scrape(url):
    """Download *url* with browser-like headers and return what the extractor finds in it."""
    print("Downloading %s"%url)
    request_headers = dict([
        ('Connection', 'keep-alive'),
        ('Pragma', 'no-cache'),
        ('Cache-Control', 'no-cache'),
        ('DNT', '1'),
        ('Upgrade-Insecure-Requests', '1'),
        # Swap in another user agent if the site starts blocking requests
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'),
        ('Referer', 'https://www.booking.com/index.en-gb.html'),
        ('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8'),
    ])
    # Fetch the page, then hand its HTML to the extractor
    page = requests.get(url, headers=request_headers)
    extracted = e.extract(page.text, base_url=url)
    return extracted
# Scrape every URL listed in urls.txt and write one CSV row per extracted hotel.
# newline='' is what the csv module expects for files it writes; without it
# every row is followed by an empty line on Windows.
with open("C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/urls.txt",'r') as urllist, open('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/data.csv','w', newline='') as outfile:
    fieldnames = [
        "name",
        "location",
        "price",
        "price_for",
        "room_type",
        "beds",
        "rating",
        "rating_title",
        "number_of_ratings",
        "url"
    ]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames,quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for line in urllist:
        # Lines read from the file keep their trailing newline; strip it so it
        # does not become part of the requested URL, and skip blank lines.
        url = line.strip()
        if not url:
            continue
        data = scrape(url)
        if data and data['hotels'] is not None:
            for h in data["hotels"]:
                writer.writerow(h)
        else:
            # Make an empty result visible instead of silently writing nothing.
            print("No hotels extracted from %s" % url)
And this is the result in the excel file :
There is no error in my code; the question is only about how to get this data.
The booking.yml :

scrapy_splash not rendering for list of urls

I created a spider with scrapy_splash,
I hardcoded 3 urls in start_requests.
When I run with any one url it is working fine for all the urls.
when I put all the urls in a list and run one by one, it is not working, and splash not returning complete rendered html in response.body.
kindly help.
code:
import re
import time
import json
import scrapy
import w3lib
from scrapy_splash import SplashRequest
class SpeSpider(scrapy.Spider):
    """Spider that renders fund pages through Splash and yields a document id per page."""

    name = 'spe'
    # allowed_domains = ['s']
    # start_urls = ['http://s/']

    # Lua script: load the page with private mode disabled, wait 2 seconds, return the HTML.
    without_wait_script = """
function main(splash, args)
splash.private_mode_enabled = false
assert(splash:go(args.url))
assert(splash:wait(2))
return {
html = splash:html(),
}
end
"""
    # Lua script: load the page, wait 10 seconds, return the HTML.
    wait_script = """
function main(splash, args)
assert(splash:go(args.url))
assert(splash:wait(10))
return {
html = splash:html(),
}
end
"""
    # Browser-like headers passed to SplashRequest as ``splash_headers``.
    splash_headers = {
        'authority': 'www.avivainvestors.com',
        'sec-ch-ua': '"Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'referer': 'https://www.avivainvestors.com/fr-fr/nos-expertises/nos-fonds/',
        'accept-language': 'en-US,en;q=0.9,lb;q=0.8',
    }

    def start_requests(self):
        """Yield one Splash ``execute`` request per hard-coded fund URL."""
        url1="https://www.avivainvestors.com/fr-fr/nos-expertises/equities/uk-listed-equity-high-alpha-fund/lu0160960752-gbp/"
        url2 = "https://www.avivainvestors.com/fr-fr/nos-expertises/equities/japon-isr/fr0013340841-eur/"
        url3 = "https://www.avivainvestors.com/fr-fr/nos-expertises/fixed-income/emerging-markets-corporate-bond-fund/lu1550133976-usd/"
        urls = [url1, url2, url3]
        for url in urls:
            # NOTE(review): time.sleep blocks the whole crawler while it waits;
            # Scrapy's DOWNLOAD_DELAY setting is the usual way to space out
            # requests. Confirm whether this sleep is needed.
            time.sleep(10)
            yield SplashRequest(
                url=url,
                endpoint="execute",
                callback=self.scrape_document_id,
                args={"lua_source":self.wait_script},
                splash_headers= self.splash_headers
            )

    def scrape_document_id(self, response):
        """Yield ``{"url", "id"}`` for the "Rapport annuel" row of the rendered page.

        The id is the bracketed part of the row button's ``mstar-component-id``
        attribute. Nothing is yielded (a warning is logged instead) when the
        attribute is missing or contains no bracketed part.
        """
        # XPath attribute tests and selections are written with '@'.
        value = response.xpath('//div[@class="ec-table__cell-content ng-binding ng-scope" and text() = "Rapport annuel"]/../..//td/ec-button/@mstar-component-id').get()
        print("VALUE", value)
        if value is None:
            # Happens when the page was not fully rendered or has no such row.
            self.logger.warning("No 'Rapport annuel' component id found on %s", response.url)
            return
        match = re.search(r"\[([^]]+)\]", value)
        if match is None:
            self.logger.warning("No bracketed id in %r on %s", value, response.url)
            return
        v = match.group().strip("[]")
        yield {
            "url": response.url,
            "id" : v
        }
This is because you are using a yield statement which is a generator.
My guess is that you are just doing this,
x = SpeSpider()
x.start_requests()
which only creates a generator from your yield statement.
Try this,
x = SpeSpider()
list(x.start_requests())
It will run your function and produce a list, though I am not sure if this is the behaviour you want, because I don't see any code showing how you instantiate the class objects or what the results should look like.

Python requests appending ampersand (&) to URL when adding results from variable as parameter

Below is my code which basically retrieves data from the database, puts it into a variable in CSV format which I then am trying to append on to a GET request URL. However, the get request results in null as the GET Request URL has an ampersand (&) sign in it.
Question is how do I get rid of it?
This is the URL, note the ampersand (&):
https://demo-api.ig.com/gateway/deal/clientsentiment?marketIds=&JGB,BCHUSD,AT20,
import requests
import json
import time
import datetime
import csv
import pandas as pd
import psycopg2
conn_string = "host=' dbname='' user='' password=''"
conn = psycopg2.connect(conn_string)
try:
    cursor = conn.cursor()
    # Query to source marketIds
    postgreSQL_select_Query = "SELECT DISTINCT () FROM static WHERE TYPE!='' AND marketId!='None'"
    # Run the query once and keep the rows; the join below reuses them
    # instead of executing the same statement a second time.
    cursor.execute(postgreSQL_select_Query)
    instrument_static_marketId = cursor.fetchall()
finally:
    # Close the database connection even if the query fails.
    conn.close()
# Put the first column of every row into one comma-separated string.
y = ','.join(row[0] for row in instrument_static_marketId)
print(y)
def main():
    """Request client sentiment for the market ids in ``y`` and print the result.

    Prints the final request URL followed by the decoded JSON response.
    """
    headers = {
        'Connection': 'keep-alive',
        'Origin': 'https://.com',
        'X-IG-API-KEY': '',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Content-Type': 'application/json; charset=UTF-8',
        'Accept': 'application/json; charset=UTF-8',
        'X-SECURITY-TOKEN': '',
        'CST': '',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }
    # Let requests build the whole query string. A URL that already ends in
    # '?marketIds=' combined with a string for params is what produced the
    # stray '&' before the ids.
    response = requests.get(
        'https://demo-api.ig.com/gateway/deal/clientsentiment',
        params={'marketIds': y},
        headers=headers,
    )
    print(response.url)
    result = response.json()
    print(result)


if __name__ == '__main__':
    main()
You've included part of a parameter in your URL which is incorrect and confused requests.
Leave that off, and pass a dictionary for params, just like you're already doing with headers:
y = 'JGB,BCHUSD,AT20'
# The key must be spelled exactly as in the original URL ('marketIds').
params = {
    'marketIds': y,
}
url = 'https://demo-api.ig.com/gateway/deal/clientsentiment'
response = requests.get(url, params=params, headers=headers)

Reading data from a website passing parameters

import requests
from lxml import html
from bs4 import BeautifulSoup
session_requests = requests.session()
sw_url = "https://www.southwest.com"
sw_url2 = "https://www.southwest.com/flight/select-flight.html?displayOnly=&int=HOMEQBOMAIR"
# Search parameters sent as the query string of the POST below.
payload = {
    "name": "AirFormModel",
    "origin": "MCI",
    "destination": "DAL",
    "departDate": "2018-02-28T06:00:00.000Z",
    "returnDate": "2018-03-03T06:00:00.000Z",
    "tripType": "true",
    "priceType": "DOLLARS",
    "adult": 1,
    "senior": 0,
    "promoCode": "",
}


def _report_pricing(markup):
    """Parse *markup* and print its pricing div, currency spans and segments div."""
    document = BeautifulSoup(markup, 'html.parser')
    print(document.find("div", {"class": "productPricing"}))
    for currency in document.find_all("span", {"class": "currency_symbol"}):
        print(currency)
        print('++++')
        print(currency.next_sibling)
    print(document.find("div", {"class": "twoSegments"}))


p = requests.post(sw_url, params=payload)
print(p.content)
p1 = requests.get(sw_url2)
# Same report for the POST response first, then for the plain GET.
for page in (p, p1):
    _report_pricing(page.text)
I need to retrieve prices for flights between 2 locations on specific dates. I identified the parameters by looking at the session info from inspector of the browser and included them in the post request.
I am not sure what I'm doing wrong here, but I am unable to read the data from the tags correctly. It's printing none.
Edit : 4/25/2018
I'm using the following code now, but it doesn't seem to help. Please advise.
import threading
from lxml import html
from bs4 import BeautifulSoup
import time
import datetime
import requests
def worker(oa,da,ods):
    """Thread worker: POST one flight-shopping request and print the response.

    Args:
        oa: Origin airport code.
        da: Destination airport code.
        ods: Outbound date string.

    A timestamped line is printed before and after the request.
    """
    print (oa + ' ' + da + ' ' + ods + ' ' + str(datetime.datetime.now()))
    url = "https://www.southwest.com/api/air-booking/v1/air-booking/page/air/booking/shopping"
    # No hard-coded 'content-length' here: requests computes it from the body
    # actually sent, and a stale value combined with an empty body makes the
    # server wait for bytes that never arrive.
    rh = {
        'accept': 'application/json,text/javascript,*/*;q=0.01',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.5',
        'cache-control': 'max-age=0',
        'content-type': 'application/json',
        'referer': 'https://www.southwest.com/air/booking/select.html?originationAirportCode=MCI&destinationAirportCode=LAS&returnAirportCode=&departureDate=2018-05-29&departureTimeOfDay=ALL_DAY&returnDate=&returnTimeOfDay=ALL_DAY&adultPassengersCount=1&seniorPassengersCount=0&fareType=USD&passengerType=ADULT&tripType=oneway&promoCode=&reset=true&redirectToVision=true&int=HOMEQBOMAIR&leapfrogRequest=true',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    fd = {
        'returnAirport':'',
        'twoWayTrip':'false',
        'fareType':'DOLLARS',
        'originAirport':oa,
        'destinationAirport':da,
        'outboundDateString':ods,
        'returnDateString':'',
        'adultPassengerCount':'1',
        'seniorPassengerCount':'0',
        'promoCode':'',
        'submitButton':'true'
    }
    with requests.Session() as s:
        # Actually send the search data; it was built before but never posted.
        # json= matches the 'application/json' content type declared above.
        # NOTE(review): confirm the field names this endpoint expects.
        r = s.post(url, headers=rh, json=fd)
        print(r)
        print(r.content)
    print (oa + ' ' + da + ' ' + ods + ' ' + str(datetime.datetime.now()))
    return
#db = MySQLdb.connect(host="localhost",user="root",passwd="vikram",db="garmin")
# Counters and limits for the launcher.
rcount = 0
tdelta = 55
threads = []
count = 1
thr_max = 2
# Airport codes laid out as consecutive origin/destination pairs.
r = ["MCI","DEN","MCI","MDW","MCI","DAL"]
# Travel date: today plus tdelta days, formatted as MM/DD/YYYY.
strt_date = (datetime.date.today() + datetime.timedelta(days=tdelta)).strftime("%m/%d/%Y")
while count < 2:
    origin, destination = r[count - 1], r[count]
    t = threading.Thread(
        target=worker,
        name=origin + destination,
        args=(origin, destination, strt_date),
    )
    t.start()
    threads.append(t)
    count += 2
When you say looked at the session info from inspector of the browser, I'm assuming you meant the network tab. If that's the case, are you sure you noted the data being sent properly?
Here's the URL that gets sent by the browser, following which the page you required is fetched:
url = 'https://www.southwest.com/flight/search-flight.html'
You didn't use headers in your request, which, in my opinion, should be passed compulsorily in some cases. Here are the headers that the browser passes:
:authority:www.southwest.com
:method:POST
:path:/flight/search-flight.html
:scheme:https
accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
accept-encoding:gzip, deflate, br
accept-language:en-US,en;q=0.9
cache-control:max-age=0
content-length:564
content-type:application/x-www-form-urlencoded
origin:https://www.southwest.com
referer:https://www.southwest.com/flight/search-flight.html?int=HOMEQBOMAIR
upgrade-insecure-requests:1
user-agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36
Note:
I removed the cookie header, because that would be taken care of by requests if you're using session.
The first four headers (those that begin with a colon (':')) cannot be passed in Python's requests; so, I skipped them.
Here's the dict that I used to pass the headers:
rh = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'cache-control': 'max-age=0',
'content-length': '564',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.southwest.com',
'referer': 'https://www.southwest.com/flight/search-flight.html?int=HOMEQBOMAIR',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
}
And here is the form data sent by browser:
fd = {
'toggle_selfltnew': '',
'toggle_AggressiveDrawers': '',
'transitionalAwardSelected': 'false',
'twoWayTrip': 'true',
'originAirport': 'MCI',
# 'originAirport_displayed': 'Kansas City, MO - MCI',
'destinationAirport': 'DAL',
# 'destinationAirport_displayed': 'Dallas (Love Field), TX - DAL',
'airTranRedirect': '',
'returnAirport': 'RoundTrip',
'returnAirport_displayed': '',
'outboundDateString': '02/28/2018',
'outboundTimeOfDay': 'ANYTIME',
'returnDateString': '03/01/2018',
'returnTimeOfDay': 'ANYTIME',
'adultPassengerCount': '1',
'seniorPassengerCount': '0',
'promoCode': '',
'fareType': 'DOLLARS',
'awardCertificateToggleSelected': 'false',
'awardCertificateProductId': ''
}
Note that I commented out two of the items above, but it didn't make any difference. I assumed you'd be having only the location codes and not the full name. If you do have them or if you can extract them from the page, you can send those as well along with other data.
I don't know if it makes any difference, but I used data instead of params:
with requests.Session() as s:
r = s.post(url, headers = rh, data = fd)
soup = BeautifulSoup(r.content, 'lxml')
Finally, here is the result:
>>> soup.find('span', {'class': 'currency_symbol'}).text
'$'

Categories

Resources