Cannot identify Javascript XHR API loading data to this page - python

I am trying to parse the EPG data at the link below. When I inspect the HTML using the code below, all the program data is missing. I realise this is because it's being loaded asynchronously by JavaScript, but I cannot figure out from Chrome DevTools which API call is responsible, as there seems to be a lot loaded into this page at once:
import requests

url = 'https://mi.tv/ar/programacion/lunes'
headers = {
    'Accept': 'text/html, */*; q=0.01',
    'Referer': 'https://mi.tv/ar/programacion/lunes',  # 'outer' was undefined in the original snippet; the page URL is a plausible Referer
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    'X-KL-Ajax-Request': 'Ajax_Request',
    'X-Requested-With': 'XMLHttpRequest'
}
r = requests.get(url=url, headers=headers)
rr = r.text
print(rr)
Can anyone identify the correct API call for me? I can see there are API parameters given in the HTML, but I've not been able to assemble them into a working link, and I cannot see anything with that URL root in Chrome DevTools.

The following shows the right URL to use and how to return the listings in a dict keyed by channel:
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint

headers = {'User-Agent': 'Mozilla/5.0'}
r = requests.get('https://mi.tv/ar/async/guide/all/lunes/60', headers=headers)
soup = bs(r.content, 'lxml')
listings = {c.select_one('h3').text: list(zip([i.text for i in c.select('.time')],
                                              [i.text for i in c.select('.title')]))
            for c in soup.select('.channel')}
pprint(listings)
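The async endpoint appears to follow the pattern /ar/async/guide/all/<day>/60, so other days can presumably be fetched the same way by swapping the day segment. A minimal sketch under that assumption (the Spanish day names are an assumption, not verified against the site):

import requests
from bs4 import BeautifulSoup as bs

# assumption: other days follow the same /ar/async/guide/all/<day>/60 pattern
days = ['lunes', 'martes', 'miercoles', 'jueves', 'viernes', 'sabado', 'domingo']
headers = {'User-Agent': 'Mozilla/5.0'}
for day in days:
    r = requests.get(f'https://mi.tv/ar/async/guide/all/{day}/60', headers=headers)
    soup = bs(r.content, 'lxml')
    print(day, len(soup.select('.channel')), 'channels')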

Related

Error 403 on public page w/ python get request

I'm a complete newbie to Python and am trying to get the content of a webpage with a GET request. The page I'm trying to access is public, without any authorization as far as I can see. It's a job listing from the career website of a popular company, and everyone can view the page.
My code looks like this:
import requests
from bs4 import BeautifulSoup

url = 'https://www.tuvsud.com/de-de/karriere/stellen/jobs/projektmanagerin-auditservice-food-corporate-functions-business-support-all-regions-133776'
headers = {
    'Host': '',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
}
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.content, 'html.parser')
print(r.status_code)
However, I get status code 403. With the Google URL, for example, it works. I would be happy about any help! Thanks in advance.
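One thing worth trying, purely as a hedged suggestion rather than a confirmed fix: requests normally sets the Host header itself, and sending an empty 'Host' as above may itself get the request rejected; dropping it and sending a few more browser-like headers sometimes helps, although some sites return 403 to any non-browser client regardless. A sketch of that variant (the Referer value is illustrative only):

import requests

url = 'https://www.tuvsud.com/de-de/karriere/stellen/jobs/projektmanagerin-auditservice-food-corporate-functions-business-support-all-regions-133776'
headers = {
    # no 'Host' entry: let requests fill it in from the URL
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
    'Referer': 'https://www.tuvsud.com/de-de/karriere',  # assumed, purely illustrative
}
r = requests.get(url, headers=headers)
print(r.status_code)  # may still be 403 if the site blocks non-browser clients server-side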

Scraping Data from .ASPX Website URL with Python

I have a static .aspx URL that I am trying to scrape. All of my attempts yield the raw HTML of the regular website instead of the data I am querying.
My understanding is that the headers I am using (which I found in another post) are correct and generalizable:
import urllib.request
from bs4 import BeautifulSoup

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
}

class MyOpener(urllib.request.FancyURLopener):
    version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'

myopener = MyOpener()
url = 'https://www.mytaxcollector.com/trSearch.aspx'
# first HTTP request without form data
f = myopener.open(url)
soup_dummy = BeautifulSoup(f, "html5lib")
# parse and retrieve two vital form values
viewstate = soup_dummy.select("#__VIEWSTATE")[0]['value']
viewstategen = soup_dummy.select("#__VIEWSTATEGENERATOR")[0]['value']
Submitting the form data, however, seems to have no effect:
formData = (
    ('__VIEWSTATE', viewstate),
    ('__VIEWSTATEGENERATOR', viewstategen),
    ('ctl00_contentHolder_trSearchCharactersAPN', '631091430000'),
    ('__EVENTTARGET', 'ct100$MainContent$calculate')
)
encodedFields = urllib.parse.urlencode(formData)
# second HTTP request with form data
f = myopener.open(url, encodedFields)
soup = BeautifulSoup(f, "html5lib")
trans_emissions = soup.find("span", id="ctl00_MainContent_transEmissions")
print(trans_emissions.text)
This gives raw HTML almost exactly the same as the "soup_dummy" variable. But what I want to see is the result of submitting the field ('ctl00_contentHolder_trSearchCharactersAPN', '631091430000') (this is the "parcel number" box).
I would really appreciate the help. If anything, linking me to a good post about HTML requests (one that not only explains but actually walks through scraping aspx) would be great.
To get the result using the parcel number, your parameters have to be somewhat different from what you have already tried. Moreover, you have to send the POST request to this URL: https://www.mytaxcollector.com/trSearchProcess.aspx.
Working code:
from urllib.request import Request, urlopen
from urllib.parse import urlencode
from bs4 import BeautifulSoup

url = 'https://www.mytaxcollector.com/trSearchProcess.aspx'
payload = {
    'hidRedirect': '',
    'hidGotoEstimate': '',
    'txtStreetNumber': '',
    'txtStreetName': '',
    'cboStreetTag': '(Any Street Tag)',
    'cboCommunity': '(Any City)',
    'txtParcelNumber': '0108301010000',  # your search term
    'txtPropertyID': '',
    'ctl00$contentHolder$cmdSearch': 'Search'
}
data = urlencode(payload)
data = data.encode('ascii')
req = Request(url, data)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36')
res = urlopen(req)
soup = BeautifulSoup(res.read(), 'html.parser')
for items in soup.select("table.propInfoTable tr"):
    data = [item.get_text(strip=True) for item in items.select("td")]
    print(data)
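If you prefer requests over urllib, the same POST can be written more compactly, since requests handles the form encoding when given a dict. A sketch using the same URL and payload (otherwise intended to behave the same as the urllib version above):

import requests
from bs4 import BeautifulSoup

url = 'https://www.mytaxcollector.com/trSearchProcess.aspx'
payload = {
    'hidRedirect': '',
    'hidGotoEstimate': '',
    'txtStreetNumber': '',
    'txtStreetName': '',
    'cboStreetTag': '(Any Street Tag)',
    'cboCommunity': '(Any City)',
    'txtParcelNumber': '0108301010000',  # your search term
    'txtPropertyID': '',
    'ctl00$contentHolder$cmdSearch': 'Search'
}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'}

# requests encodes the dict as application/x-www-form-urlencoded for us
r = requests.post(url, data=payload, headers=headers)
soup = BeautifulSoup(r.content, 'html.parser')
for row in soup.select('table.propInfoTable tr'):
    print([cell.get_text(strip=True) for cell in row.select('td')])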

Can't find text from page using Python BS4

I am trying to learn how to use BS4, but I ran into this problem. I am trying to find the text in the Google Search results page that shows the number of results for the search, but I can't find the text 'results' either in html_page or in the soup HTML parser. This is the code:
from bs4 import BeautifulSoup
import requests
url = 'https://www.google.com/search?q=stack'
res = requests.get(url)
html_page = res.content
soup = BeautifulSoup(html_page, 'html.parser')
print(b'results' in html_page)
print('results' in soup)
Both prints return False. What am I doing wrong, and how do I fix it?
EDIT:
Turns out the language of the webpage was a problem, adding &hl=en to the URL almost fixed it.
url = 'https://www.google.com/search?q=stack&hl=en'
The first print is now True but the second is still False.
The requests library returns the response as raw bytes when you use response.content. So, to answer your second question, replace res.content with res.text.
from bs4 import BeautifulSoup
import requests
url = 'https://www.google.com/search?q=stack'
res = requests.get(url)
html_page = res.text
soup = BeautifulSoup(html_page, 'html.parser')
print('results' in soup)
Output: True
Keep in mind, Google is usually very active in handling scrapers. To avoid getting blocked or CAPTCHA'd, you can add a user agent to emulate a browser:
# This is a standard user-agent of Chrome browser running on Windows 10
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36' }
Example:
from bs4 import BeautifulSoup
import requests
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
resp = requests.get('https://www.amazon.com', headers=headers).text
soup = BeautifulSoup(resp, 'html.parser')
...
<your code here>
Additionally, you can add another set of headers to look like a legitimate browser. Add some more headers like this:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip',
    'DNT': '1',  # Do Not Track Request Header
    'Connection': 'close'
}
It's not that res.content needs to be changed to res.text, as 0xInfection mentioned; it would still return the result either way.
However, in some cases it will only return bytes content when the transfer encoding is not gzip or deflate, which requests automatically decodes into a readable format (correct me in the comments or edit this answer if I'm wrong).
The real problem is that no user-agent is specified, so Google will eventually block the request: the default requests user-agent is python-requests, and Google understands that it's a bot/script. Learn more about request headers.
Pass a user-agent into the request headers:
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}
requests.get('YOUR_URL', headers=headers)
Code and example in the online IDE:
import requests, lxml
from bs4 import BeautifulSoup

headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}
params = {
    "q": "fus ro dah definition",  # query
    "gl": "us",                    # country to make request from
    "hl": "en"                     # language
}
response = requests.get('https://www.google.com/search',
                        headers=headers,
                        params=params).content
soup = BeautifulSoup(response, 'lxml')

number_of_results = soup.select_one('#result-stats nobr').previous_sibling
print(number_of_results)
# About 114,000 results
Alternatively, you can achieve the same thing by using the Google Direct Answer Box API from SerpApi. It's a paid API with a free plan.
The difference in your case is that you only need to extract the data you want, without figuring out how to extract it yourself or how to bypass blocks from Google or other search engines, since that's already done for the end user.
import os
from serpapi import GoogleSearch

params = {
    "engine": "google",
    "q": "fus ro dah definition",
    "api_key": os.getenv("API_KEY"),
}
search = GoogleSearch(params)
results = search.get_dict()
result = results["search_information"]['total_results']
print(result)
# 112000
Disclaimer, I work for SerpApi.

BeautifulSoup does not show some tags in html page

If I visit this page here, I can see the image on the page with an img tag upon inspection.
But when I get the page using requests and parse it with BeautifulSoup, I can't access the same image. What am I missing here?
The code runs fine, and I get 200 as the status_code from the request.
import requests
from bs4 import BeautifulSoup

url = 'https://mangadex.org/chapter/435396/2'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36'}
page = requests.get(url, headers=headers)
print(page.status_code)
soup = BeautifulSoup(page.text, 'html.parser')
img_tags = soup.find_all('img')
for img in img_tags:
    print(img)
EDIT:
As per the suggestion, the Selenium option works fine. But is there a way to make it as fast as BeautifulSoup?
The page has JavaScript that needs to run in order to populate some of the elements on the page. You could use Selenium to run the page's JavaScript before accessing the image.
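A minimal sketch of that Selenium approach, assuming chromedriver is available locally and that waiting for the first img element is enough for this page:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()  # assumes chromedriver is on PATH / managed by Selenium
driver.get('https://mangadex.org/chapter/435396/2')

# wait until at least one img element exists, i.e. the page's JavaScript has run
WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, 'img')))

soup = BeautifulSoup(driver.page_source, 'html.parser')
for img in soup.find_all('img'):
    print(img)

driver.quit()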
You can use the site's API to get the images. The code below gets all images from the page and prints their URLs:
import requests

headers = {
    'Accept': 'application/json, text/plain, */*',
    'Referer': 'https://mangadex.org/chapter/435396/2',
    'DNT': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/73.0.3683.86 Safari/537.36',
}
params = (
    ('id', '435396'),
    ('type', 'chapter'),
    ('baseURL', '/api'),
)
response = requests.get('https://mangadex.org/api/', headers=headers, params=params)
data = response.json()

img_base_url = "https://s4.mangadex.org/data"
img_hash = data["hash"]
img_names = data["page_array"]

for img in img_names:
    print(f"{img_base_url}/{img_hash}/{img}")
Output:
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x1.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x2.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x3.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x4.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x5.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x6.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x7.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x8.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x9.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x10.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x11.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x12.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x13.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x14.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x15.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x16.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x17.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x18.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x19.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x20.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x21.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x22.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x23.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x24.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x25.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x26.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x27.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x28.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x29.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x30.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x31.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x32.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x33.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x34.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x35.png
https://s4.mangadex.org/data/ac081a99e13d8765d48e55869cd5444c/x36.png
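If you want to download the pages rather than just print the URLs, here is a short follow-up sketch that reuses the variables from the snippet above; saving each page under its original file name (e.g. x1.png) is just one assumption about how you might want to store them:

import os
import requests

out_dir = 'pages'
os.makedirs(out_dir, exist_ok=True)
for img in img_names:
    img_url = f"{img_base_url}/{img_hash}/{img}"
    resp = requests.get(img_url, headers=headers)
    resp.raise_for_status()
    # save each page under its original name, e.g. x1.png
    with open(os.path.join(out_dir, img), 'wb') as f:
        f.write(resp.content)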

Python script to download file from button on website

I want to download an xls file by clicking the "Export to excel" button at the following URL: https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD.
More specifically, the button with name = "ctl00$MainContent$btndata". I've already been able to do this using Selenium, but I plan on building a Docker image with this script and running it as a container, because this xls is regularly updated and I need the most current data on my local machine; it doesn't make sense to have a browser open that often just to fetch this data. I understand there are headless versions of Chrome and Firefox, although I don't believe they support downloads. Also, I understand that wget will not work in this situation because the button is not a static link to the resource. Maybe there's a completely different approach for downloading and updating this data on my computer?
import urllib
import requests
from bs4 import BeautifulSoup

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=.08',
    'Origin': 'https://www.tampagov.net',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD',
    'Accept-Encoding': 'gzip,deflate,br',
    'Accept-Language': 'en-US,en;q=0.5',
}

class MyOpener(urllib.FancyURLopener):
    version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'

myopener = MyOpener()
url = 'https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD'
# first HTTP request without form data
f = myopener.open(url)
soup = BeautifulSoup(f, "html.parser")
# parse and retrieve two vital form values
viewstate = soup.select("#__VIEWSTATE")[0]['value']
eventvalidation = soup.select("#__EVENTVALIDATION")[0]['value']

formData = (
    ('__EVENTVALIDATION', eventvalidation),
    ('__VIEWSTATE', viewstate),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate, br'),
    ('Accept-Language', 'en-US,en;q=0.5'),
    ('Host', 'apps,tampagov.net'),
    ('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0'))

payload = urllib.urlencode(formData)
# second HTTP request with form data
r = requests.post("https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD", params=payload)
print(r.status_code, r.reason)
First: I removed import urllib because requests is enough.
Some issues you have:
You don't need to create a nested tuple and then apply urllib.urlencode; use a dictionary instead. That is one reason why requests is so popular.
You'd better populate all the parameters for the HTTP POST request, like I did below; otherwise, the request may be rejected by the backend.
I added some simple code to save the content locally.
PS: for those form parameters, you can get their values by analyzing the HTML returned from the HTTP GET. You can also customize the parameters as you need, like page size etc.
Below is a working sample:
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
def downloadExcel():
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=.08',
        'Origin': 'https://www.tampagov.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD',
        'Accept-Encoding': 'gzip,deflate,br',
        'Accept-Language': 'en-US,en;q=0.5',
    }
    r = requests.get("https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD", headers=headers)
    # parse and retrieve two vital form values
    if not r.status_code == 200:
        print('Error')
        return
    soup = BeautifulSoup(r.content, "html.parser")
    viewstate = soup.select("#__VIEWSTATE")[0]['value']
    eventvalidation = soup.select("#__EVENTVALIDATION")[0]['value']
    print('__VIEWSTATE:', viewstate)
    print('__EVENTVALIDATION:', eventvalidation)
    formData = {
        '__EVENTVALIDATION': eventvalidation,
        '__VIEWSTATE': viewstate,
        '__EVENTTARGET': '',
        '__EVENTARGUMENT': '',
        '__VIEWSTATEGENERATOR': '49DF2C80',
        'MainContent_RadScriptManager1_TSM': """;;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en-US:59e0a739-153b-40bd-883f-4e212fc43305:ea597d4b:b25378d2;Telerik.Web.UI, Version=2015.2.826.40, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:c2ba43dc-851e-4009-beab-3032480b6a4b:16e4e7cd:f7645509:24ee1bba:c128760b:874f8ea2:19620875:4877f69a:f46195d3:92fe8ea0:fa31b949:490a9d4e:bd8f85e4:58366029:ed16cbdc:2003d0b8:88144a7a:1e771326:aa288e2d:b092aa46:7c926187:8674cba1:ef347303:2e42e72a:b7778d6c:c08e9f8a:e330518b:c8618e41:e4f8f289:1a73651d:16d8629e:59462f1:a51ee93e""",
        'search_block_form': '',
        'ctl00$MainContent$btndata': 'Export to Excel',
        'ctl00_MainContent_RadWindow1_C_RadGridVehicles_ClientState': '',
        'ctl00_MainContent_RadWindow1_ClientState': '',
        'ctl00_MainContent_RadWindowManager1_ClientState': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl00$PageSizeComboBox': '20',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl00_PageSizeComboBox_ClientState': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RDIPFdispatch_time': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RDIPFdispatch_time$dateInput': '',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RDIPFdispatch_time_dateInput_ClientState': '{"enabled":true,"emptyMessage":"","validationText":"","valueAsString":"","minDateStr":"1900-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":""}',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RDIPFdispatch_time_ClientState': '{"minDateStr":"1900-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1address': '',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1address_ClientState': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1case_description': '',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1case_description_ClientState': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_grid': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1report_number': '',
        'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1report_number_ClientState': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_out_max_date': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_out_rowcount': '',
        'ctl00$MainContent$RadGrid1$ctl00$ctl03$ctl01$PageSizeComboBox': '20',
        'ctl00_MainContent_RadGrid1_ctl00_ctl03_ctl01_PageSizeComboBox_ClientState': '',
        'ctl00_MainContent_RadGrid1_rfltMenu_ClientState': '',
        'ctl00_MainContent_RadGrid1_gdtcSharedTimeView_ClientState': '',
        'ctl00_MainContent_RadGrid1_gdtcSharedCalendar_SD': '[]',
        'ctl00_MainContent_RadGrid1_gdtcSharedCalendar_AD': '[[1900,1,1],[2099,12,31],[2018,3,29]]',
        'ctl00_MainContent_RadGrid1_ClientState': '',
    }
    # second HTTP request with form data
    r = requests.post("https://apps.tampagov.net/CallsForService_Webapp/Default.aspx?type=TPD", data=formData, headers=headers)
    print('received:', r.status_code, len(r.content))
    with open(r"C:\Users\xxx\Desktop\test\test\apps.xls", "wb") as handle:
        for data in tqdm(r.iter_content()):
            handle.write(data)
downloadExcel()
Find out the URL you need to fetch as #Sphinx explains, and then simulate it using something similar to:
import urllib.request
import urllib.parse

data = urllib.parse.urlencode({...})
data = data.encode('ascii')
with urllib.request.urlopen("http://...", data) as fd:
    print(fd.read().decode('utf-8'))
Take a look at the documentation of urllib.
