I am trying to download a CSV file generated from a report at This website.
Below is the request I am trying to replicate.
Example of the API I am trying to call
I copied the request as cURL, converted it to Python, and got the following format for the request:
import requests
cookies = {
'_ga': 'GA1.2.938840467.1566745600',
'terminal': '174224916994986694513353793024390053397',
'__utmc': '1',
'request_timing~11947199': '2~2237~74~91~2402',
'_gid': 'GA1.2.89702438.1657551717',
'cf_clearance': 'RDmwUSB_b6JmRSJpvrM76reZifV_m6cHjCJ0kmUkAS8-1657566551-0-250',
'GSG_SESSION_ID': '322919708739472562779456661040511933493',
'is_session_valid': '1',
'plack_session': '27e03cd7d13a440955626dbc574adef85a619f88',
'__utma': '1.938840467.1566745600.1657299681.1657567143.4',
'__utmz': '1.1657567143.4.4.utmcsr=leetc.com|utmccn=(referral)|utmcmd=referral|utmcct=/',
'__utmt': '1',
'session_id': '53da3a29-e3d5-4dd4-96c3-7562a0fb7715',
'_gat': '1',
'request_id~1513632513': '6441F026-014E-11ED-9669-AEADB5E8FA7B',
'__utmb': '1.5.9.1657567169673',
'request_timing~1513632513': '1~4217~79~105~4401',
}
headers = {
'authority': 'lee.county-taxes.com',
'accept': 'application/xml, text/xml, */*; q=0.01',
'accept-language': 'en-US,en;q=0.9',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
# Requests sorts cookies= alphabetically
# 'cookie': '_ga=GA1.2.938840467.1566745600; terminal=174224916994986694513353793024390053397; __utmc=1; request_timing~11947199=2~2237~74~91~2402; _gid=GA1.2.89702438.1657551717; cf_clearance=RDmwUSB_b6JmRSJpvrM76reZifV_m6cHjCJ0kmUkAS8-1657566551-0-250; GSG_SESSION_ID=322919708739472562779456661040511933493; is_session_valid=1; plack_session=27e03cd7d13a440955626dbc574adef85a619f88; __utma=1.938840467.1566745600.1657299681.1657567143.4; __utmz=1.1657567143.4.4.utmcsr=leetc.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmt=1; session_id=53da3a29-e3d5-4dd4-96c3-7562a0fb7715; _gat=1; request_id~1513632513=6441F026-014E-11ED-9669-AEADB5E8FA7B; __utmb=1.5.9.1657567169673; request_timing~1513632513=1~4217~79~105~4401',
'origin': 'https://lee.county-taxes.com',
'referer': 'https://lee.county-taxes.com/public/reports/real_estate',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'x-requested-with': 'XMLHttpRequest',
}
# Form body copied from the browser request; it is already URL-encoded, so it
# is passed to requests as a plain string.
# Repaired from a mangled paste: "&quoting" and "&current_view" /
# "&current_display" had been HTML-entity-decoded into '"ing' and '¤t_...',
# and the literal had been split across two lines inside the ajax_request
# value, which left the string unterminated.
data = 'base_url=public%2Freports%2Freal_estate&parent_request_id=5955C82C-014E-11ED-A791-8C0D896639A2&report_search=cert_status%09%2F%7C%2F%09tax_year%09%3E2014%09roll_year%09*%09cert_sale_date%09%2F%7C%2F%09exemption%09%2F%7C%2F%09deed_status%09%2F%7C%2F%09standard_flag%09%2F%7C%2F%09&report_search_current_user_inputs=&session_id=322919708739472562779456661040511933493&app_url=%2Ftcb%2Fapp&page_url=public%2Freports%2Freal_estate&report_name=Certificate+Information+by+Year&selected_report=624&filetype=csv&delimiter=comma&quoting=on&pdf_report_title=Certificate+Information+by+Year+Report+(Certificate+Search)&add_run_by_to_header=on&paper_size=letter&page_orientation=portrait&page_break=auto&break_selector=tax_year&shade_alternate_rows=on&pdf_report_description=&report_download_email_address=&report_took_too_long_download_email=&displayed_columns=certificate_number%7Ccert_status%7Ctax_year%7Croll_year%7Caccount_number%7Csitus_address%7Ccert_sale_date%7Cissued_date%7Cpurchased_date%7Ccertificate_rate%7Cinterest_start_date%7Cface_amount%7Credeemed_date%7Credemption_amount_paid%7Ctransferred_date%7Cexemption%7Cdeed_status%7Cbidder_number%7Ccertificate_buyer%7Cstandard_flag&hide_results=&sort_by_displayed=&hide_public=&display_name=&current_view=certs&current_display=data&select_view=certs&last_sort_col=1&sbgb_boundary=2&search_order_column1=tax_year&search_order_column2=&select_display=data&search_order_direction1=desc&report_search_dummy=&report_search_dummy=%2F%7C%2F&report_search_dummy=%3E2014&report_search_dummy=&report_search_dummy=&report_search_dummy=%2F%7C%2F&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=&report_search_dummy=%2F%7C%2F&report_search_dummy=%2F%7C%2F&report_search_dummy=&report_search_dummy=&report_search_dummy=%2F%7C%2F&rows_per_page=50&go_to_report_page=&report_page=1&backend=warehouse&print_cover_page=0&download_now=browser&ajax_request=0.4428397794570913&cookie_id=2025887579'
# POST the form to the report download endpoint with the captured cookies and headers.
response = requests.post('https://lee.county-taxes.com/tcb/app/public/reports/real_estate/download', cookies=cookies, headers=headers, data=data)
Unfortunately when I run this code I get the following output in response.content:
b'<ajax-response><response type=\'element_errors\'></response> <response type="element" id="message_container" class=""> </response> <response type="element" id="report_download_util"><![CDATA[ ]]></response> <response type="javascript"><![CDATA[\n file_download(d_location() + \'/download_pending_report?pending_report_file=pwHy_qVA5b\'); ]]></response>\n</ajax-response>'
I'm not sure what to make of this. Is it possible to download the CSV through an API call? Am I using the wrong call?
TIA.
The following works:
from httpx import Client
from bs4 import BeautifulSoup
data = {
'base_url':'public/reports/real_estate',
'parent_request_id':'4C4ACC20-0155-11ED-9D24-CAB03D8B3709',
'session_id':296334053076598741934874852698924119209,
'app_url':'/tcb/app',
'page_url':'public/reports/real_estate',
'report_name':'Active Certificate Vendors',
'selected_report':623,
'filetype':'csv',
'delimiter':'comma',
'quoting':'on',
'pdf_report_title':'Active Certificate Vendors Report (Certificate Sale Bidders)',
'add_run_by_to_header':'on',
'paper_size':'letter',
'page_orientation':'portrait',
'page_break':'auto',
'break_selector':'bidder_number',
'shade_alternate_rows':'on',
'displayed_columns':'vendor_number|bidder_name|full_name|bidder_number|address_lines|business_telephone|email',
'current_view':'cert_sale_bidders',
'current_display':'data',
'select_view':'cert_sale_bidders',
'last_sort_col':1,
'sbgb_boundary':2,
'search_order_column1':'bidder_number',
'select_display':'data',
'search_order_direction1':'asc',
'rows_per_page':50,
'report_page':1,
'backend':'mysql',
'print_cover_page':0,
'reset_report_name':0,
'preserve_messages':0,
'preserve_backend':0,
'preserve_collapse':0,
'ajax_request':0.6517064905478597,
'cookie_id':1982672363
}
# One client for the request: shared headers, a 60 s timeout (the report can be
# slow to generate), and redirects followed automatically.
with Client(headers=headers, timeout=60.0, follow_redirects=True) as client:
    r = client.post('https://lee.county-taxes.com/tcb/app/public/reports/real_estate/report_results', data=data)
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so results can differ
    # between machines.
    soup = BeautifulSoup(r.text, 'html.parser')
    # The rendered report is the element with id "report_results__results".
    profit = soup.select_one('#report_results__results')
    print(profit.text)
This returns:
Lee County
Lee County
1
c/o County LandsPO Box 398
countyrevenuetransmittals#leeclerk.org
Venus 1 LLC
Venus 1 LLC
70
P O Box 25177
305-913-3333
Hgoldenberg#g-g-h.com
.....
Related
I am trying to scrape multiple pages of JSON results, but the request gives me an error:
import requests
import json
import pandas as pd
headers = {
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'Connection': 'keep-alive',
'Origin': 'https://www.nationalhardwareshow.com',
'Referer': 'https://www.nationalhardwareshow.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'accept': 'application/json',
'content-type': 'application/x-www-form-urlencoded',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
params = {
'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
'x-algolia-application-id': 'XD0U5M6Y4R',
'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47',
}
for i in range(0,4):
data = '{"params":"query=&page={i}&facetFilters=&optionalFilters=%5B%5D"}'
resp = requests.post('https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query', params=params, headers=headers, data=data).json()
req_json=resp
df = pd.DataFrame(req_json['hits'])
f = pd.DataFrame(df[['name','representedBrands','description']])
print(f)
the error :
Traceback (most recent call last):
File "e:\ScriptScraping\Extract data from json\uk.py", line 31, in <module>
df = pd.DataFrame(req_json['hits']) KeyError: 'hits'
Try concatenating the variable i into the data parameter:
import requests
import json
import pandas as pd
headers = {
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'Connection': 'keep-alive',
'Origin': 'https://www.nationalhardwareshow.com',
'Referer': 'https://www.nationalhardwareshow.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'accept': 'application/json',
'content-type': 'application/x-www-form-urlencoded',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
params = {
'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
'x-algolia-application-id': 'XD0U5M6Y4R',
'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47'
}
# Fetch result pages 0-3, keep three columns of each page's hits, and combine
# the pages into a single DataFrame.
lst = []
for i in range(0, 4):
    # Doubled braces are literal JSON braces; {i} is replaced by the page number.
    data = f'{{"params":"query=&page={i}&facetFilters=&optionalFilters=%5B%5D"}}'
    resp = requests.post('https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query', params=params, headers=headers, data=data)
    # Surface a rejected request (e.g. 400 Bad Request) as an HTTP error here,
    # rather than as a confusing KeyError on 'hits' further down.
    resp.raise_for_status()
    req_json = resp.json()
    df = pd.DataFrame(req_json['hits'])
    lst.append(df[['name', 'representedBrands', 'description']])
d = pd.concat(lst)
print(d)
The server returns status code 400 (Bad Request) because the data is wrongly formatted: the string is not an f-string, so {i} is sent literally instead of the page number. Change:
data = '{"params":"query=&page={i}&facetFilters=&optionalFilters=%5B%5D"}'
To
data = '{"params":"query=&page='+str(i)+'&facetFilters=&optionalFilters=%5B%5D"}'
For it to work. Hope I could help.
I am having trouble reading data from a url with BeautifulSoup
This is my code:
url1 = "https://www.barstoolsportsbook.com/events/1018282280"
from bs4 import BeautifulSoup
import requests
headers = {
'User-agent':
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}
html = requests.get(url1, headers=headers)
soup = BeautifulSoup(html.text, 'lxml')
data = soup.findAll('div',attrs={"class":"section"})
print(data)
#for x in data:
# print(x.find('p').text)
When I print(data) I am returned []. What could be the reason for this? I would like to avoid using selenium for this task if possible.
This is the HTML for what I'm trying to grab
<div data-v-50e01018="" data-v-2a52296d="" class="section"><p data-v-5f665d29="" data-v-50e01018="" class="header strongbody2">HOT TIPS</p><p data-v-5f665d29="" data-v-50e01018="" class="tip body2"> The Mets have led after 3 innings in seven of their last nine night games against NL East Division opponents that held a losing record. </p><p data-v-5f665d29="" data-v-50e01018="" class="tip body2"> The 'Inning 1 UNDER 0.5 runs' market has hit in each of the Marlins' last nine games against NL East opponents. </p></div>
You can likely get what you want with this request:
import requests
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0',
'Accept': 'application/json, text/plain, */*',
'Accept-Language': 'en-US,en;q=0.5',
'Authorization': 'Basic MTI2OWJlMjItNDI2My01MTI1LWJlNzMtMDZmMjlmMmZjNWM3Omk5Zm9jajRJQkZwMUJjVUc0NGt2S2ZpWEpremVKZVpZ',
'Origin': 'https://www.barstoolsportsbook.com',
'DNT': '1',
'Connection': 'keep-alive',
'Referer': 'https://www.barstoolsportsbook.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
}
response = requests.get('https://api.isportgenius.com.au/preview/1018282280', headers=headers)
response.json()
You should browse the network tab to see where the rest of the data is coming from, or use a webdriver.
import requests
url = "https://baroul-timis.ro/get-av-data?param=toti-avocatii"
payload={}
headers = {
'Accept': 'text/html, */*; q=0.01',
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie': '_csrf-frontend=ccc4c9069d6ad3816ea693a980ecbebda2770e9448ffe9fed17cdf397a5e2851a%3A2%3A%7Bi%3A0%3Bs%3A14%3A%22_csrf-frontend%22%3Bi%3A1%3Bs%3A32%3A%22J3N0AJG6xybnGl91dfrlt-qMOk3hfbQ6%22%3B%7D',
'Pragma': 'no-cache',
'Referer': 'https://baroul-timis.ro/tabloul-avocatilor/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
resp= requests.request("GET", url, headers=headers, data=payload).json()
sample=resp['data']
for test in sample:
product=test['actions']
print(product)
It gives me this HTML:
<i class="fas fa-address-book" aria-hidden="true"></i>
But I want only the link shown below. I also want to scrape multiple pages, but the URL is the same for every page:
/tabloul-avocatilor/avocat/av-felicia-petre
To get all 948 names and links you can use next example:
import requests
from bs4 import BeautifulSoup
url = "https://baroul-timis.ro/get-av-data?param=toti-avocatii"
data = requests.get(url).json()

# Row layout: running number, first name, last name (both padded to 30), link.
row_template = "{:<3} {:<30} {:<30} {}"

for position, entry in enumerate(data["data"], 1):
    given_name = entry["firstname"]
    # "lastname" and "actions" are parsed as HTML: the visible text is kept for
    # the name, and the first anchor's href for the profile link.
    family_name = BeautifulSoup(entry["lastname"], "html.parser").text
    profile_link = BeautifulSoup(entry["actions"], "html.parser").a["href"]
    # Names are cut to 29 characters so the 30-wide columns stay aligned.
    print(row_template.format(position, given_name[:29], family_name[:29], profile_link))
Prints:
...
943 Adela-Ioana FRUNZĂ /tabloul-avocatilor/avocat/av-adela-frunza
944 Marina GLIGOR-VOLSCHI /tabloul-avocatilor/avocat/av-marina-gligor-volschi
945 Denis-Alexandru TOTH /tabloul-avocatilor/avocat/av-denis-toth
946 Raluca-Roxana ȘURIANU /tabloul-avocatilor/avocat/av-raluca-surianu
947 Alexandra-Bianka CIOBANU /tabloul-avocatilor/avocat/av-alexandra-ciobanu
948 Alexandra-Oana OLARIU /tabloul-avocatilor/avocat/av-alexandra-olariu
I know you can loop through page numbers in a URL, but is it possible to loop through pages in a payload? I would guess you need Beautiful Soup? At the end of the payload, there is this code:
page=1&start=0&limit=250
Currently, I am just getting page 1 with 250 lines. I need to loop through the code and retrieve page=2&start=251&limit=250 and the subsequent 11 pages. Could anyone show me how to do this?
Working Code for first 250 Lines
import requests
import pandas as pd
def stock_data(stock_info, path="data.csv", append=False):
    """Write the rows in *stock_info* to a CSV file, dropping the first four columns.

    Args:
        stock_info: Anything ``pandas.DataFrame`` accepts, e.g. a list of
            row dicts or an existing DataFrame.
        path: Destination CSV file. Defaults to ``"data.csv"``.
        append: When true and *path* already holds data, add the rows to the
            end of the file without repeating the header. When false (the
            default) the file is replaced, as before.
    """
    import os

    # Columns 0-3 are discarded; only column 4 onwards is written.
    data = pd.DataFrame(stock_info).iloc[:, 4:]
    if append and os.path.exists(path) and os.path.getsize(path) > 0:
        data.to_csv(path, mode="a", header=False, index=False)
    else:
        data.to_csv(path, index=False)
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644876887410"
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B76%2C77%2C50%2C80%2C547%2C13%2C21%2C771%5D%2C%22view%22%3A313%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_42%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22ANDE%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22ANDE%22%5D%2C%22lastQuantNode%22%3A%5B%22s_42%22%2C%22s_42%22%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22comparisonPanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%2
0Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22ANDE%22%5D%2C%22lastSelectionScreeners%22%3A%22s_42%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_42%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644837064%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': 'fAeVScD26lby5MQf5YFI5p3snudo3E+rw0TL0h1W3j/vcjsIMvgxAF5Z9DkMjjCU4trT/b4EV0VCCPvmms5VIw==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/all/313/s_42/ANDE',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644876886; _Ruby2_session=OEs3djBGVmtrSXhSWjFibnp2ck9vem8vd1VmQ00wUkZCVWlaWmYzOFBQQUJyemc0MFNjMlVmTGRUeFpNSTFIRGliVitnb2M1L2JNcCt5SnQxN2xtZDV5M20waEd0elh3aUU3R0k3YnJiVy9BcUhObGpBUU54c2tQRE9RZHBZWk8wa0NBQXJub2tHU3pHZmUvM3dRNGZvVStsT0ZEbUNEa3ZyVDkxdDA5S1B5d3VZWlZNUERxL01VNlYzdHhBci9uSnduOGxodk8rSDJLaUJIWTVyM25GL3o4RHYva3pGeDdIY1NJODV4WkV4MnRiR2RoNHZDUktPWSsxaElPdXNEU0tSaCtKL1o0K2FsSE1rcVI2TkgxOFo0MktSL1JRWjgzRllBZGFhMjg4bitpTUJzQjR6Nk5OZzhzUFMzVGRpVFNZOUxrTnhWNTB2K0dYNXdDcFFXZnpZdlZleFR2cWY5cmJqaDNiY0JhWVJJT0s1TDEvOHU0UTV6NW5uTjcwZjFzdHpxeFg0cVQ3NTRob2xTMlA2ZDhCQT09LS1LOEljN3laVTlBVUpsVVo3c093Y2NnPT0%3D--b963330daa985315420ea5893f1cfa3e3a54c9d5; _Ruby2_session=UmE1NVo0cjJ1YmR3RGlQUis0V3JVWUprR0xtbzVaV0k2NTNhUmFUU2VsNVB4WFN6M0Y5M0xIVmpMdGdBTEgzMDZwb3JZNGpJRmFkdjRyWG9iY3NGT1pENTRaaXdZZit3b3FCZzdvMTVxZ3p2RGpUUUtyTGpOcjhZcXVxS0FIcElBTnhtZDlJQ3g3d1o2bUF3Tzl4NnIyZ2ZHYmMyb09vVnMwaGU5SEsvTWRwaHdFeUNGUDJZUDY4eXEwRlcyYzFqVVh2VzVjcEhDSU9jOUw4NmVJcmd6aHEwVUUya25Yb0Y5d21hY3g5VTdxbi81dkdiZ2Qrc0YrdEZtNWU5c3ZlcHFRSXVqcXdBVEM1RnV5bFo3N3hQNzg4Q1lJWTRtakp2MHJyN3gvUEtvN0h2R3lTZEFqZkwrVFlDRlk2czZoaDBQcXltUjdQbUNiTXJWMW42WnlPdUc2ZGxzUXRzY3JuYTN1V1VIMkVvdHVpeE40N0l6SjVLelJhUGh6aXBrZ3B2V1gySG1YRUVLbGZGYzQzRzR3QjJwSTFieDZRQ242QTdKNlNEWVFIQ3lTWHJPQzUzZ1lKVzl5S0FoZWxHQ0xnTy0tODZDejdtSmFDek5GeUVMMnJweTJtUT09--a126f3bcc5b8af0a5a824e6b674d55f1fe9ee12e; lr=1644876939'
}
# The payload literal ends with "&page=1&start=0&limit=250" and contains no
# "{}" fields, so payload.format(...) would return it unchanged and every
# request would fetch page 1. Cut off the fixed paging suffix once and append
# the correct values for each page instead.
page_size = 250
base_payload = payload.rsplit("&page=", 1)[0]
frames = []
for page in range(3):
    pld = f"{base_payload}&page={page + 1}&start={page * page_size}&limit={page_size}"
    response = requests.request("POST", url, headers=headers, data=pld)
    stock_info = response.json()['stock_infos']
    frames.append(pd.DataFrame(stock_info))
# Write once at the end: stock_data() rewrites data.csv on each call, so
# calling it per page would leave only the last page in the file.
stock_data(pd.concat(frames, ignore_index=True))
Here's how you do it in a loop. This works; I've tried it here.
# payload ends with a fixed "&page=1&start=0&limit=250" and has no "{}" fields,
# so str.format() cannot change it. Strip that suffix and build the paging
# parameters for each request explicitly.
base_payload = payload.rsplit("&page=", 1)[0]
for page in range(3):
    pld = f"{base_payload}&page={page + 1}&start={page * 250}&limit=250"
    response = requests.request("POST", url, headers=headers, data=pld)
    stock_info = response.json()['stock_infos']
    stock_data(stock_info)
You will, of course, need to modify your code so that stock_data doesn't overwrite the CSV file every time. You can either append to one big dataframe, or append to the CSV file.
My request:
# python 3.7.3
import requests
from requests import Session
session = Session()
session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')
cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
response = session.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
cookies=cookies,
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
Output:
<input type="hidden" class="relativeToAbsolute" value="true" />
/*
{
"onlineInventory": {
"status": "Status Not Available",
"image": "widget_product_info/outofstock_icon.svg",
"altText": "Status Not Available",
"isDropShip": false,
"availableDate":""
},
"inStoreInventory": {
"stores": [],
"checkStoreText": "Check Store Availability",
"isInStoreInventory": true,
"isPickupInventory": false
}
}
*/
My output when inspecting and running the same AJAX request via browser:
/*
{
"onlineInventory": {
"status": "Backordered",
"image": "widget_product_info/backordered_icon.svg",
"altText": "Backordered",
"isDropShip": false,
"quantity": 0,
"availableDate":"May 1-8"
},
"inStoreInventory": {
"stores": [{
id: '715839555',
name: '83',
gunRestricted: 'false',
dsName: 'TX - Round Rock',
status: 'Unavailable',
statusText: 'Out of Stock',
image: 'widget_product_info/outofstock_icon.svg',
altText: 'Out of Stock',
availableDate: '',
availableQuantity: '',
availableQuantityDisplay: 'false',
cityState: 'Round Rock, TX',
ISPavailableDate: '',
ISPavailableQuantity: '',
pickupTime: 'by 2:00pm',
offerISPOnBPS: 'Yes',
offerISPOnCAB: 'No'}],
"checkStoreText": "Change Store",
"isInStoreInventory": true,
"isPickupInventory": true
}
}
*/
I tried assigning cookies this way as well:
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
r = requests.get(url)
cookies = r.cookies
# fails to pass the right cookie
If I instead copy the cookie verbatim from an inspected GET request at https://www.basspro.com/shop/en/blazer-brass-handgun-ammo and put that into the POST headers, it works. How do I get cookies to work properly programmatically?
EDIT:
Here's my attempt at just using Session() for cookies:
# python 3.7.3
import requests
from requests import Session
session = Session()
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
# session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')
response = session.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
I get the same result as before ("status": "Status Not Available", etc.)
Here's my attempt at the second solution:
# python 3.7.3
import requests
from requests import Session
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
r = requests.get(url)
cookies = r.cookies # the type is RequestsCookieJar
response = requests.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
cookies=cookies,
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
Again, I get the same result as before. What am I doing wrong?
Can you try it like this:
session = Session()
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
Then make all the following calls with
session.xxx
and do not pass the cookies parameter to them.
Another way I have tested:
cookies = r.cookies # the type is RequestsCookieJar
requests.post(.... cookies=cookies...)
Finally, I tested that the following works.
Please compare it carefully with your code:
from requests import Session
# Load the product page through a Session first; the cookies from that
# response (r1.cookies) are then sent along with the inventory POST below.
session = Session()
# The same browser-like user agent is used for both requests.
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
r1 = session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo",headers={'user-agent': agent})
# POST to the online-inventory view with the product, catalog, store and
# language ids as form fields; only two headers are sent.
response = session.post(
    url='https://www.basspro.com/shop/BPSGetOnlineInventoryStatusByIDView',
    data={
        'productId': '3074457345616736172',
        'catalogId': '3074457345616676768',
        'storeId': '715838534',
        'langId':-1
    },
    headers={
        'user-agent': agent,
        'x-requested-with': 'XMLHttpRequest',
    },
    cookies=r1.cookies
)
print(response.text)