I can't send form data to Python Jupyter

I can't send form data to Python Jupyter - python

I try to build a python script which sends a POST with parameters for extracting the result, but I don't know where is my problem or why I can't get the page result with the html that I need...
import requests
url = ('https://ar.ec.universal-assistance.com/cotizar-asistencia-al-viajero')
data = {
'__RequestVerificationToken':'QWsTn0wqFmW9_jFfaBuuOjaWM4TE2Xk1XGn-oDTp0TENBO725YSkGnK8WeiAN53-jiPnjTDJ6zbZQjb6SzpprdCT4OlJg9jjZJKx1Wh7fGkZ5yCLkArUWCp6AIwq0t12gsonhP3orHzFJ2_1YqvIfJMcnzn2aXCb1-ZrDOzHM701',
'CCTLD':'.ar',
'CodigoOrganizacion':"",
'CodigoConvenio':"",
'OcultarTipoViaje':'false',
'CantidadPasajeros':1,
'CantidadDias':3,
'Origen':'ARGENTINA',
'Destino':'Centro america/Caribe',
'TipoViaje':'Un viaje',
'FechaInicio':'20/06/2019',
'FechaFin':'22/06/2019',
'Edad1':27,
'Edad2':"",
'Edad3':"",
'Edad4':"",
'Edad5':"",
'Edad6':"",
'Edad7':"",
'Edad8':"",
'Edad9':"",
'Edad10':"",
'Email':'no#no.com',
'Nombre':'PEDRO',
'Apellido':'PEREZ',
'CodigoArea':800,
'NumeroTelefono':9997777,
'dr':"",
'cn':'(direct)',
'cs':'(direct)',
'cm':'(none)',
'ck':'(not set 5)',
'cc':'(not set 5)',
'ua':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
'ref':'ar.ec.universal-assistance.com',
'sr':'1366x768',
'vp':'1366x728'
}
resp = requests.post(url = url, data = data )
print(resp.text)
And I tried:
import requests
url = ('https://ar.ec.universal-assistance.com/cotizar-asistencia-al-viajero')
header = {
":authority": "ar.ec.universal-assistance.com",
":method": "POST",
":path": "/cotizar-asistencia-al-viajero",
":scheme": "https",
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"accept-encoding": "gzip, deflate, br",
"accept-language": "es-ES,es;q=0.9",
"cache-control": "max-age=0",
"content-length": "921",
"content-type": "application/x-www-form-urlencoded",
"cookie":"__cfduid=db577552fb94c6b34d51ff081f56060601559763003; _ga=GA1.2.1051767382.1559763017; _fbp=fb.1.1559763016835.1821187230; ASP.NET_SessionId=zirpp0zdbsvt4p102zvfknic; __RequestVerificationToken=RuGNfaFUJxBI4FDOaVsMJBdBNwbqzUt_AMjdUu6Am3T6kpBrZ5__wM8CiDO3Ttw6z6iBseVrGvzsyD-GCoWI2XRuhHpJB3-qu7qXvjoDu3NQL6onXupDL1E4ZkUXuHpDSPi0mjQ7F5PSFf2l_SGtDA2; _gid=GA1.2.1308535569.1560799988; _gat=1",
"origin": "https://ar.ec.universal-assistance.com",
"referer": "https://ar.ec.universal-assistance.com/cotizar-asistencia-al-viajero",
"upgrade-insecure-requests": 1,
"user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
},
data = {
'__RequestVerificationToken':'QWsTn0wqFmW9_jFfaBuuOjaWM4TE2Xk1XGn-oDTp0TENBO725YSkGnK8WeiAN53-jiPnjTDJ6zbZQjb6SzpprdCT4OlJg9jjZJKx1Wh7fGkZ5yCLkArUWCp6AIwq0t12gsonhP3orHzFJ2_1YqvIfJMcnzn2aXCb1-ZrDOzHM701',
'CCTLD':'.ar',
'CodigoOrganizacion':"",
'CodigoConvenio':"",
'OcultarTipoViaje':'false',
'CantidadPasajeros':1,
'CantidadDias':3,
'Origen':'ARGENTINA',
'Destino':'Centro america/Caribe',
'TipoViaje':'Un viaje',
'FechaInicio':'20/06/2019',
'FechaFin':'22/06/2019',
'Edad1':27,
'Edad2':"",
'Edad3':"",
'Edad4':"",
'Edad5':"",
'Edad6':"",
'Edad7':"",
'Edad8':"",
'Edad9':"",
'Edad10':"",
'Email':'no#no.com',
'Nombre':'PEDRO',
'Apellido':'PEREZ',
'CodigoArea':800,
'NumeroTelefono':9997777,
'dr':"",
'cn':'(direct)',
'cs':'(direct)',
'cm':'(none)',
'ck':'(not set 5)',
'cc':'(not set 5)',
'ua':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
'ref':'ar.ec.universal-assistance.com',
'sr':'1366x768',
'vp':'1366x728'
}
resp = requests.post(url = url, data = data )
print(resp.text)
I expect the html of "https://ar.ec.universal-assistance.com/ofertas-asistencia-al-viajero"
That would be the next page from first url.

Related

Receiving hcaptcha error from discord while using 2captcha (works - working code pasted below)

from twocaptcha import TwoCaptcha
import json
import requests
import random
def rand(list):
return random.randrange(0, len(list))
user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1'
]
headers = {
'user-agent': user_agents[rand(user_agents)],
"accept": "*/*",
"authority": "discord.com",
"method": "POST",
"path": "/api/v9/auth/register",
"scheme": "https",
"origin": "discord.com",
"referer": "discord.com/register",
"x-debug-options": "bugReporterEnabled",
"accept-language": "en-US,en;q=0.9",
"connection": "keep-alive",
"content-Type": "application/json",
"x-super-properties": "eyJvcyI6IldpbmRvd3MiLCJicm93c2VyIjoiRGlzY29yZCBDbGllbnQiLCJyZWxlYXNlX2NoYW5uZWwiOiJzdGFibGUiLCJjbGllbnRfdmVyc2lvbiI6IjEuMC45MDAzIiwib3NfdmVyc2lvbiI6IjEwLjAuMjIwMDAiLCJvc19hcmNoIjoieDY0Iiwic3lzdGVtX2xvY2FsZSI6ImVuLVVTIiwiY2xpZW50X2J1aWxkX251bWJlciI6MTA0OTY3LCJjbGllbnRfZXZlbnRfc291cmNlIjpudWxsfQ==",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin"
}
solver = TwoCaptcha('redacted')
try:
result = solver.hcaptcha(
sitekey='4c672d35-0701-42b2-88c3-78380b0db560',
url='https://discord.com/register',
)
except Exception as e:
sys.exit(e)
else:
print(result['code'])
def getfingerprint():
request_url = "https://discord.com/api/v9/experiments?with_guild_experiments=true"
r = requests.get(request_url , headers = {
"user-agent": user_agents[rand(user_agents)],
"x-context-properties": "eyJsb2NhdGlvbiI6Ii9jaGFubmVscy9AbWUifQ==",
"x-super-properties": "eyJvcyI6IldpbmRvd3MiLCJicm93c2VyIjoiQ2hyb21lIiwiZGV2aWNlIjoiIiwic3lzdGVtX2xvY2FsZSI6ImVuLVVTIiwiYnJvd3Nlcl91c2VyX2FnZW50IjoiTW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzEwNS4wLjAuMCBTYWZhcmkvNTM3LjM2IiwiYnJvd3Nlcl92ZXJzaW9uIjoiMTA1LjAuMC4wIiwib3NfdmVyc2lvbiI6IjEwIiwicmVmZXJyZXIiOiIiLCJyZWZlcnJpbmdfZG9tYWluIjoiIiwicmVmZXJyZXJfY3VycmVudCI6IiIsInJlZmVycmluZ19kb21haW5fY3VycmVudCI6IiIsInJlbGVhc2VfY2hhbm5lbCI6InN0YWJsZSIsImNsaWVudF9idWlsZF9udW1iZXIiOjE0NTQyOSwiY2xpZW50X2V2ZW50X3NvdXJjZSI6bnVsbH0="
})
if r.status_code == 200:
fingerprint = json.loads(r.text)["fingerprint"]
print(fingerprint)
else:
return r.text
payload = {
'captcha_key': result['code'],
'consent': True,
'date_of_birth': '2001-11-19',
'email': 'enter email',
'fingerprint': getfingerprint(),
'gift_code_sku_id': None,
'invite': None,
'password': 'enter username',
'username': 'enter password'
}
def register():
data = payload
res = requests.post('https://discord.com/api/v9/auth/register', headers = headers, json = data)
print('Token: ' + res.text)
if __name__ == '__main__':
register()
Code now works and generates accounts
just have to enter your own emails, username and password
can tell when it works cuz it outputs a token
if you get an error mentioning invalid hcaptcha response change email and use a vpn and try again
if you do decide to loop this have it loop slowly or setup proxies

Changing layout and adding titles to JSON file with python

I'm trying to extract data from two different api endpoints and create a JSON file with said data. I wish to have titles for each object to distinguish the different data. My code is below:
import requests
import json
headers = {
'accept-language': 'en-US,en;q=0.9',
'origin': 'https://www.nasdaq.com/',
'referer': 'https://www.nasdaq.com/',
'accept': 'application/json, text/plain, */*',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
}
dataAAPL = requests.get('https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=15&type=ALL&sortColumn=lastDate&sortOrder=DESC',
headers=headers).json()['data']['numberOfSharesTraded']['rows'][3]
dataMSFT = requests.get('https://api.nasdaq.com/api/company/MSFT/insider-trades?limit=15&type=ALL&sortColumn=lastDate&sortOrder=DESC',
headers=headers).json()['data']['numberOfSharesTraded']['rows'][3]
with open('AAPL_insider_piechart.json', 'w') as f:
json.dump(dataAAPL, f, indent=4)
json.dump(dataMSFT, f, indent=4)
And this is the output JSON:
{
"insiderTrade": "Net Activity",
"months3": "(1,317,881)",
"months12": "(1,986,819)"
}
{
"insiderTrade": "Net Activity",
"months3": "185,451",
"months12": "31,944"
}
What I need, is for the JSON to look something like this:
{
"AAPL":[
{
"insiderTrade": "Net Activity",
"months3": "(1,317,881)",
"months12": "(1,986,819)"
}
],
"MSFT":[
{
"insiderTrade": "Net Activity",
"months3": "185,451",
"months12": "31,944"
}
]
}

Just create a dictionary and place the two values in it before dumping it as JSON.
Solution
import requests
import json
headers = {
'accept-language': 'en-US,en;q=0.9',
'origin': 'https://www.nasdaq.com/',
'referer': 'https://www.nasdaq.com/',
'accept': 'application/json, text/plain, */*',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
}
dataAAPL = requests.get('https://api.nasdaq.com/api/company/AAPL/insider-trades?limit=15&type=ALL&sortColumn=lastDate&sortOrder=DESC',
headers=headers).json()['data']['numberOfSharesTraded']['rows'][3]
dataMSFT = requests.get('https://api.nasdaq.com/api/company/MSFT/insider-trades?limit=15&type=ALL&sortColumn=lastDate&sortOrder=DESC',
headers=headers).json()['data']['numberOfSharesTraded']['rows'][3]
with open('AAPL_insider_piechart.json', 'w') as f:
obj = {
"AAPL": [dataAAPL],
"MSFT": [dataMSFT]
}
json.dump(obj, f, indent=4)

Getting Response 500 python

I want to get access from this website https://www.truepeoplesearch.com/ but I am getting 500 error using requests I have also try proxies Api to get 200 response but still getting same response
Here is my code:
import requests
# headers = {
# 'authority': 'www.truepeoplesearch.com',
# 'method': 'GET',
# 'path': '/?__cf_chl_rt_tk=yRTk.U51_2F3gk2zvxX_GLO5MgGmfwXsQeScGiGloJM-1637358329-0-gaNycGzNCpE',
# 'scheme': 'https',
# 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
# 'accept-encoding': 'gzip, deflate, br',
# 'accept-language': 'en-US,en;q=0.9',
# 'cache-control': 'max-age=0',
# 'cookie':' __cf_bm=jDWsHwlBvdk987UFEnrW63LelWWz03HXgZg3jDPaYY0-1637358168-0-AeyRiZHGcxyaD4j/7LGGq1aVmo5sBj/qmFX58OY4gfcUtvfzaG1exKf4HiYNNAlSaqm6LZ3MWB2UBgaOeIugrxA=; aegis_uid=26435771721; aegis_tid=direct; _ga=GA1.2.1056913930.1637358268; _gid=GA1.2.364813628.1637358268; cf_chl_prog=F13; cf_chl_rc_m=7',
# 'referer': 'https://www.truepeoplesearch.com/?__cf_chl_rt_tk=yRTk.U51_2F3gk2zvxX_GLO5MgGmfwXsQeScGiGloJM-1637358329-0-gaNycGzNCpE',
# 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
# 'sec-ch-ua-mobile':' ?0',
# 'sec-ch-ua-platform':' "Windows"',
# 'sec-fetch-dest':' document',
# 'sec-fetch-mode': 'navigate',
# 'sec-fetch-site':' same-origin',
# 'upgrade-insecure-requests': '1',
# 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
# }
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
payload = {'api_key': 'dd6a50aba760cc67e0276d840a7989cd', 'url': 'https://www.truepeoplesearch.com/'}
r = requests.get('http://api.scraperapi.com', params=payload, headers= headers)
print(r)
I have try both headers but didn't get 200 response, Anybody have any idea what I am doing wrong?

Web scraping - his application is out of date, please click the refresh button on your browser return message

I would like to scrape some data from the following web site: https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract
Steps I would like to automate are:
Choose "Opcinski sud/ZK odjel". For example choose "Zemljišnoknjižni odjel Benkovac".
Choose "Glavna knjiga". For example choose "BENKOVAC"
Enter "Broj kat. čestice:". For example, enter 576/2.
Select "Da" in "Povijesni pregled" (the last row, leave "Broj ZK uloska empty").
Click "Pregledaj" and solve the captcha.
Scrape html that appers.
I have tried to follow above steps using plain requests in python by following network, after opening inspector in the web browser.
There are lots of requests on the page. I will divide my code in several steps:
Start session and make requests that on the start of the page
import requests
import re
import shutil
from twocaptcha import TwoCaptcha
import pandas as pd
import numpy as np
import os
from pathlib import Path
import json
import uuid
# start session
url = 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
session = requests.Session()
session.get(url)
jid = session.cookies.get_dict()['JSESSIONID']
# some requests on the start of the page (probabbly redundandt)
headers = {
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
}
session.get("https://oss.uredjenazemlja.hr/public/js/libs/modernizr-2.5.3.min.js", headers = headers) #
session.get("https://oss.uredjenazemlja.hr/public/js/libs/jquery-1.7.1.min.js", headers = headers) #
session.get("https://oss.uredjenazemlja.hr/public/js/script.js", headers = headers) # script.json
# no cache json
headers = {
'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
"Connection": "keep-alive",
'Host': 'oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
"sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
"sec-ch-ua-mobile": "?0",
"Sec-Fetch-Dest": "script",
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Site": "same-origin",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
res = session.get('https://oss.uredjenazemlja.hr/public/gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js', headers = headers)
cache_html = re.findall(r'bc=\'(.*\.cache.html)\',C', res.text)[0]
# cache_html = "1F6C776DEF6D55F56C900B938F84D726.cache.html"
# some more requests on the start of the page (probabbly redundandt)
headers = {
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
}
session.get("https://oss.uredjenazemlja.hr/public/gwt/tiny_mce_editor/tiny_mce_src.js", headers = headers) # tiny_mce_src.js
session.get("https://oss.uredjenazemlja.hr/public/gwt/js/common.js", headers = headers)
session.get("https://oss.uredjenazemlja.hr/public/gwt/js/blueimp_tmpl.js", headers = headers) # blueimp_tmpl.js
# cache json
headers = {
"DNT": "1",
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
"sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'iframe',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
session.get('https://oss.uredjenazemlja.hr/public/gwt/' + cache_html, headers = headers)
Then, I made requests for steps 1 and 2 above:
# commonRPCService opcinski sud 1
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection': 'keep-alive',
# 'Content-Length': '166',
'Content-Type': 'text/x-gwt-rpc; charset=UTF-8',
'Cookie': 'gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; __utma=79801043.802441445.1616788486.1616788486.1616788486.1; __utmz=79801043.1616788486.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=' + jid,
"DNT": "1",
'Host': 'oss.uredjenazemlja.hr',
'Origin': 'https://oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/gwt/' + cache_html,
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
payload = '5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
headers = headers,
data=payload
)
print(res.text)
# commonRPCService opcinski sud 2
payload = '5|0|18|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getLrInstitutions|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.Institution|name||1|2|3|4|1|5|6|7|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|15|16|17|15|18|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
# print(res.text)
# commonRPCService glavna knjiga 1
payload = '5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
# commonRPCService glavna knjiga 2
payload = ('5|0|34|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|institutionId|parentInstitution|name|Općinski sud u Zadru|hr.ericsson.jis.domain.admin.Institution|institutionType|institutionTypeId|hr.ericsson.jis.domain.admin.InstitutionType|source|superviseInstitutionId|Zemljišnoknjižni odjel Benkovac|place|BENKOVAC|preconditionsRequired||1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|6|13|7|-2|9|-2|20|12|500|21|6|8|7|-2|9|-2|10|-2|20|12|605|11|12|0|13|-2|22|16|23|15|16|24|25|6|7|7|-2|9|-2|10|-2|26|12|14|11|-11|13|-2|15|16|27|28|12|1|10|-2|29|-10|11|-11|13|-2|22|16|30|31|16|32|15|-13|33|-2|22|16|34|').encode("utf-8")
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
Than I solve the captcha:
# some captcha post
payload = ('5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|').encode('utf-8')
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
# get and save captcha
TWO_CAPTCHA_APY_KEY = "myapikey"
solver = TwoCaptcha(TWO_CAPTCHA_APY_KEY)
save_path = 'D:/zkrh/captchas'
p = session.get('https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617088523212',
headers=headers,
stream=True)
captcha_path = os.path.join(Path(save_path), 'captcha' + ".jpg")
with open(captcha_path, 'wb') as out_file:
shutil.copyfileobj(p.raw, out_file)
# solve captcha
result = solver.normal(captcha_path, minLength=5, maxLength=5)
payload = ('5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|' +
result['code'] + '|1|2|3|4|1|5|6|').encode('utf-8')
res = requests.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
if res.text.startswith("//OK"):
os.rename(captcha_path, os.path.join(Path(save_path), result['code'] + ".jpg"))
else:
print("Kriva captcha. Rijesi!")
Now, here is the most important request and I can't get the right output from it. It should return lots of numbers where the most important number is one with 7 digits (\d{7}. the should be 1 or more of such numbers). I can use that number in the last step, to get html Here is my try:
payload = ('5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitsByMainBookAndParcel|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|BENKOVAC|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Benkovac|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Benkovac|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|576/2|1|2|3|4|2|5|6|7|18|8|9|115|10|21|10|11|0|12|-3|13|7|3|14|15|98|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-9999|25|7|8|10|-3|12|-3|26|-3|27|15|117|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|500|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|15|0|29|-3|30|-9|36|37|4730091|0|14|15|30857|16|17|38|39|-19|40|').encode('utf-8')
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
It returns:
"//EX[2,1,["com.google.gwt.user.client.rpc.IncompatibleRemoteServiceException/3936916533","This application is out of date, please click the refresh button on your browser. ( Blocked attempt to access interface 'hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService', which is not implemented by 'hr.ericsson.oss.ui.common.server.core.rpc.CommonRPCService'; this is either misconfiguration or a hack attempt )"],0,5]"
instead of numbers as I explained before.
Then, in the last step, I should use 7 digit number as lrUnitNumber parameter
# Publicreportservlet
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '169',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
'Host': 'oss.uredjenazemlja.hr',
'Origin': 'https://oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'Sec-Fetch-Dest': 'iframe',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
dataFrom = {
'pia': 1,
'report_type_id': 4,
'report_type_name': 'bzp_izvadak_oss',
'source': 1,
'institutionID': 500,
'mainBookId': 30857,
'lrUnitNumber': 5509665,
'lrunitID': 5799992,
'status': '0,1',
'footer': '',
'export_type': 'html'
}
res = session.post(
'https://oss.uredjenazemlja.hr/servlets/PublicReportServlet',
data=dataFrom,
headers=headers
)
res
I am providing the R ode too. Maybe someone with R and web scraping knowledge can help:
library(httr)
library(rvest)
library(stringr)
library(reticulate)
twocaptcha <- reticulate::import("twocaptcha")
# captcha python library
TWO_CAPTCHA_APY_KEY = ".."
solver = twocaptcha$TwoCaptcha(TWO_CAPTCHA_APY_KEY)
#
url = 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
session = GET(url)
jid <- cookies(session)$value
headers_cache = c(
'Referer'= 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
session <- rvest:::request_GET(content(session), 'https://oss.uredjenazemlja.hr/public/gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js',
add_headers(headers_cache))
cache_html <- str_extract(session$response, "bc=\\'(.*\\.cache.html)\\',C")
cache_html <- gsub(".*=\\'|\\'.C", "", cache_html)
headers_cache = c(
'Referer'= 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
session <- rvest:::request_GET(session, paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html), add_headers(headers_cache))
# meta
commonRPCServiceUrl <- "https://oss.uredjenazemlja.hr/rpc/commonRPCService"
headers = c(
'Accept'= '*/*',
'Accept-Encoding'= 'gzip, deflate, br',
'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection'= 'keep-alive',
# 'Content-Length'= '166',
'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
'Host'= 'oss.uredjenazemlja.hr',
'Origin'= 'https://oss.uredjenazemlja.hr',
'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
'Sec-Fetch-Dest'= 'empty',
'Sec-Fetch-Mode'= 'cors',
'Sec-Fetch-Site'= 'same-origin',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
payload <- "5|0|22|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|preconditionsRequired|name|VELIKA GORICA|1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|0|20|-2|21|16|22|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
# captcha
payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
headers_captcha <- c(
"Accept"= "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
"Accept-Encoding"= "gzip, deflate, br",
"Accept-Language"=" hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7",
"Connection"= "keep-alive",
"Cookie"= paste0("gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=", jid),
"DNT"= "1",
"Host"= "oss.uredjenazemlja.hr",
"Referer"= "https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract",
"sec-ch-ua"= '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
"sec-ch-ua-mobile"= "?0",
"Sec-Fetch-Dest"= "image",
"Sec-Fetch-Mode"= "no-cors",
"Sec-Fetch-Site"= "same-origin",
"User-Agent"= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
)
captcha <- GET("https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617286122160", add_headers(headers_captcha))
# session <- rvest:::request_GET(session, "https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617286122160", add_headers(headers_captcha))
captcha$content
captcha$response$content
writeBin(captcha$content, "D:/zkrh/captchas/test.jpg")
result = solver$normal("D:/zkrh/captchas/test.jpg", minLength=5, maxLength=5)
payload <- paste0("5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|",
result$code, "|1|2|3|4|1|5|6|")
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(p$response$content, character(), endian = "little")
# ID!!!!!!
headers = c(
'Accept'= '*/*',
'Accept-Encoding'= 'gzip, deflate, br',
'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection'= 'keep-alive',
# 'Content-Length'= '166',
'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
'DNT' = '1',
'Host'= 'oss.uredjenazemlja.hr',
'Origin'= 'https://oss.uredjenazemlja.hr',
'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
'sec-ch-ua' = '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile' = "?0",
'Sec-Fetch-Dest'= 'empty',
'Sec-Fetch-Mode'= 'cors',
'Sec-Fetch-Site'= 'same-origin',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
payload <- paste0("5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitByMainBook|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|VELIKA GORICA|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Velika Gorica|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Velika Gorica|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|1|1|2|3|4|2|5|6|7|18|8|9|114|1|21|10|11|0|12|-3|13|7|3|14|15|102844|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-20|25|7|8|10|-3|12|-3|26|-3|27|15|32|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|277|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|-7|29|-3|30|-9|36|37|286610893|17179869184|14|15|21921|16|17|38|39|15|0|40|")
# Encoding(payload) <- "UTF-8"
# payload <- RCurl::curlEscape(payload)
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character())

I have found the error. The problem was in wrong url argument in one request.

You should really look into selenium and beautifulsoup4 to automate this - it's like requests on steroids.
see an example on my github: https://github.com/stevenhurwitt/reliant-scrape/blob/master/reliant_scrape.py

Download Access Denied

Introduction
Hi! Im trying to download application content-types from Microsoft TLU Delivery. Upon downloading, its always resulted with corrupted files. The downloaded files just have 1kb size, meanwhile the original ones have 200kb+. I already trying to use custom headers and cookies on the Download Module but its still the same. Here's my network connection that i get from chrome console :
Request URL: http://tlu.dl.delivery.mp.microsoft.com/filestreamingservice/files/f7ded4c4-f468-4bdc-96f0-03fd60c5ae81?p1=1604760658&p2=402&p3=2&p4=hcdsxul%2f8qkk8gjjjyomq0ki%2bxwjxulxxeninyf1jqrp3%2bsnzk%2fpwk4dgsnbkfzkzoor4%2bvaixilmxk6r%2bz6%2ba%3d%3d
Request Method: GET
Status Code: 200 OK
Remote Address: 8.241.131.254:80
Referrer Policy: strict-origin-when-cross-origin
#Response Header
Accept-Ranges: bytes
Age: 240667
Cache-Control: public, max-age=17280000
Connection: keep-alive
Content-Disposition: attachment; filename=Microsoft.Services.Store.Engagement_10.0.19011.0_x64__8wekyb3d8bbwe.Appx
Content-Length: 281728
Content-Type: application/octet-stream
Date: Sat, 21 Nov 2020 22:49:40 GMT
ETag: "cP6LQFFTB9bLaXCyN/YHr8kWbqM="
Expires: Wed, 09 Jun 2021 22:49:58 GMT
g: g
Last-Modified: Wed, 30 Jan 2019 14:28:15 GMT
MS-CorrelationId: c93312f7-dc52-41ef-b2d2-9294c1deda19
MS-CV: /24yhmSg0EivGZOl.0.3.8.2.1.0.0.22.2.6.2.1.1.0
MS-RequestId: 9378576c-a791-4636-93bf-7b8ef57c85a3
MSRegion: APAC
Server: Microsoft-IIS/10.0
X-AspNet-Version: 4.0.30319
X-AspNetMvc-Version: 5.2
x-ccc: SG
x-cid: 3
X-MS-Ref-OriginShield: Ref A: 4127806A5D794419ACF51D29DD883F50 Ref B: BAYEDGE0319 Ref C: 2019-01-30T14:32:41Z
X-MSEdge-Ref: Ref A: 6F76857D59174A4BA37F3076DE6671DC Ref B: SJCEDGE0406 Ref C: 2019-01-30T14:32:41Z
X-Powered-By: ASP.NET
#Request header
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Encoding: gzip, deflate
Accept-Language: en-US,en;q=0.9,id;q=0.8,pt;q=0.7
Connection: keep-alive
Cookie: _ga=GA1.2.282065737.1604285823; MUID=07B92E6562A266AE0A83211366A260EF; AAMC_mscom_0=REGION%7C3; aam_uuid=33272659623977483402062764826775668547; mbox=PC#f8f94dce007c4a3c88788082771689c3.38_0#1668947906|session#af45a09527ce4486be93aff7c2a9adf1#1605704965; _cs_c=0; _cs_id=cd07a4b3-19b2-a8c7-95e4-7b8fd7c286f9.1605703107.1.1605703107.1605703107.1594299326.1639867107380.Lax.0; _uetvid=c457b5101e8a11eb956e2f93309fcf0c; IR_PI=f62c6991-299a-11eb-980c-0abbe301118c%7C1605789507507
DNT: 1
Host: tlu.dl.delivery.mp.microsoft.com
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36
#Query String
p1: 1604760658
p2: 402
p3: 2
p4: hcdsxul/8qkk8gjjjyomq0ki+xwjxulxxeninyf1jqrp3+snzk/pwk4dgsnbkfzkzoor4+vaixilmxk6r+z6+a==
Problematic
Here's several download module that i've try:
from clint import textui
from clint.textui import progress
import urllib.request
import requests
from tqdm import tqdm
from urllib.request import FancyURLopener
from tqdm import tqdm
class DownloadProgressBar(tqdm):
def update_to(self, b=1, bsize=1, tsize=None):
if tsize is not None:
self.total = tsize
self.update(b * bsize - self.n)
def CookDown(url, filename):
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'}
cookies = {'_ga': 'GA1.2.282065737.1604285823', 'MUID': '07B92E6562A266AE0A83211366A260EF','aam_uuid': '33272659623977483402062764826775668547','AAMC_mscom_0': 'REGION%7C3','_cs_c': '0','_cs_id': 'cd07a4b3-19b2-a8c7-95e4-7b8fd7c286f9.1605703107.1.1605703107.1605703107.1594299326.1639867107380.Lax.0','_uetvid': 'c457b5101e8a11eb956e2f93309fcf0c','IR_PI': 'f62c6991-299a-11eb-980c-0abbe301118c%7C1605789507507','mbox': 'PC#f8f94dce007c4a3c88788082771689c3.38_0#1668947906|session#af45a09527ce4486be93aff7c2a9adf1#1605704965'}
try:
read = requests.get(url, headers=headers, cookies=cookies)
with open(filename, 'wb') as w:
for chunk in read.iter_content(chunk_size=None):
if chunk:
w.write(chunk)
print(filename + ' downloaded successfully!!!')
except urllib.request.URLError as e:
print("Error :%s" % e)
def Cookdone(url, filename):
r = requests.get(url, stream=True)
with open(filename, "wb") as f:
for data in tqdm(response.iter_content()):
f.write(data)
def Cookdown(url, filename):
#headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'}
#cookies = {'_ga': 'GA1.2.282065737.1604285823', 'MUID': '07B92E6562A266AE0A83211366A260EF','aam_uuid': '33272659623977483402062764826775668547','AAMC_mscom_0': 'REGION%7C3','_cs_c': '0','_cs_id': 'cd07a4b3-19b2-a8c7-95e4-7b8fd7c286f9.1605703107.1.1605703107.1605703107.1594299326.1639867107380.Lax.0','_uetvid': 'c457b5101e8a11eb956e2f93309fcf0c','IR_PI': 'f62c6991-299a-11eb-980c-0abbe301118c%7C1605789507507','mbox': 'PC#f8f94dce007c4a3c88788082771689c3.38_0#1668947906|session#af45a09527ce4486be93aff7c2a9adf1#1605704965'}
import shutil
req = requests.get(url, stream=True)
print(req.headers)
with open(filename, "wb") as f:
req.raw.decode_content = False
shutil.copyfileobj(req.raw, f)
def download(url, path):
opener = FancyURLopener({})
opener.version = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
opener.retrieve(url, path)
def down(url, filename):
proxy = ProxyHandler({})
opener = build_opener(proxy)
opener.addheaders = [('User-Agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30')]
install_opener(opener)
urlretrieve(url, filename)
def DownLoad(url, filename):
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'}
r = requests.get(url, allow_redirects=True, headers=headers)
with open(filename, 'wb') as f:
for chunk in r.iter_content(1024):
f.write(chunk)
def DoneLoad():
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'}
r = requests.get(url, allow_redirects=True, headers=headers)
with open(filename, 'wb') as f:
total_length = int(r.headers.get('content-length'))
for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length/1024) + 1):
if chunk:
f.write(chunk)
f.flush()
def Download(url, path):
hdr = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36' }
req = urllib.request.Request(url, headers=hdr)
urllib.request.urlretrieve(req, path)
def downLoader(url, output_path):
with DownloadProgressBar(unit='B', unit_scale=True,
miniters=1, desc=url.split('/')[-1]) as t:
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')]
urllib.request.install_opener(opener)
urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)
def downLoad(url, output_path):
hdr = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36' }
r = requests.get(url, headers=hdr, verify=False, stream=True)
with open(output_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=None):
f.write(chunk)
Here's piece of code to execute every each module :
CookDown('http://tlu.dl.delivery.mp.microsoft.com/filestreamingservice/files/f7ded4c4-f468-4bdc-96f0-03fd60c5ae81?p1=1604760658&p2=402&p3=2&p4=hcdsxul%2f8qkk8gjjjyomq0ki%2bxwjxulxxeninyf1jqrp3%2bsnzk%2fpwk4dgsnbkfzkzoor4%2bvaixilmxk6r%2bz6%2ba%3d%3d','Microsoft.Services.Store.Engagement_10.0.19011.0_x64__8wekyb3d8bbwe.appx')
Using each module, resulted on the same corrupted file. Any answers will hugely appreciated because im stuck and doesnt have any ideas anymore. Please stackoverflow gods...Come here :D

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

I can't send form data to Python Jupyter - python

Related

Receiving hcaptcha error from discord while using 2captcha (works - working code pasted below)

Changing layout and adding titles to JSON file with python

Getting Response 500 python

Web scraping - his application is out of date, please click the refresh button on your browser return message

Download Access Denied

Categories

Resources