How can I read information about playing song using urllib3? Which headers should I use?
import urllib3
http = urllib3.PoolManager()
response = http.request("GET", "http://pool.cdn.lagardere.cz/fm-evropa2-128", headers={
'User-Agent': 'User-Agent: VLC/2.0.5 LibVLC/2.0.5',
'Icy-MetaData': '1',
'Range': 'bytes=0-',
})
print(response.data)
I tried this. But it stucks at sending request. Can anyone help me? Thanks for answers.
The following code returns all the header data of the given stream. Unfortunately I was not able to obtain song names this way.
import requests
url = 'http://pool.cdn.lagardere.cz/fm-evropa2-128'
def print_url(r, *args, **kwargs):
print(r.headers)
requests.get(url, hooks=dict(response=print_url))
Output is as follows:
{'icy-description': 'Evropa 2', 'Via': '1.1 s670-6.noc.rwth-aachen.de:80 (Cisco-WSA/8.8.0-085)', 'icy-genre': 'Various', 'icy-url': 'http://www.evropa2.cz', 'icy-pub': '0', 'ice-audio-info': 'ice-samplerate=44100;ice-bitrate=128;ice-channels=2', 'Date': 'Fri, 29 Jan 2016 17:24:20 GMT', 'icy-br': '128, 128', 'Content-Type': 'audio/mpeg', 'Connection': 'keep-alive', 'Transfer-Encoding': 'chunked', 'icy-name': 'Evropa 2', 'Server': 'Icecast 2.3.2', 'Cache-Control': 'no-cache'}
Related
I've got a script that identifies byte ranges in a very large file that I'd like to download. I'm using Python's requests library to download the content, specifying the byte ranges of interest in the range header. Here's a simplified version of the code (without the logic that constructs the byte range string):
import requests
URL = 'https://ncei.noaa.gov/data/rapid-refresh/access/historical/analysis/201602/20160217/rap_130_20160217_2200_000.grb2'
byte_range = '0-33510, 110484-147516, 219121-253904, 421175-454081, 685402-719065, 1039572-1076567, 1299982-1333158, 1398139-1429817, 1492109-1522167, \
1662765-1689414, 1870865-1896117, 2120537-2145725, 2301018-2335355, 2404445-2439381, 2511283-2547104, 2717931-2750956, 2971504-3001716, 3268591-3295610, \
3395201-3395200, 3395201-3461393, 3593639-3593638, 3593639-3659732, 3792859-3792858, 3792859-3859312, 4183232-4183231, 4183232-4245378, 4668359-4668358, \
4668359-4728450, 5283559-5283558, 5283559-5344745, 7251508-7317016, 7498496-7558460'
response = requests.get(URL, headers={"Range": "bytes={}".format(byte_range)})
print(response.headers)
As far as I can tell, this is a valid request and I don't get any errors. However, it downloads the entire file rather than the specified ranges. The output:
{'Date': 'Sat, 16 Oct 2021 15:26:17 GMT', 'Server': 'Apache', 'Strict-Transport-Security': 'max-age=31536000', 'Last-Modified': 'Thu, 18 Feb 2016 15:22:25 GMT', 'ETag': '"cfde25-52c0cef11064f"', 'Accept-Ranges': 'bytes', 'Content-Length': '13622821', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Headers': 'X-Requested-With, Content-Type', 'Connection': 'close'}
To debug, I tried shortening the number of byte ranges and that seemed to work. Seems like the max number of ranges for the request to return a subset of the file is in the low 20s.
# Shorter list of byte ranges
URL = 'https://ncei.noaa.gov/data/rapid-refresh/access/historical/analysis/201602/20160217/rap_130_20160217_2200_000.grb2'
byte_range = '0-33510, 110484-147516, 219121-253904, 421175-454081, 685402-719065, 1039572-1076567, 1299982-1333158, 1398139-1429817, 1492109-1522167, \
1662765-1689414, 1870865-1896117, 2120537-2145725, 2301018-2335355, 2404445-2439381, 2511283-2547104, 2717931-2750956, 2971504-3001716, 3268591-3295610'
response = requests.get(URL, headers={"Range": "bytes={}".format(byte_range)})
print(response.headers)
In this case, the content type is a multipart byte range, as expected.
{'Date': 'Sat, 16 Oct 2021 15:26:41 GMT', 'Server': 'Apache', 'Strict-Transport-Security': 'max-age=31536000', 'Last-Modified': 'Thu, 18 Feb 2016 15:22:25 GMT', 'ETag': '"cfde25-52c0cef11064f"', 'Accept-Ranges': 'bytes', 'Content-Length': '577544', 'Content-Type': 'multipart/byteranges; boundary=ccc35e3764d85dea', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Headers': 'X-Requested-With, Content-Type', 'Connection': 'close'}
My question now is where's the limitation - is this an issue with requests, the server, or just an issue with HTTP headers? I could break this up into multiple requests, but I need to try to avoid spamming the server with a lot of requests near the same time (this byte range list could get pretty long depending on what I want from the file). If I had to break up the request, what's the most efficient way to do so? I really don't want to download more data than I need as these files can be quite large.
I have the following code, but its gives be 200 OK with first page (state of default drop down) response. Please note that the Drop Down lists are dymanic and progressive until final search button appears , Can someone correct me as to what is wrong with my code?
def process(ghatno):
home_url = 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik'
post_url = 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik'
print "Please wait...getting details of :" + ghatno
with requests.Session() as session:
r = session.get(url=post_url)
cookies = r.cookies
pprint.pprint(r.headers)
gethead = r.headers
soup = BeautifulSoup(r.text, 'html.parser')
viewstate = soup.select('input[name="__VIEWSTATE"]')[0]['value']
csrftoken = soup.select('input[name="__CSRFTOKEN"]')[0]['value']
eventvalidation = soup.select('input[name="__EVENTVALIDATION"]')[0]['value']
viewgen = soup.select('input[name="__VIEWSTATEGENERATOR"]')[0]['value']
data = {
'__CSRFTOKEN':csrftoken,
'__EVENTARGUMENT':'',
'__EVENTTARGET':'',
'__LASTFOCUS':'',
'__SCROLLPOSITION':'0',
'__SCROLLPOSITIONY':'0',
'__EVENTVALIDATION': eventvalidation,
'__VIEWSTATE':viewstate,
'__VIEWSTATEGENERATOR': viewgen,
'ctl00$ContentPlaceHolder5$ddlLanguage' : 'en-US',
'ctl00$ContentPlaceHolder5$btnSearchCommonSr':'Search',
'ctl00$ContentPlaceHolder5$ddlTaluka': '2',
'ctl00$ContentPlaceHolder5$ddlVillage': '25',
'ctl00$ContentPlaceHolder5$ddlYear': '20192020',
'ctl00$ContentPlaceHolder5$grpSurveyLocation': 'rdbSurveyNo',
'ctl00$ContentPlaceHolder5$txtCommonSurvey': 363
}
headers = {
'Host': 'igrmaharashtra.gov.in',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0',
'Referer': 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik',
'Host': 'igrmaharashtra.gov.in',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
}
r = requests.post(url=post_url, data=json.dumps(data), cookies=cookies, headers = headers)
soup = BeautifulSoup(r.text, 'html.parser')
table = SoupStrainer('tr')
soup = BeautifulSoup(soup.get_text(), 'html.parser', parse_only=table)
print(soup.get_text())
pprint.pprint(r.headers)
print r.text
getpost = r.headers
getpostrequest = r.request.headers
getresponsebody = r.request.body
f = open('/var/www/html/nashik/hiren.txt', 'w')
f.write(str(gethead))
f.write(str(getpostrequest))
f.write(str(getresponsebody))
f.write(str(getpost))
My response is as below :
Response header - (GET Request)
{'Content-Length': '5994', 'X-AspNet-Version': '4.0.30319', 'Set-Cookie': 'ASP.NET_SessionId=24wwh11lwvzy5gf0xlzi1we4; path=/; HttpOnly, __CSRFCOOKIE=d7b10286-fc9f-4ed2-863d-304737df8758; path=/; HttpOnly', 'Content-Encoding': 'gzip', 'Vary': 'Accept-Encoding', 'X-Powered-By': 'ASP.NET', 'Server': 'Microsoft-IIS/8.0', 'Cache-Control': 'private', 'Date': 'Thu, 02 May 2019 08:21:48 GMT', 'Content-Type': 'text/html; charset=utf-8'}
Response header - (GET Request)
{'Content-Length': '3726', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate', 'Host': 'igrmaharashtra.gov.in', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0', 'Connection': 'keep-alive', 'Referer': 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik', 'Cookie': '__CSRFCOOKIE=d7b10286-fc9f-4ed2-863d-304737df8758; ASP.NET_SessionId=24wwh11lwvzy5gf0xlzi1we4', 'Content-Type': 'application/x-www-form-urlencoded'}
Response header - (POST Request)
{'Content-Length': '7834', 'X-AspNet-Version': '4.0.30319', 'Content-Encoding': 'gzip', 'Vary': 'Accept-Encoding', 'X-Powered-By': 'ASP.NET', 'Server': 'Microsoft-IIS/8.0', 'Cache-Control': 'private', 'Date': 'Fri, 03 May 2019 10:21:45 GMT', 'Content-Type': 'text/html; charset=utf-8'}
**Default Page Selected Drop Down is returned **
नाशिक and
- - Select Taluka - - INSTEAD of option value "2" i.e इगतपुरी once option "2" is selected I want value "25" in next drop down before I put my final survey "363" for results.
Please note I tried Mechanize browser too, but no luck !!
Finally the solution is to do post requests multiple times in same "session" with same "cookie" and iterate through them. It works now !
Commands as .format and %s I use in every possible combination without any progress.
It work right when I use it this way:
last_issue = jira.search_issues('assignee = "ahmet" order by created desc')[0]
But I need assignee to be a varible and if I use it this way or smthing like:
assignee = "ahmet"
last_issue = jira.search_issues('assignee =', assignee, 'order by created desc')[0]
It gives mistake like
response headers = {'Vary': 'User-Agent', 'X-AREQUESTID': '578x1623860x1', 'X-ASESSIONID': 'x0ubjs', 'X-ASEN': 'SEN-L0000000', 'Cache-Control': 'no-cache, no-store, no-transform', 'X-Content-Type-Options': 'nosniff', 'X-AUSERNAME': 'ekaterina', 'X-Seraph-LoginReason': 'OK', 'Content-Encoding': 'gzip', 'Transfer-Encoding': 'chunked', 'Date': 'Mon, 11 Sep 2017 09:38:10 GMT', 'Content-Type': 'text/html;charset=UTF-8', 'Server': 'nginx/1.13.0', 'Connection': 'keep-alive'}
response text =
How should I make a variable in appropriate way?
It works!
var = "assignee = '{}' order by created desc".format(assignee)
last_issue = jira.search_issues(var)[0]
In Python, how can I obtain headers and payload information for a particular website to make requests via requests.Session()?
e.g.:
headers = {
'Host': 'www.testsite.com',
'Accept': 'application/json',
'Proxy-Connection': 'keep-alive',
'X-Requested-With': 'XMLHttpRequest',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us',
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'http://www.testsite.com',
'Connection': 'keep-alive',
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Mobile/11D257',
'Referer': 'http://www.testsite.com/mobile'
}
Thank you in advance and will be sure to upvote and accept answer
Most of those headers are automatically supplied by the requests module. Here is an example:
import requests
from pprint import pprint
with requests.Session() as s:
s.get('http://httpbin.org/cookies/set?name=joe')
r = s.get('http://httpbin.org/cookies')
pprint(dict(r.request.headers))
assert r.json()['cookies']['name'] == 'joe'
The output of the pprint() call is this:
{'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Cookie': 'name=joe',
'User-Agent': 'python-requests/2.9.1'}
As you can see, s.get() fills in several headers.
A response object has a headers attribute:
import requests
with requests.Session() as s:
r = s.get("http://google.es")
print(r.headers)
Output:
>> {
'Date': 'Tue, 22 Aug 2017 00:37:13 GMT',
'Expires': '-1',
'Cache-Control': 'private,
max-age=0',
'Content-Type': 'text/html; charset=ISO-8859-1',
...
}
I’m trying to get the header from a website, encode it in JSON to write it to a file.
I’ve tried two different ways without success.
FIRST with urllib2 and json
import urllib2
import json
host = ("https://www.python.org/")
header = urllib2.urlopen(host).info()
json_header = json.dumps(header)
print json_header
in this way I get the error:
TypeError: is not
JSON serializable
So I try to bypass this issue by converting the object to a string -> json_header = str(header)
In this way I can json_header = json.dumps(header) but the output it’s weird:
"Date: Wed, 02 Jul 2014 13:33:37 GMT\r\nServer: nginx\r\nContent-Type:
text/html; charset=utf-8\r\nX-Frame-Options:
SAMEORIGIN\r\nContent-Length: 45682\r\nAccept-Ranges: bytes\r\nVia:
1.1 varnish\r\nAge: 1263\r\nX-Served-By: cache-fra1220-FRA\r\nX-Cache: HIT\r\nX-Cache-Hits: 2\r\nVary: Cookie\r\nStrict-Transport-Security:
max-age=63072000; includeSubDomains\r\nConnection: close\r\n"
SECOND with requests
import requests
r = requests.get(“https://www.python.org/”)
rh = r.headers
print rh
{'content-length': '45682', 'via': '1.1 varnish', 'x-cache': 'HIT',
'accept-ranges': 'bytes', 'strict-transport-security':
'max-age=63072000; includeSubDomains', 'vary': 'Cookie', 'server':
'nginx', 'x-served-by': 'cache-fra1226-FRA', 'x-cache-hits': '14',
'date': 'Wed, 02 Jul 2014 13:39:33 GMT', 'x-frame-options':
'SAMEORIGIN', 'content-type': 'text/html; charset=utf-8', 'age':
'1619'}
In this way the output is more JSON like but still not OK (see the ‘ ‘ instead of “ “ and other stuff like the = and ;).
Evidently there’s something (or a lot) I’m not doing in the right way.
I’ve tried to read the documentation of the modules but I can’t understand how to solve this problem.
Thank you for your help.
There are more than a couple ways to encode headers as JSON, but my first thought would be to convert the headers attribute to an actual dictionary instead of accessing it as requests.structures.CaseInsensitiveDict
import requests, json
r = requests.get("https://www.python.org/")
rh = json.dumps(r.headers.__dict__['_store'])
print rh
{'content-length': ('content-length', '45474'), 'via': ('via', '1.1
varnish'), 'x-cache': ('x-cache', 'HIT'), 'accept-ranges':
('accept-ranges', 'bytes'), 'strict-transport-security':
('strict-transport-security', 'max-age=63072000; includeSubDomains'),
'vary': ('vary', 'Cookie'), 'server': ('server', 'nginx'),
'x-served-by': ('x-served-by', 'cache-iad2132-IAD'), 'x-cache-hits':
('x-cache-hits', '1'), 'date': ('date', 'Wed, 02 Jul 2014 14:13:37
GMT'), 'x-frame-options': ('x-frame-options', 'SAMEORIGIN'),
'content-type': ('content-type', 'text/html; charset=utf-8'), 'age':
('age', '1483')}
Depending on exactly what you want on the headers you can specifically access them after this, but this will give you all the information contained in the headers, if in a slightly different format.
If you prefer a different format, you can also convert your headers to a dictionary:
import requests, json
r = requests.get("https://www.python.org/")
print json.dumps(dict(r.headers))
{"content-length": "45682", "via": "1.1 varnish", "x-cache": "HIT",
"accept-ranges": "bytes", "strict-transport-security":
"max-age=63072000; includeSubDomains", "vary": "Cookie", "server":
"nginx", "x-served-by": "cache-at50-ATL", "x-cache-hits": "5", "date":
"Wed, 02 Jul 2014 14:08:15 GMT", "x-frame-options": "SAMEORIGIN",
"content-type": "text/html; charset=utf-8", "age": "951"}
If you are only interested in the header, make a head request. convert the CaseInsensitiveDict in a dict object and then convert it to json.
import requests
import json
r = requests.head('https://www.python.org/')
rh = dict(r.headers)
json.dumps(rh)
import requests
import json
r = requests.get('https://www.python.org/')
rh = r.headers
print json.dumps( dict(rh) ) # use dict()
result:
{"content-length": "45682", "via": "1.1 varnish", "x-cache": "HIT", "accept-ranges": "bytes", "strict-transport-security": "max-age=63072000; includeSubDomains", "vary": "Cookie", "server": "nginx", "x-served-by": "cache-fra1224-FRA", "x-cache-hits": "5", "date": "Wed, 02 Jul 2014 14:08:04 GMT", "x-frame-options": "SAMEORIGIN", "content-type": "text/html; charset=utf-8", "age": "3329"}
I know this is an old question, but I stumbled across it when trying to put together a quick and dirty Python curl-esque URL getter. I kept getting an error:
TypeError: Object of type 'CaseInsensitiveDict' is not JSON serializable
The above solutions are good if need to output a JSON string immediately, but in my case I needed to return a python dictionary of the headers, and I wanted to normalize the capitalization to make all keys lowercase.
My solution was to use a dict comprehension:
import requests
response = requests.head('https://www.python.org/')
my_dict = {
'body': response.text,
'http_status_code': response.status_code,
'headers': {k.lower(): v for (k, v) in response.headers.items()}
}