My operating environment is: Python2.7, django1.9
My original code was:
req = urllib2.Request(url, obj, headers)
opener = urllib2.urlopen(req)
But there was an error: [SSL: CERTIFICATE_VERIFY_FAILED]. I found a solution on the Internet:
First:
import ssl
import urllib2
context = ssl._create_unverified_context()
print urllib2.urlopen("https://imaojia.com/", context=context).read()
Second:
import ssl
import urllib2
ssl._create_default_https_context = ssl._create_unverified_context
print urllib2.urlopen("https://imaojia.com/").read()
After I applied them, the code became:
req = urllib2.Request(url, obj, headers)
import ssl
opener = urllib2.urlopen(req, context=ssl._create_unverified_context())
Now there are new errors:
HTTP Error 503: Service Unavailable
Internal Server Error: /deploy/key_list_import/
Traceback (most recent call last):
File "/usr/lib/python2.7/site-packages/django/core/handlers/base.py", line 149, in get_response
response = self.process_exception_by_middleware(e, request)
File "/usr/lib/python2.7/site-packages/django/core/handlers/base.py", line 147, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/python2.7/site-packages/django/contrib/auth/decorators.py", line 23, in _wrapped_view
return view_func(request, *args, **kwargs)
File "/project/soms/deploy/views.py", line 398, in salt_key_import
minions,minions_pre = sapi.list_all_key()
File "/project/soms/deploy/saltapi.py", line 54, in list_all_key
self.token_id()
File "/project/soms/deploy/saltapi.py", line 30, in token_id
content = self.postRequest(obj,prefix='/login')
File "/project/soms/deploy/saltapi.py", line 42, in postRequest
opener = urllib2.urlopen(req, context=ssl._create_unverified_context())
File "/usr/lib64/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python2.7/urllib2.py", line 437, in open
response = meth(req, response)
File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib64/python2.7/urllib2.py", line 475, in error
return self._call_chain(*args)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 503: Service Unavailable
Who can give me some advice? Thank you!
Thanks to Chiheb Nexus for the answer that helped me solve this. To close this question, I am answering it myself and accepting the answer.
The answer to this question is that my request itself was what caused the HTTP Error 503.
Related
I was wondering if somebody can help me with getting my code to work.
import urllib.request
import urllib.parse
import re
url = 'https://www.google.com'
values = {'s':'basics',
'submit':'search'}
data = urllib.parse.urlencode(values)
data = data.encode('utf-8')
req = urllib.request.Request(url,data)
resp = urllib.request.urlopen(req)
respData = resp.read()
print(respData)
It is giving me this error message when running the code.
TypeError: POST data should be bytes, an iterable of bytes, or a file object. It cannot be of type str.
I hope someone can help me with my problem. If not thanks anyways.
It is giving me this gigantic error message:
Traceback (most recent call last):
File "C:\Users\user\OneDrive\Desktop\Lotto.py", line 11, in <module>
resp = urllib.request.urlopen(req)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python37-32\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 405: Method Not Allowed
It's not working because you have a TYPO:
data = urllib.parse.urlencode(values)
date = data.encode('utf-8') # YOUR TYPO IS HERE
req = urllib.request.Request(url,data)
You need to change that line to:
data = data.encode('utf-8') # TYPO FIXED!
I want to read the value of different stocks from websites. Therefore I wrote this tiny script, which reads the page source and then parses out the value:
stock_reader.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from re import search
from urllib import request
def main():
    """Print the bid value found on each configured order-book page.

    For every URL below, the page is downloaded and decoded as UTF-8.
    On each line containing ``id="bid"`` the first match of the
    digits-separator-digits pattern is printed.
    """
    # Parallel lists: links[0] holds the stock names, links[1] the
    # order-book URL for the name at the same position.
    links = [
        [
            'CSG',
            'UBS',
        ],
        [
            'http://www.tradegate.de/orderbuch.php?isin=CH0012138530',
            'http://www.tradegate.de/orderbuch.php?isin=CH0244767585',
        ],
    ]
    for _name, url in zip(links[0], links[1]):
        # Close the connection as soon as the page has been read.
        with request.urlopen(url) as response:
            htmltext = response.read().decode('utf-8')
        for line in htmltext.splitlines():
            if 'id="bid"' in line:
                # Raw string avoids the invalid "\d" escape warning.  The
                # separator is intentionally left as "." (any character)
                # so the pattern keeps matching both "12.34" and "12,34".
                m = search(r'\d+.\d+', line)
                if m is None:
                    # The bid element carries no number on this line;
                    # skip it instead of failing on a None match.
                    continue
                print(m.group())
if __name__ == '__main__':
main()
sometimes it works but sometimes this error gets raised:
error message
Traceback (most recent call last):
File "./aktien_reader.py", line 39, in <module>
main()
File "./aktien_reader.py", line 30, in main
htmltext = request.urlopen(url).read().decode('utf-8')
File "/usr/lib/python3.3/urllib/request.py", line 160, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 686, in http_error_302
self.inf_msg + msg, headers, fp)
urllib.error.HTTPError: HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
Found
My question is: why is it happening and how can I avoid it?
This probably happens because the destination site uses cookies and redirects you if you don't send them.
What you can use is something like this:
import urllib.request
from http.cookiejar import CookieJar

url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"
# No Accept-Encoding header is sent: urllib does not decompress gzip or
# deflate bodies, so asking for them can make read() return compressed
# bytes instead of the page text.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}
req = urllib.request.Request(url, None, headers)
# The cookie jar keeps the cookies set during the redirect chain so they
# are sent back on the follow-up requests made by the same opener.
cj = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
response = opener.open(req)
response.read()
This way, you support cookies and the website will allow you to get the page :-)
Another way would be to use the requests package, which is much simpler to use. In your case, it would lead to:
import requests
url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"
r = requests.get(url, headers={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}, timeout=15)
print(r.content)
This answer is a simplification of the one by Cédric J. You don't really need to import CookieJar or set various Accept headers if you don't want to. You should however generally set a timeout. It is tested with Python 3.7. I would typically remember to use a new opener for each random URL that I want cookies for.
from urllib.request import build_opener, HTTPCookieProcessor, Request
url = 'https://www.cell.com/cell-metabolism/fulltext/S1550-4131(18)30630-2'
opener = build_opener(HTTPCookieProcessor())
Without a Request object:
response = opener.open(url, timeout=30)
content = response.read()
With a Request object:
request = Request(url)
response = opener.open(request, timeout=30)
content = response.read()
HTTP status code 302 is a kind of redirect; the response carries a header with a new URL to access (not necessarily a working URL):
Location: http://www.example.com/x/y/
This is quite often used to block bots that make too many requests in too short a time frame, so it is not a coding problem.
I use urllib, urllib2, and cookielib to scrape a website: get the login page and post the data.
def getpage():
    """
    open the sign-in page and return the response object
    """
    codeurl = r"http://www.xxx/sign_in"
    return urllib2.urlopen(urllib2.Request(codeurl))
def parsecode(response):
    """
    parse the login page to get the changed code

    response -- object whose read() returns the login page markup.

    Returns the content value of the csrf-token <meta> tag with any
    surrounding quote characters removed.  Raises IndexError when no
    such tag is found.
    """
    pattern = re.compile(r"""<meta.*?csrf-token.*?content=(.*?)\s/>""")
    found = pattern.search(response.read())
    if found is None:
        # Same exception type as indexing an empty findall() result,
        # but with a message that says what was missing.
        raise IndexError("csrf-token meta tag not found")
    # The group captures the attribute value together with its quotes
    # (for example '"abc"'); strip them so only the token is returned.
    return found.group(1).strip("\"'")
def Hand():
    """
    build an opener that keeps cookies and carries the custom headers
    """
    headers = {
        "Referer": "xxx",
        "User-Agent": "xxx"
    }
    cookie_jar = cookielib.MozillaCookieJar()
    openner = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    # addheaders takes a list of (name, value) tuples.
    openner.addheaders = [(name, value) for name, value in headers.items()]
    return openner
def postdata(code, openner):
    """
    post the data xxx.com needed

    code    -- value sent as the authenticity_token form field.
    openner -- opener used to send the request.

    Prints every cookie held by the opener's cookie handler(s) and
    returns the response object from openner.open().
    """
    logurl = r"http://www.jianshu.com/sessions"
    sign_in = {"name": "xxx", "password": "xxx", "authenticity_token": code}
    data = urllib.urlencode(sign_in).encode("utf-8")
    response = openner.open(logurl, data)
    # No cookie jar is defined in this function's scope, so read it back
    # from the cookie handler installed on the opener.
    for handler in openner.handlers:
        if isinstance(handler, urllib2.HTTPCookieProcessor):
            for item in handler.cookiejar:
                print(item)
    return response
However,I met this bug:
Traceback (most recent call last):
File "jianshu.py", line 80, in <module>
postdata(code,op)
File "jianshu.py", line 43, in postdata
x=openner.open(logurl,data)
File "/usr/lib64/python2.7/urllib2.py", line 437, in open
response = meth(req, response)
File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib64/python2.7/urllib2.py", line 475, in error
return self._call_chain(*args)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 500: Internal Server Error
Are you possibly missing a ' in between the 'r' and 'http://...' this line:
codeurl=r"http://www.xxx/sign_in"
I'm trying to do this on remote Ubuntu Server:
>>> import urllib2, requests
>>> url = 'http://python.org/'
>>> urllib2.urlopen(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 444, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 527, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
>>> requests.get(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 382, in request
resp = self.send(prep, **send_kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 505, in send
history = [resp for resp in gen] if allow_redirects else []
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 99, in resolve_redirects
raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects)
requests.exceptions.TooManyRedirects: Exceeded 30 redirects.
But it works fine on local Windows machine:
>>> urllib2.urlopen(url)
<addinfourl at 57470168 whose fp = <socket._fileobject object at 0x036CB630>>
>>> requests.get(url)
<Response [200]>
I have absolutely no idea about what's going on and would appreciate any suggestion.
Update
I tried S.M. Al Mamun's suggestion and got an exception with long traceback:
>>> req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
>>> urllib2.urlopen(req).read()
...
long traceback (more than one page)
...
urllib2.HTTPError: HTTP Error 303: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
See Other
Infinite loop again (I mean TooManyRedirects exception).
Try using a user-agent:
req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
urllib2.urlopen(req).read()
If it still doesn't work, your Ubuntu server might be offline!
I'm trying to use urllib2 and python-ntlm to connect to an NT authenticated server, but I'm getting an error. Here's the code I'm using, from the python-ntlm site:
user = 'DOMAIN\user.name'
password = 'Password123'
url = 'http://corporate.domain.com/page.aspx?id=foobar'
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
# create the NTLM authentication handler
auth_NTLM = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman)
# create and install the opener
opener = urllib2.build_opener(auth_NTLM)
urllib2.install_opener(opener)
# retrieve the result
response = urllib2.urlopen(url)
return response.read()
And here's the error I get:
Traceback (most recent call last):
File "C:\Python27\test.py", line 112, in get_ntlm_data
response = urllib2.urlopen(url)
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 398, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 511, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 430, in error
result = self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 370, in _call_chain
result = func(*args)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 99, in http_error_401
return self.http_error_authentication_required('www-authenticate', req, fp, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 35, in http_error_authentication_required
return self.retry_using_http_NTLM_auth(req, auth_header_field, None, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 72, in retry_using_http_NTLM_auth
UserName = user_parts[1]
IndexError: list index out of range
Any idea what I'm doing wrong?
Try:
user = r'DOMAIN\user.name'