I have this code at the top of my Google App Engine program:
from google.appengine.api import urlfetch
urlfetch.set_default_fetch_deadline(60)
I am using an opener to load stuff:
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor( cj ) )
opener.addheaders = [ ( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64)' ) ]
resp = opener.open( 'http://www.example.com/' )
an exception is being thrown after 5 seconds:
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 1222, in https_open
return self.do_open(httplib.HTTPSConnection, req)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 1187, in do_open
r = h.getresponse(buffering=True)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/gae_override/httplib.py", line 524, in getresponse
raise HTTPException(str(e))
HTTPException: Deadline exceeded while waiting for HTTP response from URL: http://www.example.com
How can I avoid the error?
Did you try setting the timeout on the .open() call?
resp = opener.open('http://example.com', None, 60)
If you reach the timeout as specified by set_default_fetch_deadline, Python will throw a DownloadError or DeadlineExceededErrors exception: https://cloud.google.com/appengine/docs/python/urlfetch/exceptions
You can also patch the httplib2 library and set the deadline to 60 seconds
httplib2/__init__.py:
def fixed_fetch(url, payload=None, method="GET", headers={},
allow_truncated=False, follow_redirects=True,
deadline=60):
return fetch(url, payload=payload, method=method, headers=headers,
allow_truncated=allow_truncated,
follow_redirects=follow_redirects, deadline=60,
validate_certificate=validate_certificate)
return fixed_fetch
It is a work around.
Related
I have been trying to send requests using custom headers but python keeps on giving this error message, what am I getting wrong?
Here is my code together with the full response from python:
Code
import urllib.request
import urllib.parse
url = 'https://nytimes.com/'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
values = {'name': 'Kromanion Krank', 'location': 'Finland', 'language': 'Python'}
headers = {'User-Agent': user_agent}
data = urllib.parse.urlencode(values)
data = data.encode('ascii')
html_str = urllib.request.urlopen(url, data, headers)
html_txt = html_str.text
print(html_txt)
Output error
Traceback (most recent call last):
File "C:/Users/fpt84/PycharmProjects/WebC/TEST.py", line 11, in <module>
html_str = urllib.request.urlopen(url, data, headers)
File "C:\Python34\lib\urllib\request.py", line 161, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 464, in open
response = self._open(req, data)
File "C:\Python34\lib\urllib\request.py", line 482, in _open
'_open', req)
File "C:\Python34\lib\urllib\request.py", line 442, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 1226, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\Python34\lib\urllib\request.py", line 1183, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\Python34\lib\http\client.py", line 1137, in request
self._send_request(method, url, body, headers)
File "C:\Python34\lib\http\client.py", line 1182, in _send_request
self.endheaders(body)
File "C:\Python34\lib\http\client.py", line 1133, in endheaders
self._send_output(message_body)
File "C:\Python34\lib\http\client.py", line 963, in _send_output
self.send(msg)
File "C:\Python34\lib\http\client.py", line 898, in send
self.connect()
File "C:\Python34\lib\http\client.py", line 1279, in connect
super().connect()
File "C:\Python34\lib\http\client.py", line 871, in connect
self.timeout, self.source_address)
File "C:\Python34\lib\socket.py", line 504, in create_connection
sock.settimeout(timeout)
TypeError: a float is required
The signature of urllib.request.urlopen (from https://docs.python.org/3/library/urllib.request.html#urllib.request.urlopen) is:
urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)
Your third "headers" argument is getting passed into the timeout argument of urlopen
You'll need to use a Request object to do what you want:
req = urllib.request.Request(url, data, headers)
resp = urllib.request.urlopen(req)
From your code example (I also had to modify the usage of the response)
import urllib.request
import urllib.parse
url = 'https://nytimes.com/'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
values = {'name': 'Kromanion Krank', 'location': 'Finland', 'language': 'Python'}
headers = {'User-Agent': user_agent}
data = urllib.parse.urlencode(values)
data = data.encode('ascii')
request = urllib.request.Request(url, data, headers)
resp = urllib.request.urlopen(request)
html_txt = resp.read().decode('UTF-8')
print(html_txt)
I was trying to get a web page, but got into this problem. I've looked up some references, and this is what I've done so far:
import sys
import urllib2
from bs4 import BeautifulSoup
user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
handler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)
header = {
'Connection' : 'keep-alive',
'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
'Accept-Language' : 'en-US,en;q=0.5',
'Accept-Encoding' : 'gzip, deflate'
}
html = urllib2.urlopen(urllib2.Request(url, None, header))
soup = BeautifulSoup(html, 'html.parser')
# some if else function afterwards #
When I try to run the script, it shows this kind of error:
python checker.py 8.8.8.8
Traceback (most recent call last):
File "checker.py", line 34, in <module>
html = urllib2.urlopen(urllib2.Request(url, None, header))
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 469, in error
result = self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 656, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 401: authenticationrequired
But if I opened the page or other web page, and manually enter my credential, this script works fine after that. Am I missing something?
Just to add, my current network are using McAfee web gateway device. So sometimes we need to enter our credential to proceed browsing the net. Our user/pass are integrated with Active Directory. Is that may cause the issue?
This seems to work really well (taken from another thread)
import urllib2
import base64
import sys
user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
request = urllib2.Request(url)
base64string = base64.encodestring('%s:%s' % (user, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
result = urllib2.urlopen(request)
Or you may use requests:
from requests.auth import HTTPBasicAuth
user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
res=requests.get(url , auth=HTTPBasicAuth(user, password))
print res.text
I'm trying to do this on remote Ubuntu Server:
>>> import urllib2, requests
>>> url = 'http://python.org/'
>>> urllib2.urlopen(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 444, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 527, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
>>> requests.get(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 382, in request
resp = self.send(prep, **send_kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 505, in send
history = [resp for resp in gen] if allow_redirects else []
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 99, in resolve_redir ts
raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects)
requests.exceptions.TooManyRedirects: Exceeded 30 redirects.
But it works fine on local Windows machine:
>>> urllib2.urlopen(url)
<addinfourl at 57470168 whose fp = <socket._fileobject object at 0x036CB630>>
>>> requests.get(url)
<Response [200]>
I have absolutely no idea about what's going on and would appreciate any suggestion.
Update
I tried S.M. Al Mamun's suggestion and got an exception with long traceback:
>>> req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
>>> urllib2.urlopen(req).read()
...
long traceback (more than one page)
...
urllib2.HTTPError: HTTP Error 303: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
See Other
Infinite loop again (I mean TooManyRedirects exception).
Try using a user-agent:
req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
urllib2.urlopen(req).read()
If it doesn't work still, that might be your Ubuntu is offline!
When using Python 2.7 with urllib2 to retrieve data from an API, I get the error [Errno 104] Connection reset by peer. Whats causing the error, and how should the error be handled so that the script does not crash?
ticker.py
def urlopen(url):
response = None
request = urllib2.Request(url=url)
try:
response = urllib2.urlopen(request).read()
except urllib2.HTTPError as err:
print "HTTPError: {} ({})".format(url, err.code)
except urllib2.URLError as err:
print "URLError: {} ({})".format(url, err.reason)
except httplib.BadStatusLine as err:
print "BadStatusLine: {}".format(url)
return response
def get_rate(from_currency="EUR", to_currency="USD"):
url = "https://finance.yahoo.com/d/quotes.csv?f=sl1&s=%s%s=X" % (
from_currency, to_currency)
data = urlopen(url)
if "%s%s" % (from_currency, to_currency) in data:
return float(data.strip().split(",")[1])
return None
counter = 0
while True:
counter = counter + 1
if counter==0 or counter%10:
rateEurUsd = float(get_rate('EUR', 'USD'))
# does more stuff here
Traceback
Traceback (most recent call last):
File "/var/www/testApp/python/ticker.py", line 71, in <module>
rateEurUsd = float(get_rate('EUR', 'USD'))
File "/var/www/testApp/python/ticker.py", line 29, in get_exchange_rate
data = urlopen(url)
File "/var/www/testApp/python/ticker.py", line 16, in urlopen
response = urllib2.urlopen(request).read()
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 438, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 625, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 438, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 625, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 400, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 418, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 447, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 104] Connection reset by peer
error: Forever detected script exited with code: 1
"Connection reset by peer" is the TCP/IP equivalent of slamming the phone back on the hook. It's more polite than merely not replying, leaving one hanging. But it's not the FIN-ACK expected of the truly polite TCP/IP converseur. (From other SO answer)
So you can't do anything about it, it is the issue of the server.
But you could use try .. except block to handle that exception:
from socket import error as SocketError
import errno
try:
response = urllib2.urlopen(request).read()
except SocketError as e:
if e.errno != errno.ECONNRESET:
raise # Not error we are looking for
pass # Handle error here.
You can try to add some time.sleep calls to your code.
It seems like the server side limits the amount of requests per timeunit (hour, day, second) as a security issue. You need to guess how many (maybe using another script with a counter?) and adjust your script to not surpass this limit.
In order to avoid your code from crashing, try to catch this error with try .. except around the urllib2 calls.
There is a way to catch the error directly in the except clause with ConnectionResetError, better to isolate the right error.
This example also catches the timeout.
from urllib.request import urlopen
from socket import timeout
url = "http://......"
try:
string = urlopen(url, timeout=5).read()
except ConnectionResetError:
print("==> ConnectionResetError")
pass
except timeout:
print("==> Timeout")
pass
there are 2 solution you can try.
request too frequently.
try sleep after per request
time.sleep(1)
the server detect the request client is python, so reject.
add User-Agent in header to handle this.
headers = {
"Content-Type": "application/json;charset=UTF-8",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)"
}
try:
res = requests.post("url", json=req, headers=headers)
except Exception as e:
print(e)
pass
the second solution save me
I'm trying to use urllib2 and python-ntlm to connect to an NT authenticated server, but I'm getting an error. Here's the code I'm using, from the python-ntlm site:
user = 'DOMAIN\user.name'
password = 'Password123'
url = 'http://corporate.domain.com/page.aspx?id=foobar'
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
# create the NTLM authentication handler
auth_NTLM = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman)
# create and install the opener
opener = urllib2.build_opener(auth_NTLM)
urllib2.install_opener(opener)
# retrieve the result
response = urllib2.urlopen(url)
return response.read()
And here's the error I get:
Traceback (most recent call last):
File "C:\Python27\test.py", line 112, in get_ntlm_data
response = urllib2.urlopen(url)
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 398, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 511, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 430, in error
result = self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 370, in _call_chain
result = func(*args)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 99, in http_error_401
return self.http_error_authentication_required('www-authenticate', req, fp, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 35, in http_error_authentication_required
return self.retry_using_http_NTLM_auth(req, auth_header_field, None, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 72, in retry_using_http_NTLM_auth
UserName = user_parts[1]
IndexError: list index out of range
Any idea what I'm doing wrong?
Try:
user = r'DOMAIN\user.name'