Python Requests timeout parameter is being ignored

I'm using Python 2.7. I want every request to time out after a few seconds, but the requests time out almost immediately. Here is my code.
requestsTimeout = 5
link = 'http://' + IP + '/api/v1.0/system/info'

while (1):
    try:
        return requests.get(link, timeout=requestsTimeout)
    except requests.exceptions.RequestException as e:
        log._print0(_printID, 'getting DAQ Info', str(e))  # just printing
        time.sleep(0.1)
Now if I disconnect my Wi-Fi, I would expect a timeout exception to be printed every 5 seconds, but the messages are printed at a very fast rate (multiple times per second).

When the host is unreachable, a ConnectionError is raised immediately, without waiting for the time set by timeout. You can work around this by handling that exception separately:
requestsTimeout = 5
link = 'http://' + IP + '/api/v1.0/system/info'

while True:
    try:
        return requests.get(link, timeout=requestsTimeout)
    except requests.exceptions.ConnectionError as e:
        time.sleep(requestsTimeout)
    except requests.exceptions.RequestException as e:
        log._print0(_printID, 'getting DAQ Info', str(e))  # just printing
        time.sleep(0.1)
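As an aside (not part of the answer above), requests also accepts a (connect, read) tuple for timeout, which makes it explicit which phase each limit applies to. A minimal sketch, with a placeholder address standing in for the device IP:

import requests

# Placeholder address used only for illustration.
link = 'http://192.0.2.10/api/v1.0/system/info'

try:
    # 5 seconds to establish the connection, 10 seconds to wait for a reply.
    response = requests.get(link, timeout=(5, 10))
    print(response.status_code)
except requests.exceptions.ConnectTimeout:
    print('could not connect within 5 seconds')
except requests.exceptions.ReadTimeout:
    print('connected, but no reply within 10 seconds')
except requests.exceptions.ConnectionError as e:
    # Raised immediately when the host is unreachable, as described above.
    print('connection failed: {}'.format(e))

Note that ConnectTimeout only fires when the connection attempt actually hangs; an unreachable host still fails fast with ConnectionError, which is why the sleep in the answer above is still needed.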

Related

Timeout for requests.post not working in Python

I have a Python script using requests.post:
try:
    r = requests.post(url, json=data, timeout=10)
except requests.Timeout:
    print("timeout")
(I have also tried with except Timeout: and except requests.exceptions.Timeout)
This code should print "Timeout" after around 10 seconds if the server is down, right?
However, it doesn't. My script waits indefinitely, as if timeout were None.
Do you know why?
Thanks
EDIT
Here is the whole code:
import requests
from twisted.internet import task, reactor
import json
import sys
import os

timeout = 30  # 30 sec timeout to loop PostParams

url = os.getenv('URL', "http://127.0.0.1:5000")

def PostParams():
    # Subscribe to MQTT Broker
    data = subscribing_broker()
    # Iterate in the JSON payload to get the different units
    for unit in data:
        try:
            # Make the POST request - data as JSON - blocking call - timeout 10s
            req_result = requests.post(url, json=unit, timeout=10)
            # Get the answer in json
            pred = req_result.json()
            # Publish to MQTT
            publishing_broker(pred, pub_topic)
        # Connection timeout, continue
        except Timeout:
            print("Connection timed out, passing")
            pass

# Infinite loop - Timeout is 30 sec
loop = task.LoopingCall(PostParams)
loop.start(timeout)
reactor.run()
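One thing worth checking in the code above (an observation, not something confirmed by the asker): Timeout is referenced bare in the except clause but never imported, so a real timeout would raise a NameError inside the Twisted loop rather than being caught. Also, timeout=10 limits the connect and read phases separately, so the total wait can exceed 10 seconds. A minimal sketch of the POST with the exception fully qualified (post_unit is just an illustrative helper):

import requests

def post_unit(url, unit):
    # Post one unit and return the decoded JSON reply, or None on timeout.
    try:
        req_result = requests.post(url, json=unit, timeout=10)
        return req_result.json()
    except requests.exceptions.Timeout:
        # Covers both ConnectTimeout and ReadTimeout.
        print("Connection timed out, passing")
        return None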

Python 3 - requests library - iter_lines - Handling possible server stalls when reading chunk of streamed data

I need to find a way to smoothly handle server stalls while I am reading streamed data from the server. I wrote the piece of code below:
def listener():
    resp = requests.get(someurl, stream=True)
    if resp.status_code == 200:
        for line in resp.iter_lines():
            if line:
                do_something_with_the_line
                print(result)

price_thread = threading.Thread(target=listener, name="StreamingThread", args=[])
price_thread.start()
The code works well until a server stall occurs (the API provider states that a stall has occurred when no "lines" are received for 10 seconds).
How can I handle this in my code?
In other words, I would like to re-invoke the listener method, without exiting the price_thread thread, when a stall occurs.
Thanks in advance
You can use the timeout parameter to ensure that your connection breaks off when no data is received for the specified amount of time, and then respawn the connection:
def listener():
    while True:
        try:
            resp = requests.get(someurl, stream=True, timeout=10)
            if resp.status_code == 200:
                for line in resp.iter_lines():
                    if line:
                        # do_something_with_the_line
                        pass
            elif resp.status_code == 429:
                print("Too many reconnects, exiting.")
                break
            else:
                print("Unhandled status `{}` retrieved, exiting.".format(resp.status_code))
                break
        except requests.exceptions.Timeout:
            pass  # we'll ignore timeout errors and reconnect
        except requests.exceptions.RequestException as e:
            print("Request exception `{}`, exiting".format(e))
            break
You may also add a reconnect counter instead of letting the while True loop run indefinitely, as in the sketch below.
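A minimal sketch of that reconnect counter, assuming the same placeholder someurl and an arbitrary cap of five consecutive stalls:

import requests

MAX_RECONNECTS = 5  # illustrative cap, not from the original answer

def listener(someurl):
    stalls = 0
    while stalls < MAX_RECONNECTS:
        try:
            resp = requests.get(someurl, stream=True, timeout=10)
            if resp.status_code != 200:
                print("Unhandled status `{}`, exiting.".format(resp.status_code))
                return
            for line in resp.iter_lines():
                if line:
                    stalls = 0  # data arrived, reset the counter
                    # do_something_with_the_line
        except requests.exceptions.Timeout:
            stalls += 1
            print("Stall detected, reconnect {}/{}".format(stalls, MAX_RECONNECTS))
        except requests.exceptions.RequestException as e:
            print("Request exception `{}`, exiting.".format(e))
            return
    print("Too many consecutive stalls, giving up.")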

Python requests.exceptions.ConnectionError: HTTPConnectionPool(host='10.10.10.10', port=80): Max retries exceeded with url

I'm writing a script to check a large list of URLs and return the HTTP status code for each one. I tried everything I could think of or find online for the exception handling. The script runs for a while, then eventually crashes with the error:
requests.exceptions.ConnectionError: HTTPConnectionPool(host='10.10.10.10', port=80): Max retries exceeded with url: /wmedia (Caused by NewConnectionError("<urllib3.connection.HTTPConnection object at 0x1029bfe10>: Failed to establish a new connection: [Errno 49] Can't assign requested address",))
I think the server gets overwhelmed with too many requests after a while, and the sleep time doesn't help.
This is the worker function I'm using with a process pool:
def get(url):
    r = requests.get(url, timeout=2)
    try:
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)
        pass
    except requests.ConnectionError as e:
        print("OOPS!! Connection Error")
        r.status_code = "Connection refused"
        time.sleep(2)
        print(str(e))
    except requests.Timeout as e:
        print("OOPS!! Timeout Error")
        r.status_code = "Timed out"
        time.sleep(2)
        print(str(e))
    except requests.RequestException as e:
        print("OOPS!! General Error")
        r.status_code = "Error"
        print(str(e))
    except KeyboardInterrupt:
        print("Someone closed the program")
        r.status_code = "Interrupted"
    except Exception as e:
        print(e)
        r.status_code = "Error"
    return param, r.status_code
Any suggestions?
You can use urllib to get the HTTP status code. A comma-separated list of all the possible HTTP status codes (saved as 'httpStatusCodes.txt') is used in the example below.
import urllib
from collections import defaultdict

adict = {}
with open("httpStatusCodes.txt") as f:
    for line in f:
        line = line.rstrip()
        (key, val) = line.split(',')
        adict[int(key)] = val
This reads all the status codes into a dict; a defaultdict then provides a fallback for codes that are not listed:
adict = defaultdict(lambda: "'Code not defined'", adict)
Then we iterate through a list of sites and get their status codes.
websites = ['facebook.com', 'twitter.com', 'google.com',
            'youtube.com', 'icantfindthiswebsite.com']

for url in websites:
    try:
        code = urllib.urlopen('http://' + url).getcode()
    except IOError:
        code = None
    print "url = {}, code = {}, status = {}".format(url, code, adict[code])
Notice that I purposely listed icantfindthiswebsite.com to simulate a site that can't be reached; that case is handled by the IOError branch.
Result:

url = facebook.com, code = 200, status = OK
url = twitter.com, code = 200, status = OK
url = google.com, code = 200, status = OK
url = youtube.com, code = 200, status = OK
url = icantfindthiswebsite.com, code = None, status = 'Code not defined'
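The answer above is Python 2 (urllib.urlopen and print statements). On Python 3 the equivalent lives in urllib.request; a rough sketch of the same loop, with only a couple of status names inlined for brevity:

import urllib.request
import urllib.error

status_names = {200: 'OK', 404: 'Not Found'}  # stand-in for the full httpStatusCodes.txt lookup

websites = ['facebook.com', 'twitter.com', 'google.com',
            'youtube.com', 'icantfindthiswebsite.com']

for site in websites:
    try:
        code = urllib.request.urlopen('http://' + site, timeout=10).getcode()
    except urllib.error.HTTPError as e:
        code = e.code  # 4xx/5xx responses raise HTTPError but still carry a status code
    except (urllib.error.URLError, OSError):
        code = None  # DNS failures, refused connections and timeouts end up here
    print("url = {}, code = {}, status = {}".format(
        site, code, status_names.get(code, "'Code not defined'")))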

Python scraper shows TimeoutError and WinError in the midst of its activity

When I run my Python script I can see that it scrapes 1 or 2 pages and then suddenly breaks, showing [TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond]. I noticed that the website is very slow to display its content. Anyway, I hope there is a workaround. Thanks in advance. Here is the full code:
import requests
from lxml import html

def Startpoint(mpage):
    leaf = 1
    while leaf <= mpage:
        link = "http://www.austrade.gov.au/"
        address = "http://www.austrade.gov.au/suppliersearch.aspx?smode=AND&ind=Agribusiness%7c%7cArts+%26+Recreation%7c%7cBuilding+%26+Construction%7c%7cBusiness+%26+Other+Services%7c%7cConsumer+Goods%2c+Non-Food%7c%7cDefence%2c+Security+%26+Safety%7c%7cEducation+%26+Training%7c%7cEnvironment+%26+Energy%7c%7cFinance+%26+Insurance%7c%7cFood+%26+Beverage%7c%7cGovernment%7c%7cHealth%2c+Biotechnology+%26+Wellbeing%7c%7cICT%7c%7cManufacturing+(Other)%7c%7cMining%7c%7cTourism+%26+Hospitality%7c%7cTransport&folderid=1736&pg=" + str(leaf)
        try:
            page = requests.get(address, timeout=30)
        except requests.exceptions.ReadTimeout:
            print('timed out')
            continue
        page = requests.get(address)
        tree = html.fromstring(page.text)
        titles = tree.xpath('//a[@class="Name"]')
        for title in titles:
            href = link + title.xpath('./@href')[0]
            Endpoint(href)
        leaf += 1

def Endpoint(address):
    try:
        page = requests.get(address, timeout=30)
    except requests.exceptions.ReadTimeout:
        print('timed out')
    else:
        tree = html.fromstring(page.text)
        titles = tree.xpath('//div[@class="contact-details block dark"]')
        for title in titles:
            try:
                Name = title.xpath('.//p[1]/text()')[0] if len(title.xpath('.//p[1]/text()')) > 0 else None
                Name1 = title.xpath('.//p[3]/text()')[0] if len(title.xpath('.//p[3]/text()')) > 0 else None
                Metco = (Name, Name1)
                print(Metco)
            except:
                continue

Startpoint(10)
You could catch the timeout exception and continue the execution of your script:
try:
    page = requests.get(address, timeout=30)  # set the max timeout, e.g. 30 sec
except requests.exceptions.ReadTimeout:
    print('timed out')
except Exception as ex:
    print(type(ex).__name__)
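If the site is merely slow rather than down, an alternative (not part of the answer above) is to let urllib3 retry with a backoff instead of hand-rolling the loop; a minimal sketch using a requests Session with an HTTPAdapter:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
retry = Retry(total=3, backoff_factor=2, status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))

try:
    page = session.get("http://www.austrade.gov.au/", timeout=30)
except requests.exceptions.RequestException as e:
    print('request failed after retries: {}'.format(type(e).__name__))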

Exception Handling with requests_futures in python

I am trying to use requests_futures (https://github.com/ross/requests-futures) for asynchronous requests, which seems to work fine. The only problem is that it doesn't throw any exceptions for me (e.g. a Timeout exception). The code I used is:
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession

session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))

def callback(sess, resp):
    # Print the ip address in callback
    print 'IP', resp.text

proxy = {'http': 'http://176.194.189.57:8080'}
try:
    future = session.get('http://api.ipify.org', background_callback=callback, timeout=5, proxies=proxy)
except Exception as e:
    print "Error %s" % e

# future2 = session.get('http://api.ipify.org', background_callback=callback, timeout=5)
The first session.get() should throw an Exception as it isn't a valid proxy.
For the exception to be raised, you have to call the result() method of the future object you just created; any exception raised while the request ran in the worker thread is re-raised there.
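A minimal sketch of that, reusing the session, callback and proxy from the question:

import requests

future = session.get('http://api.ipify.org',
                     background_callback=callback,
                     timeout=5,
                     proxies=proxy)
try:
    # result() blocks until the request finishes and re-raises any exception
    # (e.g. ProxyError or ConnectTimeout) that occurred in the worker thread.
    response = future.result()
    print(response.status_code)
except requests.exceptions.RequestException as e:
    print("Error: {}".format(e))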
