Generate http error from python3 requests - python

I have a simple long poll thing using python3 and the requests package. It currently looks something like:
def longpoll():
session = requests.Session()
while True:
try:
fetched = session.get(MyURL)
input = base64.b64decode(fetched.content)
output = process(data)
session.put(MyURL, data=base64.b64encode(response))
except Exception as e:
print(e)
time.sleep(10)
There is a case where, instead of processing the input and putting the result, I'd like to raise an HTTP error. Is there a simple way to do this from the high-level Session interface, or do I have to drill down and use the lower-level objects?

Since you have control over the server, you may want to reverse the second call.
Here is an example using bottle to receive the second poll:
def longpoll():
session = requests.Session()
while True: #I'm guessing that the server does not care that we call him a lot of times ...
try:
session.post(MyURL, {"ip_address": my_ip_address}) # request work or I'm alive
#input = base64.b64decode(fetched.content)
#output = process(data)
#session.put(MyURL, data=base64.b64encode(response))
except Exception as e:
print(e)
time.sleep(10)
@bottle.post("/process")
def process_new_work():
    """Handle a work item POSTed to /process and return the processed result.

    The JSON body of the incoming request is handed to ``process``; whatever
    ``process`` returns becomes the HTTP response body.
    """
    # `json` is a property of bottle's request object, not a method.
    data = bottle.request.json
    output = process(data)  # if an error is thrown an HTTP error will be returned by the framework
    return output
This way the server will get either the output or a bad HTTP status.

Related

Python InfluxDB2 - write_api.write(...) How to check for success?

I need to write historic data into InfluxDB (I'm using Python, which is not a must in this case, so I maybe willing to accept non-Python solutions). I set up the write API like this
write_api = client.write_api(write_options=ASYNCHRONOUS)
The Data comes from a DataFrame with a timestamp as key, so I write it to the database like this
result = write_api.write(bucket=bucket, data_frame_measurement_name=field_key, record=a_data_frame)
This call does not throw an exception, even if the InfluxDB server is down. result has a protected attribute _success that is a boolean in debugging, but I cannot access it from the code.
How do I check if the write was a success?
If you use background batching, you can add custom success, error and retry callbacks.
from influxdb_client import InfluxDBClient
def success_cb(details, data):
    """Report a successfully written batch.

    Args:
        details: 3-tuple describing the write target. influxdb-client passes
            ``(bucket, org, precision)`` here, not connection credentials.
        data: the written batch as UTF-8 encoded, newline-separated rows.
    """
    bucket, org, precision = details
    print(bucket, org, precision)
    rows = data.decode('utf-8').split('\n')
    print('Total Rows Inserted:', len(rows))
def error_cb(details, data, exception):
    """Print the exception that made a batch write fail.

    Args:
        details: tuple describing the write target (unused here).
        data: the batch that could not be written (unused here).
        exception: the error raised by the failed write.
    """
    print(exception)
def retry_cb(details, data, exception):
    """Print the exception that caused a batch write to be retried.

    Args:
        details: tuple describing the write target (unused here).
        data: the batch that is going to be retried (unused here).
        exception: the error that triggered the retry.
    """
    print('Retrying because of an exception:', exception)
with InfluxDBClient(url, token, org) as client:
with client.write_api(success_callback=success_cb,
error_callback=error_cb,
retry_callback=retry_cb) as write_api:
write_api.write(...)
If you are eager to test all the callbacks and don't want to wait until all retries are finished, you can override the interval and number of retries.
from influxdb_client import InfluxDBClient, WriteOptions
with InfluxDBClient(url, token, org) as client:
with client.write_api(success_callback=success_cb,
error_callback=error_cb,
retry_callback=retry_cb,
write_options=WriteOptions(retry_interval=60,
max_retries=2),
) as write_api:
...
If you want to write data into the database immediately, use the SYNCHRONOUS version of write_api - https://github.com/influxdata/influxdb-client-python/blob/58343322678dd20c642fdf9d0a9b68bc2c09add9/examples/example.py#L12
An asynchronous write should be "triggered" by calling .get() on its result - https://github.com/influxdata/influxdb-client-python#asynchronous-client
Regards
write_api.write() returns a multiprocessing.pool.AsyncResult or multiprocessing.pool.ApplyResult (both are the same class).
With this return object you can check on the asynchronous request in a couple of ways. See here: https://docs.python.org/2/library/multiprocessing.html#multiprocessing.pool.AsyncResult
If you can use a blocking request, then write_api = client.write_api(write_options=SYNCHRONOUS) can be used.
from datetime import datetime
from influxdb_client import WritePrecision, InfluxDBClient, Point
from influxdb_client.client.write_api import SYNCHRONOUS
with InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org", debug=False) as client:
p = Point("my_measurement") \
.tag("location", "Prague") \
.field("temperature", 25.3) \
.time(datetime.utcnow(), WritePrecision.MS)
try:
client.write_api(write_options=SYNCHRONOUS).write(bucket="my-bucket", record=p)
reboot = False
except Exception as e:
reboot = True
print(f"Reboot? {reboot}")

Force a maximum time to download image from URL

I'm trying to implement a method which tries to make a few attempts to download an image from url. To do so, I'm using requests lib. An example of my code is:
while attempts < nmr_attempts:
try:
attempts += 1
response = requests.get(self.basis_url, params=query_params, timeout=response_timeout)
except Exception as e:
pass
Each attempt must not spend more than "response_timeout" making the request. However, the timeout argument does not seem to do anything: the call does not respect the time limit I give it.
How can I limit the maximum blocking time of the requests.get() call?
Thanks in advance
Can you try following (get rid of try-except block) and see if it helps? except Exception is probably suppressing the exception that requests.get throws.
while attempts < nmr_attempts:
    # Count the attempt before the request so the loop still terminates.
    # With the try/except removed, a timeout now propagates to the caller
    # instead of being silently swallowed.
    attempts += 1
    response = requests.get(self.basis_url, params=query_params, timeout=response_timeout)
Or with your original code, you can catch requests.exceptions.ReadTimeout exception. Such as:
while attempts < nmr_attempts:
try:
attempts += 1
response = requests.get(self.basis_url, params=query_params, timeout=response_timeout)
except requests.exceptions.ReadTimeout as e:
do_something()

Python Error Handling when using requests

I wrote the script below to be able to connect to a remote server and get some data from the XML file. I added some error handling to be able to skip issues with some devices. For some reason whenever the script gets a 401 message back, it breaks the whole loop and I get the message "Could not properly read the csv file". I tried other ways of handling the exception and it would fail at other points. Any info on how to properly deal with this?
#!/usr/bin/python
import sys, re, csv, xmltodict
import requests, logging
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def version(ip, username, password):
baseUrl = "https://" + ip
session = requests.Session()
session.verify = False
session.timeout = 45
print "Connecting to " + ip
try:
r = session.get(baseUrl + '/getxml?location=/Status', auth=(username, password))
r.raise_for_status()
except Exception as error:
print err
doc = xmltodict.parse(r.text)
version = str(doc['Status']['#version'])
def main():
try:
with open('list.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
version(row['ip'], row['Username'], row['Password'])
except Exception as error:
print ValueError("Could not properly read the csv file \r")
sys.exit(0)
if __name__ == "__main__":
main()
The doc and version assignments in def version are outside the try/except, so when the request fails (r is unassigned, or holds an error response such as the 401), the next two operations also fail, raising an uncaught exception that then surfaces in main. Can you try moving doc and version inside the try/except and see if it works?
A related suggestion: catch specific exceptions as this helps know more about why your code crashed. ex. Response.raise_for_status() raises requests.exceptions.HTTPError. Catch that, raise all other exceptions. xml might raise something else, catch that, instead of catching ALL.

How to keep a program running if there is an Traceback error

I made a simple script for amusement that takes the latest comment from http://www.reddit.com/r/random/comments.json?limit=1 and speaks it through espeak. I ran into a problem, however: if Reddit fails to give me the JSON data, which it commonly does, the script stops and gives a traceback. This is a problem, as it stops the script. Is there any way to retry getting the JSON if it fails to load? I am using requests, if that matters.
If you need it, here is the part of the code that gets the json data
url = 'http://www.reddit.com/r/random/comments.json?limit=1'
r = requests.get(url)
quote = r.text
body = json.loads(quote)['data']['children'][0]['data']['body']
subreddit = json.loads(quote)['data']['children'][0]['data']['subreddit']
For the vocabulary: the actual error you're having is an exception, which is thrown at some point in a program because of a detected runtime error, and the traceback is the report of the call stack that tells you where the exception was thrown.
Basically, what you want is an exception handler:
try:
url = 'http://www.reddit.com/r/random/comments.json?limit=1'
r = requests.get(url)
quote = r.text
body = json.loads(quote)['data']['children'][0]['data']['body']
subreddit = json.loads(quote)['data']['children'][0]['data']['subreddit']
except Exception as err:
print err
so that you jump over the part that needs the thing that couldn't work. Have a look at that doc as well: HandlingExceptions - Python Wiki
As pss suggests, if you want to retry after the url failed to load:
# Keep requesting the URL until one request goes through.
url = 'http://www.reddit.com/r/random/comments.json?limit=1'
done = False
while not done:
    try:
        r = requests.get(url)
        # Only stop retrying once the request actually succeeded; otherwise
        # `r` would still be unbound when it is read below.
        done = True
    except Exception as err:
        print(err)
quote = r.text
# Parse the payload once and reuse the comment record for both fields.
comment = json.loads(quote)['data']['children'][0]['data']
body = comment['body']
subreddit = comment['subreddit']
N.B.: That solution may not be optimal, since if you're offline or the URL is always failing, it'll do an infinite loop. If you retry too fast and too much, Reddit may also ban you.
N.B. 2: I'm using the newest Python 3 syntax for exception handling, which may not work with Python older than 2.7.
N.B. 3: You may also want to choose a class other than Exception for the exception handling, to be able to select what kind of error you want to handle. It mostly depends on your app design, and given what you say, you might want to handle requests.exceptions.ConnectionError, but have a look at request's doc to choose the right one.
Here's what you may want, but please think this through and adapt it to your use case:
import requests
import time
import json
def get_reddit_comments():
    """Fetch the latest comment listing from reddit, retrying on failure.

    Makes up to 5 attempts, pausing one second after each connection error.

    Returns:
        The response body as text, or None if every attempt failed with a
        connection error.
    """
    url = 'http://www.reddit.com/r/random/comments.json?limit=1'
    retries = 5
    while retries != 0:
        try:
            r = requests.get(url)
            break  # if the request succeeded we get out of the loop
        except requests.exceptions.ConnectionError as err:
            print("Warning: couldn't get the URL: {}".format(err))
            # The time module has no delay(); sleep() is the blocking pause.
            time.sleep(1)  # wait 1 second between two requests
            retries -= 1
    if retries == 0:  # if we've done 5 attempts, we fail loudly
        return None
    return r.text
def use_data(quote):
    """Parse the reddit JSON payload and consume the comment it contains.

    Args:
        quote: raw JSON text returned by reddit, or a falsy value when the
            download failed.

    Returns:
        True when the payload was parsed and used, False when there was
        nothing to parse or reddit answered with an error document.
    """
    if not quote:
        print("could not get URL, despites multiple attempts!")
        return False
    data = json.loads(quote)
    if 'error' in data:
        # The error code lives in the parsed document, not in the raw string.
        print("could not get data from reddit: error code #{}".format(data['error']))
        return False
    body = data['data']['children'][0]['data']['body']
    subreddit = data['data']['children'][0]['data']['subreddit']
    # … do stuff with your data here
    # Report success explicitly; an implicit None would read as a failure
    # to callers that test `if not use_data(...)`.
    return True
if __name__ == "__main__":
    # sys is only needed for the exit code and is not imported elsewhere in
    # this snippet, so bring it into scope here.
    import sys

    quote = get_reddit_comments()
    if not use_data(quote):
        print("Fatal error: Couldn't handle data receipt from reddit.")
        sys.exit(1)
I hope this snippet will help you correctly design your program. And now that you've discovered exceptions, please always remember that exceptions are for handling things that shall stay exceptional. If you throw an exception at some point in one of your programs, always ask yourself if this is something that should happen when something unexpected happens (like a webpage not loading), or if it's an expected error (like a page loading but giving you an output that is not expected).

Will RPC on App Engine localize to a single instance?

I'm using RPC to fetch multiple URLs asynchronously. I'm using a global variable to track completion and notice that the contents of that global have radically different contents before and after the RPC calls complete.
Feels like I'm missing something obvious... Is it possible for the rpc.wait() to result in the app context being loaded on a new instance when the callbacks are made?
Here's the basic pattern...
aggregated_results = {}
def aggregateData(sid):
# local variable tracking results
aggregated_results[sid] = []
# create a bunch of asynchronous url fetches to get all of the route data
rpcs = []
for r in routes:
rpc = urlfetch.create_rpc()
rpc.callback = create_callback(rpc,sid)
urlfetch.make_fetch_call(rpc, url)
rpcs.append(rpc)
# all of the schedule URLs have been fetched. now wait for them to finish
for rpc in rpcs:
rpc.wait()
# look at results
try:
if len(aggregated_results[sid]) == 0:
logging.debug("We couldn't find results for transaction")
except KeyError as e:
logging.error('aggregation error: %s' % e.message)
logging.debug(aggregated_results)
return aggregated_results[sid]
def magic_callback(rpc, sid):
# do some work to parse the result
# of the urlfetch call...
# <hidden>
#
try:
if len(aggregated_results[sid]) == 0:
aggregated_results[sid] = [stop]
else:
done = False
for i, s in enumerate(aggregated_results[sid]):
if stop.time <= s.time:
aggregated_results[sid].insert(i,stop)
done = True
break
if not done:
aggregated_results[sid].append(stop)
except KeyError as e:
logging.error('aggregation error: %s' % e.message)
The KeyError is thrown both inside the callback as well as the end of processing all of the results. Neither of those should happen.
When I print out the contents of the dictionary, the sid is in fact gone, but there are other entries for other requests that are being processed. In some cases, more entries than I see when the respective request starts.
This pattern is called on a web request handler. Not in the background.
It's as if the callbacks occur on a different instance.
The sid key in this case is a combination of strings that includes a time string and I'm confident it is unique.

Categories

Resources