I am using a thread pool to send requests in parallel: I generate a list of URLs, fetch them all concurrently, parse some data out of each response into a couple of dicts, cross-map keys across the dicts, and then write the result back to a pandas.DataFrame.
import logging
import requests
from concurrent.futures import ThreadPoolExecutor

headers = {}  # stand-in for self.headers from the original class context

def fetch_point(point_url):
    try:
        r = requests.get(point_url, headers=headers)
        r.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        logging.error(f'HTTP Error: {errh}')
        return point_url, None
    except requests.exceptions.ConnectionError as errc:
        logging.error(f'Connection Error: {errc}')
        return point_url, None
    except requests.exceptions.Timeout as errt:
        logging.error(f'Timeout Error: {errt}')
        return point_url, None
    except requests.exceptions.RequestException as e:
        logging.error(e)
        raise SystemExit(e)
    result = r.json().get('data')
    building_url = result['building']
    return point_url, building_url

def fetch_building(building_url):
    try:
        r = requests.get(building_url, headers=headers)
        r.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        logging.error(f'HTTP Error: {errh}')
        return building_url, None
    except requests.exceptions.ConnectionError as errc:
        logging.error(f'Connection Error: {errc}')
        return building_url, None
    except requests.exceptions.Timeout as errt:
        logging.error(f'Timeout Error: {errt}')
        return building_url, None
    except requests.exceptions.RequestException as e:
        logging.error(e)
        raise SystemExit(e)
    result = r.json().get('data')
    building_name = result['name']
    return building_url, building_name

pool = ThreadPoolExecutor()
# df is the existing DataFrame from the surrounding code
point_urls = df.loc[~df['Point Url'].isnull(), 'Point Url'].to_list()

building_urls = {}
for point_url, building_url in pool.map(fetch_point, point_urls):
    if building_url is not None:  # skip points whose request failed
        building_urls[point_url] = building_url

building_urls_list = set(building_urls.values())  # dedupe so each building is fetched once

building_names = {}
for building_url, building_name in pool.map(fetch_building, building_urls_list):
    if building_name is not None:
        building_names[building_url] = building_name

point_building_map = {k: building_names[building_urls[k]]
                      for k in building_urls if building_urls[k] in building_names}

for key, name in point_building_map.items():
    df.loc[df['Point Url'] == key, 'Building'] = name
I am wondering if there is a more optimized approach I could consider. Should I go for asyncio/aiohttp instead of ThreadPoolExecutor?
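For comparison, here is a minimal asyncio/aiohttp sketch of the first fetch stage, assuming the same headers dict and JSON shape as above (fetch_building would follow the same pattern). aiohttp multiplexes all requests on one thread, which tends to scale better than a thread pool once you have hundreds of URLs:

import asyncio
import aiohttp

async def fetch_point(session, point_url):
    async with session.get(point_url, headers=headers) as r:
        r.raise_for_status()
        data = (await r.json()).get('data')
        return point_url, data['building']

async def fetch_all_points(point_urls):
    # one shared session reuses connections across all requests
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_point(session, url) for url in point_urls]
        return dict(await asyncio.gather(*tasks))

# building_urls = asyncio.run(fetch_all_points(point_urls))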
I'm trying the following:
try:
    scroll_token = response["scroll_token"]
except ValueError as e:
    logging.info(f'could not find "scroll_token" {e}')
    try:
        scroll_token = response["scroll_id"]
    except ValueError as e:
        logging.info(f'could not find "scroll_id" {e}')
Pretty much, if the response doesn't have "scroll_token", then I want it to check whether there is a "scroll_id" in the response instead. But for some reason this isn't working; it just keeps failing at the first try and says:

scroll_token = response["scroll_token"]
KeyError: 'scroll_token'
You are catching the wrong exception; it's KeyError that you should expect to be raised.
try:
    scroll_token = response["scroll_token"]
except KeyError as e:
    logging.info(f'could not find "scroll_token" {e}')
    try:
        scroll_token = response["scroll_id"]
    except KeyError as e:
        logging.info(f'could not find "scroll_id" {e}')
You can write this more simply with a loop.
for k in ["scroll_token", "scroll_id"]:
try:
scroll_token = response[k]
break
except KeyError:
logging.info("could not find %s", k)
else:
# What to do if neither key is found?
...
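If the log messages aren't needed, a chain of dict.get calls is shorter still (a sketch; None is just a placeholder default):

scroll_token = response.get("scroll_token", response.get("scroll_id"))
if scroll_token is None:
    ...  # neither key was present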
I'm trying to test my function that returns a dictionary. I'm getting the error AttributeError: 'dict' object has no attribute 'content_type'. How can I test whether the response is a dictionary or not?
def response_get(url):
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)
    data = response.json()
    return data

def test_response_get(self):
    response = response_get('https://ghibliapi.herokuapp.com/films/58611129-2dbc-4a81-a72f-77ddfc1b1b49')
    self.assertEqual(response.content_type, 'application/dict')
You are already returning a dict, and there is no content_type attribute on a dictionary.
Instead you can use isinstance with assertTrue (or, more idiomatically, assertIsInstance).
import requests
import unittest

def response_get(url):
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)
    data = response.json()
    return data

class SimpleTest(unittest.TestCase):
    def test_response_get(self):
        response = response_get('https://ghibliapi.herokuapp.com/films/58611129-2dbc-4a81-a72f-77ddfc1b1b49')
        self.assertTrue(isinstance(response, dict))

if __name__ == '__main__':
    unittest.main()
Output:
.
----------------------------------------------------------------------
Ran 1 test in 0.966s
OK
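Since the test hits a live endpoint, it fails whenever the service is unreachable (and the herokuapp URL above may no longer be up). Here is a sketch of an offline variant that stubs out requests.get with unittest.mock; the payload dict is invented for illustration:

from unittest.mock import patch

class MockedTest(unittest.TestCase):
    @patch('requests.get')
    def test_response_get_offline(self, mock_get):
        # stub the HTTP call so no network access is needed
        mock_get.return_value.json.return_value = {'title': 'My Neighbor Totoro'}
        response = response_get('https://example.com/films/1')
        self.assertIsInstance(response, dict)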
My for loop only iterates over one object from the list returned by the function. Below are the code and terminal logs.
Note: I want to delete both of the URLs that are part of the list below.
The output of delete_index_url() looks like:
['https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13', 'https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.16']
def clean_index():
    delete_urls = delete_index_url()  # output of the function above
    for i in delete_urls:
        print(i)  # <-- this only prints "https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13"
        try:
            req = requests.delete(i)
        except requests.exceptions.ConnectionError as e:
            print('ERROR: Not able to connect to URL')
            return 0
        except requests.exceptions.Timeout as e:
            print('ERROR: ElasticSearch time out')
            return 0
        except requests.exceptions.HTTPError as e:
            print('ERROR: HTTP error')
            return 0
        else:
            print('INFO: ElasticSearch response status code was %s' % req.status_code)
            if req.status_code != 200:
                return 0
            else:
                return 1

print(clean_index())
Log output from the Python script:
INFO: Sorting indexes
['https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13', 'https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.16']
INFO: Getting a list of indexes
INFO: ElasticSearch response status code was 200
INFO: Found 200 indexes
INFO: Sorting indexes
https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13 # only 2019.09.13, not 2019.09.16 logs URLs
The return statements inside the loop end the function on the first URL, which is why only one is processed. Instead of returning 0 or 1 right away, create a list, collect the responses in it, and return it:
def clean_index():
    responses = []
    delete_urls = delete_index_url()  # output of the function above
    for i in delete_urls:
        print(i)
        try:
            req = requests.delete(i)
        except requests.exceptions.ConnectionError as e:
            print('ERROR: Not able to connect to URL')
            responses.append(0)
        except requests.exceptions.Timeout as e:
            print('ERROR: ElasticSearch time out')
            responses.append(0)
        except requests.exceptions.HTTPError as e:
            print('ERROR: HTTP error')
            responses.append(0)
        else:
            print('INFO: ElasticSearch response status code was %s' % req.status_code)
            if req.status_code != 200:
                responses.append(0)
            else:
                responses.append(1)
    return responses

print(clean_index())
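If the caller only needs a single pass/fail answer, the list can be collapsed afterwards (a minor variation on the above):

results = clean_index()
print(all(r == 1 for r in results))  # True only if every delete succeeded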
I am a fairly new Python coder and I wish to create a Twitter bot that favourites a tweet every time it retweets it. I am not exactly sure how to do that, but when the bot searches, it raises the error 'list index out of range'.
import tweepy, time, traceback, random
from tweepy.auth import OAuthHandler
from tweepy.streaming import StreamListener, Stream

ckey = ''
csecret = ''
atoken = ''
asecret = ''

auths = OAuthHandler(ckey, csecret)
auths.set_access_token(atoken, asecret)
api = tweepy.API(auths)

class listener(StreamListener):
    def on_data(self, raw_data):
        try:
            # fragile string splitting of the raw JSON; the 'list index out of
            # range' error comes from one of these split()[1] lookups failing
            tweet_text = raw_data.lower().split('"text":')[1].split('","source":"')[0].replace(",", "")
            screen_name = raw_data.lower().split('"screen_name":"')[1].split('","location"')[0].replace(",", "")
            tweet_cid = raw_data.split('"id:')[1].split('"id_str":')[0].replace(",", "")

            accs = ['']  # banned account screen names go in here
            words = ['hate', 'derp', 'racist', 'evil', 'keemstar', 'mario', 'kirby']  # banned words go in here
            if not any(acc in screen_name.lower() for acc in accs):
                if not any(word in tweet_text.lower() for word in words):
                    fav(tweet_cid)
                    follow(screen_name)
                    retweet(tweet_cid)
                    tweet(myinput)  # NOTE: myinput is never defined anywhere
                    # call what you want to do here
                    # fav(tweet_cid)
                    # retweet(tweet_cid)
            return True
        except Exception as e:
            print(str(e))  # prints the error message; comment out if unwanted

    def on_error(self, status_code):
        try:
            print("error " + str(status_code))  # status_code is an int, so convert it
        except Exception as e:
            print(str(e))

def retweet(tweet_cid):
    try:
        api.retweet(tweet_cid)
        time.sleep(random.randint(50, 900))  # was random.randit(range(...)), which doesn't exist
    except Exception as e:
        print(str(e))

def follow(screen_name):
    try:
        api.create_friendship(screen_name)
        time.sleep(random.randint(50, 900))
    except Exception as e:
        print(str(e))

def fav(tweet_cid):
    try:
        api.create_favorite(tweet_cid)  # tweepy spells it create_favorite
        time.sleep(random.randint(600, 1100))
    except Exception as e:
        print(str(e))

def unfav(tweet_cid):
    try:
        api.destroy_favorite(tweet_cid)  # destroy_tweet is not a tweepy method
        time.sleep(random.randint(8000, 9000))
    except Exception as e:
        print(str(e))

def tweet(myinput):
    try:
        api.update_status(myinput)
        time.sleep(random.randint(1000, 4000))
    except Exception as e:
        print(str(e))

# tags below
track_words = [""]  # deleted all tags so it's easier to read
follow_acc = []  # all usernames converted to user ids

try:
    twt = Stream(auths, listener())
    twt.filter(track=track_words, follow=follow_acc)
except Exception as e:
    print(str(e))
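As an aside, the 'list index out of range' error almost certainly comes from the manual string splitting in on_data: when a tweet's payload doesn't contain the exact substring being split on, split() returns a single-element list and the [1] index fails. A sketch of a safer approach using json.loads (the field names follow the Twitter streaming JSON and are an assumption here):

import json

def on_data(self, raw_data):
    data = json.loads(raw_data)  # parse the payload once instead of splitting strings
    tweet_text = data.get('text', '')
    screen_name = data.get('user', {}).get('screen_name', '')
    tweet_cid = data.get('id')
    ...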
Is this what you are asking for? It gives the stack trace of the exception.
import traceback

try:
    s = 'hi'
    s = s + 1
except Exception as e:
    print(traceback.format_exc())
Output:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
TypeError: cannot concatenate 'str' and 'int' objects
Hope this helps! :)
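Applied to the Twitter bot above, the same idea works in its except blocks; replacing print(str(e)) with the full trace shows exactly which split() call raised the IndexError (a sketch, not the original code):

import traceback

def on_data(self, raw_data):
    try:
        ...
    except Exception:
        print(traceback.format_exc())  # file, line number, and error type instead of just the message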
I am trying to implement a decorator to retry a urllib2.urlopen n times.
I cannot get the decorator to work. When I run it I get the following error:

Traceback (most recent call last):
  File "F:\retry\dec_class.py", line 60, in <module>
    x.getURLdata('127.0.0.1')
TypeError: 'NoneType' object is not callable

Can anyone give me a hand, please?
import urllib2, time
from functools import wraps
import xml.etree.cElementTree as ET

class Retry(object):
    default_exceptions = (Exception,)

    def __init__(self, tries, exceptions=None, delay=0):
        self.tries = tries
        if exceptions is None:
            exceptions = Retry.default_exceptions
        self.exceptions = exceptions
        self.delay = delay

    def __call__(self, f):
        def fn(*args, **kwargs):
            tried = 0
            exception = None
            while tried <= self.tries:
                try:
                    return f(*args, **kwargs)
                except self.exceptions, e:
                    print "Retry, exception: " + str(e)
                    time.sleep(self.delay)
                    tried += 1
                    exception = e
            # if no success after tries, raise last exception
            raise exception
            return fn  # <-- indented inside fn, so it is unreachable

class getURL(object):
    @Retry(2)
    def getURLdata(self, IPaddress):
        try:
            f = urllib2.urlopen(''.join(['http://', IPaddress]))
            f = ET.parse(f)
            return f
        except IOError, err:
            print("L112 IOError is %s" % err)
        except urllib2.URLError, err:
            print("L114 urllib2.URLError is %s" % err)
        except urllib2.HTTPError, err:
            print("L116 urllib2.HTTPError is %s" % err)
        except Exception, err:
            print("L118 Exception is %s" % err)

x = getURL()
x.getURLdata('127.0.0.1')
Your __call__ method doesn't return fn. Instead, it implicitly returns None and so None is bound to getURLdata.
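A sketch of the fix, with return fn moved so that __call__ hands the wrapper back (kept in Python 2 to match the question's code):

def __call__(self, f):
    def fn(*args, **kwargs):
        tried = 0
        exception = None
        while tried <= self.tries:
            try:
                return f(*args, **kwargs)
            except self.exceptions, e:
                print "Retry, exception: " + str(e)
                time.sleep(self.delay)
                tried += 1
                exception = e
        # if no success after tries, raise the last exception
        raise exception
    return fn  # <-- returned from __call__, not from inside fn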