I'm using Tornado's httpclient.HTTPRequest to send asynchronous requests, but I need to add a delay between requests.
That means, let's say I configure RPS (requests per second) = 5; then I should send a request every 0.2 seconds, but asynchronously. How can I send the requests asynchronously without waiting for each request's response?
This is my code:
def process_campaign(self, campaign_instance):
ioloop.IOLoop.current().run_sync(lambda: start_campaign(campaign_instance))
@gen.coroutine
def start_campaign(campaign_instance):
...
while True:
try:
log.info("start_campaign() Requests in Queue: {}".format(len(web_requests)))
web_request = web_requests.pop()
time.sleep(delay)
headers = {'Content-Type': 'application/json'}
request = httpclient.HTTPRequest(auth_username=settings.api_account,
auth_password=settings.api_password,
url=settings.api_url,
body=json.dumps(web_request),
headers=headers,
request_timeout=15,
method="POST")
response = yield http_client.fetch(request)
except httpclient.HTTPError as e:
log.exception("start_campaign() " + str(e))
except IndexError:
log.info('start_campaign() Campaign web requests completed. Errors {}'.format(api_errors))
break
But it seems to wait for the HTTP response before proceeding.
You can try:
class WebRequest(RequestHandler):
def __init__(self, web_request):
self.delay = 0
self.web_request = web_request
@asynchronous
def post(self):
IOLoop.instance().add_timeout(self.delay, self._process)
@gen.coroutine
def _process(self):
try:
http_client = httpclient.AsyncHTTPClient()
log.info("start_campaign() Web request: {}".format(self.web_request))
headers = {'Content-Type': 'application/json'}
request = httpclient.HTTPRequest(auth_username=settings.api_account,
auth_password=settings.api_password,
url=settings.api_url,
body=json.dumps(self.web_request),
headers=headers,
request_timeout=15,
method="POST")
response = yield http_client.fetch(request)
except Exception as exception:
log.exception(exception)
Re-use your while loop:
while True:
try:
web_request = web_requests.pop()
time.sleep(delay)
client = WebRequest(web_request)
client.post()
except IndexError:
break
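Note that time.sleep(delay) in that loop still blocks the IOLoop. A minimal non-blocking sketch (assuming Tornado 4.1+ for gen.sleep and spawn_callback, and reusing web_requests, settings and log from the question): pace the requests with gen.sleep and schedule each fetch without yielding on it, so responses are handled whenever they arrive.
import json

from tornado import gen, httpclient, ioloop

@gen.coroutine
def handle_response(http_client, request):
    # Runs independently of the pacing loop; the response is processed
    # whenever it arrives.
    try:
        response = yield http_client.fetch(request)
        log.info("start_campaign() response code: {}".format(response.code))
    except httpclient.HTTPError as e:
        log.exception("start_campaign() " + str(e))

@gen.coroutine
def start_campaign(campaign_instance):
    http_client = httpclient.AsyncHTTPClient()
    delay = 0.2  # RPS = 5 -> one request every 0.2 s
    while web_requests:
        web_request = web_requests.pop()
        request = httpclient.HTTPRequest(url=settings.api_url,
                                         method="POST",
                                         body=json.dumps(web_request),
                                         headers={'Content-Type': 'application/json'},
                                         auth_username=settings.api_account,
                                         auth_password=settings.api_password,
                                         request_timeout=15)
        # Schedule the fetch on the IOLoop without waiting for its result.
        ioloop.IOLoop.current().spawn_callback(handle_response, http_client, request)
        yield gen.sleep(delay)  # non-blocking pause between requests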
When I try to hand off to a thread a function that parses the page and then executes html.render, an error occurs:
Error: There is no current event loop in thread 'Thread-1 (take_proxy_us_spys_one_thread)
I read about a similar problem and it seems someone there managed to make this work, but I still get the error.
Here is my code, which should run repeatedly.
Please help me understand what is going wrong.
import urllib3
import requests
import time
from requests_html import HTMLSession
import threading
import fake_useragent
def take_proxy_us_spys_one(urls: list=[], header:dict = None,):
for url in urls:
try:
url_first = 'https://spys.one'
r = requests.get(url_first, headers=header)
cookies = r.cookies
session = HTMLSession()
r = session.post(url,
data={'xx00': '','xpp': '5','xf1': '0','xf2': '0','xf3': '0','xf4': '0', 'xf5': '0'},
headers=header,
cookies=cookies)
r.html.render(reload=False,)
print(str(r))
except Exception as exc:
print("Error: " + str(exc))
def take_proxy_us_spys_one_thread(event, sleeptime= 60, urls=[], lock = None):
while event.is_set():
try:
user = fake_useragent.UserAgent().random
header = {'User-Agent': user}
lock.acquire() if lock!=None else None
proxies_1 = take_proxy_us_spys_one(urls=urls, header=header)
lock.release() if lock != None else None
time.sleep(sleeptime)
except Exception as exc:
print("Error: " + str(exc))
time.sleep(sleeptime)
if __name__ == '__main__':
start_in_thread = True
urllib3.disable_warnings()
urls_spys_one = [
'https://spys.one/free-proxy-list/ALL/'
]
lock = threading.Lock()
event = threading.Event()
event.set()
t2 = threading.Thread(target=take_proxy_us_spys_one_thread, args=(event, 10, urls_spys_one, lock),).start()
I tried to implement the mechanism from here.
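For what it's worth, this error usually means requests_html (which drives rendering through asyncio) called asyncio.get_event_loop() in a thread that has no event loop. A minimal sketch of the common workaround, under that assumption: create and register an event loop at the start of the worker thread, before render() is called.
import asyncio

def take_proxy_us_spys_one_thread(event, sleeptime=60, urls=[], lock=None):
    # Worker threads get no event loop by default; create and register one so
    # that r.html.render() can find it via asyncio.get_event_loop().
    asyncio.set_event_loop(asyncio.new_event_loop())
    while event.is_set():
        pass  # ...the rest of the loop body stays exactly as in the question...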
Word of notice: This is my first approach with asyncio, so I might have done something really stupid.
Scenario is as follows:
I need to "http-ping" a humongous list of urls to check if they respond 200 or any other value. I get timeouts for each and every request, though tools like gobuster report 200,403, etc.
My code is sth similar to this:
import asyncio,aiohttp
import datetime
#-------------------------------------------------------------------------------------
async def get_data_coroutine(session,url,follow_redirects,timeout_seconds,retries):
#print('#DEBUG '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+url)
try:
async with session.get(url,allow_redirects=False,timeout=timeout_seconds) as response:
status = response.status
#res = await response.text()
if( status==404):
pass
elif(300<=status and status<400):
location = str(response).split("Location': \'")[1].split("\'")[0]
print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url+' ---> '+location)
if(follow_redirects==True):
return await get_data_coroutine(session,location,follow_redirects,timeout_seconds,retries)
else:
print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url)
return None
except asyncio.exceptions.TimeoutError as e:
print('#ERROR '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+' '+' '+url+' TIMEOUT '+str(e))
return None
#---------------------------------------------------------------------------
async def main(loop):
base_url = 'http://192.168.59.37'
extensions = ['','.html','php']
fd = open('/usr/share/wordlists/dirb/common.txt','r')
words_without_suffix = [x.strip() for x in fd.readlines()]#[-5:] #DEBUG!
words_with_suffix = [base_url+'/'+x+y for x in words_without_suffix for y in extensions]
follow = True
total_timeout = aiohttp.ClientTimeout(total=60*60*24)
timeout_seconds = 10
retries = 1
async with aiohttp.ClientSession(loop=loop,timeout=total_timeout) as session:
tasks = [get_data_coroutine(session,url,follow,timeout_seconds,retries) for url in words_with_suffix]
await asyncio.gather(*tasks)
print('DONE')
#---------------------------------------------------------------------------
if(__name__=='__main__'):
loop = asyncio.get_event_loop()
result = loop.run_until_complete(main(loop))
Did I do something really wrong?
Any word of advice?
Thank you SO much!
Actually, I ended up finding an open issue in aio-libs/aiohttp:
https://github.com/aio-libs/aiohttp/issues/3203
There, they suggest a workaround that achieves what I need:
session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
async with aiohttp.ClientSession(timeout=session_timeout) as session:
async with session.get(url,allow_redirects=False,timeout=1) as response:
...
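For completeness, a minimal runnable sketch of that workaround (the URL and the 10-second values here are only illustrative): drop the total timeout and bound the connect and read phases per socket instead.
import asyncio
import aiohttp

async def ping(url, timeout_seconds=10):
    session_timeout = aiohttp.ClientTimeout(total=None,
                                            sock_connect=timeout_seconds,
                                            sock_read=timeout_seconds)
    async with aiohttp.ClientSession(timeout=session_timeout) as session:
        async with session.get(url, allow_redirects=False) as response:
            return response.status

print(asyncio.run(ping('http://example.com')))  # e.g. 200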
To answer your question - no, you did nothing wrong. I can't see anything wrong with your code in terms of http request/response/timeout handling.
If indeed all your requests are timing out to the host (http://192.168.59.37), I suspect the issues you are experiencing are most likely down to how your network is resolving requests (or how your code is building the url).
You can confirm whether requests are independently succeeding/failing using a tool like curl, eg:
curl "http://192.168.59.37/abc.html"
I tested it locally by using
python3 -m http.server 8080
and placing empty files 'abc' and 'abc.html' in the same directory, updating the base_url
base_url = "http://127.0.0.1:8080"
With my minor updates (code below), here's the output:
http://127.0.0.1:8080/.bashrc.php
#404
http://127.0.0.1:8080/.bashrc
#404
http://127.0.0.1:8080/.bashrc.html
#404
http://127.0.0.1:8080/abc
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc
http://127.0.0.1:8080/zt.php
#404
http://127.0.0.1:8080/zt.html
#404
http://127.0.0.1:8080/zt
#404
http://127.0.0.1:8080/abc.html
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc.html
http://127.0.0.1:8080/abc.php
#404
DONE
My updates are mostly minor, but they might help with further debugging.
For debugging, print the url. It's important to determine whether the code is building the url correctly. This highlighted to me that the 'php' extension is missing a '.', so it was looking for abcphp, not abc.php.
Use response.ok to test for a successful http response; your code wasn't handling 500 errors (instead it reported a hit).
Use Python f-strings for cleaner formatting.
import asyncio
import aiohttp
import datetime
async def get_data_coroutine(session, url, follow_redirects, timeout_seconds, retries):
try:
async with session.get(
url, allow_redirects=False, timeout=timeout_seconds
) as response:
print(url)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if response.ok:
print(f"#HIT {now} {response.status} {url}")
else:
status = response.status
if status == 404:
print("#404")
elif 300 <= status and status < 400:
location = str(response).split("Location': '")[1].split("'")[0]
print(f"#HIT {now} {status} {url} ---> {location}")
if follow_redirects is True:
return await get_data_coroutine(
session, location, follow_redirects, timeout_seconds, retries
)
else:
print("#ERROR ", response.status)
return None
except asyncio.TimeoutError as e:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"#ERROR {now} {url} TIMEOUT ", e)
return None
async def main(loop):
base_url = "http://127.0.0.1:8080"
extensions = ["", ".html", ".php"]
fd = open("/usr/share/wordlists/dirb/common.txt", "r")
words_without_suffix = [x.strip() for x in fd.readlines()]
words_with_suffix = [
base_url + "/" + x + y for x in words_without_suffix for y in extensions
]
follow = True
total_timeout = aiohttp.ClientTimeout(total=60 * 60 * 24)
timeout_seconds = 10
retries = 1
async with aiohttp.ClientSession(loop=loop, timeout=total_timeout) as session:
tasks = [
get_data_coroutine(session, url, follow, timeout_seconds, retries)
for url in words_with_suffix
]
await asyncio.gather(*tasks)
print("DONE")
if __name__ == "__main__":
loop = asyncio.get_event_loop()
result = loop.run_until_complete(main(loop))
I am observing that with the python requests module, HTTP keep-alive is not being honored.
I don't see acks for keep-alive being sent from the host where I am running the python script.
Please let me know how it can be fixed. Following is my code:
import json
import requests
import logging
import sys
import time
from threading import Thread
logging.basicConfig(level=logging.DEBUG)
class NSNitro:
def __init__(self,*args):
if len(args) > 2:
self.ip = args[0]
self.username = args[1]
self.password = args[2]
self.session_id = None
url = 'http://'+self.ip+'/nitro/v1/config/login'
payload = { "login": { "username":"nsroot", "password":"nsroot" }}
headers = {"Content-type": "application/json", 'Connection': 'keep-alive'}
try:
r = requests.post(url=url,headers=headers,data=json.dumps(payload),timeout=5)
logging.info(r.json()["sessionid"])
if(r.json()["sessionid"] != None):
self.session_id = r.json()["sessionid"]
except requests.exceptions.RequestException:
logging.critical("Some error occurred during connection")
else:
logging.error("Not sufficient parameters provided.Required : ipaddress , username , password")
def install_build(self,build_url):
url = 'http://ip/nitro/v1/config/install'
headers = {"Content-type": "application/json","Connection": "keep-alive"}
payload = {"install": {"url": build_url}}
try:
cookie = {"NITRO_AUTH_TOKEN": self.session_id}
r = requests.post(timeout=5, url=url, data=json.dumps(payload), headers=headers,cookies=cookie)
except requests.exceptions.RequestException:
print("Connection Error occurred")
raise  # re-raising gives details of the exception
else:
assert r.status_code == 201, "Status code seen: " + str(r.status_code) + "\n" + "Error message from system: " + \
r.json()["message"]
print("Successfully triggered job on device to install build")
def __del__(self):
logging.debug("Deleted the object")
if __name__ == '__main__':
ns_session = NSNitro(ip,username,password)
url_i = 'https://myupload-server.net/build-13.0-480.16.tgz'
t1 = Thread(target=ns_session.install_build,args=(url_i,))
t1.start()
''' while t1.is_alive():
t2 = Thread(target=ns_session.get_installed_version,)
t2.start()
t2.join()'''
time.sleep(100)
logging.info("Install thread completed")
t1.join()
ns_session.logout()
When the request is posted using the curl command, the acks are sent at the specified keep-alive intervals. Without the acks being sent, the server resets the connection.
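A sketch of one likely cause and fix (assumptions: the goal is connection reuse to the NITRO endpoint, and the adapter below is illustrative rather than tested against it): each bare requests.post() call above opens a fresh connection, so nothing is kept alive between the login and install calls. A single requests.Session reuses the underlying TCP connection, and if you also need TCP-level keepalive probes (the acks you see with curl), you can enable SO_KEEPALIVE on the pool's sockets with a custom adapter:
import socket

import requests
from requests.adapters import HTTPAdapter
from urllib3.connection import HTTPConnection

class KeepAliveAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        # Turn on TCP keepalive probes for every socket in the connection pool.
        kwargs['socket_options'] = HTTPConnection.default_socket_options + [
            (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
        ]
        super().init_poolmanager(*args, **kwargs)

session = requests.Session()
session.mount('http://', KeepAliveAdapter())
# ...then use session.post(...) for both the login and the install requests
# instead of requests.post(...).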
I wrote a piece of code to rotate proxies for a multithreaded crawler, but it doesn't look very good and I want to see what I can improve.
What I had in mind:
1) Make a number of requests (a random range) with a proxy, then change it
2) If blocked, change the proxy (remove it from proxies list) and retry.
3) If an HTTP error occurs, retry with the same proxy
4) If a proxy error occurs, change the proxy (remove it from proxies list), and retry.
Usually it works pretty decently, though I see some problems which may appear:
1) the make_request function calls itself, which may in some cases lead to an infinite loop (a bounded-retry sketch follows the code below)
2) proxy errors are not handled properly
Here is my code:
import requests
import threading
import random
import time
import logging
import os
class Crawler():
def __init__(self):
self.user_agents = []
with open('user_agents.txt', 'r') as inpt:
for line in inpt:
if line.strip():
self.user_agents.append(line.strip())
self.proxies = []
with open('proxies.txt', 'r') as inpt:
for line in inpt:
if not line.strip():
continue
self.proxies.append({"http": ''.join(["http://",
line.strip()]),
"https": ''.join(["https://",
line.strip()])})
self.headers = {'User-agent': random.choice(self.user_agents)}
self.session = requests.Session()
self.counter = 0
self.current_proxy = None
self.lock = threading.Lock()
self.set_proxy()
def make_request(self, method, url, **kwargs):
"""Request a page and return its content
#method - string, POST or GET
#url - string
#return: string, HTML page source
or bytes for binary files
"""
# make only 10 to 20 requests using a proxy
with self.lock:
if self.counter > random.randrange(10, 20):
self.set_proxy()
else:
self.counter += 1
try:
if method == 'GET':
if kwargs.get('download', False):
req = self.session.get(url,
headers=self.headers,
stream=True, verify=False)
return req.raw
req = self.session.get(url,
headers=self.headers,
verify=False,
**kwargs)
else:
req = self.session.post(url,
headers=self.headers,
verify=False,
**kwargs)
if req.status_code == 407:
logging.exception('make_request[Proxy Authentication]')
os._exit(1)
if req.encoding not in ['utf8', 'utf-8', None]:
html = req.content.decode(req.encoding)
else:
html = req.text
if 'Access Denied' in html:
# website's error message. proxy blocked
with self.lock:
self.set_proxy(remove_proxy=True)
time.sleep(1)
return self.make_request(method, url, **kwargs)
else:
return html
except requests.exceptions.HTTPError as e:
if e.response.status_code == 403:
# access forbidden. proxy blocked
with self.lock:
self.set_proxy(remove_proxy=True)
time.sleep(1)
return self.make_request(method, url, **kwargs)
elif e.response.status_code == 404:
logging.exception(' '.join([
'make_request[HTTPError]',
url, str(e)]))
return
elif e.response.status_code == 429:
# too many requests. proxy blocked
with self.lock:
self.set_proxy(remove_proxy=True)
time.sleep(1)
return self.make_request(method, url, **kwargs)
else:
logging.exception(' '.join([
'make_request[unknown HTTPError]',
url, str(e)]))
return None
except requests.exceptions.InvalidURL as e:
logging.exception(' '.join([
'make_request[InvalidURL]',
url, str(e)]))
return None
except requests.exceptions.Timeout:
time.sleep(1)
return self.make_request(method, url, **kwargs)
except requests.exceptions.ConnectionError as e:
# Connection refused
if '403 Forbidden' in str(e):
logging.exception('make_requests[403 forbidden]')
os._exit(1)
with self.lock:
self.set_proxy()
time.sleep(1)
return self.make_request(method, url, **kwargs)
except Exception as e:
logging.exception(' '.join([
'make_request[unknown Exception]',
url, str(e)]))
return None
def set_proxy(self, remove_proxy=False):
"""Get a random proxy from the list"""
if remove_proxy:
try:
self.proxies.remove(self.current_proxy)
except:
pass
while True:
if self.proxies:
proxy = random.choice(self.proxies)
if not self.is_alive(proxy):
continue
self.current_proxy = proxy
self.session = requests.Session()
self.session.proxies = self.current_proxy
self.headers = {'User-agent': random.choice(self.user_agents)}
self.counter = 0
break
else:
logging.exception('EMPTY PROXY LIST')
os._exit(1)
break
def is_alive(self, proxy):
"""Check if a proxy is alive or not
#proxy - dict
#return: True if alive, False otherwise
"""
try:
requests.get('http://www.google.com',
proxies=proxy, timeout=5)
return True
except:
return False
Thanks
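Regarding point 1 above, a rough sketch of how the recursion could become a bounded retry loop (not the original code; _do_request is a hypothetical helper wrapping the session.get/session.post calls):
MAX_RETRIES = 3  # assumed cap, tune as needed

def make_request(self, method, url, **kwargs):
    for attempt in range(MAX_RETRIES):
        try:
            # _do_request is a hypothetical helper holding the session logic
            return self._do_request(method, url, **kwargs)
        except requests.exceptions.Timeout:
            time.sleep(1)  # transient failure: retry with the same proxy
        except requests.exceptions.ConnectionError:
            with self.lock:
                self.set_proxy(remove_proxy=True)  # proxy looks dead: rotate
            time.sleep(1)
    return None  # give up after MAX_RETRIES attempts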
Suppose my Django/Flask application pulls in information from APIs; how can I test that connection exceptions are caught and handled properly?
So for example here is a function that calls an API:
import requests
def call_the_api():
url = 'http://httpbin.org/get'
try:
req = requests.get(url)
if req.json().get('errors'):
logger.warn("API error response")
return {'request_error': 'api_error_response'}
except requests.exceptions.ConnectionError:
logger.warn('ConnectionError')
return {'request_error': 'ConnectionTimeout'}
except requests.exceptions.Timeout:
logger.warn('API request timed out')
return {'request_error': 'Timeout'}
except Exception as ex:
logger.warn("API request Exception: %s", ex)
return {'request_error': ex}
else:
return req.json()
For testing responses from the API I found mock to be very useful.
def mock_get_request(*args, **kwargs):
response = requests.get.return_value
json_file = 'sample_response.json'
json_file_path = os.path.join(os.path.dirname(__file__), json_file)
with open(json_file_path, 'r') as f:
response.content = response.text = f.read()
response.status_code = 200
response.encoding = 'utf-8'
response.json = lambda: json.loads(response.content.decode(response.encoding))
response.url = u'%s' % args[0]
return response
class TestSuitabilityFunctions(TestCase):
def test_call_the_api(self):
requests.get = MagicMock(side_effect=mock_get_request)
resp = call_the_api()
self.assertEqual(resp.get('url'), "http://httpbin.org/get")
So my question is how would I go about simulating a connection timeout or error?
Untested code but...
def connection_error():
raise requests.exceptions.ConnectionError
class TestSuitabilityFunctions(TestCase):
@patch.object(module_that_youre_testing, "requests")
def test_connection_error(self, mock_requests):
mock_requests.get = MagicMock(side_effect=connection_error)
with self.assertRaises(requests.exceptions.ConnectionError) as cm:
resp = call_the_api()
exception = cm.exception
self.assertEqual(resp, {'request_error': 'ConnectionTimeout'})
... or similar should do the trick. Off the top of my head I can't remember how assertRaises interacts with errors that are caught. Maybe you don't even need the assertRaises part.
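For reference, a minimal sketch of the simpler form (untested; 'myapp.api' is a stand-in for whichever module call_the_api lives in): since call_the_api() catches the ConnectionError itself, there is nothing for assertRaises to see, so just assert on the returned dict.
from unittest import TestCase
from unittest.mock import patch

import requests

from myapp.api import call_the_api  # hypothetical module path

class TestCallTheApi(TestCase):
    @patch('myapp.api.requests.get', side_effect=requests.exceptions.ConnectionError)
    def test_connection_error(self, mock_get):
        # The exception is swallowed inside call_the_api(); only the returned
        # error dict is observable from the outside.
        self.assertEqual(call_the_api(), {'request_error': 'ConnectionTimeout'})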