I'm using Python asyncio to implement a fast HTTP client.
As you can see in the comment inside the worker function below, I get the responses as soon as they are finished. I would like to get the responses in order, which is why I'm using asyncio.gather.
Why is it returning None? Can anybody help?
Thank you so much!
import time
import aiohttp
import asyncio
MAXREQ = 100
MAXTHREAD = 500
URL = 'https://google.com'
g_thread_limit = asyncio.Semaphore(MAXTHREAD)
async def worker(session):
    async with session.get(URL) as response:
        await response.read()  # If I print this line I get the responses correctly

async def run(worker, *argv):
    async with g_thread_limit:
        await worker(*argv)

async def main():
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*[run(worker, session) for _ in range(MAXREQ)])

if __name__ == '__main__':
    totaltime = time.time()
    print(asyncio.get_event_loop().run_until_complete(main()))  # I'm getting a None here
    print(time.time() - totaltime)
Your run function doesn't return anything explicitly, so it returns None implicitly (and the same goes for worker). Add return statements and you'll get the results:
async def worker(session):
    async with session.get(URL) as response:
        return await response.read()

async def run(worker, *argv):
    async with g_thread_limit:
        return await worker(*argv)
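For completeness, here is a minimal self-contained sketch of how the corrected program could look, reusing MAXREQ, MAXTHREAD and URL from the question. asyncio.gather returns the results in the same order as the awaitables you pass it, regardless of which request finishes first, so the list of bodies comes back ordered:
import time
import asyncio

import aiohttp

MAXREQ = 100
MAXTHREAD = 500
URL = 'https://google.com'

g_thread_limit = asyncio.Semaphore(MAXTHREAD)

async def worker(session):
    async with session.get(URL) as response:
        return await response.read()  # return the body so gather can collect it

async def run(worker, *argv):
    async with g_thread_limit:
        return await worker(*argv)  # propagate the worker's return value

async def main():
    async with aiohttp.ClientSession() as session:
        # gather preserves the order of its arguments
        return await asyncio.gather(*[run(worker, session) for _ in range(MAXREQ)])

if __name__ == '__main__':
    totaltime = time.time()
    results = asyncio.get_event_loop().run_until_complete(main())
    print(len(results), 'bodies received')  # a list of response bodies, not None
    print(time.time() - totaltime)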
I have a list of URLs which I need to fetch concurrently. In order to do that I'm using asyncio and aiohttp in this way:
async def fetch(session: aiohttp.ClientSession, res):
    async with session.get(f"/foo/bar/{res}") as res:
        return await res.json()

async def get_resources():
    async with aiohttp.ClientSession(base_url=BASE_URL, headers=HEADERS) as session:
        return await asyncio.gather(*[
            fetch(session, res)
            for res in [resource1, resource2, ...]
        ])

def main():
    return asyncio.get_event_loop().run_until_complete(
        get_resources()
    )
This works as I expected; all resources are fetched concurrently. But I noticed that I can also use asyncio.ensure_future:
async def get_resources():
    async with aiohttp.ClientSession(base_url=BASE_URL) as session:
        return await asyncio.gather(*[
            asyncio.ensure_future(fetch(session, res))
            for res in [resource1, resource2, ...]
        ])
So I'm wondering, what is the benefit of the extra call before gather, in this case asyncio.ensure_future? Why not pass the coroutines directly to gather, like I did in the first example?
My code is pretty similar to this:
import aiohttp
import asyncio

class Multiple_HTTP:
    #----------------------------------------------------------------------------------
    @staticmethod
    async def fetch(session, url):
        try:
            async with session.get(url) as response:
                status = response.status
                text = await response.text()
                return (url, status, text)
        except Exception as e:
            return e
    #----------------------------------------------------------------------------------
    @staticmethod
    async def fetch_all(urls, timeout):
        loop = asyncio.get_event_loop()
        session_timeout = aiohttp.ClientTimeout(total=None, sock_connect=timeout, sock_read=timeout)
        async with aiohttp.ClientSession(loop=loop, timeout=session_timeout, connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
            cwlist = [loop.create_task(Multiple_HTTP.fetch(session, url)) for url in urls]
            results = await asyncio.gather(*cwlist, return_exceptions=True)
            return results
    #----------------------------------------------------------------------------------
    @staticmethod
    def run(urls, timeout=5):
        return asyncio.run(Multiple_HTTP.fetch_all(urls, timeout))
    #----------------------------------------------------------------------------------
And, for some URLs, it hangs at response.text().
It does not honour the timeout, and the page renders fine in Chrome running on a Windows box.
Is there something wrong in my code?
How can I modify it so that it works (works as in raising a timeout exception instead of just hanging, for example)?
I'm using Python 3.7 and trying to make a crawler that can crawl multiple domains asynchronously. For this I'm using asyncio and aiohttp, but I'm experiencing problems with aiohttp.ClientSession. This is my reduced code:
import aiohttp
import asyncio

async def fetch(session, url):
    async with session.get(url) as response:
        print(await response.text())

async def main():
    loop = asyncio.get_event_loop()
    async with aiohttp.ClientSession(loop=loop) as session:
        cwlist = [loop.create_task(fetch(session, url)) for url in ['http://python.org', 'http://google.com']]
        asyncio.gather(*cwlist)

if __name__ == "__main__":
    asyncio.run(main())
The exception thrown is this:
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=RuntimeError('Session is closed')>
What am I doing wrong here?
You forgot to await the asyncio.gather result:
async with aiohttp.ClientSession(loop=loop) as session:
    cwlist = [loop.create_task(fetch(session, url)) for url in ['http://python.org', 'http://google.com']]
    await asyncio.gather(*cwlist)
If you ever have an async with block that contains no await expressions, you should be fairly suspicious.
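For reference, a minimal self-contained sketch of the fixed crawler, keeping the two URLs from the question and using asyncio.create_task (available since Python 3.7) instead of passing the loop around, could look like this:
import asyncio

import aiohttp

URLS = ['http://python.org', 'http://google.com']

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(fetch(session, url)) for url in URLS]
        # awaiting gather keeps the session open until every task has finished
        pages = await asyncio.gather(*tasks)
        for url, page in zip(URLS, pages):
            print(url, len(page))

if __name__ == "__main__":
    asyncio.run(main())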
When I run this, it lists the websites in the database one by one with their response codes, and it takes about 10 seconds to run through a very small list. It should be much faster and isn't running asynchronously, but I'm not sure why.
import dblogin
import aiohttp
import asyncio
import async_timeout

dbconn = dblogin.connect()
dbcursor = dbconn.cursor(buffered=True)
dbcursor.execute("SELECT thistable FROM adatabase")
website_list = dbcursor.fetchall()

async def fetch(session, url):
    with async_timeout.timeout(30):
        async with session.get(url, ssl=False) as response:
            await response.read()
            return response.status, url

async def main():
    async with aiohttp.ClientSession() as session:
        for all_urls in website_list:
            url = all_urls[0]
            resp = await fetch(session, url)
            print(resp, url)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
    dbcursor.close()
    dbconn.close()
This article explains the details. What you need to do is wrap each fetch call in a Task, and then pass the list of those to either asyncio.wait or asyncio.gather, depending on your needs.
Your code would look something like this:
async def fetch(session, url):
    with async_timeout.timeout(30):
        async with session.get(url, ssl=False) as response:
            await response.read()
            return response.status, url

async def main():
    tasks = []
    async with aiohttp.ClientSession() as session:
        for all_urls in website_list:
            url = all_urls[0]
            task = asyncio.create_task(fetch(session, url))
            tasks.append(task)
        responses = await asyncio.gather(*tasks)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Also, are you sure that the loop.close() call is needed? The docs mention that:
The loop must not be running when this function is called. Any pending callbacks will be discarded.
This method clears all queues and shuts down the executor, but does not wait for the executor to finish.
As mentioned in the docs and in the link that @user4815162342 posted, it is better to use the create_task method instead of the ensure_future method when we know that the argument is a coroutine. Note that create_task was added in Python 3.7, so previous versions should continue using ensure_future instead.
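On Python 3.7+ you can also sidestep the manual loop handling (and the loop.close() question) entirely with asyncio.run(), which creates the event loop, runs the coroutine, and closes the loop for you. A minimal sketch of that variant, with a hypothetical hard-coded URL list standing in for the database query and aiohttp's own per-request ClientTimeout used instead of the async_timeout package:
import asyncio

import aiohttp

# hypothetical stand-in for the rows returned by the database query
website_list = [('http://python.org',), ('http://google.com',)]

async def fetch(session, url):
    # per-request timeout: raises a timeout error instead of hanging forever
    async with session.get(url, ssl=False, timeout=aiohttp.ClientTimeout(total=30)) as response:
        await response.read()
        return response.status, url

async def main():
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(fetch(session, row[0])) for row in website_list]
        return await asyncio.gather(*tasks)

if __name__ == '__main__':
    # asyncio.run() manages the loop's lifetime, so no explicit loop.close()
    for status, url in asyncio.run(main()):
        print(status, url)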
I need to repeatedly fetch and parse the content of one link. The synchronous way gives me 2-3 responses per second; I need it faster (yes, I know, too fast is bad too).
I found some async examples, but all of them show how to handle the results after all links are parsed, whereas I need to process each response immediately after receiving it, something like the code below. But this code doesn't give any speed improvement:
import aiohttp
import asyncio
import time

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    while True:
        async with aiohttp.ClientSession() as session:
            html = await fetch(session, 'https://example.com')
            print(time.time())
            # do_something_with_html(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
but this code doesn't give any speed improvement
asyncio (and async/concurrency in general) gives a speed improvement for I/O-bound work that can be interleaved.
When everything you do is await something and you never create any parallel tasks (using asyncio.create_task(), asyncio.ensure_future(), etc.), then you are basically doing classic synchronous programming :)
So, how to make the requests faster:
import aiohttp
import asyncio
import time

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def check_link(session):
    html = await fetch(session, 'https://example.com')
    print(time.time())
    # do_something_with_html(html)

async def main():
    async with aiohttp.ClientSession() as session:
        while True:
            asyncio.create_task(check_link(session))
            await asyncio.sleep(0.05)

asyncio.run(main())
Notice: the async with aiohttp.ClientSession() as session: must be above (outside) the while True: loop for this to work. Actually, having a single ClientSession() for all your requests is good practice anyway.
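If firing a new task every 0.05 s ends up creating more in-flight requests than you want, one possible refinement (not part of the answer above) is to bound the number of concurrent requests with an asyncio.Semaphore, much like the first question in this thread does. A sketch, keeping the fetch and check_link names from above and assuming a limit of 20:
import aiohttp
import asyncio
import time

MAX_CONCURRENT = 20  # assumed limit, tune to taste

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def check_link(session, limit):
    async with limit:  # at most MAX_CONCURRENT requests in flight at once
        html = await fetch(session, 'https://example.com')
        print(time.time())
        # do_something_with_html(html)

async def main():
    # create the semaphore inside the running event loop
    limit = asyncio.Semaphore(MAX_CONCURRENT)
    async with aiohttp.ClientSession() as session:
        while True:
            asyncio.create_task(check_link(session, limit))
            await asyncio.sleep(0.05)

asyncio.run(main())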
I gave up on async; threading solved my problem, thanks to this answer:
https://stackoverflow.com/a/23102874/5678457
from threading import Thread
import requests
import time

class myClassA(Thread):
    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        self.start()

    def run(self):
        while True:
            r = requests.get('https://ex.com')
            print(r.status_code, time.time())

for i in range(5):
    myClassA()
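One caveat if this threading version is run as a standalone script: the workers are daemon threads, so they are killed as soon as the main thread falls off the end of the for loop. A minimal sketch of one way to keep the main thread alive (a sleep loop rather than join(), since run() never returns):
from threading import Thread
import time

import requests

class myClassA(Thread):
    def __init__(self):
        Thread.__init__(self)
        self.daemon = True  # daemon threads die when the main thread exits
        self.start()

    def run(self):
        while True:
            r = requests.get('https://ex.com')
            print(r.status_code, time.time())

if __name__ == '__main__':
    for i in range(5):
        myClassA()
    # keep the main thread alive so the daemon worker threads keep running
    while True:
        time.sleep(1)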