Today is another question day. I am doing requests in Python, and I am trying to speed things up. If you recall, I already asked a question about this. Today I finished that code and moved on to a similar problem.
Running this code produces a RuntimeError. It says:
RuntimeError: There is no current event loop in thread 'Thread-2'
I don't know why I am getting this error, since I am using threading.Thread() instead of asyncio. Why is it asking for an event loop?
def func(mininum, maximum=None):
    if maximum == None:
        maximum = mininum
    import asyncio
    import aiohttp
    from bs4 import BeautifulSoup

    async def find_account(i_d, session):
        try:
            async with session.get(f'https://web.roblox.com/users/{i_d}/profile') as response:
                if response.status == 200:
                    r = await response.read()
                    soup = BeautifulSoup(r, 'html.parser')
                    stuff = list(soup.find_all('h2'))
                    stuff = stuff[0]
                    stuff = list(stuff)
                    stuff = stuff[0]
                    print(f'{i_d} is done')
                    return str(stuff) + ' ID: {id}'.format(id=i_d)
                else:
                    return 'None'
        except aiohttp.ServerDisconnectedError:
            await find_account(i_d, session)
        except asyncio.exceptions.TimeoutError:
            await find_account(i_d, session)

    async def id_range(minimum, maximum):
        tasks = []
        async with aiohttp.ClientSession() as session:
            for i in range(minimum, maximum + 1):
                tasks.append(asyncio.create_task(find_account(i_d=i, session=session)))
            return await asyncio.gather(*tasks)

    event_loop = asyncio.get_event_loop()
    return event_loop.run_until_complete(id_range(mininum, maximum))

import threading

all = []
p1 = threading.Thread(target=func, args=(1, 1000)).start()
p2 = threading.Thread(target=func, args=(1001, 2000)).start()
p1 : threading.Thread
p2 : threading.Thread
p1.join()
p2.join()
Comment if you need more traceback.
You don't actually need threading here: within one thread, asyncio already interleaves the requests itself, whereas with plain threads the OS decides when to switch. The RuntimeError itself comes from asyncio.get_event_loop(), which only creates an event loop automatically in the main thread; a worker thread has no current event loop, so the call fails. Either give each thread its own loop (asyncio.new_event_loop() plus asyncio.set_event_loop(), or simply asyncio.run()), or drop the threads and run everything in a single loop.
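A minimal sketch of the per-thread-loop approach, assuming asyncio.run() (which creates and later closes a fresh event loop in whatever thread calls it) and with the aiohttp logic replaced by a placeholder coroutine:

import asyncio
import threading

async def work(minimum, maximum):
    # stand-in for the real aiohttp requests
    await asyncio.sleep(0)
    return list(range(minimum, maximum + 1))

def func(minimum, maximum):
    # asyncio.run() creates a new event loop in the calling thread,
    # runs the coroutine, and closes the loop again, so it also works
    # outside the main thread.
    return asyncio.run(work(minimum, maximum))

t1 = threading.Thread(target=func, args=(1, 1000))
t2 = threading.Thread(target=func, args=(1001, 2000))
t1.start()
t2.start()
t1.join()
t2.join()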
I want to understand why async/await didn't work while looping over a range; the code is below. Is it that the function I called is not asynchronous? I'm not able to understand it. The problem is that the same thing worked when I used a non-async process instead of the for loop. Is there something I'm missing? Thanks.
import asyncio
import time

async def test():
    response = {}
    s = time.time()
    tasks = []
    answers = []
    for k in range(4):
        print(k)
        tasks.append(asyncio.create_task(take_time()))
    answers = await asyncio.gather(*tasks)
    response['ans'] = answers
    response["Time Taken"] = round((time.time() - s), 2)
    print(response)
    return response

async def take_time():
    # time.sleep(4)
    # await asyncio.sleep(4)
    for i in range(100000000):
        o = i
    return str(o)

if __name__ == '__main__':
    asyncio.run(test())
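For context: a coroutine only lets other tasks run when it reaches an await, and take_time() is a pure CPU loop with no await in it, so the four tasks simply run one after another. A minimal sketch of the contrast, using asyncio.sleep() as a stand-in for genuinely asynchronous work:

import asyncio
import time

async def cpu_bound():
    # no await: hogs the event loop until the loop finishes
    total = 0
    for i in range(10_000_000):
        total += i
    return total

async def io_bound():
    # awaiting hands control back to the event loop, so the sleeps overlap
    await asyncio.sleep(1)
    return 'done'

async def main():
    start = time.time()
    await asyncio.gather(*(cpu_bound() for _ in range(4)))
    print('cpu-bound:', round(time.time() - start, 2), 's')  # roughly 4x one run

    start = time.time()
    await asyncio.gather(*(io_bound() for _ in range(4)))
    print('io-bound:', round(time.time() - start, 2), 's')   # roughly 1 s total

asyncio.run(main())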
I'm trying to learn how to run tasks concurrently using Python's asyncio module. In the following code, I've got a mock "web crawler" for an example. Basically, I am trying to make it where there are a max of two active fetch() requests happening at any given time, and I want process() to be called during the sleep() period.
import asyncio

class Crawler():
    urlq = ['http://www.google.com', 'http://www.yahoo.com',
            'http://www.cnn.com', 'http://www.gamespot.com',
            'http://www.facebook.com', 'http://www.evergreen.edu']
    htmlq = []
    MAX_ACTIVE_FETCHES = 2
    active_fetches = 0

    def __init__(self):
        pass

    async def fetch(self, url):
        self.active_fetches += 1
        print("Fetching URL: " + url)
        await asyncio.sleep(2)
        self.active_fetches -= 1
        self.htmlq.append(url)

    async def crawl(self):
        while self.active_fetches < self.MAX_ACTIVE_FETCHES:
            if self.urlq:
                url = self.urlq.pop()
                task = asyncio.create_task(self.fetch(url))
                await task
            else:
                print("URL queue empty")
                break

    def process(self, page):
        print("processed page: " + page)

# main loop
c = Crawler()
while c.urlq:
    asyncio.run(c.crawl())
while c.htmlq:
    page = c.htmlq.pop()
    c.process(page)
However, the code above downloads the URLs one by one (not two at a time concurrently) and doesn't do any "processing" until after all URLs have been fetched. How can I make the fetch() tasks run concurrently, and make it so that process() is called in between during sleep()?
Your crawl method is waiting after each individual task; you should change it to this:
async def crawl(self):
    tasks = []
    while self.active_fetches < self.MAX_ACTIVE_FETCHES:
        if self.urlq:
            url = self.urlq.pop()
            tasks.append(asyncio.create_task(self.fetch(url)))
    await asyncio.gather(*tasks)
EDIT: Here's a cleaner version with comments that fetches and processes all at the same time, while preserving the basic ability to put a cap on the maximum number of fetchers.
import asyncio

class Crawler:
    def __init__(self, urls, max_workers=2):
        self.urls = urls
        # create a queue that only allows a maximum of two items
        self.fetching = asyncio.Queue()
        self.max_workers = max_workers

    async def crawl(self):
        # DON'T await here; start consuming things out of the queue, and
        # meanwhile execution of this function continues. We'll start two
        # coroutines for fetching and two coroutines for processing.
        all_the_coros = asyncio.gather(
            *[self._worker(i) for i in range(self.max_workers)])

        # place all URLs on the queue
        for url in self.urls:
            await self.fetching.put(url)

        # now put a bunch of `None`'s in the queue as signals to the workers
        # that there are no more items in the queue.
        for _ in range(self.max_workers):
            await self.fetching.put(None)

        # now make sure everything is done
        await all_the_coros

    async def _worker(self, i):
        while True:
            url = await self.fetching.get()
            if url is None:
                # this coroutine is done; simply return to exit
                return
            print(f'Fetch worker {i} is fetching a URL: {url}')
            page = await self.fetch(url)
            self.process(page)

    async def fetch(self, url):
        print("Fetching URL: " + url)
        await asyncio.sleep(2)
        return f"the contents of {url}"

    def process(self, page):
        print("processed page: " + page)

# main loop
c = Crawler(['http://www.google.com', 'http://www.yahoo.com',
             'http://www.cnn.com', 'http://www.gamespot.com',
             'http://www.facebook.com', 'http://www.evergreen.edu'])
asyncio.run(c.crawl())
You can make htmlq an asyncio.Queue() and replace htmlq.append(url) with await htmlq.put(url). Then your main can be async, like this:
async def main():
    c = Crawler()
    asyncio.create_task(c.crawl())
    while True:
        page = await c.htmlq.get()
        if page is None:
            break
        c.process(page)
Your top-level code boils down to a call to asyncio.run(main()).
Once you are done with crawling, crawl() can enqueue None to notify the main coroutine that the work is done.
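Putting those pieces together, one possible sketch (reusing the names from the question, with fetch() handing pages to main() through the queue and crawl() enqueueing the None sentinel at the end; the cap on concurrent fetchers is left out for brevity):

import asyncio

class Crawler:
    def __init__(self, urls):
        self.urlq = list(urls)
        self.htmlq = asyncio.Queue()

    async def fetch(self, url):
        await asyncio.sleep(2)          # pretend to download the page
        await self.htmlq.put(url)       # hand the "page" to main()

    async def crawl(self):
        tasks = [asyncio.create_task(self.fetch(u)) for u in self.urlq]
        await asyncio.gather(*tasks)
        await self.htmlq.put(None)      # signal main() that crawling is done

    def process(self, page):
        print("processed page: " + page)

async def main():
    c = Crawler(['http://www.google.com', 'http://www.yahoo.com'])
    crawl_task = asyncio.create_task(c.crawl())
    while True:
        page = await c.htmlq.get()
        if page is None:
            break
        c.process(page)
    await crawl_task

asyncio.run(main())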
Using Python 3.6 and asyncio and aiohttp I wrote a simple async program:
from aiohttp import ClientSession
import asyncio, ssl, time

base_url = 'https://my-base-url.com/api'

async def fetch(session, id):
    query_params = {'qp1': 'v1', 'qp2': 'v2', 'id': id}
    async with session.get(base_url, params=query_params, ssl=ssl.SSLContext()) as response:
        res_json = await response.json()
        if response.status == 200:
            time.sleep(2)
            min_rating = res_json.get('minRating')
            max_rating = res_json.get('maxRating')
            print("id = %s, min = %s, max = %s" % (id, min_rating, max_rating))

async def run(ids):
    tasks = []
    async with ClientSession() as session:
        for id in ids:
            task = asyncio.ensure_future(fetch(session, id))
            tasks.append(task)
        responses = await asyncio.gather(*tasks)
        return responses

if __name__ == '__main__':
    ids = [123, 456, 789]
    future = asyncio.ensure_future(run(ids))
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(future)
    print("\n\ndone")
The time.sleep(2) inside fetch(session, id) makes it seem like this program is not asynchronous because it gets one response, sleeps, gets another, sleeps, so on and so forth. When I remove the sleep call, it does seem to be async/concurrent because the responses come back in a random order. What is sleep doing in this case? Is it locking all threads? Why does it appear to be sequential instead of parallel?
time.sleep(2) is a synchronous (blocking) call, so it stops the whole event loop while it runs and no other task can make progress. Use await asyncio.sleep(2) instead, which suspends only the current coroutine and hands control back to the loop.
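Concretely, the only change needed is inside fetch(); a trimmed sketch with the blocking call swapped for the awaitable one:

async def fetch(session, id):
    query_params = {'qp1': 'v1', 'qp2': 'v2', 'id': id}
    async with session.get(base_url, params=query_params, ssl=ssl.SSLContext()) as response:
        res_json = await response.json()
        if response.status == 200:
            await asyncio.sleep(2)   # yields to the event loop instead of freezing it
            print("id = %s, min = %s, max = %s"
                  % (id, res_json.get('minRating'), res_json.get('maxRating')))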
I have been trying all kinds of things to be able to use an asyncio loop inside another asyncio loop. Most of the time my tests just end in errors, such as:
RuntimeError: This event loop is already running
My example code below is just the base test I started with, so you can see the basics of what I am trying to do. I tried so many things after this test, it was just too confusing, so I figured I should keep it simple when asking for help. If anyone can point me in the right direction, that would be great. Thank you for your time!
import asyncio

async def fetch(data):
    message = 'Hey {}!'.format(data)
    other_data = ['image_a.com', 'image_b.com', 'image_c.com']
    images = sub_run(other_data)
    return {'message': message, 'images': images}

async def bound(sem, data):
    async with sem:
        r = await fetch(data)
        return r

async def build(dataset):
    tasks = []
    sem = asyncio.Semaphore(400)
    for data in dataset:
        task = asyncio.ensure_future(bound(sem, data))
        tasks.append(task)
    r = await asyncio.gather(*tasks)
    return r

def run(dataset):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(build(dataset))
    responses = loop.run_until_complete(future)
    loop.close()
    return responses

async def sub_fetch(data):
    image = 'https://{}'.format(data)
    return image

async def sub_bound(sem, data):
    async with sem:
        r = await sub_fetch(data)
        return r

async def sub_build(dataset):
    tasks = []
    sem = asyncio.Semaphore(400)
    for data in dataset:
        task = asyncio.ensure_future(sub_bound(sem, data))
        tasks.append(task)
    r = await asyncio.gather(*tasks)
    return r

def sub_run(dataset):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(sub_build(dataset))
    responses = loop.run_until_complete(future)
    loop.close()
    return responses

if __name__ == '__main__':
    dataset = ['Joe', 'Bob', 'Zoe', 'Howard']
    responses = run(dataset)
    print(responses)
Running loop.run_until_complete inside a running event loop would block the outer loop, thus defeating the purpose of using asyncio. Because of that, asyncio event loops aren't recursive, and one shouldn't need to run them recursively. Instead of creating an inner event loop, await a task on the existing one.
In your case, remove sub_run and simply replace its usage:
images = sub_run(other_data)
with:
images = await sub_build(other_data)
And it will work just fine, running the sub-coroutines and not continuing with the outer coroutine until the inner one is complete, as you likely intended from the sync code.
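In other words, fetch() from the question ends up looking something like this:

async def fetch(data):
    message = 'Hey {}!'.format(data)
    other_data = ['image_a.com', 'image_b.com', 'image_c.com']
    # await the sub-coroutines on the already-running loop
    # instead of spinning up a second one
    images = await sub_build(other_data)
    return {'message': message, 'images': images}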
I have two functions: the first one, def_a, is an asynchronous function, and the second one, def_b, is a regular function that I register as a callback for the result of def_a using add_done_callback.
My code looks like this:
import asyncio

def def_b(result):
    next_number = result.result()
    # some work on the next_number
    print(next_number + 1)

async def def_a(number):
    await some_async_work(number)
    return number + 1

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(def_a(1))
task.add_done_callback(def_b)
response = loop.run_until_complete(task)
loop.close()
And it works perfectly.
The problem began when also the second function, def_b, became asynchronous. Now it looks like this:
async def def_b(result):
    next_number = result.result()
    # some asynchronous work on the next_number
    print(next_number + 1)
But now I cannot pass it to the add_done_callback function, because it is not a regular function. My question is: is it possible to provide def_b to add_done_callback if def_b is asynchronous, and if so, how?
add_done_callback is considered a "low level" interface. When working with coroutines, you can chain them in many ways, for example:
import asyncio

async def my_callback(result):
    print("my_callback got:", result)
    return "My return value is ignored"

async def coro(number):
    await asyncio.sleep(number)
    return number + 1

async def add_success_callback(fut, callback):
    result = await fut
    await callback(result)
    return result

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(coro(1))
task = add_success_callback(task, my_callback)
response = loop.run_until_complete(task)
print("response:", response)
loop.close()
Keep in mind add_done_callback will still call the callback if your future raises an exception (but calling result.result() will raise it).
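A minimal sketch of that behaviour, with a task that deliberately fails:

import asyncio

async def failing():
    raise ValueError("boom")

def on_done(fut):
    # the callback runs even though the task raised
    try:
        fut.result()
    except ValueError as exc:
        print("callback saw the exception:", exc)

async def main():
    task = asyncio.ensure_future(failing())
    task.add_done_callback(on_done)
    try:
        await task
    except ValueError:
        pass        # keep the program running after the failure

asyncio.run(main())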
The chaining approach above only works for a single future job; if you have multiple async jobs, they will block each other. A better way is to use asyncio.as_completed() to iterate over the future list:
import asyncio

async def __after_done_callback(future_result):
    # await something here...
    pass

async def __future_job(number):
    await some_async_work(number)
    return number + 1

async def main():
    tasks = [asyncio.ensure_future(__future_job(x)) for x in range(100)]  # create 100 future jobs
    for f in asyncio.as_completed(tasks):
        result = await f
        await __after_done_callback(result)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
You can try the aiodag library. It's a very lightweight wrapper around asyncio that abstracts away some of the async plumbing that you usually have to think about. From this example you won't be able to tell that things are running asynchronously since it's just 1 task that depends on another, but it is all running async.
import asyncio
from aiodag import task

@task
async def def_b(result):
    # some asynchronous work on the next_number
    print(result + 1)

@task
async def def_a(number):
    await asyncio.sleep(number)
    return number + 1

async def main():
    a = def_a(1)
    b = def_b(a)  # this makes task b depend on task a
    return await b

loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
response = loop.run_until_complete(main())