I make API calls with asyncio, but I have a limit of 300 calls per minute. How can I pause or throttle asyncio to stay under that rate? I tried using a semaphore, but with no success. How can I do this?
from asyncio.tasks import sleep
from aiohttp import ClientSession
import asyncio
import time
import aiohttp

semaphore = asyncio.Semaphore(2)

async def get_sites(sites):
    tasks = [asyncio.create_task(fetch_site(s)) for s in sites]
    return await asyncio.gather(*tasks)

NCAVStock = []

async def fetch_site(url):
    async with ClientSession() as session:
        async with session.get(url) as resp:
            data = await resp.json()
            data = data['symbol']
            NCAVStock.append(data)
            print(NCAVStock)
            return data

if __name__ == '__main__':
    List_Not_China = ['SRDX', 'AQB', 'CDTI', 'VRSN', 'MEC', 'NFG', 'KTOS', 'PRO', 'BEAT', 'HRB', 'JBLU', 'SRTS', 'PCRX', 'RVLV', 'CTSH', 'DHCNL', 'SYX', 'FARM', 'BAM', 'CALX', 'VTIQW', 'LKQ', 'ISR', 'GLDW', 'WORK', 'UTI', 'MXL', 'MTOR', 'CRWS', 'CHWY', 'GKOS', 'MDEX', 'AGI', 'LH', 'IDIV', 'CVEO', 'URI', 'FIX', 'RICK', 'ITW', 'STRT', 'SGLBW', 'EIX', 'AWX', 'ADSK', 'INS', 'MLHR', 'IIIV']
    sites = [
        f'http://financialmodelingprep.com/api/v3/financials/balance-sheet-statement/{company}?period=quarter&apikey=APIKEY'
        for company in List_Not_China
    ]
    data = asyncio.run(get_sites(sites))
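Note that a semaphore on its own only caps how many requests are in flight at the same time; it never pauses once 300 calls have started within a minute. A minimal sketch of one way to enforce a per-minute cap, assuming each acquired permit is simply handed back 60 seconds after it was taken (the helper name release_later is illustrative, not part of any library):

import asyncio
from aiohttp import ClientSession

CALLS_PER_MINUTE = 300
rate_sem = asyncio.Semaphore(CALLS_PER_MINUTE)

async def release_later(sem, delay=60):
    # return the permit one minute after it was taken, so at most
    # CALLS_PER_MINUTE requests can start in any 60-second window
    await asyncio.sleep(delay)
    sem.release()

async def fetch_site(session, url):
    await rate_sem.acquire()
    asyncio.create_task(release_later(rate_sem))
    async with session.get(url) as resp:
        return await resp.json()

async def get_sites(sites):
    # reuse a single session for all requests instead of opening one per call
    async with ClientSession() as session:
        return await asyncio.gather(*(fetch_site(session, url) for url in sites))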
I have a dataframe with 287,000+ file URLs in the column df1['Gravação']. I want to get the length of every file and save the results into the list tamanho_mb = [].
I was able to write the synchronous version without problems, but since the amount of data is very large, I would like to use asyncio to make it faster.
The synchronous approach is:
import pandas as pd
import csv
import urllib.request as ur
import requests
import asyncio

df1 = pd.read_csv("report.csv", sep=';', encoding='utf-8')
df1['Gravação'] = ['https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Pic.png', 'https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-File.png', 'https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Photo.png', 'https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Clipart.png']

tamanho_mb = []
for i in df1['Gravação']:
    tamanho = ur.urlopen(i)
    tamanho = tamanho.length
    tamanho_mb.append(tamanho)
How could I achieve the same result using asyncio?
You can try:
import asyncio
import aiohttp

lst = [
    "https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Pic.png",
    "https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-File.png",
    "https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Photo.png",
    "https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Clipart.png",
]

# limit concurrency to 2 so we don't spam the server:
sem = asyncio.Semaphore(2)

async def get_length(session, url):
    async with sem, session.get(url) as resp:
        content = await resp.read()
        return url, len(content)

async def main():
    async with aiohttp.ClientSession() as session:
        tasks = {get_length(session, url) for url in lst}
        for task in asyncio.as_completed(tasks):
            result = await task
            print(result)

asyncio.run(main())
Prints:
('https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-File.png', 209659)
('https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Photo.png', 99739)
('https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Pic.png', 240212)
('https://www.pngall.com/wp-content/uploads/11/Harbor-Seal-PNG-Clipart.png', 13111524)
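Since as_completed yields results in arrival order, the sizes above are not aligned with the original list. If they need to line up with the dataframe rows, asyncio.gather preserves input order; a small sketch reusing the get_length coroutine above:

async def main():
    async with aiohttp.ClientSession() as session:
        # gather returns results in the same order as the input URLs
        results = await asyncio.gather(*(get_length(session, url) for url in lst))
    tamanho_mb = [size for _, size in results]
    print(tamanho_mb)

Also note that resp.read() downloads the whole file just to measure it; when the server reports a Content-Length header, aiohttp exposes it as resp.content_length, which avoids downloading the body at all.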
I'm using the Binance UMFutures testnet and want to build a stream for user data (order updates).
The keys are OK. Other methods like klines work perfectly as well. But .futures_user_socket() shows nothing when I place or cancel orders. Could you please advise what I am doing wrong?
from binance import AsyncClient, BinanceSocketManager
import asyncio

api_key_testnet = '<--->'
secret_key_testnet = '<--->'

async def user_data_listener(client):
    bm = BinanceSocketManager(client)
    async with bm.futures_user_socket() as stream:
        while True:
            res = await stream.recv()
            print(res)

async def main():
    client = await AsyncClient.create(api_key_testnet, secret_key_testnet, testnet=True)
    await user_data_listener(client)

if __name__ == "__main__":
    asyncio.run(main())
I have code that receives data from Binance about current prices:
import asyncio
from binance import AsyncClient, BinanceSocketManager
import time
from datetime import datetime

def analyze(res):
    kline = res['k']
    if kline['x']:  # candle is completed
        print('{} start_sleeping {} {}'.format(
            datetime.now(),
            kline['s'],
            datetime.fromtimestamp(kline['t'] / 1000),
        ))
        time.sleep(5)
        print('{} finish_sleeping {}'.format(datetime.now(), kline['s']))

async def open_binance_stream(symbol):
    client = await AsyncClient.create()
    bm = BinanceSocketManager(client)
    ts = bm.kline_socket(symbol)
    async with ts as tscm:
        while True:
            res = await tscm.recv()
            analyze(res)
    await client.close_connection()

async def main():
    t1 = asyncio.create_task(open_binance_stream('ETHBTC'))
    t2 = asyncio.create_task(open_binance_stream('XRPBTC'))
    await asyncio.gather(*[t1, t2])

if __name__ == "__main__":
    asyncio.run(main())
How can I make the analyze function be called concurrently?
Binance sends data on both streams (ETHBTC and XRPBTC) at the same time,
but analyze is only called once the previous analyze (with its sleep) has completed.
I want analyze to be called immediately and independently for each stream.
Have you tried putting analyze in a thread? I think it will achieve what you want.
import asyncio
from binance import AsyncClient, BinanceSocketManager
import time
from datetime import datetime
from threading import Thread

def analyze(res):
    kline = res['k']
    if kline['x']:  # candle is completed
        print('{} start_sleeping {} {}'.format(
            datetime.now(),
            kline['s'],
            datetime.fromtimestamp(kline['t'] / 1000),
        ))
        time.sleep(5)
        print('{} finish_sleeping {}'.format(datetime.now(), kline['s']))

async def open_binance_stream(symbol):
    client = await AsyncClient.create()
    bm = BinanceSocketManager(client)
    ts = bm.kline_socket(symbol)
    async with ts as tscm:
        while True:
            res = await tscm.recv()
            # note the trailing comma: args must be a tuple
            Thread(target=analyze, args=(res,)).start()
    await client.close_connection()

async def main():
    t1 = asyncio.create_task(open_binance_stream('ETHBTC'))
    t2 = asyncio.create_task(open_binance_stream('XRPBTC'))
    await asyncio.gather(*[t1, t2])

if __name__ == "__main__":
    asyncio.run(main())
This should work as expected.
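On Python 3.9+ an alternative that avoids managing Thread objects by hand is asyncio.to_thread, which runs the blocking analyze call in the default thread pool. A sketch of just the receive loop, under that assumption:

async def open_binance_stream(symbol):
    client = await AsyncClient.create()
    bm = BinanceSocketManager(client)
    ts = bm.kline_socket(symbol)
    async with ts as tscm:
        while True:
            res = await tscm.recv()
            # hand the blocking analyze() to a worker thread and keep receiving
            asyncio.create_task(asyncio.to_thread(analyze, res))
    await client.close_connection()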
I'm working on a Python client that will asynchronously download vinyl cover art. My problem is that I'm new to Python (especially asynchronous Python) and I don't think my code is running asynchronously. I have another client written in Node.js that is able to get approx. 40 images/sec, whereas this Python one only manages around 1.5/sec.
import aiohttp
import asyncio
from os import path, makedirs

caa_base_url = "https://coverartarchive.org/release"
image_download_dir = path.realpath('images')
# small, large, None = Max
image_size = None

async def getImageUrls(release_mbid, session):
    async with session.get(f'{caa_base_url}/{release_mbid}') as resp:
        if resp.status == 404 or resp.status == 403:
            return
        return [release_mbid, await resp.json()]

async def getImage(url, session):
    try:
        async with session.get(url) as resp:
            return [url, await resp.read()]
    except aiohttp.ServerDisconnectedError:
        return await getImage(url, session)

async def getMBIDs(mb_page_url):
    async with aiohttp.ClientSession() as session:
        async with session.get(mb_page_url) as resp:
            mb_json = await resp.json()
    tasks = []
    async with aiohttp.ClientSession() as caa_session:
        for release in mb_json["releases"]:
            task = asyncio.ensure_future(getImageUrls(release["id"], caa_session))
            tasks.append(task)
        responses = await asyncio.gather(*tasks)
    async with aiohttp.ClientSession() as caa_image_session:
        for response in responses:
            if response is not None:
                caaTasks = []
                release_mbid = response[0]
                result = response[1]
                for image in result["images"]:
                    if image["front"] == True:
                        caaTask = asyncio.ensure_future(getImage(image["image"], caa_image_session))
                        caaTasks.append(caaTask)
                image_responses = await asyncio.gather(*caaTasks)
                for image_response in image_responses:
                    image_url = image_response[0]
                    image_binary = image_response[1]
                    new_file_dir = path.join(image_download_dir, release_mbid)
                    if not path.isdir(new_file_dir):
                        makedirs(new_file_dir)
                    file_name = image_url[image_url.rfind("/") + 1:]
                    file_path = path.join(new_file_dir, file_name)
                    with open(file_path, 'wb') as new_file:
                        new_file.write(image_binary)

mb_base_url = "https://musicbrainz.org/ws/2/release"
num_pages = 100
releases_per_page = 100
mb_page_urls = []

async def getMBPages():
    for page_index in range(num_pages):
        await getMBIDs('%s?query=*&type=album&format=Vinyl&limit=%s&offset=%s&fmt=json' % (mb_base_url, releases_per_page, page_index * releases_per_page))
        await asyncio.sleep(1)

loop = asyncio.get_event_loop()
loop.run_until_complete(getMBPages())
P.S. The sleep is because the MusicBrainz API limits requests to 1 per second.
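One likely reason for the low throughput is that getMBPages awaits each page, including all of its image downloads, before requesting the next page, so only one page's images are ever downloading at a time. A hedged sketch of one way to let pages overlap while still issuing only one MusicBrainz request per second (the rest of the code above stays unchanged):

async def getMBPages():
    tasks = []
    for page_index in range(num_pages):
        page_url = '%s?query=*&type=album&format=Vinyl&limit=%s&offset=%s&fmt=json' % (
            mb_base_url, releases_per_page, page_index * releases_per_page)
        # schedule the page (and its image downloads) as a background task so
        # pages overlap, while starting only one MusicBrainz request per second
        tasks.append(asyncio.create_task(getMBIDs(page_url)))
        await asyncio.sleep(1)
    await asyncio.gather(*tasks)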
I followed this tutorial: https://pawelmhm.github.io/asyncio/python/aiohttp/2016/04/22/asyncio-aiohttp.html and everything works fine when I do around 50,000 requests. But I need to make 1 million API calls, and then I have a problem with this code:
url = "http://some_url.com/?id={}"
tasks = set()
sem = asyncio.Semaphore(MAX_SIM_CONNS)
for i in range(1, LAST_ID + 1):
task = asyncio.ensure_future(bound_fetch(sem, url.format(i)))
tasks.add(task)
responses = asyncio.gather(*tasks)
return await responses
Because Python needs to create 1 million tasks, it basically just lags and then prints a Killed message in the terminal. Is there any way to use a generator instead of a pre-made set (or list) of URLs? Thanks.
Schedule all 1 million tasks at once
This is the code you are talking about. It takes up to 3 GB of RAM, so it is easily possible that it will be terminated by the operating system if you have little free memory.
import asyncio
from aiohttp import ClientSession

MAX_SIM_CONNS = 50
LAST_ID = 10**6

async def fetch(url, session):
    async with session.get(url) as response:
        return await response.read()

async def bound_fetch(sem, url, session):
    async with sem:
        await fetch(url, session)

async def fetch_all():
    url = "http://localhost:8080/?id={}"
    tasks = set()
    async with ClientSession() as session:
        sem = asyncio.Semaphore(MAX_SIM_CONNS)
        for i in range(1, LAST_ID + 1):
            task = asyncio.create_task(bound_fetch(sem, url.format(i), session))
            tasks.add(task)
        return await asyncio.gather(*tasks)

if __name__ == '__main__':
    asyncio.run(fetch_all())
Use a queue to streamline the work
This is my suggestion for how to use asyncio.Queue to pass URLs to worker tasks. The queue is filled as needed; there is no pre-made list of URLs.
It takes only 30 MB of RAM :)
import asyncio
from aiohttp import ClientSession

MAX_SIM_CONNS = 50
LAST_ID = 10**6

async def fetch(url, session):
    async with session.get(url) as response:
        return await response.read()

async def fetch_worker(url_queue):
    async with ClientSession() as session:
        while True:
            url = await url_queue.get()
            try:
                if url is None:
                    # all work is done
                    return
                response = await fetch(url, session)
                # ...do something with the response
            finally:
                # calling task_done() is necessary for url_queue.join() to work correctly
                url_queue.task_done()

async def fetch_all():
    url = "http://localhost:8080/?id={}"
    url_queue = asyncio.Queue(maxsize=100)
    worker_tasks = []
    for i in range(MAX_SIM_CONNS):
        wt = asyncio.create_task(fetch_worker(url_queue))
        worker_tasks.append(wt)
    for i in range(1, LAST_ID + 1):
        await url_queue.put(url.format(i))
    for i in range(MAX_SIM_CONNS):
        # tell the workers that the work is done
        await url_queue.put(None)
    await url_queue.join()
    await asyncio.gather(*worker_tasks)

if __name__ == '__main__':
    asyncio.run(fetch_all())
asyncio is memory-bound (like any other program). You cannot spawn more tasks than memory can hold. My guess is that you hit a memory limit. Check dmesg for more information.
1 million RPS doesn't mean there are 1M tasks. A single task can make several requests in the same second.