How to connect via websocket? - python

I have such part of code(Python):
def func_login_params():
async with websockets.connect(url) as ws:
# login
timestamp = str(get_local_timestamp())
login_str = login_params(timestamp, api_key, passphrase, secret_key)
await ws.send(login_str)
res = await ws.recv()
# params
sub_str = json.dumps(params)
await ws.send(sub_str)
Upper code works - okey. But I need to have two function: first - login, second - send params. As I thought I could do this:
def func_login():
async with websockets.connect(url) as ws:
# login
timestamp = str(get_local_timestamp())
login_str = login_params(timestamp, api_key, passphrase, secret_key)
await ws.send(login_str)
res = await ws.recv()
def func_params():
async with websockets.connect(url) as ws:
# params
sub_str = json.dumps(params)
await ws.send(sub_str)
But in this situation params don't come. Maybe it don't connect correctly?

When you try to connect websockets, it creates new connection.
So, in that case, you should take an websocket connection as function argument:
async def func_login(ws):
timestamp = str(get_local_timestamp())
login_str = login_params(timestamp, api_key, passphrase, secret_key)
await ws.send(login_str)
res = await ws.recv()
async def func_params(ws):
sub_str = json.dumps(params)
await ws.send(sub_str)
def main():
async with web sockets.connect(url) as ws:
await func_login(ws)
await func_params(ws)

Related

Getting duplicate Network Response Playwright Python

I have a working script but when I changed the page.on in playwright it actually runs the network response a certain number of times as per the loop count. I have been trying to figure out why that happens.
For example at i=0 it gives one response.url print but at i=10 it prints response.url 10 times and then send 10 duplicate data to mongodb. I have no idea why this is happening. The link being sent based on the print are all the same.
Would be a great help if anyone can let me know what it is that I am doing wrong that is causing this issue.
Pls see sample code here.
#imports here
today = datetime.today().strftime("%m%d%Y")
filenamearr = []
mongousername = 'XXX'
mongopassword = 'XXXX'
client = MongoClient("mongodb+srv://%s:%s#XXXXX.XXXX.mongodb.net/?retryWrites=true&w=majority"%(mongousername,mongopassword))
db = client.DB1
logg = []
async def runbrowser(playwright,url):
async def handle_response(response,buttonnumber):
l = str(response.url)
para = 'param'
if para in l:
print(response.url)
textdata = await response.text()
subtask = asyncio.create_task(jsonparse(textdata))
done, pending = await asyncio.wait({subtask})
if subtask in done:
print("Success in Json parser")
result = await subtask
status = [buttonnumber,result]
logg.append(status)
print(status)
logdf = pd.DataFrame(logg)
logdf.columns = ['BUTTON','RESULT']
fname = 'XXXX' + today +".csv"
logdf.to_csv(fname,index=False)
async def jsonparse(textdata):
try:
#parsing happens here to output to MongoDB
return "Success"
except Exception as e:
print("Failled parsing")
return e
browser = await playwright.firefox.launch(
headless=True,
)
context = await browser.new_context(
locale='en-US',
ignore_https_errors = True,
)
page = await context.new_page()
await page.goto(url,timeout=0)
button = page.locator("xpath=//button[#event-list-item='']")
bcount = button.locator(":scope",has_text="Locator")
count = await bcount.count()
print(count)
for i in range(count):
print("\n\n\n\n\nSleeping 10 seconds before clicking button")
buttonnumber = i
await asyncio.sleep(10)
print("Clickking Button: ", i)
cbtn = bcount.nth(i)
await cbtn.hover()
await asyncio.sleep(4)
await cbtn.click()
if i==0:
print("i=0")
await page.reload(timeout=0)
retry = page.on("response",lambda response: handle_response(response,buttonnumber))
title = await page.title()
print(title)
print("Heading back to the main page.")
await page.go_back(timeout=0)
await page.reload()
await page.wait_for_timeout(5000)
await page.close()
print("Closing Tab")
await browser.close()
async def main():
tasks = []
async with async_playwright() as playwright:
url = 'https://samplelink.com'
tasks.append(asyncio.create_task(runbrowser(playwright,url)))
for t in asyncio.as_completed(tasks):
print(await t)
await asyncio.gather(*tasks)
asyncio.run(main())

Multiple client request using httpx Runtime Error: Cannot send a request, as the client has been closed

I have the below piece of code
async def get_data(uuid):
async with sema, httpx.AsyncClient(
base_url=udi_data_url, params=params
) as udi_client:
udi_result = udi_client.get(f"/{uuid}")
async with sema, httpx.AsyncClient(
base_url=manufacturer_data_url, params=params
) as client:
manufacturing_result = client.get(f"/{uuid}")
result1, result2 = await asyncio.gather(udi_result, manufacturing_result)
print(result1, result2)
async def main():
await get_data(uuid)
asyncio.run(main())
How do I keep the client connections open as i understand the moment i reach this line
result1, result2 = await asyncio.gather(udi_result, manufacturing_result)
I know that i can do something like
udi_result = await udi_client.get(f"/{uuid}")
and
manufacturing_result = await client.get(f"/{uuid}")
But that's not what I want to do.
I am out of the context and thus
I am getting an error.

Run two asynco loop in a python script (telethon, aiopg)

I would like to use Telethon (Telegram bot) and aiopg (PostgreSQL) library.
Telethon example:
from telethon import TelegramClient
api_id = 12345
api_hash = '0123456789abcdef0123456789abcdef'
client = TelegramClient('anon', api_id, api_hash)
async def main():
# Getting information about yourself
me = await client.get_me()
print(me.stringify())
#client.on(events.NewMessage)
async def my_event_handler(event):
if 'hello' in event.raw_text:
await event.reply('hi!')
client.start()
client.run_until_disconnected()
aiopg example:
import aiopg
dsn = 'dbname=aiopg user=aiopg password=passwd host=127.0.0.1'
async def notify(conn):
async with conn.cursor() as cur:
for i in range(5):
msg = "message {}".format(i)
print('Send ->', msg)
await cur.execute("NOTIFY channel, %s", (msg,))
await cur.execute("NOTIFY channel, 'finish'")
async def listen(conn):
async with conn.cursor() as cur:
await cur.execute("LISTEN channel")
while True:
msg = await conn.notifies.get()
if msg.payload == 'finish':
return
else:
print('Receive <-', msg.payload)
async def main():
async with aiopg.create_pool(dsn) as pool:
async with pool.acquire() as conn1:
listener = listen(conn1)
async with pool.acquire() as conn2:
notifier = notify(conn2)
await asyncio.gather(listener, notifier)
print("ALL DONE")
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
I would like to use both in a same python script.
I tried to find the solution and it is maybe the asyncio.gather(...), but I don't know how can I combine these two library, how to start both loop.
Can you please help me?
create a new async function which creates a new client instance and add all the handlers you need, for the sake of this example I showed some example handlers of mine.
async def init_bot() -> TelegramClient:
client = TelegramClient(
session="trade-bot",
api_hash=Config.API_HASH,
api_id=Config.API_ID,
)
await client.start(bot_token=Config.BOT_TOKEN)
client.add_event_handler(
register_handler,
events.NewMessage(incoming=True, pattern=r"^[\/?!]register$"),
)
client.add_event_handler(
get_webhook_handler,
events.NewMessage(incoming=True, pattern=r"^[\/?!]webhook$"),
)
client.add_event_handler(
status_handler,
events.NewMessage(incoming=True, pattern=r"^[\/?!]status$"),
)
_LOG.info("Bot client started")
return client
then later on you main function
client = await init_bot()
await client.connect()
# the below code is non-blocking
asyncio.create_task(client.run_until_disconnected())

How to change async crawl program based on aiohttp lib faster?

I am using aysnc and aiohttp to crawl web image, but when it was running, I found it was not crawling as fast as I expected.
Is there any code that I can improve there?
In the for loop I am using many await inside, is that the correct way to deal with that?
async def fetch(url):
async with aiohttp.ClientSession() as session:
async with session.get(url=url,
headers=HEADERS,
proxy=PROXY_STR,
) as response:
text = await response.text()
resp = Selector(text=text)
nodes = resp.xpath('//div[#class="kl1-2"]')
for node in nodes:
next_url = node.xpath('.//div[#class="kl1-2a2"]/a/#href').extract_first()
title = node.xpath('.//div[#class="kl1-2a2"]/a/#title').extract_first()
await detail(session=session, next_url=next_url, title=title)
print('next page')
async def detail(**kwargs):
session = kwargs['session']
next_url = kwargs['next_url']
title = kwargs['title']
print(next_url)
print(title)
async with session.get(
url=next_url,
headers=HEADERS,
proxy=PROXY_STR,
) as response:
text = await response.text()
resp = Selector(text=text)
nodes = resp.xpath('//div[#class="kl2-1"]//img/#src').extract()
nodes = list(set(nodes))
for img in nodes:
await download_img(session=session,url=img,title=title)
print('next image')
async def download_img(**kwargs):
url= kwargs['url']
title= kwargs['title']
try:
conn = aiohttp.TCPConnector(ssl=False) # 防止ssl报错
async with aiohttp.ClientSession(connector=conn, trust_env=True) as session:
async with session.get(url=url, headers=SIMPLE_HEADERS, proxy=PROXY_STR) as response:
if response.status>=200 and response.status<300:
f=await aiofiles.open(save_file,'wb')
await f.write(await response.read())
await f.close()
except Exception as e:
return
async def main():
total_page = 3640
for page in range(0,total_page,35):
url = START_URL.format(page=page)
await fetch(url)
await asyncio.sleep(0)
print(f'downing page {page}-')
loop = asyncio.get_event_loop()
loop.run_until_complete(main())

API calls not running ansynchronously

I'm working on a python client that will asynchronously download vinyl cover art. My problem is that I'm new to python (especially asynchronous python) and I don't think my code is running ansychronously. I have another client written in Node.js that is able to get approx. 40 images/sec whereas this python one is only managing to get around 1.5/sec.
import aiohttp
import asyncio
from os import path,makedirs
caa_base_url = "https://coverartarchive.org/release"
image_download_dir = path.realpath('images')
# small,large, None = Max
image_size = None
async def getImageUrls(release_mbid,session):
async with session.get(f'{caa_base_url}/{release_mbid}') as resp:
if resp.status == 404 or resp.status == 403:
return
return [release_mbid,await resp.json()]
async def getImage(url,session):
try:
async with session.get(url) as resp:
return [url,await resp.read()]
except (aiohttp.ServerDisconnectedError):
return await getImage(url,session)
async def getMBIDs(mb_page_url):
async with aiohttp.ClientSession() as session:
async with session.get(mb_page_url) as resp:
mb_json = await resp.json()
tasks = []
async with aiohttp.ClientSession() as caa_session:
for release in mb_json["releases"]:
task = asyncio.ensure_future(getImageUrls(release["id"],caa_session))
tasks.append(task)
responses = await asyncio.gather(*tasks)
async with aiohttp.ClientSession() as caa_image_session:
for response in responses:
if response is not None:
caaTasks = []
release_mbid = response[0]
result = response[1]
for image in result["images"]:
if image["front"] == True:
caaTask = asyncio.ensure_future(getImage(image["image"],caa_session))
caaTasks.append(caaTask)
image_responses = await asyncio.gather(*caaTasks)
for image_response in image_responses:
image_url = image_response[0]
image_binary = image_response[1]
new_file_dir = path.join(image_download_dir,release_mbid)
if not path.isdir(new_file_dir):
makedirs(new_file_dir)
file_name = image_url[image_url.rfind("/")+1:]
file_path = path.join(new_file_dir,file_name)
new_file = open(file_path,'wb')
new_file.write(image_binary)
mb_base_url = "https://musicbrainz.org/ws/2/release"
num_pages = 100
releases_per_page = 100
mb_page_urls = []
async def getMBPages():
for page_index in range(num_pages):
await getMBIDs('%s?query=*&type=album&format=Vinyl&limit=%s&offset=%s&fmt=json' % (mb_base_url,releases_per_page,page_index*releases_per_page))
await asyncio.sleep(1)
loop = asyncio.get_event_loop()
loop.run_until_complete(getMBPages())
P.S. The sleep is because musicbrainz api limits to 1 request/sec

Categories

Resources