How to make batch requests in web3.py? - python

This is my attempt at an async version, based on the article
"How to use AsyncHTTPProvider in web3py?".
However, when I run this code it executes like a synchronous function.
web3.js has support for batch requests
(https://dapp-world.com/smartbook/web3-batch-request-Eku8), but
web3.py does not.
I am using the Alchemy Ethereum API, which supports about 19 API calls per
second, and I have about 1000 Ethereum addresses.
How do I modify the code so that I can batch 19 addresses per second?
from web3 import Web3
from web3.eth import AsyncEth
import time
import pandas as pd
import aiohttp
import asyncio

alchemy_url = "https://eth-mainnet.g.alchemy.com/v2/zCTn-wyjipF5DvGFVNEx_XqCKZakaB57"
w3 = Web3(Web3.AsyncHTTPProvider(alchemy_url), modules={'eth': (AsyncEth,)}, middlewares=[])

start = time.time()

df = pd.read_csv('Ethereum/ethereumaddresses.csv')
Wallet_Address = df.loc[:, 'Address']
#Balance_storage = []

session_timeout = aiohttp.ClientTimeout(total=None)

async def get_balances():
    for address in Wallet_Address:
        balance = await w3.eth.get_balance(address)
        print(address, balance)

asyncio.run(get_balances())

end = time.time()
total_time = end - start
print(f"It took {total_time} seconds to make {len(Wallet_Address)} API calls")

I think my idea isn't the best, but you can use it as a temporary solution.
For this, you have to use ThreadPoolExecutor.
I ran a benchmark and found these results:
Without ThreadPoolExecutor, using the BSC public RPC and just running a for loop, it takes more than 3 minutes to finish the process. Click here to see the output of test 1.
With ThreadPoolExecutor, the BSC public RPC, and a 100 ms delay using time.sleep(0.1), it finishes in less than 40 seconds, as you can see in the next image. Click here to see the output of test 2.
With ThreadPoolExecutor, using QuickNode, and a 100 ms delay, it finishes in 35 seconds. Click here to see the output of test 3.
Doing simple math (1000 wallets / 19 calls per second ≈ 53 seconds), your process needs to take at least somewhere close to 50 seconds. Try running with 100 ms delays, and if that doesn't work you can increase the delay.
One problem with using time.sleep is if you have a GUI or something similar that can't be paused during the process (the GUI will freeze). (I think you can use multiprocessing to bypass this.)
The second problem is that doing this will probably change each address's position relative to the CSV. (You can attach an _id or something similar to each address and reorder with a for loop after it finishes.)
Code: works fine on BSC (just change the RPC). It finds all balances and stores them in self.data (a defaultdict), then saves them to a new CSV file called "newBalances.csv" (you can change this):
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from web3 import Web3
import pandas as pd
import time

class multiGetBalanceExample():
    def __init__(self):
        self.initialtime = datetime.now()  # initial time

        #=== Setup Web3 ===#
        self.bsc = "https://bsc-dataseed.binance.org/"  # rpc (change this)
        self.web3 = Web3(Web3.HTTPProvider(self.bsc))  # web3 connect

        #=== Loading Csv file ===#
        self.df = pd.read_csv(r"./Ethereum/ethereumaddresses.csv")
        self.wallet_address = (self.df.loc[:, 'Address'])

        #=== Setup Temporary Address/Balance Save Defaultdict ===#
        self.data = defaultdict(list)

        #=== Start ===#
        self.start_workers(self.data)

        #=== Finish ===#
        self.saveCsv()  # saving in new csv file
        self.finaltime = datetime.now()  # end time
        print(f"\nFinished! Process takes: {self.finaltime - self.initialtime}")

    def start_workers(self, data, workers=10):
        with ThreadPoolExecutor(max_workers=workers) as executor:
            executor.submit(self.getBalances, _data=data, _from=0, _to=101)
            executor.submit(self.getBalances, _data=data, _from=101, _to=201)
            executor.submit(self.getBalances, _data=data, _from=201, _to=301)
            executor.submit(self.getBalances, _data=data, _from=301, _to=401)
            executor.submit(self.getBalances, _data=data, _from=401, _to=501)
            executor.submit(self.getBalances, _data=data, _from=501, _to=601)
            executor.submit(self.getBalances, _data=data, _from=601, _to=701)
            executor.submit(self.getBalances, _data=data, _from=701, _to=801)
            executor.submit(self.getBalances, _data=data, _from=801, _to=901)
            executor.submit(self.getBalances, _data=data, _from=901, _to=1000)
        return data

    def getBalances(self, _data, _from, _to):
        for i in range(_from, _to):
            # == Getting Balances from each wallet == #
            get_balance = self.web3.eth.get_balance(self.wallet_address[i])
            # == Appending in self.data == #
            _data["Address"].append(self.wallet_address[i])
            _data["Balance"].append(get_balance)
            # == Print and time.sleep(100ms) == #
            print(f"Found: {self.wallet_address[i], get_balance}\n")  # printing progress
            time.sleep(0.1)  # adjust this according to your rate limit (in my test, 100 ms finished in ~40 seconds)
        return _data

    def saveCsv(self):
        #== Creating new CSV File ==#
        headers = ["Address", "Balance"]
        new_df = pd.DataFrame(columns=headers)
        new_df["Address"] = self.data["Address"]
        new_df["Balance"] = self.data["Balance"]
        new_df.to_csv(r"./Ethereum/newBalances.csv", index=False)  # save

multiGetBalanceExample()
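(As a side note, not part of the original answer: the ten submit calls could also be generated in a loop, which keeps the same behaviour while making the block size easy to change. A sketch of a drop-in replacement for start_workers:)

    def start_workers(self, data, workers=10, block=100):
        # Generate the index ranges instead of writing them out by hand.
        bounds = list(range(0, len(self.wallet_address), block)) + [len(self.wallet_address)]
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for _from, _to in zip(bounds, bounds[1:]):
                executor.submit(self.getBalances, _data=data, _from=_from, _to=_to)
        return data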

Related

Pass stream data from multiprocess to asyncio

I am trying to use the cryptofeed module to receive OHLC data from an API, store the data in a global variable by running the cryptofeed stream in a separate multiprocessing process, and then access that global variable from a separate asyncio instance.
I am having trouble storing the global data across processes: the async close() function only ever sees an empty pandas DataFrame. I would appreciate a suggestion on how to approach this problem.
from cryptofeed import FeedHandler
from cryptofeed.backends.aggregate import OHLCV
from cryptofeed.defines import TRADES
from cryptofeed.exchanges import BinanceFutures
import pandas as pd
import multiprocessing
from multiprocessing import Process
from concurrent.futures import ProcessPoolExecutor
import asyncio

data1 = pd.DataFrame()  # Create an empty DataFrame
queue = multiprocessing.Queue()

async def ohlcv(data):
    global data1
    # Convert data to a Pandas DataFrame
    df = pd.DataFrame.from_dict(data, orient='index')
    # Reset the index
    df.reset_index(inplace=True)
    df.index = [pd.Timestamp.now()]
    data1 = data1.append(df)
    queue.put('nd')
    # Append the rows of df to data

async def close(data):
    while True:
        print(data)
        await asyncio.sleep(15)

def main1():
    f = FeedHandler()
    f.add_feed(BinanceFutures(symbols=['BTC-USDT-PERP'], channels=[TRADES], callbacks={TRADES: OHLCV(ohlcv, window=10)}))
    f.run()

if __name__ == '__main__':
    p = Process(target=main1)
    p.start()
    asyncio.run(close(data1))
It appears that you are trying to combine asyncio with multiprocessing in some fashion. I don't have access to your FeedHandler and BinanceFutures classes, so I will just have main1 call ohlcv directly, and since it runs in a separate process from the main process (which is using asyncio), I can't see any reason in the code you posted why ohlcv would need to be a coroutine (an asyncio function).
asyncio has a provision for running multiprocessing tasks, and that is the way to proceed. So we run close as a coroutine, and it runs main1 in a child process (a multiprocessing pool process, actually), returning the result that main1 returned. There is no need for explicit queue operations to return any result:
import asyncio
import pandas as pd
from concurrent.futures import ProcessPoolExecutor

def ohlcv(data):
    # Convert data to a Pandas DataFrame
    df = pd.DataFrame.from_dict(data, orient='index')
    # Reset the index
    df.reset_index(inplace=True)
    df.index = [pd.Timestamp.now()]
    return df

def main1():
    """
    f = FeedHandler()
    f.add_feed(BinanceFutures(symbols=['BTC-USDT-PERP'], channels=[TRADES], callbacks={TRADES: OHLCV(ohlcv, window=10)}))
    f.run()
    """
    return ohlcv({'a': 1})

async def close():
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(1) as executor:
        return await loop.run_in_executor(executor, main1)

if __name__ == '__main__':
    df = asyncio.run(close())
    print(df)
Prints:
                           index  0
2023-01-08 15:59:44.939261     a  1

python calling a main thread function from another thread via value rather than reference

I've been thinking about this for a couple of days now and I can't figure it out. I've searched around but couldn't find the answer I was looking for, so any help would be greatly appreciated.
Essentially, what I am trying to do is call a method on a group of objects in my main thread from a separate thread, just once after 2 seconds, and then the thread can exit. I'm just using threading as a way of creating a non-blocking 2-second pause (if there are other ways of accomplishing this, please let me know).
I have a pyqtgraph plot that updates from a websocket stream, and the GUI can only be updated from the thread that starts it (the main one).
What happens is: I open a websocket stream, fill up a buffer for about 2 seconds, make a REST request, apply the updates from the buffer to the data from the REST request, and then update the data/plot as new messages come in. The issue is that I can't figure out how to create a non-blocking 2-second pause in the main thread without creating a child thread. If I create a child thread and pass it the object containing the dictionary I want to update after 2 seconds, I get issues about updating the plot from a different thread. What I THINK is happening is that when the new thread is spawned, the data (dictionary) containing the update data now lives in a different thread than the GUI, and that causes issues.
open websocket --> start filling buffer --> wait 2 seconds --> REST request --> apply updates from buffer to REST data --> update data as new websocket updates/messages come in.
Unfortunately, the websocket and GUI only start when you run pg.exec(), and you can't split them up to start individually; you create them and then start them together (or at least I have failed to find a way to start them individually; I also tried using a separate library to handle websockets, but that requires starting a thread for incoming messages as well).
This is the minimal reproducible example. Sorry it's pretty long, but I couldn't break it down any further without removing required functionality or losing context:
import json
import importlib
from requests.api import get
import functools
import time
import threading
import numpy as np
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore

QtWebSockets = importlib.import_module(pg.Qt.QT_LIB + '.QtWebSockets')

class coin():
    def __init__(self):
        self.orderBook = {'bids': {}, 'asks': {}}
        self.SnapShotRecieved = False
        self.last_uID = 0
        self.ordBookBuff = []

        self.pltwgt = pg.PlotWidget()
        self.pltwgt.show()

        self.bidBar = pg.BarGraphItem(x=[0], height=[1], width=1, brush=(25, 25, 255, 125), pen=(0, 0, 0, 0))
        self.askBar = pg.BarGraphItem(x=[1], height=[1], width=1, brush=(255, 25, 25, 125), pen=(0, 0, 0, 0))
        self.pltwgt.addItem(self.bidBar)
        self.pltwgt.addItem(self.askBar)

    def updateOrderBook(self, message):
        for side in ['a', 'b']:
            bookSide = 'bids' if side == 'b' else 'asks'
            for update in message[side]:
                if float(update[1]) == 0:
                    try:
                        del self.orderBook[bookSide][float(update[0])]
                    except:
                        pass
                else:
                    self.orderBook[bookSide].update({float(update[0]): float(update[1])})
                while len(self.orderBook[bookSide]) > 1000:
                    del self.orderBook[bookSide][(min(self.orderBook['bids'], key=self.orderBook['bids'].get)) if side == 'b' else (max(self.orderBook['asks'], key=self.orderBook['asks'].get))]
        if self.SnapShotRecieved == True:
            self.bidBar.setOpts(x0=self.orderBook['bids'].keys(), height=self.orderBook['bids'].values(), width=1)
            self.askBar.setOpts(x0=self.orderBook['asks'].keys(), height=self.orderBook['asks'].values(), width=1)

    def getOrderBookSnapshot(self):
        orderBookEncoded = get('https://api.binance.com/api/v3/depth?symbol=BTCUSDT&limit=1000')
        if orderBookEncoded.ok:
            rawOrderBook = orderBookEncoded.json()
            orderBook = {'bids': {}, 'asks': {}}
            for orders in rawOrderBook['bids']:
                orderBook['bids'].update({float(orders[0]): float(orders[1])})
            for orders in rawOrderBook['asks']:
                orderBook['asks'].update({float(orders[0]): float(orders[1])})
            last_uID = rawOrderBook['lastUpdateId']
            while self.ordBookBuff[0]['u'] <= last_uID:
                del self.ordBookBuff[0]
                if len(self.ordBookBuff) == 0:
                    break
            if len(self.ordBookBuff) >= 1:
                for eachUpdate in self.ordBookBuff:
                    self.last_uID = eachUpdate['u']
                    self.updateOrderBook(eachUpdate)
                self.ordBookBuff = []
            self.SnapShotRecieved = True
        else:
            print('Error retrieving order book.')  # RESTful request failed

def on_text_message(message, refObj):
    messaged = json.loads(message)
    if refObj.SnapShotRecieved == False:
        refObj.ordBookBuff.append(messaged)
    else:
        refObj.updateOrderBook(messaged)

def delay(myObj):
    time.sleep(2)
    myObj.getOrderBookSnapshot()

def main():
    pg.mkQApp()
    refObj = coin()

    websock = QtWebSockets.QWebSocket()
    websock.connected.connect(lambda: print('connected'))
    websock.disconnected.connect(lambda: print('disconnected'))
    websock.error.connect(lambda e: print('error', e))
    websock.textMessageReceived.connect(functools.partial(on_text_message, refObj=refObj))

    url = QtCore.QUrl("wss://stream.binance.com:9443/ws/btcusdt#depth#1000ms")
    websock.open(url)

    getorderbook = threading.Thread(target=delay, args=(refObj,), daemon=True)  # , args = (lambda : websocketThreadExitFlag,)
    getorderbook.start()

    pg.exec()

if __name__ == "__main__":
    main()
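(A note on the non-blocking pause specifically, not from the original post: since Qt can schedule a callback on the GUI thread itself, one alternative worth trying is QtCore.QTimer.singleShot, which avoids spawning a thread and therefore the cross-thread update problem. A minimal sketch, assuming the same coin class and websocket setup as above:)

def main():
    pg.mkQApp()
    refObj = coin()
    # ... websocket setup exactly as above ...
    # Run getOrderBookSnapshot on the main (GUI) thread after 2000 ms, without blocking it.
    QtCore.QTimer.singleShot(2000, refObj.getOrderBookSnapshot)
    pg.exec()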

Running a method in parallel with waiting

I have a function that generates a random reaction time and waits before printing out to the console.
Here is my code
import time
import random

def saySnap(player):
    reactionTime = random.randint(120, 401) / 1000
    time.sleep(reactionTime)
    print("{} : Snap!".format(player))

saySnap("p1")
saySnap("p2")
This results in 'p1' always being first since time.sleep blocks the program. How can I make sure that either player can print first?
You can use threading:
import time
import random
import threading

def saySnap(player):
    reactionTime = random.randint(120, 401) / 1000
    time.sleep(reactionTime)
    print(f"{player}: Snapped in {reactionTime}!")

p1_thread = threading.Thread(target=saySnap, args=("p1",))
p2_thread = threading.Thread(target=saySnap, args=("p2",))

p1_thread.start()
p2_thread.start()
This prints in an order that depends on the random reaction times generated above.
You can use the threading.Timer class from the built-in threading module, which represents an action that should run only after a certain amount of time has passed.
Use:
import threading
import random

def printSnap(player):  # action that will be performed after reactionTime has elapsed
    print("{} : Snap!".format(player))

def saySnap(player):
    reactionTime = random.randint(120, 401) / 1000
    # instantiate and start the timer.
    threading.Timer(reactionTime, printSnap, args=(player,)).start()
Or, if you don't want to define another function printSnap, use:
def saySnap(player):
    reactionTime = random.randint(120, 401) / 1000
    threading.Timer(
        reactionTime, lambda p: print(f"{p} : Snap!"), args=(player,)).start()
Calling the function:
saySnap("p1")
saySnap("p2")

How do I implement and execute threading with multiple classes in Python?

I'm very new to Python (most of my previous programming experience is in intermediate C++ and Java) and am trying to develop a script which will read sensor data and log it to a .csv file. To do this, I created separate classes: one reads the sensor data and prints it to the console, while the other is supposed to take that data and log it. I combined them into a master script containing each class. Separately they work perfectly, but together only the sensorReader class runs. I am trying to get each class to run in its own thread, while also passing the sensor data from the first class (sensorReader) to the second class (csvWriter). I've posted some of my pseudocode below, but I'd be happy to clarify any questions with the actual source code if needed.
import time
import sensorStuff
import csv
import threading
import datetime

class sensorReader:
    # Initializers for the sensors.
    this.code(initializes the sensors)
    while True:
        try:
            this.code(prints the sensor data to the console)
            this.code(throws exceptions)
            this.code(waits 60 seconds)

class csvWriter:
    this.code(fetches the date and time)
    this.code(writes the headers for the excel sheet once)
    while True:
        this.code(gets date and time)
        this.code(writes the time and one row of data to excel)
        this.code(writes a message to console then repeats every minute)

r = sensorReader()
t = threading.Thread(target=r, name="Thread #1")
t.start()
t.join

w = csvWriter()
t = threading.Thread(target=w, name="Thread #2")
t.start()
I realize the last part doesn't really make sense, but I'm really punching above my weight here, so I'm not even sure why only the first class works and not the second, let alone how to implement threading for multiple classes. I would really appreciate it if anyone could point me in the right direction.
Thank you!
EDIT
I've decided to put up the full source code:
import time
import board
import busio
import adafruit_dps310
import adafruit_dht
import csv
import threading
import datetime
# import random

class sensorReader:
    # Initializers for the sensors.
    i2c = busio.I2C(board.SCL, board.SDA)
    dps310 = adafruit_dps310.DPS310(i2c)
    dhtDevice = adafruit_dht.DHT22(board.D4)

    while True:
        # Print the values to the console.
        try:
            global pres
            pres = dps310.pressure
            print("Pressure = %.2f hPa" % pres)
            global temperature_c
            temperature_c = dhtDevice.temperature
            global temperature_f
            temperature_f = temperature_c * (9 / 5) + 32
            global humidity
            humidity = dhtDevice.humidity
            print("Temp: {:.1f} F / {:.1f} C \nHumidity: {}% "
                  .format(temperature_f, temperature_c, humidity))
            print("")
        # Errors happen fairly often with DHT sensors, and will occasionally throw exceptions.
        except RuntimeError as error:
            print("n/a")
            print("")
        # Waits 60 seconds before repeating.
        time.sleep(10)

class csvWriter:
    # Fetches the date and time for future file naming and data logging operations.
    starttime = time.time()
    x = datetime.datetime.now()

    # Writes the header for the .csv file once.
    with open('Weather Log %s.csv' % x, 'w', newline='') as f:
        fieldnames = ['Time', 'Temperature (F)', 'Humidity (%)', 'Pressure (hPa)']
        thewriter = csv.DictWriter(f, fieldnames=fieldnames)
        thewriter.writeheader()

    # Fetches the date and time.
    while True:
        from datetime import datetime
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")

        # Writes incoming data to the .csv file.
        with open('Weather Log %s.csv', 'a', newline='') as f:
            fieldnames = ['TIME', 'TEMP', 'HUMI', 'PRES']
            thewriter = csv.DictWriter(f, fieldnames=fieldnames)
            thewriter.writerow({'TIME': current_time, 'TEMP': temperature_f, 'HUMI': humidity, 'PRES': pres})

        # Writes a message confirming the data's entry into the log, then sets a 60 second repeat cycle.
        print("New entry added.")
        time.sleep(10.0 - ((time.time() - starttime) % 10.0))  # Repeat every ten seconds.

r = sensorReader()
t = threading.Thread(target=r, name="Thread #1")
t.start()
t.join

w = csvWriter()
t = threading.Thread(target=w, name="Thread #2")
t.start()
It would work better structured like this. If you put the first loop in a function, you can delay its evaluation until you're ready to start the thread. But in a class body it would run immediately and you never get to the second definition.
def sensor_reader():
    # Initializers for the sensors.
    this.code(initializes the sensors)
    while True:
        try:
            this.code(prints the sensor data to the console)
        except:
            print()
        this.code(waits 60 seconds)

threading.Thread(target=sensor_reader, name="Thread #1", daemon=True).start()

this.code(fetches the date and time)
this.code(writes the headers for the excel sheet once)
while True:
    this.code(gets date and time)
    this.code(writes the time and one row of data to excel)
    this.code(writes a message to console then repeats every minute)
I made it a daemon so it will stop when you terminate the program. Note also that we only needed to create one thread, since we already have the main thread.
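(To make that structure concrete, here is a minimal runnable sketch of the same pattern, not from the original answer: the reader thread pushes readings onto a queue.Queue instead of relying on globals, and the main thread writes each reading to the CSV as it arrives. The dummy values stand in for the real sensor reads.)

import csv
import queue
import threading
import time
from datetime import datetime

readings = queue.Queue()  # hands data from the reader thread to the main thread

def sensor_reader():
    # Stand-in for the real sensor initialisation and reads.
    while True:
        row = {"TIME": datetime.now().strftime("%H:%M:%S"),
               "TEMP": 72.0, "HUMI": 40.0, "PRES": 1013.2}  # dummy values
        readings.put(row)
        time.sleep(10)

threading.Thread(target=sensor_reader, name="Thread #1", daemon=True).start()

# Main thread: log each reading as it arrives.
with open("weather_log.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["TIME", "TEMP", "HUMI", "PRES"])
    writer.writeheader()
    while True:
        writer.writerow(readings.get())  # blocks until the reader produces a row
        f.flush()
        print("New entry added.")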

How to run my computer's time setter as a silent background PID?

I think my computer's BIOS battery is dead; consequently, the system time is never accurate. It sometimes gets stuck at 1 PM with a wrong date, so I came up with this code to fetch universal time from an API and set my computer's time accordingly.
My problem is how to make this code run in the background without printing any ping results to the screen or showing anything. I need it to run as a PID and stay alive as long as the computer is on.
PS: I am using Windows 7.
from json import loads
from urllib.request import urlopen
import logging
from win32api import SetSystemTime
from datetime import datetime
from time import sleep
import re
from os import system

while True:
    # connection is dead with 1, connection is alive with 0
    connection_is_dead = 1
    while connection_is_dead != 0:
        connection_is_dead = system('ping -n 1 google.com')

    logging.basicConfig(level=logging.INFO)
    logging.disable(level=logging.INFO)  # logging off
    logging.info('Connection is up...')

    try:
        with urlopen("http://worldtimeapi.org/api/timezone/africa/tunis") as time_url:
            text = time_url.read()
        logging.info('Time api imported...')

        mytime_dict = loads(text)
        time_now = mytime_dict['datetime']
        logging.info(time_now)

        time_stamp = re.compile(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d+)[+|-].*')
        time_match = list(map(int, re.match(time_stamp, time_now).groups()))

        # win32api.SetSystemTime(year, month, dayOfWeek, day, hour, minute, second, milliseconds)
        dayOfWeek = datetime(*time_match[:3]).weekday()
        SetSystemTime(*time_match[:2], dayOfWeek, *time_match[2:6], 0)
        logging.info('Time updated successfully...')
        #system('exit')

        sleep(1800)  # 30 min / reset time every 30 min
    except:
        logging.error('Time was not updated due to an unexpected error... ')
        #system('exit')
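(Not part of the original question, but relevant to the visible ping output: a minimal sketch of one way to silence the connectivity check is to replace os.system with subprocess.run and discard both output streams. Running the script with pythonw.exe, or as a scheduled task, additionally avoids opening a console window at all.)

import subprocess

def connection_is_alive() -> bool:
    # Ping once and discard all output so nothing is printed to the screen.
    result = subprocess.run(
        ['ping', '-n', '1', 'google.com'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0

# Inside the existing loop, instead of system('ping -n 1 google.com'):
# while not connection_is_alive():
#     sleep(5)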
