I have written a simple management command that loops through all Result objects and checks their www field (which holds the URL of the published scientific result, e.g. https://doi.org/10.1109/5.771073).
There are 1M results in our DB, and I want to check the www field of each: if the link is corrupted, I guess a new one by appending the actual DOI to https://doi.org/ and save it (in the www_processed field).
This is my first time working with asyncio, but I think the bare bones of my code are right, and I can't figure out why the code runs synchronously.
Main command:
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import asyncio
import time

from django.core.management.base import BaseCommand

from models import Result


def run_statistics(array_of_results, num_of_results):
    num_of_correct_urls = 0
    sum_check_time = 0
    max_check_time = 0
    for res in array_of_results:
        if res[0]:
            num_of_correct_urls += 1
        if res[1] > max_check_time:
            max_check_time = res[1]
        sum_check_time += res[1]
    return f"""ran statistics on {num_of_results} results \n
----------------------------------------------------------------------------
correct/corrupted link ratio: {num_of_correct_urls} / {num_of_results - num_of_correct_urls}\n
Mean time to check URL: {sum_check_time / num_of_results}\n
"""
class Command(BaseCommand):
    help = 'checks url in www field of result, if the link is unresponsive, tries to generate new hyperlink ' \
           '(using DOI) and saves it in www_processed field'

    async def run_check(self, obj):
        """
        Takes care of checking the Result www field.
        `await obj.get_www_coroutine()` passes control back to the event loop.
        :returns: a (bool, elapsed_seconds) tuple;
            True on unchanged url,
            False otherwise
        """
        print('STARTING run_check', file=self.stdout)
        start_time = time.perf_counter()
        final_url = await obj.get_www_coroutine()
        if final_url == obj.www:
            print('STOPPING run_check', file=self.stdout)
            return True, time.perf_counter() - start_time
        else:
            print('STOPPING run_check', file=self.stdout)
            return False, time.perf_counter() - start_time

    async def main(self, objs):
        await asyncio.gather(self.run_check(objs[0]), self.run_check(objs[1]))

    def handle(self, *args, **kwargs):
        start_time = time.perf_counter()
        print('started the process', file=self.stdout)
        objs = Result.objects.all().only('www', 'www_processed', 'www_last_checked').order_by('?')[:2]
        num_of_results = 10  # Result.objects.all().count()
        print('running main', file=self.stdout)

        async def _main_routine():
            array_of_responses = await asyncio.gather(*(self.run_check(_) for _ in objs))
            print(f'retrieved {num_of_results} results, running command', file=self.stdout)
            # print(res_array, file=self.stdout)
            print(run_statistics(array_of_responses, 10) + f'total time: {time.perf_counter() - start_time}\n',
                  file=self.stdout)

        asyncio.run(_main_routine())
The model method that checks the www field and saves the guessed link, if that needs to be done:
async def get_www_coroutine(self):
    if not self.www_last_checked or datetime.date.today() - self.www_last_checked > datetime.timedelta(days=365):
        if not self.www or not await check_url_returns_200_in_time_coroutine(self.www):  # www is corrupted
            if self.doi:
                self.www_processed = self.get_doi_url()
            else:
                self.www_processed = None
            self.www_last_checked = datetime.date.today()
        else:  # www looks alright
            self.www_processed = self.www
        self.save()
    return self.www_processed or False
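(get_doi_url is not shown in the question; presumably it is something like the following hypothetical one-liner on the model, building the guessed link the way the intro describes:)

def get_doi_url(self):
    # Hypothetical reconstruction: append the stored DOI to the resolver root,
    # as described at the top of the question.
    return 'https://doi.org/' + self.doi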
The helper that checks whether a link returns 200:
async def check_url_returns_200_in_time_coroutine(url, timeout=1):
    try:
        # Wire the timeout parameter through to aiohttp
        # (in the original it was declared but never used)
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=timeout)) as session:
            async with session.get(url) as response:
                return response.status == 200
    except (aiohttp.client_exceptions.InvalidURL, asyncio.TimeoutError):
        return False
The actual output:
started the process
running main
STARTING run_check
STOPPING run_check
STARTING run_check
STOPPING run_check
retrieved 10 results, running command
ran statistics on 10 results
----------------------------------------------------------------------------
correct/corrupted link ratio: 1 / 9
Mean time to check URL: 0.17720807899999896
total time: 73.279784077
As you can see, the code executes sequentially and takes far too long to complete. I expected to see STARTING run_check for all objects first, followed by the STOPPING run_check lines.
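(For reference, a minimal self-contained sketch, independent of the Django code above, showing the interleaving you would expect from truly concurrent coroutines:)

import asyncio

async def check(i):
    print(f'STARTING run_check {i}')
    await asyncio.sleep(1)  # stands in for the awaited HTTP call
    print(f'STOPPING run_check {i}')

async def main():
    await asyncio.gather(*(check(i) for i in range(3)))

asyncio.run(main())
# Expected: all three STARTING lines print first, then the three STOPPING
# lines, and the whole run takes ~1 second rather than ~3.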
I have finally solved the issue!!!
The code does run asynchronously (I tested with only two results, so it wasn't clear from the output).
The bottleneck was actually the DB query: objs = Result.objects.all().only('www', 'www_processed', 'www_last_checked').order_by('?')[:2] takes a long time, because there are 1M objects and order_by('?') has to shuffle them all first. More here: How to pull a random record using Django's ORM?
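(A common workaround, sketched here under the assumption that Result has an auto-incrementing integer primary key: sample random ids in Python instead of asking the database to shuffle the whole table with order_by('?').)

import random

from django.db.models import Max

def random_results(n):
    # Hypothetical helper: fetch ~n random Results without ORDER BY RANDOM()
    # over 1M rows.
    max_id = Result.objects.aggregate(max_id=Max('id'))['max_id']
    # Over-sample ids because some may be missing (deleted rows), then trim.
    candidate_ids = random.sample(range(1, max_id + 1), k=min(n * 2, max_id))
    qs = Result.objects.filter(id__in=candidate_ids).only(
        'www', 'www_processed', 'www_last_checked')
    return list(qs[:n])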
Is it possible to use multiprocessing in Django while handling a request?
# So if I send a request to http://127.0.0.1:8000/wallet_verify
def wallet_verify(request):
    walelts = botactive.objects.all()
    # Here I check if the user wants to be included in the process:
    # if they set it to True I'll include them, else ignore them.
    for active in walelts:
        check_active = active.active
        if check_active == True:
            user_is_active = active.user
            # For the ones that want to be included I then fetch their key data.
            # I need both the api key and the secret, so I loop through the
            # data of the active users.
            database = Bybitapidatas.objects.filter(user=user_is_active)
            for apikey in database:
                apikey = apikey.apikey
            for apisecret in database:
                apisecret = apisecret.apisecret
            # Since I am making a request to an exchange endpoint I can only
            # include one API key and secret at a time, i.e. one person at a
            # time. This is why I want to run in parallel.
            for a, b in zip(list(Bybitapidatas.objects.filter(user=user_is_active).values("apikey")), list(Bybitapidatas.objects.filter(user=user_is_active).values("apisecret"))):
                session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=a['apikey'], api_secret=b['apisecret'])
                # Here I check whether they have balance to open trades,
                # if they have selected to be included.
                GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                for i in GET_USDT_BALANCE:
                    if 'USDT' in i.values():
                        GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                        idx_USDT = GET_USDT_BALANCE.index(i)
                        GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
                        print(round(float(GET_USDTBALANCE), 2))
                # If they don't have enough balance I skip the user.
                if round(float(GET_USDTBALANCE), 2) < 11:
                    pass
                else:
                    session.place_active_order(
                        symbol="BTCUSDT",
                        side="Buy",
                        type="MARKET",
                        qty=10,
                        timeInForce="GTC"
                    )
How can I run this process in parallel, while looping through the database to get the data for each individual user?
I am still new to coding, so I hope I explained it in a way that makes sense.
I have tried multiprocessing and pools, but then I get an error that the app has not started yet, and I have to run it outside of wallet_verify. Is there a way to do it inside wallet_verify, triggered when I send the POST request?
Any help is appreciated.
Filtering the database to get the users who have set active to True; listi ends up as [1, 3] (these are the returned user IDs):
processess = botactive.objects.filter(active=True).values_list('user')
listi = [row[0] for row in processess]
Get each user from listi and perform the action:
def wallet_verify(listi):
    # print(listi)
    database = Bybitapidatas.objects.filter(user=listi)
    print("---------------------------------------------------- START")
    for apikey in database:
        apikey = apikey.apikey
        print(apikey)
    for apisecret in database:
        apisecret = apisecret.apisecret
        print(apisecret)
    start_time = time.time()
    session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=apikey, api_secret=apisecret)
    GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
    for i in GET_USDT_BALANCE:
        if 'USDT' in i.values():
            GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
            idx_USDT = GET_USDT_BALANCE.index(i)
            GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
            print(round(float(GET_USDTBALANCE), 2))
    if round(float(GET_USDTBALANCE), 2) < 11:
        pass
    else:
        session.place_active_order(
            symbol="BTCUSDT",
            side="Buy",
            type="MARKET",
            qty=10,
            timeInForce="GTC"
        )
    print("My program took", time.time() - start_time, "to run")
    print("---------------------------------------------------- END")
    return HttpResponse("Wallets verified")
verifyt is what I use for the multiprocessing, since I don't want it to run without being requested. The initializer starts the Django apps for each worker process:
def verifyt(request):
    with ProcessPoolExecutor(max_workers=4, initializer=django.setup) as executor:
        results = executor.map(wallet_verify, listi)
    return HttpResponse("done")
I am very new to asynchronous programming and was playing around with httpx. I have the following code, and I am sure I am doing something wrong; I just don't know what it is. There are two methods, one synchronous and the other asynchronous, and both pull from Google Finance. On my system I am seeing the times below:
Asynchronous: 5.015218734741211
Synchronous: 5.173618316650391
Here is the code:
import httpx
import asyncio
import time

#
#--------------------------------------------------------------------
#
#--------------------------------------------------------------------
#
def sync_pull(url):
    r = httpx.get(url)
    print(r.status_code)

#
#--------------------------------------------------------------------
#
#--------------------------------------------------------------------
#
async def async_pull(url):
    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        print(r.status_code)

#
#--------------------------------------------------------------------
#
#--------------------------------------------------------------------
#
if __name__ == "__main__":
    goog_fin_nyse_url = 'https://www.google.com/finance/quote/'
    tickers = ['F', 'TWTR', 'CVX', 'VZ', 'GME', 'GM', 'PG', 'AAL',
               'MARK', 'AAP', 'THO', 'NGD', 'ZSAN', 'SEAC',
               ]

    print("Running asynchronously...")
    async_start = time.time()
    for ticker in tickers:
        url = goog_fin_nyse_url + ticker + ':NYSE'
        asyncio.run(async_pull(url))
    async_end = time.time()
    print(f"Time lapsed is: {async_end - async_start}")

    print("Running synchronously...")
    sync_start = time.time()
    for ticker in tickers:
        url = goog_fin_nyse_url + ticker + ':NYSE'
        sync_pull(url)
    sync_end = time.time()
    print(f"Time lapsed is: {sync_end - sync_start}")
I had hoped the asynchronous approach would take a fraction of the time of the synchronous one. What am I doing wrong?
When you say asyncio.run(async_pull(url)) you're saying run async_pull and wait for the result to come back. Since you do this once for each ticker in your loop, you're essentially using asyncio to run things synchronously, and you won't see any performance benefit.
What you need to do is create several async calls and run them concurrently. There are several ways to do this; the easiest is to use asyncio.gather (see https://docs.python.org/3/library/asyncio-task.html#asyncio.gather), which takes a sequence of coroutines and runs them concurrently. Adapting your code is fairly straightforward: you create an async function that takes a list of urls and calls async_pull on each of them, then pass those coroutines to asyncio.gather and await the results. Adapted, your code looks like the following:
import httpx
import asyncio
import time

def sync_pull(url):
    r = httpx.get(url)
    print(r.status_code)

async def async_pull(url):
    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        print(r.status_code)

async def async_pull_all(urls):
    return await asyncio.gather(*[async_pull(url) for url in urls])

if __name__ == "__main__":
    goog_fin_nyse_url = 'https://www.google.com/finance/quote/'
    tickers = ['F', 'TWTR', 'CVX', 'VZ', 'GME', 'GM', 'PG', 'AAL',
               'MARK', 'AAP', 'THO', 'NGD', 'ZSAN', 'SEAC',
               ]

    print("Running asynchronously...")
    async_start = time.time()
    results = asyncio.run(async_pull_all([goog_fin_nyse_url + ticker + ':NYSE' for ticker in tickers]))
    async_end = time.time()
    print(f"Time lapsed is: {async_end - async_start}")

    print("Running synchronously...")
    sync_start = time.time()
    for ticker in tickers:
        url = goog_fin_nyse_url + ticker + ':NYSE'
        sync_pull(url)
    sync_end = time.time()
    print(f"Time lapsed is: {sync_end - sync_start}")
Running this way, the asynchronous version runs in about a second for me as opposed to seven synchronously.
Here's a nice pattern I use (I tend to change it a little each time). In general, I make a module async_utils.py and just import the top-level fetching function (e.g. here fetch_things), and then my code is free to forget about the internals (other than error handling). You can do it in other ways, but I like the 'functional' style of aiostream, and often find the repeated calls to the process function take certain defaults I set using functools.partial.
Note: async currying with partials is Python 3.8+ only
You can pass in a tqdm.tqdm progress bar to pbar (initialised with known size total=len(things)) to have it update when each async response is processed.
import asyncio
from functools import partial

import httpx
from aiostream import stream

__all__ = ["fetch", "process_thing", "async_fetch_urlset", "fetch_things"]

async def fetch(session, url, raise_for_status=False):
    response = await session.get(str(url))
    if raise_for_status:
        response.raise_for_status()
    return response

async def process_thing(data, things, pbar=None, verbose=False):
    # Map the response back to the thing it came from in the things list
    source_url = data.history[0].url if data.history else data.url
    thing = next(t for t in things if source_url == t.get("thing_url"))
    # Handle `data.content` here, where `data` is the `httpx.Response`
    if verbose:
        print(f"Processing {source_url=}")
    thing.update({"computed_value": "result goes here"})
    if pbar:
        pbar.update()

async def async_fetch_urlset(urls, things, pbar=None, verbose=False, timeout_s=10.0):
    timeout = httpx.Timeout(timeout=timeout_s)
    async with httpx.AsyncClient(timeout=timeout) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=20)
        process = partial(process_thing, things=things, pbar=pbar, verbose=verbose)
        zs = stream.map(ys, process)
        return await zs

def fetch_things(urls, things, pbar=None, verbose=False):
    return asyncio.run(async_fetch_urlset(urls, things, pbar, verbose))
In this example, the input is a list of dicts (with string keys and values), things: list[dict[str,str]], and the key "thing_url" is accessed to retrieve the URL. Having a dict or object instead of just the URL string is desirable for when you want to 'map' the result back to the object it came from. The process_thing function is able to modify the input list things in-place (i.e. any changes are not scoped within the function; they change it back in the scope that called it).
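(A two-line illustration of that in-place mutation, in case the scoping is unfamiliar:)

def annotate(things):
    things[0].update({"computed_value": 42})  # mutates the caller's list entry

items = [{"thing_url": "https://example.com"}]
annotate(items)
print(items)  # [{'thing_url': 'https://example.com', 'computed_value': 42}]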
You'll often find errors arise during async runs that you don't get when running synchronously, so you'll need to catch them and retry. A common gotcha is to retry at the wrong level (e.g. around the entire loop).
In particular, you'll want to import and catch httpcore.ConnectTimeout, httpx.ConnectTimeout, httpx.RemoteProtocolError, and httpx.ReadTimeout.
Increasing the timeout_s parameter will reduce the frequency of the timeout errors by letting the AsyncClient 'wait' for longer, but doing so may in fact slow down your program (it won't "fail fast" quite as fast).
Here's an example of how to use the async_utils module given above:
from async_utils import fetch_things
import httpx
import httpcore

# UNCOMMENT THIS TO SEE ALL THE HTTPX INTERNAL LOGGING
#import logging
#log = logging.getLogger()
#log.setLevel(logging.DEBUG)
#log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s')
#console = logging.StreamHandler()
#console.setLevel(logging.DEBUG)
#console.setFormatter(log_format)
#log.addHandler(console)

things = [
    {"thing_url": "https://python.org", "name": "Python"},
    {"thing_url": "https://www.python-httpx.org/", "name": "HTTPX"},
]
#log.debug("URLSET:" + str(list(t.get("thing_url") for t in things)))

def make_urlset(things):
    """Make a URL generator (empty if all have been fetched)"""
    urlset = (t.get("thing_url") for t in things if "computed_value" not in t)
    return urlset

retryable_errors = (
    httpcore.ConnectTimeout,
    httpx.ConnectTimeout, httpx.RemoteProtocolError, httpx.ReadTimeout,
)

# ASYNCHRONOUS:
max_retries = 100
for i in range(max_retries):
    print(f"Retry {i}")
    try:
        urlset = make_urlset(things)
        foo = fetch_things(urls=urlset, things=things, verbose=True)
    except retryable_errors as exc:
        print(f"Caught {exc!r}")
        if i == max_retries - 1:
            raise
    except Exception:
        raise
    else:
        break  # all URLs fetched without a retryable error

# SYNCHRONOUS:
#for t in things:
#    resp = httpx.get(t["thing_url"])
In this example I set a key "computed_value" on a dictionary once the async response has successfully been processed, which then prevents that URL from being entered into the generator on the next round (when make_urlset is called again). In this way, the generator gets progressively smaller. You can also do it with lists, but I find a generator of the URLs to be pulled works reliably. For an object you'd change the dictionary key assignment/access (update/in) to attribute assignment/access (setattr/hasattr).
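(A sketch of that object variant, with the dict operations swapped for attribute operations:)

class Thing:
    def __init__(self, thing_url):
        self.thing_url = thing_url

things = [Thing("https://python.org"), Thing("https://www.python-httpx.org/")]

# Generator of still-unfetched URLs: hasattr instead of `in`
urlset = (t.thing_url for t in things if not hasattr(t, "computed_value"))

# ...and in process_thing, setattr instead of dict assignment:
# setattr(thing, "computed_value", "result goes here")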
I wanted to post a working version of the code using futures; it has virtually the same run-time:
import httpx
import asyncio
import time

#
#--------------------------------------------------------------------
# Synchronous pull
#--------------------------------------------------------------------
#
def sync_pull(url):
    r = httpx.get(url)
    print(r.status_code)

#
#--------------------------------------------------------------------
# Asynchronous Pull
#--------------------------------------------------------------------
#
async def async_pull(url):
    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        print(r.status_code)

#
#--------------------------------------------------------------------
# Build tasks queue & execute coroutines
#--------------------------------------------------------------------
#
async def build_task() -> None:
    goog_fin_nyse_url = 'https://www.google.com/finance/quote/'
    tickers = ['F', 'TWTR', 'CVX', 'VZ', 'GME', 'GM', 'PG', 'AAL',
               'MARK', 'AAP', 'THO', 'NGD', 'ZSAN', 'SEAC',
               ]
    tasks = []
    #
    ## The following block of code will create a queue full of function
    ## calls
    for ticker in tickers:
        url = goog_fin_nyse_url + ticker + ':NYSE'
        tasks.append(asyncio.ensure_future(async_pull(url)))
    start_time = time.time()
    #
    ## This block of code will dereference the function calls
    ## from the queue, which will cause them all to run
    ## rapidly
    await asyncio.gather(*tasks)
    #
    ## Calculate time lapsed
    finish_time = time.time()
    elapsed_time = finish_time - start_time
    print(f"\n Time spent processing: {elapsed_time} ")

# Start from here
if __name__ == "__main__":
    asyncio.run(build_task())
So here is my use case:
I read from a database rows containing information to make a complex SOAP call (I'm using zeep to do these calls).
One row from the database corresponds to a request to the service.
There can be up to 20 thousand lines, so I don't want to read everything in memory before making the calls.
I need to process the responses: when a response is OK, I need to store some returned information back into my database, and when there is an exception I need to process the exception for that particular request/response pair.
I also need to capture some external information at the time of request creation, so that I know where to store the response. In my current code I'm using the delightful property of gather() that results come back in the same order as the requests.
I read the relevant PEPs and the Python documentation, but I'm still very confused, as there seem to be multiple ways to solve the same problem.
I also went through countless exercises on the web, but the examples are all trivial: it's either asyncio.sleep() or some web scraping with a finite list of urls.
The solution I have come up with so far kinda works: the asyncio.gather() method is very, very useful, but I have not been able to 'feed' it from a generator. I'm currently just counting up to an arbitrary batch size and then starting a .gather() operation. I've transcribed the code below, with the boring parts left out, and tried to anonymise it.
I've tried solutions involving semaphores, queues, and different event loops, but I'm failing every time. Ideally I'd like to be able to create Futures 'continuously'; I think I'm missing the logic of 'convert this awaitable call to a future'.
I'd be grateful for any help!
import asyncio
from asyncio import Future

import zeep
from zeep.plugins import HistoryPlugin

history = HistoryPlugin()
max_concurrent_calls = 5
provoke_errors = True


def export_data_async(db_variant: str, order_nrs: set):
    st = time.time()
    results = []
    loop = asyncio.get_event_loop()

    def get_client1(service_name: str, system: Systems = Systems.ACME) -> Tuple[zeep.Client, zeep.client.Factory]:
        client1 = zeep.Client(wsdl=system.wsdl_url(service_name=service_name),
                              transport=transport,
                              plugins=[history],
                              )
        factory_ns2 = client1.type_factory(namespace='ns2')
        return client1, factory_ns2

    table = 'ZZZZ'
    moveback_table = 'EEEEEE'
    moveback_dict = create_default_empty_ordered_dict('attribute1 attribute2 attribute3 attribute3')

    client, factory = get_client1(service_name='ACMEServiceName')
    if log.isEnabledFor(logging.DEBUG):
        client.wsdl.dump()
        zeep_log = logging.getLogger('zeep.transports')
        zeep_log.setLevel(logging.DEBUG)

    with Db(db_variant) as db:
        db.open_db(CON_STRING[db_variant])
        db.init_table_for_read(table, order_list=order_nrs)

        counter_failures = 0
        tasks = []
        sids = []
        results = []

        def handle_future(future: Future) -> None:
            results.extend(future.result())

        def process_tasks_concurrently() -> None:
            nonlocal tasks, sids, counter_failures, results
            futures = asyncio.gather(*tasks, return_exceptions=True)
            futures.add_done_callback(handle_future)
            loop.run_until_complete(futures)
            for i, response_or_fault in enumerate(results):
                if type(response_or_fault) in [zeep.exceptions.Fault, zeep.exceptions.TransportError]:
                    counter_failures += 1
                    log_webservice_fault(sid=sids[i], db=db, err=response_or_fault, object=table)
                else:
                    db.write_dict_to_table(
                        moveback_table,
                        {'sid': sids[i],
                         'attribute1': response_or_fault['XXX']['XXX']['xxx'],
                         'attribute2': response_or_fault['XXX']['XXX']['XXXX']['XXX'],
                         'attribute3': response_or_fault['XXXX']['XXXX']['XXX'],
                         }
                    )
            db.commit_db_con()
            tasks = []
            sids = []
            results = []
            return

        for row in db.rows(table):
            if int(row.id) % 2 == 0 and provoke_errors:
                payload = faulty_message_payload(row=row,
                                                 factory=factory,
                                                 )
            else:
                payload = message_payload(row=row,
                                          factory=factory,
                                          )

            tasks.append(client.service.myRequest(
                MessageHeader=factory.MessageHeader(**message_header_arguments(row=row)),
                myRequestPayload=payload,
                _soapheaders=[security_soap_header],
            ))
            sids.append(row.sid)

            if len(tasks) == max_concurrent_calls:
                process_tasks_concurrently()

        if tasks:  # this is the remainder of len(db.rows) % max_concurrent_calls
            process_tasks_concurrently()

        loop.run_until_complete(transport.session.close())
        db.execute_this_statement(statement=update_sql)
        db.commit_db_con()

    log.info(db.activity_log)
    if counter_failures:
        log.info(f"{table :<25} Count failed: {counter_failures}")

    print("time async: %.2f" % (time.time() - st))
    return results
A failed attempt with a Queue (it blocks at the await client.service call):
loop = asyncio.get_event_loop()
counter = 0
results = []


async def payload_generator(db_variant: str, order_nrs: set):
    # code that generates the data for the request
    yield counter, row, payload


async def service_call_worker(queue, results):
    while True:
        counter, row, payload = await queue.get()
        results.append(await client.service.myServicename(
            MessageHeader=calculate_message_header(row=row),
            myPayload=payload,
            _soapheaders=[security_soap_header],
        ))
        print(colorama.Fore.BLUE + f'after result returned {counter}')
        # Here do the relevant processing of response or error
        queue.task_done()


async def main_with_q():
    n_workers = 3
    queue = asyncio.Queue(n_workers)
    e = pprint.pformat(queue)
    p = payload_generator(DB_VARIANT, order_list_from_args())
    results = []
    workers = [asyncio.create_task(service_call_worker(queue, results))
               for _ in range(n_workers)]
    async for c in p:
        await queue.put(c)
    await queue.join()  # wait for all tasks to be processed
    for worker in workers:
        worker.cancel()


if __name__ == '__main__':
    try:
        loop.run_until_complete(main_with_q())
        loop.run_until_complete(transport.session.close())
    finally:
        loop.close()
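(For reference, the pattern the question is reaching for, bounding concurrency while results stay matched to their rows, is often written with an asyncio.Semaphore. A minimal sketch, independent of the zeep code above, using asyncio.sleep as a stand-in for the SOAP call:)

import asyncio

max_concurrent_calls = 5

async def call_service(row, semaphore):
    async with semaphore:          # at most 5 coroutines run the body at once
        await asyncio.sleep(0.1)   # stand-in for the awaited SOAP request
        return row, f"response for {row}"

async def main(rows):
    semaphore = asyncio.Semaphore(max_concurrent_calls)
    # Tasks are created eagerly, but the semaphore keeps only 5 in flight;
    # gather still returns results in submission order, preserving the
    # row -> response mapping the question relies on.
    tasks = [asyncio.create_task(call_service(row, semaphore)) for row in rows]
    return await asyncio.gather(*tasks)

results = asyncio.run(main(range(20)))

Note this still creates every task object up front; for a truly unbounded stream of rows, a fixed pool of workers consuming an asyncio.Queue, as the failed attempt above tries to do, is the usual alternative.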
Hello, I have a person detector script, and I want to send an email notification when a person is detected. To prevent mail spamming I need a timer for the sendMail function: it might be triggered at any time, but it should only respond when it is not on cooldown.
I tried using an async task but couldn't implement it, because once a person is detected the code enters a loop that sends an email every 5 minutes, even if no person is detected after the first sighting.
Example:
The person detection script is running.
A person is detected on camera -> send an email (start the 5-minute cooldown).
The person is sighted again after 2 minutes (no email is sent, because there are still 3 minutes of cooldown left).
The person is sighted after 6 minutes -> send another email (because the 5-minute cooldown is over).
A summary of my code (necessary parts only): detection and sending mail work, but the cooldown (timer) doesn't:
async def sendAlert():
    server.sendmail(sender_email, receiver_email, message)
    print('sent!')
    await asyncio.sleep(300)

if __name__ == "__main__":
    while True:
        for i in range(len(boxes)):
            if classes[i] == 1 and scores[i] > threshold:
                with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
                    sendAlert(server)
                box = boxes[i]
                cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (255, 0, 0), 2)
If a person is detected, the script should send an alert by email. Afterwards, if a person is detected again within 5 minutes, the sendAlert function shouldn't respond until the 5 minutes have passed.
I agree with @Prune that you need to create a small (minimal) use-case and present your code so that it is relevant not only to you but also to others. Additionally, your question should include a verifiable example. Without these attributes, your question becomes hard for people to grasp, solve, and/or suggest any verifiable solution for.
However, as I understand it, you have some action (sending an email if a person is detected) that you would like to perform only after a certain cool-off period. In other words, you want a mechanism for keeping track of time, and hence you need the datetime library.
So, your pseudo code should look something like this:
Pseudo Code
import datetime

start = capture_timestamp()
cutoff = '00:05:00'
dt_cutoff = read_in_cutoff_as_timedelta(cutoff)

if person_detected:
    now = capture_timestamp()
    dt = now - start
    if dt >= dt_cutoff:
        # Send notification
        send_email_notification()
    else:
        # Do not send notification
        print('now: {} | dt: {}'.format(now, dt))
You could use datetime.datetime.utcnow() for the timestamp, and datetime.timedelta() for defining dt_cutoff. For reading in a time-string as a time you could do this (note that strptime needs the format as its second argument):
tm = datetime.datetime.strptime(cutoff, '%H:%M:%S').time()
dt_cutoff = datetime.timedelta(hours=tm.hour, minutes=tm.minute, seconds=tm.second)
I hope this gives you some idea about how to model this.
Additional Resources
https://www.guru99.com/date-time-and-datetime-classes-in-python.html
https://docs.python.org/3/library/datetime.html
https://thispointer.com/python-how-to-convert-a-timestamp-string-to-a-datetime-object-using-datetime-strptime/
Complete Solution
Now, finally, if you are in a hurry for a ready-made solution, you may use the following class object as shown. All you need is to instantiate it with your cool-off period (timer_cutoff) and then call the method is_timeout(). If this returns True, you send your notification. There is also an obj.istimeout attribute which stores this decision (True/False).
import time

# Set the cutoff time to 2 seconds to test the output:
# after the 3 second wait below, expect istimeout = True.
# Instantiate the TimeTracker class object (defined below).
ttk = TimeTracker(timer_cutoff='00:00:02')  # 'HH:MM:SS'

# Wait for 3 seconds
time.sleep(3)

print('start timestamp: {}'.format(ttk.timestamp_start_str))
print('cutoff timestamp: {}'.format(ttk.timestamp_cutoff_str))
print('timer_cutoff: {}'.format(ttk.timer_cutoff_str))

# Now check if the cutoff time has been reached
ttk.is_timeout()
print('Send Notification: {}'.format(ttk.istimeout))
print('now_timestamp: {}'.format(ttk.timestamp_now_str))
class TimeTracker
Here is the TimeTracker class:
import datetime

class TimeTracker(object):

    def __init__(self,
                 timer_cutoff='00:05:00',
                 cutoff_strformat='%H:%M:%S'):
        self.timer_cutoff_str = timer_cutoff
        self.cutoff_strformat = cutoff_strformat
        self.timestamp_start, self.timestamp_start_str = self.get_timestamp()
        self.dt_cutoff = None  # timedelta for cutoff
        self.timestamp_cutoff = None
        self.timestamp_cutoff_str = None
        self.update_timestamp_cutoff()
        self.timestamp_now = None
        self.timestamp_now_str = None
        self.dt_elapsed = None
        self.istimeout = False

    def get_timestamp(self):
        ts = datetime.datetime.utcnow()
        tss = str(ts)
        return (ts, tss)

    def readin_cutoff_as_timedelta(self):
        td = datetime.datetime.strptime(self.timer_cutoff_str,
                                        self.cutoff_strformat)
        tdm = td.time()
        self.dt_cutoff = datetime.timedelta(hours=tdm.hour,
                                            minutes=tdm.minute,
                                            seconds=tdm.second)

    def update_timestamp_cutoff(self):
        self.readin_cutoff_as_timedelta()
        self.timestamp_cutoff = self.timestamp_start + self.dt_cutoff
        self.timestamp_cutoff_str = str(self.timestamp_cutoff)

    def time_elapsed(self):
        self.dt_elapsed = self.timestamp_now - self.timestamp_start

    def is_timeout(self):
        self.timestamp_now, self.timestamp_now_str = self.get_timestamp()
        self.time_elapsed()
        if (self.dt_elapsed < self.dt_cutoff):
            self.istimeout = False
        else:
            self.istimeout = True
        return self.istimeout
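(The same cooldown check can also be written in a few lines with time.monotonic(), which is immune to system clock changes; a minimal sketch, not part of the original answer:)

import time

COOLDOWN_S = 300  # 5 minutes
last_sent = None  # monotonic timestamp of the last alert, None until first send

def should_send_alert():
    """Return True (and restart the cooldown) if we are not on cooldown."""
    global last_sent
    now = time.monotonic()
    if last_sent is None or now - last_sent >= COOLDOWN_S:
        last_sent = now
        return True
    return False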
My current code as it stands prints an empty list. How do I wait for all the requests and their callbacks to finish before continuing with the rest of the code?
from requests_futures.sessions import FuturesSession
from time import sleep

session = FuturesSession(max_workers=100)
i = 1884001540 - 100
list = []

def testas(session, resp):
    print(resp)
    resp = resp.json()
    print(resp['participants'][0]['stats']['kills'])
    list.append(resp['participants'][0]['stats']['kills'])

while i < 1884001540:
    url = "https://acs.leagueoflegends.com/v1/stats/game/NA1/" + str(i)
    temp = session.get(url, background_callback=testas)
    i += 1

print(list)
From looking at session.py in requests-futures-0.9.5.tar.gz, it's necessary to create a future in order to wait for its result, as shown in this code:
from requests_futures.sessions import FuturesSession
session = FuturesSession()
# request is run in the background
future = session.get('http://httpbin.org/get')
# ... do other stuff ...
# wait for the request to complete, if it hasn't already
response = future.result()
print('response status: {0}'.format(response.status_code))
print(response.content)
As shown in the README.rst a future can and should be created for every session.get() and waited on to complete.
This might be applied in your code as follows, starting just before the while loop:
future = []
while i < 1884001540:
    url = "https://acs.leagueoflegends.com/v1/stats/game/NA1/" + str(i)
    future.append(session.get(url, background_callback=testas))
    i += 1

for f in future:
    response = f.result()
    # the following print statements may be useful for debugging
    # print('response status: {0}'.format(response.status_code))
    # print(response.content, "\n")

print(list)
I'm not sure how your system will respond to a very large number of futures (the loop above only creates 100, but the id range suggests you may eventually want far more), and another way to handle it is to process them in smaller groups, say 100 or 1000 at a time. It might be wise to test the script with a relatively small number of them at first to find out how fast they return results.
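(A minimal sketch of that batching idea, reusing the session and testas callback from the question; batch_size is a hypothetical knob:)

batch_size = 1000
start_id = 1884001540 - 100
end_id = 1884001540

for batch_start in range(start_id, end_id, batch_size):
    futures = []
    for i in range(batch_start, min(batch_start + batch_size, end_id)):
        url = "https://acs.leagueoflegends.com/v1/stats/game/NA1/" + str(i)
        futures.append(session.get(url, background_callback=testas))
    # Wait for the whole batch before starting the next one,
    # so at most batch_size requests are pending at a time.
    for f in futures:
        f.result()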
From https://pypi.python.org/pypi/requests-futures it says:
from requests_futures.sessions import FuturesSession
session = FuturesSession()
# first request is started in background
future_one = session.get('http://httpbin.org/get')
# second requests is started immediately
future_two = session.get('http://httpbin.org/get?foo=bar')
# wait for the first request to complete, if it hasn't already
response_one = future_one.result()
So it seems that .result() is what you are looking for.