Looping deferLater with timer to choose schedule - python

I am working on a more complex example, but I think this is a simplified version of it. The function should pause for 1 second, and, given a delay before firing, the call should be looped at intervals with a start value; the scheduler should then be stopped afterwards.
from twisted.internet import reactor
import time
from twisted.internet import task


class timer:
    def __init__(self, *args):
        self._paused = True
        self._unpaused = False

    def sleep(self):
        if self._paused:
            print(f"You have paused for this many seconds: {1}s")
            time.sleep(1)

    def scheduler(self, delay=0, *args):
        if self._paused:
            from twisted.internet import reactor
            self._paused = task.deferLater(reactor, delay, self, *args)


if __name__ == '__main__':
    pause_timer = timer()
    timer_list = task.LoopingCall(pause_timer.scheduler)
    timer_list.start(5)
    reactor.callLater(10, reactor.stop)
    reactor.run()
However, I get this error:
builtins.TypeError: 'timer' object is not callable
I will throw in the complex example I am working with here also:
import scrapy
from scrapy.utils import reactor
from scrapy import signals
import logging

logger = logging.getLogger(__name__)


class TestSpider(scrapy.Spider):
    name = 'pause'
    start_urls = [f'http://quotes.toscrape.com/page/{i}/' for i in range(1, 11)]
    custom_settings = {
        'DOWNLOAD_DELAY': 1
    }

    def __init__(self, stats, pause):
        self.stats = stats
        self.pause = pause

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        stat = cls(crawler.stats, crawler)
        crawler.signals.connect(stat.spider_opened, signals.spider_opened)
        return stat

    def spider_opened(self):
        reactor.CallLaterOnce(self.pause.engine.pause).schedule(20)

    def parse(self, response):
        logger.info("Urls passed to: %s", response.url)
The CallLaterOnce class is defined as follows (I updated the scheduler in the reactor.py module):
class CallLaterOnce:
    """Schedule a function to be called in the next reactor loop, but only if
    it hasn't been already scheduled since the last time it ran.
    """

    def __init__(self, func, *a, **kw):
        self._func = func
        self._a = a
        self._kw = kw
        self._call = None

    def schedule(self, delay=0):
        from twisted.internet import reactor
        if self._call is None:
            self._call = task.deferLater(reactor, 0, self)
            scheduler = task.LoopingCall(self._call)
            scheduler.start(delay)
            scheduler.stop()
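For reference, task.deferLater(reactor, delay, f, *args) schedules f(*args) after delay seconds and returns a Deferred, so its third argument must be a callable; passing the timer instance itself is what raises 'timer' object is not callable. A minimal sketch of the calling convention (an illustration only, not a drop-in fix for the example above):

from twisted.internet import reactor, task


def pause(seconds):
    # plain callable: deferLater will invoke pause(seconds) after the delay
    print(f"paused for {seconds}s")


def schedule(delay=2, seconds=1):
    # deferLater(reactor, delay, callable, *args) returns a Deferred
    return task.deferLater(reactor, delay, pause, seconds)


if __name__ == '__main__':
    loop = task.LoopingCall(schedule)  # re-run schedule every 5 seconds
    loop.start(5)
    reactor.callLater(12, reactor.stop)
    reactor.run()

Because schedule returns a Deferred, LoopingCall waits for it to fire before starting the next interval.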


How to load test a gRPC server with Locust

I have a simple gRPC server that has two services, signin and ping, encapsulated in the following class, which also has a private method to authenticate the requests:
import time

import grpc

import pingpong_pb2
import pingpong_pb2_grpc


class Listener(pingpong_pb2_grpc.PingPongServiceServicer):
    def __init__(self):
        # counter_g is assumed to be defined elsewhere in the project
        self.counter = counter_g
        self.last_print_time = time.time()

    def __str__(self):
        return self.__class__.__name__

    def auth_request(self, request, context):
        metadata_dict = dict(context.invocation_metadata())
        if metadata_dict.get("authorization").split(" ")[1] == "jf90845h5gfip345t8":
            pass
        else:
            print("Auth Failed")
            context.abort(grpc.StatusCode.UNAUTHENTICATED, "Auth Failed")

    def signin(self, request, context):
        """The signin function is the rpc call that is called by the client"""
        if request.username == "test" and request.password == "test":
            print('Signin Success')
            return pingpong_pb2.SignInResponse(token="jf90845h5gfip345t8", success=True)
        else:
            print('Signin Failed')
            return pingpong_pb2.SignInResponse(token="bad token", success=False)

    def ping(self, request, context):
        """The ping function is the rpc call that is called by the client"""
        self.auth_request(request, context)
        self.counter += 1
        if self.counter > 1000:
            print("1000 calls in %3f seconds" % (time.time() - self.last_print_time))
            self.last_print_time = time.time()
            self.counter = 0
        response = pingpong_pb2.Pong(count=request.count + 1)
        return response
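For context, the PingClient used by the Locust script further down is not shown in the question. A hypothetical minimal version, assuming the generated pingpong_pb2 / pingpong_pb2_grpc modules and invented request message names (SignInRequest, Ping), might look like this:

import grpc

import pingpong_pb2
import pingpong_pb2_grpc


class PingClient:
    """Hypothetical client matching the calls used in the Locust script below."""

    def __init__(self, host):
        self.host = host
        self.stub = None
        self.token = None

    def connect_to_server(self):
        channel = grpc.insecure_channel(self.host)
        self.stub = pingpong_pb2_grpc.PingPongServiceStub(channel)

    def set_token(self):
        # SignInRequest is an assumed message name; the server above returns SignInResponse
        response = self.stub.signin(pingpong_pb2.SignInRequest(username="test", password="test"))
        self.token = response.token

    def ping(self, count=0):
        # the server reads the token from the "authorization" metadata ("Bearer <token>")
        metadata = (("authorization", f"Bearer {self.token}"),)
        return self.stub.ping(pingpong_pb2.Ping(count=count), metadata=metadata)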
In order to make the gRPC tasks report back execution time and success/failure events, I wrote this decorator:
import time

import grpc
from locust import events


def grpctask(func):
    def wrapper(*args, **kwargs):
        # get task's function name
        task_name = func.__name__
        start = time.time()
        result = None
        try:
            result = func(*args, **kwargs)
        except grpc.RpcError as e:
            total = int((time.time() - start) * 1000)
            events.request_failure.fire(request_type="grpc",
                                        name=task_name,
                                        response_time=total,
                                        response_length=0,
                                        exception=e)
        else:
            total = int((time.time() - start) * 1000)
            events.request_success.fire(request_type="grpc",
                                        name=task_name,
                                        response_time=total,
                                        response_length=5)
        return result
    return wrapper
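If you are on Locust 2.x, note that the request_success / request_failure hooks used above were merged into a single events.request hook; a hedged variant of the same decorator with the newer event API:

import time

import grpc
from locust import events


def grpctask(func):
    # same idea as above, but firing the single events.request hook from Locust 2.x
    def wrapper(*args, **kwargs):
        start = time.time()
        result = None
        exception = None
        try:
            result = func(*args, **kwargs)
        except grpc.RpcError as e:
            exception = e
        events.request.fire(
            request_type="grpc",
            name=func.__name__,
            response_time=int((time.time() - start) * 1000),
            response_length=0,
            exception=exception,
        )
        return result
    return wrapper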
My user behaviour is as follows:
Every 31 seconds the user should execute (behaviour 1):
ping_server_1
ping_server_2
ping_server_3
(note that each function is different; they only have similar names)
Every 43 seconds the user should execute (behaviour 2):
hello_server_1
hello_server_2
The two user actions should be independent, meaning that the user may execute both at the same time (not really in parallel; the wait time between behaviour 1 and behaviour 2 should just be zero).
I wrote the following script, but nesting ping_server_1, ping_server_2 and ping_server_3 inside a task made Locust unable to show data for each of those sub-tasks:
from locust import TaskSet, between, task, User, events, HttpUser, constant, SequentialTaskSet
import random
import grpc
from google.protobuf import json_format
from client import PingClient
import time
from tools import grpctask


class TaskOne(SequentialTaskSet):

    @task
    class PingTest(SequentialTaskSet):
        host = "localhost:9999"

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.stub = None
            self.vacancy_id = None
            self.token = None
            self.ping_client = PingClient(host="localhost:9999")

        def on_start(self):
            self.connect_to_server()
            self.login()

        def connect_to_server(self):
            # use the ping client to connect to the server
            self.ping_client.connect_to_server()

        def login(self):
            # use the ping client to login
            self.ping_client.set_token()

        @task
        @grpctask
        def ping_server(self):
            self.ping_client.ping()

        @task
        @grpctask
        def ping_server_2(self):
            self.ping_client.ping()

        @task
        @grpctask
        def ping_server_3(self):
            self.ping_client.ping()
            self.interrupt()

    @task
    def empty(self):
        print("PingTest is empty")
        self.interrupt()


class TaskTwo(SequentialTaskSet):

    @task
    class HelloServer(TaskSet):
        host = "localhost:9999"

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.stub = None
            self.vacancy_id = None
            self.token = None
            self.ping_client = PingClient(host="localhost:9999")

        def on_start(self):
            self.connect_to_server()
            self.login()

        def connect_to_server(self):
            # use the ping client to connect to the server
            self.ping_client.connect_to_server()

        def login(self):
            # use the ping client to login
            self.ping_client.set_token()

        @task
        @grpctask
        def hello_server(self):
            self.ping_client.ping()

        @task
        @grpctask
        def hello_server_2(self):
            self.ping_client.ping()
            self.interrupt()

    @task
    def empty(self):
        print("TaskTwo is empty")
        self.interrupt()


class PingUser(User):
    # force TaskOne to be executed every 31 seconds,
    # and TaskTwo to be executed every 43 seconds
    tasks = [TaskOne, TaskTwo]
Is there a way to define a wait time for TaskOne and TaskTwo independently of each other?
If not, what can be done to achieve the user behaviour described above while still treating each function as a task, so that I get metrics for each function? (Writing each behaviour as one function won't give metrics per function.)
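One possible direction (a sketch, not a definitive answer): split the two behaviours across two separate User classes, each with its own constant_pacing wait time, and keep the @grpctask decorator on one method per gRPC call so Locust still reports a metric per function. This assumes the same PingClient and grpctask helpers as above, and it models the behaviours as two user types rather than one user running both, which may or may not fit your test:

from locust import User, task, constant_pacing

from client import PingClient  # assumed, as in the question
from tools import grpctask     # assumed, as in the question


class BehaviourOneUser(User):
    # constant_pacing starts a new task iteration every 31 seconds,
    # regardless of how long the pings themselves take (as long as they finish in time)
    wait_time = constant_pacing(31)

    def on_start(self):
        self.ping_client = PingClient(host="localhost:9999")
        self.ping_client.connect_to_server()
        self.ping_client.set_token()

    @task
    def behaviour_one(self):
        self.ping_server_1()
        self.ping_server_2()
        self.ping_server_3()

    # each call is wrapped individually, so Locust reports one metric per function
    @grpctask
    def ping_server_1(self):
        self.ping_client.ping()

    @grpctask
    def ping_server_2(self):
        self.ping_client.ping()

    @grpctask
    def ping_server_3(self):
        self.ping_client.ping()


class BehaviourTwoUser(User):
    wait_time = constant_pacing(43)

    def on_start(self):
        self.ping_client = PingClient(host="localhost:9999")
        self.ping_client.connect_to_server()
        self.ping_client.set_token()

    @task
    def behaviour_two(self):
        self.hello_server_1()
        self.hello_server_2()

    @grpctask
    def hello_server_1(self):
        self.ping_client.ping()

    @grpctask
    def hello_server_2(self):
        self.ping_client.ping()

constant_pacing spaces task iterations from start to start, so behaviour 1 repeats every 31 seconds and behaviour 2 every 43 seconds independently of each other.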

Make callback to function

I want to make a callback to spider_opened with the values given by deferred1 and deferred2. This should pause and then immediately unpause the spiders. However, I am getting a fairly simple Python issue:
TypeError: spider_opened() takes 1 positional argument but 2 were given
It turns out that when I fire multiple callbacks/errbacks, Python interprets this as passing extra positional arguments back. If I comment out one of them, I do not get the issue. So how do I implement this properly?
Here's my example scraper:
import scrapy
from scrapy.utils import reactor
from scrapy import signals
import logging
from twisted.internet import defer

logger = logging.getLogger(__name__)


class TestSpider(scrapy.Spider):
    name = 'pause_test'
    start_urls = [f'http://quotes.toscrape.com/page/{i}/' for i in range(1, 5)]
    custom_settings = {
        'DOWNLOAD_DELAY': 1
    }

    def __init__(self, stats, pause):
        self.stats = stats
        self.pause = pause

    @classmethod
    def from_crawler(cls, crawler):
        stat = cls(crawler.stats, crawler)
        crawler.signals.connect(stat.spider_later, signals.response_downloaded)
        crawler.signals.connect(stat.spider_opened, signals.spider_opened)
        return stat

    def spider_opened(self):
        for (success, value) in self:
            if success:
                print(value)
                self.pause.engine.pause()
                print('Success:', value)
            else:
                self.pause.engine.unpause()
                print('Failure:', value.getErrorMessage())

    def spider_later(self):
        # Create two deferreds.
        deferred1 = defer.Deferred()
        deferred2 = defer.Deferred()
        # Pack them into a DeferredList
        dl = defer.DeferredList([deferred1, deferred2], consumeErrors=True)
        # Add our callback
        dl.addCallback(self.spider_opened)
        # Fire our deferreds with various values.
        deferred1.callback(True)
        deferred2.errback(Exception('bang!'))

    def parse(self, response):
        logger.info("Urls passed to: %s", response.url)
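For what it's worth, the TypeError comes from the DeferredList itself: it fires its callback with a single argument, a list of (success, result) tuples, so the callback has to accept that argument in addition to self. A minimal standalone sketch of the calling convention, using only the standard Twisted API:

from twisted.internet import defer


def on_results(results):
    # DeferredList passes one argument: a list of (success, value) tuples
    for success, value in results:
        if success:
            print('Success:', value)
        else:
            print('Failure:', value.getErrorMessage())


deferred1 = defer.Deferred()
deferred2 = defer.Deferred()
dl = defer.DeferredList([deferred1, deferred2], consumeErrors=True)
dl.addCallback(on_results)
deferred1.callback(True)
deferred2.errback(Exception('bang!'))

In the spider above, that would correspond to a signature like spider_opened(self, results), iterating over results rather than self.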

Python threading.join() hangs

My problem is as follows:
I have a class that inherits from threading.Thread that I want to be able to stop gracefully. This class also has a Queue it gets its work from.
Since there are quite a few classes in my project that should have this behaviour, I've created some superclasses to reduce duplicated code, like this:
Thread related behaviour:
from threading import Thread, Event


class StoppableThread(Thread):
    def __init__(self):
        Thread.__init__(self)
        self._stop = Event()

    def stop(self):
        self._stop.set()

    def stopped(self):
        return self._stop.isSet()
Queue related behaviour:
from queue import Queue


class Queueable():
    def __init__(self):
        self._queue = Queue()

    def append_to_job_queue(self, job):
        self._queue.put(job)
Combining the two above and adding queue.join() to the stop() call:
class StoppableQueueThread(StoppableThread, Queueable):
    def __init__(self):
        StoppableThread.__init__(self)
        Queueable.__init__(self)

    def stop(self):
        super(StoppableQueueThread, self).stop()
        self._queue.join()
A base class for a datasource:
from abc import ABC, abstractmethod


class DataSource(StoppableThread, ABC):
    def __init__(self, data_parser):
        StoppableThread.__init__(self)
        self.setName("DataSource")
        ABC.__init__(self)
        self._data_parser = data_parser

    def run(self):
        while not self.stopped():
            record = self._fetch_data()
            self._data_parser.append_to_job_queue(record)

    @abstractmethod
    def _fetch_data(self):
        """implement logic here for obtaining a data piece
        should return the fetched data"""
An implementation for a datasource:
from csv import reader
from queue import Queue


class CSVDataSource(DataSource):
    def __init__(self, data_parser, file_path):
        DataSource.__init__(self, data_parser)
        self.file_path = file_path
        self.csv_data = Queue()
        print('loading csv')
        self.load_csv()
        print('done loading csv')

    def load_csv(self):
        """Loops through csv and adds data to a queue"""
        with open(self.file_path, 'r') as f:
            self.reader = reader(f)
            next(self.reader, None)  # skip header
            for row in self.reader:
                self.csv_data.put(row)

    def _fetch_data(self):
        """Returns next item of the queue"""
        item = self.csv_data.get()
        self.csv_data.task_done()
        print(self.csv_data.qsize())
        return item
Suppose there is a CSVDataSource instance called ds; if I want to stop the thread, I call:
ds.stop()
ds.join()
The ds.join() call, however, never returns. I'm not sure why, because the run() method does check whether the stop event is set.
Any ideas?
Update
A little more clarity, as requested: the application is built up of several threads. The RealStrategy thread (below) is the owner of all the other threads and is responsible for starting and terminating them. I haven't set the daemon flag for any of the threads, so they should be non-daemonic by default.
The main thread looks like this:
import signal
import sys

if __name__ == '__main__':
    def exit_handler(signal, frame):
        rs.stop_engine()
        rs.join()
        sys.exit(0)

    signal.signal(signal.SIGINT, exit_handler)
    rs = RealStrategy()
    rs.run_engine()
And here are the rs.run_engine() and rs.stop_engine() methods that are called in main:
class RealStrategy(Thread):
    .....
    .....

    def run_engine(self):
        self.on_start()
        self._order_handler.start()
        self._data_parser.start()
        self._data_source.start()
        self.start()

    def stop_engine(self):
        self._data_source.stop()
        self._data_parser.stop()
        self._order_handler.stop()
        self._data_source.join()
        self._data_parser.join()
        self._order_handler.join()
        self.stop()
If you want to use queue.Queue.join, then you must also use queue.Queue.task_done. You can read the linked documentation, or see the following excerpt from it:
Queue.task_done()
Indicate that a formerly enqueued task is complete.
Used by queue consumer threads. For each get() used to fetch a task, a
subsequent call to task_done() tells the queue that the processing on
the task is complete.
If a join() is currently blocking, it will resume when all items have
been processed (meaning that a task_done() call was received for every
item that had been put() into the queue).
Raises a ValueError if called more times than there were items placed
in the queue.
Queue.join()
Blocks until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls
task_done() to indicate that the item was retrieved and all work on it
is complete. When the count of unfinished tasks drops to zero, join()
unblocks.
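In other words, every get() has to be balanced by a task_done() before join() can return. A minimal illustration of that contract, independent of the code in the question:

import queue
import threading

q = queue.Queue()


def worker():
    while True:
        item = q.get()
        if item is None:    # sentinel: stop the worker
            q.task_done()
            break
        print('processed', item)
        q.task_done()       # every get() is matched by a task_done()


threading.Thread(target=worker).start()
for i in range(5):
    q.put(i)
q.put(None)
q.join()                    # returns once every put item has been task_done'd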
To test your problem, an example implementation was created to find out what was going on. It is slightly different from how your program works, but it demonstrates one approach to solving your problem:
#! /usr/bin/env python3
import abc
import csv
import pathlib
import queue
import sys
import threading
import time


def main():
    source_path = pathlib.Path(r'C:\path\to\file.csv')
    data_source = CSVDataSource(source_path)
    data_source.start()
    processor = StoppableThread(target=consumer, args=[data_source])
    processor.start()
    time.sleep(0.1)
    data_source.stop()


def consumer(data_source):
    while data_source.empty:
        time.sleep(0.001)
    while not data_source.empty:
        task = data_source.get_from_queue(True, 0.1)
        print(*task.data, sep=', ', flush=True)
        task.done()


class StopThread(StopIteration):
    pass


threading.SystemExit = SystemExit, StopThread


class StoppableThread(threading.Thread):
    def _bootstrap(self, stop=False):
        # noinspection PyProtectedMember
        if threading._trace_hook:
            raise RuntimeError('cannot run thread with tracing')

        def terminate():
            nonlocal stop
            stop = True

        self.__terminate = terminate

        # noinspection PyUnusedLocal
        def trace(frame, event, arg):
            if stop:
                raise StopThread

        sys.settrace(trace)
        super()._bootstrap()

    def terminate(self):
        try:
            self.__terminate()
        except AttributeError:
            raise RuntimeError('cannot terminate thread '
                               'before it is started') from None


class Queryable:
    def __init__(self, maxsize=1 << 10):
        self.__queue = queue.Queue(maxsize)

    def add_to_queue(self, item):
        self.__queue.put(item)

    def get_from_queue(self, block=True, timeout=None):
        return self.__queue.get(block, timeout)

    @property
    def empty(self):
        return self.__queue.empty()

    @property
    def full(self):
        return self.__queue.full()

    def task_done(self):
        self.__queue.task_done()

    def join_queue(self):
        self.__queue.join()


class StoppableQueryThread(StoppableThread, Queryable):
    def __init__(self, target=None, name=None, args=(), kwargs=None,
                 *, daemon=None, maxsize=1 << 10):
        super().__init__(None, target, name, args, kwargs, daemon=daemon)
        Queryable.__init__(self, maxsize)

    def stop(self):
        self.terminate()
        self.join_queue()


class DataSource(StoppableQueryThread, abc.ABC):
    @abc.abstractmethod
    def __init__(self, maxsize=1 << 10):
        super().__init__(None, 'DataSource', maxsize=maxsize)

    def run(self):
        while True:
            record = self._fetch_data()
            self.add_to_queue(record)

    @abc.abstractmethod
    def _fetch_data(self):
        pass


class CSVDataSource(DataSource):
    def __init__(self, source_path):
        super().__init__()
        self.__data_parser = self.__build_data_parser(source_path)

    @staticmethod
    def __build_data_parser(source_path):
        with source_path.open(newline='') as source:
            parser = csv.reader(source)
            next(parser, None)
            yield from parser

    def _fetch_data(self):
        try:
            return Task(next(self.__data_parser), self.task_done)
        except StopIteration:
            raise StopThread from None


class Task:
    def __init__(self, data, callback):
        self.__data = data
        self.__callback = callback

    @property
    def data(self):
        return self.__data

    def done(self):
        self.__callback()


if __name__ == '__main__':
    main()

Class Decorators Singleton?

So, for example, I'm making an async decorator and want to limit the number of concurrent threads:
from multiprocessing import cpu_count
from threading import Thread


class async:
    def __init__(self, function):
        self.func = function
        self.max_threads = cpu_count()
        self.current_threads = []

    def __call__(self, *args, **kwargs):
        func_thread = Thread(target=self.func, args=args, kwargs=kwargs)
        func_thread.start()
        self.current_threads.append(func_thread)
        while len(self.current_threads) > self.max_threads:
            self.current_threads = [t for t in self.current_threads if t.isAlive()]


from time import sleep

@async
def printA():
    sleep(1)
    print "A"

@async
def printB():
    sleep(1)
    print "B"
Is this going to limit the total number of concurrent threads? I.e., if I had 8 cores, would the current code end up with 16+ threads because two separate async objects exist?
If so, how would I fix that?
Thanks!
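Yes: each decorated function gets its own async instance, with its own current_threads list and its own max_threads, so two decorated functions can together start up to twice the intended number of threads. One way to share the limit across every decorated function is a class-level semaphore; a rough sketch (the class is renamed because async is a reserved word in Python 3, and it blocks the caller instead of busy-waiting):

from multiprocessing import cpu_count
from threading import Thread, BoundedSemaphore
from time import sleep


class limited_async:
    # class-level semaphore, shared by every decorated function,
    # so the limit applies to the total number of threads, not per decorator instance
    _slots = BoundedSemaphore(cpu_count())

    def __init__(self, function):
        self.func = function

    def __call__(self, *args, **kwargs):
        def run():
            try:
                self.func(*args, **kwargs)
            finally:
                self._slots.release()

        self._slots.acquire()  # blocks the caller until a slot is free
        Thread(target=run).start()


@limited_async
def printA():
    sleep(1)
    print("A")


@limited_async
def printB():
    sleep(1)
    print("B")

Because _slots lives on the class rather than the instance, printA and printB draw from the same pool of cpu_count() slots.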

Tornado gen.sleep add delay

I'm trying to add a delay between requests in an asynchronous way.
When I use Tornado's gen.sleep(x), my function (launch) doesn't get executed.
If I remove yield from yield gen.sleep(1.0), the function is called, but no delay is added.
How do I add a delay between requests in my for loop? I need to control the requests per second sent to an external API.
If I use time.sleep, the response is delayed until after all requests are completed.
I tried adding the @gen.engine decorator to the launch function, with no results.
Code:
import collections
import tornado.httpclient


class BacklogClient(object):
    MAX_CONCURRENT_REQUESTS = 20

    def __init__(self, ioloop):
        self.ioloop = ioloop
        self.client = tornado.httpclient.AsyncHTTPClient(max_clients=self.MAX_CONCURRENT_REQUESTS)
        self.client.configure(None, defaults=dict(connect_timeout=20, request_timeout=30))
        self.backlog = collections.deque()
        self.concurrent_requests = 0

    def __get_callback(self, function):
        def wrapped(*args, **kwargs):
            self.concurrent_requests -= 1
            self.try_run_request()
            return function(*args, **kwargs)
        return wrapped

    def try_run_request(self):
        while self.backlog and self.concurrent_requests < self.MAX_CONCURRENT_REQUESTS:
            request, callback = self.backlog.popleft()
            self.client.fetch(request, callback=callback)
            self.concurrent_requests += 1

    def fetch(self, request, callback=None):
        wrapped = self.__get_callback(callback)
        self.backlog.append((request, wrapped))
        self.try_run_request()


import time
from tornado import ioloop, httpclient, gen


class TornadoBacklog:
    def __init__(self):
        self.queue = 0
        self.debug = 1
        self.toProcess = [
            'http://google.com',
            'http://yahoo.com',
            'http://nytimes.com',
            'http://msn.com',
            'http://cnn.com',
            'http://twitter.com',
            'http://facebook.com',
        ]

    def handle_request(self, response):
        print response.code
        if not self.backlog.backlog and self.backlog.concurrent_requests == 0:
            ioloop.IOLoop.instance().stop()

    def launch(self):
        self.ioloop = ioloop.IOLoop.current()
        self.backlog = BacklogClient(self.ioloop)
        for item in self.toProcess:
            yield gen.sleep(1.0)
            print item
            self.backlog.fetch(
                httpclient.HTTPRequest(
                    item,
                    method='GET',
                    headers=None,
                ),
                self.handle_request
            )
        self.ioloop.start()


def main():
    start_time = time.time()
    scraper = TornadoBacklog()
    scraper.launch()
    elapsed_time = time.time() - start_time
    print('Process took %f seconds processed %d items.' % (elapsed_time, len(scraper.toProcess)))


if __name__ == "__main__":
    main()
Reference: https://github.com/tornadoweb/tornado/issues/1400
Tornado coroutines have two components:
They contain "yield" statements
They are decorated with "gen.coroutine"
Use the "coroutine" decorator on your "launch" function:
@gen.coroutine
def launch(self):
Run a Tornado coroutine from start to finish like this:
tornado.ioloop.IOLoop.current().run_sync(launch)
Remove the call to "ioloop.start" from your "launch" function: the loop runs the "launch" function, not vice-versa.
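Putting those three points together, a simplified sketch of the reworked launch function; it drops the BacklogClient and just shows the pacing, and assumes Tornado 4.1+ for gen.sleep and raise_error:

from tornado import gen, httpclient, ioloop


@gen.coroutine
def launch(urls):
    client = httpclient.AsyncHTTPClient()
    for url in urls:
        yield gen.sleep(1.0)  # one-second pause before each request
        response = yield client.fetch(url, raise_error=False)
        print(url, response.code)


if __name__ == '__main__':
    to_process = ['http://google.com', 'http://yahoo.com']
    # run_sync starts the loop, runs the coroutine to completion, then stops the loop
    ioloop.IOLoop.current().run_sync(lambda: launch(to_process))

Each loop iteration yields gen.sleep before issuing the next fetch, so the loop never exceeds one request per second to the external API.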
