I'm trying to use the multiprocessing library to parallelize some expensive calculations without blocking others that are much lighter. Both need to interact through some shared variables, although they may run at different paces.
To show this, I have created the following example, which works fine:
import multiprocessing
import time
import numpy as np
class SumClass:
    def __init__(self):
        self.result = 0.0
        self.p = None
        self.return_value = None

    def expensive_function(self, new_number, return_value):
        # Execute expensive calculation
        #######
        time.sleep(np.random.random_integers(5, 10, 1))
        return_value.value = self.result + new_number
        #######

    def execute_function(self, new_number):
        print(' New number received: %f' % new_number)
        self.return_value = multiprocessing.Value("f", 0.0, lock=True)
        self.p = multiprocessing.Process(target=self.expensive_function, args=(new_number, self.return_value))
        self.p.start()

    def is_executing(self):
        if self.p is not None:
            if not self.p.is_alive():
                self.result = self.return_value.value
                self.p = None
                return False
            else:
                return True
        else:
            return False

if __name__ == '__main__':
    sum_obj = SumClass()
    current_value = 0
    while True:
        if not sum_obj.is_executing():
            # Randomly determine whether the function must be executed or not
            if np.random.rand() < 0.25:
                print('Current sum value: %f' % sum_obj.result)
                new_number = np.random.rand(1)[0]
                sum_obj.execute_function(new_number)
        # Execute other (light) stuff
        #######
        print('Executing other stuff')
        current_value += sum_obj.result * 0.1
        print('Current value: %f' % current_value)
        time.sleep(1)
        #######
Basically, in the main loop some light work is executed, and depending on a random condition, some heavy work is sent to another process, provided the previous one has already finished. The heavy work is carried out by an object which needs to store some data between executions. Although expensive_function takes some time, the light work keeps executing without being blocked.
Although the above code gets the job done, I'm wondering: is it the best/most appropriate way to do this?
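As an aside (my sketch, not part of the original question), the same poll-for-completion pattern can be written with concurrent.futures, which handles the result plumbing for you; expensive_sum here is a hypothetical stand-in for the heavy work:
import time
import random
from concurrent.futures import ProcessPoolExecutor

def expensive_sum(current_result, new_number):
    # Stand-in for the expensive calculation
    time.sleep(random.randint(5, 10))
    return current_result + new_number

if __name__ == '__main__':
    result = 0.0
    future = None
    with ProcessPoolExecutor(max_workers=1) as executor:
        for _ in range(20):
            # Harvest a finished computation, if any
            if future is not None and future.done():
                result = future.result()
                future = None
            # Kick off new heavy work only if the previous run finished
            if future is None and random.random() < 0.25:
                future = executor.submit(expensive_sum, result, random.random())
            # The light work keeps running unblocked
            print('Current result: %f' % result)
            time.sleep(1)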
Besides, let us suppose the class SumClass has an instance of another object, which also needs to store data. For example:
import multiprocessing
import time
import numpy as np
class Operator:
    def __init__(self):
        self.last_value = 1.0

    def operate(self, value):
        print(' Operation, last value: %f' % self.last_value)
        self.last_value *= value
        return self.last_value

class SumClass:
    def __init__(self):
        self.operator_obj = Operator()
        self.result = 0.0
        self.p = None
        self.return_value = None

    def expensive_function(self, new_number, return_value):
        # Execute expensive calculation
        #######
        time.sleep(np.random.random_integers(5, 10, 1))
        # Apply operation
        number = self.operator_obj.operate(new_number)
        # Apply other operation
        return_value.value = self.result + number
        #######

    def execute_function(self, new_number):
        print(' New number received: %f' % new_number)
        self.return_value = multiprocessing.Value("f", 0.0, lock=True)
        self.p = multiprocessing.Process(target=self.expensive_function, args=(new_number, self.return_value))
        self.p.start()

    def is_executing(self):
        if self.p is not None:
            if not self.p.is_alive():
                self.result = self.return_value.value
                self.p = None
                return False
            else:
                return True
        else:
            return False

if __name__ == '__main__':
    sum_obj = SumClass()
    current_value = 0
    while True:
        if not sum_obj.is_executing():
            # Randomly determine whether the function must be executed or not
            if np.random.rand() < 0.25:
                print('Current sum value: %f' % sum_obj.result)
                new_number = np.random.rand(1)[0]
                sum_obj.execute_function(new_number)
        # Execute other (light) stuff
        #######
        print('Executing other stuff')
        current_value += sum_obj.result * 0.1
        print('Current value: %f' % current_value)
        time.sleep(1)
        #######
Now, inside expensive_function, a member function of the Operator object is used, which needs to remember the number passed to it.
As expected, the member variable last_value does not change in the parent: the child process works on a copy of the object, so any update made there is lost when it exits.
Is there any way of doing this properly?
I can imagine I could rearrange everything so that I only need one class level, and it would work well. However, this is a toy example; in reality there are several levels of complex objects, and that would be hard.
Thank you very much in advance!
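One way this is commonly handled (my sketch, not from the original post) is to have the child send its updated state back alongside the result, for example through a multiprocessing.Queue, and re-absorb it in the parent once the process finishes; the objects involved must be picklable, and the polling from the question is omitted here for brevity:
import time
import random
import multiprocessing

class Operator:
    def __init__(self):
        self.last_value = 1.0

    def operate(self, value):
        self.last_value *= value
        return self.last_value

def expensive_function(operator_obj, result, new_number, queue):
    # Runs in the child: it mutates its *copy* of the object,
    # then ships both the result and the updated object back.
    time.sleep(random.randint(1, 3))
    number = operator_obj.operate(new_number)
    queue.put((result + number, operator_obj))

if __name__ == '__main__':
    operator_obj = Operator()
    result = 0.0
    queue = multiprocessing.Queue()
    for _ in range(3):
        p = multiprocessing.Process(
            target=expensive_function,
            args=(operator_obj, result, random.random(), queue))
        p.start()
        result, operator_obj = queue.get()  # re-absorb the child's state
        p.join()
        print('result=%f last_value=%f' % (result, operator_obj.last_value))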
from concurrent.futures import ThreadPoolExecutor
from numba import jit
import requests
import timeit
def timer(number, repeat):
    def wrapper(func):
        # Note: this runs and times func once, at decoration time
        runs = timeit.repeat(func, number=number, repeat=repeat)
        print(sum(runs) / len(runs))
    return wrapper

URL = "https://httpbin.org/uuid"

# @jit(nopython=True, nogil=True, cache=True)  # numba cannot compile I/O code like this in nopython mode
def fetch(session, url):
    with session.get(url) as response:
        print(response.json()['uuid'])

@timer(1, 1)
def runner():
    with ThreadPoolExecutor(max_workers=25) as executor:
        with requests.Session() as session:
            executor.map(fetch, [session] * 100, [URL] * 100)
            executor.shutdown(wait=True)  # redundant inside the with block, but harmless
Maybe this might help.
I'm using ThreadPoolExecutor for multithreading; you can also use ProcessPoolExecutor.
For your compute-expensive operation you can use numba to compile your function to cached machine code for faster execution.
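For illustration (my addition, not from the answer above), numba's @jit pays off on pure numeric code rather than on I/O-bound code like fetch; cache=True persists the compiled result between runs:
import numpy as np
from numba import jit

@jit(nopython=True, cache=True)
def pairwise_sum(a):
    # A tight numeric loop is where numba shines
    total = 0.0
    for i in range(a.shape[0]):
        for j in range(a.shape[0]):
            total += a[i] * a[j]
    return total

print(pairwise_sum(np.random.rand(1000)))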
I'm having a problem with my code.
I am using SimPy for Python and I'm trying to build a P2P simulator with it.
Below is the code of my peer generator. I don't know why, but I never enter the function generate(): the console never shows the print('I am here').
Does anyone know what I'm doing wrong in my code? Sorry if I'm doing something very wrong.
import simpy
import random
# PARAMETERS
RANDOM_SEED = 93817
N_NODES = 10 # 2000
RUN_TIME = 10 # 86400 # 24 hours
TIME_TO_GENERATE = 3 # at each 3 seconds
# FUNCTIONS
def peer(env, N_PEER):
    print('Peer %d join at %d' % (N_PEER, env.now))

def chanceToGenerate():
    value = random.random() * 100
    if value < 50:
        return False
    else:
        return True

def generate(env, N_PEER):
    print('I am here')
    chance = chanceToGenerate()
    if chance:
        yield env.process(peer(env, N_PEER))
        return True
    else:
        return False

def peerGenerator(env):
    N_PEER = 0
    while True:
        if N_PEER < N_NODES:
            generated = generate(env, N_PEER)
            if generated:
                N_PEER += 1
        print('time: %d' % env.now)
        yield env.timeout(TIME_TO_GENERATE)
# RUNNING
random.seed(RANDOM_SEED)
env = simpy.Environment()
env.process(peerGenerator(env))
env.run(until=RUN_TIME)
I solved the problem. What was my solution?
Answer: I removed the function generate() and moved the yield env.process(peer(env, N_PEER)) into the generator() function.
Why did I do that?
I have been reading the SimPy documentation and found out that I can't make a non-process function yield another process. So only the process function, here generator() inside peerGenerator, can yield other processes.
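The underlying Python behaviour (my addition, not from the original answer): because generate() contains a yield, calling it only creates a generator object and runs none of its body, which is why 'I am here' never printed; the generator object is also always truthy, so if generated: always passed. A minimal demonstration:
def generate():
    print('I am here')  # does not run on a bare call
    yield 1

g = generate()   # no output: this only creates a generator object
print(bool(g))   # True, even though nothing has executed yet
next(g)          # only now does 'I am here' print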
Code:
import simpy
import random
# PARAMETERS
RANDOM_SEED = 93817
N_NODES = 10 # 2000
RUN_TIME = 10 # 86400 # 24 hours
TIME_TO_GENERATE = 3 # at each 3 seconds
# FUNCTIONS
class peerGenerator:
    def __init__(self, env):
        self.env = env
        self.generator_proc = env.process(self.generator(env))

    def peer(self, env, N_PEER):
        print('Peer %d join at %d' % (N_PEER, env.now))
        yield env.timeout(0)

    def chanceToGenerate(self):
        value = random.randint(0, 100)
        if value < 50:
            print('Tried to create a peer')
            return False
        else:
            return True

    def generator(self, env):
        N_PEER = 0
        while True:
            if N_PEER < N_NODES:
                chance = self.chanceToGenerate()
                if chance:
                    yield env.process(self.peer(env, N_PEER))
                    N_PEER += 1
            print('time: %d' % env.now)
            yield env.timeout(TIME_TO_GENERATE)

# RUNNING
env = simpy.Environment()
bootstrap = peerGenerator(env)
env.run(until=RUN_TIME)
I've never had much opportunity to play with threading, but I need it for a project I'm working on. I've significantly simplified my problem for the example I present below, but I'm pretty sure the solution to the simpler problem will get me most of the way to solving the more complex one.
That said, here's my reduced case: I have a class whose job is to monitor an incoming data stream that is accessed through a class method, and I am calculating statistics from the stream. I would like to print a report about the incoming data to one terminal window at short intervals, and also print a summary report to another window at regular (longer) intervals.
The demo below generates data and prints both reports to the same window: how would I rework this to print the separate reports to two different windows?
from __future__ import division
from random import gauss
import time

class MyStreamMonitor(object):
    def __init__(self):
        self.sum = 0
        self.count = 0

    @property
    def mu(self):
        return self.sum / self.count

    def generate_values(self):
        while True:
            yield gauss(0, 1)

    def monitor(self, report_interval=1):
        start1 = time.time()
        start2 = time.time()
        for x in self.generate_values():
            self.sum += x
            self.count += 1
            # print this to terminal 1
            if time.time() - start1 > report_interval:
                start1 = time.time()
                print self.count, x
            # print this to terminal 2
            if time.time() - start2 > 5*report_interval:
                start2 = time.time()
                print self.mu

if __name__ == '__main__':
    stream = MyStreamMonitor()
    stream.monitor()
One approach is to write to a text file, and then in second terminal, just tail the file:
def monitor(self, report_interval=1):
    second = open('report.txt', 'wt')
    start1 = time.time()
    start2 = time.time()
    for x in self.generate_values():
        self.sum += x
        self.count += 1
        # print this to terminal 1
        if time.time() - start1 > report_interval:
            start1 = time.time()
            print self.count, x
            second.write('%d, %s\n' % (self.count, x))
            second.flush()  # make the line visible to tail immediately
        # print this to terminal 2
        if time.time() - start2 > 5*report_interval:
            start2 = time.time()
            print self.mu
            second.write('%s\n' % self.mu)
            second.flush()
Then on the second terminal:
$ tail -f report.txt
I ended up taking @reptilicus' advice and built this as a client/server application with redis. Here's a minimal working example:
server.py
from __future__ import division
from random import gauss
import time
import redis
class MyStreamMonitor(object):
    def __init__(self):
        self.sum = 0
        self.count = 0
        self.r = redis.StrictRedis()

    @property
    def mu(self):
        if self.count > 1:
            outv = self.sum / self.count
        else:
            outv = 0
        return outv

    def generate_values(self):
        while True:
            yield gauss(0, 1)

    def monitor(self):
        for x in self.generate_values():
            self.sum += x
            self.count += 1
            # This is the magic here
            self.r.publish('count', self.count)
            self.r.publish('mu', self.mu)

if __name__ == '__main__':
    stream = MyStreamMonitor()
    stream.monitor()
listener.py
import redis
import time
import sys
r = redis.StrictRedis()
channel = sys.argv[1]
interval = float(sys.argv[2])

while True:
    # Resubscribe at each iteration to ensure we are only receiving
    # the newest message
    pubsub = r.pubsub()
    pubsub.subscribe(channel)
    _ = pubsub.listen().next()  # the first message is always just a "1"
    message = pubsub.listen().next()
    print message['data']
    time.sleep(interval)
The server publishes data to the "count" and "mu" channels. So to run this, first we need to open up a terminal and kick off the server:
$ python server.py
Then we can open up a separate terminal for each channel we want to listen on, passing in the channel we want to listen to and the sleep interval as arguments.
First terminal:
$ python listener.py mu 1
Second terminal:
$ python listener.py count 2
For the record: installing redis was extremely painless and didn't really require any configuration at all. Depending on your needs installation/configuration may be more complex, but at least for this toy example I didn't need to do anything fancy.
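As a side note (my suggestion, not from the original post), newer redis-py versions provide pubsub.get_message(), which lets the listener subscribe once and poll, instead of resubscribing on every iteration; a sketch of listener.py in that style:
import sys
import time
import redis

r = redis.StrictRedis()
pubsub = r.pubsub(ignore_subscribe_messages=True)  # skip the initial "1"
pubsub.subscribe(sys.argv[1])
interval = float(sys.argv[2])

while True:
    latest = None
    # Drain anything queued up so we only keep the newest message
    while True:
        m = pubsub.get_message()
        if m is None:
            break
        latest = m
    if latest is not None:
        print(latest['data'])
    time.sleep(interval)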
import time  # useful for measuring code execution

class StopWatch:
    def __init__(self, startTime = 0, endTime = 0, elapsedTime = 0):
        self.__startTime = startTime
        self.__endTime = endTime
        self.__elapsedTime = elapsedTime

    def start(self):
        self.__startTime = time.clock()

    def stop(self):
        return self.getElapsedTime()

    def reset(self):
        self.__startTime = 0
        self.__elapsedTime = 0

    def getstarttime(self):
        return self.__startTime

    def getendtime(self):
        return self.__endTime

    def getElapsedTime(self):
        elapsedTime = self.__elapsedTime
        elapsedTime += ((time.clock() - self.__startTime) * 1000)
        return elapsedTime

def main():
    x = StopWatch()
    x.start
    a = time.clock()  # code only works with this line of code in place (I don't understand why?)
    sum = 0
    for i in range(1, 10000000):
        sum += i
    x.stop
    print("Elapsed execution time is", x.getElapsedTime())
    print(sum)
    x.reset

main()
The code fails to produce the correct result if I remove the
a = time.clock()
line. With it in place it produces the correct result, but I am not really sure why.
I realize there may be better ways to do this, but I'm kind of a beginner at Python, so I'd appreciate the help. Thanks! I am using a Windows system.
You wouldn't happen to be a Rubyist, would you? x.start works to call methods in Ruby, but not in Python. You need x.start() - notice the parentheses. You have the same problem with x.stop and x.reset.
a = time.clock() is helping because time.clock() will sometimes (platform-dependent) return the time since the first call to clock(), instead of since process start. The actual assignment to a isn't doing anything; it's simply creating a start point for clock to reference later. Don't rely on this - the Python docs state: "Return the CPU time or real time since the start of the process or since the first call to clock()."
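For what it's worth (my addition): time.clock() was deprecated in Python 3.3 and removed in 3.8; time.perf_counter() is the usual replacement and has no first-call quirk. A minimal sketch of the same stopwatch idea with it:
import time

class StopWatch:
    def __init__(self):
        self._start = None

    def start(self):
        self._start = time.perf_counter()

    def getElapsedTime(self):
        # Milliseconds since start(); perf_counter is monotonic
        return (time.perf_counter() - self._start) * 1000

watch = StopWatch()
watch.start()  # note the parentheses!
total = sum(range(1, 10000000))
print("Elapsed execution time is", watch.getElapsedTime())
print(total)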
I want to create a Python function that measures the time spent in each function and prints its name along with its time. How can I print the function name, and if there is another way to do so, please tell me.
import time

def measureTime(a):
    start = time.clock()
    a()
    elapsed = time.clock()
    elapsed = elapsed - start
    print "Time spent in (function name) is: ", elapsed
First and foremost, I highly suggest using a profiler or at least using timeit.
However, if you wanted to write your own timing method strictly to learn, here is somewhere to get started using a decorator.
Python 2:
import time

def timing(f):
    def wrap(*args):
        time1 = time.time()
        ret = f(*args)
        time2 = time.time()
        print '%s function took %0.3f ms' % (f.func_name, (time2-time1)*1000.0)
        return ret
    return wrap
And the usage is very simple, just use the @timing decorator:
@timing
def do_work():
    # code
Python 3:
import time

def timing(f):
    def wrap(*args, **kwargs):
        time1 = time.time()
        ret = f(*args, **kwargs)
        time2 = time.time()
        print('{:s} function took {:.3f} ms'.format(f.__name__, (time2-time1)*1000.0))
        return ret
    return wrap
Note I'm calling f.func_name to get the function name as a string (in Python 2), or f.__name__ in Python 3.
After playing with the timeit module, I don't like its interface, which is not so elegant compared to the following two methods.
The following code is in Python 3.
The decorator method
This is almost the same as @Mike's method. Here I add kwargs and functools.wraps to make it better.
import time
import functools

def timeit(func):
    @functools.wraps(func)
    def new_func(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        elapsed_time = time.time() - start_time
        print('function [{}] finished in {} ms'.format(
            func.__name__, int(elapsed_time * 1_000)))
        return result
    return new_func

@timeit
def foobar():
    mike = Person()
    mike.think(30)
The context manager method
import time
from contextlib import contextmanager

@contextmanager
def timeit_context(name):
    start_time = time.time()
    yield
    elapsed_time = time.time() - start_time
    print('[{}] finished in {} ms'.format(name, int(elapsed_time * 1_000)))
For example, you can use it like:
with timeit_context('My profiling code'):
    mike = Person()
    mike.think()
And the code within the with block will be timed.
Conclusion
Using the first method, you can easily comment out the decorator to get the normal code back. However, it can only time a whole function. If you have some part of code that you don't want to make into a function, then you can choose the second method.
For example, now you have
images = get_images()
big_image = ImagePacker.pack(images, width=4096)
drawer.draw(big_image)
Now you want to time the big_image = ... line. If you change it to a function, it will be:
images = get_images()
big_image = None

@timeit
def foobar():
    nonlocal big_image
    big_image = ImagePacker.pack(images, width=4096)

foobar()
drawer.draw(big_image)
That doesn't look so great... And what if you are in Python 2, which has no nonlocal keyword?
Instead, using the second method fits here very well:
images = get_images()
with timeit_context('foobar'):
    big_image = ImagePacker.pack(images, width=4096)
drawer.draw(big_image)
I don't see what the problem with the timeit module is. This is probably the simplest way to do it.
import timeit
timeit.timeit(a, number=1)
It's also possible to send arguments to the functions. All you need is to wrap your function up using decorators. More explanation here: http://www.pythoncentral.io/time-a-python-function/
The only case where you might be interested in writing your own timing statements is if you want to run a function only once and also want to obtain its return value.
The advantage of using the timeit module is that it lets you repeat the execution many times. This might be necessary because other processes might interfere with your timing accuracy. So, you should run it multiple times and look at the lowest value.
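For example (my illustration of that point), timeit.repeat runs the whole measurement several times, and taking the minimum filters out interference from other processes:
import timeit

def work():
    return sum(range(10000))

# 5 measurement rounds of 1000 calls each; keep the best round
runs = timeit.repeat(work, number=1000, repeat=5)
print("best of 5: %.6f s per 1000 calls" % min(runs))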
Timeit has two big flaws: it doesn't return the return value of the function, and it uses eval, which requires passing in extra setup code for imports. This solves both problems simply and elegantly:
import time

def timed(f):
    start = time.time()
    ret = f()
    elapsed = time.time() - start
    return ret, elapsed
timed(lambda: database.foo.execute('select count(*) from source.apachelog'))
(<sqlalchemy.engine.result.ResultProxy object at 0x7fd6c20fc690>, 4.07547402381897)
There is an easy tool for timing: https://github.com/RalphMao/PyTimer
It can work like a decorator:
import numpy as np
from pytimer import Timer

@Timer(average=False)
def matmul(a, b, times=100):
    for i in range(times):
        np.dot(a, b)
Output:
matmul:0.368434
matmul:2.839355
It can also work like a plug-in timer with namespace control (helpful if you are inserting it into a function which has a lot of code and may be called anywhere else).
timer = Timer()

def any_function():
    timer.start()
    for i in range(10):
        timer.reset()
        np.dot(np.ones((100,1000)), np.zeros((1000,500)))
        timer.checkpoint('block1')
        np.dot(np.ones((100,1000)), np.zeros((1000,500)))
        np.dot(np.ones((100,1000)), np.zeros((1000,500)))
        timer.checkpoint('block2')
        np.dot(np.ones((100,1000)), np.zeros((1000,1000)))
        for j in range(20):
            np.dot(np.ones((100,1000)), np.zeros((1000,500)))
    timer.summary()

for i in range(2):
    any_function()
Output:
========Timing Summary of Default Timer========
block2:0.065062
block1:0.032529
========Timing Summary of Default Timer========
block2:0.065838
block1:0.032891
Hope it helps.
Decorator method using the decorator Python library:
import decorator

@decorator
def timing(func, *args, **kwargs):
    '''Function timing wrapper
    Example of using:
    ``@timing()``
    '''
    fn = '%s.%s' % (func.__module__, func.__name__)
    timer = Timer()  # Timer and log are defined in the linked blog posts, not shown here
    with timer:
        ret = func(*args, **kwargs)
    log.info(u'%s - %0.3f sec' % (fn, timer.duration_in_seconds()))
    return ret
See the posts on my blog: the post on the mobilepro.pl blog and my post on Google Plus.
My way of doing it:
from time import time

def printTime(start):
    end = time()
    duration = end - start
    if duration < 60:
        return "used: " + str(round(duration, 2)) + "s."
    else:
        mins = int(duration / 60)
        secs = round(duration % 60, 2)
        if mins < 60:
            return "used: " + str(mins) + "m " + str(secs) + "s."
        else:
            hours = int(duration / 3600)
            mins = mins % 60
            return "used: " + str(hours) + "h " + str(mins) + "m " + str(secs) + "s."
Set a variable start = time() before executing the function/loops, and call printTime(start) right after the block, and you get the answer.
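A quick usage sketch (slow_work is a hypothetical stand-in for your own code):
from time import time, sleep

def slow_work():
    sleep(3)  # pretend to do something expensive

start = time()
slow_work()
print(printTime(start))  # prints something like: used: 3.0s.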
Elaborating on @Jonathan Ray's answer, I think this does the trick a bit better:
import time
import inspect
import logging

logger = logging.getLogger(__name__)

def timed(f: callable):
    start = time.time()
    ret = f()
    elapsed = 1000 * (time.time() - start)
    source_code = inspect.getsource(f).strip('\n')
    logger.info(source_code + ": " + str(elapsed) + " ms")
    return ret
It allows you to take a regular line of code, say a = np.sin(np.pi), and transform it rather simply into
a = timed(lambda: np.sin(np.pi))
so that the timing is printed to the logger and you can keep the same assignment of the result to a variable you might need for further work.
I suppose in Python 3.8 one could use the walrus operator :=, but I do not have 3.8 yet.
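For completeness (my sketch, assuming Python 3.8+), the walrus operator lets you assign and time in one statement without the lambda:
import time
import numpy as np

start = time.time()
print(f"sin(pi) = {(a := np.sin(np.pi))}, took {1000 * (time.time() - start):.3f} ms")
print(a)  # a stays bound for further work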
Below is a Timer class that:
Is easy to use: use it directly or as a decorator, in under 100 lines
Measures a lot: total calls, total time, average time, and std. deviation
Prints pretty times
Is thread-safe
This is how you use it:
# Create the timer
timer1 = Timer("a name", log_every=2)

# Use "with"
with timer1:
    print("timer1")

# Reuse as a decorator
@timer1
def my_func():
    print("my_func")

# Instantiate as a decorator
@Timer("another timer", log_every=1)
def my_func2():
    print("my_func2")

my_func()
my_func2()
my_func()
Below is the class
from datetime import datetime
import time, logging, math, threading
import six

class Timer(object):
    '''A general-purpose, thread-safe timer.'''
    def __init__(self, name, log_every = 1):
        self.name = name
        self.log_every = log_every
        self.calls = 0
        self.total_time = 0
        self.total_squared_time = 0
        self.min, self.max = None, 0
        # Make timer thread-safe by storing the times in thread-local storage.
        self._local = threading.local()
        self._lock = threading.Lock()

    def __enter__(self):
        """Start a new timer"""
        self._local.start = datetime.utcnow()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the timer, and report the elapsed time"""
        elapsed_time = (datetime.utcnow() - self._local.start).total_seconds()
        with self._lock:
            self.calls += 1
            self.total_time += elapsed_time
            if self.min is None or elapsed_time < self.min:
                self.min = elapsed_time
            if elapsed_time > self.max:
                self.max = elapsed_time
            self.total_squared_time += elapsed_time * elapsed_time
            if self.log_every and (self.calls % self.log_every) == 0:
                self.log()

    def __call__(self, fn):
        '''For use as a decorator.'''
        def decorated_timer_function(*args, **kwargs):
            with self:
                return fn(*args, **kwargs)
        return decorated_timer_function

    @classmethod
    def time_str(cls, secs):
        if isinstance(secs, six.string_types):
            try:
                secs = float(secs)
            except:
                return "(bad time: %s)" % secs
        sign = lambda x: x
        if secs < 0:
            secs = -secs
            sign = lambda x: ("-" + x)
        return sign("%d secs" % int(secs) if secs >= 120 else
                    "%.2f secs" % secs if secs >= 1 else
                    "%d ms" % int(secs * 1000) if secs >= .01 else
                    "%.2f ms" % (secs * 1000) if secs >= .0001 else
                    "%d ns" % int(secs * 1e9) if secs >= 1e-9 else
                    "%s" % secs)

    def log(self):
        if not self.calls:
            logging.info("<Timer %s: no calls>" % self.name)
            return
        avg = 1.0 * self.total_time / self.calls
        var = 1.0 * self.total_squared_time / self.calls - avg * avg
        std_dev = self.time_str(math.sqrt(var))
        total = self.time_str(self.total_time)
        min, max, avg = [self.time_str(t) for t in [self.min, self.max, avg]]
        logging.info("<Timer %s: N=%s, total=%s, avg=%s, min/max=%s/%s, std=%s>"
                     % (self.name, self.calls, total, avg, min, max, std_dev))
You can use timeit.default_timer along with a contextmanager:
from timeit import default_timer
from contextlib import contextmanager

@contextmanager
def timer():
    start_time = default_timer()
    try:
        yield
    finally:
        print("--- %s seconds ---" % (default_timer() - start_time))
Use it with a with statement:
def looper():
    for i in range(0, 100000000):
        pass

with timer():
    looper()
Output:
--- 2.651526927947998 seconds ---
Here is a generic solution
def timed(fn):
    # Import locally so the decorator is self-contained wherever it is used
    from time import perf_counter
    from functools import wraps

    # wraps preserves the metadata of fn
    @wraps(fn)
    def inner(*args, **kwargs):
        start = perf_counter()
        result = fn(*args, **kwargs)
        end = perf_counter()
        elapsed = end - start
        args_ = [str(a) for a in args]
        kwargs_ = ["{0}={1}".format(k, v) for (k, v) in kwargs.items()]
        all_args = args_ + kwargs_
        args_str = ",".join(all_args)
        print("{0} ({1}) took {2:.6f}s to run.".format(fn.__name__, args_str, elapsed))
        return result
    return inner
Define a function:
@timed
def sum_up(a, b):
    return a + b
Now call it:
sum_up(2, 9)
For the case of using timeit.timeit: if the command
timeit.timeit(function_to_test(1000000), number=10000)
raises ValueError: stmt is neither a string nor callable, that is because it passes the function's return value rather than the function itself; and if the command
timeit.timeit('function_to_test', number=10000)
raises NameError: name 'function_to_test' is not defined, that is because a string statement is evaluated in timeit's own namespace, which does not know your function (you would have to pass setup code).
Beware of "fixing" this with str() or an f-string, e.g. timeit.timeit(str(function_to_test(1000000)), number=10000): that calls the function once up front and then times evaluating the textual form of its result, which is a no-op, so the reported time is meaningless.
The reliable fix for a function that takes arguments is to wrap the call in a lambda (or functools.partial), i.e. timeit.timeit(lambda: function_to_test(1000000), number=10000); this takes longer precisely because it is now actually running the function on every repetition.
Here is a concrete example:
import timeit

def function_to_test(n):
    s = 1
    for i in range(n):
        s += 1
    return s

# number is kept small because each call loops a million times
print("time to run function_to_test:", timeit.timeit(lambda: function_to_test(1000000), number=10))