Python code to benchmark in flops using threading - python

I'm having trouble writing a benchmark code in python using threading. I was able to get my threading to work, but I can't get my object to return a value. I want to take the values and add them to a list so I can calculate the flops.
create class to carry out threading
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
return ex_time
def run(self):
t = threading.Thread(target = self.calculation)
t.start()
function to create threads
def make_threads(num):
times=[]
calcs=[]
for i in range(num):
print('start thread', i+1)
thread1=myThread()
t=thread1.start()
times.append(t)
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print(times)
#average out the times,add all the calculations to get the final numbers
#to calculate flops
time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()

Two ways to do this:
1. Using static variables (hacky, but efficient and quick)
Define some global variable that you then manipulate in the thread. I.e.:
import threading
import time
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
print("Running....")
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
self.myThreadValues[self.idValue] = ex_time
print(self.myThreadValues)
return ex_time
def setup(self,myThreadValues=None,idValue=None):
self.myThreadValues = myThreadValues
self.idValue = idValue
def run(self):
self.calculation()
#t = threading.Thread(target = self.calculation)
#t.start()
def make_threads(num):
threads=[]
calcs=[]
myThreadValues = {}
for i in range(num):
print('start thread', i+1)
myThreadValues[i] = 0
thread1=myThread()
thread1.setup(myThreadValues,i)
thread1.start()
#times.append(t)
threads.append(thread1)
# Now we need to wait for all the threads to finish. There are a couple ways to do this, but the best is joining.
print("joining all threads...")
for thread in threads:
thread.join()
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print("Final thread values: " + str(myThreadValues))
print("Done")
#average out the times,add all the calculations to get the final numbers
#to calculate flops
#time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()
2. The proper way to do this is with Processes
Processes are designed for passing information back and forth, versus threads which are commonly used for async work. See explanation here: https://docs.python.org/3/library/multiprocessing.html
See this answer: How can I recover the return value of a function passed to multiprocessing.Process?
import multiprocessing
from os import getpid
def worker(procnum):
print 'I am number %d in process %d' % (procnum, getpid())
return getpid()
if __name__ == '__main__':
pool = multiprocessing.Pool(processes = 3)
print pool.map(worker, range(5))

Related

shared memory between process and main task in python

with some help I could run a process in python, Now I wan't to share a value betwenn the two tasks. I can set the value inside the init, but I can't change it inside the run method.
And by the way: how to kill the process when the main process stops?
from multiprocessing import Process, Value
import serial
import time
class P(Process):
def __init__(self, num):
num.value = 15
super(P, self).__init__()
def run(self):
while True:
num.value = num.value + 1
print("run simple process")
time.sleep(0.5)
def main():
while True:
print("run main")
print (num.value)
time.sleep(2.5)
if __name__ == "__main__":
num = Value('d', 0.0)
p = P(num)
p.start()
#p.join()
main()
In your simplified case you just passed num value upon initialization time.
To be able to access that value in other process's methods - set it as a state of the process:
class P(Process):
def __init__(self, num):
self.num = num
self.num.value = 15
super(P, self).__init__()
def run(self):
while True:
self.num.value += 1
print("run simple process")
time.sleep(0.5)
For a more "serious" cases - consider using Managers and Synchronization primitives.

Python 3 Limit count of active threads (finished threads do not quit)

I want to limit the number of active threads. What i have seen is, that a finished thread stays alive and does not exit itself, so the number of active threads keep growing until an error occours.
The following code starts only 8 threads at a time but they stay alive even when they finished. So the number keeps growing:
class ThreadEx(threading.Thread):
__thread_limiter = None
__max_threads = 2
#classmethod
def max_threads(cls, thread_max):
ThreadEx.__max_threads = thread_max
ThreadEx.__thread_limiter = threading.BoundedSemaphore(value=ThreadEx.__max_threads)
def __init__(self, target=None, args:tuple=()):
super().__init__(target=target, args=args)
if not ThreadEx.__thread_limiter:
ThreadEx.__thread_limiter = threading.BoundedSemaphore(value=ThreadEx.__max_threads)
def run(self):
ThreadEx.__thread_limiter.acquire()
try:
#success = self._target(*self._args)
#if success: return True
super().run()
except:
pass
finally:
ThreadEx.__thread_limiter.release()
def call_me(test1, test2):
print(test1 + test2)
time.sleep(1)
ThreadEx.max_threads(8)
for i in range(0, 99):
t = ThreadEx(target=call_me, args=("Thread count: ", str(threading.active_count())))
t.start()
Due to the for loop, the number of threads keep growing to 99.
I know that a thread has done its work because call_me has been executed and threading.active_count() was printed.
Does somebody know how i make sure, a finished thread does not stay alive?
This may be a silly answer but to me it looks you are trying to reinvent ThreadPool.
from multiprocessing.pool import ThreadPool
from time import sleep
p = ThreadPool(8)
def call_me(test1):
print(test1)
sleep(1)
for i in range(0, 99):
p.apply_async(call_me, args=(i,))
p.close()
p.join()
This will ensure only 8 concurrent threads are running your function at any point of time. And if you want a bit more performance, you can import Pool from multiprocessing and use that. The interface is exactly the same but your pool will now be subprocesses instead of threads, which usually gives a performance boost as GIL does not come in the way.
I have changed the class according to the help of Hannu.
I post it for reference, maybe it's useful for others that come across this post:
import threading
from multiprocessing.pool import ThreadPool
import time
class MultiThread():
__thread_pool = None
#classmethod
def begin(cls, max_threads):
MultiThread.__thread_pool = ThreadPool(max_threads)
#classmethod
def end(cls):
MultiThread.__thread_pool.close()
MultiThread.__thread_pool.join()
def __init__(self, target=None, args:tuple=()):
self.__target = target
self.__args = args
def run(self):
try:
result = MultiThread.__thread_pool.apply_async(self.__target, args=self.__args)
return result.get()
except:
pass
def call_me(test1, test2):
print(test1 + test2)
time.sleep(1)
return 0
MultiThread.begin(8)
for i in range(0, 99):
t = MultiThread(target=call_me, args=("Thread count: ", str(threading.active_count())))
t.run()
MultiThread.end()
The maximum of threads is 8 at any given time determined by the method begin.
And also the method run returns the result of your passed function if it returns something.
Hope that helps.

Is it possible to execute function every x seconds in python, when it is performing pool.map?

I am running pool.map on big data array and i want to print report in console every minute.
Is it possible? As i understand, python is synchronous language, it can't do this like nodejs.
Perhaps it can be done by threading.. or how?
finished = 0
def make_job():
sleep(1)
global finished
finished += 1
# I want to call this function every minute
def display_status():
print 'finished: ' + finished
def main():
data = [...]
pool = ThreadPool(45)
results = pool.map(make_job, data)
pool.close()
pool.join()
You can use a permanent threaded timer, like those from this question: Python threading.timer - repeat function every 'n' seconds
from threading import Timer,Event
class perpetualTimer(object):
# give it a cycle time (t) and a callback (hFunction)
def __init__(self,t,hFunction):
self.t=t
self.stop = Event()
self.hFunction = hFunction
self.thread = Timer(self.t,self.handle_function)
def handle_function(self):
self.hFunction()
self.thread = Timer(self.t,self.handle_function)
if not self.stop.is_set():
self.thread.start()
def start(self):
self.stop.clear()
self.thread.start()
def cancel(self):
self.stop.set()
self.thread.cancel()
Basically this is just a wrapper for a Timer object that creates a new Timer object every time your desired function is called. Don't expect millisecond accuracy (or even close) from this, but for your purposes it should be ideal.
Using this your example would become:
finished = 0
def make_job():
sleep(1)
global finished
finished += 1
def display_status():
print 'finished: ' + finished
def main():
data = [...]
pool = ThreadPool(45)
# set up the monitor to make run the function every minute
monitor = PerpetualTimer(60,display_status)
monitor.start()
results = pool.map(make_job, data)
pool.close()
pool.join()
monitor.cancel()
EDIT:
A cleaner solution may be (thanks to comments below):
from threading import Event,Thread
class RepeatTimer(Thread):
def __init__(self, t, callback, event):
Thread.__init__(self)
self.stop = event
self.wait_time = t
self.callback = callback
self.daemon = True
def run(self):
while not self.stop.wait(self.wait_time):
self.callback()
Then in your code:
def main():
data = [...]
pool = ThreadPool(45)
stop_flag = Event()
RepeatTimer(60,display_status,stop_flag).start()
results = pool.map(make_job, data)
pool.close()
pool.join()
stop_flag.set()
One way to do this, is to use main thread as the monitoring one. Something like below should work:
def main():
data = [...]
results = []
step = 0
pool = ThreadPool(16)
pool.map_async(make_job, data, callback=results.extend)
pool.close()
while True:
if results:
break
step += 1
sleep(1)
if step % 60 == 0:
print "status update" + ...
I've used .map() instead of .map_async() as the former is synchronous one. Also you probably will need to replace results.extend with something more efficient. And finally, due to GIL, speed improvement may be much smaller than expected.
BTW, it is little bit funny that you wrote that Python is synchronous in a question that asks about ThreadPool ;).
Consider using the time module. The time.time() function returns the current UNIX time.
For example, calling time.time() right now returns 1410384038.967499. One second later, it will return 1410384039.967499.
The way I would do this would be to use a while loop in the place of results = pool(...), and on every iteration to run a check like this:
last_time = time.time()
while (...):
new_time = time.time()
if new_time > last_time+60:
print "status update" + ...
last_time = new_time
(your computation here)
So that will check if (at least) a minute has elapsed since your last status update. It should print a status update approximately every sixty seconds.
Sorry that this is an incomplete answer, but I hope this helps or gives you some useful ideas.

Python threading. How do I lock a thread?

I'm trying to understand the basics of threading and concurrency. I want a simple case where two threads repeatedly try to access one shared resource.
The code:
import threading
class Thread(threading.Thread):
def __init__(self, t, *args):
threading.Thread.__init__(self, target=t, args=args)
self.start()
count = 0
lock = threading.Lock()
def increment():
global count
lock.acquire()
try:
count += 1
finally:
lock.release()
def bye():
while True:
increment()
def hello_there():
while True:
increment()
def main():
hello = Thread(hello_there)
goodbye = Thread(bye)
while True:
print count
if __name__ == '__main__':
main()
So, I have two threads, both trying to increment the counter. I thought that if thread 'A' called increment(), the lock would be established, preventing 'B' from accessing until 'A' has released.
Running the makes it clear that this is not the case. You get all of the random data race-ish increments.
How exactly is the lock object used?
Additionally, I've tried putting the locks inside of the thread functions, but still no luck.
You can see that your locks are pretty much working as you are using them, if you slow down the process and make them block a bit more. You had the right idea, where you surround critical pieces of code with the lock. Here is a small adjustment to your example to show you how each waits on the other to release the lock.
import threading
import time
import inspect
class Thread(threading.Thread):
def __init__(self, t, *args):
threading.Thread.__init__(self, target=t, args=args)
self.start()
count = 0
lock = threading.Lock()
def incre():
global count
caller = inspect.getouterframes(inspect.currentframe())[1][3]
print "Inside %s()" % caller
print "Acquiring lock"
with lock:
print "Lock Acquired"
count += 1
time.sleep(2)
def bye():
while count < 5:
incre()
def hello_there():
while count < 5:
incre()
def main():
hello = Thread(hello_there)
goodbye = Thread(bye)
if __name__ == '__main__':
main()
Sample output:
...
Inside hello_there()
Acquiring lock
Lock Acquired
Inside bye()
Acquiring lock
Lock Acquired
...
import threading
# global variable x
x = 0
def increment():
"""
function to increment global variable x
"""
global x
x += 1
def thread_task():
"""
task for thread
calls increment function 100000 times.
"""
for _ in range(100000):
increment()
def main_task():
global x
# setting global variable x as 0
x = 0
# creating threads
t1 = threading.Thread(target=thread_task)
t2 = threading.Thread(target=thread_task)
# start threads
t1.start()
t2.start()
# wait until threads finish their job
t1.join()
t2.join()
if __name__ == "__main__":
for i in range(10):
main_task()
print("Iteration {0}: x = {1}".format(i,x))

Python 3, multithreading - getting data into main module as each thread finishes

The following code executes two threads (multithread), each with different time delays so that each thread will finish at a different time.
Once both threads are finished module display1.py issues a print statement saying they are BOTH finished.
I would like module display1.py to issue a 'finished' statement for EACH thread AS EACH thread finishes
How can i do this ... amendments to my working code appreciated! I'd like to change as little of the current code as possible so a better form of variable transfer between the two modules might be what I'm after
display1.py
from threads1 import *
manager = ThreadManager()
manager.start(False)
print (manager.GetResults())
threads1.py
from threading import Thread
import time
class ThreadManager:
def __init__(self):
pass
def start(self, answer):
self.answer = answer
thread_refs = []
t1 = MyThread(70, 'Not finished')
t1.daemon = True
t1.start()
t2 = MyThread(2, 'Not finished')
t2.daemon = True
t2.start()
while True:
if t1.AskFinished == 'Finished' and t2.AskFinished == 'Finished': #If I break the loop after EACH site, Only the first to finish will be sent via GetResults to display1.py
global results
results = [t1.AskFinished, t2.AskFinished]
print("Both Finished")
break
def GetResults(self):
global results
return(results)
class MyThread(Thread):
def __init__(self, SleepWait, AskFinished):
Thread.__init__(self)
self.SleepWait = SleepWait
self.AskFinished = AskFinished
def run(self):
time.sleep(self.SleepWait)
self.AskFinished = 'Finished'
What you have here (entering a very tight check loop in the main thread) is a very naive approach to threading in many languages, but especially in python where GIL contention will just slow the threads down a great bit.
What is a better idea is instead using queue.Queue to push info when a thread is completed. This allows the main thread to block on the queue instead, which is less CPU intensive as well as allowing you to know (out of order) which one is finished.
The changes you would need to make:
at the top of the module threads1.py:
import queue
finished_queue = queue.Queue()
in your start():
num_finished = 0
while True:
info = finished_queue.get()
num_finished += 1
if info is t1:
print("t1 finished")
else:
print("t2 finished")
if num_finished == 2:
global results
results = [t1.AskFinished, t2.AskFinished]
print("Both Finished")
break
and finally in run():
def run(self):
time.sleep(self.SleepWait)
self.AskFinished = 'Finished'
finished_queue.put(self)
Some more fundamental modifications I'd make is actually pushing the result into the queue and then fetching the results out, skipping the extra step before GetResults. Furthermore, if GetResults had to stay, I'd pass them through a field on self e.g. self.results = [t1.AskFinished, t2.AskFinished]
Update:
Ok, so you want to know more about how to have display1.py print the results. It would be helpful if you could explain why it matters, because that might make a difference in how you should do this, but here's a first approach:
# threads1.py
from threading import Thread
import time
class ThreadManager:
def __init__(self):
self.threads = {}
def start(self):
t1 = MyThread(4)
t1.daemon = True
t1.start()
self.threads[1] = t1
t2 = MyThread(1)
t2.daemon = True
t2.start()
self.threads[2] = t2
def is_alive(self, thread_id):
return self.threads[thread_id].is_alive()
def GetResults(self): # or you could just access results directly
return self.results
class MyThread(Thread):
def __init__(self, SleepWait):
Thread.__init__(self)
self.SleepWait = SleepWait
def run(self):
time.sleep(self.SleepWait)
And then...
# display1.py
from threads1 import *
manager = ThreadManager()
manager.start()
t1_state = t2_state = True
while manager.is_alive(1) or manager.is_alive(2):
time.sleep(1)
if manager.is_alive(1) != t1_state:
print("t1 finished")
t1_state = manager.is_alive(1)
if manager.is_alive(2) != t2_state:
print("t2 finished")
t2_state = manager.is_alive(2)
if not manager.is_alive(1) and not manager.is_alive(2):
print("Both Finished")
break
You should eventually consider using a Queue as suggested by Crast; but let's focus on getting this right first.
Original Post:
There are a number of problems with this code.
First, you should use t1.is_alive() to check if a thread is finished. There's no need to reimplement it with AskFinished.
Second, the while True: loop in threads1.py is doing nothing at a furious rate while it waits for your threads to terminate. Take a look at the cpu usage while this is running if you don't believe me. You should throw a time.sleep(1) statement in there.
Third, why are you using a global var to return your results? That's a really strange thing to do. Just store it in self!
And finally, why does display1.py have to print the messages? Why can't thread1.py do that?
With these four points in mind, here's a thread1.py that works more sensibly:
from threading import Thread
import time
class ThreadManager:
def __init__(self):
self.results = None
def start(self, answer): # why is "answer" here?
self.answer = answer
thread_refs = []
t1 = MyThread(4, 'Not finished')
t1.daemon = True
t1.start()
t2 = MyThread(1, 'Not finished')
t2.daemon = True
t2.start()
t1_state = t2_state = True
while t1.is_alive() or t2.is_alive():
time.sleep(1)
if t1.is_alive() != t1_state:
print("t1 finished")
t1_state = t1.is_alive()
if t2.is_alive() != t2_state:
print("t2 finished")
t2_state = t2.is_alive()
if not t1.is_alive() and not t2.is_alive():
self.results = [t1.AskFinished, t2.AskFinished]
print("Both Finished")
break
def GetResults(self): # or you could just access results directly
return self.results
class MyThread(Thread):
def __init__(self, SleepWait, AskFinished):
Thread.__init__(self)
self.SleepWait = SleepWait
self.AskFinished = AskFinished
def run(self):
time.sleep(self.SleepWait)
self.AskFinished = 'Finished'
Now, this still doesn't do exactly what you wanted, because you asked for display.py to do the displaying. To make that work, you'd have to put your while True loop in display.py and add an ThreadManager.is_alive() method that display.py could use to check whether a thread is alive or not. If you want to see how to do that let me know.
Im not familiar with threading but since no answers yet ill give it a shot.
In this:
Cant you just add two if statements before hand?
while True:
if t1.askFinished == 'Finished':
print("t1 Finished")
if t2.askFinished == 'Finished':
print("t2 Finished")
if t1.AskFinished == 'Finished' and t2.AskFinished == 'Finished': #If I break the loop after EACH site, Only the first to finish will be sent via GetResults to display1.py
global results
results = [t1.AskFinished, t2.AskFinished]
print("Both Finished")
break
edit: I tried changing your code as little as possible... it's not very well written though tbh. (No offense)

Categories

Resources