I am trying to use threading in one of the features of a class. The class does not inherit from Thread; it just makes use of it, calling a method of the class in a thread. The code has been reduced to the part that shows the problem.
The problem seems to come from using threads inside a class and having them call a method of that same class. This is just my guess; I'm not an expert in Python, so I don't know.
Set the THREADING variable in the run method to enable threading.
import threading
class Alpha:
# Set variable defaults
leader = 'Swansonburg'
# - Thread variables
MAX_CONNECTIONS = 5
lock = threading.BoundedSemaphore(value=MAX_CONNECTIONS)
def __init__(self, leader=''):
if (leader): self.leader = leader
def run(self):
print "[+] Alpha: %s, Calling For Backup!" % self.leader
self.ETA = 101
for percent in range(self.ETA):
""" SET TRUE TO ENABLE THREADING """
THREADING = False
self.lock.acquire() # Set thread cap
if (not THREADING):
self.CallBackup(percent)
else:
t = threading.Thread(target=self.CallBackup, args=(percent))
t.start()
def CallBackup(self, percent):
if (percent == 0): pass
elif (percent % 10 == 0) | (percent == 100):
output = ("=" * percent) + (" %% - %d" % percent)
print (output)
self.lock.release()
def main():
new_team = Alpha()
new_team.run()
if (__name__ == '__main__'):
main()
When I tried running this, it gave me the error:
CallBackup() argument after * must be a sequence, not int
Why do I get TypeError in Threading in Python explains the same problem: you need to replace args=(percent) with args=(percent,). Parentheses alone do not create a tuple, so (percent) is just the int percent; the trailing comma makes it a one-element tuple, which is the sequence that Thread expects.
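To see why the comma matters, here is a minimal, self-contained sketch (Python 3, with a free-standing function instead of the class method):
import threading

def call_backup(percent):
    print("backup at %d%%" % percent)

# (50) is just the integer 50 in parentheses; the thread would then fail
# with the "argument after * must be a sequence, not int" TypeError when
# unpacking args. (50,) is a one-element tuple, which is what args expects.
t = threading.Thread(target=call_backup, args=(50,))
t.start()
t.join()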
I have a Python program that leaks memory when it uses a third-party shared object (.so).
I simplified my code to this:
import time
import sys
import threading
import codecs
import ctypes
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
class TestThirdPartySo(object):
def __init__(self):
# this so uses thread-specific data
self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
self.handle = self.call_stat_so._handle
def test_fun(self):
self.call_stat_so.fun_xxx()
def thread_fun():
TestThirdPartySo().test_fun()
def test_main(num):
count = 0
while True:
# create 3 * num threads
thread_num = 3
thread_list = []
for _ in range(thread_num):
thread_list.append(threading.Thread(target=thread_fun))
for thread in thread_list:
thread.start()
for thread in thread_list:
thread.join()
count += thread_num
time.sleep(0.01)
if count % 100 == 0:
print("finied %s" % count)
if count > num:
break
print("end !!!!")
if __name__ == '__main__':
num = sys.argv[1]
test_main(int(num))
Now, I know this shared object uses thread-specific data, and I have tried to close the .so after calling it, like this:
class TestThirdPartySo(object):
def __init__(self):
# this so uses thread-specific data
self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
self.handle = self.call_stat_so._handle
def test_fun(self):
self.call_stat_so.fun_xxx()
def __del__(self):
dlclose_func(self.handle)
def dlclose_func(_handle):
dlclose_func_tmp = ctypes.cdll.LoadLibrary('libdl.so').dlclose
dlclose_func_tmp.argtypes = [ctypes.c_void_p]
dlclose_func_tmp(_handle)
But I failed to close the .so, and I'm also not sure whether the leaked memory would be freed after closing it.
If the program does not use multiple threads, or creates a fixed number of threads (a thread pool), it works fine.
For various reasons I need to create threads constantly in my program. What can I do to prevent this memory leak?
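Since the question itself observes that a fixed number of threads avoids the leak, one hedged sketch is to funnel the work through a pool of long-lived threads instead of creating new ones (TestThirdPartySo is the class from the snippet above):
from concurrent.futures import ThreadPoolExecutor

# A fixed pool reuses the same 3 OS threads, so the .so's thread-specific
# data is allocated once per pool thread instead of once per created thread.
executor = ThreadPoolExecutor(max_workers=3)

def thread_fun():
    TestThirdPartySo().test_fun()  # class from the question above

for _ in range(100):
    futures = [executor.submit(thread_fun) for _ in range(3)]
    for future in futures:
        future.result()  # propagates exceptions, similar to thread.join()
executor.shutdown(wait=True)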
I'm trying to use the multiprocessing library to parallelize some expensive calculations without blocking some other, much lighter ones. Both need to interact through some variables, although they may run at different paces.
To show this, I have created the following example, that works fine:
import multiprocessing
import time
import numpy as np
class SumClass:
def __init__(self):
self.result = 0.0
self.p = None
self.return_value = None
def expensive_function(self, new_number, return_value):
# Execute expensive calculation
#######
time.sleep(np.random.random_integers(5, 10, 1))
return_value.value = self.result + new_number
#######
def execute_function(self, new_number):
print(' New number received: %f' % new_number)
self.return_value = multiprocessing.Value("f", 0.0, lock=True)
self.p = multiprocessing.Process(target=self.expensive_function, args=(new_number, self.return_value))
self.p.start()
def is_executing(self):
if self.p is not None:
if not self.p.is_alive():
self.result = self.return_value.value
self.p = None
return False
else:
return True
else:
return False
if __name__ == '__main__':
sum_obj = SumClass()
current_value = 0
while True:
if not sum_obj.is_executing():
# Randomly determine whether the function must be executed or not
if np.random.rand() < 0.25:
print('Current sum value: %f' % sum_obj.result)
new_number = np.random.rand(1)[0]
sum_obj.execute_function(new_number)
# Execute other (light) stuff
#######
print('Executing other stuff')
current_value += sum_obj.result * 0.1
print('Current value: %f' % current_value)
time.sleep(1)
#######
Basically, in the main loop some light function executes, and, depending on a random condition, some heavy work is sent to another process (provided the previous one has already finished), carried out by an object which needs to store data between executions. Although expensive_function needs some time, the light function keeps executing without being blocked.
Although the above code gets the job done, I'm wondering: is it the best/most appropriate way to do this?
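For comparison, here is a hedged sketch of the same non-blocking pattern built on multiprocessing.Pool.apply_async, where AsyncResult.ready() plays the role of is_executing(); this is not the poster's code, just one common alternative:
import multiprocessing
import time

def expensive(x):
    time.sleep(2)  # stand-in for the heavy calculation
    return x * x

if __name__ == '__main__':
    with multiprocessing.Pool(processes=1) as pool:
        async_result = pool.apply_async(expensive, (3.0,))
        while not async_result.ready():
            print('Executing other stuff')  # the light work keeps running
            time.sleep(0.5)
        print('Result: %f' % async_result.get())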
Besides, let us suppose the class SumClass has an instance of another object, which also needs to store data. For example:
import multiprocessing
import time
import numpy as np
class Operator:
def __init__(self):
self.last_value = 1.0
def operate(self, value):
print(' Operation, last value: %f' % self.last_value)
self.last_value *= value
return self.last_value
class SumClass:
def __init__(self):
self.operator_obj = Operator()
self.result = 0.0
self.p = None
self.return_value = None
def expensive_function(self, new_number, return_value):
# Execute expensive calculation
#######
time.sleep(np.random.random_integers(5, 10, 1))
# Apply operation
number = self.operator_obj.operate(new_number)
# Apply other operation
return_value.value = self.result + number
#######
def execute_function(self, new_number):
print(' New number received: %f' % new_number)
self.return_value = multiprocessing.Value("f", 0.0, lock=True)
self.p = multiprocessing.Process(target=self.expensive_function, args=(new_number, self.return_value))
self.p.start()
def is_executing(self):
if self.p is not None:
if not self.p.is_alive():
self.result = self.return_value.value
self.p = None
return False
else:
return True
else:
return False
if __name__ == '__main__':
sum_obj = SumClass()
current_value = 0
while True:
if not sum_obj.is_executing():
# Randomly determine whether the function must be executed or not
if np.random.rand() < 0.25:
print('Current sum value: %f' % sum_obj.result)
new_number = np.random.rand(1)[0]
sum_obj.execute_function(new_number)
# Execute other (light) stuff
#######
print('Executing other stuff')
current_value += sum_obj.result * 0.1
print('Current value: %f' % current_value)
time.sleep(1)
#######
Now, inside expensive_function, a member function of the Operator object is used, and that object needs to store the number passed.
As expected, the member variable last_value does not change in the parent: the child process works on a copy of the object, so anything it mutates is lost when the process ends.
Is there any way of doing this properly?
I can imagine I could arrange everything so that I only need one class level, and it would work well. However, this is a toy example; in reality there are several levels of complex objects, and that would be hard.
Thank you very much in advance!
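One way to keep mutated state, sketched below under the assumption that the child sends its updated values back explicitly (e.g. through a multiprocessing.Pipe) and the parent folds them into its own objects when the work finishes; the function and variable names here are illustrative, not from the original code:
import multiprocessing

def expensive_function(last_value, new_number, conn):
    # Stand-in for Operator.operate: mutate the state locally...
    last_value *= new_number
    # ...and ship it back, because the parent's copy is never touched.
    conn.send(last_value)
    conn.close()

if __name__ == '__main__':
    parent_conn, child_conn = multiprocessing.Pipe()
    p = multiprocessing.Process(target=expensive_function,
                                args=(1.0, 2.5, child_conn))
    p.start()
    p.join()
    # The parent reads the child's message and can update Operator.last_value.
    print(parent_conn.recv())  # 2.5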
from concurrent.futures import ThreadPoolExecutor
from numba import jit
import requests
import timeit
def timer(number, repeat):
def wrapper(func):
runs = timeit.repeat(func, number=number, repeat=repeat)
print(sum(runs) / len(runs))
return wrapper
URL = "https://httpbin.org/uuid"
# @jit(nopython=True, nogil=True, cache=True)  # left disabled: nopython mode cannot compile I/O calls such as session.get
def fetch(session, url):
with session.get(url) as response:
print(response.json()['uuid'])
@timer(1, 1)
def runner():
with ThreadPoolExecutor(max_workers=25) as executor:
with requests.Session() as session:
executor.map(fetch, [session] * 100, [URL] * 100)
            executor.shutdown(wait=True)  # redundant: leaving the with block already waits and shuts down
Maybe this might help.
I'm using ThreadPoolExecutor for multithreading; you can also use ProcessPoolExecutor.
For your compute-expensive operations you can use numba to compile your function to cached machine code for faster execution.
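As a hedged aside, numba's nopython mode compiles numeric code, not I/O calls like session.get, so the caching benefit applies to functions along these lines:
import numpy as np
from numba import jit

@jit(nopython=True, cache=True)
def dot(a, b):
    # Compiled to machine code on first call; cache=True stores the result
    # on disk so later runs skip recompilation.
    total = 0.0
    for i in range(a.shape[0]):
        total += a[i] * b[i]
    return total

print(dot(np.arange(3.0), np.arange(3.0)))  # 5.0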
I'm having trouble writing benchmark code in Python using threading. I was able to get the threading to work, but I can't get my object to return a value. I want to take the values and add them to a list so I can calculate the FLOPS.
Create a class to carry out the threading:
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
return ex_time
def run(self):
t = threading.Thread(target = self.calculation)
t.start()
Function to create the threads:
def make_threads(num):
times=[]
calcs=[]
for i in range(num):
print('start thread', i+1)
thread1=myThread()
t=thread1.start()
times.append(t)
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print(times)
#average out the times,add all the calculations to get the final numbers
#to calculate flops
time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()
Two ways to do this:
1. Using shared state (hacky, but efficient and quick)
Define some shared structure that you then manipulate from each thread, i.e.:
import threading
import time
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
print("Running....")
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
self.myThreadValues[self.idValue] = ex_time
print(self.myThreadValues)
return ex_time
def setup(self,myThreadValues=None,idValue=None):
self.myThreadValues = myThreadValues
self.idValue = idValue
def run(self):
self.calculation()
#t = threading.Thread(target = self.calculation)
#t.start()
def make_threads(num):
threads=[]
calcs=[]
myThreadValues = {}
for i in range(num):
print('start thread', i+1)
myThreadValues[i] = 0
thread1=myThread()
thread1.setup(myThreadValues,i)
thread1.start()
#times.append(t)
threads.append(thread1)
# Now we need to wait for all the threads to finish. There are a couple ways to do this, but the best is joining.
print("joining all threads...")
for thread in threads:
thread.join()
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print("Final thread values: " + str(myThreadValues))
print("Done")
#average out the times,add all the calculations to get the final numbers
#to calculate flops
#time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()
2. The proper way to do this is with Processes
Processes are designed for passing information back and forth, whereas threads are commonly used for asynchronous work. See the explanation here: https://docs.python.org/3/library/multiprocessing.html
See this answer: How can I recover the return value of a function passed to multiprocessing.Process?
import multiprocessing
from os import getpid
def worker(procnum):
    print('I am number %d in process %d' % (procnum, getpid()))
return getpid()
if __name__ == '__main__':
pool = multiprocessing.Pool(processes = 3)
    print(pool.map(worker, range(5)))
I've been looking into a way to directly change variables in a running module.
What I want to achieve is that while a load test is running I can manually adjust the call pace or similar settings.
Below is some code I just created (not tested, etc.), just to give you an idea.
class A():
def __init__(self):
self.value = 1
def runForever(self):
while(1):
print self.value
def setValue(self, value):
self.value = value
if __name__ == '__main__':
#Some code to create the A object and directly apply the value from an human's input
a = A()
#Some parallelism or something has to be applied.
a.runForever()
a.setValue(raw_input("New value: "))
Edit #1: Yes, I know that as written I will never reach a.setValue() :-)
Here is a multi-threaded example. This code works with the python interpreter but not with IDLE's Python Shell, because the raw_input function is not handled the same way there.
from threading import Thread
from time import sleep
class A(Thread):
def __init__(self):
Thread.__init__(self)
self.value = 1
self.stop_flag = False
def run(self):
while not self.stop_flag:
sleep(1)
print(self.value)
def set_value(self, value):
self.value = value
def stop(self):
self.stop_flag = True
if __name__ == '__main__':
a = A()
a.start()
try:
while 1:
r = raw_input()
a.set_value(int(r))
except:
a.stop()
The pseudo-code you wrote is quite similar to how threading / multiprocessing works in Python. You will want to start a thread that "runs forever", and then, instead of modifying the internal rate value directly, you will probably just send a message through a Queue that carries the new value.
Check out this question.
Here is a demonstration of doing what you asked about. I prefer using Queues to making calls directly on threads / processes.
import Queue # !!warning. if you use multiprocessing, use multiprocessing.Queue
import threading
import time
def main():
q = Queue.Queue()
tester = Tester(q)
tester.start()
while True:
user_input = raw_input("New period in seconds or (q)uit: ")
if user_input.lower() == 'q':
break
try:
new_speed = float(user_input)
except ValueError:
new_speed = None # ignore junk
if new_speed is not None:
q.put(new_speed)
q.put(Tester.STOP_TOKEN)
class Tester(threading.Thread):
STOP_TOKEN = '<<stop>>'
def __init__(self, q):
threading.Thread.__init__(self)
self.q = q
self.speed = 1
def run(self):
while True:
# get from the queue
try:
item = self.q.get(block=False) # don't hang
except Queue.Empty:
item = None # do nothing
if item:
# stop when requested
if item == self.STOP_TOKEN:
break # stop this thread loop
# otherwise check for a new speed
try:
self.speed = float(item)
except ValueError:
pass # whatever you like with unknown input
# do your thing
self.main_code()
def main_code(self):
time.sleep(self.speed) # or whatever you want to do
if __name__ == '__main__':
main()
How can I implement a conditional lock in a threaded application? For instance, I have
30 threads that are calling a function, and most of the time all threads can access it simultaneously, but depending on the function's input there can be a condition under which only one thread may do one particular thing. (If the input value is repeated and some thread is still working on it, then I need a lock.)
I know that the threading module has RLock(), but I don't know how to use it in the way I described in the first part.
Edit: The question is actually about how to prevent any two threads from running the same function with the same argument at the same time. (Thanks to David for helping me formulate my question :) )
Try this: have a lock in the module where your function is, and if the input to the function is such that locking is required, acquire the lock inside the function. Otherwise don't.
l = threading.RLock()
def fn(arg):
if arg == arg_that_needs_lock:
l.acquire()
try:
# do stuff
finally:
l.release()
else:
# do other stuff
EDIT:
As far as I can tell now, the question is actually about how to prevent any two threads from running the same function with the same argument at the same time. There's no problem with two threads running the same function with different arguments at the same time, though. The simple method to do this, if all valid arguments to the function can be dictionary keys, is to create a dictionary of arguments to locks:
import threading
dict_lock = threading.RLock()
locks = {}
def fn_dict(arg):
dict_lock.acquire()
try:
        if arg not in locks:
locks[arg] = threading.RLock()
l = locks[arg]
finally:
dict_lock.release()
l.acquire()
try:
# do stuff
finally:
l.release()
If your function can be called with many different arguments, though, that amounts to a lot of locks. Probably a better way is to have a set of all arguments with which the function is currently executing, and have the contents of that set protected by a lock. I think this should work:
set_condition = threading.Condition()
current_args = set()
def fn_set(arg):
set_condition.acquire()
try:
while arg in current_args:
set_condition.wait()
current_args.add(arg)
finally:
set_condition.release()
# do stuff
set_condition.acquire()
try:
current_args.remove(arg)
set_condition.notifyAll()
finally:
set_condition.release()
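A small usage sketch for fn_set above: calls with the same argument serialize, while calls with different arguments run concurrently (assuming the # do stuff placeholder is replaced with real work):
import threading

threads = [threading.Thread(target=fn_set, args=(arg,))
           for arg in ('a', 'a', 'b')]  # the two 'a' calls take turns
for t in threads:
    t.start()
for t in threads:
    t.join()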
It sounds like you want something similar to a Readers-Writer lock.
This is probably not what you want, but might be a clue:
from __future__ import with_statement
import threading
def RWLock(readers = 1, writers = 1):
m = _Monitor(readers, writers)
return (_RWLock(m.r_increment, m.r_decrement), _RWLock(m.w_increment, m.w_decrement))
class _RWLock(object):
def __init__(self, inc, dec):
self.inc = inc
self.dec = dec
def acquire(self):
self.inc()
def release(self):
self.dec()
def __enter__(self):
self.inc()
    def __exit__(self, exc_type, exc_value, traceback):
self.dec()
class _Monitor(object):
def __init__(self, max_readers, max_writers):
self.max_readers = max_readers
self.max_writers = max_writers
self.readers = 0
self.writers = 0
self.monitor = threading.Condition()
    def r_increment(self):
        # A reader may enter only when no writer is active and the reader
        # limit has not been reached.
        with self.monitor:
            while self.writers > 0 or self.readers >= self.max_readers:
                self.monitor.wait()
            self.readers += 1
    def r_decrement(self):
        with self.monitor:
            assert(self.readers > 0)
            self.readers -= 1
            # Wake all waiters so each can re-check the state.
            self.monitor.notify_all()
    def w_increment(self):
        # A writer may enter only when no reader is active and the writer
        # limit has not been reached.
        with self.monitor:
            while self.readers > 0 or self.writers >= self.max_writers:
                self.monitor.wait()
            self.writers += 1
    def w_decrement(self):
        with self.monitor:
            assert(self.writers > 0)
            self.writers -= 1
            self.monitor.notify_all()
if __name__ == '__main__':
rl, wl = RWLock()
wl.acquire()
wl.release()
rl.acquire()
rl.release()
(Unfortunately not tested)
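Since _RWLock defines __enter__ and __exit__, the pair returned by RWLock() can also be used as context managers; a quick sketch:
rl, wl = RWLock()
with wl:
    pass  # exclusive writer section
with rl:
    pass  # reader section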