Python multithreading - memory leak when using a shared object (.so)

I have a Python program that leaks memory when it uses a third-party .so (shared object).
I have simplified my code down to this:
import time
import sys
import threading
import codecs
import ctypes

sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

class TestThirdPartySo(object):
    def __init__(self):
        # this so uses thread-specific data
        self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
        self.handle = self.call_stat_so._handle

    def test_fun(self):
        self.call_stat_so.fun_xxx()

def thread_fun():
    TestThirdPartySo().test_fun()

def test_main(num):
    count = 0
    while True:
        # create thread_num threads per iteration
        thread_num = 3
        thread_list = []
        for _ in range(thread_num):
            thread_list.append(threading.Thread(target=thread_fun))
        for thread in thread_list:
            thread.start()
        for thread in thread_list:
            thread.join()
        count += thread_num
        time.sleep(0.01)
        if count % 100 == 0:
            print("finished %s" % count)
        if count > num:
            break
    print("end !!!!")

if __name__ == '__main__':
    num = sys.argv[1]
    test_main(int(num))
Now, I know this shared object uses thread-specific data. I have also tried to close the SO after calling it, like this:
class TestThirdPartySo(object):
    def __init__(self):
        # this so uses thread-specific data
        self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
        self.handle = self.call_stat_so._handle

    def test_fun(self):
        self.call_stat_so.fun_xxx()

    def __del__(self):
        dlclose_func(self.handle)

def dlclose_func(_handle):
    dlclose_func_tmp = ctypes.cdll.LoadLibrary('libdl.so').dlclose
    dlclose_func_tmp.argtypes = [ctypes.c_void_p]
    dlclose_func_tmp(_handle)
But I failed to close the SO this way, and I'm also not sure whether the leaked memory would be freed even after closing it.
If the program doesn't use multiple threads, or creates a fixed number of threads (a thread pool), it works fine.
For various reasons I need to create threads constantly in my program. What can I do to prevent this memory leak?
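For reference, the fixed-pool variant that does not leak would look roughly like this (a minimal sketch using concurrent.futures.ThreadPoolExecutor and the thread_fun defined above; the pool size of 3 is only illustrative):

from concurrent.futures import ThreadPoolExecutor
import time

def test_main_pooled(num):
    # reuse a fixed set of worker threads instead of creating new ones,
    # so the SO's thread-specific data is only allocated once per worker
    count = 0
    with ThreadPoolExecutor(max_workers=3) as pool:
        while count <= num:
            futures = [pool.submit(thread_fun) for _ in range(3)]
            for future in futures:
                future.result()
            count += 3
            time.sleep(0.01)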

Related

shared memory between process and main task in python

With some help I was able to run a process in Python. Now I want to share a value between the two tasks. I can set the value inside __init__, but I can't change it inside the run method.
And by the way: how do I kill the process when the main process stops?
from multiprocessing import Process, Value
import serial
import time

class P(Process):
    def __init__(self, num):
        num.value = 15
        super(P, self).__init__()

    def run(self):
        while True:
            num.value = num.value + 1
            print("run simple process")
            time.sleep(0.5)

def main():
    while True:
        print("run main")
        print(num.value)
        time.sleep(2.5)

if __name__ == "__main__":
    num = Value('d', 0.0)
    p = P(num)
    p.start()
    #p.join()
    main()
In your simplified case you just passed the num value at initialization time.
To be able to access that value in the process's other methods, set it as state on the process:
class P(Process):
    def __init__(self, num):
        self.num = num
        self.num.value = 15
        super(P, self).__init__()

    def run(self):
        while True:
            self.num.value += 1
            print("run simple process")
            time.sleep(0.5)
For more "serious" cases, consider using Managers and synchronization primitives.
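A minimal sketch of the Manager approach mentioned above (illustrative only, not tied to the original code): a Manager process hosts a shared dict that both the parent and the child access through proxies. Setting daemon=True also addresses the side question about killing the child when the main process stops.

from multiprocessing import Process, Manager
import time

def worker(shared):
    # the dict proxy can be read and written from both processes
    while True:
        shared['num'] += 1
        time.sleep(0.5)

if __name__ == "__main__":
    with Manager() as manager:
        shared = manager.dict(num=15)
        # daemon=True: the child is killed automatically when the main process exits
        p = Process(target=worker, args=(shared,), daemon=True)
        p.start()
        for _ in range(3):
            print("run main")
            print(shared['num'])
            time.sleep(2.5)
        p.terminate()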

Using Parallel process to update object attributes in main process

I've been looking into the multiprocessing module to figure this out but I'm not entirely sure of all the components I need or how to structure them. The basic structure/logic that I'm trying to get goes something like this though:
import datetime
import time
import multiprocessing

class So_Classy():
    def __init__(self):
        self.value = 0

    def update_values(self):
        print('In the process doing stuff!')
        while True:
            self.value = self.value + 1
            time.sleep(0.1)
            print("Self.value = {self.value}")

    def run(self):
        # Constantly update old value to a new one
        try:
            if __name__ == '__main__':
                p = multiprocessing.Process(target=self.update_values)
                p.start()
                print("Process started!")
        except Exception as e:
            print(str(e))

    def get_result(self, arg):
        return self.value*arg

##### MAIN PROGRAM #####
# Initialize process given certain parameters
sc = So_Classy()

# This spawns a process running an infinite while loop
sc.run()

one_second = datetime.datetime.now() + datetime.timedelta(seconds=1)
while True:
    if datetime.datetime.now() > one_second:
        # Pass an arg to sc and do a calc with it
        print(sc.get_result(5))
        one_second = datetime.datetime.now() + datetime.timedelta(seconds=1)
The run() function is making it through to the end without causing an exception but it doesn't appear to actually be entering the process. No idea why. :\
The real process I will be using will be computationally intensive so it has to run as a separate process.

Checking a value of an object in a running process

In this simple example, how can I access the value of a counter object while the process is still running?
import multiprocessing
import time

class Counter(object):
    def __init__(self):
        self.value = 0

    def update(self):
        self.value += 1

def job(counter):
    while True:
        counter.update()

if __name__ == '__main__':
    counter = Counter()
    p = multiprocessing.Process(target=job, args=(counter,))
    p.start()
    time.sleep(10)
    # I want to check the value of the counter object here
    p.terminate()
You have to use multiprocessing.Queue() or multiprocessing.Pipe() to communicate between processes.
multiprocessing.Pipe() creates two endpoints, conn_1 and conn_2; you use one of them in the main process and the other in the subprocess.
Use poll() to check whether there is something in the pipe, and then recv() to receive the data (calling recv() directly would block the program until something is sent into the pipe).
Then use send() to send back a message with the result.
Here I use conn_2 in job():
import multiprocessing
import time

class Counter(object):
    def __init__(self):
        self.value = 0

    def update(self):
        self.value += 1

def job(counter, conn):
    while True:
        counter.update()
        if conn.poll():
            print('job:', conn.recv())
            conn.send(counter.value)

if __name__ == '__main__':
    conn_1, conn_2 = multiprocessing.Pipe()
    counter = Counter()
    p = multiprocessing.Process(target=job, args=(counter, conn_2))
    p.start()
    time.sleep(2)
    # I want to check the value of the counter object here
    conn_1.send('give me result')
    print('result:', conn_1.recv())
    p.terminate()
Here I use conn_2 directly in the class:
import multiprocessing
import time

class Counter(object):
    def __init__(self, conn):
        self.conn = conn
        self.value = 0

    def update(self):
        self.value += 1
        if self.conn.poll():  # if there is a message from the main process
            print('Counter:', self.conn.recv())
            self.conn.send(self.value)

def job(counter):
    while True:
        counter.update()

if __name__ == '__main__':
    conn_1, conn_2 = multiprocessing.Pipe()
    counter = Counter(conn_2)
    p = multiprocessing.Process(target=job, args=(counter,))
    p.start()
    time.sleep(2)
    conn_1.send('give me result')
    print('result:', conn_1.recv())
    p.terminate()
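For completeness, the multiprocessing.Queue() alternative mentioned at the top works the same way; a minimal sketch (my own, not from the original answer), with one queue for requests and one for results:

import multiprocessing
import time

class Counter(object):
    def __init__(self):
        self.value = 0

    def update(self):
        self.value += 1

def job(counter, requests, results):
    while True:
        counter.update()
        if not requests.empty():   # main process asked for the value
            requests.get()
            results.put(counter.value)

if __name__ == '__main__':
    requests = multiprocessing.Queue()
    results = multiprocessing.Queue()
    p = multiprocessing.Process(target=job, args=(Counter(), requests, results))
    p.start()
    time.sleep(2)
    requests.put('give me result')   # same request/response idea as the Pipe version
    print('result:', results.get())
    p.terminate()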
You could consider attaching a debugger (such as the PyDev debugger, GDB or others) to the running process. You can then freeze the process with a breakpoint and inspect state.

python threading in a loop

I have a project that requires a bunch of large matrices, which are stored in ~200 MB files, to be cross-correlated (i.e. FFT * conj(FFT)) with each other. The number of files is such that I can't just load them all up and then do my processing. On the other hand, reading in each file as I need it is slower than I'd like.
What I have so far is something like:
result = 0
for i in xrange(N_files):
    f1 = file_reader(file_list[i])
    ############################################################################
    # here I want to have file_reader go start reading the next file I'll need #
    ############################################################################
    in_place_processing(f1)
    for j in xrange(i+1, N_files):
        f2 = file_reader(file_list[j])
        ##################################################################
        # here I want to have file_reader go start reading the next file #
        ##################################################################
        in_place_processing(f2)
        result += processing_function(f1, f2)
So basically, I just want to have two threads that will each read a file, give it to me when I ask for it (or as soon as it's done after I ask for it), and then go start reading the next file for when I ask for it. The object the file_reader returns is rather large and complicated, so I'm not sure if multiprocessing is the way to go here...
I've read about threading and queues but can't seem to figure out the part where I ask the thread to go read the file and can proceed with the program while it does. I don't want the threads to simply go about their business in the background -- am I missing a detail here, or is threading not the way to go?
Below is an example of using the multiprocessing module that will spawn off child processes to call your file_reader method and queue up their results. The queue blocks when full, so you can control the number of read-aheads you'd like to perform with the QUEUE_SIZE constant.
This uses a standard Producer/Consumer model of multiprocess communication, with the child processes acting as Producers and the main thread being the Consumer. The join method call in the class destructor ensures the child process resources are cleaned up properly. There are some print statements interspersed for demonstration purposes.
Additionally, I added the ability for the QueuedFileReader class to offload work to a worker thread or run in the main thread, rather than using a child process, for comparison. This is done by setting the mode parameter at class initialization to MODE_THREADS or MODE_SYNCHRONOUS, respectively.
import multiprocessing as mp
import Queue
import threading
import time

QUEUE_SIZE = 2  #buffer size of queue

## Placeholder for your functions and variables
N_files = 10
file_list = ['file %d' % i for i in range(N_files)]

def file_reader(filename):
    time.sleep(.1)
    result = (filename, 'processed')
    return result

def in_place_processing(f):
    time.sleep(.2)

def processing_function(f1, f2):
    print f1, f2
    return id(f1) & id(f2)

MODE_SYNCHRONOUS = 0  #file_reader called in main thread synchronously
MODE_THREADS = 1      #file_reader executed in worker thread
MODE_PROCESS = 2      #file_reader executed in child process

##################################################
## Class to encapsulate multiprocessing objects.
class QueuedFileReader():
    def __init__(self, idlist, mode=MODE_PROCESS):
        self.mode = mode
        self.idlist = idlist
        if mode == MODE_PROCESS:
            self.queue = mp.Queue(QUEUE_SIZE)
            self.process = mp.Process(target=QueuedFileReader.worker,
                                      args=(self.queue, idlist))
            self.process.start()
        elif mode == MODE_THREADS:
            self.queue = Queue.Queue(QUEUE_SIZE)
            self.thread = threading.Thread(target=QueuedFileReader.worker,
                                           args=(self.queue, idlist))
            self.thread.start()

    @staticmethod
    def worker(queue, idlist):
        for i in idlist:
            queue.put((i, file_reader(file_list[i])))
            print id(queue), 'queued', file_list[i]
        queue.put('done')

    def __iter__(self):
        if self.mode == MODE_SYNCHRONOUS:
            self.index = 0
        return self

    def next(self):
        if self.mode == MODE_SYNCHRONOUS:
            if self.index == len(self.idlist): raise StopIteration
            q = (self.idlist[self.index],
                 file_reader(file_list[self.idlist[self.index]]))
            self.index += 1
        else:
            q = self.queue.get()
            if q == 'done': raise StopIteration
        return q

    def __del__(self):
        if self.mode == MODE_PROCESS:
            self.process.join()
        elif self.mode == MODE_THREADS:
            self.thread.join()

#mode = MODE_PROCESS
mode = MODE_THREADS
#mode = MODE_SYNCHRONOUS

result = 0
for i, f1 in QueuedFileReader(range(N_files), mode):
    in_place_processing(f1)
    for j, f2 in QueuedFileReader(range(i+1, N_files), mode):
        in_place_processing(f2)
        result += processing_function(f1, f2)
If your intermediate values are too large to pass through the Queue, you can execute each iteration of the outer loop in its own process. A handy way to do that would be using the Pool class in multiprocessing as in the example below.
import multiprocessing as mp
import time

## Placeholder for your functions and variables
N_files = 10
file_list = ['file %d' % i for i in range(N_files)]

def file_reader(filename):
    time.sleep(.1)
    result = (filename, 'processed')
    return result

def in_place_processing(f):
    time.sleep(.2)

def processing_function(f1, f2):
    print f1, f2
    return id(f1) & id(f2)

def file_task(file_index):
    print file_index
    f1 = file_reader(file_list[file_index])
    in_place_processing(f1)
    task_result = 0
    for j in range(file_index+1, N_files):
        f2 = file_reader(file_list[j])
        in_place_processing(f2)
        task_result += processing_function(f1, f2)
    return task_result

pool = mp.Pool(processes=None)  #processes defaults to mp.cpu_count()

result = 0
for file_result in pool.map(file_task, range(N_files)):
    result += file_result

print 'result', result
#or simply
#result = sum(pool.map(file_task, range(N_files)))

Python threading. How do I lock a thread?

I'm trying to understand the basics of threading and concurrency. I want a simple case where two threads repeatedly try to access one shared resource.
The code:
import threading

class Thread(threading.Thread):
    def __init__(self, t, *args):
        threading.Thread.__init__(self, target=t, args=args)
        self.start()

count = 0
lock = threading.Lock()

def increment():
    global count
    lock.acquire()
    try:
        count += 1
    finally:
        lock.release()

def bye():
    while True:
        increment()

def hello_there():
    while True:
        increment()

def main():
    hello = Thread(hello_there)
    goodbye = Thread(bye)
    while True:
        print count

if __name__ == '__main__':
    main()
So, I have two threads, both trying to increment the counter. I thought that if thread 'A' called increment(), the lock would be established, preventing 'B' from accessing until 'A' has released.
Running the code makes it clear that this is not the case. You get all of the random data-race-ish increments.
How exactly is the lock object used?
Additionally, I've tried putting the locks inside of the thread functions, but still no luck.
You can see that your locks are pretty much working as you are using them if you slow down the process and make them block a bit more. You had the right idea to surround the critical pieces of code with the lock. Here is a small adjustment to your example to show how each thread waits for the other to release the lock.
import threading
import time
import inspect

class Thread(threading.Thread):
    def __init__(self, t, *args):
        threading.Thread.__init__(self, target=t, args=args)
        self.start()

count = 0
lock = threading.Lock()

def incre():
    global count
    caller = inspect.getouterframes(inspect.currentframe())[1][3]
    print "Inside %s()" % caller
    print "Acquiring lock"
    with lock:
        print "Lock Acquired"
        count += 1
        time.sleep(2)

def bye():
    while count < 5:
        incre()

def hello_there():
    while count < 5:
        incre()

def main():
    hello = Thread(hello_there)
    goodbye = Thread(bye)

if __name__ == '__main__':
    main()
Sample output:
...
Inside hello_there()
Acquiring lock
Lock Acquired
Inside bye()
Acquiring lock
Lock Acquired
...
import threading

# global variable x
x = 0

def increment():
    """
    function to increment global variable x
    """
    global x
    x += 1

def thread_task():
    """
    task for thread
    calls increment function 100000 times.
    """
    for _ in range(100000):
        increment()

def main_task():
    global x

    # setting global variable x as 0
    x = 0

    # creating threads
    t1 = threading.Thread(target=thread_task)
    t2 = threading.Thread(target=thread_task)

    # start threads
    t1.start()
    t2.start()

    # wait until threads finish their job
    t1.join()
    t2.join()

if __name__ == "__main__":
    for i in range(10):
        main_task()
        print("Iteration {0}: x = {1}".format(i, x))
