My code looks like the following. It seems to be "hanging" during the proc.join() loop. If I create the dataframe df with 10 records, the whole process completes quickly, but with 10000 (as shown) the program seems to just hang. I am using htop to watch the CPU cores, and I do see all of them spike to 100%, but even long after they drop back to 0% the program doesn't continue. Any ideas on what I'm doing wrong?
import pandas as pd
import numpy as np
import multiprocessing
from multiprocessing import Process, Queue

def do_something(df, partition, q):
    for index in partition:
        q.put([v for v in df.iloc[index]])

def start_parallel_processing(df, partitions):
    q = Queue()
    procs = []
    results = []

    for partition in partitions:
        proc = Process(target=do_something, args=(df, partition, q))
        proc.start()
        procs.extend([proc])

    for i in range(len(partitions)):
        results.append(q.get(True))

    for proc in procs:
        proc.join()

    return results

num_cpus = multiprocessing.cpu_count()
df = pd.DataFrame([(x, x+1) for x in range(10000)], columns=['x', 'y'])
partitions = np.array_split(df.index, num_cpus)
results = start_parallel_processing(df, partitions)
len(results)
It appears the Queue doesn't behave the way you want: it wasn't made to be shared between multiple processes like this. Instead you must use Manager().Queue().
I have added some print statements to make your code flow visible.
You can still polish your code to use a Pool() instead of managing num_cpus processes by hand; see the sketch after the code below.
import pandas as pd
import numpy as np
import multiprocessing
import pprint
from multiprocessing import Process, Queue, Manager

def do_something(df, partition, q):
    # print "do_something " + str(len(partition)) + " times"
    for index in partition:
        # print index
        for v in df.iloc[index]:
            # print "sending v to queue: " + str(len(df.iloc[index]))
            q.put(v, False)
    print "task_done(), qsize is " + str(q.qsize())

def start_parallel_processing(df, partitions):
    m = Manager()
    q = m.Queue()
    procs = []
    results = []

    print "START: launching " + str(len(partitions)) + " process(es)"
    index = 0
    for partition in partitions:
        print "launching " + str(len(partitions)) + " process"
        proc = Process(target=do_something, args=(df, partition, q))
        procs.extend([proc])
        proc.start()
        index += 1
        print "launched " + str(index) + "/" + str(len(partitions)) + " process(es)"

    while True:
        try:
            results.append(q.get(block=False))
        except:
            print "QUEUE END"
            break
    print pprint.pformat(results)

    process_count = 0
    for proc in procs:
        process_count += 1
        print "joining " + str(process_count) + "/" + str(len(procs)) + " process(es)"
        proc.join()

    return results

num_cpus = multiprocessing.cpu_count()
df = pd.DataFrame([(x, x+1) for x in range(10000)], columns=['x', 'y'])
partitions = np.array_split(df.index, num_cpus)
results = start_parallel_processing(df, partitions)
print "len(results) is: " + str(len(results))
I have a problem with the multiprocessing code below: after running it in a Jupyter notebook the program freezes and a kernel restart is required. The data variable contains a 60x2001 DataFrame; part of this data is used to build the dictionary keys of dict_results, and the score variable is the value. (A bounded-worker alternative is sketched after the code.)
import multiprocessing as mp
from tqdm import tqdm  # import added; the original snippet uses tqdm below

def search_regression_loop(data, row, dict_results):
    for row2 in range(2000):
        if row != row2:
            score = linear_model_pred_iloc(data.iloc[:, :51], row, row2)
            if (score > 0.30) and (score != 1):
                if (data.iloc[row, 54] is not None) and (data.iloc[row2, 54] is not None):
                    name = str(data.iloc[row, 54]) + ' - ' + str(data.iloc[row2, 54])
                    dict_results[name] = score
                else:
                    name = str(data.iloc[row].name) + ' - ' + str(data.iloc[row2].name)
                    dict_results[name] = score

manager = mp.Manager()
dict_results = manager.dict()

procs = []
for row in tqdm(range(2000)):
    p = mp.Process(target=search_regression_loop, args=(tumor, row, dict_results))
    procs.append(p)
    p.start()

for p in procs:
    p.join()
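If the freeze comes from launching 2000 processes at once (one per row), a minimal sketch of bounding the worker count with a Pool might look like this; linear_model_pred_iloc and tumor are the question's own names, and the None handling is simplified:

import multiprocessing as mp

def init_worker(df):
    # Runs once in each worker: keep the DataFrame as a per-worker global
    # so it is not pickled again for every task.
    global data
    data = df

def search_regression_row(row):
    # One task per row; returns (name, score) pairs instead of writing
    # into a shared dict.
    pairs = []
    for row2 in range(2000):
        if row == row2:
            continue
        score = linear_model_pred_iloc(data.iloc[:, :51], row, row2)  # from the question
        if score > 0.30 and score != 1:
            name = str(data.iloc[row, 54]) + ' - ' + str(data.iloc[row2, 54])
            pairs.append((name, score))
    return pairs

if __name__ == '__main__':
    with mp.Pool(processes=mp.cpu_count(), initializer=init_worker, initargs=(tumor,)) as pool:
        all_pairs = pool.map(search_regression_row, range(2000))
    dict_results = {name: score for pairs in all_pairs for name, score in pairs}

This keeps the number of live processes at cpu_count() instead of 2000 and avoids the managed dict entirely.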
SYSTEM
Linux (Manjaro KDE)
Python 3.8.3
PROGRAM:
I have incoming string data on a UDP port. The main loop spools up the processes before using selectors to monitor the UDP port. I want the UDP data, which is constantly updated, to be available to each process.
TRIED:
Multiprocessing Queues with maxsize=1; that became a headache and quickly broke down.
Multiprocessing Arrays (this is where I'm at now)
I have checked, and the Array at each location I'm looking at has the same memory address (I think). For whatever reason, when I try to access the contents of the Array in the child process, the process hangs.
NOT TRIED
Pipes. I have a feeling this may be the way to go. But I'm already deep in uncharted territory; I've never used them before.
WHAT I WANT
I would like to access the UDP data from the child processes; these run the camera_view method.
Dummy UDP string
import socket
import random
import datetime
import time

conn = ('127.0.0.1', 6666)

def rand_value(f_val, t_val):
    result = round(random.uniform(f_val, t_val), 2)
    result = random.uniform(f_val, t_val)
    return result

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
while True:
    time.sleep(6)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    overlay = timestamp
    for i in range(9):
        val = rand_value(i*10, i*10+10)
        if i == 8:
            val = 'TASK: Im the real Batman'
        overlay = overlay + "," + str(val)
    print(overlay)
    sock.sendto(overlay.encode(), conn)
My Program
import datetime
import selectors
import socket
import time

import cv2  # import added; needed by the Camera class below
from multiprocessing import Lock, Process, Queue
from multiprocessing.sharedctypes import Array
from ctypes import c_char_p

REQUIRED_CAMERAS = 1
CAMERA_CONN = {'name': ['Colour Camera'], 'ip': ['127.0.0.1'], 'port': [9000]}
OVERLAY_CONN = ('0.0.0.0', 6666)
CONTROL_CONN = ('0.0.0.0', 6667)

NUMBER_OF_ITEMS_IN_OVERLAY = 10

class Camera():
    def __init__(self, cam_name, cam_ip, cam_port):
        self.ip = cam_ip
        self.port = cam_port
        self.capture = cv2.VideoCapture(0)
        self.frame_width = int(self.capture.get(3))
        self.frame_height = int(self.capture.get(4))
        self.name = cam_name

def get_overlay(data_packet):
    data = data_packet.decode()
    data = data.split(',')
    field0 = data[0]
    field1 = 'KP: ' + str(round(float(data[1]), 3))
    field2 = 'DCC: ' + str(round(float(data[2]), 2)) + 'm'
    field3 = 'E: ' + str(round(float(data[3]), 2)) + 'm'
    field4 = 'N: ' + str(round(float(data[4]), 2)) + 'm'
    field5 = 'D: ' + str(round(float(data[5]), 2)) + 'm'
    field6 = 'H: ' + str(round(float(data[6]), 2))  # + '°'
    field7 = 'R: ' + str(round(float(data[7]), 2))  # + '°'
    field8 = 'P: ' + str(round(float(data[8]), 2))  # + '°'
    field9 = data[9]
    x = []
    for i in range(NUMBER_OF_ITEMS_IN_OVERLAY):
        x.append(eval('field' + str(i)).encode())
        # if i == 0:
        #     print(x[i])
    return x

def socket_reader(sock, mask, q, REQUIRED_CAMERAS, overlay):
    data_packet, sensor_ip = sock.recvfrom(1024)
    sensor_port = sock.getsockname()[1]
    print(f'SENSOR PORT {sensor_port} and SENSOR_IP {sensor_ip}')
    if sensor_port == OVERLAY_CONN[1]:
        x = get_overlay(data_packet)
        for i in range(len(x)):
            overlay[i] = x[i]
        print(f'Socket Reader {overlay}')

def camera_view(CAMERA_CONN, cam_name, camera, overlay_q, control_q, overlay):
    while True:
        print(f'PROCESS {camera} RUNNING FOR: {cam_name}')
        try:
            print(f'Camera View {overlay}')
            for i in range(len(overlay)):
                print(overlay[i])
        except:
            pass
        time.sleep(1)

def controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN):
    if REQUIRED_CAMERAS > len(CAMERA_CONN['name']):
        print(f'REQUIRED_CAMERAS: {REQUIRED_CAMERAS} - more than connections in CAMERA_CONN')
    else:
        # Set up a UDP connection for the overlay string and the control commands
        sock_overlay = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_control = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_overlay.bind(OVERLAY_CONN)
        sock_control.bind(CONTROL_CONN)

        # Set up the selector to watch over the socket
        # and trigger when data is ready for reading
        sel = selectors.DefaultSelector()
        sel.register(fileobj=sock_overlay, events=selectors.EVENT_READ, data=socket_reader)
        sel.register(fileobj=sock_control, events=selectors.EVENT_READ, data=socket_reader)

        # create shared memory
        overlay_q = Queue(maxsize=1)
        control_q = Queue(maxsize=1)
        overlay = Array(c_char_p, range(NUMBER_OF_ITEMS_IN_OVERLAY))
        print(f'Init Overlay {overlay}')

        # Generate the processes; one per camera
        processes = []
        for camera in range(REQUIRED_CAMERAS):
            processes.append(Process(target=camera_view, args=(CAMERA_CONN, CAMERA_CONN['name'][camera], camera, overlay_q, control_q, overlay)))
        for process in processes:
            process.daemon = True
            process.start()

        # Spin over the selector
        while True:
            # Only have one connection registered, so to stop
            # the loop spinning up the CPU, I have made it blocking
            # with the timeout = 1 (sec) instead of =0.
            events = sel.select(timeout=None)
            for key, mask in events:
                # the selector callback is the data= from the register above
                callback = key.data
                # the callback gets the sock, mask and the sensor queues
                if key.fileobj == sock_overlay:
                    callback(key.fileobj, mask, overlay_q, REQUIRED_CAMERAS, overlay)
                else:
                    callback(key.fileobj, mask, control_q, REQUIRED_CAMERAS, overlay)

if __name__ == "__main__":
    controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN)
EDIT1:
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = val
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except:
            pass
        # try:
        #     val = arr[:]
        #     val = val.decode('utf-8')
        #     print(f'main {val}')
        # except:
        #     pass
        time.sleep(1)

if __name__ == "__main__":
    main()

'''
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = bytes(val, 'utf-8')
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except:
            pass
        time.sleep(1)

if __name__ == "__main__":
    main()
if __name__ == "__main__":
main()
'''
EDIT2:
Thanks to @RolandSmith, I have persevered with Queues and I think I have a template for moving forward. See the code below. If I can't get this to work in the program, I'll be back here.
from multiprocessing import Process, Queue
import time
import datetime

def worker(camera, q):
    val = ''
    while True:
        if q.full() == True:
            val = q.get()
        else:
            val = val
        print(f'WORKER{camera} {val}')
        time.sleep(0.2)

def main():
    cameras = 2
    processes = []
    queues = []
    for camera in range(cameras):
        queues.append(Queue(maxsize=1))
        processes.append(Process(target=worker, args=(camera, queues[camera])))
    for process in processes:
        process.daemon = True
        process.start()
    while True:
        for q in queues:
            if not q.empty():
                try:
                    _ = q.get()
                except:
                    pass
            else:
                q.put(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        time.sleep(.5)

if __name__ == "__main__":
    main()
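A likely reason the Array(c_char_p, ...) attempts in EDIT1 hang: c_char_p stores a pointer, and a pointer written by one process is meaningless in another process's address space. A minimal sketch of my own (not from the question or the answer below) that keeps the bytes themselves in shared memory uses a fixed-size Array of c_char instead:

from multiprocessing import Process, Array
import ctypes
import time

def worker(buf):
    count = 0
    while True:
        count += 1
        val = ('val' + str(count)).encode('utf-8')
        # buf is a fixed-size block of shared bytes, so the data itself
        # (not a pointer to it) lives in shared memory.
        buf.value = val
        time.sleep(2)

def main():
    buf = Array(ctypes.c_char, 32)  # 32 bytes of shared buffer
    p = Process(target=worker, args=(buf,))
    p.daemon = True
    p.start()
    while True:
        print(buf.value.decode('utf-8'))
        time.sleep(1)

if __name__ == '__main__':
    main()

That said, the answer below is right that a Queue or Pipe is usually the less error-prone choice.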
In my view, using Queue is a less error-prone solution than using an Array.
Here is your second example, converted to using a Queue:
from multiprocessing import Process, Queue
import time

def worker(q):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        q.put(val)
        print('worker:', val)
        time.sleep(2)

def main():
    q = Queue()
    p = Process(target=worker, args=(q, ))
    p.daemon = True
    p.start()
    while True:
        if not q.empty():
            print('main:', q.get())
        time.sleep(1)

if __name__ == "__main__":
    main()
This yields:
> python3 test3.py
worker: val1
main: val1
worker: val2
main: val2
worker: val3
main: val3
worker: val4
main: val4
worker: val5
Here is the same example using a Pipe:
from multiprocessing import Process, Pipe
import time

def worker(p):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        p.send(val)
        print('worker:', val)
        time.sleep(2)

def main():
    child, parent = Pipe()
    p = Process(target=worker, args=(child, ))
    p.daemon = True
    p.start()
    while True:
        if parent.poll():
            print('main:', parent.recv())
        time.sleep(1)

if __name__ == "__main__":
    main()
This produces the same result as the previous example.
Additionally, by default a pipe is bidirectional.
So you could also send back data from the workers to the parent.
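To illustrate that, here is a minimal sketch of my own (not part of the original answer) where the same connection carries traffic in both directions:

from multiprocessing import Process, Pipe
import time

def worker(conn):
    count = 0
    while True:
        # Reply to anything the parent has sent over the same pipe.
        if conn.poll():
            request = conn.recv()
            conn.send('reply to ' + str(request))
        count += 1
        conn.send('val' + str(count))
        time.sleep(2)

def main():
    parent, child = Pipe()  # both ends can send and receive
    p = Process(target=worker, args=(child,))
    p.daemon = True
    p.start()
    parent.send('hello from main')
    while True:
        while parent.poll():
            print('main:', parent.recv())
        time.sleep(1)

if __name__ == "__main__":
    main()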
I tried to run a very simple piece of multiprocessing code, but it still runs serially.
I have tried running it on Mac (macOS 10.13) and Linux (Ubuntu 18.04) with Python 2 and 3, and in both environments I had the same problem.
The function _process has to receive numpy arrays as arguments, so I decided to use multiprocessing.Process instead of multiprocessing.Pool.map() and multiprocessing.Pool.apply_async(), because pickling breaks when pool.map() is used inside a class: https://stackoverflow.com/a/21345308/4755986
import time
from multiprocessing import Process, Queue
import numpy as np

class model:
    def __init__(self):
        self.results = []
        self.jobs = []
        self.start = time.time()

    def _process(self, x, y, z):
        j = 0
        for i in range(10**8):
            j = i + j
        return j

    def work(self, X, Y, Z, result_queue):
        start = time.time() - self.start
        result = self._process(X, Y, Z)
        result_queue.put(result)
        print(result)
        end = time.time() - self.start
        print('start time: ', start)
        print('end time:', end)
        # return result_queue

    def fit(self, num):
        for i in range(num):
            X, Y, Z = np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2])
            result_queue = Queue()
            p = Process(target=self.work, args=(X, Y, Z, result_queue))
            self.jobs.append(p)
            p.start()
            print('ChildProcess...', i)
            result = result_queue.get()
            self.results.append(result)
        for p in self.jobs:
            p.join()
            p.close()
        return self.results

R = model()
k = R.fit(10)
print(k)
The start and end time of each process is printed, and the second process only starts after the first process has finished. This is strange because each process should automatically be assigned to a different core and run in parallel.
result = result_queue.get()
result_queue.get() will block if the queue is empty. An item is only added when a process finishes, hence the next process is spawned only after the previous one has finished.
Below is a version that does spawn 10 processes at once. I've marked the sections I've changed:
import time
from multiprocessing import Process, Queue
import numpy as np

class model:
    def __init__(self):
        self.results = []
        self.jobs = []
        self.start = time.time()

    def _process(self, x, y, z):
        j = 0
        for i in range(10**8):
            j = i + j
        return j

    def work(self, X, Y, Z, result_queue):
        start = time.time() - self.start
        result = self._process(X, Y, Z)
        result_queue.put(result)
        print(result)
        end = time.time() - self.start
        print('start time: ', start)
        print('end time:', end)
        # return result_queue

    def fit(self, num):
        result_queue = Queue()                 # <----- one queue shared by all workers
        for i in range(num):
            X, Y, Z = np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2])
            p = Process(target=self.work, args=(X, Y, Z, result_queue))
            self.jobs.append(p)
            p.start()
            print('ChildProcess...', i)
            # result = result_queue.get()      # <--- This blocks
            # self.results.append(result)
        for p in self.jobs:
            p.join()
            p.close()
        for _ in range(num):                   # <----- a Queue is not iterable,
            self.results.append(result_queue.get())  # so drain it with get()
        return self.results

R = model()
k = R.fit(10)
print(k)
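As an aside, if the only obstacle to Pool in the original question was pickling a bound method, a minimal sketch of my own (assuming the heavy loop can live in a module-level function) looks like this:

import time
from multiprocessing import Pool
import numpy as np

def heavy_work(args):
    # Module-level function, so it pickles cleanly with Pool.map().
    x, y, z = args
    j = 0
    for i in range(10**8):
        j += i
    return j

if __name__ == '__main__':
    inputs = [(np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2])) for _ in range(10)]
    start = time.time()
    with Pool() as pool:
        results = pool.map(heavy_work, inputs)
    print(results)
    print('elapsed:', time.time() - start)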
I have the following script:
import multiprocessing
import numpy as np

max_number = 100000
minimums = np.full((max_number), np.inf, dtype=np.float32)
data = np.zeros((max_number, 128, 128, 128), dtype=np.uint8)

def worker(start, end):
    for in_idx in range(start, end):
        value = data[start:end][in_idx]  # compute something using this array
        minimums[in_idx] = value

def main():
    jobs = []
    num_jobs = 5
    for i in range(num_jobs):
        start = int(i * (1000 / num_jobs))
        end = int(start + (1000 / num_jobs))
        p = multiprocessing.Process(name=('worker_' + str(i)), target=worker, args=(start, end))
        jobs.append(p)
        p.start()
    for proc in jobs:
        proc.join()
    print(jobs)

if __name__ == '__main__':
    main()
How can I ensure that the numpy array is global and can be accessed by each worker? Each worker uses a different part of the numpy array.
import numpy as np
import multiprocessing as mp

ar = np.zeros((5, 5))

def callback_function(result):
    x, y, data = result
    ar[x, y] = data

def worker(num):
    data = ar[num, num] + 3
    return num, num, data

def apply_async_with_callback():
    pool = mp.Pool(processes=5)
    for i in range(5):
        pool.apply_async(worker, args=(i,), callback=callback_function)
    pool.close()
    pool.join()
    print("Multiprocessing done!")

if __name__ == '__main__':
    ar = np.ones((5, 5))  # This will be used, as local scope comes before global scope
    apply_async_with_callback()
Explanation: You set up your data array and your worker and callback functions. The number of processes in the pool sets up that many independent workers, and each worker can do more than one task. The callback writes the result back into the array.
The __name__ == '__main__' guard protects the following lines from being run on each import.
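If the goal is for the workers themselves to write into genuinely shared memory rather than sending results back through a callback, a minimal sketch of my own (assuming a float64 array is acceptable) wraps a multiprocessing.Array with numpy.frombuffer:

import numpy as np
import multiprocessing as mp

def init_worker(shared_arr, shape):
    # Runs once per pool worker: wrap the shared buffer as a numpy array.
    global ar
    ar = np.frombuffer(shared_arr.get_obj(), dtype=np.float64).reshape(shape)

def worker(num):
    # Each worker touches a distinct diagonal element, so no lock is needed here.
    ar[num, num] = ar[num, num] + 3

if __name__ == '__main__':
    shape = (5, 5)
    shared_arr = mp.Array('d', shape[0] * shape[1])  # 'd' == C double == np.float64
    ar = np.frombuffer(shared_arr.get_obj(), dtype=np.float64).reshape(shape)
    ar[:] = np.ones(shape)
    with mp.Pool(processes=5, initializer=init_worker, initargs=(shared_arr, shape)) as pool:
        pool.map(worker, range(5))
    print(ar)  # the diagonal is now 4.0

The shared Array must be handed to the workers via the Pool initializer (inheritance), not through the task arguments, because shared ctypes objects cannot travel over the task queue.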
When I run a python script that uses multiprocessing I find it hard to get it to stop cleanly when it receives Ctrl-C. Ctrl-C has to be pressed multiple times and all sorts of error messages appear on the screen.
How can you make a Python script that uses multiprocessing quit cleanly when it receives Ctrl-C?
Take this script for example
import numpy as np, time
from multiprocessing import Pool

def countconvolve(N):
    np.random.seed()  # ensure seed is random
    count = 0
    iters = 1000000  # 1 million
    l = 12
    k = 12
    l0 = l + k - 1

    for n in range(N):
        t = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l0 * iters)
        v = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l * iters)
        for i in xrange(iters):
            if (not np.convolve(v[(l*i):(l*(i+1))],
                                t[(l0*i):(l0*(i+1))], 'valid').any()):
                count += 1
    return count

if __name__ == '__main__':
    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    res = pool.map(countconvolve, [N] * num_processes)
    print res, sum(res)
    print (time.clock() - start)
Jon's solution is probably better, but here is a version using a signal handler. I tried it in a VirtualBox VM, which was extremely slow but worked. I hope it helps.
import numpy as np, time
from multiprocessing import Pool
import signal

# define pool as global
pool = None

def term_signal_handler(signum, frame):
    global pool
    print 'CTRL-C pressed'
    try:
        pool.close()
        pool.join()
    except AttributeError:
        print 'Pool has been already closed'

def countconvolve(N):
    np.random.seed()  # ensure seed is random
    count = 0
    iters = 1000000  # 1 million
    l = 12
    k = 12
    l0 = l + k - 1

    for n in range(N):
        t = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l0 * iters)
        v = np.random.choice(np.array([-1, 1], dtype=np.int8), size=l * iters)
        for i in xrange(iters):
            if (not np.convolve(v[(l*i):(l*(i+1))], t[(l0*i):(l0*(i+1))], 'valid').any()):
                count += 1
    return count

if __name__ == '__main__':
    # Register the signal handler
    signal.signal(signal.SIGINT, term_signal_handler)

    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    res = pool.map(countconvolve, [N] * num_processes)
    print res, sum(res)
    print (time.clock() - start)
I believe the try/except approach mentioned in a similar post here on SO could be adapted to cover it.
If you wrap the pool.map call in a try/except and then call terminate and join, I think that would do it (see the sketch at the end of this answer).
[Edit]
Some experimentation suggests something along these lines works well:
from multiprocessing import Pool
import random
import time

def countconvolve(N):
    try:
        sleepTime = random.randint(0, 5)
        time.sleep(sleepTime)
        count = sleepTime
    except KeyboardInterrupt as e:
        pass
    return count

if __name__ == '__main__':
    random.seed(0)
    start = time.clock()
    num_processes = 8
    N = 13
    pool = Pool(processes=num_processes)
    try:
        res = pool.map(countconvolve, [N] * num_processes)
        print res, sum(res)
        print (time.clock() - start)
    except KeyboardInterrupt as e:
        print 'Stopping..'
I simplified your example somewhat to avoid having to load numpy on my machine for testing, but the critical part is the two try/except blocks which handle the Ctrl-C key presses.
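For completeness, here is a minimal sketch of my own (in Python 3 syntax) that also calls terminate and join in the except block, as suggested above:

from multiprocessing import Pool
import random
import time

def countconvolve(N):
    try:
        sleep_time = random.randint(0, 5)
        time.sleep(sleep_time)
        return sleep_time
    except KeyboardInterrupt:
        return 0

if __name__ == '__main__':
    pool = Pool(processes=8)
    try:
        res = pool.map(countconvolve, [13] * 8)
        print(res, sum(res))
    except KeyboardInterrupt:
        print('Stopping..')
        pool.terminate()  # stop the workers immediately
    else:
        pool.close()      # no more tasks; let the workers exit normally
    finally:
        pool.join()       # wait for the worker processes to shut down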