I tried to run a very simple piece of multiprocessing code, but the work is still processed serially.
I have tried to run it on a Mac (macOS 10.13) and on Linux (Ubuntu 18.04) with both Python 2 and 3, and in both environments I had the same problem.
The function _process has to receive numpy arrays as arguments, so I decided to use multiprocessing.Process instead of multiprocessing.Pool.map() and multiprocessing.Pool.apply_async(), because pickling is broken when pool.map() is used on a method inside a class: https://stackoverflow.com/a/21345308/4755986
import time
from multiprocessing import Process, Queue
import numpy as np

class model:
    def __init__(self):
        self.results = []
        self.jobs = []
        self.start = time.time()

    def _process(self, x, y, z):
        j = 0
        for i in range(10**8):
            j = i + j
        return j

    def work(self, X, Y, Z, result_queue):
        start = time.time() - self.start
        result = self._process(X, Y, Z)
        result_queue.put(result)
        print(result)
        end = time.time() - self.start
        print('start time: ', start)
        print('end time:', end)
        # return result_queue

    def fit(self, num):
        for i in range(num):
            X, Y, Z = np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2])
            result_queue = Queue()
            p = Process(target=self.work, args=(X, Y, Z, result_queue))
            self.jobs.append(p)
            p.start()
            print('ChildProcess...', i)
            result = result_queue.get()
            self.results.append(result)
        for p in self.jobs:
            p.join()
            p.close()
        return self.results

R = model()
k = R.fit(10)
print(k)
The start and end times of each process are printed, and the second process only starts after the first one has finished. This is strange, because each process should automatically be assigned to a different core and run in parallel.
result = result_queue.get()
result_queue.get() will block while the queue is empty. An item is only added when a process finishes, hence the next process is spawned only after the previous one has finished.
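To see the blocking behavior in isolation, here is a minimal standalone sketch (Python 3 names; not part of the original code): get() waits until an item arrives, while get_nowait() raises queue.Empty immediately.

import queue  # only for the queue.Empty exception
from multiprocessing import Queue

q = Queue()
try:
    q.get_nowait()     # empty queue: raises immediately instead of waiting
except queue.Empty:
    print('queue is empty')

q.put(42)
print(q.get())         # an item is available, so this returns at once
# q.get() on the now-empty queue would block the caller indefinitely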
Below is a version that does spawn 10 processes at once. I've marked the lines I've changed:
import time
from multiprocessing import Process, Queue
import numpy as np

class model:
    def __init__(self):
        self.results = []
        self.jobs = []
        self.start = time.time()

    def _process(self, x, y, z):
        j = 0
        for i in range(10**8):
            j = i + j
        return j

    def work(self, X, Y, Z, result_queue):
        start = time.time() - self.start
        result = self._process(X, Y, Z)
        result_queue.put(result)
        print(result)
        end = time.time() - self.start
        print('start time: ', start)
        print('end time:', end)
        # return result_queue

    def fit(self, num):
        result_queue = Queue()  # <----- one queue shared by all processes
        for i in range(num):
            X, Y, Z = np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2])
            p = Process(target=self.work, args=(X, Y, Z, result_queue))
            self.jobs.append(p)
            p.start()
            print('ChildProcess...', i)
            #result = result_queue.get()  # <--- This blocks
            #self.results.append(result)
        for _ in self.jobs:                           # <----- drain the queue
            self.results.append(result_queue.get())  # <----- before joining
        for p in self.jobs:
            p.join()
            p.close()
        return self.results

R = model()
k = R.fit(10)
print(k)
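As an aside on the pickling worry from the question: in Python 3, bound methods pickle fine, so multiprocessing.Pool works here too. A minimal sketch, assuming Python 3 and reusing the model class and imports above:

from multiprocessing import Pool

if __name__ == '__main__':
    m = model()
    # starmap unpacks each argument tuple into _process(x, y, z)
    args = [(np.ones([5, 5]), np.ones([3, 3]), np.ones([2, 2]))
            for _ in range(10)]
    with Pool(processes=4) as pool:
        results = pool.starmap(m._process, args)
    print(results)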
SYSTEM
Linux (Manjaro KDE)
Python 3.8.3
PROGRAM:
I have incoming string data on a UDP port. The main loop spools up the processes prior to using selectors to monitor the UDP port. I want the UDP data, which is constantly updated, available for each process.
TRIED:
Multiprocessing Queues with maxsize = 1; that became a headache and quickly broke down.
Multiprocessing Arrays (this is where I'm at now)
I have checked, and the Array at each location I'm looking at has the same memory address (I think). For whatever reason, when I try to access the contents of the Array in the child process, the process hangs.
NOT TRIED
Pipes. I have a feeling this may be the way to go. But I'm already deep in uncharted territory; I've never used them before.
WHAT I WANT
I would like to access the UDP data from the child processes; these run the camera_view function.
Dummy UDP string
import socket
import random
import datetime
import time

conn = ('127.0.0.1', 6666)

def rand_value(f_val, t_val):
    result = round(random.uniform(f_val, t_val), 2)
    return result

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

while True:
    time.sleep(6)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    overlay = timestamp
    for i in range(9):
        val = rand_value(i*10, i*10+10)
        if i == 8:
            val = 'TASK: Im the real Batman'
        overlay = overlay + "," + str(val)
    print(overlay)
    sock.sendto(overlay.encode(), conn)
My Program
import datetime
import selectors
import socket
import time
from multiprocessing import Lock, Process, Queue
from multiprocessing.sharedctypes import Array
from ctypes import c_char_p

import cv2  # needed for Camera.capture below

REQUIRED_CAMERAS = 1
CAMERA_CONN = {'name': ['Colour Camera'], 'ip': ['127.0.0.1'], 'port': [9000]}
OVERLAY_CONN = ('0.0.0.0', 6666)
CONTROL_CONN = ('0.0.0.0', 6667)

NUMBER_OF_ITEMS_IN_OVERLAY = 10

class Camera():
    def __init__(self, cam_name, cam_ip, cam_port):
        self.ip = cam_ip
        self.port = cam_port
        self.capture = cv2.VideoCapture(0)
        self.frame_width = int(self.capture.get(3))
        self.frame_height = int(self.capture.get(4))
        self.name = cam_name

def get_overlay(data_packet):
    data = data_packet.decode()
    data = data.split(',')
    field0 = data[0]
    field1 = 'KP: ' + str(round(float(data[1]), 3))
    field2 = 'DCC: ' + str(round(float(data[2]), 2)) + 'm'
    field3 = 'E: ' + str(round(float(data[3]), 2)) + 'm'
    field4 = 'N: ' + str(round(float(data[4]), 2)) + 'm'
    field5 = 'D: ' + str(round(float(data[5]), 2)) + 'm'
    field6 = 'H: ' + str(round(float(data[6]), 2))  # + '°'
    field7 = 'R: ' + str(round(float(data[7]), 2))  # + '°'
    field8 = 'P: ' + str(round(float(data[8]), 2))  # + '°'
    field9 = data[9]
    x = []
    for i in range(NUMBER_OF_ITEMS_IN_OVERLAY):
        x.append(eval('field' + str(i)).encode())
        # if i == 0:
        #     print(x[i])
    return x

def socket_reader(sock, mask, q, REQUIRED_CAMERAS, overlay):
    data_packet, sensor_ip = sock.recvfrom(1024)
    sensor_port = sock.getsockname()[1]
    print(f'SENSOR PORT {sensor_port} and SENSOR_IP {sensor_ip}')
    if sensor_port == OVERLAY_CONN[1]:
        x = get_overlay(data_packet)
        for i in range(len(x)):
            overlay[i] = x[i]
        print(f'Socket Reader {overlay}')

def camera_view(CAMERA_CONN, cam_name, camera, overlay_q, control_q, overlay):
    while True:
        print(f'PROCESS {camera} RUNNING FOR: {cam_name}')
        try:
            print(f'Camera View {overlay}')
            for i in range(len(overlay)):
                print(overlay[i])
        except:
            pass
        time.sleep(1)

def controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN):
    if REQUIRED_CAMERAS > len(CAMERA_CONN['name']):
        print(f'REQUIRED_CAMERAS: {REQUIRED_CAMERAS} - more than connections in CAMERA_CONN')
    else:
        # Set up a UDP connection for the overlay string and the control commands
        sock_overlay = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_control = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock_overlay.bind(OVERLAY_CONN)
        sock_control.bind(CONTROL_CONN)

        # Set up the selector to watch over the socket
        # and trigger when data is ready for reading
        sel = selectors.DefaultSelector()
        sel.register(fileobj=sock_overlay, events=selectors.EVENT_READ, data=socket_reader)
        sel.register(fileobj=sock_control, events=selectors.EVENT_READ, data=socket_reader)

        # create shared memory
        overlay_q = Queue(maxsize=1)
        control_q = Queue(maxsize=1)
        overlay = Array(c_char_p, range(NUMBER_OF_ITEMS_IN_OVERLAY))
        print(f'Init Overlay {overlay}')

        # Generate the processes; one per camera
        processes = []
        for camera in range(REQUIRED_CAMERAS):
            processes.append(Process(target=camera_view, args=(CAMERA_CONN, CAMERA_CONN['name'][camera], camera, overlay_q, control_q, overlay)))
        for process in processes:
            process.daemon = True
            process.start()

        # Spin over the selector
        while True:
            # Only have one connection registered, so to stop
            # the loop spinning up the CPU, I have made it blocking
            # with timeout=1 (sec) instead of timeout=0.
            events = sel.select(timeout=None)
            for key, mask in events:
                # the selector callback is the data= from the register above
                callback = key.data
                # the callback gets the sock, mask and the sensor queues
                if key.fileobj == sock_overlay:
                    callback(key.fileobj, mask, overlay_q, REQUIRED_CAMERAS, overlay)
                else:
                    callback(key.fileobj, mask, control_q, REQUIRED_CAMERAS, overlay)

if __name__ == "__main__":
    controller(REQUIRED_CAMERAS, CAMERA_CONN, OVERLAY_CONN, CONTROL_CONN)
EDIT1:
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = val
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except:
            pass
        # try:
        #     val = arr[:]
        #     val = val.decode('utf-8')
        #     print(f'main {val}')
        # except:
        #     pass
        time.sleep(1)

if __name__ == "__main__":
    main()
'''
from multiprocessing import Process, Array
from ctypes import c_char_p
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        arr[0] = bytes(val, 'utf-8')
        print(arr[:])
        time.sleep(2)

def main():
    arr = Array(c_char_p, 1)
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr[:])
        try:
            print(arr[:].decode('utf-8'))
        except:
            pass
        time.sleep(1)

if __name__ == "__main__":
    main()
'''
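For what it's worth, the likely reason both versions above hang is that c_char_p stores a raw pointer, and a pointer assigned in one process is meaningless in another. A fixed-size byte buffer avoids pointers entirely. A minimal sketch of that variant (this is my assumption about the cause, not something confirmed in this thread):

from multiprocessing import Process, Array
import time

def worker(arr):
    count = 0
    while True:
        count += 1
        # .value copies the bytes into the shared buffer, no pointers involved
        arr.value = bytes('val' + str(count), 'utf-8')
        time.sleep(2)

def main():
    arr = Array('c', 32)  # 32-byte shared buffer of c_char
    p = Process(target=worker, args=(arr,))
    p.daemon = True
    p.start()
    while True:
        print(arr.value.decode('utf-8'))
        time.sleep(1)

if __name__ == "__main__":
    main()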
EDIT2:
Thanks to @RolandSmith, I have persevered with Queues, and I think I have a template for how I can move forward. See the code below. If I can't get this to work in my program, I'll be back here.
from multiprocessing import Process, Queue
import time
import datetime

def worker(camera, q):
    val = ''
    while True:
        if q.full():
            val = q.get()
        print(f'WORKER{camera} {val}')
        time.sleep(0.2)

def main():
    cameras = 2
    processes = []
    queues = []
    for camera in range(cameras):
        queues.append(Queue(maxsize=1))
        processes.append(Process(target=worker, args=(camera, queues[camera])))
    for process in processes:
        process.daemon = True
        process.start()
    while True:
        for q in queues:
            if not q.empty():
                try:
                    _ = q.get()
                except:
                    pass
            else:
                q.put(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        time.sleep(.5)

if __name__ == "__main__":
    main()
In my view, using Queue is a less error-prone solution than using an Array.
Here is your second example, converted to using a Queue:
from multiprocessing import Process, Queue
import time

def worker(q):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        q.put(val)
        print('worker:', val)
        time.sleep(2)

def main():
    q = Queue()
    p = Process(target=worker, args=(q, ))
    p.daemon = True
    p.start()
    while True:
        if not q.empty():
            print('main:', q.get())
        time.sleep(1)

if __name__ == "__main__":
    main()
This yields:
> python3 test3.py
worker: val1
main: val1
worker: val2
main: val2
worker: val3
main: val3
worker: val4
main: val4
worker: val5
Here is the same example using a Pipe:
from multiprocessing import Process, Pipe
import time

def worker(p):
    count = 0
    while True:
        count += 1
        val = 'val' + str(count)
        p.send(val)
        print('worker:', val)
        time.sleep(2)

def main():
    child, parent = Pipe()
    p = Process(target=worker, args=(child, ))
    p.daemon = True
    p.start()
    while True:
        if parent.poll():
            print('main:', parent.recv())
        time.sleep(1)

if __name__ == "__main__":
    main()
This produces the same result as the previous example.
Additionally, by default a pipe is bidirectional.
So you could also send back data from the workers to the parent.
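As a minimal sketch of that, the parent can send a request over the same connection and the worker can answer it:

from multiprocessing import Process, Pipe

def worker(conn):
    # receive a request from the parent, send back a reply
    request = conn.recv()
    conn.send('echo: ' + request)

def main():
    parent, child = Pipe()  # both ends are bidirectional
    p = Process(target=worker, args=(child, ))
    p.start()
    parent.send('hello')    # parent -> worker
    print(parent.recv())    # worker -> parent: 'echo: hello'
    p.join()

if __name__ == "__main__":
    main()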
I'm new to Queues and Threads, but while I have gotten my program to work and run 2 motor threads separately from each other, I've noticed that there is still a point in time when one will wait for the other to finish.
I have 2 motors on linear rails. In order to validate their consistency, repeatability, and accuracy, the program runs a home test: it gives each motor a fixed number of tests and a position to go to, runs it back and forth, logs the data, and tells me how close we got, both in steps and in distance. The odd behavior is that one motor waits for the other to complete its cycle before starting its own next cycle.
My guess is that this is a problem with serial reads: the program is held up waiting for a serial read on one motor and cannot service the other. Can anyone confirm this or suggest a workaround?
Code for the test, wrapped in a class
def home(self):
    DIConf = None
    debugStart = time.time()
    while DIConf != -1:
        self.anyCMD('DI-1')
        while DIConf == None:
            DIConf = self.checkReturn('DI')
            if time.time() - debugStart >= 5:
                print 'DIConf = ' + str(DIConf)
                return 'Homing Timeout'
    self.s.write('SH1H\r')

def homeTest(self, numTests = 30, testPos = 100000):
    if numTests == None:
        numTests = 30
    if testPos == None:
        testPos = 100000
    self.tests = numTests
    self.testingPos = testPos
    self.B = np.zeros((self.tests, 4))  # bare 'tests' is undefined here
    for i in range(0, self.tests):
        # print i
        self.setPos(0)
        self.start = time.time()
        self.gotoPos(self.testingPos)
        self.s.write('WM\r')
        time.sleep(2)
        while self.getPos() < self.testingPos:
            pass
        self.testPosConfirm = self.getPos()
        self.home()  # (setHomePos = 1)
        self.s.write('WM\r')
        time.sleep(2)
        self.end = time.time()
        self.backHome = self.getPos()
        self.B[i][0] = self.testPosConfirm
        self.B[i][1] = self.backHome
        self.B[i][2] = (float(self.backHome)/20000) * 3.175
        self.B[i][3] = self.end - self.start
    return self.B
and code for the queue/threads
import Queue
import threading

a = Queue.Queue()
b = Queue.Queue()

y = motor(DA = 1)
z = motor(DA = 2)

def yQueue(tempY, tempA, cycles = None, defPos = None):
    tempVar = tempY.homeTest(cycles, defPos)
    tempA.put(tempVar)

if 1 == 1:
    thread = threading.Thread(target = yQueue, args = [y, a, 5, 50000])
    thread2 = threading.Thread(target = yQueue, args = [z, b, 5, 600000])
    thread.start()
    thread2.start()
As suggested below, I attempted the same thing with multiprocessing and got similar results.
Code:
import multiprocessing as mp

p = mp.Process(target = yQueue, args = [y, a, 3, 40000])
p.start()
p2 = mp.Process(target = yQueue, args = [z, b, 3, 500000])
p2.start()
p.join()   # join both only after both have started;
p2.join()  # joining p before starting p2 would serialize the runs
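One thing to check first: if both motor instances talk through a single shared serial port, every command/response exchange will serialize the two tests no matter how they are scheduled. A minimal sketch of giving each motor its own connection (assuming pyserial; the constructor, baud rate, and device paths are made-up placeholders, not the real motor class):

import serial  # pyserial, assumed; the original motor class is not shown

class motor(object):
    def __init__(self, DA, port):
        self.DA = DA
        # one dedicated port per motor: a blocking read on this port
        # can no longer stall commands sent to the other motor
        self.s = serial.Serial(port, 9600, timeout=1)  # placeholder settings

y = motor(DA=1, port='/dev/ttyUSB0')  # made-up device path
z = motor(DA=2, port='/dev/ttyUSB1')  # made-up device path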
I have the following script:
import multiprocessing
import numpy as np

max_number = 100000
minimums = np.full((max_number), np.inf, dtype=np.float32)
data = np.zeros((max_number, 128, 128, 128), dtype=np.uint8)

def worker(start, end):
    for in_idx in range(start, end):
        value = data[in_idx].min()  # placeholder: compute something using this array
        minimums[in_idx] = value

def main():
    jobs = []
    num_jobs = 5
    for i in range(num_jobs):
        start = int(i * (1000 / num_jobs))
        end = int(start + (1000 / num_jobs))
        p = multiprocessing.Process(name=('worker_' + str(i)), target=worker, args=(start, end))
        jobs.append(p)
        p.start()
    for proc in jobs:
        proc.join()
    print(jobs)

if __name__ == '__main__':
    main()
How can I ensure that the numpy array is global and can be accessed by each worker? Each worker uses a different part of the numpy array.
import numpy as np
import multiprocessing as mp

ar = np.zeros((5,5))

def callback_function(result):
    x, y, data = result
    ar[x, y] = data

def worker(num):
    data = ar[num, num] + 3
    return num, num, data

def apply_async_with_callback():
    pool = mp.Pool(processes=5)
    for i in range(5):
        pool.apply_async(worker, args = (i, ), callback = callback_function)
    pool.close()
    pool.join()
    print "Multiprocessing done!"

if __name__ == '__main__':
    ar = np.ones((5,5))  # This will be used, as local scope comes before global scope
    apply_async_with_callback()
Explanation: You set up your data array and your worker and callback functions. The number of processes in the pool determines the number of independent workers, and each worker can do more than one task. The callback writes the result back to the array.
The __name__ == '__main__' guard protects the following lines from being run at each import.
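If the workers must write into a large shared array themselves, rather than funnel results through a callback, a shared-memory buffer wrapped in a numpy view is a common alternative. A minimal sketch, assuming a flat float array and a fork-style process start (on spawn, the children would get their own copies):

import numpy as np
import multiprocessing as mp

# RawArray allocates shared memory; np.frombuffer views it without copying
shared = mp.RawArray('f', 10)
ar = np.frombuffer(shared, dtype=np.float32)

def worker(start, end):
    ar[start:end] = np.arange(start, end)  # writes land in shared memory

if __name__ == '__main__':
    jobs = [mp.Process(target=worker, args=(i * 5, (i + 1) * 5)) for i in range(2)]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()
    print(ar)  # the parent sees the children's writes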
multi() freezes somewhere in the middle of its activity:
# mp, time, and the helpers (nohead, xlsx2array, chunks, stopwatch) are defined elsewhere

def current_proc():
    print mp.current_process().name, 'started'

def multi(fn, func):
    print 'Process started on', time.strftime('%H:%M:%S')
    count = mp.cpu_count()*2
    input = nohead(xlsx2array(fn))
    parts = chunks(input, 10)
    pool = mp.Pool(processes = count, initializer = current_proc, maxtasksperchild = 1)
    for part in parts:
        with stopwatch() as r: pool.map(func, part)
    return r
    pool.close()
    pool.join()
I am using multiprocessing with this function to get the effective URLs:
import requests

def query(i):
    attempts = 2
    while attempts:
        try:
            q = requests.get(i, allow_redirects = True, verify = False, timeout = 2)
            match = q.url
            match = str(match)
            break
        except:
            attempts -= 1
            match = 'pattern not found'
            pass
    return [i, match]
Please advise how I can avoid such freezing. Thanks.
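One pattern that avoids an indefinite hang is to collect results through imap_unordered with a per-result timeout, so a single stuck request cannot freeze the whole map. A minimal sketch (the pool size and timeout are arbitrary placeholders, not from the original code; note that timeout=2 in requests.get only bounds the connect and each read, not the whole call):

import multiprocessing as mp

def run_with_timeouts(func, items, timeout=10):
    # imap_unordered hands results back as they finish; next(timeout=...)
    # raises mp.TimeoutError instead of waiting forever on a stuck worker
    pool = mp.Pool(processes=4)
    it = pool.imap_unordered(func, items)
    results = []
    for _ in items:
        try:
            results.append(it.next(timeout=timeout))
        except mp.TimeoutError:
            results.append(None)  # a task overran its budget; move on
    pool.terminate()  # don't close()/join(): a worker may still be hung
    return results

# usage: run_with_timeouts(query, urls) returns a result (or None) per URL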
I have the following code, which converts a graph from an edge list to an adjacency matrix:
for line in open('graph.txt'):
    converted = [sparse_to_dense.get(int(ID)) for ID in line.split()]
    i = converted[0]
    j = converted[1]
    I.append(i)
    J.append(j)
n = max([max(I), max(J)]) + 1
data = [1]*len(I)
return coo_matrix((data, (I,J)), shape=(n,n), dtype='i1')
This code is awfully slow: on my machine, converting 500k edges takes hours. On the other hand, I/O is obviously not the bottleneck (I can read the full file into memory almost instantaneously), so I think there is room for parallelism. But I'm not sure how to proceed: should I read the file in parallel, or something else?
One way to do it with multiprocessing is this. I did not check it, and it could be further improved:
import multiprocessing
import Queue  # for the Queue.Empty exception
from scipy.sparse import coo_matrix

# sparse_to_dense and fname are assumed to come from the surrounding code

class Worker(multiprocessing.Process):
    def __init__(self, queue, results):
        multiprocessing.Process.__init__(self)
        self.q = queue
        self.results = results

    def run(self):
        while True:
            try:
                lineno, linecontents = self.q.get(block=False)
            except Queue.Empty:
                break
            converted = [sparse_to_dense.get(int(ID)) for ID in linecontents.split()]
            i = converted[0]
            j = converted[1]
            self.results.put((i, j))

def main():
    q = multiprocessing.Queue()
    results = multiprocessing.JoinableQueue()
    for i, l in enumerate(open(fname)):
        q.put((i, l))
    workers = []
    for _ in xrange(4):
        w = Worker(q, results)
        w.start()
        workers.append(w)
    I, J = [], []
    # drain results while the workers run; joining them first could
    # deadlock once the results queue's buffer fills up
    while any(w.is_alive() for w in workers) or not results.empty():
        try:
            i, j = results.get(timeout=0.1)
        except Queue.Empty:
            continue
        I.append(i)
        J.append(j)
        results.task_done()
    results.join()
    n = max([max(I), max(J)]) + 1
    data = [1]*len(I)
    coo = coo_matrix((data, (I,J)), shape=(n,n), dtype='i1')
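If sparse_to_dense is a plain module-level dict (so forked workers inherit it), a shorter route is Pool.map over the lines. A minimal sketch of that alternative, not checked against the real data:

import multiprocessing
from scipy.sparse import coo_matrix

def convert_line(line):
    # module-level function, so it pickles cleanly for Pool.map
    i, j = [sparse_to_dense.get(int(ID)) for ID in line.split()]
    return i, j

def build_matrix(fname):
    pool = multiprocessing.Pool(processes=4)
    # chunksize batches lines per task, amortizing the IPC overhead
    pairs = pool.map(convert_line, open(fname), 10000)
    pool.close()
    pool.join()
    I, J = zip(*pairs)
    n = max(max(I), max(J)) + 1
    return coo_matrix(([1]*len(I), (I, J)), shape=(n, n), dtype='i1')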