I have the following problem. I am running a parallel task and getting this error:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "eclat_model.py", line 127, in do_work
function(*args, work_queue, valid_list)
File "eclat_model.py", line 115, in eclat_parallel_helper
valid_list.extend(next_vectors)
File "<string>", line 2, in extend
File "/usr/lib/python3.8/multiprocessing/managers.py", line 834, in _callmethod
conn.send((self._id, methodname, args, kwds))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Relevant functions in eclat_model.py look like this:
def eclat_parallel_helper(index, bit_vectors, min_support, work_queue, valid_list):
    next_vectors = []
    for j in range(index + 1, len(bit_vectors)):
        item_vector = bit_vectors[index][0] | bit_vectors[j][0]
        transaction_vector = bit_vectors[index][1] & bit_vectors[j][1]
        support = get_vector_support(transaction_vector)
        if support >= min_support:
            next_vectors.append((item_vector, transaction_vector, support))
    if len(next_vectors) > 0:
        valid_list.extend(next_vectors)
        for i in range(len(next_vectors)):
            work_queue.put((eclat_parallel_helper, (i, next_vectors, min_support)))
def do_work(work_queue, valid_list, not_done):
    # work queue entries have the form (function, args)
    while not_done.value:
        try:
            function, args = work_queue.get_nowait()
        except QueueEmptyError:  # presumably queue.Empty, imported under this alias
            continue
        function(*args, work_queue, valid_list)
        work_queue.task_done()
    work_queue.close()
EDIT:
The multiprocessing part of the code is shown below. bit_vectors is a list of lists, where each entry is of the form [items, transactions, support]: items is a bit vector encoding which items appear in the itemset, transactions is a bit vector encoding which transactions the itemset appears in, and support is the number of transactions in which the itemset occurs. A small illustration comes first, then the multiprocessing code.
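For illustration only, here are two hypothetical entries, using Python ints as bit vectors (bit i set in transactions means the itemset occurs in transaction i):

a = [0b001, 0b1011, 3]  # itemset {item0}: transactions 0, 1, 3 -> support 3
b = [0b010, 0b0011, 2]  # itemset {item1}: transactions 0, 1 -> support 2

# combined the way eclat_parallel_helper does it:
item_vector = a[0] | b[0]         # 0b011 -> candidate itemset {item0, item1}
transaction_vector = a[1] & b[1]  # 0b0011 -> both in transactions 0 and 1 -> support 2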
from multiprocessing import Process, JoinableQueue, Manager, Value, cpu_count

def eclat_parallel(bit_vectors, min_support):
    not_done = Value('i', 1)
    manager = Manager()
    valid_list = manager.list()
    work_queue = JoinableQueue()
    for i in range(len(bit_vectors)):
        work_queue.put((eclat_parallel_helper, (i, bit_vectors, min_support)))

    processes = []
    for i in range(cpu_count()):
        p = Process(target=do_work, args=(work_queue, valid_list, not_done), daemon=True)
        p.start()
        processes.append(p)

    work_queue.join()
    not_done.value = 0
    work_queue.close()

    valid_itemset_vectors = bit_vectors
    for element in valid_list:
        valid_itemset_vectors.append(element)

    for p in processes:
        p.join()

    return valid_itemset_vectors
What does this error mean, please? Am I appending too many elements to the next_vectors list?
I had the same issue; in my case, adding a short delay (time.sleep(0.01)) solved it.
The problem is that the individual processes poll the queue too quickly, which causes the error.
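In my case the sleep went into the worker's polling loop, roughly like this (placement is from memory, so treat it as a sketch):

import time

def do_work(work_queue, valid_list, not_done):
    while not_done.value:
        try:
            function, args = work_queue.get_nowait()
        except QueueEmptyError:
            time.sleep(0.01)  # brief pause so idle workers do not hammer the queue
            continue
        function(*args, work_queue, valid_list)
        work_queue.task_done()
    work_queue.close()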
I am using pymodbus to test my electronic board. With the RTU version I don't have any problems, but when I test the TCP part, pymodbus fails and I don't understand why.
The Python version is 3.9, and pymodbus is 2.5.3.
My code is:
from pymodbus.client.sync import ModbusTcpClient as ModbusClient

SERVER_HOST = '192.168.1.123'
SERVER_PORT = 502

if __name__ == '__main__':
    c = ModbusClient(SERVER_HOST, port=SERVER_PORT, timeout=4)
    c.connect()
    if not c.is_socket_open():
        if not c.connect():
            print("unable to connect!")

    ver = c.read_input_registers(
        address=0x05,
        count=1,
        unit=1)
    assert(not ver.isError())
The error is:
Traceback (most recent call last):
File "/Users/warcomeb/Projects/AMMICROSYSTEMS/LOCCIONI_BatterySimulator_CFirmware/test/test_modbustcp_basic.py", line 138, in <module>
ver = c.read_input_registers(
File "/usr/local/lib/python3.9/site-packages/pymodbus/client/common.py", line 125, in read_input_registers
return self.execute(request)
File "/usr/local/lib/python3.9/site-packages/pymodbus/client/sync.py", line 109, in execute
return self.transaction.execute(request)
File "/usr/local/lib/python3.9/site-packages/pymodbus/transaction.py", line 174, in execute
response, last_exception = self._transact(
File "/usr/local/lib/python3.9/site-packages/pymodbus/transaction.py", line 271, in _transact
packet = self.client.framer.buildPacket(packet)
File "/usr/local/lib/python3.9/site-packages/pymodbus/framer/socket_framer.py", line 206, in buildPacket
data = message.encode()
File "/usr/local/lib/python3.9/site-packages/pymodbus/register_read_message.py", line 33, in encode
return struct.pack('>HH', self.address, self.count)
struct.error: required argument is not an integer
In the examples (https://github.com/riptideio/pymodbus/blob/dev/examples/common/synchronous_client.py) the arguments are different:
arguments = {
    "read_address": 1,
    "read_count": 8,
    "write_address": 1,
    "write_registers": [20] * 8,
}
log.debug("Read write registers simultaneously")
rq = client.readwrite_registers(unit=UNIT, **arguments)
rr = client.read_holding_registers(1, 8, unit=UNIT)
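For comparison, the same request in the example's positional style would presumably be (just a sketch; I don't know whether it avoids the struct.error):

# address and count passed positionally, mirroring read_holding_registers(1, 8, unit=UNIT)
ver = c.read_input_registers(0x05, 1, unit=1)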
I am attempting to perform an SQL insertion from a thread, and I am losing the connection and getting an error back from MySQL. I am unable to identify the problem on my own, so I would like to ask Stack Overflow what could be the source of the error.
I have attached the error message first, then the code that is being run.
Error
Exception in thread Thread-160:
Traceback (most recent call last):
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 161, in send_plain
self.sock.sendall(packet)
File "/usr/lib/python3.6/ssl.py", line 975, in sendall
v = self.send(byte_view[count:])
File "/usr/lib/python3.6/ssl.py", line 944, in send
return self._sslobj.write(data)
File "/usr/lib/python3.6/ssl.py", line 642, in write
return self._sslobj.write(data)
ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:2162)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "trt_yolo_tracklite_mysql.py", line 148, in mysql_insert
cursor.execute(sql_op, (copy_today, copy_time, confidence, identity, x1, y1, x2, y2, img_blob))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/cursor.py", line 568, in execute
self._handle_result(self._connection.cmd_query(stmt))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 846, in cmd_query
result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 495, in _send_cmd
packet_number, compressed_packet_number)
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 164, in send_plain
errno=2055, values=(self.get_address(), _strioerror(err)))
mysql.connector.errors.OperationalError: 2055: Lost connection to MySQL server at '192.168.1.131:3306', system error: 8 EOF occurred in violation of protocol (_ssl.c:2162)
and it throws similar errors for every iteration of the loop
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "trt_yolo_tracklite_mysql.py", line 148, in mysql_insert
cursor.execute(sql_op, (copy_today, copy_time, confidence, identity, x1, y1, x2, y2, img_blob))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/cursor.py", line 568, in execute
self._handle_result(self._connection.cmd_query(stmt))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 846, in cmd_query
result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 495, in _send_cmd
packet_number, compressed_packet_number)
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 164, in send_plain
errno=2055, values=(self.get_address(), _strioerror(err)))
mysql.connector.errors.OperationalError: 2055: Lost connection to MySQL server at '192.168.1.131:3306', system error: 8 EOF occurred in violation of protocol (_ssl.c:2162)
Exception in thread Thread-164:
Traceback (most recent call last):
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 161, in send_plain
self.sock.sendall(packet)
File "/usr/lib/python3.6/ssl.py", line 975, in sendall
v = self.send(byte_view[count:])
File "/usr/lib/python3.6/ssl.py", line 944, in send
return self._sslobj.write(data)
File "/usr/lib/python3.6/ssl.py", line 642, in write
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "trt_yolo_tracklite_mysql.py", line 148, in mysql_insert
cursor.execute(sql_op, (copy_today, copy_time, confidence, identity, x1, y1, x2, y2, img_blob))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/cursor.py", line 568, in execute
self._handle_result(self._connection.cmd_query(stmt))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 846, in cmd_query
result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/connection.py", line 495, in _send_cmd
packet_number, compressed_packet_number)
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 164, in send_plain
errno=2055, values=(self.get_address(), _strioerror(err)))
mysql.connector.errors.OperationalError: 2055: Lost connection to MySQL server at '192.168.1.131:3306', system error: 32 Broken pipe
Exception in thread Thread-162:
Traceback (most recent call last):
File "/home/user/.local/lib/python3.6/site-packages/mysql/connector/network.py", line 161, in send_plain
self.sock.sendall(packet)
File "/usr/lib/python3.6/ssl.py", line 975, in sendall
v = self.send(byte_view[count:])
File "/usr/lib/python3.6/ssl.py", line 944, in send
return self._sslobj.write(data)
File "/usr/lib/python3.6/ssl.py", line 642, in write
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Here is the code being run:
"""This script demonstrates how to do real-time object detection with
TensorRT optimized YOLO engine.
"""
import os
import time
import argparse
import cv2
import pycuda.autoinit # This is needed for initializing CUDA driver
import mysql.connector
from mysql.connector import pooling
from datetime import date
#import local classes and their functions
from utils.yolo_classes import get_cls_dict
from utils.camera import add_camera_args, Camera
from utils.display import open_window, set_display, show_fps
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins_tracklite_mysql import TrtYOLO
from tracklite.utils.parser import get_config
from threading import Thread, Lock
WINDOW_NAME = 'TrtYOLODemo'
database = mysql.connector.connect(
    host='192.168.1.131',
    user='*******',
    password='*********',
    database='bird_detections'
)

cursor = database.cursor()

sql_op = 'INSERT INTO nano_detections (DATE, TIME, CONFIDENCE, IDENTITY, X1, Y1, X2, Y2, IMG) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'

mutex = Lock()
def parse_args():
    """Parse input arguments."""
    desc = ('Capture and display live camera video, while doing '
            'real-time object detection with TensorRT optimized '
            'YOLO model on Jetson')
    parser = argparse.ArgumentParser(description=desc)
    parser = add_camera_args(parser)
    parser.add_argument(
        '-c', '--category_num', type=int, default=80,
        help='number of object categories [80]')
    parser.add_argument(
        '-m', '--model', type=str, required=True,
        help=('[yolov3-tiny|yolov3|yolov3-spp|yolov4-tiny|yolov4|'
              'yolov4-csp|yolov4x-mish]-[{dimension}], where '
              '{dimension} could be either a single number (e.g. '
              '288, 416, 608) or 2 numbers, WxH (e.g. 416x256)'))
    parser.add_argument(
        '-l', '--letter_box', action='store_true',
        help='inference with letterboxed image [False]')
    args = parser.parse_args()
    return args
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    today = date.today()
    # full_screen is set to false by default
    full_scrn = False
    # fps is set at 0 by default
    fps = 0.0
    # create time variable for measuring the frames per second in real time
    tic = time.time()
    # while loop to perform inference
    while True:
        mutex.acquire()
        today_formatted = today.strftime("%y-%m-%d")
        time_formatted = time.strftime("%H:%M:%S")
        # break the loop if the window is closed
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        # create img object from a reading of the camera frame
        img = cam.read()
        image_blob = cv2.imencode('.jpg', img)[1].tostring()
        # break loop if the camera frame is None
        if img is None:
            break
        # create bounding box coordinates, detection confidences, and class ids from the detect function of the trt_yolo object
        img, outputs, scores = trt_yolo.detect(img, conf_th)
        t = Thread(target=mysql_insert, args=(outputs, scores, today_formatted, time_formatted, image_blob))
        mutex.release()
        t.start()
        mutex.acquire()
        #img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
        mutex.release()
def mysql_insert(outputs, scores, today, time, img_blob):
    mutex.acquire()
    copy_outputs = outputs
    copy_scores = scores
    copy_today = today
    copy_time = time
    mutex.release()
    if len(outputs) > 0:
        bbox_xyxy = copy_outputs[:, :4]
        identities = copy_outputs[:, -1]
        for box, identity, score in zip(bbox_xyxy, identities, copy_scores):
            if score > 0:
                x1 = str(box[0])
                y1 = str(box[1])
                x2 = str(box[2])
                y2 = str(box[3])
                identity = str(identity)
                confidence = str(score)
                cursor.execute(sql_op, (copy_today, copy_time, confidence, identity, x1, y1, x2, y2, img_blob))
                database.commit()
    #cursor.close()
    #conn.close()

def test_insert():
    sql_insert = 'INSERT INTO test_table(TEST_STRING) VALUES (%s)'
    string_to_insert = 'TEST'
    cursor.execute(sql_insert, (string_to_insert,))
def main():
    cfg_file = "./tracklite/configs/deep_sort.yaml"
    cfg = get_config()
    cfg = cfg.merge_from_file(cfg_file)
    # parse arguments
    args = parse_args()
    # raise errors for lack of arguments, such as the category number and the model file
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)
    # camera object instantiated with arguments
    cam = Camera(args)
    # raise error if camera is not opened
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')
    # create list of classes to be detected
    cls_dict = get_cls_dict(args.category_num)
    # instantiate vis object with cls_dict passed as an argument;
    # BBoxVisualization contains code to draw boxes and assign colors to each class
    vis = BBoxVisualization(cls_dict)
    # instantiate the TrtYOLO object based on the arguments given in the command to start trt_yolo.py
    trt_yolo = TrtYOLO(args.model, cfg, args.category_num, args.letter_box)
    # open a window based on camera height and width
    open_window(
        WINDOW_NAME, 'Camera TensorRT YOLO Demo',
        cam.img_width, cam.img_height)
    # loop and perform detections
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()

# Decoding image from SQL:
# nparr = np.fromstring(STRING_FROM_DATABASE, np.uint8)
# img = cv2.imdecode(nparr, cv2.CV_LOAD_IMAGE_COLOR)
At first I thought the issue was that I needed to use pooled connections, but I encounter the same error, or an error about exhausted connections, when I modify the code above to use pooled connections. Based on the error message I think there is a problem in the data I am inserting, but I do not know how to read this error properly and I need some help.
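For reference, a minimal sketch of the pooled, one-connection-per-thread pattern mentioned above; mysql-connector connections are not thread-safe, so each thread checks out its own connection rather than sharing the module-level cursor (pool name and size are hypothetical):

from mysql.connector import pooling

cnx_pool = pooling.MySQLConnectionPool(
    pool_name='insert_pool',  # hypothetical name
    pool_size=8,              # hypothetical; must cover the number of concurrent threads
    host='192.168.1.131',
    user='*******',
    password='*********',
    database='bird_detections',
)

def mysql_insert_pooled(params):
    # each thread takes its own connection instead of sharing one cursor
    cnx = cnx_pool.get_connection()
    try:
        cur = cnx.cursor()
        cur.execute(sql_op, params)
        cnx.commit()
        cur.close()
    finally:
        cnx.close()  # returns the connection to the pool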
I want to configure a multi-GPU environment using torch.multiprocessing and torch.distributed. However, I received the following error message.
Traceback (most recent call last):
File "train_custom.py", line 398, in <module>
mp.spawn(init_process, args=(world_size, backend), nprocs=world_size, join=True)
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 230, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 188, in start_processes
while not context.join():
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 150, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
fn(i, *args)
File "/root/USRGAN_step2/train_custom.py", line 390, in init_process
fn(rank, size2)
TypeError: 'str' object is not callable
My code is as follows.
def run(rank, size2):
    ...

def init_process(rank, size2, fn, backend='gloo'):
    """ Initialize the distributed environment. """
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=torch.cuda.device_count())
    fn(rank, size2)

###################################

if __name__ == "__main__":
    world_size = torch.cuda.device_count()
    backend = 'gloo'
    mp.spawn(init_process, args=(world_size, backend), nprocs=world_size, join=True)
    #mp.set_start_method('spawn', force=True)
    processes = []
    size2 = 4
    for rank in range(size2):
        p = Process(target=init_process, args=(rank, size2, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
I followed the tutorial (https://pytorch.org/tutorials/intermediate/dist_tuto.html) as written, but this error occurred, and I can't find its cause.
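For what it's worth, mp.spawn invokes the target as init_process(rank, *args); with args=(world_size, backend), the fn parameter of init_process therefore receives the string 'gloo', and fn(rank, size2) tries to call a string. A minimal sketch of the aligned call (assuming run is the function each rank should execute):

# pass the function through args so init_process(rank, size2, fn, backend)
# binds fn to run instead of to the string 'gloo'
mp.spawn(init_process, args=(world_size, run, backend), nprocs=world_size, join=True)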
I have an issue in Python 3.7.3 where my multiprocessing operation (using Queue, Pool, and apply_async) deadlocks when handling large computational tasks.
For small computations, this multiprocessing task works just fine. However, when dealing with larger processes, the multiprocessing task stops, or deadlocks, altogether without exiting the process! I read that this will happen if you "grow your queue without bounds, and you are joining up to a subprocess that is waiting for room in the queue [...] your main process is stalled waiting for that one to complete, and it never will." (Process.join() and queue don't work with large numbers)
I am having trouble converting this concept into code. I would greatly appreciate guidance on refactoring the code I have written below:
import multiprocessing as mp

def listener(q, d):  # task to queue information into a manager dictionary
    while True:
        item_to_write = q.get()
        if item_to_write == 'kill':
            break
        foo = d['region']
        foo.add(item_to_write)
        d['region'] = foo  # add items and set to manager dictionary

def main():
    manager = mp.Manager()
    q = manager.Queue()
    d = manager.dict()
    d['region'] = set()
    pool = mp.Pool(mp.cpu_count() + 2)
    watcher = pool.apply_async(listener, (q, d))
    jobs = []
    for i in range(24):
        job = pool.apply_async(execute_search, (q, d))  # task for multiprocessing
        jobs.append(job)
    for job in jobs:
        job.get()  # begin multiprocessing task
    q.put('kill')  # kill multiprocessing task (view listener function)
    pool.close()
    pool.join()
    print('process complete')

if __name__ == '__main__':
    main()
Ultimately, I would like to prevent deadlocking altogether to facilitate a multiprocessing task that could operate indefinitely until completion.
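To make the "bounded queue" idea from the quoted answer concrete, this is roughly what I understand it to mean (an arbitrary bound; whether this alone removes the deadlock is an assumption):

manager = mp.Manager()
q = manager.Queue(maxsize=1000)  # arbitrary bound; q.put() now blocks when full,
                                 # applying backpressure to the producer processes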
BELOW IS THE TRACEBACK WHEN EXITING DEADLOCK IN BASH
^CTraceback (most recent call last):
File "multithread_search_cl_gamma.py", line 260, in <module>
main(GEOTAG)
File "multithread_search_cl_gamma.py", line 248, in main
job.get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 651, in get
Process ForkPoolWorker-28:
Process ForkPoolWorker-31:
Process ForkPoolWorker-30:
Process ForkPoolWorker-27:
Process ForkPoolWorker-29:
Process ForkPoolWorker-26:
self.wait(timeout)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 648, in wait
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
with self._rlock:
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
self._event.wait(timeout)
File "/Users/Ira/anaconda3/lib/python3.7/threading.py", line 552, in wait
Traceback (most recent call last):
Traceback (most recent call last):
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 352, in get
res = self._reader.recv_bytes()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
with self._rlock:
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
signaled = self._cond.wait(timeout)
File "/Users/Ira/anaconda3/lib/python3.7/threading.py", line 296, in wait
waiter.acquire()
KeyboardInterrupt
with self._rlock:
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
Traceback (most recent call last):
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
with self._rlock:
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/queues.py", line 351, in get
with self._rlock:
File "/Users/Ira/anaconda3/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
Below is the updated script:
import multiprocessing as mp
import queue
import time

def listener(q, d, stop_event):
    while not stop_event.is_set():
        try:
            while True:
                item_to_write = q.get(False)
                if item_to_write == 'kill':
                    break
                foo = d['region']
                foo.add(item_to_write)
                d['region'] = foo
        except queue.Empty:
            pass
        time.sleep(0.5)
        if not q.empty():
            continue

def main():
    manager = mp.Manager()
    stop_event = manager.Event()
    q = manager.Queue()
    d = manager.dict()
    d['region'] = set()
    pool = mp.get_context("spawn").Pool(mp.cpu_count() + 2)
    watcher = pool.apply_async(listener, (q, d, stop_event))
    stop_event.set()
    jobs = []
    for i in range(24):
        job = pool.apply_async(execute_search, (q, d))
        jobs.append(job)
    for job in jobs:
        job.get()
    q.put('kill')
    pool.close()
    pool.join()
    print('process complete')

if __name__ == '__main__':
    main()
UPDATE:
execute_search executes several processes necessary for the search, so I have put in code showing where q.put() lies.
Alone, the script takes > 72 hrs to finish. No single process ever completes the entire task; rather, they work individually and reference a manager.dict() to avoid repeating tasks. These tasks run until every tuple in the manager.dict() has been processed.
def area(self, tup, housing_dict, q):
    state, reg, sub_reg = tup[0], tup[1], tup[2]
    for cat in housing_dict:
        """
        computationally expensive, takes > 72 hours
        for a list of 512 tup(s)
        """
        result = self.search_geotag(
            state, reg, cat, area=sub_reg
        )
    q.put(tup)
The tup passed to q.put(tup) is ultimately handled by the listener function, which adds it to the manager.dict().
Since listener and execute_search share the same queue object, there could be a race where execute_search gets 'kill' from the queue before listener does; listener would then be stuck in a blocking get() forever, since no more new items arrive.
For that case you can use an Event object to signal all processes to stop:
import multiprocessing as mp
import queue

def listener(q, d, stop_event):
    while not stop_event.is_set():
        try:
            item_to_write = q.get(timeout=0.1)
            foo = d['region']
            foo.add(item_to_write)
            d['region'] = foo
        except queue.Empty:
            pass
    print("Listener process stopped")

def main():
    manager = mp.Manager()
    stop_event = manager.Event()
    q = manager.Queue()
    d = manager.dict()
    d['region'] = set()
    pool = mp.get_context("spawn").Pool(mp.cpu_count() + 2)
    watcher = pool.apply_async(listener, (q, d, stop_event))
    jobs = []
    for i in range(24):
        job = pool.apply_async(execute_search, (q, d))
        jobs.append(job)
    try:
        for job in jobs:
            job.get(300)  # get the result, or raise a timeout exception after 300 seconds
    except mp.TimeoutError:
        pool.terminate()
    stop_event.set()  # stop listener process
    print('process complete')

if __name__ == '__main__':
    main()