I am trying to use multiprocessing library to speed up CSV reading from files. I've done so using Pool and now I'm trying to do it with Process(). However when concatenating the list to create a dataframe, it's giving me the following error:
ValueError: No objects to concatenate
To me it looks like the processes are overwriting the uber_data list. What am I missing here?
import glob
import pandas as pd
from multiprocessing import Process
import matplotlib.pyplot as plt
import os
location = "/home/data/csv/"
uber_data = []
def read_csv(filename):
return uber_data.append(pd.read_csv(filename))
def data_wrangling(uber_data):
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'], format="%m/%d/%Y %H:%M:%S")
uber_data['Dia Setmana'] = uber_data['Date/Time'].dt.weekday_name
uber_data['Num dia'] = uber_data['Date/Time'].dt.dayofweek
return uber_data
def plotting(uber_data):
weekdays = uber_data.pivot_table(index=['Num dia','Dia Setmana'], values='Base', aggfunc='count')
weekdays.plot(kind='bar', figsize=(8,6))
plt.ylabel('Total Journeys')
plt.title('Journey on Week Day')
def main():
processes = []
files = list(glob.glob(os.path.join(location,'*.csv*')))
for file in files:
print(file)
p = Process(target=read_csv, args=[file])
processes.append(p)
p.start()
for i, process in enumerate(processes):
process.join()
print(uber_data)
combined_df = pd.concat(uber_data, ignore_index=True)
dades_mod = data_wrangling(combined_df)
Plotting(dades_mod)
main()
Traceback is:
Process Process-223:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-224:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-221:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-222:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
Process Process-225:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-220:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
[]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed eval> in <module>
<timed exec> in main()
/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
253 verify_integrity=verify_integrity,
254 copy=copy,
--> 255 sort=sort,
256 )
257
/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
302
303 if len(objs) == 0:
--> 304 raise ValueError("No objects to concatenate")
305
306 if keys is None:
ValueError: No objects to concatenate
Thank you
The uber_data's in each process are not the same object as the uber_data in the main process. You can't really share data between processes.
from multiprocessing import Process
def read_csv(filename=r'c:\pyProjects\data.csv'):
print(id(uber_data))
uber_data.append(pd.read_csv(filename))
def main():
processes = []
for file in range(4):
p = Process(target=read_csv)
processes.append(p)
p.start()
for i, process in enumerate(processes):
process.join()
return processes
if __name__ == '__main__':
uber_data = []
print(id(uber_data))
ps = main()
Prints
PS C:\pyProjects> py -m tmp
2632505050432
1932359777344
2230288136512
2039196563648
2479121315968
You could use a Queue to send the data back to the main process.
from multiprocessing import Process, Queue
def read_csv(filename=r'c:\pyProjects\data.csv', q=None):
q.put(pd.read_csv(filename))
def main(q):
processes = []
for file in range(4):
p = Process(target=read_csv, kwargs={'q':q})
processes.append(p)
p.start()
for i, process in enumerate(processes):
process.join()
while not q.empty():
print('.')
uber_data.append(q.get(block=True))
return processes
if __name__ == '__main__':
uber_data = []
q = Queue()
ps = main(q)
for thing in uber_data:
print(thing.head().to_string())
print('**')
Or you could use threads.
from threading import Thread
def g(filename):
uber_data.append(pd.read_csv(filename))
if __name__ == '__main__':
uber_data = []
threads = []
for _ in range(4):
threads.append(Thread(target=g, args=(r'c:\pyProjects\data.csv',)))
for t in threads:
t.start()
while any(t.is_alive() for t in threads):
pass
for thing in uber_data:
print(thing.head().to_string())
print('**')
Related
I have a following problem. I am running a parallel task. I am getting this error:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "eclat_model.py", line 127, in do_work
function(*args, work_queue, valid_list)
File "eclat_model.py", line 115, in eclat_parallel_helper
valid_list.extend(next_vectors)
File "<string>", line 2, in extend
File "/usr/lib/python3.8/multiprocessing/managers.py", line 834, in _callmethod
conn.send((self._id, methodname, args, kwds))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.8/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Relevant functions in eclat_model.py look like this:
def eclat_parallel_helper(index, bit_vectors, min_support, work_queue, valid_list):
next_vectors = []
for j in range(index + 1, len(bit_vectors)):
item_vector = bit_vectors[index][0] | bit_vectors[j][0]
transaction_vector = bit_vectors[index][1] & bit_vectors[j][1]
support = get_vector_support(transaction_vector)
if support >= min_support:
next_vectors.append((item_vector, transaction_vector, support))
if len(next_vectors) > 0:
valid_list.extend(next_vectors)
for i in range(len(next_vectors)):
work_queue.put((eclat_parallel_helper, (i, next_vectors, min_support)))
def do_work(work_queue, valid_list, not_done):
# work queue entries have the form (function, args)
while not_done.value:
try:
function, args = work_queue.get_nowait()
except QueueEmptyError:
continue
function(*args, work_queue, valid_list)
work_queue.task_done()
work_queue.close()
EDIT:
Multiprocessing part of the code is as follows: bit_vectors is a list of lists, where each entry is of the form
[items, transactions, support], where items is a bit vector encoding which items appear in the itemset, vector is a bit vector encoding which transactions the itemset appears in, and support is the number of transactions in which the itemset occurs.
from multiprocessing import Process, JoinableQueue, Manager, Value, cpu_count
def eclat_parallel(bit_vectors, min_support):
not_done = Value('i', 1)
manager = Manager()
valid_list = manager.list()
work_queue = JoinableQueue()
for i in range(len(bit_vectors)):
work_queue.put((eclat_parallel_helper, (i, bit_vectors, min_support)))
processes = []
for i in range(cpu_count()):
p = Process(target=do_work, args=(work_queue, valid_list, not_done), daemon=True)
p.start()
processes.append(p)
work_queue.join()
not_done.value = 0
work_queue.close()
valid_itemset_vectors = bit_vectors
for element in valid_list:
valid_itemset_vectors.append(element)
for p in processes:
p.join()
return valid_itemset_vectors
What does this error mean, please? Am I appending too many elements into next_vectors list?
I had the same issue, in my case just added a delay (time.sleep(0.01)) to solve it.
The problem is that the individual processes are too fast on queue that causes the error.
I create a PyTable object W_hat where processes should share and save the results their instead of returning them.
from multiprocessing import Lock
from multiprocessing import Pool
import tables as tb
def parallel_l21(labels, X, lam, g, W_hat):
g_indxs = np.where(labels == g)[0]
tmp = rfs(X[g_indxs, 1:].T, X[:, :-1].T, gamma=lam, verbose=False).T
tmp[abs(tmp) <= 1e-6] = 0
with lock:
W_hat[:, g_indxs] = np.array(tmp)
def init_child(lock_):
global lock
lock = lock_
#Previous code is omitted.
n_ = X_test.shape[0]
tb.file._open_files.close_all()
f = tb.open_file(path_name + 'dot' + sub_num + str(lam) + '.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
W_hat = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(n_, n_), filters=filters)
W_hats = []
for i in np.unique(labels):
W_hats.append(W_hat)
lock = Lock()
with Pool(processes=cpu_count, initializer=init_child, initargs=(lock,)) as pool:
print(pool)
pool.starmap(parallel_l21, zip(repeat(labels), repeat(X), repeat(lam), np.unique(labels), W_hats))
Now, when running into starmap, this error shows up:
Traceback (most recent call last):
File "/Applications/PyCharm CE 2.app/Contents/plugins/python-ce/helpers/pydev/_pydevd_bundle/pydevd_exec2.py", line 3, in Exec
exec(exp, global_vars, local_vars)
File "<input>", line 1, in <module>
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 537, in _handle_tasks
put(task)
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
File "stringsource", line 2, in tables.hdf5extension.Array.__reduce_cython__
TypeError: self.dims,self.dims_chunk,self.maxdims cannot be converted to a Python object for pickling
Note: I thought that the code works fine on Python 3.6.8 but it turns out that it is not the case.
I would like to use multiprocessing on a large pandas data frame. I want to set a column entry of that data frame based on another columns value. It is a simple labeling done with some if-statements.
This is the minimal example I have tried:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
def worker(data):
'''worker function'''
try:
assert type(data) == pd.core.frame.DataFrame
for i in data.index:
if 0 < data['Value'].iloc[i] <=2:
data['Label'].iloc[i] = 'low'
elif 2 < data['Value'].iloc[i] <=4:
data['Label'].iloc[i] = 'medium'
elif 4 < data['Value'].iloc[i] <=6:
data['Label'].iloc[i] = 'high'
else:
data['Label'].iloc[i] = 'very high'
except AssertionError:
print('Data has to be pandas df!')
if __name__ == '__main__':
# dummy data set
df = pd.DataFrame(np.random.randint(0,10,1001),columns=['Value'])
df['Labels'] = 0
num_cores = multiprocessing.cpu_count()
splits = np.linspace(0,len(df),num_cores+1,dtype=int)
jobs = []
for i in range(num_cores):
lower_bound = splits[i]
upper_bound = splits[i+1]
p = multiprocessing.Process(target=worker, args=(df.iloc[lower_bound:upper_bound],))
jobs.append(p)
p.start()
for proc in jobs:
proc.join()
print(jobs)
However, when I run it I get the following error statement:
Process Process-1:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-2:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-3:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-4:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-5:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-6:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-7:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
Process Process-8:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
TypeError: worker() takes 1 positional argument but 2 were given
[<Process(Process-1, stopped[1])>, <Process(Process-2, stopped[1])>, <Process(Process-3, stopped[1])>, <Process(Process-4, stopped[1])>, <Process(Process-5, stopped[1])>, <Process(Process-6, stopped[1])>, <Process(Process-7, stopped[1])>, <Process(Process-8, stopped[1])>]
I'm not sure what's going wrong with the multiprocessing...?
I have a list of CSV files. I want to do a set of operations on each of them and then produce a counter dict and i want to cerate a master list containing individual counter dict from all CSV files. I want to parallelize processing each of the csv file and then return the counter dict from each file. I found a similar solution here : How can I recover the return value of a function passed to multiprocessing.Process?
I used the solution suggested by David Cullen. This solution works perfectly for strings, but when I tried to return a counter dict or a normal dict. All the CSV files are processed until the send_end.send(result) and it hangs on there forever when executed and then throws a memory error. I am running this in a Linux server with more than sufficient memory for creating the list of counter dicts.
I used the following code:
import multiprocessing
#get current working directory
cwd = os.getcwd()
#take a list of all files in cwd
files = os.listdir(cwd)
#defining the function that needs to be done on all csv files
def worker(f,send_end):
infile= open(f)
#read liens in csv file
lines = infile.readlines()
#split the lines by "," and store it in a list of lists
master_lst = [line.strip().split(“,”) for line in lines]
#extract the second field in each sublist
counter_lst = [ element[1] for element in master_lst]
print “Total elements in the list” + str(len(counter_lst))
#create a dictionary of count elements
a = Counter(counter_lst)
# return the counter dict
send_end.send(a)
def main():
jobs = []
pipe_list = []
for f in files:
if f.endswith('.csv'):
recv_end, send_end = multiprocessing.Pipe(duplex=False)
p = multiprocessing.Process(target=worker, args=(f, send_end))
jobs.append(p)
pipe_list.append(recv_end)
p.start()
for proc in jobs:
proc.join()
result_list = [x.recv() for x in pipe_list]
print len(result_list)
if __name__ == '__main__':
main()
The error that i get is the following:
Process Process-42:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
Process Process-17:
Traceback (most recent call last):
Process Process-6:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
Process Process-8:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
self.run()
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst_lst)
a = Counter(counter_lst_lst)
a = Counter(counter_lst_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
self.update(iterable, **kwds)
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
self[elem] = self_get(elem, 0) + 1
MemoryError
MemoryError
Process Process-10:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
^Z
[18]+ Stopped collapse_multiprocessing_return.py
Now instead of "a" in send_end.send(a) if i replace f, the filename. It prints the number of csv files in the directory (which is what len(result_list) does in this case). But when the counter dict "a" is returned it gets stuck forever, throwing the above error.
I would like to have the code pass the counter dict to receive end without any error/problems. Is there a work around? Could someone please suggest a possible solution?
p.s: I am new to multiprocessing module, sorry if this question sounds naive. Also, i tried the multiprocessing.Manager(), but got a similar error
Your traceback mentions Process Process-42:, so there are at least 42 processes being created. You're creating a process for every CSV file, which is not useful and is probably causing the memory error.
Your problem can be solved much more simply using multiprocessing.Pool.map. The worker function can also be shortened greatly:
def worker(f):
with open(f) as infile:
return Counter(line.strip().split(",")[1]
for line in infile)
def main():
pool = multiprocessing.Pool()
result_list = pool.map(worker, [f for f in files if f.endswith('.csv')])
Passing no arguments to the pool means it'll create as many processes as you have CPU cores. Using more may or may not increase performance.
I have a python object - list of dictionaries which I want to fill with key-value pairs in each of those dicts but simultaneously using multiple processors and using the multiprocessing module in python. For that purpose I am using the Manager module for storing that python object. Here is the following code:
from pylab import *
from numpy.random import *
import multiprocessing
import threading
import random
def tasks_start(id, global_lists):
counter_lock = threading.Lock()
with counter_lock:
num = int(10*random.random())
global_lists[num] = {'1':'Random'}
print("Id: ", id)
print(global_lists[0])
if __name__ == '__main__':
numProcessors = 6
pool = multiprocessing.Pool(numProcessors)
global_list = multiprocessing.Manager().list(range(100))
for idx in range(100):
global_list[idx] = multiprocessing.Manager().dict()
tasks = []
for id in range(10):
tasks.append((id, global_list))
pool.starmap(tasks_start, tasks)
pool.close()
pool.join()
So what I am doing here is creating a list of dictionaries stored as global_list and then calling the tasks_start() method 10 times using the python's starmap() module (just so that I can later extend to multiple arguments) to fill the list of dictionaries. As a simple test case, I just use the random generator to randomly pick up one dictionary among the lists everytime and fill it with some value. When I run the program, the following error occurs:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.4/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.4/multiprocessing/pool.py", line 47, in starmapstar
return list(itertools.starmap(args[0], args[1]))
File "/home/cysis/inhibition_soum/motif_temporal_patterns/code_versions/2016/09/09_08/parallel_test/test_error_manager.py", line 14, in tasks_start
print(global_lists[0])
File "<string>", line 2, in __getitem__
File "/usr/lib/python3.4/multiprocessing/managers.py", line 732, in _callmethod
kind, result = conn.recv()
File "/usr/lib/python3.4/multiprocessing/connection.py", line 251, in recv
return ForkingPickler.loads(buf.getbuffer())
File "/usr/lib/python3.4/multiprocessing/managers.py", line 852, in RebuildProxy
return func(token, serializer, incref=incref, **kwds)
File "/usr/lib/python3.4/multiprocessing/managers.py", line 706, in __init__
self._incref()
File "/usr/lib/python3.4/multiprocessing/managers.py", line 756, in _incref
conn = self._Client(self._token.address, authkey=self._authkey)
File "/usr/lib/python3.4/multiprocessing/connection.py", line 495, in Client
c = SocketClient(address)
File "/usr/lib/python3.4/multiprocessing/connection.py", line 624, in SocketClient
s.connect(address)
FileNotFoundError: [Errno 2] No such file or directory
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/cysis/inhibition_soum/motif_temporal_patterns/code_versions/2016/09/09_08/parallel_test/test_error_manager.py", line 29, in <module>
pool.starmap(tasks_start, tasks)
File "/usr/lib/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/lib/python3.4/multiprocessing/pool.py", line 599, in get
raise self._value
FileNotFoundError: [Errno 2] No such file or directory
In my opinion before the last print(global_lists[0) is executed, the Manager exits and therefore is not able to find global_lists[0]. Can anybody shed some light on this sort of stuff?