Problems with multiprocessing - python

I'm trying to implement a Python script that reads the content of a PDF file and moves that file to a specific directory.
On my Debian machine it works without any problem, but on my Xubuntu system I'm getting the following error:
Traceback (most recent call last):
File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 463, in _handle_results
task = get()
File "/usr/lib/python3.6/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() takes 1 positional argument but 2 were given
At this point, the script halts until I cancel it with KeyboardInterrupt, which gives me the rest of the error:
Process ForkPoolWorker-5:
Process ForkPoolWorker-6:
Process ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
with self._rlock:
File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
with self._rlock:
File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
Process ForkPoolWorker-1:
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
with self._rlock:
File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/queues.py", line 335, in get
res = self._reader.recv_bytes()
File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
with self._rlock:
File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
I don't know how to fix this issue. I hope you can give me a hint.
Thanks, y'all!
EDIT: The code of the script:
from datetime import date
from multiprocessing import Pool
from pdf2image import convert_from_path
from os import listdir, remove
from os.path import isfile, join, abspath, split, exists
from PIL import Image  # Image.open() is used below
import pytesseract
import sys
import os
import re
import tempfile

tmp_path = tempfile.gettempdir()  # replace with given output directory


def run(path):
    PDF_file = abspath(path)  # use absolute path of pdf file
    pages = convert_from_path(PDF_file, 500)
    page = pages[0]
    imgFile = abspath(join(tmp_path, "document" + str(date.today()) + ".jpg"))
    # save image to temp path
    page.save(imgFile, 'JPEG')
    # get text from image of page 1
    text = str(pytesseract.image_to_string(Image.open(imgFile)))
    if exists(imgFile):
        os.remove(imgFile)
    match = re.search(r"(Vertragsnummer\:\s)(\d+)\w+", text)
    if match is None:
        print("Could not find contract id")
        exit(1)
    else:
        f = split(PDF_file)
        d = join(tmp_path, match.group(2))
        if not exists(d):
            os.mkdir(d)
        PDF_file_new = join(d, f[1])
        print("New file: " + PDF_file_new)
        os.rename(PDF_file, PDF_file_new)


def run_in_dir(directory):
    files = [join(directory, f)
             for f in listdir(directory) if isfile(join(directory, f))]
    with Pool() as p:
        p.map_async(run, files)
        p.close()
        p.join()


if __name__ == "__main__":
    import argparse
    import cProfile

    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-p", "--path", help="Path to specific PDF file.")
    parser.add_argument("-d", "--directory",
                        help="Path to folder containing PDF files.")
    args = parser.parse_args()
    # run(args.path)
    print(cProfile.run("run_in_dir(args.directory)"))

Try running the script without multiprocessing. In my case I found that
pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your path
Here's how to install it.
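To run the script without multiprocessing first, a serial version of run_in_dir (run_in_dir_serial is just an illustrative name) lets any exception from run() propagate with its real traceback:

from os import listdir
from os.path import isfile, join

def run_in_dir_serial(directory):
    files = [join(directory, f)
             for f in listdir(directory) if isfile(join(directory, f))]
    for f in files:
        run(f)  # run() from the question; e.g. TesseractNotFoundError now surfaces directly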
I have no idea why the error message with multiprocessing is so unclear.
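My guess at why it is so unclear: the Pool pickles the worker's exception to send it back to the parent, and unpickling rebuilds it roughly as type(exc)(*exc.args). An exception class whose __init__ takes no extra arguments (pytesseract's TesseractNotFoundError appears to be one) then cannot be rebuilt, and all you see is the TypeError from the failed reconstruction instead of the real error. A minimal sketch of that mechanism with a made-up exception class:

import pickle

# made-up class mimicking an exception whose __init__ accepts no extra arguments
class CustomError(Exception):
    def __init__(self):
        super().__init__("tesseract is not installed or it's not in your path")

# BaseException pickles as (type(exc), exc.args), so loading calls
# CustomError("tesseract is ..."), which raises:
# TypeError: __init__() takes 1 positional argument but 2 were given
pickle.loads(pickle.dumps(CustomError()))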
Also, remove exit(1) (or use sys.exit(1) instead); the bare exit() is meant for interactive shells, not scripts.

Related

When ipdb is added to a Flask app it raises RuntimeError: There is no current event loop in thread

Okay, I'm working on a simple Flask app and it works, but I needed ipdb at a certain API endpoint. After I added it, I get:
Traceback (most recent call last):
File "/home/amir/Workspace/mmb/recsysenv/lib/python3.6/site-packages/flask/app.py", line 2446, in wsgi_app
response = self.full_dispatch_request()
.
.
.
File "/usr/local/lib/python3.6/bdb.py", line 51, in trace_dispatch
return self.dispatch_line(frame)
File "/usr/local/lib/python3.6/bdb.py", line 69, in dispatch_line
self.user_line(frame)
File "/usr/local/lib/python3.6/pdb.py", line 261, in user_line
self.interaction(frame, None)
File "/home/amir/Workspace/mmb/recsysenv/lib/python3.6/site-packages/IPython/core/debugger.py", line 294, in interaction
OldPdb.interaction(self, frame, traceback)
File "/usr/local/lib/python3.6/pdb.py", line 352, in interaction
self._cmdloop()
File "/usr/local/lib/python3.6/pdb.py", line 321, in _cmdloop
self.cmdloop()
File "/home/amir/Workspace/mmb/recsysenv/lib/python3.6/site-packages/IPython/terminal/debugger.py", line 97, in cmdloop
line = self.pt_app.prompt() # reset_current_buffer=True)
File "/home/amir/Workspace/mmb/recsysenv/lib/python3.6/site-packages/prompt_toolkit/shortcuts/prompt.py", line 986, in prompt
return self.app.run()
File "/home/amir/Workspace/mmb/recsysenv/lib/python3.6/site-packages/prompt_toolkit/application/application.py", line 788, in run
return get_event_loop().run_until_complete(self.run_async(pre_run=pre_run))
File "/usr/local/lib/python3.6/asyncio/events.py", line 694, in get_event_loop
return get_event_loop_policy().get_event_loop()
File "/usr/local/lib/python3.6/asyncio/events.py", line 602, in get_event_loop
% threading.current_thread().name)
RuntimeError: There is no current event loop in thread 'Thread-1'.
After that, I added a threading snippet like this to the Flask file:
from threading import Thread
t = Thread(target=app, args=())
t.daemon = True
t.start()
t.join()
if __name__ == '__main__':
    app.run()
but it raises
Traceback (most recent call last):
File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
TypeError: __call__() missing 2 required positional arguments: 'environ' and 'start_response'
I tried both debug and non-debug mode.
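For what it's worth, the TypeError happens because a Flask app object is a WSGI callable: Thread(target=app) ends up calling app() with no arguments, while WSGI calls it as app(environ, start_response). If the intent was to run the development server in a background thread, the target would have to be app.run instead; a rough sketch (use_reloader=False is my assumption, since the reloader only works in the main thread):

from threading import Thread
from flask import Flask

app = Flask(__name__)  # stand-in for the app defined in the real file

if __name__ == '__main__':
    # run the dev server in a background thread instead of calling the WSGI app directly
    t = Thread(target=app.run, kwargs={"use_reloader": False})
    t.daemon = True
    t.start()
    t.join()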

Pycharm debugger throws Bad file descriptor error when using dask distributed

I am using the most lightweight/simple dask multiprocessing setup, the non-cluster local Client:
from distributed import Client
client = Client()
Even so, the first invocation of dask.bag.compute() results in the following:
Connected to pydev debugger (build 191.7141.48)
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 383, in _on_run
r = self.sock.recv(1024)
OSError: [Errno 9] Bad file descriptor
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 383, in _on_run
r = self.sock.recv(1024)
OSError: [Errno 9] Bad file descriptor
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 383, in _on_run
r = self.sock.recv(1024)
OSError: [Errno 9] Bad file descriptor
The result is that you can more or less flip a coin on whether the program will proceed or error out with a communication exception. Here is what happens when the flip comes up "tails":
Connected to pydev debugger (build 191.7141.48)
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 383, in _on_run
r = self.sock.recv(1024)
OSError: [Errno 9] Bad file descriptor
Process ForkServerProcess-3:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/distributed/core.py", line 178, in __init__
from .counter import Digest
ImportError: cannot import name 'Digest' from 'distributed.counter' (/usr/local/lib/python3.7/site-packages/distributed/counter.py)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.7/site-packages/distributed/process.py", line 181, in _run
target(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/distributed/nanny.py", line 587, in _run
worker = Worker(*worker_args, **worker_kwargs)
File "/usr/local/lib/python3.7/site-packages/distributed/worker.py", line 552, in __init__
**kwargs
File "/usr/local/lib/python3.7/site-packages/distributed/node.py", line 76, in __init__
io_loop=self.io_loop,
File "/usr/local/lib/python3.7/site-packages/distributed/core.py", line 180, in __init__
self.digests = defaultdict(partial(Digest, loop=self.io_loop))
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.7/site-packages/distributed/utils.py", line 179, in ignoring
yield
SystemError: error return without exception set
distributed.nanny - WARNING - Worker process 20417 exited with status 1
Traceback (most recent call last):
File "_pydevd_frame_eval/pydevd_frame_evaluator_darwin_37_64.pyx", line 95, in _pydevd_frame_eval.pydevd_frame_evaluator_darwin_37_64.get_bytecode_while_frame_eval
KeyError: '/usr/local/lib/python3.7/site-packages/distributed/bokeh/__init__.py'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1758, in <module>
main()
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1752, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1147, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/Applications/PyCharm.app/Contents/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/git/huddl/python/hamspam/enron.py", line 205, in <module>
client = Client()
File "/usr/local/lib/python3.7/site-packages/distributed/client.py", line 712, in __init__
self.start(timeout=timeout)
File "/usr/local/lib/python3.7/site-packages/distributed/client.py", line 858, in start
sync(self.loop, self._start, **kwargs)
File "/usr/local/lib/python3.7/site-packages/distributed/utils.py", line 331, in sync
six.reraise(*error[0])
File "/usr/local/lib/python3.7/site-packages/six.py", line 693, in reraise
raise value
File "/usr/local/lib/python3.7/site-packages/distributed/utils.py", line 316, in f
result[0] = yield future
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 729, in run
value = future.result()
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 736, in run
yielded = self.gen.throw(*exc_info) # type: ignore
File "/usr/local/lib/python3.7/site-packages/distributed/client.py", line 928, in _start
yield self.cluster
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 729, in run
value = future.result()
File "/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/tasks.py", line 603, in _wrap_awaitable
return (yield from awaitable.__await__())
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 736, in run
yielded = self.gen.throw(*exc_info) # type: ignore
File "/usr/local/lib/python3.7/site-packages/distributed/deploy/local.py", line 284, in _start
yield [self._start_worker(**self.worker_kwargs) for i in range(n_workers)]
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 729, in run
value = future.result()
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 501, in callback
result_list.append(f.result())
File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 742, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.7/site-packages/distributed/deploy/local.py", line 316, in _start_worker
raise gen.TimeoutError("Worker failed to start")
tornado.util.TimeoutError: Worker failed to start
Any advice on this?
There will be even more issues/complications when trying to use a LocalCluster mode, but that will be saved for a different question.
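Not an answer to the underlying bug, but one sketch that sidesteps the fork/debugger interaction while testing is to ask for a thread-based local client (processes=False is a real distributed option; whether it suits the workload is an assumption):

from distributed import Client

# threads instead of forked worker processes; avoids spawning ForkServer
# workers under the PyCharm debugger, at the cost of parallelism for CPU-bound work
client = Client(processes=False)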

gensim LdaMulticore throwing an exception

I am running LDAMulticore from the python gensim library, and the script cannot seem to create more than one thread. Here is the error:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python2.7/multiprocessing/pool.py", line 97, in worker
initializer(*initargs)
File "/usr/lib64/python2.7/site-packages/gensim/models/ldamulticore.py", line 333, in worker_e_step
worker_lda.do_estep(chunk) # TODO: auto-tune alpha?
File "/usr/lib64/python2.7/site-packages/gensim/models/ldamodel.py", line 725, in do_estep
gamma, sstats = self.inference(chunk, collect_sstats=True)
File "/usr/lib64/python2.7/site-packages/gensim/models/ldamodel.py", line 655, in inference
ids = [int(idx) for idx, _ in doc]
TypeError: 'int' object is not iterable
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib64/python2.7/threading.py", line 812, in __bootstrap_inner
self.run()
File "/usr/lib64/python2.7/threading.py", line 765, in run
self.__target(*self.__args, **self.__kwargs)
File "/usr/lib64/python2.7/multiprocessing/pool.py", line 325, in _handle_workers
pool._maintain_pool()
File "/usr/lib64/python2.7/multiprocessing/pool.py", line 229, in _maintain_pool
self._repopulate_pool()
File "/usr/lib64/python2.7/multiprocessing/pool.py", line 222, in _repopulate_pool
w.start()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/usr/lib64/python2.7/multiprocessing/forking.py", line 121, in __init__
self.pid = os.fork()
OSError: [Errno 12] Cannot allocate memory
I'm creating my LDA model like this:
ldamodel = LdaMulticore(corpus, num_topics=50, id2word = dictionary, workers=3)
I have actually asked another question about this script, so the full script can be found here:
Gensim LDA Multicore Python script runs much too slow
If it's relevant, I'm running this on a CentOS server. Let me know if I should include any other information.
Any help is appreciated!
OSError: [Errno 12] Cannot allocate memory sounds like you are running out of RAM.
Check your available free memory and swap.
You can try to reduce the number of worker processes with the workers parameter, or the number of documents used in each training chunk with the chunksize parameter.
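For example (values are illustrative, not tuned):

from gensim.models import LdaMulticore

# corpus and dictionary are the objects from the question; fewer forked workers
# and a smaller chunksize both reduce the memory needed per training step
ldamodel = LdaMulticore(corpus,
                        num_topics=50,
                        id2word=dictionary,
                        workers=1,
                        chunksize=500)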

RuntimeError: unable to open shared memory object, OSError: [Errno 24] Too many open files

I'm having trouble loading indexes of documents.
I am testing my code, so I set
batch_size = 4
number_of_sentences_in_document = 84
number_of_words_in_sentence = 80
which makes one mini-batch of 80 * 84 * 4 = 26,880 document indexes.
The problem is that when I wrap this index dataset in a DataLoader as below
and try to loop over trainloader, it produces a flood of error messages.
DataManager = DS.NewsDataset(data_examples_gen, Vocab)
trainloader = torch.utils.data.DataLoader(DataManager, batch_size=Args.args.batch_size, shuffle=True, num_workers=32)
The error messages are below.
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 61, in _worker_loop
data_queue.put((idx, samples))
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/queues.py", line 341, in put
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 125, in reduce_storage
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 191, in DupFd
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/resource_sharer.py", line 48, in __init__
OSError: [Errno 24] Too many open files
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/util.py", line 262, in _run_finalizers
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/util.py", line 186, in __call__
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/shutil.py", line 476, in rmtree
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/shutil.py", line 474, in rmtree
OSError: [Errno 24] Too many open files: '/tmp/pymp-be4nmgxw'
Process Process-2:
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 61, in _worker_loop
data_queue.put((idx, samples))
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/queues.py", line 341, in put
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 125, in reduce_storage
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 191, in DupFd
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/resource_sharer.py", line 48, in __init__
OSError: [Errno 24] Too many open files
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 61, in _worker_loop
data_queue.put((idx, samples))
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/queues.py", line 341, in put
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 121, in reduce_storage
RuntimeError: unable to open shared memory object </torch_54415_3383444026> in read-write mode at /opt/conda/conda-bld/pytorch_1525909934016/work/aten/src/TH/THAllocator.c:342
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/util.py", line 262, in _run_finalizers
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/util.py", line 186, in __call__
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/shutil.py", line 476, in rmtree
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/shutil.py", line 474, in rmtree
OSError: [Errno 24] Too many open files: '/tmp/pymp-abguy87b'
Process Process-1:
Traceback (most recent call last):
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 61, in _worker_loop
data_queue.put((idx, samples))
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/queues.py", line 341, in put
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 121, in reduce_storage
RuntimeError: unable to open shared memory object </torch_54415_3383444026> in read-write mode at /opt/conda/conda-bld/pytorch_1525909934016/work/aten/src/TH/THAllocator.c:342
Traceback (most recent call last):
File "/home/nlpgpu3/LinoHong/FakeNewsByTitle/main.py", line 26, in <module>
for mini_batch in trainloader :
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 280, in __next__
idx, batch = self._get_batch()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 259, in _get_batch
return self.data_queue.get()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/queues.py", line 335, in get
res = self._reader.recv_bytes()
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
File "/home/nlpgpu3/anaconda3/envs/linohong3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 178, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 54416) exited unexpectedly with exit code 1.
Process finished with exit code 1
I thought this was some kind of memory problem, so I tried the same thing
with only two sentences per document, and it worked.
However, I expect this to get much larger, with
batch_size up to 32 or 64,
the number of sentences per document up to 84,
and the number of words per sentence up to 84.
I tried
$ ulimit -n 10000
but that one didn't work.
How can I manage this problem?
Any Idea???
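Two mitigations that are commonly suggested for this pair of errors (sketches, not guaranteed fixes for this setup): lower the DataLoader's num_workers, and/or switch PyTorch's tensor sharing strategy from file descriptors to the file system:

import torch.multiprocessing
import torch.utils.data

# the default 'file_descriptor' sharing strategy consumes a file descriptor per
# shared tensor; 'file_system' uses named shared-memory files instead
torch.multiprocessing.set_sharing_strategy('file_system')

# DataManager and Args come from the question's code; 32 workers means a lot of
# simultaneously open queues and files, so start smaller
trainloader = torch.utils.data.DataLoader(DataManager,
                                          batch_size=Args.args.batch_size,
                                          shuffle=True,
                                          num_workers=4)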

Multiprocessing Large Objects Using Pathos in Python

I am trying to make use of my computer's multiple CPUs. However, the BeautifulSoup object returned by my function as part of an SQLAlchemy object is not picklable with pickle or cPickle, so I am using pathos, a fork of the multiprocessing package that uses dill and can therefore pickle almost any Python object. I tested dill on the object that I could not pickle and it worked, so I thought my problem would be solved.

However, when I use pathos' pool.map I have the same problem as before, namely that the function completes but the result is never returned. I confirmed this by using results = pool.amap(myfunc, myarglist), which completes, while results.get() does not. Unfortunately, I cannot post the HTML for the page (it is not publicly available), and I have been unable to find a reproducible example of the problem. This answer includes a function for troubleshooting multiprocessing of large objects, but unfortunately it uses Queue, which does not seem to be implemented for pathos by itself (only, presumably, under the hood within the pool.map function).

I am using the 0.2a1.dev version of pathos (with dependencies installed with pip prior to compiling from source) on Python 2.7. Here is the traceback for the keyboard interrupt:
Process PoolWorker-2:
Process PoolWorker-7:
Traceback (most recent call last):
Process PoolWorker-8:Process PoolWorker-6:Process PoolWorker-3:Process PoolWorker-5:Process PoolWorker-4:Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
self.run()
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 59, in worker
self.run()
self.run()
self.run()
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
self._target(*self._args, **self._kwargs)
self.run()
self.run()
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
put((job, i, result))
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 339, in put
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
for job, i, func, args, kwds in iter(inqueue.get, None):
for job, i, func, args, kwds in iter(inqueue.get, None):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 325, in get
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 325, in get
wacquire()
KeyboardInterrupt
for job, i, func, args, kwds in iter(inqueue.get, None):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 325, in get
racquire()
racquire()
for job, i, func, args, kwds in iter(inqueue.get, None):
for job, i, func, args, kwds in iter(inqueue.get, None):
KeyboardInterrupt
KeyboardInterrupt
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 325, in get
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 325, in get
racquire()
KeyboardInterrupt
racquire()
racquire()
KeyboardInterrupt
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 227, in _bootstrap
self.run()
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/process.py", line 85, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/pool.py", line 54, in worker
for job, i, func, args, kwds in iter(inqueue.get, None):
File "/usr/local/lib/python2.7/dist-packages/processing-0.52_pathos-py2.7-linux-x86_64.egg/processing/queue.py", line 327, in get
return recv()
KeyboardInterrupt
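For reference, the amap/get pattern described above boils down to the following self-contained sketch (myfunc and myarglist are stand-ins, and the import path is the one used by current pathos releases, which may differ from the 0.2a1.dev version in the question):

from pathos.multiprocessing import ProcessingPool

def myfunc(arg):
    # stand-in for the real scraping function returning a large, dill-picklable object
    return arg * 2

myarglist = [1, 2, 3, 4]

pool = ProcessingPool()
results = pool.amap(myfunc, myarglist)  # returns immediately
print(results.get())                    # blocks until the workers finish; this is the call that hangs on the real data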
