Can't use multiprocessing in a Python function

I'm a beginner in Python. I'm trying to put multiprocessing into a function, but Python gives me an error.
Please refer to the original code below:
from multiprocessing import Process
import time

def func1():
    print('test1')
    time.sleep(10)

def func2():
    print('test2')
    time.sleep(5)

if __name__ == '__main__':
    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')
It runs well and gives the correct result I need.
However, when I tried to put the multiprocessing code into a function:
from multiprocessing import Process
import time

def test_multiprocessing():
    def func1():
        print('test1')
        time.sleep(10)

    def func2():
        print('test2')
        time.sleep(5)

    if __name__ == '__main__':
        p_func1 = Process(target=func1)
        p_func2 = Process(target=func2)
        p_func1.start()
        p_func2.start()
        p_func1.join()
        p_func2.join()
        print('done')

test_multiprocessing()
Below is the error I got; may I know how to fix this issue? The reason I'd like to put multiprocessing into a function is that there is existing code, and I don't want to make major changes to it just to support multiprocessing.
Traceback (most recent call last):
  File "multipleprocessing.py", line 20, in <module>
    test_multiprocessing()
  File "multipleprocessing.py", line 14, in test_multiprocessing
    p_func1.start()
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\process.py", line 105, in start
    self._popen = self._Popen(self)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py", line 223, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py", line 322, in _Popen
    return Popen(process_obj)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'test_multiprocessing.<locals>.func1'
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\spawn.py", line 99, in spawn_main
    new_handle = reduction.steal_handle(parent_pid, pipe_handle)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\reduction.py", line 87, in steal_handle
    _winapi.DUPLICATE_SAME_ACCESS | _winapi.DUPLICATE_CLOSE_SOURCE)
PermissionError: [WinError 5] Access is denied
The same code works when tested on Linux. Does that mean Python on Windows can't support multiprocessing inside a function?

Your code is fine; you just shouldn't keep if __name__ == '__main__': inside the function. Read more about why __name__ == "__main__" matters here.
Try it like below:
from multiprocessing import Process
import time

def test_multiprocessing():
    def func1():
        print('test1')
        time.sleep(10)

    def func2():
        print('test2')
        time.sleep(5)

    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')

test_multiprocessing()

A small correction to @Prakash's answer: you need to call the function from inside if __name__ == "__main__":.
It is explained well here!
from multiprocessing import Process
import time

def func1():
    print('test1')
    time.sleep(10)

def func2():
    print('test2')
    time.sleep(5)

def test_multiprocessing():
    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')

if __name__ == "__main__":
    test_multiprocessing()
Another way is to bind the methods to a class, because functions are only picklable if they are defined at the top level of a module, as below:
from multiprocessing import Process
import time

class Foo:
    def func1(self):
        print('test1')
        time.sleep(10)

    def func2(self):
        print('test2')
        time.sleep(5)

    def test_multiprocessing(self):
        p_func1 = Process(target=self.func1)
        p_func2 = Process(target=self.func2)
        p_func1.start()
        p_func2.start()
        p_func1.join()
        p_func2.join()
        print('done')

if __name__ == "__main__":
    f = Foo()
    f.test_multiprocessing()
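As a side note, the reason this differs between Linux and Windows is the process start method: 'fork' copies the parent's memory, so even locally defined functions work, while 'spawn' (the only option on Windows) must pickle the target and re-import the module, so targets have to be importable. A minimal sketch for checking this on your machine (my own illustration, not from the answers above; assumes Python 3.4+ where get_start_method() exists):

import multiprocessing as mp

def top_level_worker(msg):
    # Top-level functions are importable by the child, so they pickle fine under 'spawn'.
    print(msg)

if __name__ == '__main__':
    # 'spawn' on Windows, usually 'fork' on Linux; 'fork' is why local functions work there.
    print(mp.get_start_method())
    p = mp.Process(target=top_level_worker, args=('hello from child',))
    p.start()
    p.join()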

Related

Does os module work inside ProcessPoolExecutor function?

When I run convert(mp3_files[5]) it creates a wav file as expected. When I do the same with ProcessPoolExecutor I get a "name 'os' is not defined" exception. What is wrong?
import concurrent.futures, subprocess, multiprocessing

def convert(mp3_file):
    file_name = os.path.splitext(os.path.basename(mp3_file))[0]
    out = os.path.join(audio, file_name + '.wav')
    subprocess.run([exe_mpg123, '-q', '-e', 'f32', '-w', out, mp3_file])

def main():
    # convert(mp3_files[5])
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    import os
    main()
# Traceback (most recent call last):
# File "C:\script.py", line 159, in <module>
# main()
# File "C:\script.py", line 108, in main
# for result in results:
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\process.py", line 483, in _chain_from_iterable_of_lists
# for element in iterable:
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 598, in result_iterator
# yield fs.pop().result()
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 428, in result
# return self.__get_result()
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
# raise self._exception
# NameError: name 'os' is not defined
os was imported earlier. If I uncomment the line convert(mp3_files[5]) it works.
I use Windows 10 64-bit, Python 3.7.7 (default, May 6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)].
Thank you very much for your comments, I found the issue. When using ProcessPoolExecutor on Windows, you MUST place all imports used by the worker functions BEFORE if __name__ == "__main__":. This code works:
import os, concurrent.futures

def convert(mp3_file):
    print(os.name)

def main():
    mp3_files = [1, 2, 3, 4, 5]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    main()
This does not:
import concurrent.futures

def convert(mp3_file):
    print(os.name)

def main():
    mp3_files = [1, 2, 3, 4, 5]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    import os
    main()
When you call convert(mp3_files[1]) directly as a plain function, it always works no matter where you placed your imports.
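A small illustration of why this happens (my own sketch, not from the thread): under the 'spawn' start method used on Windows, each worker re-imports your script as a module, so module-level statements (including imports) run in every worker, while the if __name__ == "__main__": block runs only in the parent.

import os
import concurrent.futures

# Runs in the parent AND in every worker process that re-imports this module.
print('importing module in process', os.getpid())

def work(x):
    return x * 2

if __name__ == '__main__':
    # Runs only in the parent process.
    print('main guard in process', os.getpid())
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        print(list(executor.map(work, range(4))))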

Is it possible to spawn a process inside a thread in Python?

I'm writing a program that spawns a process and restarts it under certain conditions. For example, if the child process stops sending data to the mother process for a certain period of time, I want the mother process to terminate the child process and restart it. I thought I could use a thread to receive data from the child process and restart the child process, but it doesn't work the way I thought.
import numpy as np
import multiprocessing as mp
import threading
import time
from apscheduler.schedulers.background import BackgroundScheduler

pipe_in, pipe_out = mp.Pipe()

class Mother():
    def __init__(self):
        self.pipe_out = pipe_out
        self.proc = mp.Process(target=self.test_func, args=(pipe_in,))
        self.proc.start()
        self.thread = threading.Thread(target=self.thread_reciever, args=(self.pipe_out,))
        self.thread.start()

    def thread_reciever(self, pipe_out):
        while True:
            value = pipe_out.recv()
            print(value)
            if value == 5:
                self.proc.terminate()
                time.sleep(2)
                self.proc = mp.Process(target=self.test_func)
                self.proc.start()

    def test_func(self, pipe_in):
        for i in range(10):
            pipe_in.send(i)
            time.sleep(1)

if __name__ == '__main__':
    r = Mother()
It prints out this error.
D:\>d:\python36-32\python.exe temp06.py
0
1
2
3
4
5
Exception in thread Thread-1:
Traceback (most recent call last):
File "d:\python36-32\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "d:\python36-32\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "temp06.py", line 28, in thread_reciever
self.proc.start()
File "d:\python36-32\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "d:\python36-32\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "d:\python36-32\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "d:\python36-32\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "d:\python36-32\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
D:\>Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\python36-32\lib\multiprocessing\spawn.py", line 99, in spawn_main
new_handle = reduction.steal_handle(parent_pid, pipe_handle)
File "d:\python36-32\lib\multiprocessing\reduction.py", line 82, in steal_handle
_winapi.PROCESS_DUP_HANDLE, False, source_pid)
OSError: [WinError 87]
How could I start and terminate a process inside a thread? (I'm using a thread because it can synchronously receive data from a different process.) Or is there another way to do this job?
With test_func as a global function:
import numpy as np
import multiprocessing as mp
import threading
import time
from apscheduler.schedulers.background import BackgroundScheduler

pipe_in, pipe_out = mp.Pipe()

def test_func(pipe_in):
    for i in range(10):
        pipe_in.send(i)
        time.sleep(1)

class Mother():
    def __init__(self):
        self.pipe_out = pipe_out
        mp.freeze_support()
        self.proc = mp.Process(target=test_func, args=(pipe_in,))
        self.proc.start()
        self.thread = threading.Thread(target=self.thread_reciever, args=(self.pipe_out,))
        self.thread.start()

    def thread_reciever(self, pipe_out):
        while True:
            value = pipe_out.recv()
            print(value)
            if value == 5:
                self.proc.terminate()
                time.sleep(2)
                mp.freeze_support()
                self.proc = mp.Process(target=test_func, args=(pipe_in,))
                self.proc.start()

if __name__ == '__main__':
    r = Mother()
OUTPUT
D:\> d:\python36-32\python.exe temp06.py
0
1
2
3
4
5
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "d:\python36-32\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "d:\python36-32\lib\multiprocessing\spawn.py", line 115, in _main
self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'test_func' on <module '__main__' (built-in)>
Under Windows, since there is no fork syscall, Python starts a new interpreter instance and uses pickle/unpickle to reconstruct the execution context, but thread.Lock is not picklable. While pickling self.test_func, the self.thread attribute references a thread.Lock object, which makes the instance unpicklable.
You could simply change test_func to a plain global function, with no reference to the thread object:
self.proc = mp.Process(target=test_func, args=(pipe_in,))
...

def test_func(pipe_in):
    for i in range(10):
        pipe_in.send(i)
        time.sleep(1)
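If the target really must stay a method, another option (my own sketch, not from the answer above; the receiver method and attribute names here are my own restructuring) is to strip the unpicklable attributes when the instance is pickled for the child process by defining __getstate__. The child only needs the data required to run the target, not the Thread or Process handles.

import multiprocessing as mp
import threading
import time

class Mother:
    def __init__(self):
        self.pipe_in, self.pipe_out = mp.Pipe()
        self.proc = mp.Process(target=self.test_func, args=(self.pipe_in,))
        self.thread = threading.Thread(target=self.receiver, args=(self.pipe_out,))
        self.proc.start()
        self.thread.start()

    def __getstate__(self):
        # Drop handles that hold thread locks; they cannot cross the process boundary.
        state = self.__dict__.copy()
        for name in ('proc', 'thread', 'pipe_in', 'pipe_out'):
            state.pop(name, None)
        return state

    def receiver(self, pipe_out):
        for _ in range(10):
            print(pipe_out.recv())

    def test_func(self, pipe_in):
        for i in range(10):
            pipe_in.send(i)
            time.sleep(0.2)

if __name__ == '__main__':
    m = Mother()
    m.thread.join()
    m.proc.join()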

Behavior of Manager in Multiprocessing

I can't find an explanation for this behavior in Python 3:
from multiprocessing import Process, cpu_count, freeze_support, Manager

class A:
    def __init__(self):
        # self._manager = Manager()
        # self._list = self._manager.list()
        manager = Manager()
        self._list = manager.list()

    def producer(self):
        processes = []
        cores = cpu_count()
        for i in range(cores):
            process = Process(target=self.worker)
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

    def worker(self):
        print('I was called')

if __name__ == '__main__':
    freeze_support()
    a = A()
    a.producer()
With this in __init__:
    self._manager = Manager()
    self._list = self._manager.list()
I get the error OSError: handle is closed at the call to process.start().
With this in __init__:
    manager = Manager()
    self._list = manager.list()
everything seems to work.
I read https://docs.python.org/3.6/library/multiprocessing.html#sharing-state-between-processes but I can't find an explanation of why an instance of Manager() can't be an instance attribute in the example above. My best guess is that Manager() is itself a process, and with a call like target=self.worker I'm breaking some of the logic for handling processes.
Question: am I right, or am I missing something?
Full Traceback:
Traceback (most recent call last):
File "G:/files-from-server/apps/test_module/test_export.py", line 27, in <module>
a.producer()
File "G:/files-from-server/apps/test_module/test_export.py", line 15, in producer
process.start()
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\connection.py", line 939, in reduce_pipe_connection
dh = reduction.DupHandle(conn.fileno(), access)
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\connection.py", line 170, in fileno
self._check_closed()
File "c:\users\maxim\appdata\local\programs\python\python36-32\Lib\multiprocessing\connection.py", line 136, in _check_closed
raise OSError("handle is closed")
OSError: handle is closed
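No answer is recorded in this thread, but a minimal sketch of the working pattern (my own reading of the traceback, so treat the explanation as an assumption) is to keep only the manager.list() proxy on self: the proxy is designed to be pickled and sent to child processes, whereas the Manager object itself holds handles to the manager process that are not.

from multiprocessing import Process, Manager, freeze_support

class A:
    def __init__(self, shared_list):
        self._list = shared_list          # proxy only; safe to pickle with self

    def worker(self):
        self._list.append('I was called')

if __name__ == '__main__':
    freeze_support()
    with Manager() as manager:            # the manager lives in the parent only
        a = A(manager.list())
        procs = [Process(target=a.worker) for _ in range(2)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        print(list(a._list))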

How to start processes with methods as targets in a class context?

I am trying to start several processes in a class context which should share a queue:
import multiprocessing
import queue

class MyMulti:
    def __init__(self):
        self.myq = queue.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        for _ in range(5):
            p = multiprocessing.Process(target=self.printhello)
            p.start()

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()
    # at that point the queue is being filled in with five elements
This crashes with
C:\Python34\python.exe C:/Users/yop/dev/GetNessusScans/tests/testm.py
Traceback (most recent call last):
File "C:/Users/yop/dev/GetNessusScans/tests/testm.py", line 20, in <module>
m.run()
File "C:/Users/yop/dev/GetNessusScans/tests/testm.py", line 15, in run
p.start()
File "C:\Python34\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Python34\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Python34\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Python34\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Python34\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '_thread.lock'>: attribute lookup lock on _thread failed
An answer to a similar question suggested having a top-level worker function, which I adapted to my case as:
import multiprocessing
import queue

def work(foo):
    foo.printhello()

class MyMulti:
    def __init__(self):
        self.myq = queue.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        for _ in range(5):
            p = multiprocessing.Process(target=work, args=(self,))
            p.start()

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()
    # at that point the queue is being filled in with five elements
This crashes the same way, though.
Is there a way to start processes with methods as targets?
I should have used self.myq = multiprocessing.Queue() instead of queue.Queue().
Unlike queue.Queue(), multiprocessing.Queue() is process-safe and can be shared with child processes.
I leave the question unanswered for now in case someone wants to comment on whether the whole approach is wrong.
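For completeness, a minimal sketch of that fix (my own, assuming the rest of the class stays as posted): a multiprocessing.Queue stored on self can be inherited by the spawned children, so the five "hello" strings actually reach the parent.

import multiprocessing

class MyMulti:
    def __init__(self):
        # Process-safe queue; it can be inherited by child processes,
        # unlike queue.Queue, which wraps thread locks and cannot be pickled.
        self.myq = multiprocessing.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        procs = [multiprocessing.Process(target=self.printhello) for _ in range(5)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
        for _ in range(5):
            print(self.myq.get())   # drain the five items put by the children

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()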

Asyncio error, An operation was attempted on something that is not a socket

I'm currently working on the toy code below to try and understand the asyncio module.
import asyncio
import os, sys, traceback
from time import time

os.environ['PYTHONASYNCIODEBUG'] = '1'
print(sys.version)

def timed_fib(n):
    def fib(n):
        return fib(n - 1) + fib(n - 2) if n > 1 else n
    a = time()
    return fib(n), time() - a

def process_input():
    text = sys.stdin.readline()
    n = int(text.strip())
    print('fib({}) = {}'.format(n, timed_fib(n)))

@asyncio.coroutine
def print_hello():
    while True:
        print("{} - Hello world!".format(int(time())))
        yield from asyncio.sleep(3)

def main():
    loop = asyncio.get_event_loop()
    loop.add_reader(sys.stdin, process_input)
    loop.run_until_complete(print_hello())

if __name__ == '__main__':
    main()
However, trying to run this yields the incredibly cryptic traceback below. As you can see, the debug environment variable is set in the fifth line of the code above, yet the traceback remains unhelpful:
3.4.3rc1 (v3.4.3rc1:69dd528ca625+, Feb 8 2015, 11:01:19) [MSC v.1600 32 bit (Intel)]
Traceback (most recent call last):
File "test.py", line 33, in <module>
main()
File "test.py", line 29, in main
loop.run_until_complete(print_hello())
File "C:\Python34\lib\asyncio\base_events.py", line 304, in run_until_complete
self.run_forever()
File "C:\Python34\lib\asyncio\base_events.py", line 276, in run_forever
self._run_once()
File "C:\Python34\lib\asyncio\base_events.py", line 1136, in _run_once
event_list = self._selector.select(timeout)
File "C:\Python34\lib\selectors.py", line 314, in select
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File "C:\Python34\lib\selectors.py", line 305, in _select
r, w, x = select.select(r, w, w, timeout)
OSError: [WinError 10038] An operation was attempted on something that is not a socket
How can I access a more useful traceback, and what might be the problem? I am on Windows 7, if it matters.
select() works only with sockets on Windows.
To work with file descriptors, you could try a non-select-based event loop; code example:
if os.name == 'nt':
    loop = asyncio.ProactorEventLoop()  # for subprocess' pipes on Windows
    asyncio.set_event_loop(loop)
else:
    loop = asyncio.get_event_loop()
Though I doubt that it would help with sys.stdin and asyncio (code example); a possible workaround is sketched below.
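If the goal is just to mix periodic work with reading stdin on Windows, one workaround (my own sketch, assuming Python 3.5+ for async/await) is to skip add_reader() entirely and push the blocking readline into a thread with run_in_executor, which works with either event loop implementation:

import asyncio
import sys
from time import time

async def stdin_reader(loop):
    # The blocking readline runs in a worker thread, so the event loop stays free.
    while True:
        line = await loop.run_in_executor(None, sys.stdin.readline)
        if not line:          # EOF
            break
        print('you typed:', line.strip())

async def ticker():
    while True:
        print('{} - Hello world!'.format(int(time())))
        await asyncio.sleep(3)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    # The reader stops at EOF on stdin; stop the whole program with Ctrl+C.
    loop.run_until_complete(asyncio.gather(stdin_reader(loop), ticker()))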
