Does the os module work inside a ProcessPoolExecutor function?

When I run convert(mp3_files[5]) directly, it creates a wav file as expected. When I do the same with ProcessPoolExecutor, I get a NameError: name 'os' is not defined exception. What is wrong?
import concurrent.futures, subprocess, multiprocessing

# audio, exe_mpg123 and mp3_files are defined elsewhere in the full script
def convert(mp3_file):
    file_name = os.path.splitext(os.path.basename(mp3_file))[0]
    out = os.path.join(audio, file_name + '.wav')
    subprocess.run([exe_mpg123, '-q', '-e', 'f32', '-w', out, mp3_file])

def main():
    # convert(mp3_files[5])
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    import os
    main()
# Traceback (most recent call last):
# File "C:\script.py", line 159, in <module>
# main()
# File "C:\script.py", line 108, in main
# for result in results:
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\process.py", line 483, in _chain_from_iterable_of_lists
# for element in iterable:
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 598, in result_iterator
# yield fs.pop().result()
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 428, in result
# return self.__get_result()
# File "C:\Users\Asd\miniconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
# raise self._exception
# NameError: name 'os' is not defined
os was imported beforehand. If I uncomment the line convert(mp3_files[5]), it works.
I use Windows 10 64, Python 3.7.7 (default, May 6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)].

Thank you very much for your comments, I found the issue. When using ProcessPoolExecutor on Windows, you MUST place all imports used by the worker function BEFORE if __name__ == "__main__":. This code works:
import os, concurrent.futures

def convert(mp3_file):
    print(os.name)

def main():
    mp3_files = [1, 2, 3, 4, 5]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    main()
This does not:
import concurrent.futures

def convert(mp3_file):
    print(os.name)

def main():
    mp3_files = [1, 2, 3, 4, 5]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(convert, mp3_files)
        for result in results:
            print(result)

if __name__ == "__main__":
    import os
    main()
When you call a simple function like convert(mp3_files[1]) directly, it always works, no matter where you placed your imports.
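The reason, as far as I can tell: on Windows, multiprocessing uses the "spawn" start method, so every worker process re-imports your module from scratch. Only module-level statements run in the workers; the if __name__ == "__main__": block runs only in the parent, so an import placed there never happens in the children. A minimal sketch that makes this visible (the prints are just for illustration):
import concurrent.futures

print("module level: runs in the parent AND in every spawned worker")

def job(n):
    return n * n

if __name__ == "__main__":
    print("__main__ block: runs only in the parent")
    with concurrent.futures.ProcessPoolExecutor() as executor:
        print(list(executor.map(job, range(3))))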

Related

Python pass variable to multiprocessing pool

I've been trying to figure this out for ages, and I'm wondering if anyone might know how I can pass the s variable to the pool without making it into an argument?
import ctypes
import multiprocessing as mp
import os

def worker1(n):
    k = n*3
    print(k)
    print(s)
    # print(ctypes_int.value)

if __name__ == '__main__':
    # global s
    somelist = [1,2,3]
    # ctypes_int = mp.Value(ctypes.c_wchar_p , "hi")
    s = "TESTING"
    # worker1(1)
    p = mp.Pool(1)
    p.map(worker1,somelist)
This is the error I am getting:
3
6
9
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Program Files\Python\Python37\lib\multiprocessing\pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "C:\Program Files\Python\Python37\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "C:\Users\light\AppData\Local\Temp\tempCodeRunnerFile.python", line 10, in worker1
print(s)
NameError: name 's' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\light\AppData\Local\Temp\tempCodeRunnerFile.python", line 21, in <module>
p.map(worker1,somelist)
File "C:\Program Files\Python\Python37\lib\multiprocessing\pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Program Files\Python\Python37\lib\multiprocessing\pool.py", line 657, in get
raise self._value
NameError: name 's' is not defined
You can pass your variable along with each item in somelist:
import multiprocessing as mp

def worker1(p):
    n, s = p
    k = n*3
    print(k)
    print(s)

if __name__ == '__main__':
    somelist = [1,2,3]
    s = "TESTING"
    p = mp.Pool(1)
    p.map(worker1, [(n,s) for n in somelist])
Each tuple (n, s) gets passed as p, and I unpack it into n and s.
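An alternative sketch, not from the original answer: multiprocessing.Pool also accepts an initializer that runs once in each worker process, so it can set a module-level global and you don't have to pass s along with every item:
import multiprocessing as mp

def init_worker(value):
    # runs once in each worker process and stores the value in a global
    global s
    s = value

def worker1(n):
    print(n * 3)
    print(s)  # set by init_worker in this worker process

if __name__ == '__main__':
    somelist = [1, 2, 3]
    with mp.Pool(1, initializer=init_worker, initargs=("TESTING",)) as p:
        p.map(worker1, somelist)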

How to mock an object inside main() function in python?

I am trying to mock data1 and data2 and trying to provide a return value.
I have the following code:
import pandas

def main():
    data1 = pandas.read_excel('path1')
    data2 = pandas.read_excel('path2')

if __name__ == '__main__':
    main()
import test1
from unittest.mock import patch
import pandas

class Testdata(unittest.TestCase):
    @patch('test1.main.data1')
    @patch('test1.main.data2')
    def test_main(self, mock_data1, mock_data2):
        mock_data1.return_value = pandas.DataFrame([some dataframe])
        mock_data2.return_value = pandas.DataFrame([some dataframe])
        test.main()
        data1.assert_called_once()
        data2.assert_called_once()

if __name__ == '__main__':
    unittest.main()
I am getting the following error:
Error
Traceback (most recent call last):
File "C:\apps\python\3.6.2\lib\unittest\case.py", line 59, in testPartExecutor
yield
File "C:\apps\python\3.6.2\lib\unittest\case.py", line 605, in run
testMethod()
File "C:\apps\python\3.6.2\lib\unittest\mock.py", line 1171, in patched
arg = patching.__enter__()
File "C:\apps\python\3.6.2\lib\unittest\mock.py", line 1227, in __enter__
self.target = self.getter()
File "C:\apps\python\3.6.2\lib\unittest\mock.py", line 1397, in <lambda>
getter = lambda: _importer(target)
File "C:\apps\python\3.6.2\lib\unittest\mock.py", line 1080, in _importer
thing = __import__(import_path)
ModuleNotFoundError: No module named 'main'
How do I resolve this issue and how to mock data1 and data2 and provide a return value to it?
Can't say much without looking at the full code, but I think adding import unittest at the top should do the job.
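Beyond the missing import: data1 and data2 are local variables inside main(), so they can't be patched as attributes; what can be patched is the pandas.read_excel call that creates them. A minimal sketch, assuming the module under test is importable as test1:
import unittest
from unittest.mock import patch
import pandas
import test1

class TestData(unittest.TestCase):
    # data1/data2 are locals in main(), so patch the read_excel call
    # that produces them and hand back one DataFrame per call
    @patch('test1.pandas.read_excel')
    def test_main(self, mock_read_excel):
        mock_read_excel.side_effect = [
            pandas.DataFrame({'a': [1]}),  # becomes data1
            pandas.DataFrame({'b': [2]}),  # becomes data2
        ]
        test1.main()
        self.assertEqual(mock_read_excel.call_count, 2)

if __name__ == '__main__':
    unittest.main()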

Can't use multiprocessing in python function

I'm a beginner in Python, and I'm trying to put multiprocessing into a function, but Python gives me an error.
Please refer to the original code below:
from multiprocessing import Process
import time

def func1():
    print('test1')
    time.sleep(10)

def func2():
    print('test2')
    time.sleep(5)

if __name__ == '__main__':
    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')
It runs well and gives the correct result I need.
However, when I tried to put the multiprocessing code into function:
from multiprocessing import Process
import time

def test_multiprocessing():
    def func1():
        print('test1')
        time.sleep(10)
    def func2():
        print('test2')
        time.sleep(5)
    if __name__ == '__main__':
        p_func1 = Process(target=func1)
        p_func2 = Process(target=func2)
        p_func1.start()
        p_func2.start()
        p_func1.join()
        p_func2.join()
        print('done')

test_multiprocessing()
Below is the error I got; may I know how to fix this issue? The reason I'd like to put multiprocessing into a function is that there is existing code there, and I don't want to make major changes to it to support multiprocessing.
Traceback (most recent call last):
File "multipleprocessing.py", line 20, in <module>
test_multiprocessing()
File "multipleprocessing.py", line 14, in test_multiprocessing
p_func1.start()
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'test_multiprocessing.<locals>.func1'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\spawn.py", line 99, in spawn_main
new_handle = reduction.steal_handle(parent_pid, pipe_handle)
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\multiprocessing\reduction.py", line 87, in steal_handle
_winapi.DUPLICATE_SAME_ACCESS | _winapi.DUPLICATE_CLOSE_SOURCE)
PermissionError: [WinError 5] Access is denied
The same code works when I tested it on Linux. Does that mean Python on Windows can't support multiprocessing inside a function?
Your code is correct. You shouldn't be keeping if __name__ == '__main__': inside the function. Read more about it here: why name=="main"
Try it like below:
from multiprocessing import Process
import time

def test_multiprocessing():
    def func1():
        print('test1')
        time.sleep(10)
    def func2():
        print('test2')
        time.sleep(5)
    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')

test_multiprocessing()
A small correction to @Prakash's answer: you need to call the function from inside if __name__ == "__main__":, and the worker functions have to live at module level so they can be pickled on Windows.
It's explained well here!
from multiprocessing import Process
import time

def func1():
    print('test1')
    time.sleep(10)

def func2():
    print('test2')
    time.sleep(5)

def test_multiprocessing():
    p_func1 = Process(target=func1)
    p_func2 = Process(target=func2)
    p_func1.start()
    p_func2.start()
    p_func1.join()
    p_func2.join()
    print('done')

if __name__ == "__main__":
    test_multiprocessing()
Another way is to bind the methods to a class, because plain functions are only picklable if they are defined at the top level of a module, while methods of a module-level class can be pickled through their instance, as below:
from multiprocessing import Process
import time

class Foo:
    def func1(self):
        print('test1')
        time.sleep(10)

    def func2(self):
        print('test2')
        time.sleep(5)

    def test_multiprocessing(self):
        p_func1 = Process(target=self.func1)
        p_func2 = Process(target=self.func2)
        p_func1.start()
        p_func2.start()
        p_func1.join()
        p_func2.join()
        print('done')

if __name__ == "__main__":
    f = Foo()
    f.test_multiprocessing()

How to start processes with methods as targets in a class context?

I am trying to start several processes in a class context which should share a queue:
import multiprocessing
import queue

class MyMulti:
    def __init__(self):
        self.myq = queue.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        for _ in range(5):
            p = multiprocessing.Process(target=self.printhello)
            p.start()

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()
    # at that point the queue is being filled in with five elements
This crashes with
C:\Python34\python.exe C:/Users/yop/dev/GetNessusScans/tests/testm.py
Traceback (most recent call last):
File "C:/Users/yop/dev/GetNessusScans/tests/testm.py", line 20, in <module>
m.run()
File "C:/Users/yop/dev/GetNessusScans/tests/testm.py", line 15, in run
p.start()
File "C:\Python34\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Python34\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Python34\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Python34\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Python34\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '_thread.lock'>: attribute lookup lock on _thread failed
An answer to a similar question suggested to have a worker uppermost function, which I adapted to my case as
import multiprocessing
import queue

def work(foo):
    foo.printhello()

class MyMulti:
    def __init__(self):
        self.myq = queue.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        for _ in range(5):
            p = multiprocessing.Process(target=work, args=(self,))
            p.start()

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()
    # at that point the queue is being filled in with five elements
This crashes the same way, though.
Is there a way to start processes with methods as targets?
I should have used self.myq = multiprocessing.Queue() instead of queue.Queue(). multiprocessing.Queue() is process-safe, in addition to being thread-safe like queue.Queue().
I leave the question open for now in case someone wants to comment on whether the whole approach is wrong.
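For reference, a minimal sketch of the fix described above; swapping queue.Queue for multiprocessing.Queue makes the instance picklable for spawn and lets the children share the queue with the parent:
import multiprocessing

class MyMulti:
    def __init__(self):
        # process-safe queue; it can be pickled when handed to a child process
        self.myq = multiprocessing.Queue()

    def printhello(self):
        print("hello")
        self.myq.put("hello")

    def run(self):
        procs = [multiprocessing.Process(target=self.printhello) for _ in range(5)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()

if __name__ == "__main__":
    multiprocessing.freeze_support()
    m = MyMulti()
    m.run()
    for _ in range(5):
        print(m.myq.get())  # drains the five "hello" items put by the children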

Asyncio error, An operation was attempted on something that is not a socket

I'm currently working on the toy code below to try to understand the asyncio module.
import asyncio
import os, sys, traceback
from time import time

os.environ['PYTHONASYNCIODEBUG'] = '1'
print(sys.version)

def timed_fib(n):
    def fib(n):
        return fib(n - 1) + fib(n - 2) if n > 1 else n
    a = time()
    return fib(n), time() - a

def process_input():
    text = sys.stdin.readline()
    n = int(text.strip())
    print('fib({}) = {}'.format(n, timed_fib(n)))

@asyncio.coroutine
def print_hello():
    while True:
        print("{} - Hello world!".format(int(time())))
        yield from asyncio.sleep(3)

def main():
    loop = asyncio.get_event_loop()
    loop.add_reader(sys.stdin, process_input)
    loop.run_until_complete(print_hello())

if __name__ == '__main__':
    main()
However, trying to run this yields the incredibly cryptic traceback below. As you can see, the debug environment variable is set in the fifth line of the code above; even so, the traceback remains incredibly unhelpful:
3.4.3rc1 (v3.4.3rc1:69dd528ca625+, Feb 8 2015, 11:01:19) [MSC v.1600 32 bit (Intel)]
Traceback (most recent call last):
File "test.py", line 33, in <module>
main()
File "test.py", line 29, in main
loop.run_until_complete(print_hello())
File "C:\Python34\lib\asyncio\base_events.py", line 304, in run_until_complete
self.run_forever()
File "C:\Python34\lib\asyncio\base_events.py", line 276, in run_forever
self._run_once()
File "C:\Python34\lib\asyncio\base_events.py", line 1136, in _run_once
event_list = self._selector.select(timeout)
File "C:\Python34\lib\selectors.py", line 314, in select
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File "C:\Python34\lib\selectors.py", line 305, in _select
r, w, x = select.select(r, w, w, timeout)
OSError: [WinError 10038] An operation was attempted on something that is not a socket
How can I access a more useful traceback, and what might be the problem? I am on Windows 7, if it matters.
select() works only with sockets on Windows.
To work with file descriptors, you could try a non-select-based event loop; for example:
if os.name == 'nt':
    loop = asyncio.ProactorEventLoop()  # for subprocess' pipes on Windows
    asyncio.set_event_loop(loop)
else:
    loop = asyncio.get_event_loop()
Though I doubt that it would help with sys.stdin and asyncio (see the linked code example).
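One possible workaround sketch, my own rather than from the answer above: since stdin is not a socket on Windows, the blocking readline can be pushed onto a thread with run_in_executor instead of using loop.add_reader:
import asyncio
import sys

@asyncio.coroutine
def watch_stdin(loop):
    while True:
        # readline blocks, so run it in the default thread pool executor
        line = yield from loop.run_in_executor(None, sys.stdin.readline)
        print('got: {}'.format(line.strip()))

@asyncio.coroutine
def print_hello():
    while True:
        print('Hello world!')
        yield from asyncio.sleep(3)

def main():
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(watch_stdin(loop), print_hello()))

if __name__ == '__main__':
    main()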
