I've just tested python multiprocessing for reading file or a global variable, but there is something strange happen.
for expample:
import multiprocessing
a = 0
def test(lock, name):
global a
with lock:
for i in range(10):
a = a + 1
print "in process %d : %d" % (name, a)
def main():
lock = multiprocessing.Lock()
p1 = multiprocessing.Process(target=test, args=(lock, 1))
p2 = multiprocessing.Process(target=test, args=(lock, 2))
p1.start()
p2.start()
p1.join()
p2.join()
print "in main process : %d" % a
if __name__=='__main__':
main()
The program read a global variable, but the output is:
in process 1 : 10
in process 2 : 10
in main process : 0
It seems that the sub-process cannot get and edit the global variable properly. Also, if I change the program to read the file, each sub-process will read the file completely, ignoring the lock.
So how does these happen? And how to solve this problem?
Global variables are not shared between processes. When you create and start a new Process(), that process runs inside a separate "cloned" copy of the current Python interpreter. Updating the variable from within a Process() will only update the variable locally to the particular process it is updated in.
To share data between Python processes, we need a multiprocessing.Pipe(), a multiprocessing.Queue(), a multiprocessing.Value(), a multiprocessing.Array() or one of the other multiprocessing-safe containers.
Here's an example based on your code:
import multiprocessing
def worker(lock, counter, name):
with lock:
for i in range(10):
counter.value += 1
print "In process {}: {}".format(name, counter.value)
def main():
lock = multiprocessing.Lock()
counter = multiprocessing.Value('i', 0)
p1 = multiprocessing.Process(target=worker, args=(lock, counter, 1))
p2 = multiprocessing.Process(target=worker, args=(lock, counter, 2))
p1.start()
p2.start()
p1.join()
p2.join()
print "In main process: {}".format(counter.value)
if __name__=='__main__':
main()
This gives me:
In process 1: 10
In process 2: 20
In main process: 20
Now, if you really want to use a global variable, you can use a multiprocessing.Manager(), but I think the first method is preferable, and this is a "heavier" solution. Here's an example:
import multiprocessing
manager = multiprocessing.Manager()
counter = manager.Value('i', 0);
def worker(lock, name):
global counter
with lock:
for i in range(10):
counter.value += 1
print "In process {}: {}".format(name, counter.value)
def main():
global counter
lock = multiprocessing.Lock()
p1 = multiprocessing.Process(target=worker, args=(lock, 1))
p2 = multiprocessing.Process(target=worker, args=(lock, 2))
p1.start()
p2.start()
p1.join()
p2.join()
print "In main process: {}".format(counter.value)
if __name__=='__main__':
main()
Related
I'm just studying about multiprocessing in Python. I have a code that updates the value of a variable in a process, and other processes read the value of this variable. This is working as I expected.
Now I just want to know if there is some way to do the same using the Ray library to improve the speed of execution if I need to run lots of processes reading it
from multiprocessing import Process, Manager
def write_to_dict(d, value):
while True:
value = value + 1
d['key'] = value
def read_from_dict(d):
while True:
read = d['key']
print(read)
if __name__ == '__main__':
manager = Manager()
shared_dict = manager.dict()
p1 = Process(target=write_to_dict, args=(shared_dict, 0))
p2 = Process(target=read_from_dict, args=(shared_dict,))
p1.start()
p2.start()
p1.join()
p2.join()
hellow,
please some help.
i want to take variables when using repeating statement.
Actually in my code, there are so many variables and function to handle variables.
so i have to use multiprocess for some reason, but it's doesn't work for what i want.
below is simple code,
please help me.
from multiprocessing import Process, Manager
import time
def a(final_list):
c=0
while True:
c += 1
final_list.append(c)
time.sleep(1)
print(final_list)
def b(final_list):
while True:
print(final_list[-1])
time.sleep(1)
if __name__ == '__main__':
manager = Manager()
final_list = []
final_list = manager.list()
#print(a)
p1 = Process(target=a, args=(final_list,))
p2 = Process(target=b, args=(final_list,))
p1.start()
time.sleep(3)
p2.start()
I think you forgot to use join() for the processes. try this:
from multiprocessing import Process, Manager
import time
def a(final_list):
c=0
while True:
c += 1
final_list.append(c)
time.sleep(1)
print(final_list)
def b(final_list):
while True:
print(final_list[-1])
time.sleep(1)
if __name__ == '__main__':
with Manager() as manager:
final_list = manager.list()
p1 = Process(target=a, args=(final_list,))
p2 = Process(target=b, args=(final_list,))
p1.start()
time.sleep(3)
p2.start()
p1.join()
p2.join()
Why while loop is ignored in work1? I would like to update value from string to another value in loop and output this value in process work2. Also already tried with Queue, but problem is I have only one variable which I would like to update in work1 and access to it at work2.
from multiprocessing import Process, Manager, Value
from ctypes import c_char_p
import time
def work1(string):
i = 2
string.value = i
# while True:
# print("work1")
# string.value = i + 1
# time.sleep(2)
def work2(string):
while True:
print("Value set in work1 " + str(string.value))
time.sleep(2)
if __name__ == '__main__':
manager = Manager()
string = manager.Value(int, 0);
p1=Process(target=work1, args=(string,))
p1.start()
p1.join()
p2=Process(target=work2, args=(string,))
p2.start()
p2.join()
That is because you didn't make your program parallel with two processes, but instead, two processes run in tandem. What you need to do is to start both process before any join. Like my modification below:
from multiprocessing import Process, Manager, Value
from ctypes import c_char_p
import time
def work1(string):
i = 2
string.value = i
while True:
i = i+1
string.value = i
print("work1 set value to "+str(string.value))
time.sleep(2)
def work2(string):
while True:
print("Value set in work1 " + str(string.value))
time.sleep(2)
if __name__ == '__main__':
manager = Manager()
string = manager.Value(int, 0, lock=False);
p1=Process(target=work1, args=(string,))
p2=Process(target=work2, args=(string,))
p1.start()
p2.start()
p2.join()
p1.join()
Indeed, if you write the code in this way, the join never happened due to the infinite while loop.
I am new with multiprocessing in python and so far all the example I've seen are this kind (with one or more methods in the file and then 'main'):
from multiprocessing import Process
def f1(a):
#do something
def f2(b):
#do something
if __name__ == '__main__':
f1(a1)
p = Process(target=f2, args=(b2,))
p.start()
p.join()
If I have instead a method who calls 2 functions in another file to be concurrent like in the following lines,
def function():
#do something
file2.f1(a) #first concurrent method
file2.f2(b) #second concurrent method
how should I do?
Can anyone make a simple example? I tried in this way, but it starts all the program again after the first loop :
def function():
#do something
for i in range(3):
p1 = Process(target=file2.f1, args=(a)) #first concurrent method
p2 = Process(target=file2.f2, args=(b)) #second concurrent method
p1.start()
p2.start()
p1.join()
p2.join()
The issue seems to be that args varialbe is incorrectly defined, it should be tuple and not a single variable:
def function():
#do something
for i in range(3):
p1 = Process(target=file2.f1, args=(a, )) #first concurrent method
p2 = Process(target=file2.f2, args=(b, )) #second concurrent method
p1.start()
p2.start()
p1.join()
p2.join()
If you the order of the executions is flexible, you can use the Pool class to trigger multiple calls:
from multiprocessing.pool import Pool
pool = Pool()
pool.map_async(f1, [(arg, )] * 3)
pool.map_async(f2, [(arg, )] * 3)
pool.close()
pool.join()
I have 2 process running and I want them to complete before further down command executes (at the end of script it prints out that the program has ended). How can I make sure the process completes before printing out that it has ended?
from multiprocessing import Process
import datetime
class foo:
def fun1():
do sthn
def fun2():
do sthn
ob = foo()
if __name__ == '__main__':
p1 = Process(target = ob.fun1)
p1.start()
p2 = Process(target = ob.fun2)
p2.start()
endTime=datetime.datetime.now()
print 'Program Ending time is: ', endTime
You would use the .join() method, which blocks until the process is complete.
p1.join()
p2.join()