When I run this script on Linux, it prints 8 duplicates. How can I force Python to use all cores to produce different results, rather than duplicates?
from multiprocessing import Process

def f():
    fh = open("/path/to/10.txt", 'r')
    l = [s.strip('\n') for s in fh]
    fh.close()
    for a in range(0, len(l)):
        for b in range(0, len(l)):
            result = 0
            if (a == b):
                result = 1
            else:
                counter = 0
                for i in range(len(l[a])):
                    if (int(l[a][i]) == int(l[b][i]) == 1):
                        counter += 1
                result = counter / 10000
            print((a + 1), (b + 1), result)

if __name__ == '__main__':
    p = Process(target=f)
    p.start()
    p.join()
If you simply want to use more than one core, you will have to use multiple processes; here you are just using one.
You also need to break your routine f into independent units/routines so that the work can run in parallel and the whole task can be shared among multiple worker processes.
Here is a sample two-process program, which can use multiple cores on your machine:
from multiprocessing import Process

def task(arg):
    pass

if __name__ == '__main__':
    value = 'something'
    prc1 = Process(target=task, args=(value,))
    prc2 = Process(target=task, args=(value,))
    prc1.start()
    prc2.start()
    prc1.join()
    prc2.join()
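Applied to the original script, one way to break f into independent units is to give each worker a single row index a and let a Pool spread those rows over the cores. The sketch below is only an outline: it keeps the question's file path and the division by 10000, re-reads the file in every worker (plain globals are not shared between processes), and assumes the printing order does not matter.

from multiprocessing import Pool

def load_lines(path):
    with open(path, 'r') as fh:
        return [s.strip('\n') for s in fh]

def row(a):
    # one independent unit: compare row a against every row b
    l = load_lines("/path/to/10.txt")
    out = []
    for b in range(len(l)):
        if a == b:
            result = 1
        else:
            counter = sum(1 for i in range(len(l[a]))
                          if int(l[a][i]) == int(l[b][i]) == 1)
            result = counter / 10000
        out.append((a + 1, b + 1, result))
    return out

if __name__ == '__main__':
    n = len(load_lines("/path/to/10.txt"))
    with Pool() as p:               # one worker per core by default
        for rows in p.map(row, range(n)):
            for a1, b1, result in rows:
                print(a1, b1, result)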
I'm trying to parallelize some nested loops using Pool; moreover, the function has to return an array, but the external array stays empty.
from multiprocessing import Pool
import sympy as sym

# N and M are defined elsewhere in my code
def calcul_T(m):
    temp = []
    for n in range(0, N):
        x = sym.Symbol('x')
        y = sym.sin(x)
        # .....some stuff.....
        temp.append(y)
    return temp

rt = []

if __name__ == '__main__':
    pool = Pool()
    rt.append(pool.map(calcul_T, range(0, M)))
    pool.close()
    pool.join()
I expect to get at least an array of arrays, in order to make it a 2-D array and then use it further, after the if __name__ block.
What am I doing wrong?
Use a context manager:
from multiprocessing import Pool
import sympy as sym

def calcul_T(m):
    temp = []
    for n in range(0, N):
        x = sym.Symbol('x')
        y = sym.sin(x)
        # .....some stuff.....
        temp.append(y)
    return temp

rt = []

if __name__ == '__main__':
    with Pool(N_PROCESSES) as p:
        rt = p.map(calcul_T, range(0, M))
EDIT
According to the comment, accessing rt like a normal 2-D array works just fine (run it in a console; I changed the calcul_T function so this example can run):
from multiprocessing import Pool

N = 10
M = 10

def calcul_T(m):
    temp = []
    for n in range(0, N):
        temp.append(n * m)
    return temp

rt = []

if __name__ == '__main__':
    with Pool(5) as p:
        rt = p.map(calcul_T, range(0, M))
    print(rt[8][8])
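If the goal is a real 2-D array to use afterwards, the list of lists returned by p.map can be stacked directly. This is a sketch assuming NumPy is available; note that the real calcul_T returns SymPy expressions, which would give an object-dtype array rather than a numeric one.

import numpy as np  # assumption: NumPy is installed
from multiprocessing import Pool

N = 10
M = 10

def calcul_T(m):
    # same toy body as in the edit above
    return [n * m for n in range(N)]

if __name__ == '__main__':
    with Pool(5) as p:
        rt = p.map(calcul_T, range(M))   # a list of M lists, each of length N
    rt_2d = np.array(rt)                 # a plain (M, N) 2-D array
    print(rt_2d.shape, rt_2d[8, 8])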
I use this code: (from here)
import multiprocessing

def calc_square(numbers, result):
    for idx, n in enumerate(numbers):
        result[idx] = n*n

if __name__ == "__main__":
    numbers = [2, 3, 5]
    result = multiprocessing.Array('i', 3)
    p = multiprocessing.Process(target=calc_square, args=(numbers, result))
    p.start()
    p.join()
    print(result[:])
How do I let this loop run in parallel, for example on 3 different processes?
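A minimal sketch of one way to do that is to give each process its own slice of indices to square into the shared Array; the (start, stop) slicing below is an assumption, not part of the original code.

import multiprocessing

def calc_square(numbers, result, start, stop):
    # each process fills only its own slice of the shared array
    for idx in range(start, stop):
        result[idx] = numbers[idx] * numbers[idx]

if __name__ == "__main__":
    numbers = [2, 3, 5]
    result = multiprocessing.Array('i', len(numbers))
    # one (start, stop) slice per process: 3 processes for 3 numbers
    slices = [(0, 1), (1, 2), (2, 3)]
    procs = [multiprocessing.Process(target=calc_square,
                                     args=(numbers, result, start, stop))
             for start, stop in slices]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(result[:])   # [4, 9, 25]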
Due to a performance issue, I would like to run my function in parallel in Python:
import multiprocessing as mp
import networkx as nx

# Directed_G is a networkx directed graph defined elsewhere in my code
source_nodes = [10413173, 10414530, 10414530, 10437199]
sink_nodes = [10420346, 10438770, 10438711, 10414530, 10436258]
path = []

def createpath(source, sink):
    for i in source:
        for j in sink:
            path = path + list(nx.all_simple_paths(Directed_G, i, j))
    return path
From my understanding I must give one iterable to the apply function, but my idea was to do something like:
results = [pool.apply(createpath, args=(source_nodes, sink_nodes))]
and then not give any iterable object to the apply function.
I managed to get it to work, but I don't think it runs in parallel.
Do you think I should include the apply call inside the first loop?
from multiprocessing import Pool

source_nodes = [1, 2, 3, 4, 5, 6]
sink_nodes = [1, 1, 1, 1, 1, 1, 1, 1, 1]

def sum_values(parameter_tuple):
    source, sink, start, stop = parameter_tuple
    out = 0
    for i in range(start, stop):
        val_i = source[i]
        for j in sink:
            out += val_i*j
    return out

if __name__ == "__main__":
    params = (source_nodes, sink_nodes, 0, 6)
    print(sum_values(params))
    with Pool(2) as p:
        print(p.map(sum_values, [
            (source_nodes, sink_nodes, 0, 3),
            (source_nodes, sink_nodes, 3, 6),
        ]))
You can try running this one. It runs in parallel with the map pattern on a pool of two worker processes. In this case your final result is the sum of the results returned by each process in the pool.
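The same pattern can be applied to the original createpath by making one source node the unit of work. This is only a sketch: the tiny graph and node numbers below are made up, and it assumes Directed_G is built at module level so the worker processes can see it.

from itertools import chain
from multiprocessing import Pool

import networkx as nx

# made-up stand-ins for the real Directed_G, source_nodes and sink_nodes
Directed_G = nx.DiGraph([(1, 10), (1, 11), (2, 10), (3, 12), (4, 11), (10, 11)])
source_nodes = [1, 2, 3, 4]
sink_nodes = [10, 11, 12]

def paths_from(source):
    # all simple paths from one source to every sink: one independent unit
    out = []
    for sink in sink_nodes:
        out.extend(nx.all_simple_paths(Directed_G, source, sink))
    return out

if __name__ == "__main__":
    with Pool(2) as pool:
        per_source = pool.map(paths_from, source_nodes)
    path = list(chain.from_iterable(per_source))
    print(len(path), path)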
I am trying to come up with a way to have threads work on the same goal without interfering. In this case I am using 4 threads to add up every number between 0 and 90,000. This code runs but it ends almost immediately (runtime: 0.00399994850159 sec) and only outputs 0. Originally I wanted to do it with a global variable, but I was worried about the threads interfering with each other (i.e. the small chance that two threads double-count or skip a number due to strange timing of the reads/writes). So instead I distributed the workload beforehand. If there is a better way to do this, please share. This is my simple way of trying to get some experience with multithreading. Thanks.
import threading
import time

start_time = time.time()

tot1 = 0
tot2 = 0
tot3 = 0
tot4 = 0

def Func(x, y, tot):
    tot = 0
    i = y-x
    while z in range(0, i):
        tot = tot + i + z

# class Tester(threading.Thread):
#     def run(self):
#         print(n)

w = threading.Thread(target=Func, args=(0, 22499, tot1))
x = threading.Thread(target=Func, args=(22500, 44999, tot2))
y = threading.Thread(target=Func, args=(45000, 67499, tot3))
z = threading.Thread(target=Func, args=(67500, 89999, tot4))

w.start()
x.start()
y.start()
z.start()

w.join()
x.join()
y.join()
z.join()

# while (w.isAlive() == False | x.isAlive() == False | y.isAlive() == False | z.isAlive() == False): {}

total = tot1 + tot2 + tot3 + tot4
print total
print("--- %s seconds ---" % (time.time() - start_time))
You have a bug that makes this program end almost immediately. Look at while z in range(0,i): in Func. z isn't defined in the function, and it's only by luck (bad luck, really) that you happen to have a global variable z = threading.Thread(target=Func, args=(67500,89999,tot4)) that masks the problem. You are testing whether that thread object is in a list of integers... and it's not!
The next problem is the global variables. First, you are absolutely right that using a single global variable is not thread safe. The threads would mess with each other's calculations. But you misunderstand how globals work. When you do threading.Thread(target=Func, args=(67500,89999,tot4)), Python passes the object currently referenced by tot4 to the function, but the function has no idea which global it came from. You only update the local variable tot and discard it when the function completes.
A solution is to use a global container to hold the calculations, as shown in the example below. Unfortunately, this is actually slower than just doing all the work in one thread. The Python global interpreter lock (GIL) only lets one thread run at a time, so threading only slows down CPU-intensive tasks implemented in pure Python.
You could look at the multiprocessing module to split this into multiple processes. That works well if the cost of running the calculation is large compared to the cost of starting the process and passing it data.
Here is a working copy of your example:
import threading
import time

start_time = time.time()

tot = [0] * 4

def Func(x, y, tot_index):
    my_total = 0
    i = y-x
    for z in range(0, i):
        my_total = my_total + i + z
    tot[tot_index] = my_total

# class Tester(threading.Thread):
#     def run(self):
#         print(n)

w = threading.Thread(target=Func, args=(0, 22499, 0))
x = threading.Thread(target=Func, args=(22500, 44999, 1))
y = threading.Thread(target=Func, args=(45000, 67499, 2))
z = threading.Thread(target=Func, args=(67500, 89999, 3))

w.start()
x.start()
y.start()
z.start()

w.join()
x.join()
y.join()
z.join()

# while (w.isAlive() == False | x.isAlive() == False | y.isAlive() == False | z.isAlive() == False): {}

total = sum(tot)
print total
print("--- %s seconds ---" % (time.time() - start_time))
You can pass in a mutable object to collect your results, either one keyed by an identifier (e.g. a dict) or just a list that you append() the results to, e.g.:
import threading

def Func(start, stop, results):
    results.append(sum(range(start, stop+1)))

rngs = [(0, 22499), (22500, 44999), (45000, 67499), (67500, 89999)]
results = []
jobs = [threading.Thread(target=Func, args=(start, stop, results)) for start, stop in rngs]

for j in jobs:
    j.start()
for j in jobs:
    j.join()

print(sum(results))
# 4049955000
# 100 loops, best of 3: 2.35 ms per loop
As others have noted, you could look at multiprocessing in order to split the work across multiple processes that can run in parallel. This would help especially with CPU-intensive tasks, assuming there isn't a huge amount of data to pass between the processes.
Here's a simple implementation of the same functionality using multiprocessing:
from multiprocessing import Pool

POOL_SIZE = 4
NUMBERS = 90000

def func(_range):
    tot = 0
    for z in range(*_range):
        tot += z
    return tot

with Pool(POOL_SIZE) as pool:
    chunk_size = int(NUMBERS / POOL_SIZE)
    chunks = ((i, i + chunk_size) for i in range(0, NUMBERS, chunk_size))
    print(sum(pool.imap(func, chunks)))
In the above, chunks is a generator that produces the same ranges that were hardcoded in the original version. It's given to imap, which works the same as the standard map except that it executes the function in the processes within the pool.
A lesser-known fact about multiprocessing is that you can easily convert the code to use threads instead of processes by using the undocumented multiprocessing.pool.ThreadPool. To convert the above example to use threads, just change the import to:
from multiprocessing.pool import ThreadPool as Pool
The 3SUM problem is defined as follows.
Given: a positive integer k ≤ 20, a positive integer n ≤ 10^4, and k arrays of size n containing integers from −10^5 to 10^5.
Return: for each array A[1..n], three different indices 1 ≤ p < q < r ≤ n such that A[p] + A[q] + A[r] = 0 if they exist, and "-1" otherwise.
Sample Dataset
4 5
2 -3 4 10 5
8 -6 4 -2 -8
-5 2 3 2 -4
2 4 -5 6 8
Sample Output
-1
1 2 4
1 2 3
-1
However, I want to speed up the code using threads. To do so I am using the following Python code:
def TS(arr):
    original = arr[:]
    arr.sort()
    n = len(arr)
    for i in xrange(n-2):
        a = arr[i]
        j = i+1
        k = n-1
        while j < k:
            b = arr[j]
            c = arr[k]
            if a + b + c == 0:
                return sorted([original.index(a)+1, original.index(b)+1, original.index(c)+1])
            elif a + b + c > 0:
                k = k - 1
            else:
                j = j + 1
    return [-1]

with open("dataset.txt") as dataset:
    k = int(dataset.readline().split()[0])
    for i in xrange(k):
        aux = map(int, dataset.readline().split())
        results = TS(aux)
        print ' '.join(map(str, results))
I was thinking of creating k threads and a global output array, but I do not know how to continue developing the idea:
import thread
from threading import Thread

class thread_it(Thread):
    def __init__(self, param):
        Thread.__init__(self)
        self.param = param
    def run(self):
        mutex.acquire()
        output.append(TS(aux))
        mutex.release()

threads = []  # k threads
output = []   # global answer
mutex = thread.allocate_lock()

with open("dataset.txt") as dataset:
    k = int(dataset.readline().split()[0])
    for i in xrange(k):
        aux = map(int, dataset.readline().split())
        current = thread_it(aux)
        threads.append(current)
        current.start()

for t in threads:
    t.join()
What would be the correct way to get results = TS(aux) inside a thread, then wait until all threads have finished, and then print ' '.join(map(str, results)) for each of them?
Update: I also ran into an issue when running the script from the console.
First, as @Cyphase said, because of the GIL you cannot speed this up with threading: only one thread executes Python bytecode at a time. Consider using multiprocessing to utilize multiple cores; multiprocessing has a very similar API to threading.
Second, even if we pretend the GIL doesn't exist: by putting everything in a critical section protected by the mutex, you are actually serializing all the threads. What you need to protect is access to output, so move the processing code out of the critical section so the threads can run concurrently:
def run(self):
    result = TS(self.param)
    mutex.acquire()
    output.append(result)
    mutex.release()
But don't reinvent the wheel; the Python standard library provides a thread-safe Queue, so use that:
try:
    import Queue as queue  # python2
except ImportError:
    import queue

output = queue.Queue()

def run(self):
    result = TS(self.param)
    output.put(result)
With multiprocessing, the final code looks something like this:
from multiprocessing import Process, Queue

output = Queue()

class TSProcess(Process):
    def __init__(self, param):
        Process.__init__(self)
        self.param = param
    def run(self):
        result = TS(self.param)
        output.put(result)

processes = []

with open("dataset.txt") as dataset:
    k = int(dataset.readline().split()[0])
    for i in xrange(k):
        aux = map(int, dataset.readline().split())
        current = TSProcess(aux)
        processes.append(current)
        current.start()

for p in processes:
    p.join()

# process result with output.get()
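To produce the output the question asked for, the queue can be drained after the joins. This is a minimal sketch; note that a Queue hands results back in whatever order the processes put them, which is not necessarily the order of the lines in dataset.txt:

# one result per process; each result is only three small integers,
# so the queue never fills up and draining after join() is safe here
for _ in processes:
    res = output.get()
    print ' '.join(map(str, res))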