I wrote a program that implements a Caesar cipher using threads and queues in Python. I would like to replace all the threading work in my code with multiprocessing, but I have no idea how to do it. I would appreciate it if you could explain where and how to start the implementation. Here is the code:
import threading
import Queue
import sys
import string
lock = threading.Lock()
def do_work(in_queue, out_queue, shift):
while True:
lock.acquire()
item = in_queue.get()
result = caesar(item, shift)
out_queue.put(result)
in_queue.task_done()
lock.release()
def caesar(plaintext, shift):
plaintext = plaintext.upper()
alphabet = string.ascii_uppercase
shifted_alphabet = alphabet[shift:] + alphabet[:shift]
table = string.maketrans(alphabet, shifted_alphabet)
return plaintext.translate(table)
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Duzgun giriniz: '<filename>.py s n l'")
sys.exit(0)
else:
s = int(sys.argv[1])
n = int(sys.argv[2])
l = int(sys.argv[3])
work = Queue.Queue()
results = Queue.Queue()
myfile=open('metin.txt','r')
text_data=myfile.read() # <=== here load file
index=0
for i in xrange(n):
t = threading.Thread(target=do_work, args=(work, results, s))
t.daemon = True
t.start()
for i in range(0, len(text_data), l):
work.put(text_data[index:index + l])
index += l
work.join()
index=0
output_file=open("crypted"+ "_"+ str(s)+"_"+str(n)+"_"+str(l)+".txt", "w")
for i in range(0, len(text_data), l):
output_file.write(results.get())
index += l
sys.exit()
You can save yourself some code and move to the standard multiprocessing.Pool implementation.
import multiprocessing
import sys
import string
import itertools
# for non-forking systems like Windows
def worker(args):
# args: (text, shift)
return caesar(*args)
# for forking systems like linux
def forking_worker(args):
# args: ((start_index, end_index), shift)
    return caesar(text_data[args[0][0]:args[0][1]], args[1])
def caesar(plaintext, shift):
plaintext = plaintext.upper()
alphabet = string.ascii_uppercase
shifted_alphabet = alphabet[shift:] + alphabet[:shift]
table = string.maketrans(alphabet, shifted_alphabet)
return plaintext.translate(table)
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Duzgun giriniz: '<filename>.py s n l'")
sys.exit(0)
else:
s = int(sys.argv[1])
n = int(sys.argv[2])
l = int(sys.argv[3])
with open('metin.txt') as myfile:
    text_data = myfile.read()  # <=== load the file BEFORE creating the Pool, so forked children inherit text_data
pool = multiprocessing.Pool()  # todo: change number of cpus...
# on a forking system so only pass index, not text to child
result = pool.map(forking_worker,
zip(((index, index + l)
for index in range(0, len(text_data), l)),
itertools.cycle([s])))
with open("crypted"+ "_"+ str(s)+"_"+str(n)+"_"+str(l)+".txt", "w") as output_file:
output_file.writelines(result)
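Note that the code above is Python 2 (string.maketrans was removed in Python 3; use str.maketrans instead). A minimal Python 3 sketch of the same Pool idea, assuming the same metin.txt input and an illustrative shift s and chunk size l, might look like this:
import multiprocessing
import string

def caesar(plaintext, shift):
    plaintext = plaintext.upper()
    alphabet = string.ascii_uppercase
    shifted_alphabet = alphabet[shift:] + alphabet[:shift]
    table = str.maketrans(alphabet, shifted_alphabet)  # str.maketrans replaces string.maketrans in Python 3
    return plaintext.translate(table)

def worker(args):
    chunk, shift = args  # each task is a (text chunk, shift) pair
    return caesar(chunk, shift)

if __name__ == "__main__":
    s, l = 3, 100  # example shift and chunk size
    with open('metin.txt') as f:
        text_data = f.read()
    chunks = [text_data[i:i + l] for i in range(0, len(text_data), l)]
    with multiprocessing.Pool() as pool:
        result = pool.map(worker, [(chunk, s) for chunk in chunks])  # map preserves chunk order
    print(''.join(result))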
I am trying to use the ProcessPoolExecutor() to run some functions, but I can't manage to understand how to get the return values of the functions out of the with block.
def threaded_upload(i):
time.sleep(2)
if i == 0:
k = 10
elif i == 2:
k = i*i
else:
k = -99
return [k]
def controller():
if __name__ == "__main__":
futures = []
with ProcessPoolExecutor() as pool:
for paso in range(4):
futuro_i = pool.submit(threaded_upload,paso)
wth=[futuro_i.result()]
futures.append(futuro_i)
wait(futures, return_when=ALL_COMPLETED)
merged_list = []
for future in futures:
for valor in future.result():
merged_list.append(valor)
Lista_Final = merged_list
wait(futures, return_when=ALL_COMPLETED)
return Lista_Final
print(controller())
The output of the code is:
None
[10, -99, 4, -99]
I am not sure why.
The "wait" doesn't seem to wait until all functions are executed either.
To be honest, I have been reading and reading for a few days, but the descriptions of concurrent.futures and multiprocessing are more advanced than my current knowledge.
Any clarification will be appreciated.
Thanks in advance.
You first submit the jobs and then wait for the results. You can also return an integer instead of a list and then skip the inner loop:
test.py:
import random
import time
from concurrent.futures import ProcessPoolExecutor, wait
def worker(i):
t = random.uniform(1, 5)
print(f"START: {i} ({t:.3f}s)")
time.sleep(t)
if i == 0:
k = 10
elif i == 2:
k = i * i
else:
k = -99
print(f"END: {i}")
return k
def main():
futures = []
with ProcessPoolExecutor() as pool:
for i in range(4):
future = pool.submit(worker, i)
futures.append(future)
results = []
done, pending = wait(futures) # ALL_COMPLETED is the default value
for future in done:
results.append(future.result())
print(results)
if __name__ == "__main__":
main()
Test:
$ python test.py
START: 0 (1.608s)
START: 1 (1.718s)
START: 2 (1.545s)
START: 3 (1.588s)
END: 2
END: 3
END: 0
END: 1
[10, -99, 4, -99]
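One caveat: wait() returns done as a set, so iterating over it does not guarantee results in submission order (the output above just happens to match). If order matters, iterate over the futures list itself, or use as_completed to handle each result as soon as it is ready; a small sketch reusing the worker function from test.py above:
from concurrent.futures import ProcessPoolExecutor, as_completed

if __name__ == "__main__":
    with ProcessPoolExecutor() as pool:
        futures = [pool.submit(worker, i) for i in range(4)]
        ordered = [f.result() for f in futures]  # blocks on each future, preserving submission order
        print(ordered)
        for future in as_completed(futures):  # alternative: completion order
            print(future.result())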
Guys:
I am new to using Python multiprocessing. Recently my research has needed calculations with many iterations, so I tried to use multiprocessing to speed them up. But when I wrote a small sample code, I found that the curve I get with multiprocessing is different from the one I get without multiprocessing.
The code with multiprocessing:
import random
import matplotlib.pyplot as plt
import math
import numpy as np
import multiprocessing as mp
class Classic:
def __init__(self,position,type):
assert type == 'A' or type == 'B'
self.position = position
self.type = type
def getposition(self):
return self.position
def gettype (self):
return self.type
def setposition(self,pos):
self.position = pos
def settype (self,t):
self.type = t
def number_count(system):
counter = 0
for i in range(0,len(system)):
if system[i] !=0:
counter=counter+1
return counter
def time_evolution(system_temp,steps):
numberlist=np.zeros(steps)
number = number_count(system_temp)
for t in range(0,steps):
for i in range(0,len(system_temp)):
x = random.randint(0, len(system_temp)-2)
if system_temp[x]!=0 and system_temp[x+1]!=0:
p1 = system_temp[x]
p2 = system_temp[x+1]
p1_type = p1.gettype()
p2_type = p2.gettype()
exchange_check = random.randint(0,1)
if p1_type == p2_type:
system_temp[x]=0
system_temp[x+1]=0
number = number-2
elif exchange_check == 1:
type_temp = p1_type
p1.settype(p2_type)
p2.settype(type_temp)
elif system_temp[x]!=0 and system_temp[x+1]==0:
system_temp[x+1] = system_temp[x]
system_temp[x] =0
elif system_temp[x]==0 and system_temp[x+1]!=0:
system_temp[x]=system_temp[x+1]
system_temp[x+1]=0
numberlist[t]=numberlist[t]+number
return numberlist
if __name__ =='__main__':
pool = mp.Pool(8)
size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res=[]
totalnum= np.zeros(steps)
randomlist = random.sample(range(1,100*repeat),repeat)
for i in range(0,particle_num):
pos = random.randint(0,size-1)
ran_num = random.randint (0,1)
if ran_num == 0:
temp_type = 'A'
else:
temp_type = 'B'
if system_init[pos] ==0:
system_init[pos] = Classic(pos,temp_type)
for k in range(0, repeat):
system_temp = system_init[:]
random.seed(randomlist[k])
res.append(pool.apply_async(time_evolution, args=(system_temp,steps,)))
pool.close()
pool.join()
for count in range(0,len(res)):
totalnum =totalnum+ np.array(res[count].get())
time=np.linspace(1,steps+1,steps)
time_sqrt=np.sqrt(8.0*math.pi*time)
density =totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt,density)
#plt.loglog(time,density_mod)
#plt.savefig("modified_density_loglog.pdf")
#plt.close()
myfile=open('density_mod2.txt','w')
for element in density_mod:
myfile.write(str(element))
myfile.write('\n')
myfile.close()
And the code without multiprocessing is
import random
import matplotlib.pyplot as plt
import math
import numpy as np
class Classic:
def __init__(self,position,type):
assert type == 'A' or type == 'B'
self.position = position
self.type = type
def getposition(self):
return self.position
def gettype (self):
return self.type
def setposition(self,pos):
self.position = pos
def settype (self,t):
self.type = t
def number_count(system):
counter = 0
for i in range(0,len(system)):
if system[i] !=0:
counter=counter+1
return counter
def time_evolution(system_temp,steps):
numberlist=np.zeros(steps)
number = number_count(system_temp)
for t in range(0,steps):
for i in range(0,len(system_temp)):
x = random.randint(0, len(system_temp)-2)
if system_temp[x]!=0 and system_temp[x+1]!=0:
p1 = system_temp[x]
p2 = system_temp[x+1]
p1_type = p1.gettype()
p2_type = p2.gettype()
exchange_check = random.randint(0,1)
if p1_type == p2_type:
system_temp[x]=0
system_temp[x+1]=0
number = number-2
elif exchange_check == 1:
type_temp = p1_type
p1.settype(p2_type)
p2.settype(type_temp)
elif system_temp[x]!=0 and system_temp[x+1]==0:
system_temp[x+1] = system_temp[x]
system_temp[x] =0
elif system_temp[x]==0 and system_temp[x+1]!=0:
system_temp[x]=system_temp[x+1]
system_temp[x+1]=0
numberlist[t]=numberlist[t]+number
return numberlist
size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res=[]
totalnum= np.zeros(steps)
randomlist = random.sample(range(1,100*repeat),repeat)
for i in range(0,particle_num):
pos = random.randint(0,size-1)
ran_num = random.randint (0,1)
if ran_num == 0:
temp_type = 'A'
else:
temp_type = 'B'
if system_init[pos] ==0:
system_init[pos] = Classic(pos,temp_type)
for k in range(0, repeat):
system_temp = system_init[:]
random.seed(randomlist[k])
res.append(time_evolution(system_temp,steps))
for count in range(0,len(res)):
totalnum +=res[count]
time=np.linspace(1,steps+1,steps)
time_sqrt=np.sqrt(8.0*math.pi*time)
density =totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt,density)
myfile=open('density_mod3.txt','w')
for element in density_mod:
myfile.write(str(element))
myfile.write('\n')
myfile.close()
And the result is shown in the attached plot (image not reproduced here). The blue curve is the result with multiprocessing and the orange one is the result without it. I am not sure why this happens. How can I fix it?
My guess is that you don't initialize the random number generator correctly. You have to do that "inside" the spawned processes.
Check the following simple example:
import random
import multiprocessing as mp
def rand_test_1():
print(random.randint(0, 100))
return None
def rand_test_2(seed):
random.seed(seed)
print(random.randint(0, 100))
return None
if __name__ == '__main__':
repeat = 3
randomlist = random.sample(range(1, 100 * repeat), repeat)
print('Classic:')
for k in range(repeat):
random.seed(randomlist[k])
rand_test_1()
print('\nMultiprocessing version 1:')
with mp.Pool() as pool:
for k in range(repeat):
random.seed(randomlist[k])
pool.apply_async(rand_test_1, args=tuple())
pool.close()
pool.join()
print('\nMultiprocessing version 2:')
with mp.Pool() as pool:
for k in range(repeat):
pool.apply_async(rand_test_2, args=(randomlist[k],))
pool.close()
pool.join()
The results look like:
Classic:
32
78
6
Multiprocessing version 1:
84
43
90
Multiprocessing version 2:
32
78
6
You are using multiprocessing version 1; I think you should use version 2.
One other point, which has nothing to do with your problem: my impression is that it might be a good idea to use .map/.starmap (see here) instead of .apply_async:
...
with mp.Pool() as pool:
res = list(pool.map(rand_test_2, randomlist))
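Applied to your simulation, that could look roughly like the sketch below; time_evolution_seeded is a hypothetical wrapper that seeds inside the worker process, analogous to rand_test_2:
def time_evolution_seeded(seed, system_temp, steps):
    random.seed(seed)  # seed in the worker process, not the parent
    return time_evolution(system_temp, steps)

with mp.Pool(8) as pool:
    res = pool.starmap(time_evolution_seeded,
                       [(randomlist[k], system_init[:], steps) for k in range(repeat)])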
I want to use MPI to parallelize the calculation of Hamiltonian paths in a graph.
So far I have this:
from mpi4py import MPI
import random,time
comm = MPI.COMM_WORLD
my_rank = comm.Get_rank()
p = comm.Get_size()
numOfNodes = 10
numOfProblems = 11
class Graph:
def __init__(self, numOfNodes):
if numOfNodes > 0:
self.numOfNodes = numOfNodes
else:
print("Error")
def calculateMaxPairs(self):
self.maxPairs = self.numOfNodes*(self.numOfNodes - 1)//2
def generatePairs(self):
self.calculateMaxPairs()
self.pairs = []
startRange = self.numOfNodes
endRange = (self.numOfNodes - 10)*3 + 18
numOfPairs = random.randint(startRange, endRange)
while len(self.pairs) != numOfPairs:
try:
startNode = random.randint(1, self.numOfNodes)
endNode = random.randint(1, self.numOfNodes)
if startNode == endNode:
raise ValueError
except ValueError:
pass
else:
pair = (startNode, endNode)
invertedPair = (endNode, startNode)
if pair not in self.pairs and invertedPair not in self.pairs:
self.pairs.append(pair)
self.hamiltonianPath = []
def generatePathLink(self):
self.graphLink = {}
for x in self.pairs:
x = str(x)
splitNode = x.split(', ')
a = int(splitNode[0][1:])
b = int(splitNode[1][:-1])
try:
if b not in self.graphLink[a]:
self.graphLink[a].append(b)
except KeyError:
self.graphLink[a] = []
self.graphLink[a].append(b)
finally:
try:
if a not in self.graphLink[b]:
self.graphLink[b].append(a)
except KeyError:
self.graphLink[b] = []
self.graphLink[b].append(a)
finally:
pass
def findPaths(self, start, end, path = []):
path = path + [start]
if start == end:
return [path]
if start not in self.graphLink:
return []
paths = []
for node in self.graphLink[start]:
if node not in path:
newpaths = self.findPaths(node, end, path)
for newpath in newpaths:
paths.append(newpath)
if (len(newpath) == self.numOfNodes):
self.hamiltonianPath = newpath
raise OverflowError
return paths
def exhaustiveSearch(self):
try:
allPaths = []
for startNode in self.graphLink:
for endNode in self.graphLink:
newPaths = self.findPaths(startNode, endNode)
for path in newPaths:
if (len(path) == self.numOfNodes):
allPaths.append(path)
return allPaths
except OverflowError:
return self.hamiltonianPath
else:
pass
def isHamiltonianPathExist(self):
time_start = time.clock()
self.generatePathLink()
if len(self.graphLink) != self.numOfNodes:
time_elapsed = (time.clock() - time_start)
return [[], time_elapsed]
else:
result = self.exhaustiveSearch()
time_elapsed = (time.clock() - time_start)
if len(result) == 0:
print("There isn't any Hamiltonian Path.")
else:
print("Computing time:", round(time_elapsed, 2), "seconds\n")
return [result, time_elapsed]
comm.send(result, dest=0)
yes = 0
no = 0
total_computing_time = 0
for x in range(1, numOfProblems + 1):
if my_rank !=0:
graph = Graph(numOfNodes)
graph.generatePairs()
output = graph.isHamiltonianPathExist()
else:
for procid in range(1,p):
result = comm.recv(source=procid)
time_elapsed = comm.recv(source=procid, tag=12)
total_computing_time += time_elapsed
if len(result) == 0:
no += 1
else:
yes += 1
print("Have Hamiltonian Path:", yes)
print("Don't Have Hamiltonian Path:", no)
print("Total computing time for %s problems in %s processes: %s s"%(numOfProblems, p, str(round(total_computing_time, 2))))
As you can see, this script has two sections.
The first one generates a graph and calculates the Hamiltonian paths.
The second one is supposed to run the graph-generation code in parallel on multiple processors.
The problem is that every processor generates its own graph and calculates the paths itself, instead of the job being divided between them.
Where am I going wrong?
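For reference, the usual pattern is to have each rank select its own slice of the problem indices, e.g. a round-robin split, and gather the partial results on rank 0. A minimal sketch, where solve() is a hypothetical stand-in for the graph computation:
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

numOfProblems = 11

def solve(i):
    return i * i  # stand-in for generating a graph and searching it

# each rank handles only the problem indices congruent to its rank
local_results = [solve(i) for i in range(rank, numOfProblems, size)]

# collect the partial result lists on rank 0
all_results = comm.gather(local_results, root=0)
if rank == 0:
    print([r for part in all_results for r in part])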
I have tried th1.join() but that didn't work, and I don't know what else to try.
Also, I need it to be a single function.
Here is my code:
https://repl.it/#JamesGordon1/JuicyScentedCrypto
(sorry too long to post)
On Linux it works for me only if I add two things.
First, I have to set stoprun = 1 after the loop:
c = InKey()
while not c == 27:
c = InKey()
stoprun = 1 # set after loop
Second, I have to use print() in the thread - it probably needs the \n (or it simply needs an I/O call to give the other thread a chance to run):
if stoprun == 1: # True
print() # have to send `\n`
return
I don't know exactly why this is needed, but threads in Python don't run at the same time - one thread blocks the others - and these additions may stop one thread long enough to let the other run.
Of course, it also needs global stoprun inside stop().
from threading import Thread
import time
import sys
import os
# --- classes ---
class _GetCh:
def __init__(self):
try:
self.impl = _GetChWindows()
except ImportError:
try:
self.impl = _GetChMacCarbon()
except ImportError:
self.impl = _GetChUnix()
def __call__(self):
return self.impl()
class _GetChWindows:
def __init__(self):
import msvcrt
def __call__(self):
import msvcrt
if msvcrt.kbhit():
while msvcrt.kbhit():
ch = msvcrt.getch()
while ch in b'\x00\xe0':
msvcrt.getch()
ch = msvcrt.getch()
return ord( ch.decode() )
else:
return -1
class _GetChMacCarbon:
def __init__(self):
import Carbon
Carbon.Evt
def __call__(self):
import Carbon
if Carbon.Evt.EventAvail(0x0008)[0]==0:
return ""
else:
(what,msg,when,where,mod)=Carbon.Evt.GetNextEvent(0x0008)[1]
return msg & 0x000000FF
class _GetChUnix:
def __init__(self):
import tty, sys, termios
def __call__(self):
import sys, tty, termios
fd = sys.stdin.fileno()
old_settings = termios.tcgetattr(fd)
try:
tty.setraw(sys.stdin.fileno())
ch = sys.stdin.read(1)
finally:
termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
return ord(ch)
InKey = _GetCh()
# --- main ---
stoprun = 0 # False
def load_animation():
load_str = "starting your console application "
ls_len = len(load_str)
animation = "|/-\\"
anicount = 0
i = 0
while True:
time.sleep(0.075)
load_str_list = list(load_str)
x = ord(load_str_list[i])
y = 0
if x != 32 and x != 46:
if x>90:
y = x-32
else:
y = x + 32
load_str_list[i]= chr(y)
res =''
for j in range(ls_len):
res = res + load_str_list[j]
sys.stdout.write("\r"+res + animation[anicount])
sys.stdout.flush()
load_str = res
anicount = (anicount + 1)% 4
i =(i + 1)% ls_len
if stoprun == 1: # True
print() # have to send `\n`
return
def stop():
global stoprun
print ("Press Esc to exit")
c = InKey()
while not c == 27:
c = InKey()
stoprun = 1 # have to be after loop
return
th1 = Thread(target=load_animation)
th1.start()
stop()
#th1.join()
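As a side note, a threading.Event is usually a cleaner way to signal the animation thread than a module-level flag; a minimal sketch of that approach (not the original code, and with a sleep standing in for the Esc-key loop):
from threading import Thread, Event
import time

stop_event = Event()

def load_animation():
    # spin until the main thread signals us to stop
    while not stop_event.is_set():
        time.sleep(0.075)  # one animation frame would go here
    print()  # final newline so the prompt starts on a fresh line

th1 = Thread(target=load_animation)
th1.start()
time.sleep(1)      # stand-in for the Esc-key loop
stop_event.set()   # signal instead of assigning a global
th1.join()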
In stop you write:
while not c == 27:
c = InKey()
stoprun = 1
return
The stoprun = 1 creates a new local name stoprun instead of modifying the global variable, because you never declared stoprun as global. Do this:
def stop():
global stoprun
... # your code
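A minimal demonstration of the difference:
counter = 0

def broken():
    counter = 1  # binds a new local name; the module-level counter is untouched

def fixed():
    global counter
    counter = 1  # rebinds the module-level name

broken()
print(counter)  # 0
fixed()
print(counter)  # 1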
Why isn't this code working:
from multiprocessing import Process, Manager
import pcap, string, sys
def f(a, p):
try:
while True:
a.append(p.next())
except KeyboardInterrupt:
print 'stop'
def g(a):
# print a
while True:
print a[len(a)-1]
if __name__ == '__main__':
# num = Value('d', 0.0)
manager = Manager()
l = manager.list([])
p = pcap.pcapObject()
dev = sys.argv[1]
p.open_live(dev, 65000, 0, 100)
p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
p = Process(target=f, args=(l, p))
p.start()
p.join()
a = Process(target=g, args=(l,))
a.start()
a.join()
# print l
# print len(l)
while the code below works fine:
from multiprocessing import Process, Manager
import pcap, string, sys
def f(a, p):
try:
while len(a) < 100:
a.append(p.next())
except KeyboardInterrupt:
print 'stop'
def g(a):
# print a
while True:
print a[len(a)-1]
if __name__ == '__main__':
# num = Value('d', 0.0)
manager = Manager()
l = manager.list([])
p = pcap.pcapObject()
dev = sys.argv[1]
p.open_live(dev, 65000, 0, 100)
p.setfilter(string.join(sys.argv[2:], ' '), 0, 0)
p = Process(target=f, args=(l, p))
p.start()
p.join()
a = Process(target=g, args=(l,))
a.start()
a.join()
# print l
# print len(l)
Other questions:
Is this the best and fastest/most optimized way to create shared memory between different processes?
Is the multiprocessing.Manager class only for finite-size data structures? If not, what am I doing wrong?
Any help/hints would be appreciated. Thanks in advance.
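Two observations, for what they are worth. First, in the non-working version f never returns (while True), so p.join() blocks forever and the g process is never even started. Second, on the shared-memory question: a Manager list lives in a separate server process, so every append and index lookup is a round trip to that server; for a producer/consumer stream like this, a multiprocessing.Queue is usually a better fit. A minimal sketch, with a counter standing in for the pcap packets:
from multiprocessing import Process, Queue

def producer(q):
    for i in range(100):  # stand-in for p.next() packets
        q.put(i)
    q.put(None)  # sentinel tells the consumer to stop

def consumer(q):
    while True:
        item = q.get()
        if item is None:
            break
        print(item)

if __name__ == '__main__':
    q = Queue()
    f_proc = Process(target=producer, args=(q,))
    g_proc = Process(target=consumer, args=(q,))
    f_proc.start()
    g_proc.start()
    f_proc.join()
    g_proc.join()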