Let's say I have two parallel blocking loops. What is the best way to run them in parallel using Python? Currently I am experimenting with multi-threading using the following program:
#!/usr/bin/env python
import time
import serial
import os
from threading import Thread
# Open the serial link at 38400 baud with timeout=None, i.e. blocking reads
# (pyserial semantics), then discard any stale bytes sitting in the OS
# input/output buffers so parsing starts on a clean stream.
ser = serial.Serial(port='/dev/ttyUSB0', baudrate=38400, timeout=None)
ser.flushInput()
ser.flushOutput()
def getstrings(port):
    """Yield ASCII strings framed by STX (0x02) / ETX (0x03) on *port*.

    Bytes are read one at a time; an STX byte resets the buffer, an ETX
    byte yields the accumulated payload, anything else is appended.
    The generator stops if the port reports end-of-data (empty read),
    instead of spinning forever.

    :param port: object with a ``read(n)`` method returning ``bytes``
    :yields: each complete frame decoded as ASCII ``str``
    """
    buf = bytearray()
    while True:
        b = port.read(1)
        if not b:
            # Robustness: an empty read (EOF / timeout) previously caused a
            # busy loop; terminate the generator instead.
            return
        if b == b'\x02':
            del buf[:]
        elif b == b'\x03':
            yield buf.decode('ascii')
        else:
            # Bug fix: bytearray.append() requires an int; port.read(1)
            # returns a length-1 bytes object, so append(b) raises TypeError
            # on Python 3. extend() accepts the bytes object directly.
            buf.extend(b)
def tester():
    """Print a 'testing' heartbeat forever (busy loop; never returns)."""
    while True:
        print('testing')
def values():
    """Continuously decode readings from the global serial port.

    Frames produced by getstrings(ser) carry a type tag in their second
    character: 'C' = count, 'T' = temperature, 'H' = high voltage. The
    latest decoded values are printed after each frame.

    NOTE(review): the original indentation was lost in transit; the final
    print is placed at the end of the per-frame loop body here — confirm
    against the original program.
    """
    count = ""
    tem = ""
    hv = ""
    counti = 0
    temi = 0
    hvi = 0
    while 1:
        for item in getstrings(ser):
            #if len(item) >= 10:
            # continue
            # item[1] selects the field; assumes every frame is at least
            # two characters long — TODO confirm with the device protocol.
            if item[1] == "C":
                count = item.split('C')[1]
                counti=int(count[0:5])
            if item[1] == "T":
                tem = item.split('T')[1]
                temi=int(tem[0:5])
            if item[1] == "H":
                hv = item.split('H')[1]
                # Divided by 10 — presumably the device sends tenths; verify.
                hvi = int(hv[0:5])/10
            print ("HV="+str(hvi)+" "+"Count="+str(counti)+" "+"Temp="+str(temi))
# Run both loops concurrently: t1 reads and decodes serial frames, t2 prints
# a heartbeat. NOTE(review): neither thread is a daemon and neither loop can
# exit, so the program can only be stopped by killing the process.
t1 = Thread(target=values)
t2 = Thread(target=tester)
t1.start()
t2.start()
Only the second thread works; it doesn't print the values from the first. This is the first time I am experimenting with multi-threading. Once I understand how this works, I intend to use it to design a GUI using the Tkinter libraries. I want to run my program's loop alongside the Tkinter main loop. Any suggestions on where I might be making a mistake?
Update:
Yes, it is thread 2, not thread 1 — my mistake, sorry about that. Individually, both threads work if I comment out t1.start() or t2.start(). However, when run together, only thread 2 prints any output.
Related
I am trying out multiprocessing for my Monty Hall game simulation for improved performance. The game is played 10 million times and takes ~17 seconds when run directly; however, my multiprocessing implementation is taking significantly longer to run. I am clearly doing something wrong, but I can't figure out what.
import multiprocessing
from MontyHall.game import Game
from MontyHall.player import Player
from Timer.timer import Timer
def doWork(input, output):
    """Drain zero-argument callables from *input*, run each, and push the
    result onto *output*.

    The worker exits when *input* stays empty for 1 second.

    :param input: queue of callables (note: parameter names shadow builtins;
        kept for interface compatibility with existing callers)
    :param output: queue receiving each callable's return value
    """
    while True:
        try:
            f = input.get(timeout=1)
        except queue.Empty:
            # Bug fix: the original bare `except:` also swallowed exceptions
            # raised by f() itself, silently dropping work. Only the
            # empty-queue timeout should end the worker.
            break
        output.put(f())
def main():
    """Run 10M Monty Hall games across 5 worker processes and report
    win/loss rates for the always-switch strategy."""
    # game setup
    player_1 = Player(True)  # always-switch strategy
    game_1 = Game(player_1)

    input_queue = multiprocessing.Queue()
    output_queue = multiprocessing.Queue()

    # total simulations — one queued callable per game (NOTE: this per-item
    # pickling is the dominant cost; batching would be far faster)
    total = 10000000
    for _ in range(total):
        input_queue.put(game_1.play_game)

    with Timer('timer') as t:
        # initialize 5 child processes
        processes = []
        for _ in range(5):
            p = multiprocessing.Process(target=doWork, args=(input_queue, output_queue))
            processes.append(p)
            p.start()

        # Bug fix: drain the output queue BEFORE joining. Joining first can
        # deadlock — a child blocks flushing its queue buffer to the pipe
        # while the parent sits in join() and never consumes the data.
        results = []
        while len(results) != total:
            results.append(output_queue.get())

        for p in processes:
            p.join()

    win = results.count(True) / len(results)
    loss = results.count(False) / len(results)
    print(len(results))
    print(win)
    print(loss)
# Entry-point guard: required on spawn-based platforms (Windows/macOS) so
# that child processes re-importing this module do not re-run main().
if __name__ == '__main__':
    main()
This is my first post. Advice on posting etiquette is also appreciated. Thank you.
Code for the Classes:
class Player(object):
    """A Monty Hall contestant.

    :param switch_door: if True, the player switches to the remaining door
        after the host opens a non-prize door.
    """

    def __init__(self, switch_door=False):
        self._switch_door = switch_door

    # Bug fix: the decorators were mangled to '#property' /
    # '#switch_door.setter' (a Markdown artifact), turning them into
    # comments and making switch_door a plain method instead of a property.
    @property
    def switch_door(self):
        return self._switch_door

    @switch_door.setter
    def switch_door(self, iswitch):
        self._switch_door = iswitch

    def choose_door(self):
        """Pick one of the three doors (0-2) uniformly at random."""
        return random.randint(0, 2)
class Game(object):
    """One Monty Hall game bound to a single player."""

    def __init__(self, player):
        self.player = player

    def non_prize_door(self, door_with_prize, player_choice):
        """Returns a door that doesn't contain the prize and that isn't the players original choice"""
        door = 1
        while door in (door_with_prize, player_choice):
            door = (door + 1) % 3  # assuming there are only 3 doors. Can be modified for more doors
        return door

    def switch_function(self, open_door, player_choice):
        """Returns the door that isn't the original player choice and isn't the opened door """
        door = 1
        while door in (open_door, player_choice):
            door = (door + 1) % 3  # assuming there are only 3 doors. Can be modified for more doors
        return door

    def play_game(self):
        """Game Logic"""
        # The prize is hidden behind a uniformly random door.
        door_with_prize = random.randint(0, 2)
        # The player makes an initial pick.
        player_choice = self.player.choose_door()
        # The host reveals a door holding no prize.
        open_door = self.non_prize_door(door_with_prize, player_choice)
        # The player may switch to the one remaining closed door.
        if self.player.switch_door:
            player_choice = self.switch_function(open_door, player_choice)
        # True when the final pick hides the prize.
        return player_choice == door_with_prize
Code for running it without multiprocessing:
from MontyHall.game import Game
from MontyHall.player import Player
from Timer.timer import Timer
def main():
# Setting up the game
player_2 = Player(True) # always switch
game_1 = Game(player_2)
# Testing out the hypothesis
with Timer('timer_1') as t:
results = []
for i in range(10000000):
results.append(game_1.play_game())
win = results.count(True) / len(results)
loss = results.count(False) / len(results)
print(
f'When switch strategy is {player_2.switch_door}, the win rate is {win:.2%} and the loss rate is {loss:.2%}')
if __name__ == '__main__':
main()
As you did not give the full code that we can run locally, I can only speculate. My guess is that you are passing an object(a method from your game) to other processes so pickling and unpickling took too much time. Unlike multithreading where you can "share" data, in multiprocessing, you need to pack the data and send to the other process.
However, there's a rule I always follow when I try to optimize my code - profile before optimizing! It would be much better to KNOW what's slow than GUESS.
It's a multiprocessing program so there are not a lot of options in the market. You could try viztracer which supports multiprocessing.
pip install viztracer
viztracer --log_multiprocess your_program.py
It will generate a result.html that you can open with chrome. Or you can just do
vizviewer result.html
I would suggest to reduce the iteration number so you can have a view of the whole picture(because viztracer uses a circular buffer and 10 million iterations will definitely overflow). But, you can still get the last piece of your code executing if you don't, which should be helpful enough for you to figure out what's going on.
I used viztracer as you gave the whole code.
This is one of your iteration in your worker process. As you can tell, the actual working part is very small(the yellow-ish slice in the middle p...). Most of the time has been spent on receiving and putting data, which eliminates the advantage of parallelization.
The correct way to do this is to do it in batches. Also, as this game does not actually require any data, you should just send "I want to do it 1000 times" to the process and let it do the work, instead of sending the method one by one.
There's another interesting problem that you can easily find with viztracer:
This is the big picture of your worker process. Notice the large "nothing" in the end? Because your worker needs a timeout to finish, and that's when they are waiting. You should come up with a better idea to elegantly finish your worker process.
Updated my code. I fundamentally misunderstood the multiprocessing method.
def do_work(input, output):
    """Generic worker: pull (function, args) pairs from *input*, run them,
    and push each result onto *output*.

    When *input* stays empty for 1 second the worker puts the 'Done'
    sentinel on *output* and exits, so the consumer knows this worker has
    finished.

    :param input: queue of (callable, args-tuple) pairs (names shadow
        builtins; kept for interface compatibility)
    :param output: queue receiving results, terminated by 'Done'
    """
    while True:
        try:
            f, args = input.get(timeout=1)
        except queue.Empty:
            # Bug fix: the original bare `except:` also caught exceptions
            # raised inside f(*args), reporting 'Done' and hiding the error.
            # Only the empty-queue timeout should end the worker.
            output.put('Done')
            break
        output.put(f(*args))
def run_sim(game, num_sim):
    """Play *game* *num_sim* times and return the list of outcomes."""
    return [game.play_game() for _ in range(num_sim)]
def main():
    """Fan five batches of 2M simulated Monty Hall games out to worker
    processes and report the combined result count."""
    input_queue = multiprocessing.Queue()
    output_queue = multiprocessing.Queue()

    g = Game(Player(False))  # set up game and player (never-switch strategy)
    num_sim = 2000000
    num_workers = 5
    # One batch job per worker: (function, args) tuples, so each child runs
    # a whole batch locally instead of unpickling 10M individual calls.
    for _ in range(num_workers):
        input_queue.put((run_sim, (g, num_sim)))

    with Timer('Monty Hall Timer: ') as t:
        processes = []  # list to save processes
        for _ in range(num_workers):
            p = multiprocessing.Process(target=do_work, args=(input_queue, output_queue))
            processes.append(p)
            p.start()

        # Bug fix: the original stopped at the FIRST 'Done' sentinel and then
        # terminate()d every worker, discarding whatever the other four had
        # not yet delivered. Collect until every worker has signalled.
        results = []
        done_count = 0
        while done_count < num_workers:
            r = output_queue.get()
            if r == 'Done':
                done_count += 1
            else:
                results.append(r)

        # Workers have already left their loops; a clean join replaces the
        # abrupt terminate().
        for p in processes:
            p.join()

    # combining the five returned lists
    flat_list = [item for sublist in results for item in sublist]
    print(len(flat_list))
    print(len(results))
How can I make this task finish much faster? Can the 3 calls of generate_ngrams_from_file() be done in parallel? I just got into Python and don't know how to make it faster. I think multiprocessing or threading should do the job, but I have no idea how to use them. This looks like a typical task that can be done concurrently to use the multiple cores on my Mac machine.
def tokenize(text):
    """Split *text* on single spaces into a list of tokens."""
    # The original wrapped split() in an identity list comprehension,
    # copying the list element-by-element for no effect.
    return text.split(' ')
def generate_ngrams(text, n):
    """Return the n-grams of *text* as concatenated token strings.

    Tokens come from splitting on single spaces (tokenize() inlined) and
    each n-gram is joined with no separator, exactly as before.
    """
    tokens = text.split(' ')
    windows = zip(*(tokens[offset:] for offset in range(n)))
    return [''.join(window) for window in windows]
def generate_ngrams_from_file(input, out, n):
    """Accumulate n-grams from every line of *input* into the module-level
    container matching *n* (bigrams/trigrams/fourgrams/fivegrams).

    NOTE(review): *out* is accepted but never written — the output files
    named by the caller are never produced; confirm whether writing the
    collected n-grams was intended.

    :param input: path of the text file to read (shadows the builtin)
    :param out: output path (currently unused)
    :param n: n-gram order, 2-5
    """
    count = 0
    with open(input, 'r') as f:
        for line in f:
            count += 1
            if line:
                ngrams = generate_ngrams(line, n)
                if n == 2:
                    bigrams.update(ngrams)
                elif n == 3:
                    trigrams.update(ngrams)
                elif n == 4:
                    fourgrams.update(ngrams)
                elif n == 5:
                    # Bug fix: this branch updated `fourgrams`, merging
                    # 5-grams into the 4-gram container. Assumes a
                    # module-level `fivegrams` exists alongside the others —
                    # TODO confirm, it is not visible in this excerpt.
                    fivegrams.update(ngrams)
    print("Ngram done!")
if __name__ == "__main__":
    start = time.time()
    input_file = 'bigfile.txt'
    # NOTE(review): these output paths are passed to
    # generate_ngrams_from_file(), but that function never writes to them.
    output_3_tram = '3gram.txt'
    output_4_tram = '4ngram.txt'
    output_5_tram = '5ngram.txt'
    # The three passes re-read the same input file once per n-gram order.
    print('Generate trigram: ')
    generate_ngrams_from_file(input_file, output_3_tram, 3)
    print("Generate fourgrams: ")
    generate_ngrams_from_file(input_file, output_4_tram, 4)
    print("Generate fivegrams: ")
    generate_ngrams_from_file(input_file, output_5_tram, 5)
    end = time.time()
    # Presumably prints the elapsed time; mytime() is defined elsewhere.
    mytime(start, end)
Multithreading in Python is not a very good idea because of the Global Interpreter Lock feature of Python. You can read about it here https://www.geeksforgeeks.org/what-is-the-python-global-interpreter-lock-gil/. Multiprocessing is a better option to make your programs faster. You can put the generate_ngrams() function inside the Process class of multiprocessing module. Read about the Process class at https://docs.python.org/2/library/multiprocessing.html. Process class is recommended as it is faster than both pool.apply() and pool.apply_async()
Given this code snippet:
import time
if __name__ == "__main__":
    # Append an incrementing counter to the list roughly 300 times/second.
    # NOTE(review): 'list' shadows the builtin list type; rename it.
    list = []
    counter = 0
    while True:
        counter = counter + 1
        time.sleep(0.0033)  # ~3.3 ms pause between appends
        list.append(counter)
I would like to create a thread which runs in the background doing some metadata calculations (finding the sum of the elements in the array) on the array "list" being populated in real time in the while loop.
import time
import threading
if __name__ == "__main__":
    def print_sum(l):
        # Report the running total of the shared list once per second.
        while True:
            total = sum(l)
            print("the total is {}".format(total))
            time.sleep(1)

    #list = [] - should not use 'list'. - this shadows the built in object name list.
    l = []
    counter = 0
    # Daemon thread: it dies automatically when the main thread exits, so
    # the infinite print_sum loop cannot keep the process alive.
    thread = threading.Thread(target=print_sum,args=(l,))
    thread.daemon = True
    thread.start()
    # Main thread keeps growing the list while the daemon reads it.
    while True:
        counter = counter + 1
        l.append(counter)
        time.sleep(1)
This spins up a thread in the background running function print_sum to display the sum of the list.
I'm trying to design a control interface for my system which sends and receives some data through serial link. My searches related to GUI design took me to understand the "multi-threading" issue and code below shows the latest position I arrived.
This indicates similar parts (e.g try, run) with the ones I've seen on example GUIs. I planned to convert this to a GUI, once I understand how it exactly works.
So the problem is after I start, stop the code below I can't restart it again. Because, as I understand, multi-threading features only one cycle: start, stop and quit. I mean it doesn't accept start command after stop.
My question is how I can make this code to accept start after stopping?
Best wishes
import threading, random, time
# NOTE: Python 2 code (bare `print "Done!"` statement).
class process(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        # `leave` is the stop flag polled once per second; it is (re)set here,
        # so a stop request arriving before run() begins would be overwritten.
        self.leave = 0
        print("\n it's running ...\n\n")
        while self.leave != 1:
            print "Done!"
            time.sleep(1)
# Simple console control loop: 1 starts the worker, 0 stops it, 2 quits.
operate = process()
while True:
    inputt = input(" START : 1 \n STOP\t : 0 \n QUIT\t : 2 \n")
    try:
        if int(inputt) == 1:
            # NOTE(review): threading.Thread objects can be start()ed only
            # once — a second START after STOP raises RuntimeError, which the
            # bare except below silently reports as bad input.
            operate.start()
        elif int(inputt) == 0:
            operate.leave = 1
        elif int(inputt) == 2:
            break
    except:
        print(" Wrong input, try egain...\n")
Create process inside while True loop
if int(inputt) == 1:
operate = process()
operate.start()
It should work.
... but your code may need other changes to make it safer - you will have to check if process exists before you try to stop it. You could use operate = None to control it.
import threading
import random
import time
class Process(threading.Thread):
    """Worker thread that prints "Done!" once a second until its `leave`
    flag is set truthy by the controlling loop."""

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        # The stop flag lives on the instance so the controller can flip it.
        self.leave = False
        print("\n it's running ...\n\n")
        while not self.leave:
            print("Done!")
            time.sleep(1)
# Console control loop: 1 starts a worker, 0 stops it, 2 quits.
operate = None  # current worker thread, or None when nothing is running
while True:
    inputt = input(" START : 1 \n STOP\t : 0 \n QUIT\t : 2 \n")
    try:
        if int(inputt) == 1:
            if operate is None:
                # A Thread can only be start()ed once, so a fresh Process
                # instance is created for every START.
                operate = Process()
                operate.start()
        elif int(inputt) == 0:
            if operate is not None:
                operate.leave = True
                operate.join() # wait on process end
                operate = None
        elif int(inputt) == 2:
            if operate is not None:
                operate.leave = True
                operate.join() # wait on process end
            break
    except:
        print(" Wrong input, try egain...\n")
Other method is not to leave run() when you set leave = True but keep running thead. You would need two loops.
def run(self):
    # Variant run(): `stoped` is meant to pause the work loop while `leave`
    # ends the thread entirely, so the thread can be "restarted" without a
    # new Thread object.
    # NOTE(review): as written the inner loop never re-checks `leave`, and
    # nothing shown here ever sets `stoped` back to False — the outer loop is
    # only re-entered after something external flips `stoped` to True.
    self.leave = False
    self.stoped = False
    print("\n it's running ...\n\n")
    while self.leave == False:
        while self.stoped == False:
            print("Done!")
            time.sleep(1)
First off, I am brand new to the multiprocessing and threading world. I have two devices that generate data (gps and mca). The gps simulator is supposed to generate a location every 0.1 seconds. The mca is supposed to generate a random number at randomly generated time intervals. When an event is registered by the mca, the count (cnt) is supposed to be appended to the count list; the same goes for the gps. The event handler is supposed to synchronize the count with the latest registered gps value, and this should be printed to standard output. After 5 seconds, the mca should stop and send 'DONE' over a Queue to stop all of the other functions. I am also very new to Queue. It seems to me that my functions start but don't do anything.
I would greatly appreciate it if someone could fix my code or let me know what is going wrong in it.
import random
from multiprocessing import Process, Queue
from time import sleep, time, clock
# NOTE(review): shared in spirit only — each multiprocessing.Process gets its
# OWN copy of these module-level lists, so appends in one process are
# invisible to the others. Use a multiprocessing.Queue or Manager list.
count = []
gps_data = []
def mca(q1):
    """Simulate the MCA: record a random count at random 0-3 s intervals
    for roughly 5 seconds total, then signal 'DONE' on *q1*.

    :param q1: queue used to broadcast the 'DONE' termination sentinel
    """
    print('started')
    t = 5  # remaining simulation budget in seconds
    while True:
        cnt = random.randint(0, 30)
        count.append(cnt)
        dt = random.randint(0, 3)
        sleep(dt)
        # Bug fix: the original computed `nt = t - dt` without ever updating
        # `t`, so with dt <= 3 the exit test `nt <= 0` was never true and the
        # loop ran forever. Decrement the budget instead.
        t -= dt
        if t <= 0:
            break
    q1.put('DONE')
def gps(q1):
    # Intended to append a reading every 0.1 s until 'DONE' arrives.
    # NOTE(review): q1.get() BLOCKS until a message exists, so this loop only
    # runs once per queue message — and the single 'DONE' may be consumed by
    # event_handler() instead, leaving this process stuck forever.
    print 'started2'
    while q1.get() != 'DONE':
        # NOTE(review): x is reset to 0 every iteration, so y is always 1;
        # presumably an accumulating counter was intended.
        x = 0
        dt = 0.1
        sleep(dt)
        y = x + 1
        gps_data.append(y)
def event_handler(q1):
    # Intended to pair each new MCA count with the latest GPS reading and
    # print their sum.
    # NOTE(review): q1.get() blocks per iteration (see gps()), and in
    # separate processes `count`/`gps_data` are private copies that stay
    # empty here — this handler can never observe the producers' appends.
    print 'started3'
    size_i = len(count) #initial size of the mca list
    while q1.get() != 'DONE':
        size_f = len(count)
        if size_f > size_i:
            local_count = count[-1]
            local_location = gps_data[-1]
            data = local_count + local_location
            print str(data)
            size_i = size_f
        else:
            pass
if __name__ == '__main__':
    q1 = Queue()
    # NOTE(review): the single 'DONE' sentinel is consumed by exactly ONE
    # q1.get(); with two readers (gps and event_handler) the other blocks
    # forever, so the joins below never all return.
    p_mca = Process(target = mca, args = (q1,))
    p_gps = Process(target = gps, args = (q1,))
    p_evh = Process(target = event_handler, args = (q1,))
    p_evh.start()
    p_gps.start()
    p_mca.start()
    p_evh.join()
    p_gps.join()
    p_mca.join()
Your variable t in mca() keeps getting set back to 5, thus
if nt <= 0:
is never True.
Like D_rez90 said,
if nt<=0:
is never true. You should change
nt=t-dt
to
t-=dt
if t<=0: