Multiprocessing show multiple progress bars - python

For my program, I have a file that writes random integers to a .CSV file.
from __future__ import absolute_import, division, print_function
from numpy.random import randint as randrange
import os, argparse, time
from tqdm import tqdm

def write_to_csv(filename, *args, newline = True):
    write_string = ''
    for arg in args:
        if type(arg) == list:
            for i in arg:
                write_string += str(i) + ','
        else:
            write_string += str(arg) + ','
    if newline:
        write_string = write_string.rstrip(',') + '\n'
    else:
        write_string = write_string.rstrip(',')
    with open(filename+'.csv', 'a') as file:
        file.write(write_string)

def move_dir(dirname, parent = False):
    if not parent:
        dirname = str(dirname)
        exists = os.path.isfile(dirname)
        try:
            os.mkdir(dirname)
            os.chdir(dirname)
        except FileExistsError:
            os.chdir(dirname)
    else:
        os.chdir("..")

def calculate_probability(odds, exitmode = False, low_cpu = 0):
    try:
        file_count = 0
        move_dir('Probability')
        move_dir(str(odds))
        d = {}
        writelist = []
        percentlist = []
        for i in tqdm(range(odds)):
            d[str(i)] = 0
            writelist.append(f'Times {i}')
            percentlist.append(f'Percent {i}')
        while True:
            if os.path.isfile(str(file_count)+'.csv'):
                file_count += 1
            else:
                break
        filename = str(file_count)
        write_to_csv(filename, 'Number', 'Value')
        rep = 500 * odds
        if rep > 10000:
            rep = 10000
        for i in tqdm(range(rep)):
            ran = randrange(odds)
            ran = int(ran)
            d[str(ran)] += 1
            if i == 999:
                write_to_csv(filename, i, ran+1, newline = False)
            else:
                write_to_csv(filename, i, ran+1)
            if low_cpu:
                time.sleep(0.01*float(low_cpu))
        writelist2 = []
        percentlist2 = []
        for i in tqdm(range(odds)):
            val = d[str(i)]
            writelist2.append(val)
            percentlist2.append(round(((val/rep)*100), 2))
        if os.path.isfile('runs.csv'):
            write_to_csv('runs', file_count, writelist2, percentlist2)
        else:
            write_to_csv('runs', 'Run #', writelist, percentlist)
            write_to_csv('runs', file_count, writelist2, percentlist2)
        if exitmode:
            exit()
    except (KeyboardInterrupt, SystemExit):
        if exitmode:
            os.remove(str(file_count)+'.csv')
            exit()
        else:
            try:
                os.system('cls')
                print('User/program interrupted, launching shutdown mode...')
                os.remove(str(file_count)+'.csv')
                print('Finalizing current trial...')
                os.chdir("..")
                os.chdir("..")
            except FileNotFoundError:
                exit()
            calculate_probability(odds, exitmode = True)
I also have a repetition system to do this multiple times.
def run_tests(times, odds, low_cpu = 0, shutdown = False):
    for i in tqdm(range(times)):
        calculate_probability(odds, low_cpu = low_cpu)
        os.chdir("..")
        os.chdir("..")
    if shutdown:
        os.system('shutdown /S /F /T 0 /hybrid')
However, if I were to run, say, 30 trials, it would take forever. So I decided to use the multiprocessing module to speed up the process. Because each run needs to write to the same file at the end, I had to collect the data and write it after the processes ended.
def calculate_probability(odds, low_cpu = 0):
    try:
        file_count = 0
        move_dir('Probability')
        move_dir(str(odds))
        d = {}
        writelist = []
        percentlist = []
        for i in tqdm(range(odds)):
            d[str(i)] = 0
            writelist.append(f'Times {i}')
            percentlist.append(f'Percent {i}')
        while True:
            if os.path.isfile(str(file_count)+'.csv'):
                file_count += 1
            else:
                break
        filename = str(file_count)
        write_to_csv(filename, 'Number', 'Value')
        rep = 500 * odds
        if rep > 10000:
            rep = 10000
        for i in range(rep):
            ran = randrange(odds)
            ran = int(ran)
            d[str(ran)] += 1
            if i == 999:
                write_to_csv(filename, i, ran+1, newline = False)
            else:
                write_to_csv(filename, i, ran+1)
            if low_cpu:
                time.sleep(0.01*float(low_cpu))
        writelist2 = []
        percentlist2 = []
        for i in range(odds):
            val = d[str(i)]
            writelist2.append(val)
            percentlist2.append(round(((val/rep)*100), 2))
        return (writelist, percentlist, writelist2, percentlist2)
    except (KeyboardInterrupt, SystemExit):
        try:
            os.remove(str(file_count)+'.csv')
        finally:
            exit()

def worker(odds, returndict, num, low_cpu = 0):
    returndict[f'write{num}'] = calculate_probability(odds, low_cpu = low_cpu)
    os.chdir("..")
    os.chdir("..")
    os.system('cls')
from multiprocessing import Process, Manager  # needed for Process and Manager below

def run_tests(times, odds, low_cpu = 0, shutdown = False):
    print('Starting...')
    manager = Manager()
    return_dict = manager.dict()
    job_list = []
    for i in range(times):
        p = Process(target=worker, args=(odds, return_dict, i), kwargs = {'low_cpu' : low_cpu})
        job_list.append(p)
        p.start()
    try:
        for proc in job_list:
            proc.join()
    except KeyboardInterrupt:
        print('User quit program...')
        time.sleep(5)
        for proc in job_list:
            proc.join()
        exit()
    else:
        move_dir('Probability')
        move_dir(str(odds))
        if not os.path.isfile('runs.csv'):
            write_to_csv('runs', return_dict.values()[0][0], return_dict.values()[0][1])
        for value in return_dict.values():
            write_to_csv('runs', value[2], value[3])
        print('Done!')
    finally:
        if shutdown:
            os.system('shutdown /S /F /T 0 /hybrid')
However, when I run this new code, there is only one progress bar, and each process overwrites it, so the bar flashes with random numbers, making it useless. I want a stack of bars, one for each process, that each update without interrupting the others. The bars do not need to be ordered; I just need an idea of how fast each process is doing its task.

STDOUT is just a stream, and all of your processes are attached to the same one, so there is no direct way to tell it to print the output from different processes on different lines.
Probably the simplest way to achieve this is to have a separate process that is responsible for aggregating the status of all the other processes and reporting the results. You can use a multiprocessing.Queue to pass data from the worker processes to the status process, which can then print the status to stdout. If you want a stack of progress bars, you'll have to get a little creative with the formatting (essentially update all the progress bars at the same time and print them in the same order so they appear to stack up).
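For example, here is a minimal sketch of that approach (not the asker's code; the worker count, bar width, and sleep times are made up for illustration). Each worker puts (worker_id, completed) tuples on a shared Queue, and a single status process owns stdout and redraws all the bars in a fixed order so they stack. It uses ANSI cursor movement, so on plain Windows cmd you may need colorama or a terminal with VT support:
from multiprocessing import Process, Queue
import random
import sys
import time

def worker(worker_id, total, queue):
    # Stand-in for the real work; report progress on the queue instead of printing.
    for done in range(1, total + 1):
        time.sleep(random.uniform(0.01, 0.05))
        queue.put((worker_id, done))

def status(queue, n_workers, total):
    # Sole owner of stdout: redraws every bar on each update so they appear stacked.
    progress = {i: 0 for i in range(n_workers)}
    sys.stdout.write('\n' * n_workers)            # reserve one line per worker
    while any(done < total for done in progress.values()):
        worker_id, done = queue.get()
        progress[worker_id] = done
        sys.stdout.write('\x1b[%dF' % n_workers)  # move cursor up to the first bar
        for i in range(n_workers):
            filled = int(20 * progress[i] / total)
            sys.stdout.write('worker %d [%-20s] %d/%d\n'
                             % (i, '#' * filled, progress[i], total))
        sys.stdout.flush()

if __name__ == '__main__':
    n_workers, total = 4, 50
    q = Queue()
    reporter = Process(target=status, args=(q, n_workers, total))
    reporter.start()
    workers = [Process(target=worker, args=(i, total, q)) for i in range(n_workers)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    reporter.join()
Since you are already using tqdm, its position argument (one bar position per process, with a lock shared via tqdm.set_lock) is another way to get stacked bars, but the queue-plus-reporter pattern above gives you full control over the output.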


Ways to Pause/Resume a Process with Python?

First, I'm sorry for my bad English.
I'm trying to pause and resume a .exe file with Python. I already tried psutil.Process(...).suspend()/.resume(), but this takes more than 8 seconds. Normally that would be okay, but I'm freezing Wallpaper Engine, and it looks very bad if the background stays frozen that long. So I need an alternative to p.suspend() and p.resume().
Important information:
-> OS: Windows 10
-> Python: 3.8
My Code:
from win32gui import GetWindowText, GetForegroundWindow
import win32gui
import time
import pygetwindow as gw
import psutil

process_name = "WallEngine"
pid = None
detectedPIDs = 0
for proc in psutil.process_iter():
    if process_name in proc.name():
        pid = proc.pid
        detectedPIDs += 1
        print(pid)
if detectedPIDs == 1:
    pass
else:
    print("Didnt Found the WallEngine Process. Enter Searching Mode ...")
    while detectedPIDs != 1:
        for proc in psutil.process_iter():
            if process_name in proc.name():
                print("Searching Mode Finished. We Found!")
                pid = proc.pid
                detectedPIDs += 1
                print(pid)
print("[INFO] WallpaperEnginePerfomancer Booted.")

allWindows = []

def winEnumHandler(hwnd, ctx, currentlyPaused=None):
    if not win32gui.IsWindowVisible(hwnd):
        # print("Window isnt Visble yet! (" + str(win32gui.GetWindowText(hwnd)) + ")")
        pass
    else:
        rect = win32gui.GetWindowRect(hwnd)
        windowName: str = win32gui.GetWindowText(hwnd)
        x = rect[0]
        y = rect[1]
        w = rect[2] - x
        h = rect[3] - y
        currentScannedWindow = gw.getWindowsWithTitle(windowName)[0]
        if currentScannedWindow.isMaximized:
            if windowName == "" or windowName == None or "Paint 3D" in windowName:
                pass
            else:
                allWindows.append(windowName)

def window_handler():
    p = psutil.Process(pid)
    if len(allWindows) == 0:
        p.resume()
    else:
        p.suspend()
    allWindows.clear()

if __name__ == '__main__':
    while True:
        win32gui.EnumWindows(winEnumHandler, None)
        window_handler()
        time.sleep(0.09)
Try rewriting it using the signal library, e.g. sending SIGSTOP/SIGCONT instead of psutil's suspend/resume (note these signals are POSIX-only and are not available on Windows):
import signal

p = psutil.Process(pid)
if len(allWindows) == 0:
    p.send_signal(signal.SIGCONT)   # resume
else:
    p.send_signal(signal.SIGSTOP)   # pause

Unable to start capture: Invalid argument

I am new to Python. Please help me solve the problem below.
What does "Invalid argument" mean?
All of the code below works well, but when I add a line to publish the live stream through Dataplicity, I get the error "Unable to start capture: Invalid argument i: Error grabbing frames". After that error occurs while publishing the live stream, the functions below still run for motion detection.
The line I added at the top, before def is_person(image), causes the error:
os.system('sudo ./mjpg_streamer -i "./input_uvc.so -f 10 -r 640x320 -n -y" -o "./output_http.so -w ./www -p 80"')
def is_person(image):
    det = Detector(image)
    faces = len(det.face())
    print ("FACE: "), det.drawColors[det.drawn-1 % len(det.drawColors)], faces
    uppers = len(det.upper_body())
    print ("UPPR: "), det.drawColors[det.drawn-1 % len(det.drawColors)], uppers
    fulls = len(det.full_body())
    print ("FULL: "), det.drawColors[det.drawn-1 % len(det.drawColors)], fulls
    peds = len(det.pedestrian())
    print ("PEDS: "), det.drawColors[det.drawn-1 % len(det.drawColors)], peds
    det.draw()
    det.overlay()
    return faces + uppers + fulls + peds
    # return len(det.face()) or len(det.full_body()) or len(det.upper_body()) # or len(det.pedestrian())

def processImage(imgFile):
    global connection
    if is_person(imgFile):
        print ("True")
        imgFile = datetime.datetime.now().strftime("%Y-%m-%d-%H.%M.%S.jpg")
        cam.capture(imgFile)
        #with open(imgFile, "rb") as image_file:
        #    encoded_string = base64.b64encode(image_file.read())
    else: # Not a person
        print ("False")
        os.remove(imgFile)
    sys.exit(0)

try:
    while True:
        previous_state = current_state
        current_state = GPIO.input(sensor)
        if current_state != previous_state:
            new_state = "HIGH" if current_state else "LOW"
            if current_state: # Motion is detected
                lock.acquire()
                cam.start_preview() # Comment out in future
                cam.preview_fullscreen = False
                cam.preview_window = (10, 10, 320, 240)
                print('Motion Detected')
                for i in range(imgCount):
                    curTime = (time.strftime("%I:%M:%S")) + ".jpg"
                    cam.capture(curTime, resize=(320, 240))
                    t = threading.Thread(target=processImage, args=(curTime,))
                    t.daemon = True
                    t.start()
                    time.sleep(frameSleep)
                cam.stop_preview()
                lock.release()
                time.sleep(camSleep)
except KeyboardInterrupt:
    cam.stop_preview()
    sys.exit(0)
Thank you in advance.
You have an issue with mjpg_streamer configuration.
Check comment #274 here.
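One thing that can help with debugging (this is not a fix in itself): start mjpg_streamer with subprocess instead of os.system, so its exit code and stderr are captured instead of being lost. A minimal sketch reusing the exact command from the question:
import subprocess
import time

# Same command as above, but launched with Popen so the streamer runs in the
# background and its error output can be inspected.
cmd = ('sudo ./mjpg_streamer -i "./input_uvc.so -f 10 -r 640x320 -n -y" '
       '-o "./output_http.so -w ./www -p 80"')
streamer = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

time.sleep(2)                    # give it a moment to start
if streamer.poll() is not None:  # it exited immediately -> something is wrong
    out, err = streamer.communicate()
    print("mjpg_streamer exited with code %s" % streamer.returncode)
    print(err)
With the full stderr in hand, "Invalid argument" from input_uvc.so often comes down to a resolution or pixel format the camera cannot deliver, and keep in mind that the picamera code above and the streamer cannot both hold the camera at the same time.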

Unable to catch ctrl+c when running mpi4py

I have been using a script to process data in ~9000 files. Since each file takes some time to process, I used mpi4py to speed things up. But I cannot make the program catch the Ctrl+C signal and save the results already calculated. Here's the code:
import os
import sys
from string import atof
import gc
import pandas as pd
import numpy as np
from StructFunc.Structure_Function import SF_true, SF_fit_params
import scipy.io as sio
import mpi4py
from mpi4py import MPI
import time
import signal

# def handler(signal_num, frame):
#     combine_list = comm.gather(p, root=0)
#     if comm_rank == 0:
#         print combine_list
#         combine_dict = {}
#         for sub_dict in combine_list:
#             for cur_key in sub_dict.keys():
#                 combine_dict[cur_key] = sub_dict[cur_key]
#         print combine_dict
#         sio.savemat('./all_S82_DRW_params.mat', combine_dict)
#         print "results before the interrupt has been saved."
#     sys.exit()
# signal.signal(signal.SIGINT, handler)

comm = MPI.COMM_WORLD
comm_rank = comm.Get_rank()
comm_size = comm.Get_size()

bad_value = [-1, -99.99, -1]
lc_path = '/Users/zhanghaowen/Desktop/AGN/BroadBand_RM/QSO_S82'
model = "DRW"

if comm_rank == 0:
    file_list = os.listdir(lc_path)
    # print file_list
    sys.stderr.write('%d files were to be processed.' % len(file_list))

file_list = comm.bcast(file_list if comm_rank == 0 else None, root=0)
num_files = len(file_list)
# num_files = 6
local_files_offset = np.linspace(0, num_files, comm_size + 1).astype('int')
local_files = file_list[local_files_offset[comm_rank]: local_files_offset[comm_rank + 1]]
sys.stderr.write('%d/%d processor gets %d/%d data' % (comm_rank, comm_size, len(local_files), num_files))

cnt = 0
p = {}
for ind, lc in enumerate(local_files):
    # beginning of process
    try:
        print local_files[ind]
        lc_file = open(os.path.join(lc_path, local_files[ind]), 'r')
        lc_data = pd.read_csv(lc_file, header=None, names=['MJD', 'mag', 'err'], usecols=[0, 1, 2], sep=' ')
        lc_file.close()

        # remove the bad values in the data
        lc_data['MJD'] = lc_data['MJD'].replace(to_replace=bad_value[0], value=np.nan)
        lc_data['MJD'] = lc_data['MJD'].replace(to_replace=np.nan, value=np.nanmean(lc_data['MJD']))
        lc_data['mag'] = lc_data['mag'].replace(to_replace=bad_value[1], value=np.nan)
        lc_data['mag'] = lc_data['mag'].replace(to_replace=np.nan, value=np.nanmean(lc_data['mag']))
        lc_data['err'] = lc_data['err'].replace(to_replace=bad_value[2], value=np.nan)
        lc_data['err'] = lc_data['err'].replace(to_replace=np.nan, value=np.nanmean(lc_data['err']))

        MJD = np.array(lc_data['MJD'])
        mag = np.array(lc_data['mag'])
        err = np.array(lc_data['err'])

        SF_params = []
        resamp_tag = 0
        while resamp_tag < 1:
            sim_err = np.array([abs(np.random.normal(0, err[i], size=1)) for i in range(len(err))]).reshape((1, len(err)))[0]
            try:
                p[lc] = SF_fit_params(MJD, mag, sim_err, MCMC_step=100, MCMC_threads=1, model=model)
                cnt += 1
                sys.stderr.write('processor %d has processed %d/%d files \n' % (comm_rank, cnt, len(local_files)))
                print "finished the MCMC for %s \n" % lc
                resamp_tag += 1
            except:
                continue
    # end of process
    except KeyboardInterrupt:
        combine_list = comm.gather(p, root=0)
        if comm_rank == 0:
            print combine_list
            combine_dict = {}
            for sub_dict in combine_list:
                for cur_key in sub_dict.keys():
                    combine_dict[cur_key] = sub_dict[cur_key]
            print combine_dict
            sio.savemat('./all_S82_DRW_params.mat', combine_dict)
            print "save the dict."
        os._exit()

combine_list = comm.gather(p, root=0)
if comm_rank == 0:
    print combine_list
    combine_dict = {}
    for sub_dict in combine_list:
        for cur_key in sub_dict.keys():
            combine_dict[cur_key] = sub_dict[cur_key]
    print combine_dict
    sio.savemat('./all_S82_DRW_params.mat', combine_dict)
I have tried two ways to catch the Ctrl+C signal: the handler function I defined (but commented out) and the except KeyboardInterrupt trick. When I run the code with python all.py and press Ctrl+C, the script catches the signal but keeps running after saving the results; when I use mpirun -np 2 all.py and press Ctrl+C, the script just stops without saving.
I think the problem under MPI mode may be that only the manager process catches the signal, but then why didn't the single process stop in non-MPI mode after I pressed Ctrl+C? Does anyone know how to make the worker processes catch the signal and do something before terminating?
By the way, I'm using Open MPI.
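Open MPI's mpirun typically reacts to Ctrl+C by sending the ranks SIGTERM and, after a short grace period, SIGKILL, so the ranks never see SIGINT themselves and anything you want to save has to happen quickly. One pattern that may work here (a sketch against the code above, not tested with this exact setup): register a handler on every rank that only sets a flag, check the flag between files, and let every rank fall through to the collective gather:
import signal

interrupted = [False]            # mutable so the handler can set it

def handler(signum, frame):
    # Do nothing MPI-related in here; just record that an interrupt arrived.
    interrupted[0] = True

# Register for SIGINT, and for SIGTERM as well, since the launcher may deliver
# Ctrl+C to the ranks as SIGTERM rather than SIGINT.
signal.signal(signal.SIGINT, handler)
signal.signal(signal.SIGTERM, handler)

for ind, lc in enumerate(local_files):
    if interrupted[0]:
        break                    # every rank leaves the loop and reaches the gather
    # ... process the file exactly as in the original loop ...

combine_list = comm.gather(p, root=0)
if comm_rank == 0:
    pass                         # merge the dicts and savemat, as in the original script
Note the flag is only checked between files, so a rank in the middle of a long MCMC fit will still finish that file (or be killed) before it reacts.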

Only 1 Thread started in for loop

So I'm trying to code a really simple Internet Download Manager spoof with Python 2.7.
It is supposed to query a file's HTTP headers, get the byte range, spread the download among a number of threads (I hard-coded 2 for simplicity) according to the byte range, and later join the file parts together again.
The problem is that my console log tells me that only one thread is started.
[EDIT] The problem has been solved. Find the working code below.
Here is my source:
from __future__ import print_function
import threading
import urllib
import urllib2
import time

threads = []

# url to open
url = "http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
u = urllib.urlopen(url)

# define file
file_name = "test.mp4"
f = open(file_name, 'wb')

# open url and get header info
def get_file_size(url):
    stream_size = u.info()['Content-Length']
    end = stream_size
    return end

start = 0
# get stream size
end = get_file_size(url)
# specify block size
block_sz = 512

# algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
    full_stream_size = end
    first_thread = {'start': 0, 'end': (int(full_stream_size)/2)}
    print(first_thread)
    return first_thread

# algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
    full_stream_size = end
    second_thread = {'start': int(full_stream_size)/2, 'end': int(full_stream_size)}
    print(second_thread)
    return second_thread

# download function
def download_thread(url, id, start, end):
    current_size = int(float(start)/1024)
    total_size = int(float(end)/1024)
    print ("Start at_"+str(current_size) + "Ends at_" + str(total_size))
    # specify request range and init stream
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, end)
    data = urllib2.urlopen(req)
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        start += len(buffer)
        f.write(buffer)
    thread_id = id
    #percentage = (current_size * 100 / total_size)
    status = str(thread_id) + "_" + str(current_size) + "_" + str(total_size)
    print (status)

# starts 2 threads
def start_threads():
    for i in range(2):
        # if first loop, start thread 1
        if(i==1):
            start = calculate_no_of_bytes_for_thread1().get('start')
            end = calculate_no_of_bytes_for_thread1().get('end')
            print("Thread 1 started")
            t = threading.Thread(target=download_thread, args=(url, i, start, end))
            t.start()
            threads.append(t)
        # if second loop, start thread 1
        if(i==2):
            start = calculate_no_of_bytes_for_thread2().get('start')
            end = calculate_no_of_bytes_for_thread2().get('end')
            print("Thread 2 started")
            t = threading.Thread(target=download_thread, args=(url, i, start, end))
            t.start()
            threads.append(t)
    # Join threads back (order doesn't matter, you just want them all)
    for i in threads:
        i.join()

# start benchmarking
start_time = time.clock()
start_threads()
print ("Finito!")
end_time = time.clock()
benchmark = str(end_time - start_time)
print ("Download took_" + benchmark)
f.close()
And the output:
{'start': 0, 'end': 527868}
{'start': 0, 'end': 527868}
Thread 1 started
Start at_0Ends at_515
1_0_515
1_0_515
Finito!
Download took_6.97844422658
Working code:
from __future__ import print_function
import threading
import urllib
import urllib2
import time

threads = []
parts = {}

# url to open
url = "http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3"
u = urllib.urlopen(url)

# define file
file_name = "test.mp3"
f = open(file_name, 'wb')

# open url and get header info
def get_file_size(url):
    stream_size = u.info()['Content-Length']
    file_size = stream_size
    return file_size

start = 0
# get stream size
end = get_file_size(url)
# specify block size
block_sz = 512

# algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
    full_stream_size = end
    first_thread = {'start': 0, 'end': (int(full_stream_size)/2)}
    print(first_thread)
    return first_thread

# algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
    full_stream_size = end
    second_thread = {'start': int(full_stream_size)/2, 'end': int(full_stream_size)}
    print(second_thread)
    return second_thread

# download function
def download_thread(url, id, start, end):
    current_size = int(float(start)/1024)
    total_size = int(float(end)/1024)
    print ("Start at_"+str(current_size) + "Ends at_" + str(total_size))
    # specify request range and init stream
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, end)
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        start += len(buffer)
        f.write(buffer)
    thread_id = id
    status = "Thread ID_" + str(thread_id) + "Downloaded_" + str(int(start/1024)) + "Total_" + str(total_size)
    print (status)

# starts 2 threads
def start_threads():
    for i in range(2):
        # if first loop, start thread 1
        if(i==0):
            start = calculate_no_of_bytes_for_thread1().get('start')
            end = calculate_no_of_bytes_for_thread1().get('end')
            print("Thread 1 started")
            t = threading.Thread(target=download_thread, args=(url, i, start, end))
            t.start()
            threads.append(t)
        # if second loop, start thread 2
        if(i==1):
            start = calculate_no_of_bytes_for_thread2().get('start')
            end = calculate_no_of_bytes_for_thread2().get('end')
            print("Thread 2 started")
            t = threading.Thread(target=download_thread, args=(url, i, start, end))
            t.start()
            threads.append(t)
    # Join threads back (order doesn't matter, you just want them all)
    for i in threads:
        i.join()
    # Sort parts and you're done
    # result = ''
    # for i in range(2):
    #     result += parts[i*block_sz]

# start benchmarking
start_time = time.clock()
start_threads()
print ("Finito!")
end_time = time.clock()
benchmark = str(end_time - start_time)
print ("Download took_" + benchmark)
f.close()
You have:
for i in range(2):
    if(i==1):
        ...
    if(i==2):
        ...
But range(2) iterates over [0,1] not [1,2].
Save some trouble and just remove those 3 lines. The code to start the two threads can just run serially.
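For illustration, a sketch of start_threads with the loop and index checks removed, using the same helper functions and module-level threads list from the question; it simply launches the two download threads back to back and then joins them:
def start_threads():
    # first half of the file
    bounds1 = calculate_no_of_bytes_for_thread1()
    t1 = threading.Thread(target=download_thread,
                          args=(url, 1, bounds1['start'], bounds1['end']))
    t1.start()
    threads.append(t1)

    # second half of the file
    bounds2 = calculate_no_of_bytes_for_thread2()
    t2 = threading.Thread(target=download_thread,
                          args=(url, 2, bounds2['start'], bounds2['end']))
    t2.start()
    threads.append(t2)

    # Join threads back (order doesn't matter, you just want them all)
    for t in threads:
        t.join()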

Python Multiprocessing arcgis shapefiles with PP or async stalling on large files

I am new to this and am trying to implement either Parallel Python (PP) or async to multiprocess ArcGIS shapefile clipping. I have been successful with both pool_async and PP; however, both stall forever on big files (and yes, I tried making Python large-address aware). Here is my code using PP; please offer any solutions, and sorry for any glaring errors :-)
def ClipDo(F, M, O, OW=""):
    #for F in F:
    print "\n" + "PID:%s" % (os.getpid())
    arcpy.env.overwriteOutput = False
    if OW == "":
        pass
    else:
        arcpy.env.overwriteOutput = True
    FPath = os.path.dirname(F)
    F = os.path.basename(F)
    ClipList = []
    pattern = '*.shp'
    for filename in M:
        ClipList.append(filename)
        clipN = str(os.path.splitext(os.path.basename(filename))[0])
        if not os.path.isdir(O + "/" + clipN + "/"):
            os.makedirs(O + "/" + clipN + "/")
    # Counts files in clip directory
    count = len(ClipList)
    for num in range(0, count):
        clip = ClipList[num]
        clipN = str(os.path.splitext(os.path.basename(clip))[0])
        OutShp = clipN + "_" + F
        try:
            print "Clipping, Base File: %s Clip File: %s Output: %s" % (F, clip, O + "\\" + OutShp)
            arcpy.Clip_analysis(os.path.join(FPath, F), os.path.join(M, clip), os.path.join(os.path.join(O + "\\", clipN), OutShp))
            print "Clipping SUCCESS "
        except:
            print "Clipping FAILED " + F

def PP(F, M, O, OW):
    print F
    #~ # tuple of all parallel python servers to connect with
    ncpus = 6
    ncpus = ncpus
    ppservers = ("localhost",)
    #~ #ppservers = ("10.0.0.1",)
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        # Creates jobserver with ncpus workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        #~ # Creates jobserver with automatically detected number of workers
        job_server = pp.Server(ncpus, ppservers=ppservers)
    print "Starting pp with", job_server.get_ncpus(), "workers"
    jobs = []
    start_time = time.time()
    for f in F:
        job = job_server.submit(ClipDo, (f, M, O, OW), (), ("arcpy", "NullGeomFilter"))
        jobs.append(job)
    for job in jobs:
        result = job()
        print result
        if result:
            break
    job_server.destroy()
    print "\n" + "PID:%s" % (os.getpid())
    print "Time elapsed: ", time.time() - start_time, "s"
Could it be that your big chunks are just too big for arcpy, and that the parallelization is not the problem?
As a test, it might be good to run one of the arg lists through your function with the big data interactively/locally to see if that works at all. If it does, then you can move on to logging and debugging the parallel version.
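A minimal sketch of that sanity check; the paths are placeholders (not from the question), standing in for one of the argument tuples you would normally hand to job_server.submit():
# Run one big clip serially, outside PP, to see whether arcpy itself is the bottleneck.
import time

big_file = r"C:\data\big_parcels.shp"        # hypothetical base shapefile
clip_files = [r"C:\data\clips\county1.shp"]  # hypothetical clip layer(s)
out_dir = r"C:\data\out"

t0 = time.time()
ClipDo(big_file, clip_files, out_dir, OW="1")   # the same call a PP worker would make
print "Serial run took %.1f s" % (time.time() - t0)
If this serial run also hangs, the problem is in arcpy.Clip_analysis (or the input data) rather than in PP or the async pool.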
