I am a biologist and I am new to parallel processing. Some important background: some of my scripts can take up to 15 hours to execute. While the main function (in_command) is running, I am trying to run, in parallel, a function that takes snapshots of the hardware usage (CPU, RAM, etc.). The problem I am having is that my recursive, timer-based script (get_stats) executes correctly when run separately, but as soon as I run it in parallel using multiprocessing the timer doesn't seem to work. The function runs about every second even though I have it on a 300-second timer. It does stop after the other script has finished, but I get far more snapshots than needed. I am also not in love with my current approach, so if there is a better way to do it I am willing to learn. I just can't have it impact the other script too much, hence the snapshot approach; I only need to know roughly what is happening. Thanks!
import psutil
import platform
from datetime import datetime
import multiprocessing
import time
import os
from threading import Timer
import multiprocessing as mp
def get_size(bytes, suffix="B"):
    """
    Scale bytes to its proper format
    e.g:
    1253656 => '1.20MB'
    1253656678 => '1.17GB'
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor
def get_stats(switch, snapshot_dict, beg_time):
    df = snapshot_dict
    start_time = beg_time
    df['time'].append(time.time() - start_time)
    # Get core information
    df['total_cores'].append(psutil.cpu_count(logical=True))
    df['physical_cores'].append(psutil.cpu_count(logical=False))
    cpufreq = psutil.cpu_freq()
    # cpu frequency in MHz
    df['max_frequency'].append(cpufreq.max)
    df['min_frequency'].append(cpufreq.min)
    df['current_frequency'].append(cpufreq.current)
    cpu_core = {}
    for i, percentage in enumerate(psutil.cpu_percent(percpu=True, interval=1)):
        cpu_core[str(i)] = percentage
    df['cpu_core'].append(cpu_core)
    # get ram information
    svmem = psutil.virtual_memory()
    df['total_memory'].append(get_size(svmem.total))
    df['available_memory'].append(get_size(svmem.available))
    df['used_memory'].append(get_size(svmem.used))
    df['percent_memory'].append(svmem.percent)
    # swap memory if it exists
    swap = psutil.swap_memory()
    df['swap_total'].append(get_size(swap.total))
    df['swap_free'].append(get_size(swap.free))
    df['swap_used'].append(get_size(swap.used))
    df['swap_percentage'].append(swap.percent)
    print(df)
    # Call the code again on a recursive timer
    t = Timer(300, get_stats(switch,df,beg_time))
    t.start()
    print('Switch: ', switch.value)
    if switch.value == 1:
        t.cancel()
def in_command(file, switch):
    f = open(file, 'r')
    f_lines = f.readlines()
    for line in f_lines:
        print(line)
        os.system(line)
    f.close()
    switch.value += 1
if __name__ == "__main__":
manager= mp.Manager()
df = {'time': [], 'total_cores': [], 'physical_cores': [], 'max_frequency': [],
'min_frequency': [], 'current_frequency': [], 'cpu_core': [], 'total_memory': [],
'available_memory': [], 'used_memory': [], 'percent_memory': [], 'swap_total': [],
'swap_free': [], 'swap_used': [], 'swap_percentage': []}
process_switch = manager.Value('i',0)
start_time = time.time()
p1 = mp.Process(target=get_stats, args = (process_switch, df, start_time))
p2 = mp.Process(target=in_command, args=('text_command.txt', process_switch))
p1.start()
p2.start()
p1.join()
p2.join()
print('finished')
Thanks Michael. So, to everyone who might be reading this: don't call the function with its arguments inside Timer; pass the function itself and supply the arguments separately.
Incorrect:
t = Timer(300, get_stats(switch,df,beg_time))
Correct:
t = Timer(5, function=get_stats,args=[switch,snapshot_dict,beg_time])
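To make the difference concrete, here is a minimal self-contained sketch of the corrected scheduling pattern. It is not the full monitoring script: the snapshot body is reduced to a single timestamp, the interval is shortened to 2 seconds, and a threading.Event stands in for the shared switch so the example runs on its own.
import time
from threading import Event, Timer

def get_stats(snapshots, beg_time, stop_flag):
    # Stand-in for the full snapshot body: record elapsed seconds only.
    snapshots.append(time.time() - beg_time)
    # Pass the callable and its arguments separately; writing get_stats(...)
    # here would run it immediately instead of scheduling it after the delay.
    if not stop_flag.is_set():
        Timer(2, function=get_stats, args=[snapshots, beg_time, stop_flag]).start()

if __name__ == "__main__":
    stop_flag = Event()   # stands in for the shared switch
    snaps = []
    get_stats(snaps, time.time(), stop_flag)
    time.sleep(7)         # let a few snapshots accumulate
    stop_flag.set()       # stop rescheduling
    time.sleep(3)         # let any pending timer fire and exit
    print(snaps)
The key point is that Timer receives the callable plus an args list, rather than the result of calling the function.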
Related
I have a multidimensional array which needs to be processed by an imported function. (I am using a Jupyter notebook, so I exported the function to an ipynb file and imported it again.) The function takes a 1-dimensional array as its argument.
#Function
def calculatespi(datagrid, q):
    date_time = datagrid['time'][:]
    gridvalue = datagrid.values
    if np.isnan(np.sum(gridvalue)) != True:
        df_precip = pd.DataFrame({"Date": date_time, "precip": gridvalue})
        spi_prc = spi.SPI()
        spi3_grid = spi_prc.calculate(df_precip, 'Date', 'precip', freq='M',
                                      scale=3, fit_type="lmom", dist_type="gam")
        spi3 = spi3_grid['precip_scale_3_calculated_index'].values
    else:
        spi3 = np.empty((489))
        spi3[:] = np.nan
    q.put(spi3)
#Main Notebook
if name == "main":
spipi = []
processes = []
for x in range (3):
for y in range(3):
q = multiprocessing.Queue()
p = multiprocessing.Process(target=calculatespi, args= (prcoba[:,x,y],q))
p.start()
processes.append(p)
spipi.append(q.get())
for process in processes:
process.join()
After hundreds of attempts, I can finally retrieve the results, but it takes far longer than running it without multiprocessing. What should I do?
Using concurrent.futures.ProcessPoolExecutor makes things much easier. (Incidentally, the loop above calls q.get() right after p.start(), which blocks until that particular process has finished, so the work effectively runs one process at a time while still paying the process start-up cost.)
First, in calculatespi, replace q.put(spi3) with return spi3 and remove the q parameter. Then the "main" code can be written as
#Main Notebook
if name == "main":
from concurrent.futures import ProcessPoolExecutor
args = []
for x in range (3):
for y in range(3):
args.append(prcoba[:,x,y])
with ProcessPoolExecutor() as executor:
spipi = list(executor.map(calculatespi, args))
The executor takes care of everything else.
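For reference, the modified calculatespi would look roughly like this (same body as in the question, assuming the same numpy/pandas/spi imports are available; only the queue handling changes):
def calculatespi(datagrid):
    date_time = datagrid['time'][:]
    gridvalue = datagrid.values
    if not np.isnan(np.sum(gridvalue)):
        df_precip = pd.DataFrame({"Date": date_time, "precip": gridvalue})
        spi_prc = spi.SPI()
        spi3_grid = spi_prc.calculate(df_precip, 'Date', 'precip', freq='M',
                                      scale=3, fit_type="lmom", dist_type="gam")
        spi3 = spi3_grid['precip_scale_3_calculated_index'].values
    else:
        spi3 = np.empty((489))
        spi3[:] = np.nan
    return spi3  # returned to the executor instead of q.put(spi3)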
I am passing the keys and values of a dictionary for parallel processing:
if __name__ == "__main__":
DATASETS = {
"Dataset_1": data_preprocess.dataset_1,
"Dataset_2": data_preprocess.dataset_2,}
pool = mp.Pool(8)
pool.starmap(main, zip(DATASETS.keys(), DATASETS.values()))
pool.close()
# As I am not joining any result and I am directly saving the output
# in CSV file from (main function) I did not used pool.join()
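(As a small aside, zip(DATASETS.keys(), DATASETS.values()) yields the same (key, value) pairs as DATASETS.items(), so the starmap call could equally be written as:)
pool.starmap(main, DATASETS.items())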
The main function
def main(dataset_name, generate_dataset):
    REGRESSORS = {
        "LinReg": LinearRegression(),
        "Lasso": Lasso(),
    }
    ROOT = Path(__file__).resolve().parent
    dataset_name = dataset_name
    generate_dataset = generate_dataset
    dfs = []
    for reg_name, regressor in REGRESSORS.items():
        df = function_calling(
            generate_dataset=generate_dataset,
            regressor=regressor,
            reg_name=reg_name,
        )
        print(df)
        dfs.append(df)
    df = pd.concat(dfs, axis=0, ignore_index=True)
    filename = dataset_name + "_result.csv"
    outfile = str(PATH) + "/" + filename
    df.to_csv(outfile)
I am getting an error AssertionError: daemonic processes are not allowed to have children.
Could you tell me why I am getting the error? How can I resolve this?
The error means that a daemonic process tried to start a child process of its own: multiprocessing.Pool workers are daemonic, and presumably something inside your main function (or a library it calls) spawns processes. One way around this is to just create your own Process instances, which are non-daemonic by default:
import multiprocessing as mp

def main(dataset_name, generate_dataset):
    print(dataset_name, generate_dataset, flush=True)
    ... # etc.

if __name__ == "__main__":
    DATASETS = {
        "Dataset_1": 1,
        "Dataset_2": 2,
    }
    processes = [mp.Process(target=main, args=(k, v)) for k, v in DATASETS.items()]
    for process in processes:
        process.start()
    # wait for termination:
    for process in processes:
        process.join()
Prints:
Dataset_1 1
Dataset_2 2
The issue is this: suppose you have 8 CPU cores and DATASETS had 100 key/value pairs. You would be creating 100 processes, and assuming they are CPU-intensive, you could not expect more than 8 of them to really be doing anything productive at any moment. Yet you would still incur the CPU and memory overhead of having created all those processes. But as long as the number of processes you create is not excessively greater than the number of CPU cores you have, and your function main does not need to return a value back to the main process, this approach should be OK.
There is also a way of implementing your own multiprocessing pool with these Process instances and a Queue instance, but that's a bit more complicated:
import multiprocessing as mp

def main(dataset_name, generate_dataset):
    print(dataset_name, generate_dataset, flush=True)
    ... # etc.

def worker(queue):
    while True:
        arg = queue.get()
        if arg is None:
            # signal to terminate
            break
        # unpack
        dataset_name, generate_dataset = arg
        main(dataset_name, generate_dataset)

if __name__ == "__main__":
    DATASETS = {
        "Dataset_1": 1,
        "Dataset_2": 2,
    }
    queue = mp.Queue()
    items = list(DATASETS.items())
    for k, v in items:
        # put the arguments on the queue
        queue.put((k, v))
    # number of processors we will be using:
    n_processors = min(mp.cpu_count(), len(items))
    for _ in range(n_processors):
        # special value to tell the worker there is no more work: one for each worker
        queue.put(None)
    processes = [mp.Process(target=worker, args=(queue,)) for _ in range(n_processors)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()
I am able to submit batches of concurrent.futures.ProcessPoolExecutor.submit() calls, where each batch may contain several submit() calls. However, I noticed that if each batch of submits consumes a significant amount of RAM, this can be quite inefficient: all futures in a batch must complete before another batch of submit() calls can be issued.
How does one create a continuous stream of Python's concurrent.futures.ProcessPoolExecutor.submit() until some condition is satisfied?
Test Script:
#!/usr/bin/env python3
import numpy as np
from numpy.random import default_rng, SeedSequence
import concurrent.futures as cf
from itertools import count

def dojob( process, iterations, samples, rg ):
    # Do some tasks
    result = []
    for i in range( iterations ):
        a = rg.standard_normal( samples )
        b = rg.integers( -3, 3, samples )
        mean = np.mean( a + b )
        result.append( ( i, mean ) )
    return { process : result }

if __name__ == '__main__':
    cpus = 2
    iterations = 10000
    samples = 1000

    # Setup NumPy Random Generator
    ss = SeedSequence( 1234567890 )
    child_seeds = ss.spawn( cpus )
    rg_streams = [ default_rng(s) for s in child_seeds ]

    # Perform concurrent analysis by batches
    counter = count( start=0, step=1 )

    # Serial Run of dojob
    process = next( counter )
    for cpu in range( cpus ):
        process = next( counter )
        rg = rg_streams[ cpu ]
        rdict = dojob( process, iterations, samples, rg )
        print( 'rdict', rdict )

    # Concurrent Run of dojob
    futures = []
    results = []
    with cf.ProcessPoolExecutor( max_workers=cpus ) as executor:
        while True:
            for cpu in range( cpus ):
                process = next( counter )
                rg = rg_streams[ cpu ]
                futures.append( executor.submit( dojob, process, iterations, samples, rg ) )
            for future in cf.as_completed( futures ):
                # Do some post processing
                r = future.result()
                for k, v in r.items():
                    if len( results ) < 5000:
                        results.append( np.std( v ) )
                        print( k, len(results) )
            if len(results) <= 100:  # Put a huge number to simulate continuous streaming
                futures = []
                child_seeds = child_seeds[0].spawn( cpus )
                rg_streams = [ default_rng(s) for s in child_seeds ]
            else:
                break

    print( '\n*** Concurrent Analyses Ended ***' )
To expand on my comment, how about something like this, using the completion callback and a threading.Condition? I took the liberty of adding a progress indicator too.
EDIT: I refactored this into a neat function to which you pass your desired concurrency and queue depth, as well as a function that generates new jobs and another that processes a result and lets the executor know whether you've had enough.
import concurrent.futures as cf
import threading
import time
from itertools import count

import numpy as np
from numpy.random import SeedSequence, default_rng

def dojob(process, iterations, samples, rg):
    # Do some tasks
    result = []
    for i in range(iterations):
        a = rg.standard_normal(samples)
        b = rg.integers(-3, 3, samples)
        mean = np.mean(a + b)
        result.append((i, mean))
    return {process: result}

def execute_concurrently(cpus, max_queue_length, get_job_fn, process_result_fn):
    running_futures = set()
    jobs_complete = 0
    job_cond = threading.Condition()
    all_complete_event = threading.Event()

    def on_complete(future):
        nonlocal jobs_complete
        if process_result_fn(future.result()):
            all_complete_event.set()
        running_futures.discard(future)
        jobs_complete += 1
        with job_cond:
            job_cond.notify_all()

    time_since_last_status = 0
    start_time = time.time()
    with cf.ProcessPoolExecutor(cpus) as executor:
        while True:
            while len(running_futures) < max_queue_length:
                fn, args = get_job_fn()
                fut = executor.submit(fn, *args)
                fut.add_done_callback(on_complete)
                running_futures.add(fut)
            with job_cond:
                job_cond.wait()
            if all_complete_event.is_set():
                break
            if time.time() - time_since_last_status > 1.0:
                rps = jobs_complete / (time.time() - start_time)
                print(
                    f"{len(running_futures)} running futures on {cpus} CPUs, "
                    f"{jobs_complete} complete. RPS: {rps:.2f}"
                )
                time_since_last_status = time.time()

def main():
    ss = SeedSequence(1234567890)
    counter = count(start=0, step=1)
    iterations = 10000
    samples = 1000
    results = []

    def get_job():
        seed = ss.spawn(1)[0]
        rg = default_rng(seed)
        process = next(counter)
        return dojob, (process, iterations, samples, rg)

    def process_result(result):
        for k, v in result.items():
            results.append(np.std(v))
        if len(results) >= 10000:
            return True  # signal we're complete

    execute_concurrently(
        cpus=16,
        max_queue_length=20,
        get_job_fn=get_job,
        process_result_fn=process_result,
    )

if __name__ == "__main__":
    main()
The answer posted by @AKX works. Kudos to him. After testing it, I would like to recommend two amendments that I believe are worth considering and implementing.
Amendment 1: To prematurely cancel the execution of the Python script, Ctrl+C has to be used. Unfortunately, doing that does not terminate the concurrent.futures.ProcessPoolExecutor() processes that are executing dojob(). The issue becomes more pronounced when the time taken to complete dojob() is long; this situation can be simulated by making the sample size in the script large (e.g. samples = 100000). The issue can be seen by running ps -ef | grep python in a terminal. Also, if dojob() consumes a significant amount of RAM, the memory used by these concurrent processes does not get released until they are manually killed (e.g. kill -9 [PID]). To address these issues, the following amendment is needed.
with job_cond:
    job_cond.wait()
should be changed to:
try:
    with job_cond:
        job_cond.wait()
except KeyboardInterrupt:
    # Cancel running futures
    for future in running_futures:
        _ = future.cancel()
    # Ensure concurrent.futures.executor jobs really do finish.
    _ = cf.wait(running_futures, timeout=None)
So when Ctrl+C is needed, press it once first, then give the futures in running_futures some time to be cancelled. This can take anywhere from a few seconds to considerably longer, depending on the resource requirements of dojob(); you can watch the CPU activity in your task manager or system monitor drop to zero, or hear your CPU cooling fan spin down. Note that the RAM used is not released yet at this point. Then press Ctrl+C again, and that should allow a clean exit of all the concurrent processes, at which point the used RAM is also released.
Amendment 2: Presently, the inner while-loop submits jobs continuously, as fast as the main thread allows. Realistically, there is no benefit to submitting more jobs than there are available CPUs in the pool; doing so only wastes CPU time in the main process. To regulate the continuous job submission, a new submit_job threading.Event() object can be used.
Firstly, define such an object and set its value to True with:
submit_job = threading.Event()
submit_job.set()
Next, at the end of the inner while-loop add this condition and .wait() method:
with cf.ProcessPoolExecutor(cpus) as executor:
    while True:
        while len(running_futures) < max_queue_length:
            fn, args = get_job_fn()
            fut = executor.submit(fn, *args)
            fut.add_done_callback(on_complete)
            running_futures.add(fut)
            if len(running_futures) >= cpus:  # Add this line
                submit_job.clear()            # Add this line
            submit_job.wait()                 # Add this line
Finally change the on_complete(future) callback to:
def on_complete(future):
    nonlocal jobs_complete
    if process_result_fn(future.result()):
        all_complete_event.set()
    running_futures.discard(future)
    if len(running_futures) < cpus:  # add this conditional setting
        submit_job.set()             # add this conditional setting
    jobs_complete += 1
    with job_cond:
        job_cond.notify_all()
There is a library called Pypeln that does this beautifully. It allows for streaming tasks between stages, and each stage can be run in a process, thread, or asyncio pool, depending on what is optimum for your use case.
Sample code:
import pypeln as pl
import time
from random import random

def slow_add1(x):
    time.sleep(random())  # <= some slow computation
    return x + 1

def slow_gt3(x):
    time.sleep(random())  # <= some slow computation
    return x > 3

data = range(10)  # [0, 1, 2, ..., 9]
stage = pl.process.map(slow_add1, data, workers=3, maxsize=4)
stage = pl.process.filter(slow_gt3, stage, workers=2)
data = list(stage)  # e.g. [5, 6, 9, 4, 8, 10, 7]
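Since both steps above mostly sleep (waiting rather than computing), the same pipeline could presumably be run on thread pools instead by swapping pl.process for pl.thread; here is a sketch under the assumption that the thread module mirrors the process module's map/filter signatures:
import pypeln as pl
import time
from random import random

def slow_add1(x):
    time.sleep(random())  # waiting rather than computing, so threads suffice
    return x + 1

def slow_gt3(x):
    time.sleep(random())
    return x > 3

data = range(10)
stage = pl.thread.map(slow_add1, data, workers=3, maxsize=4)  # thread-based stage
stage = pl.thread.filter(slow_gt3, stage, workers=2)
data = list(stage)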
I am a bioinformaticist trying to create a script that will monitor the hardware resource usage of my scripts. The scripts can take up to 15 hours so I need something that will take snapshots periodically while the script is running.
I decided to run the two functions in parallel with the monitoring function (get_stats()) on a recursive timer to get the snapshots as they come. The in_command() function takes a text file with the os commands used to call the script pipelines.
My problem is that the get_stats() function doesn't stop once the other script has finished. I have a switch (process_switch) that should flip once the other function has finished and stop the get_stats() function. I am new to parallel processing.
import psutil
import platform
from datetime import datetime
import multiprocessing
import time
import os
from threading import Timer
import multiprocessing as mp
def get_size(bytes, suffix="B"):
    """
    Scale bytes to its proper format
    e.g:
    1253656 => '1.20MB'
    1253656678 => '1.17GB'
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor
def get_stats():
    global df
    global process_switch
    global start_time
    df['time'].append(time.time() - start_time)
    # Get core information
    df['total_cores'].append(psutil.cpu_count(logical=True))
    df['physical_cores'].append(psutil.cpu_count(logical=False))
    cpufreq = psutil.cpu_freq()
    # cpu frequency in MHz
    df['max_frequency'].append(cpufreq.max)
    df['min_frequency'].append(cpufreq.min)
    df['current_frequency'].append(cpufreq.current)
    cpu_core = {}
    for i, percentage in enumerate(psutil.cpu_percent(percpu=True, interval=1)):
        cpu_core[str(i)] = percentage
    df['cpu_core'].append(cpu_core)
    # get ram information
    svmem = psutil.virtual_memory()
    df['total_memory'].append(get_size(svmem.total))
    df['available_memory'].append(get_size(svmem.available))
    df['used_memory'].append(get_size(svmem.used))
    df['percent_memory'].append(svmem.percent)
    # swap memory if it exists
    swap = psutil.swap_memory()
    df['swap_total'].append(get_size(swap.total))
    df['swap_free'].append(get_size(swap.free))
    df['swap_used'].append(get_size(swap.used))
    df['swap_percentage'].append(swap.percent)
    print(df)
    # check to see if the other function has finished and stop this process
    if process_switch == 1:
        t = Timer(20, get_stats)
        t.start()
    else:
        t.stop()
def in_command(file):
    # takes in a text file and runs the command lines located within it sequentially
    global process_switch
    f = open(file, 'r')
    f_lines = f.readlines()
    for line in f_lines:
        print(line)
        os.system(line)
    f.close()
    # turn off monitoring software
    process_switch = 0
if __name__ == "__main__":
df = {'time': [], 'total_cores': [], 'physical_cores': [], 'max_frequency': [],
'min_frequency': [], 'current_frequency': [], 'cpu_core': [], 'total_memory': [],
'available_memory': [], 'used_memory': [], 'percent_memory': [], 'swap_total': [],
'swap_free': [], 'swap_used': [], 'swap_percentage': []}
process_switch = 1
start_time = time.time()
p1 = mp.Process(target=get_stats, args = ())
p2 = mp.Process(target=in_command, args=('text_command.txt',))
p1.start()
p2.start()
p1.join()
p2.join()
print(df)
print('finished')
Thanks martineau! I had no idea that global variables are not shared between processes; it makes sense now that I stop and think about it. For other people who are having this issue, I was able to solve it by using multiprocessing.Manager() to declare the shared variables. There is a great post about the problem here: https://blog.ruanbekker.com/blog/2019/02/19/sharing-global-variables-in-python-using-multiprocessing/. Thanks for the help!
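For anyone who wants to see the shape of that fix, here is a minimal sketch (not my full script): the psutil readings are reduced to a single timestamp, the snapshot storage is simplified to a shared list, and the recursive Timer is replaced by a plain sleep loop inside the monitoring process, which sidesteps the Timer problem entirely.
import os
import time
import multiprocessing as mp

def get_stats(process_switch, snapshots, start_time):
    # Poll until the other process flips the shared switch
    # (after the flip, the loop exits at the next check).
    while process_switch.value == 1:
        snapshots.append(time.time() - start_time)  # stand-in for the psutil readings
        time.sleep(20)  # snapshot interval

def in_command(file, process_switch):
    with open(file) as f:
        for line in f:
            print(line)
            os.system(line)
    process_switch.value = 0  # turn off the monitoring process

if __name__ == "__main__":
    manager = mp.Manager()
    process_switch = manager.Value('i', 1)  # shared flag instead of a plain global
    snapshots = manager.list()              # shared storage instead of a plain dict
    start_time = time.time()
    p1 = mp.Process(target=get_stats, args=(process_switch, snapshots, start_time))
    p2 = mp.Process(target=in_command, args=('text_command.txt', process_switch))
    p1.start()
    p2.start()
    p2.join()
    p1.join()
    print(list(snapshots))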
My question is very similar to this question here, except the solution with catching didn't quite work for me.
Problem: I'm using multiprocessing to handle a file in parallel. Around 97% of the time, it works. However, sometimes the parent process will idle forever and CPU usage shows 0.
Here is a simplified version of my code
from PIL import Image
import imageio
from multiprocessing import Process, Manager

def split_ranges(min_n, max_n, chunks=4):
    chunksize = ((max_n - min_n) / chunks) + 1
    return [range(x, min(max_n-1, x+chunksize)) for x in range(min_n, max_n, chunksize)]

def handle_file(file_list, vid, main_array):
    for index in file_list:
        try:
            #Do Stuff
            valid_frame = Image.fromarray(vid.get_data(index))
            main_array[index] = 1
        except:
            main_array[index] = 0

def main(file_path):
    mp_manager = Manager()
    vid = imageio.get_reader(file_path, 'ffmpeg')
    num_frames = vid._meta['nframes'] - 1
    list_collector = mp_manager.list(range(num_frames)) #initialize a list as the size of number of frames in the video
    total_list = split_ranges(10, min(200, num_frames), 4) #some arbitrary numbers between 0 and num_frames of video
    processes = []
    file_readers = []
    for split_list in total_list:
        video = imageio.get_reader(file_path, 'ffmpeg')
        proc = Process(target=handle_file, args=(split_list, video, list_collector))
        print "Started Process" #Always gets printed
        proc.Daemon = False
        proc.start()
        processes.append(proc)
        file_readers.append(video)
    for i, proc in enumerate(processes):
        proc.join()
        print "Join Process " + str(i) #Doesn't get printed
        fd = file_readers[i]
        fd.close()
    return list_collector
The issue is that I can see the processes starting and I can see that all of the items are being handled. However, sometimes the processes don't rejoin. When I check back, only the parent process is there, idling as if it's waiting for something. None of the child processes are still running, but I don't think join() is completing, because my print statement doesn't show up.
My hypothesis is that this happens to videos with a lot of broken frames. However, it's a bit hard to reproduce this error because it rarely occurs.
EDIT: Code should be valid now. Trying to find a file that can reproduce this error.