I would like to be able to do something like this:
import subprocess

nproc = 0
for i in range(100):
    subprocess.Popen(commands[i], when_finished="nproc -= 1")
    nproc += 1
while nproc > 0:
    print("%d processes running..." % nproc)
Poll the processes:
import subprocess
from time import sleep

processes = [subprocess.Popen(command) for command in commands]
while True:
    nproc = sum(p.poll() is None for p in processes)
    if not nproc:
        break
    print("%d processes running..." % nproc)
    sleep(0.1)
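If you really do want the when_finished style from the question instead of a polling loop, a watcher thread per child can approximate it. This is only a sketch: popen_with_callback is a made-up helper name, and commands is assumed to be the same list of command lines as above.

import subprocess
import threading

def popen_with_callback(args, when_finished):
    # Start the child, then wait for it on a background thread and
    # run the callback once it has exited.
    proc = subprocess.Popen(args)

    def watcher():
        proc.wait()
        when_finished(proc)

    threading.Thread(target=watcher, daemon=True).start()
    return proc

# e.g. popen_with_callback(command, lambda p: print(p.pid, "finished"))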
Code:
import os
import subprocess
execs = ['C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof2.exe',   # --> Child 1
         'C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof5.exe',   # --> Child 2
         'C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof10.exe', ...]  # --> Child 3 and more
print('Parent Process id : ', os.getpid())
process = [subprocess.Popen(exe) for exe in execs]
for proc in process:
    try:
        proc.wait(timeout=15)
        print('Child Process id : ', proc.pid)
        if proc.returncode == 0:
            print(proc.pid, 'Exited')
    except subprocess.TimeoutExpired:
        proc.terminate()
        print('Child Process with pid', proc.pid, 'is killed')
Some child processes take more than 15 seconds to execute, so I have to kill any process that exceeds the timeout. But proc.wait(timeout=15) is not raising an exception; instead, the process simply runs to completion.
I also tried [subprocess.Popen(exe, timeout=15) for exe in execs] but got
Error:
TypeError: __init__() got an unexpected keyword argument 'timeout'
You're not waiting for the processes in parallel - you're giving them each a sequential 15 seconds to do their thing.
You might want something like this to give all of them up to 15 seconds in parallel.
import os
import subprocess
import time

execs = [
    "C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof2.exe",
    "C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof5.exe",
    "C:\\Users\\XYZ\\PycharmProjects\\Task1\\dist\\Multiof10.exe",
]

print("Parent Process id : ", os.getpid())
process = [subprocess.Popen(exe) for exe in execs]

start_time = time.time()
while True:
    elapsed_time = time.time() - start_time
    any_alive = False
    for proc in process:
        ret = proc.poll()
        if ret is None:  # still alive
            if elapsed_time >= 15:
                print("Killing:", proc.pid)
                proc.terminate()
            any_alive = True
    if not any_alive:
        break
    time.sleep(1)
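If you would rather keep wait() than poll in a loop, another option is to share one deadline across all the children and give each wait() call only the time that is left on it. A rough sketch along those lines, reusing the execs list from above:

import subprocess
import time

deadline = time.monotonic() + 15  # one shared 15-second budget for all children
process = [subprocess.Popen(exe) for exe in execs]

for proc in process:
    remaining = deadline - time.monotonic()
    try:
        # Never pass a negative timeout; once the budget is spent, this
        # raises TimeoutExpired immediately for any child still running.
        proc.wait(timeout=max(remaining, 0))
        print("Child Process id :", proc.pid, "exited with", proc.returncode)
    except subprocess.TimeoutExpired:
        proc.terminate()
        print("Child Process with pid", proc.pid, "is killed")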
I have some code where I try to stress CPU cores. I want to run part of the cores at 100% while the rest run at 0%. The logic I've used for the cores that run at 100% is:
# Pass the CPU core number as affinity
def loop(conn, affinity):
    proc = psutil.Process()
    proc_info = proc.pid
    msg = "Process ID: " + str(proc_info) + " CPU: " + str(affinity[0])
    conn.send(msg)
    conn.close()
    proc.cpu_affinity(affinity)  # Allocate a certain CPU core for this process
    while True:
        1 * 1
The cores executing this code run at 100%.
I wrote another function and attach each of the remaining cores to a process executing this loop:
def rest_cores(affinity, exec_time):
    proc = psutil.Process()
    proc.cpu_affinity(affinity)
    time.sleep(exec_time)
According to this logic, the cores should suspend execution for the exec_time and be at 0%. But the cores run at a higher percentage. How do I ensure that all the remaining cores are running at 0%?
Here is the full logic:
from multiprocessing import Process, Pipe
import os
import signal
import sys
import time

import psutil

def loop(conn, affinity):
    proc = psutil.Process()
    proc_info = proc.pid
    msg = "Process ID: " + str(proc_info) + " CPU: " + str(affinity[0])
    conn.send(msg)
    conn.close()
    proc.cpu_affinity(affinity)
    while True:
        1 * 1

def rest_cores(affinity, exec_time):
    proc = psutil.Process()
    proc.cpu_affinity(affinity)
    time.sleep(exec_time)

def cpu_stress():
    procs = []
    conns = []
    n_cpu = psutil.cpu_count(logical=True)
    proc_num = n_cpu // 2  # Half the cores will run at 100%
    for i in range(proc_num):  # Initial half of the total cores
        parent_conn, child_conn = Pipe()
        p = Process(target=loop, args=(child_conn, [i]))
        p.start()
        procs.append(p)
        conns.append(parent_conn)
    for i in range(proc_num + 1, n_cpu):  # Final half of total cores
        parent_conn, child_conn = Pipe()
        p = Process(target=rest_cores, args=([i], exec_time))
        p.start()
        procs.append(p)
    for conn in conns:
        try:
            print(conn.recv())
        except EOFError:
            continue
    time.sleep(exec_time)
    for p in procs:
        p.terminate()

cpu_stress()
Can someone help me figure out why the following code won't run properly? I want to spawn new processes as the previous ones finish, but running this code automatically runs everything, i.e. all the jobs report finished and stopped when they aren't, and their windows are still open as well. Any thoughts on why is_alive() returns False when the job is actually still running?
import subprocess
import sys
import multiprocessing
import time

start_on = 33  # '!'
end_on = 34
num_processors = 4
jobs = []

def createInstance():
    global start_on, end_on, jobs
    cmd = "python scrape.py" + " " + str(start_on) + " " + str(end_on)
    print cmd
    p = multiprocessing.Process(target=processCreator(cmd))
    jobs.append(p)
    p.start()
    start_on += 1
    end_on += 1
    print "length of jobs is: " + str(len(jobs))

def processCreator(cmd):
    subprocess.Popen(cmd, creationflags=subprocess.CREATE_NEW_CONSOLE)

if __name__ == '__main__':
    num_processors = input("How many instances to run simultaneously?: ")
    for i in range(num_processors):
        createInstance()
    while len(jobs) > 0:
        jobs = [job for job in jobs if job.is_alive()]
        for i in range(num_processors - len(jobs)):
            createInstance()
        time.sleep(1)
    print('*** All jobs finished ***')
Your code is spawning two processes on each createInstance() call, and I think that's what is confusing the is_alive() check.
p = multiprocessing.Process(target=processCreator(cmd))
This spawns one process to run processCreator(cmd). Then, subprocess.Popen(cmd, creationflags=subprocess.CREATE_NEW_CONSOLE) spawns a child process to run the command. That Popen call returns immediately, so the process running processCreator() finishes right away too, which is why is_alive() reports False even though the console window launched by Popen is still open.
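A quick way to see that effect in isolation, as a standalone sketch (the sleeping child here is just a stand-in for scrape.py):

import multiprocessing
import subprocess
import time

def wrapper():
    # Launch the child and return immediately; the multiprocessing.Process
    # that ran this function is finished as soon as wrapper() returns.
    subprocess.Popen(["python", "-c", "import time; time.sleep(30)"])

if __name__ == "__main__":
    p = multiprocessing.Process(target=wrapper)
    p.start()
    time.sleep(1)
    print(p.is_alive())  # False, even though the Popen child is still sleeping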
I think this version will work, removing the use of multiprocessing entirely. I have also changed the cmd definition to a list of arguments (see the docs):
import subprocess
import sys
import time

start_on = 33  # '!'
end_on = 34
num_processors = 4
jobs = []

def createInstance():
    global start_on, end_on, jobs
    cmd = ["python", "scrape.py", str(start_on), str(end_on)]
    print(str(cmd))
    # Popen starts the child immediately; there is no separate start() call.
    p = subprocess.Popen(cmd, creationflags=subprocess.CREATE_NEW_CONSOLE)
    jobs.append(p)
    start_on += 1
    end_on += 1
    print("length of jobs is: " + str(len(jobs)))

if __name__ == '__main__':
    num_processors = input("How many instances to run simultaneously?: ")
    for i in range(num_processors):
        createInstance()
    while len(jobs) > 0:
        jobs = [job for job in jobs if job.poll() is None]
        for i in range(num_processors - len(jobs)):
            createInstance()
        time.sleep(1)
    print('*** All jobs finished ***')
I expected all the child processes to finish and then the main process to exit, but it never exits. Why?
#!/usr/bin/env python
# coding=utf-8
import os
from multiprocessing import Manager
from multiprocessing import Pool

def write_file_name_to_queue(q, src_folder):
    print('Process to write: %s' % os.getpid())
    if not os.path.exists(src_folder):
        print "Please input folder path"
        return
    for (dirpath, dirnames, filelist) in os.walk(src_folder):
        for name in filelist:
            if name[0] == '.':
                continue
            q.put(os.path.join(dirpath, name))

def read_file_name_from_queue(q):
    print('Process to read: %s' % os.getpid())
    while True:
        value = q.get(True)
        print('Get %s from queue.' % value)

if __name__ == "__main__":
    mg = Manager()
    q = mg.Queue()
    p = Pool()
    p.apply_async(func=write_file_name_to_queue, args=(q, "./test/"))
    for i in xrange(8):
        p.apply_async(func=read_file_name_from_queue, args=(q,))
    p.close()
    p.join()
Run it and you get the following result:
➜ check python check_process.py
Process to write: 3918
Process to read: 3919
Process to read: 3920
Get ./test/a from queue.
Get ./test/b from queue.
Get ./test/c from queue.
Get ./test/e from queue.
Get ./test/f from queue.
Process to read: 3921
Process to read: 3918
The process still waits.
I need to write code to run roughly a quarter million input files in batches. I saw this post: https://codereview.stackexchange.com/questions/20416/python-parallelization-using-popen
I can't figure out how to implement this in my code.
What I want
I want to give each process a specific number of cores, or in other words, only a specific number of processes should run at any given time.
If one process finishes, another one should take its place.
My code (using subprocess)
Main.py
import subprocess
import os
import multiprocessing
import time

MAXCPU = multiprocessing.cpu_count()
try:
    cp = int(raw_input("Enter Number of CPU's to use (Total %d) = " % MAXCPU))
    assert cp <= MAXCPU
except:
    print "Bad command taking all %d cores" % MAXCPU
    cp = MAXCPU  # set MAXCPU as CPU

list_pdb = [i for i in os.listdir(".") if i.endswith(".pdb")]  # Input PDB files
assert len(list_pdb) != 0

c = {}
d = {}
t = {}
devnull = file("Devnull", "wb")

for each in range(0, len(list_pdb), cp):  # Number of cores in use = 4
    for e in range(cp):
        if each + e < len(list_pdb):
            args = ["sh", "Child.sh", list_pdb[each + e], str(cp)]
            p = subprocess.Popen(args, shell=False,
                                 stdout=devnull, stderr=devnull)
            c[p.pid] = p
            print "Started Process : %s" % list_pdb[each + e]
    while c:
        print c.keys()
        pid, status = os.wait()
        if pid in c:
            print "Ended Process"
            del c[pid]

devnull.close()
Child.sh
#!/bin/sh
sh grand_Child.sh
sh grand_Child.sh
sh grand_Child.sh
sh grand_Child.sh
# Some heavy processes with $1
grand_Child.sh
#!/bin/sh
sleep 5
Output
Here's a version of the code using multiprocessing.Pool. It's a lot simpler, as the module does nearly all the work!
This version also does:
lots of logging when a proc starts/ends
prints how many files will be processed
lets you run more procs than numcpus at a time
Often when running multiprocess jobs, it's best to run more processes than CPUs, because different procs spend time waiting on I/O rather than the CPU. A common rule of thumb is 2n+1, so for a 4-core system you'd run 2*4+1 = 9 procs for a job. (I generally hardcode "5" or "10" until there's a reason to change; I'm lazy that way :) )
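As a concrete illustration of that 2n+1 sizing (just the pool construction, separate from the full script below):

import multiprocessing

# 2n+1 workers for an n-CPU machine, e.g. 9 workers on a 4-core box
pool = multiprocessing.Pool(2 * multiprocessing.cpu_count() + 1)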
Enjoy!
source
import glob
import multiprocessing
import os
import subprocess

MAXCPU = multiprocessing.cpu_count()
TEST = False

def do_work(args):
    path, numproc = args
    curproc = multiprocessing.current_process()
    print curproc, "Started Process, args={}".format(args)
    devnull = open(os.devnull, 'w')
    cmd = ["sh", "Child.sh", path, str(numproc)]
    if TEST:
        cmd.insert(0, 'echo')
    try:
        return subprocess.check_output(
            cmd, shell=False,
            stderr=devnull,
        )
    finally:
        print curproc, "Ended Process"

if TEST:
    cp = MAXCPU
    list_pdb = glob.glob('t*.py')
else:
    cp = int(raw_input("Enter Number of processes to use (%d CPUs) = " % MAXCPU))
    list_pdb = glob.glob('*.pdb')  # Input PDB files
    # assert cp <= MAXCPU

print '{} files, {} procs'.format(len(list_pdb), cp)
assert len(list_pdb) != 0

pool = multiprocessing.Pool(cp)
print pool.map(
    do_work, [(path, cp) for path in list_pdb],
)
pool.close()
pool.join()
output
27 files, 4 procs
<Process(PoolWorker-2, started daemon)> Started Process, args=('tdownload.py', 4)
<Process(PoolWorker-2, started daemon)> Ended Process
<Process(PoolWorker-2, started daemon)> Started Process, args=('tscapy.py', 4)
<Process(PoolWorker-2, started daemon)> Ended Process