I have code that stress-tests CPU cores. I want a subset of the cores to run at 100% while the rest stay at 0%. The logic I use for the cores that should run at 100% is:
#Pass the CPU core number as affinity
def loop(conn, affinity):
    proc = psutil.Process()
    proc_info = proc.pid
    msg = "Process ID: "+str(proc_info)+" CPU: "+str(affinity[0])
    conn.send(msg)
    conn.close()
    proc.cpu_affinity(affinity)  #Allocate a certain CPU core for this process
    while True:
        1*1
The cores executing this code run at 100%.
I wrote another function and attach the remaining cores to processes executing it:
def rest_cores(affinity, exec_time):
    proc = psutil.Process()
    proc.cpu_affinity(affinity)
    time.sleep(exec_time)
According to this logic, the remaining cores should suspend execution for exec_time and sit at 0%, but they actually run at a noticeably higher percentage. How do I ensure that all the remaining cores stay at 0%?
Here is the full logic:
from multiprocessing import Process, Pipe
import os
import signal
import sys
import time
import psutil

exec_time = 60  # stress duration in seconds (value assumed here; not defined in the original snippet)

def loop(conn, affinity):
    proc = psutil.Process()
    proc_info = proc.pid
    msg = "Process ID: "+str(proc_info)+" CPU: "+str(affinity[0])
    conn.send(msg)
    conn.close()
    proc.cpu_affinity(affinity)  # Pin this process to the given CPU core
    while True:
        1*1

def rest_cores(affinity, exec_time):
    proc = psutil.Process()
    proc.cpu_affinity(affinity)
    time.sleep(exec_time)

def cpu_stress():
    procs = []
    conns = []
    n_cpu = psutil.cpu_count(logical=True)
    proc_num = n_cpu//2  # Half the cores will run at 100%
    for i in range(proc_num):  # First half of the cores
        parent_conn, child_conn = Pipe()
        p = Process(target=loop, args=(child_conn, [i]))
        p.start()
        procs.append(p)
        conns.append(parent_conn)
    for i in range(proc_num, n_cpu):  # Remaining cores
        parent_conn, child_conn = Pipe()
        p = Process(target=rest_cores, args=([i], exec_time))
        p.start()
        procs.append(p)
    for conn in conns:
        try:
            print(conn.recv())
        except EOFError:
            continue
    time.sleep(exec_time)
    for p in procs:
        p.terminate()

cpu_stress()
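For reference, a minimal sketch of how per-core load can be checked while this runs (it uses psutil, which the code above already imports; the number of samples is arbitrary):

import psutil

# Sample per-core utilisation a few times; interval=1 averages each sample over one second.
for _ in range(5):
    per_core = psutil.cpu_percent(interval=1, percpu=True)
    print(["%5.1f%%" % p for p in per_core])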
Related
I am trying to implement multiprocessing in a Python program where I need to run some CPU intensive code. In my test code the multiprocessing Queue and the multiprocessing Pool are both slower than a normal loop with no multiprocessing. During the Pool section of my code, I can see that the CPU usage is maxed out. However, it is still slower than the normal loop! Is there an issue with my code?
import time
from multiprocessing import Process
from multiprocessing import Queue
from multiprocessing import Pool
import random

def run_sims(iterations):
    sim_list = []
    for i in range(iterations):
        sim_list.append(random.uniform(0,1))
    print(iterations, "count", sum(sim_list)/len(sim_list))
    return (sum(sim_list)/len(sim_list))

def worker(queue):
    i=0
    while not queue.empty():
        task = queue.get()
        run_sims(task)
        i=i+1

if __name__ == '__main__':
    queue = Queue()
    iterations_list = [30000000, 30000000, 30000000, 30000000, 30000000]
    it_len = len(iterations_list)

    ## Queue ##
    print("#STARTING QUEUE#")
    start_t = time.perf_counter()
    for i in range(it_len):
        iterations = iterations_list[i]
        queue.put(iterations)
    process = Process(target=worker, args=(queue, ))
    process.start()
    process.join()
    end_t = time.perf_counter()
    print("Queue time: ", end_t - start_t)

    ## Pool ##
    print("#STARTING POOL#")
    start_t = time.perf_counter()
    with Pool() as pool:
        results = pool.imap_unordered(run_sims, iterations_list)
        for res in results:
            res
    end_t = time.perf_counter()
    print("Pool time: ", end_t - start_t)

    ## No Multiprocessing - Normal Loop
    print("#STARTING NORMAL LOOP#")
    start_t = time.perf_counter()
    for i in iterations_list:
        run_sims(i)
    end_t = time.perf_counter()
    print("Normal time: ", end_t - start_t)
I've tried the above code but the multiprocessing sections are slower than the normal loop:
Queue Time: 59 seconds
Pool Time: 83 seconds
Normal Loop Time: 55 seconds
My expectation is that Queue and Pool would be significantly faster than the normal loop.
I added processes to the queue code so that it performs about the same as the pool. On my machine, both the queue and the pool were significantly faster than the sequential version. I have 4 physical cores and 8 logical CPUs. Since this is a CPU-bound task, performance differences will vary depending on the number of available CPUs and whatever other work the machine is doing.
This script keeps the number of workers below the CPU count. If these were network-bound tasks, a larger pool could potentially perform faster. Disk-bound tasks would likely not benefit from a larger pool.
import time
from multiprocessing import Process
from multiprocessing import Queue
from multiprocessing import Pool
from multiprocessing import cpu_count
import random

def run_sims(iterations):
    sim_list = []
    for i in range(iterations):
        sim_list.append(random.uniform(0,1))
    print(iterations, "count", sum(sim_list)/len(sim_list))
    return (sum(sim_list)/len(sim_list))

def worker(queue):
    i=0
    while not queue.empty():
        task = queue.get()
        run_sims(task)
        i=i+1

if __name__ == '__main__':
    iteration_count = 5
    queue = Queue()
    iterations_list = [30000000] * iteration_count
    it_len = len(iterations_list)

    # guess a parallel execution size. CPU bound, and we want some
    # room for other processes.
    pool_size = max(min(cpu_count()-2, len(iterations_list)), 2)
    print("Pool size", pool_size)

    ## Queue ##
    print("#STARTING QUEUE#")
    start_t = time.perf_counter()
    for iterations in iterations_list:
        queue.put(iterations)
    processes = []
    for i in range(pool_size):
        processes.append(Process(target=worker, args=(queue, )))
        processes[-1].start()
    for process in processes:
        process.join()
    end_t = time.perf_counter()
    print("Queue time: ", end_t - start_t)

    ## Pool ##
    print("#STARTING POOL#")
    start_t = time.perf_counter()
    with Pool(pool_size) as pool:
        results = pool.imap_unordered(run_sims, iterations_list)
        for res in results:
            res
    end_t = time.perf_counter()
    print("Pool time: ", end_t - start_t)

    ## No Multiprocessing - Normal Loop
    print("#STARTING NORMAL LOOP#")
    start_t = time.perf_counter()
    for i in iterations_list:
        run_sims(i)
    end_t = time.perf_counter()
    print("Normal time: ", end_t - start_t)
I have found several other questions that touch on this topic but none that are quite like my situation.
I have several very large text files (3+ gigabytes in size).
I would like to process them (say 2 documents) in parallel using multiprocessing. As part of my processing (within a single process) I need to make an API call, and because of this I would like each process to have its own threads to run asynchronously.
I have come up with a simplified example (I have commented the code to try to explain what I think it should be doing):
import multiprocessing
from threading import Thread
import threading
from queue import Queue
import time

def process_huge_file(*, file_, batch_size=250, num_threads=4):
    # create an APICaller instance for each process that has its own Queue
    api_call = APICaller()
    batch = []
    # create threads that will run asynchronously to make API calls
    # I expect these to immediately block since there is nothing in the Queue (which is what
    # api_call.run depends on to make a call)
    threads = []
    for i in range(num_threads):
        thread = Thread(target=api_call.run)
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()
    ####
    # start processing the file line by line
    for line in file_:
        # if we are at our batch size, add the batch to the api_call to let the threads do
        # their api calling
        if i % batch_size == 0:
            api_call.queue.put(batch)
        else:
            # add fake line to batch
            batch.append(fake_line)

class APICaller:
    def __init__(self):
        # thread-safe queue to feed the threads which point at instances
        # of these APICaller objects
        self.queue = Queue()

    def run(self):
        print("waiting for something to do")
        self.queue.get()
        print("processing item in queue")
        time.sleep(0.1)
        print("finished processing item in queue")

if __name__ == "__main__":
    # fake docs
    fake_line = "this is a fake line of some text"
    # two fake docs with line length == 1000
    fake_docs = [[fake_line] * 1000 for i in range(2)]
    ####
    num_processes = 2
    procs = []
    for idx, doc in enumerate(fake_docs):
        proc = multiprocessing.Process(target=process_huge_file, kwargs=dict(file_=doc))
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()
As the code is now, "waiting for something to do" prints 8 times (which makes sense: 4 threads per process), and then it stops or "deadlocks", which is not what I expect. I expect it to start sharing time with the threads as soon as I start putting items in the Queue, but the code does not appear to make it that far. I would ordinarily step through to find the hang-up, but I still don't have a solid understanding of how best to debug with threads (another topic for another day).
In the meantime, can someone help me figure out why my code is not doing what it should be doing?
I have made a few adjustments and additions, and the code now appears to do what it is supposed to. The main adjustments are: adding a CloseableQueue class (from Brett Slatkin's Effective Python, Item 55), and ensuring that I call close and join on the queue so that the threads exit properly. Full code with these changes below:
import multiprocessing
from threading import Thread
import threading
from queue import Queue
import time
from concurrency_utils import CloseableQueue

def sync_process_huge_file(*, file_, batch_size=250):
    batch = []
    for idx, line in enumerate(file_):
        # do processing on the text
        if idx % batch_size == 0:
            time.sleep(0.1)
            batch = []
            # api_call.queue.put(batch)
        else:
            computation = 0
            for i in range(100000):
                computation += i
            batch.append(line)

def process_huge_file(*, file_, batch_size=250, num_threads=4):
    api_call = APICaller()
    batch = []
    # api call threads
    threads = []
    for i in range(num_threads):
        thread = Thread(target=api_call.run)
        threads.append(thread)
        thread.start()
    for idx, line in enumerate(file_):
        # do processing on the text
        if idx % batch_size == 0:
            api_call.queue.put(batch)
        else:
            computation = 0
            for i in range(100000):
                computation += i
            batch.append(line)
    # one close() per worker thread so that each thread receives its own sentinel
    for _ in threads:
        api_call.queue.close()
    api_call.queue.join()
    for thread in threads:
        thread.join()

class APICaller:
    def __init__(self):
        self.queue = CloseableQueue()

    def run(self):
        for item in self.queue:
            print("waiting for something to do")
            print("processing item in queue")
            time.sleep(0.1)
            print("finished processing item in queue")
        print("exiting run")

if __name__ == "__main__":
    # fake docs
    fake_line = "this is a fake line of some text"
    # two fake docs with line length == 10000
    fake_docs = [[fake_line] * 10000 for i in range(2)]
    ####
    time_s = time.time()
    num_processes = 2
    procs = []
    for idx, doc in enumerate(fake_docs):
        proc = multiprocessing.Process(target=process_huge_file, kwargs=dict(file_=doc))
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()
    time_e = time.time()
    print(f"took {time_e-time_s} ")

# concurrency_utils.py
class CloseableQueue(Queue):
    SENTINEL = object()

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def close(self):
        self.put(self.SENTINEL)

    def __iter__(self):
        while True:
            item = self.get()
            try:
                if item is self.SENTINEL:
                    return  # exit thread
                yield item
            finally:
                self.task_done()
As expected, this is a great speedup over running synchronously: 120 seconds vs. 50 seconds.
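For reference, the CloseableQueue pattern can also be exercised on its own. This is a minimal sketch assuming the class shown above is importable (e.g. saved as concurrency_utils.py, as the import at the top suggests); the worker names are made up for illustration:

from threading import Thread
from concurrency_utils import CloseableQueue

def consumer(queue, name):
    # iteration ends once this thread pulls the sentinel that close() puts
    for item in queue:
        print(name, "got", item)
    print(name, "exiting")

queue = CloseableQueue()
workers = [Thread(target=consumer, args=(queue, f"worker-{i}")) for i in range(2)]
for w in workers:
    w.start()

for item in ["a", "b", "c", "d"]:
    queue.put(item)

# one close() per worker so that every thread receives its own sentinel
for _ in workers:
    queue.close()
queue.join()  # wait until every item (and sentinel) has been marked task_done
for w in workers:
    w.join()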
Example:
I have installed a sensor in a car that sends data continuously. I have to process (fuse) the continuous data coming from the sensor, but while the processing is still running, new data keeps arriving. How do I store the data that arrives while the current processing is still executing, so it can be processed later?
sample code:
buffer1 = []
buffer2 = []

def process_function(buffer):
    ...  # processing

while True:
    # data is received continuously
    buffer1.append(data)
    if len(buffer1) > 0:
        process_function(buffer1)  # while this runs, incoming data should go to buffer2
        buffer2.append(data)
(While process_function is working on buffer1, the continuously arriving data should be stored in buffer2, so that after finishing with buffer1 the process can move on to buffer2, and then repeat.)
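A minimal sketch of that double-buffering idea with a background collector thread; read_sensor, the body of process_function, and all timings here are hypothetical placeholders:

import random
import threading
import time

def read_sensor():
    # hypothetical stand-in for the real sensor read
    return random.random()

def process_function(buffer):
    # placeholder for the real fusion/processing step
    time.sleep(1.0)
    print("processed", len(buffer), "samples")

lock = threading.Lock()
collecting = []  # buffer that is currently receiving new sensor data

def collector():
    while True:
        sample = read_sensor()
        with lock:
            collecting.append(sample)
        time.sleep(0.05)

threading.Thread(target=collector, daemon=True).start()

while True:
    # swap buffers: take everything collected so far for processing,
    # while the collector keeps filling the (now emptied) shared list
    with lock:
        to_process = collecting[:]
        collecting.clear()
    if to_process:
        process_function(to_process)
    else:
        time.sleep(0.1)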
You could use a multiprocessing Queue and two processes: one for the producer and one for the consumer:
import time
from multiprocessing import Process, Queue

def collection_sensor_values(mp_queue):
    fake_value = 0
    while True:
        mp_queue.put(f"SENSOR_DATA_{fake_value}")
        fake_value += 1
        time.sleep(2)

def process_function(mp_queue):
    while True:
        sensor_reading = mp_queue.get(block=True)
        print(f"Received sensor reading: {sensor_reading}")

q = Queue()
sensor_collector_process = Process(target=collection_sensor_values, args=(q,))
readings_process = Process(target=process_function, args=(q,))

all_procs = [sensor_collector_process, readings_process]
for p in all_procs:
    p.start()
for p in all_procs:
    # run until either process stops
    if p.is_alive():
        p.join()
for p in all_procs:
    if p.is_alive():
        p.terminate()
I used Python to execute this program on Ubuntu:
import thread
import time

# Define a function for the thread
def print_time(threadName, delay):
    count = 0
    while True:
        count += 1

# Create the threads as follows
try:
    for index in xrange(1, 50000):
        thread.start_new_thread(print_time, ("Thread-" + str(index), 0, ))
except:
    print "Error: unable to start thread"

while 1:
    pass
I want all 8 cores to be at 100% usage, but in System Monitor I only get 50% usage on the first 4 cores and 25% usage on the last 4 cores.
How can I make all 8 cores run at 100% usage with Python?
Something like this will get you started. You'd need to tweak num_processes in order to match your hardware.
import multiprocessing as mp
import time

def slow_func():
    while True:
        for i in xrange(99999):
            j = i*i

def main():
    num_processes = 4
    for _ in range(num_processes):
        process = mp.Process(target = slow_func)
        process.daemon = True
        process.start()
    while True:
        time.sleep(1)

if __name__ == '__main__':
    main()
Edit: this works for me on Windows with 4 cores and gives 4x 25% processor usage.
To compare to the threading module, you can import threading and replace the line process = mp.Process(target = slow_func) with process = threading.Thread(target = slow_func). You should find it uses only one of your cores.
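For comparison, the threading variant described above would look roughly like this; because of CPython's GIL it will typically keep only one core busy:

import threading
import time

def slow_func():
    while True:
        for i in xrange(99999):  # as in the snippet above; use range() on Python 3
            j = i*i

def main():
    num_threads = 4
    for _ in range(num_threads):
        thread = threading.Thread(target=slow_func)
        thread.daemon = True
        thread.start()
    while True:
        time.sleep(1)

if __name__ == '__main__':
    main()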
I would like to be able to do something like this:
import subprocess

nproc = 0
for i in range(100):
    subprocess.Popen(commands[i], when_finished="nproc -= 1")
    nproc += 1

while nproc > 0:
    print("%d processes running..." % nproc)
Poll the processes:
import subprocess
from time import sleep

processes = [subprocess.Popen(command) for command in commands]
while True:
    nproc = sum(p.poll() is None for p in processes)
    if not nproc:
        break
    print("%d processes running..." % nproc)
    sleep(0.1)
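For completeness, a runnable usage sketch of the polling loop with placeholder commands (the snippets above never define commands, so these are made up; the Unix sleep command is used as a stand-in):

import subprocess
from time import sleep

# placeholder commands purely for illustration (Unix 'sleep')
commands = [["sleep", str(n)] for n in (1, 2, 3)]

processes = [subprocess.Popen(command) for command in commands]
while True:
    nproc = sum(p.poll() is None for p in processes)
    if not nproc:
        break
    print("%d processes running..." % nproc)
    sleep(0.1)
print("all done")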