How to get every second's GPU usage in Python

I have a model that runs on tensorflow-gpu on an NVIDIA device, and I want to record the GPU usage every second so that I can measure the average/max GPU usage. I can do this manually by opening two terminals, one to run the model and the other to measure with nvidia-smi -l 1. Of course, this is not a good way. I also tried to use a Thread to do it, shown here.
import subprocess as sp
import os
from threading import Thread

class MyThread(Thread):
    def __init__(self, func, args):
        super(MyThread, self).__init__()
        self.func = func
        self.args = args

    def run(self):
        self.result = self.func(*self.args)

    def get_result(self):
        return self.result

def get_gpu_memory():
    output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
    ACCEPTABLE_AVAILABLE_MEMORY = 1024
    COMMAND = "nvidia-smi -l 1 --query-gpu=memory.used --format=csv"
    memory_use_info = output_to_list(sp.check_output(COMMAND.split()))[1:]
    memory_use_values = [int(x.split()[0]) for i, x in enumerate(memory_use_info)]
    return memory_use_values

def run():
    pass

t1 = MyThread(run, args=())
t2 = MyThread(get_gpu_memory, args=())
t1.start()
t2.start()
t1.join()
t2.join()
res1 = t2.get_result()
However, this does not return the per-second usage either. Is there a good solution?

In the command nvidia-smi -l 1 --query-gpu=memory.used --format=csv
the -l stands for:
-l, --loop= Probe until Ctrl+C at specified second interval.
So the command:
COMMAND = 'nvidia-smi -l 1 --query-gpu=memory.used --format=csv'
sp.check_output(COMMAND.split())
will never terminate and return.
It works if you move the looping out of the command (nvidia-smi) and into Python.
Here is the code:
import subprocess as sp
import os
from threading import Thread, Timer
import sched, time

def get_gpu_memory():
    output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
    ACCEPTABLE_AVAILABLE_MEMORY = 1024
    COMMAND = "nvidia-smi --query-gpu=memory.used --format=csv"
    try:
        memory_use_info = output_to_list(sp.check_output(COMMAND.split(), stderr=sp.STDOUT))[1:]
    except sp.CalledProcessError as e:
        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
    memory_use_values = [int(x.split()[0]) for i, x in enumerate(memory_use_info)]
    # print(memory_use_values)
    return memory_use_values

def print_gpu_memory_every_5secs():
    """
    This function calls itself every 5 secs and prints the gpu_memory.
    """
    Timer(5.0, print_gpu_memory_every_5secs).start()
    print(get_gpu_memory())

print_gpu_memory_every_5secs()
"""
Do stuff.
"""

Here is a more rudimentary way of getting this output, but just as effective, and I think easier to understand. I added a small 10-value cache to get a good recent average and upped the check interval to every second. It outputs the average of the last 10 seconds and the current value each second, so operations that cause usage can be identified (which I think is what the original question was about).
import subprocess as sp
import time

memory_total = 8192  # found with this command: nvidia-smi --query-gpu=memory.total --format=csv
memory_used_command = "nvidia-smi --query-gpu=memory.used --format=csv"
isolate_memory_value = lambda x: "".join(y for y in x.decode('ascii') if y in "0123456789")

def main():
    percentage_cache = []
    while True:
        memory_used = isolate_memory_value(sp.check_output(memory_used_command.split(), stderr=sp.STDOUT))
        percentage = float(memory_used) / float(memory_total) * 100
        percentage_cache.append(percentage)
        percentage_cache = percentage_cache[max(0, len(percentage_cache) - 10):]
        print("curr: " + str(percentage) + " %", "\navg: " + str(sum(percentage_cache)/len(percentage_cache))[:4] + " %\n")
        time.sleep(1)

main()
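Since the original question asks for average and max usage, here is a small sketch (my addition, not from either answer) that samples memory.used once per second on a daemon thread and reports the average and peak afterwards. It assumes a single GPU and that nvidia-smi is on the PATH; GPUMonitor and query_gpu_memory_mib are names I made up for this example.

import subprocess as sp
import threading
import time

def query_gpu_memory_mib():
    # single-GPU assumption: take the first value reported by nvidia-smi
    command = "nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits"
    output = sp.check_output(command.split(), stderr=sp.STDOUT).decode("ascii")
    return int(output.strip().split("\n")[0])

class GPUMonitor(threading.Thread):
    def __init__(self, interval=1.0):
        super().__init__(daemon=True)
        self.interval = interval
        self.samples = []
        self._stop_event = threading.Event()

    def run(self):
        # sample until stop() is called
        while not self._stop_event.is_set():
            self.samples.append(query_gpu_memory_mib())
            time.sleep(self.interval)

    def stop(self):
        self._stop_event.set()

monitor = GPUMonitor()
monitor.start()
# ... run the TensorFlow model here ...
monitor.stop()
monitor.join()

if monitor.samples:
    print("avg MiB:", sum(monitor.samples) / len(monitor.samples))
    print("max MiB:", max(monitor.samples))

The noheader,nounits format options make nvidia-smi print just the number, so no CSV parsing is needed.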

Related

Stop the running instances when max_instances is reached

I'm using apscheduler-django and I created a task that loops every 10 seconds.
This function will make a request to an API and save the content to my database (PostgreSQL).
This is my task:
scheduler.add_job(
    SaveAPI,
    trigger=CronTrigger(second="*/10"),
    id="SaveAPI",
    max_instances=1,
    replace_existing=True,
)
and my SaveAPI is:
def SaveAPI():
    SPORT = 3
    print('API Queue Started')
    AllMatches = GetAllMatches(SPORT)
    for Match in AllMatches:
        AddToDatabase(Match, SPORT)
    print(f'API Queue Ended')
The GetAllMatches and AddToDatabase are too big and I don't think the implementations are relevant to my question.
My problem is sometimes I will get this error:
Run time of job "SaveAPI (trigger: cron[second='*/10'], next run at: 2022-03-05 23:21:00 +0330)" was missed by 0:00:11.445357
When this happens, the job does not get replaced with a new instance because my SaveAPI function hasn't finished, and apscheduler keeps missing new instances.
I ran many tests, and the function itself does not have any problem.
How can I make apscheduler stop the last running instance if a new instance is going to be missed?
So if my last instance takes more than 10 seconds, I want to just terminate the instance and create a new one.
apscheduler and apscheduler-django don't directly support that.
You can implement and use a custom executor that tracks the process running each job and kills that process when a new run of the same job is submitted while it is still running.
Here's a MaxInstancesCancelEarliestProcessPoolExecutor that uses pebble.ProcessPool.
# Imports assumed for this snippet (apscheduler, pebble, and the stdlib):
import threading
from collections import defaultdict
from concurrent.futures import CancelledError
from concurrent.futures.process import BrokenProcessPool
from apscheduler.executors.base import MaxInstancesReachedError, run_job
from apscheduler.executors.pool import BasePoolExecutor
from pebble import ProcessPool

class MaxInstancesCancelEarliestProcessPoolExecutor(BasePoolExecutor):
    def __init__(self):
        pool = ProcessPool()
        pool.submit = lambda function, *args: pool.schedule(function, args=args)
        super().__init__(pool)
        self._futures = defaultdict(list)

    def submit_job(self, job, run_times):
        assert self._lock is not None, 'This executor has not been started yet'
        with self._lock:
            if self._instances[job.id] >= job.max_instances:
                f = self._futures[job.id][0]                      # +
                f.cancel()                                        # +
                try:                                              # +
                    self._pool._pool_manager.update_status()      # +
                except RuntimeError:                              # +
                    pass                                          # +
                if self._instances[job.id] >= job.max_instances:  # +
                    raise MaxInstancesReachedError(job)
            self._do_submit_job(job, run_times)
            self._instances[job.id] += 1

    def _do_submit_job(self, job, run_times):
        def callback(f):
            with self._lock:                                      # +
                self._futures[job.id].remove(f)                   # +
            try:                                                  # +
                exc, tb = (f.exception_info() if hasattr(f, 'exception_info') else
                           (f.exception(), getattr(f.exception(), '__traceback__', None)))
            except CancelledError:                                # +
                exc, tb = TimeoutError(), None                    # +
            if exc:
                self._run_job_error(job.id, exc, tb)
            else:
                self._run_job_success(job.id, f.result())

        try:
            f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)
        except BrokenProcessPool:
            self._logger.warning('Process pool is broken; replacing pool with a fresh instance')
            self._pool = self._pool.__class__(self._pool._max_workers)
            f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)

        f.add_done_callback(callback)
        self._futures[job.id].append(f)                           # +

    def shutdown(self, wait=True):
        if wait:
            self._pool.close()
            self._pool.join()
        else:
            self._pool.close()
            threading.Thread(target=self._pool.join).start()
Usage:
scheduler.add_executor(MaxInstancesCancelEarliestProcessPoolExecutor(), alias='max_instances_cancel_earliest')

scheduler.add_job(
    SaveAPI,
    trigger=CronTrigger(second="*/10"),
    id="SaveAPI",
    max_instances=1,
    executor='max_instances_cancel_earliest',  # +
    replace_existing=True,
)

Python: store results of ProcessPoolExecutor

I'm very new to parallel processing with concurrent.futures. The code seems to work, but I am not sure how to store the result of each process so that I can mark the build as failed at the end if any process's return value is not zero.
I tried to create a list (exit_status) and append the results to it, but that raises an IndexError. What should I do instead?
#!/usr/bin/env python3
import concurrent.futures
import sys
import shutil
import os
import glob
import multiprocessing as mp
import json
from os import path

def slave(path1, path2, target):
    os.makedirs(target)
    shutil.copy(path1, target)
    shutil.copy(path2, target)
    os.system(<Login command>)
    os.system(<Image creation command>)
    os.system(<Copy to Other slaves or NFS>)
    # If any one of the above operation or command fails for any of the process,
    # the script should return 1 at the end of the execution or fail the build at last.

def main():
    processed = {}
    exit_status = []
    with open('example.json', 'r') as f:
        data = json.load(f)
    for value in data.items():
        for line in value[1]:
            if line.endswith('.zip'):
                targz = line
            elif line.endswith('.yaml'):
                yaml = line
                processed[targz] = yaml
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(slave, path1, path2, target)
            exit_status.append(ret.result())
    for i in exit_status:
        print("##########Result status: ", i)

if __name__ == "__main__":
    mp.set_start_method('spawn')
    main()
exit_status list's output:
##########Result status: None
##########Result status: None
Re: comments
If you want to get the result of a system call in order to act on it, using subprocess.run is much more flexible and powerful than os.system. Additionally, if you actually want to perform the operations in parallel, you can't wait on result() after each task; otherwise you're only ever doing one thing at a time. Better to submit all the tasks and collect the Future objects, then iterate over those and wait on each result() once you've submitted all the work you want the executor to do.
def target_func(path1, path2, target):
    # ...
    # instead of os.system, use subprocess.run
    # you can inspect the stdout from the process
    complete_process = subprocess.run(<Login command>, text=True, capture_output=True)
    if "success" not in complete_process.stdout:
        return "uh-oh"
    # you can also just check the return value (0 typically means clean exit)
    if subprocess.run(<Image creation command>).returncode != 0:
        return "uh-oh"
    # or you can tell `run` to generate an error if the returncode is non-zero
    try:
        subprocess.run(<Copy to Other slaves or NFS>, check=True)
    except subprocess.CalledProcessError:
        return "uh-oh"
    return "we did it!"

def main():
    # ...
    # ...
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(target_func, path1, path2, target)
            exit_status.append(ret)
    for i in exit_status:
        print("##########Result status: ", i.result())

How can I run a function for just 5 minutes?

import logging
from a.x.models import X
from a.x.management.commands.syncx import Command as SyncCommand
from a.x.adapter_classes import ADAPTER_CLASSES

LOGGER = logging.getLogger(__name__)

def logger_function(code):
    if not X.objects.filter(code=code).exists():
        X.objects.create(code=code)
        LOGGER.info(f"{X} created")
    args = []
    kwargs = {'x_code': code,
              'class': False,
              'database': 'default'}
    try:
        LOGGER.info(f"Starting syncx command for {code}")
        # or this command needs to be run just 5 minutes for every key
        SyncCommand().handle(*args, **kwargs)
        LOGGER.info(f"There is no error for {code}")
    except Exception as error:
        with open("logger.txt", "a") as file:
            file.write(f"{code}'s error is : {error}")
        LOGGER.info(f"Logging error about {code}\n")

def run():
    for key in ADAPTER_CLASSES.keys():
        # this function needs to be run just 5 minutes for every key
        logger_function(key)
My logger_function needs to run for at most 5 minutes. Is there a timer decorator, or a way to kill a thread after a timeout? How can I do this?
My for loop iterates over the keys and passes each one to logger_function. If there is any problem, the try/except block handles it and that's fine, but when everything goes right my SyncCommand can take many hours, and I only want to log errors from the first 5 minutes.
Is there any timer decorator
If you are allowed to use external libraries, I suggest taking a look at timeout-decorator.
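A minimal sketch of how that could look for this question (my addition; it assumes pip install timeout-decorator, and reuses ADAPTER_CLASSES and LOGGER from the question; the decorated call should raise timeout_decorator.TimeoutError once the limit is hit):

import timeout_decorator  # pip install timeout-decorator

@timeout_decorator.timeout(300)  # give up after 300 seconds (5 minutes)
def logger_function(code):
    ...  # same body as in the question

def run():
    for key in ADAPTER_CLASSES.keys():
        try:
            logger_function(key)
        except timeout_decorator.TimeoutError:
            LOGGER.info(f"{key} hit the 5 minute limit")

As far as I know, the decorator relies on signals by default, so it has to run in the main thread on a Unix-like system; it also accepts use_signals=False to fall back to multiprocessing, in which case the decorated function must be picklable.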
# importing the required module
import timeit

# code snippet to be executed only once
mysetup = "from math import sqrt"

# code snippet whose execution time is to be measured
mycode = '''
def example():
    mylist = []
    for x in range(100):
        mylist.append(sqrt(x))
'''

# timeit statement
print(timeit.timeit(setup=mysetup,
                    stmt=mycode,
                    number=10000))
I solved it using the signal library:
import signal

def handler(signum, frame):
    raise Exception(None)

# do stuff func
for key in ADAPTER_CLASSES.keys():
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(300)  # 5 min
    logger_function(key)
    signal.alarm(0)
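Since the question asked for a decorator, the same signal-based idea can also be packaged as one. A rough sketch (my addition; Unix only and main thread only, because those are the limitations of signal.alarm; the timeout helper below is my own naming):

import functools
import signal

def timeout(seconds):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            def handler(signum, frame):
                raise TimeoutError(f"{func.__name__} exceeded {seconds} seconds")
            old_handler = signal.signal(signal.SIGALRM, handler)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                signal.alarm(0)                              # cancel any pending alarm
                signal.signal(signal.SIGALRM, old_handler)   # restore the previous handler
        return wrapper
    return decorator

@timeout(300)  # 5 minutes
def logger_function(code):
    ...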

Multiprocessing With r2pipe

I'm having issues using r2pipe, Radare2's API, with the multiprocessing Pool.map function in Python. The problem I am facing is that the application hangs on pool.join().
My hope was to use multithreading via the multiprocessing.dummy class in order to evaluate functions quickly through r2pipe. I have tried passing my r2pipe object as a namespace using the Manager class, and I have attempted using events as well, but none of these seem to work.
# (imports omitted in the question: r2pipe, ThreadPool, ThreadMgr, product)
class Test:
    def __init__(self, filename=None):
        if filename:
            self.r2 = r2pipe.open(filename)
        else:
            self.r2 = r2pipe.open()
        self.r2.cmd('aaa')

    def t_func(self, args):
        f = args[0]
        r2_ns = args[1]
        print('afbj @ {}'.format(f['name']))
        try:
            bb = r2_ns.cmdj('afbj @ {}'.format(f['name']))
            if bb:
                return bb[0]['addr']
            else:
                return None
        except Exception as e:
            print(e)
            return None

    def thread(self):
        funcs = self.r2.cmdj('aflj')
        mgr = ThreadMgr()
        ns = mgr.Namespace()
        ns.r2 = self.r2
        pool = ThreadPool(2)
        results = pool.map(self.t_func, product(funcs, [ns.r2]))
        pool.close()
        pool.join()
        print(list(results))
This is the class I am using. I make a call to the Test.thread function in my main function.
I expect the application to print out the command it is about to run in r2pipe, afbj @ entry0, etc., and then to print out the list of results containing each function's first basic block address, [40000, 50000, ...].
The application does print out the command it is about to run, but then hangs before printing out the results.
ENVIRONMENT
radare2: radare2 4.2.0-git 23712 # linux-x86-64 git.4.1.1-97-g5a48a4017
commit: 5a48a401787c0eab31ecfb48bebf7cdfccb66e9b build: 2020-01-09__21:44:51
r2pipe: 1.4.2
python: Python 3.6.9 (default, Nov 7 2019, 10:44:02)
system: Ubuntu 18.04.3 LTS
SOLUTION
This may be due to passing the same instance of r2pipe.open() to every call of t_func in the pool. One solution is to move the following lines of code into t_func:
r2 = r2pipe.open('filename')
r2.cmd('aaa')
This works; however, it is terribly slow to re-analyze the binary for each thread/process.
Also, it is often faster to allow radare2 to do as much of the work as possible and limit the number of commands we need to send using r2pipe.
This problem is solved by using the command: afbj @@f
afbj  # List basic blocks of given function and show results in json
@@f   # Execute the command for each function
EXAMPLE
Longer Example
import r2pipe

R2: r2pipe.open_sync = r2pipe.open('/bin/ls')
R2.cmd("aaaa")

FUNCS: list = R2.cmd('afbj @@f').split("\n")[:-1]
RESULTS: list = []

for func in FUNCS:
    basic_block_info: list = eval(func)
    first_block: dict = basic_block_info[0]
    address_first_block: int = first_block['addr']
    RESULTS.append(hex(address_first_block))

print(RESULTS)
'''
['0x4a56', '0x1636c', '0x3758', '0x15690', '0x15420', '0x154f0', '0x15420',
'0x154f0', '0x3780', '0x3790', '0x37a0', '0x37b0', '0x37c0', '0x37d0', '0x0',
...,
'0x3e90', '0x6210', '0x62f0', '0x8f60', '0x99e0', '0xa860', '0xc640', '0x3e70',
'0xd200', '0xd220', '0x133a0', '0x14480', '0x144e0', '0x145e0', '0x14840', '0x15cf0']
'''
Shorter Example
import r2pipe
R2 = r2pipe.open('/bin/ls')
R2.cmd("aaaa")
print([hex(eval(func)[0]['addr']) for func in R2.cmd('afbj @@f').split("\n")[:-1]])

ThreadPool from python's multiprocessing hangs out

I have a phantom problem with one of my unit tests.
I use a ThreadPool from the multiprocessing package to wrap the stdout and stderr functions of my class that uses paramiko. During development I did some real-life tests with the code below and it worked nicely, but while writing a unit test for that code I ran into the problem that this use of ThreadPool hangs.
This part hangs about 95 percent of the time and only occasionally executes properly.
while not (self.__stdout_async_r.ready() and self.__stderr_async_r.ready()):
    time.sleep(WAIT_FOR_DATA)
I checked the values while debugging and found that sometimes one of the conditions is marked finished while the other is not, even though both functions have already completed, so the loop just keeps polling for a state that will never change.
The code to reproduce it (reduced to the functionality necessary for this issue):
import time
from multiprocessing.pool import ThreadPool

class ExecResult(object):
    def __init__(self, command=None, exit_status_func=None,
                 receive_stdout_func=None, receive_stderr_func=None,
                 connection=None):
        self.connection = connection
        self.stdout = None
        self.stderr = None
        self.ecode = None
        self.ts_stop = None
        self._exit_status_f = exit_status_func
        self.result_available = False
        self.__fetch_streams(receive_stdout_func, receive_stderr_func)

    def wait_for_data(self):
        WAIT_FOR_DATA = 0.1
        if not self.result_available:
            # Here it hangs out for 95 percent
            while not (self.__stdout_async_r.ready() and self.__stderr_async_r.ready()):
                time.sleep(WAIT_FOR_DATA)
            self.result_available = True
            self.ts_stop = time.time()
            self.stdout = self.__stdout_async_r.get(timeout=2)
            self.stderr = self.__stderr_async_r.get(timeout=2)
            self.ecode = self._exit_status_f()

    def __fetch_streams(self, stdout_func, stderr_func):
        stdout_t = ThreadPool(processes=1)
        stderr_t = ThreadPool(processes=1)
        self.__stdout_async_r = stdout_t.apply_async(func=stdout_func)
        self.__stderr_async_r = stderr_t.apply_async(func=stderr_func)
        stdout_t.close()
        stderr_t.close()

def stderr():
    return "stderr"

def stdout():
    return "stdout"

def exit():
    return "0"

# actual reproduction
res = ExecResult(None, exit, stdout, stderr, None)
res.wait_for_data()  # if data are available, get them; otherwise wait
print(res.stdout)
print(res.stderr)
print(res.ecode)
As it usually goes, I found the answer after some time spent cursing and making tea.
The solution is to add this after the close() calls:
stdout_t.join()
stderr_t.join()
So this is the repaired part as a whole:
def __fetch_streams(self, stdout_func, stderr_func):
    stdout_t = ThreadPool(processes=1)
    stderr_t = ThreadPool(processes=1)
    self.__stdout_async_r = stdout_t.apply_async(func=stdout_func)
    self.__stderr_async_r = stderr_t.apply_async(func=stderr_func)
    stdout_t.close()
    stderr_t.close()
    stdout_t.join()
    stderr_t.join()
