Log process output to separate log file - python

I have the following parallel_executor.py module which I use to run several processes simultaneously,
import time
from multiprocessing import Process


class ParallelExecutor(object):
    def __init__(self, pool_size=10):
        self._pool_size = pool_size
        self._processes = []
        self._results = []

    def add_task(self, target, args=None, kwargs=None):
        args = [] if not args else args
        kwargs = {} if not kwargs else kwargs
        index = len(self._processes)
        process_args = (index, target, args, kwargs)
        process = Process(target=self._executor, args=process_args)
        self._processes.append(process)
        result = {'result': None, 'end_time': 0, 'completed': False}
        self._results.append(result)
        return index

    def run(self, block=True):
        if not block:
            for process in self._processes:
                process.start()
            return None
        else:
            counter = 0
            processes = []
            for process in self._processes:
                processes.append(process)
                process.start()
                if counter >= self._pool_size:
                    # Wait for completion and reset counters.
                    for i in range(len(processes)):
                        processes[i].join()
                    processes = []
                    counter = 0
                    continue
                counter += 1
            # Wait for the left over processes to complete.
            if len(processes) > 0:
                for i in range(len(processes)):
                    processes[i].join()
            return self._results

    def _executor(self, index, target, args, kwargs):
        try:
            self._results[index]['result'] = target(*args, **kwargs)
            self._results[index]['end_time'] = int(round((time.time())))
            self._results[index]['completed'] = True
        except Exception as exc:
            self._results[index]['exception'] = exc
            self._results[index]['completed'] = True
            raise
And I use it as follows (example.py):
from framework.lib.parallel_executor import ParallelExecutor
import time
import os


def foo(x):
    for i in range(3):
        print x
        time.sleep(0.5)
    return 123


def main():
    runner = ParallelExecutor()
    runner.add_task(foo, ["This"])
    runner.add_task(foo, ["is"])
    runner.add_task(foo, ["a"])
    runner.add_task(foo, ["test"])
    runner.run()
    runner.wait_for_executor_to_finish()
    for i in runner.get_results():
        print i

main()
My question is: how do I print the process ID with every statement of 'foo' that is printed to the output, by making changes only to the parallel_executor.py module and not touching example.py, so that I can later perform a 'grep' on the output of a particular process?

You can't do it without modifying the example at all, but you can achieve what you want with a very small modification.
Using the Python logging facilities, you can set a default log format so that every log line follows your standard.
In parallel_executor.py, add the following:
import logging
log_format = "%(process)d: %(message)s"
logging.basicConfig(level=logging.INFO, format=log_format)
In the example replace the line:
print x
with:
logging.info(x)
And you will see your messages appearing as:
34321: message content here
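For reference, here is a small self-contained sketch of the approach. It condenses the ParallelExecutor from the question down to bare Process calls; the only pieces actually being suggested are the logging configuration (which would live in parallel_executor.py) and the logging.info call in foo.

# Self-contained sketch of the suggestion above: the logging configuration would
# live in parallel_executor.py, and foo only swaps print for logging.info.
import logging
import time
from multiprocessing import Process

logging.basicConfig(level=logging.INFO, format="%(process)d: %(message)s")

def foo(x):
    for i in range(3):
        logging.info(x)   # emitted as "<pid>: <x>", so it can be grepped per process
        time.sleep(0.5)
    return 123

if __name__ == "__main__":
    procs = [Process(target=foo, args=(word,)) for word in ["This", "is", "a", "test"]]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

Grepping the combined output for a given PID then isolates the lines produced by that process.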

Related

Stop the running instances when max_instances is reached

I'm using apscheduler-django and I created a task that loops every 10 seconds.
This function will make a request to an API and save the content to my database (PostgreSQL).
This is my task:
scheduler.add_job(
    SaveAPI,
    trigger=CronTrigger(second="*/10"),
    id="SaveAPI",
    max_instances=1,
    replace_existing=True,
)
and my SaveAPI is:
def SaveAPI():
    SPORT = 3
    print('API Queue Started')
    AllMatches = GetAllMatches(SPORT)
    for Match in AllMatches:
        AddToDatabase(Match, SPORT)
    print(f'API Queue Ended')
The GetAllMatches and AddToDatabase are too big and I don't think the implementations are relevant to my question.
My problem is sometimes I will get this error:
Run time of job "SaveAPI (trigger: cron[second='*/10'], next run at: 2022-03-05 23:21:00 +0330)" was missed by 0:00:11.445357
When this happens, the job does not get replaced with a new instance because my SaveAPI function never ends, so apscheduler keeps missing new instances.
I ran many tests and the function itself does not have any problem.
How can I make apscheduler stop the last running instance if a new instance is going to be missed?
So if my last instance takes more than 10 seconds, I want to just terminate the instance and create a new one.
apscheduler and apscheduler-django don't directly support that.
You can implement and use a custom executor that tracks the futures of running jobs and cancels the earliest running instance (terminating its worker process) when a new instance of the same job is submitted while the previous one is still running.
Here's a MaxInstancesCancelEarliestProcessPoolExecutor that uses pebble.ProcessPool.
# Imports assumed for a runnable sketch (apscheduler 3.x, pebble):
import threading
from collections import defaultdict
from concurrent.futures import CancelledError
from concurrent.futures.process import BrokenProcessPool

from apscheduler.executors.base import MaxInstancesReachedError, run_job
from apscheduler.executors.pool import BasePoolExecutor
from pebble import ProcessPool


class MaxInstancesCancelEarliestProcessPoolExecutor(BasePoolExecutor):
    def __init__(self):
        pool = ProcessPool()
        pool.submit = lambda function, *args: pool.schedule(function, args=args)
        super().__init__(pool)
        self._futures = defaultdict(list)

    def submit_job(self, job, run_times):
        assert self._lock is not None, 'This executor has not been started yet'
        with self._lock:
            if self._instances[job.id] >= job.max_instances:
                f = self._futures[job.id][0]                          # +
                f.cancel()                                            # +
                try:                                                  # +
                    self._pool._pool_manager.update_status()          # +
                except RuntimeError:                                  # +
                    pass                                              # +
                if self._instances[job.id] >= job.max_instances:      # +
                    raise MaxInstancesReachedError(job)
            self._do_submit_job(job, run_times)
            self._instances[job.id] += 1

    def _do_submit_job(self, job, run_times):
        def callback(f):
            with self._lock:                                          # +
                self._futures[job.id].remove(f)                       # +
            try:                                                      # +
                exc, tb = (f.exception_info() if hasattr(f, 'exception_info') else
                           (f.exception(), getattr(f.exception(), '__traceback__', None)))
            except CancelledError:                                    # +
                exc, tb = TimeoutError(), None                        # +
            if exc:
                self._run_job_error(job.id, exc, tb)
            else:
                self._run_job_success(job.id, f.result())

        try:
            f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)
        except BrokenProcessPool:
            self._logger.warning('Process pool is broken; replacing pool with a fresh instance')
            self._pool = self._pool.__class__(self._pool._max_workers)
            f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)
        f.add_done_callback(callback)
        self._futures[job.id].append(f)                               # +

    def shutdown(self, wait=True):
        if wait:
            self._pool.close()
            self._pool.join()
        else:
            self._pool.close()
            threading.Thread(target=self._pool.join).start()
Usage:
scheduler.add_executor(MaxInstancesCancelEarliestProcessPoolExecutor(), alias='max_instances_cancel_earliest')
scheduler.add_job(
    SaveAPI,
    trigger=CronTrigger(second="*/10"),
    id="SaveAPI",
    max_instances=1,
    executor='max_instances_cancel_earliest',  # +
    replace_existing=True,
)
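The usage snippet assumes a scheduler object already exists. A minimal, hypothetical wiring (apscheduler 3.x, a plain BackgroundScheduler, with the django_apscheduler job store omitted) could look like this:

# Hypothetical wiring assumed around the snippet above (apscheduler 3.x,
# plain BackgroundScheduler; the django_apscheduler job store is omitted).
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

scheduler = BackgroundScheduler()
scheduler.add_executor(MaxInstancesCancelEarliestProcessPoolExecutor(),
                       alias='max_instances_cancel_earliest')
scheduler.add_job(
    SaveAPI,
    trigger=CronTrigger(second="*/10"),
    id="SaveAPI",
    max_instances=1,
    executor='max_instances_cancel_earliest',
    replace_existing=True,
)
scheduler.start()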

Python: store results of ProcessPoolExecutor

I'm very new to parallel processing with "concurrent.futures". The code seems to work, but I am not sure how to store the result of each process, so that I can mark the build as failed at the end if any process's return value is not zero.
I tried to create a list (exit_status) and append the results to it, but that raises an IndexError. What should I do instead?
#!/usr/bin/env python3
import concurrent.futures
import sys
import shutil
import os
import glob
import multiprocessing as mp
import json
from os import path


def slave(path1, path2, target):
    os.makedirs(target)
    shutil.copy(path1, target)
    shutil.copy(path2, target)
    os.system(<Login command>)
    os.system(<Image creation command>)
    os.system(<Copy to Other slaves or NFS>)
    # If any one of the above operations or commands fails for any of the
    # processes, the script should return 1 at the end of the execution,
    # i.e. fail the build at the end.


def main():
    processed = {}
    exit_status = []
    with open('example.json', 'r') as f:
        data = json.load(f)
    for value in data.items():
        for line in value[1]:
            if line.endswith('.zip'):
                targz = line
            elif line.endswith('.yaml'):
                yaml = line
                processed[targz] = yaml
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(slave, path1, path2, target)
            exit_status.append(ret.result())
    for i in exit_status:
        print("##########Result status: ", i)


if __name__ == "__main__":
    mp.set_start_method('spawn')
    main()
exit_status list's output:
##########Result status: None
##########Result status: None
Re: comments
If you want to get the result of a system call in order to act on it, subprocess.run is much more flexible and powerful than os.system. Additionally, if you actually want to perform the operations in parallel, you can't wait on result() after each task; otherwise you're only ever doing one thing at a time. Better to submit all the tasks and collect the Future objects, then iterate over those and wait on each result() once you've submitted all the work you want the executor to do.
def target_func(path1, path2, target):
    # ...
    # Instead of os.system, use subprocess.run.
    # You can inspect the stdout from the process:
    complete_process = subprocess.run(<Login command>, text=True, capture_output=True)
    if "success" not in complete_process.stdout:
        return "uh-oh"
    # You can also just check the return code (0 typically means a clean exit):
    if subprocess.run(<Image creation command>).returncode != 0:
        return "uh-oh"
    # Or you can tell `run` to raise an error if the return code is non-zero:
    try:
        subprocess.run(<Copy to Other slaves or NFS>, check=True)
    except subprocess.CalledProcessError:
        return "uh-oh"
    return "we did it!"
def main():
    # ...
    # ...
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for id, (path2, path1) in enumerate(processed.items(), 1):
            target = path.join("/tmp", "dir" + str(id))
            ret = executor.submit(target_func, path1, path2, target)
            exit_status.append(ret)
    for i in exit_status:
        print("##########Result status: ", i.result())

How to get every second's GPU usage in Python

I have a model which runs on tensorflow-gpu and my device is an NVIDIA GPU. I want to list the GPU usage every second so that I can measure the average/maximum GPU usage. I can do this manually by opening two terminals, one to run the model and another to measure with nvidia-smi -l 1. Of course, this is not a good way. I also tried to use a Thread to do that; here it is.
import subprocess as sp
import os
from threading import Thread


class MyThread(Thread):
    def __init__(self, func, args):
        super(MyThread, self).__init__()
        self.func = func
        self.args = args

    def run(self):
        self.result = self.func(*self.args)

    def get_result(self):
        return self.result


def get_gpu_memory():
    output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
    ACCEPTABLE_AVAILABLE_MEMORY = 1024
    COMMAND = "nvidia-smi -l 1 --query-gpu=memory.used --format=csv"
    memory_use_info = output_to_list(sp.check_output(COMMAND.split()))[1:]
    memory_use_values = [int(x.split()[0]) for i, x in enumerate(memory_use_info)]
    return memory_use_values


def run():
    pass


t1 = MyThread(run, args=())
t2 = MyThread(get_gpu_memory, args=())
t1.start()
t2.start()
t1.join()
t2.join()
res1 = t2.get_result()
However, this does not return every second's usage either. Is there a good solution?
In the command nvidia-smi -l 1 --query-gpu=memory.used --format=csv
the -l stands for:
-l, --loop= Probe until Ctrl+C at specified second interval.
So the command:
COMMAND = 'nvidia-smi -l 1 --query-gpu=memory.used --format=csv'
sp.check_output(COMMAND.split())
will never terminate and return.
It works if you move the loop from the command (nvidia-smi) into Python.
Here is the code:
import subprocess as sp
import os
from threading import Thread, Timer
import sched, time


def get_gpu_memory():
    output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
    ACCEPTABLE_AVAILABLE_MEMORY = 1024
    COMMAND = "nvidia-smi --query-gpu=memory.used --format=csv"
    try:
        memory_use_info = output_to_list(sp.check_output(COMMAND.split(), stderr=sp.STDOUT))[1:]
    except sp.CalledProcessError as e:
        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
    memory_use_values = [int(x.split()[0]) for i, x in enumerate(memory_use_info)]
    # print(memory_use_values)
    return memory_use_values


def print_gpu_memory_every_5secs():
    """
    This function calls itself every 5 secs and prints the gpu_memory.
    """
    Timer(5.0, print_gpu_memory_every_5secs).start()
    print(get_gpu_memory())


print_gpu_memory_every_5secs()
"""
Do stuff.
"""
Here is a more rudimentary way of getting this output, but it is just as effective and, I think, easier to understand. I added a small 10-value cache to get a good recent average and upped the check interval to every second. Each second it prints the current usage and the average of the last 10 seconds, so operations that cause usage can be identified (which I think was the point of the original question).
import subprocess as sp
import time

memory_total = 8192  # found with this command: nvidia-smi --query-gpu=memory.total --format=csv
memory_used_command = "nvidia-smi --query-gpu=memory.used --format=csv"
isolate_memory_value = lambda x: "".join(y for y in x.decode('ascii') if y in "0123456789")


def main():
    percentage_cache = []
    while True:
        memory_used = isolate_memory_value(sp.check_output(memory_used_command.split(), stderr=sp.STDOUT))
        percentage = float(memory_used)/float(memory_total)*100
        percentage_cache.append(percentage)
        percentage_cache = percentage_cache[max(0, len(percentage_cache) - 10):]
        print("curr: " + str(percentage) + " %", "\navg: " + str(sum(percentage_cache)/len(percentage_cache))[:4] + " %\n")
        time.sleep(1)


main()
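If a single average/maximum figure for one run of the model is all that is needed, the same nvidia-smi query can be sampled from a background thread while the workload runs and summarised afterwards. This is only a sketch: run_model is a placeholder for the tensorflow-gpu workload, and a single GPU with nvidia-smi on the PATH is assumed.

# Sketch: sample GPU memory once per second in a background thread while the
# workload runs, then report the average and maximum. Assumes a single GPU and
# that nvidia-smi is on PATH; run_model is a placeholder for the real workload.
import subprocess as sp
import threading
import time

def read_gpu_memory_mib():
    # One-shot query (no -l), so the call returns immediately.
    out = sp.check_output(
        "nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits".split())
    return int(out.decode("ascii").splitlines()[0].strip())

def sample_gpu_memory(samples, stop_event, interval=1.0):
    while not stop_event.is_set():
        samples.append(read_gpu_memory_mib())
        time.sleep(interval)

def run_model():
    time.sleep(5)   # placeholder for the tensorflow-gpu workload

samples, stop_event = [], threading.Event()
sampler = threading.Thread(target=sample_gpu_memory, args=(samples, stop_event))
sampler.start()
run_model()
stop_event.set()
sampler.join()
if samples:
    print("avg MiB:", sum(samples) / len(samples), "max MiB:", max(samples))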

Capture check_output value

I am trying to capture the return value of check_output instead of having it automatically print to the command line. Unfortunately, my solution is not working and I'm not sure why. I've included my code and its output:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from multiprocessing import Pool
from subprocess import check_output, CalledProcessError


def job(cmd):
    result = ""
    try:
        result = check_output(cmd.split())  # Split string into list.
        print("job result length = {0}".format(len(result)), file=sys.stdout)
    except CalledProcessError as error:
        raise Exception("Exit status of the child process: {0}\
            Command used to spawn child process: {1}\
            Output of the child process: {2}".format(error.returncode, error.cmd, error.output))


def main():
    # Sets up a process pool. Defaults to number of cores.
    # Each input gets passed to job and processed in a separate process.
    p = Pool()
    result = []
    try:
        # cmd_list is just a list of system commands which have been verified to work.
        result = list(p.imap_unordered(job, cmd_list))
        print("main result length = {0}".format(len(result)), file=sys.stdout)
        print("{0}".format(result), file=sys.stdout)
    except Exception as error:
        print("Error: {0}. Aborting...".format(error), file=sys.stderr)
        p.close()
        p.terminate()
    else:
        p.close()
        p.join()


if __name__ == '__main__':
    main()
Output
In addition to the output of each command executed by check_output, my print statements reveal some unexpected results:
job result length = 0
job result length = 0
main result length = 2
[None, None]
I would expect job result length to equal 2 and result to contain the return values of the child processes.
result is a local variable. Either return it:
def job(cmd):
    # something goes here
    return result

Or make it global:

result = ""

def job(cmd):
    global result
    # something goes here
    result = whatever it shall be.

Or parameterize it:

def job(cmd, result):
    result = whatever it shall be.
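Note that because job runs in a separate process via Pool.imap_unordered, only the first option (returning the value) will actually get the data back to the parent; a global or a rebound parameter set inside a worker process is not visible to the main process. A minimal sketch of the return-based fix, with hypothetical commands standing in for cmd_list:

# Sketch of the return-based fix, keeping the names from the question.
# The commands passed to main() are hypothetical stand-ins for cmd_list.
import sys
from multiprocessing import Pool
from subprocess import check_output, CalledProcessError

def job(cmd):
    try:
        return check_output(cmd.split())   # return the captured bytes to the parent
    except CalledProcessError as error:
        return error   # returning (not raising) keeps the pool's other tasks alive

def main(cmd_list):
    with Pool() as p:
        results = list(p.imap_unordered(job, cmd_list))
    print("main result length = {0}".format(len(results)), file=sys.stdout)
    for r in results:
        print(r)

if __name__ == '__main__':
    main(["echo hello", "uname -a"])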

Python output from Popen command from inside a thread into a GUI form in real time

Being new to Python, I have looked around the site and found partial answers, but nothing that makes things clear. This is what I have: the main window, through a button, starts the thread, which runs a command (wash -C -i monX), and a table in the GUI needs to be populated with the results in real time. I found the code for the loop here: Intercepting stdout of a subprocess while it is running.
The code is here: https://github.com/theodhori-dhiamanti/wifern/blob/master
The code in question is:
class WashThread(QtCore.QThread):
    def __init__(self, parent=None):
        super(WashThread, self).__init__(parent)

    def run(self):
        global mon_iface
        try:
            if mon_iface != '':
                device = mon_iface
                cmd = ['wash', '-C', '-i', device]
                wash_cmd = Popen(cmd, stdout=PIPE)
                for line in iter(wash_cmd.stdout.readline, b''):
                    if line.strip() == '' or line.startswith('---'): continue
                    if line.startswith('Wash') or line.startswith('Copyright') or line.startswith('BSSID'): continue
                    print line
                    Split = line.split(' ')
                    wash_bssid = Split[0]
                    wash_essid = Split[58]
                    wash_power = Split[19]
                    wash_locked = Split[42]
            else:
                print('No Good')
        except OSError:
            pass
and the GUI part that calls this method and where the results need to be used is:
def wash(self):
    row = 0
    col = 0
    self.wash_tableWidget.setColumnCount(4)
    self.wash_tableWidget.setColumnWidth(1, 150)
    self.wash_tableWidget.setColumnWidth(4, 30)
    self.wash_tableWidget.setColumnWidth(3, 70)
    self.wash_tableWidget.setRowCount(10)
    if self.start_wash_Button.text() == 'Start':
        self.start_wash_Button.setText('Stop')
        self.wash_thread.start()
        row_item = QtGui.QTableWidgetItem(wash_bssid)
        x = QtGui.QTableWidgetItem(wash_essid)
        y = QtGui.QTableWidgetItem(wash_power)
        z = QtGui.QTableWidgetItem(wash_locked)
        self.wash_tableWidget.setItem(row, col, row_item)
        self.wash_tableWidget.setItem(row, 1, x)
        self.wash_tableWidget.setItem(row, 2, y)
        self.wash_tableWidget.setItem(row, 3, z)
        row += 1
    else:
        try:
            os.kill(cmd.pid, SIGTERM)
            print('Done')
        except OSError:
            pass
        except UnboundLocalError:
            pass
self.wash_thread = WashThread() # method initialized in main body
How can I transfer the output from the thread to the main portion, so that I can then assign the values appropriately to the table fields?
Any help is greatly appreciated.
* Note: outside of the thread, the method works as intended.
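The question is left unanswered here, but a common pattern for this kind of hand-off (a sketch only, not from the original post) is to have the QThread emit a custom signal with the parsed fields and let a slot on the main window insert the row, since widgets must only be touched from the GUI thread:

# Sketch only (not from the original post): PyQt4-style signal/slot hand-off
# from the worker thread to the GUI thread, with the wash parsing stubbed out.
import sys
from PyQt4 import QtCore, QtGui

class WashThread(QtCore.QThread):
    # Signal carrying one parsed row: bssid, essid, power, locked.
    row_ready = QtCore.pyqtSignal(str, str, str, str)

    def run(self):
        # In the real code this would be the Popen/readline loop above;
        # a fixed row stands in for one parsed line of wash output.
        self.row_ready.emit('00:11:22:33:44:55', 'TestNet', '-42', 'No')

class MainWindow(QtGui.QWidget):
    def __init__(self):
        super(MainWindow, self).__init__()
        self.wash_tableWidget = QtGui.QTableWidget(0, 4, self)
        layout = QtGui.QVBoxLayout(self)
        layout.addWidget(self.wash_tableWidget)
        self.wash_thread = WashThread()
        # Cross-thread connection is queued, so the slot runs in the GUI thread.
        self.wash_thread.row_ready.connect(self.add_wash_row)
        self.wash_thread.start()

    def add_wash_row(self, bssid, essid, power, locked):
        row = self.wash_tableWidget.rowCount()
        self.wash_tableWidget.insertRow(row)
        for col, text in enumerate((bssid, essid, power, locked)):
            self.wash_tableWidget.setItem(row, col, QtGui.QTableWidgetItem(text))

if __name__ == '__main__':
    app = QtGui.QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())

With this structure the parsing stays in the thread and the table is only updated from the slot, which keeps the GUI responsive while wash keeps producing output.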
