I'm starting a pool of workers and submitting jobs to this pool. Each process creates a subprocess with a browser, waits for the page to load and then takes a screenshot. Sometimes Opera shows a crash dialog for the incorrectly terminated session; to avoid this I kill the tab through xkill and wait for the browser to terminate. Now I need to handle the SIGTERM signal correctly. After the signal has been caught and handled in the sig_handler function, I prevent new jobs from being submitted with pool.close() and wait for the pool to terminate with pool.join(). When the pool is not running any subprocesses the main process terminates normally, but when the pool has an active subprocess all worker processes terminate without waiting for the browser. How can I terminate my main process cleanly?
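For reference, the usual fix is to stop the signal from reaching the workers at all. A SIGTERM delivered to the whole process group also hits the pool's children, which then die mid-job no matter what the parent does. A minimal sketch, assuming that is what happens here: install an initializer that makes workers ignore SIGTERM, leaving only the parent to handle it; pool.close() plus pool.join() then waits for in-flight jobs.
# Sketch, assuming SIGTERM is delivered to the whole process group:
# workers ignore it so a browser job is never killed mid-screenshot.
import signal
import multiprocessing

def init_worker():
    # children inherit the parent's handlers, so reset SIGTERM to "ignore"
    signal.signal(signal.SIGTERM, signal.SIG_IGN)

pool = multiprocessing.Pool(processes=4, initializer=init_worker)
# ... submit jobs, then on shutdown:
pool.close()   # stop accepting new jobs
pool.join()    # wait for running jobs to finish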
#!/usr/bin/env python
#
# Workaround for http://bugs.python.org/issue6766: manager data is packed with pickle in the functions below
#
import redis
import pickle
import getopt
import time
import logging
import os
import sys
import pwd
import subprocess
import re
import urllib2
import signal
import multiprocessing
import httplib
# Define regexps
xvfb_reg = re.compile(r'Xvfb :(\d+)')
browser_reg = re.compile(r'0x(\d+) .* \("opera" "Opera"\) 1024x768')
running = True
def sig_handler(signum, frame):
"""
Set termination flag
"""
global running
running = False
return
def check_url_code(url):
"""
    Try fetching the url before processing.
    Return True if the response code is 200 OK, else False.
"""
try:
url = urllib2.urlopen(url)
code = url.getcode()
if code == 200:
return True
else:
return False
except (urllib2.URLError, httplib.InvalidURL, ValueError):
return False
def list_display():
"""
Get working virtual framebuffers
"""
proc = subprocess.Popen(['/bin/ps', 'ax'], stdout=subprocess.PIPE)
return xvfb_reg.findall(proc.communicate()[0])
def get_display(queue, lock):
"""
Get display for opera instance.
"""
while True:
lock.acquire()
_queue = pickle.loads(queue['q'])
free = list(set(_queue['displays']).difference(_queue['locked_displays']))
if len(free):
_queue['locked_displays'].append(free[0])
queue['q'] = pickle.dumps(_queue)
lock.release()
return free[0]
lock.release()
time.sleep(3)
def get_screenshot(data, display):
"""
    Fork a background Opera process, then find the window showing the url.
    Wait 30 seconds and take a screenshot of the window.
    xkill kills the Opera window, because Opera terminates by itself once it has no open tabs.
"""
try:
os.remove('.opera/{0}/sessions/autosave.win'.format(display))
    except OSError:  # no autosave session to remove
pass
proc = subprocess.Popen(['/usr/bin/opera', '-geometry', '1024x768+0+0', '-fullscreen', '-display', ':{0}'.format(display), '-pd', '.opera/{0}'.format(display), data['url']])
time.sleep(10)
    if int(data['size']) == 120:
        geometry = '120x90'
    elif int(data['size']) == 240:
        geometry = '240x151'
    elif int(data['size']) == 400:
        geometry = '400x300'
    else:
        geometry = '1024x768'  # fall back to the full window size for unexpected values
try:
os.makedirs(data['path'])
except OSError:
pass
xwin_proc = subprocess.Popen(['/usr/bin/xwininfo', '-display', ':{0}'.format(display), '-root', '-tree'], stdout=subprocess.PIPE)
xwin_info = xwin_proc.communicate()[0]
window = browser_reg.findall(xwin_info)[0]
time.sleep(5)
pimport = subprocess.Popen(['/usr/bin/import', '-display', ':{0}'.format(display), '-window', 'root', '-resize', geometry, data['file']], stdout=subprocess.PIPE)
pimport.wait()
logging.info('Screenshot {0} for {1}: display={2}, window=0x{3}, file={4}'.format(geometry, data['url'], display, window, data['file']))
pxkill = subprocess.Popen(['/usr/bin/xkill', '-display', ':{0}'.format(display), '-id', '0x{0}'.format(window)])
proc.wait()
def worker_process(data, display, lock, connection, queue):
"""
    Take the screenshot, then free the display and clear the job.
"""
get_screenshot(data, display)
lock.acquire()
_queue = pickle.loads(queue['q'])
_queue['locked_displays'].remove(display)
queue['q'] = pickle.dumps(_queue)
lock.release()
connection.hdel('jobs', data['md5_url'])
connection.hincrby('stats', 'completed', 1)
return
def main(pool, queue, lock, connection, job):
"""
    Check whether the file was already created by another job, and check the url and url locks.
"""
data = pickle.loads(job)
if os.path.isfile(data['path']):
connection.hdel('jobs', data['md5_url'])
return
lock.acquire()
_queue = pickle.loads(queue['q'])
if not check_url_code(data['url']):
logging.error('Error fetching {0}'.format(data['url']))
lock.release()
connection.hdel('jobs', data['md5_url'])
return
lock.release()
display = get_display(queue, lock)
pool.apply_async(worker_process, args = (data, display, lock, connection, queue))
def create_daemon(home):
try:
pid = os.fork()
except OSError:
        sys.exit('Cannot daemonize process')
if pid == 0:
os.setsid()
try:
pid = os.fork()
except OSError:
            sys.exit('Cannot daemonize process')
if pid == 0:
os.chdir(home)
os.umask(0)
else:
os._exit(0)
else:
os._exit(0)
import resource
maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
if (maxfd == resource.RLIM_INFINITY):
maxfd = 1024
for fd in range(0, maxfd):
try:
os.close(fd)
except OSError:
pass
if hasattr(os, 'devnull'):
console = os.devnull
else:
console = '/dev/null'
os.open(console, os.O_RDWR)
os.dup2(0, 1)
os.dup2(0, 2)
return (0)
def help():
print """
Usage: {0} -u screenshot -l /var/log/screenshot/server.log -p /var/run/screenshot.pid
--user Set unprivileged user for the process. This user can't be nobody, because the script
-u reads the home directory from passwd and uses it for the Opera user data dirs.
--log Set log file.
-l
--pid Set pid file.
-p
--help This help.
-h
""".format(sys.argv[0])
if __name__ == '__main__':
log_file = '/var/log/screenshot/server.log'
pid_file = '/var/run/screenshot.pid'
user = None
try:
opts, args = getopt.getopt(sys.argv[1:], 'l:p:u:h', ['log', 'pid', 'user', 'help'])
except getopt.GetoptError:
help()
sys.exit(2)
for opt, arg in opts:
if opt in ('-h', '--help'):
help()
sys.exit()
elif opt in ('-l', '--log'):
log_file = arg
elif opt in ('-p', '--pid'):
pid_file = arg
elif opt in ('-u', '--user'):
user = arg
if user:
if not os.geteuid() == 0:
sys.exit('You need root privileges to set user')
try:
userl = pwd.getpwnam(user)
uid = userl.pw_uid
home = userl.pw_dir
except KeyError:
sys.exit('User {0} does not exist'.format(user))
os.setuid(uid)
os.chdir(home)
else:
sys.exit('You must set user')
    # Fork child process for daemonization
retval = create_daemon(home)
# Write pid to pidfile
pid = os.getpid()
open(pid_file, 'w').write(str(pid))
# Open logfile
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
datefmt='%m-%d %H:%M',
filename=log_file)
logging.info('Starting server with pid {0}'.format(os.getpid()))
#
# Get working displays and start subprocesses
displays = list_display()
logging.info('Found displays: {0}'.format(' '.join(displays)))
pool = multiprocessing.Pool(processes=len(displays))
queue = multiprocessing.Manager().dict()
queue['q'] = pickle.dumps({
'displays' : displays,
'termination' : False,
'locked_displays' : []})
lock = multiprocessing.Manager().Lock()
connection = redis.Redis('localhost')
# Handle termination signals
signal.signal(signal.SIGTERM, sig_handler)
while running:
job = connection.lpop('high_priority')
if job is None:
job = connection.rpop('low_priority')
        if job is not None:
main(pool, queue, lock, connection, job)
else:
time.sleep(5)
logging.info('Server stopped')
pool.close()
pool.join()
os._exit(0)
I'm writing a subprocess-based Python program that acts as a proxy between the user input and the subprocess (trying to go beyond pexpect). I've taken this thread as a reference, and a code chunk from pexpect (the _read_incoming() method of popen_spawn) to read output (the fcntl method worked, but not satisfactorily).
The code runs but has a problem: there seems to be an additional carriage return being sent to the process. This causes me issues when I try to do things like sending passwords to ssh etc.
Could you look into what might be the issue? Thanks!
The code is as follows:
from queue import Queue, Empty
from threading import Thread
import subprocess
import signal
import fcntl
import os
terminating = False
def setNonBlocking(fd):
"""
    Set the given file descriptor to non-blocking.
"""
print(fd)
flags = fcntl.fcntl(fd, fcntl.F_GETFL)
flags = flags | os.O_NONBLOCK
fcntl.fcntl(fd, fcntl.F_SETFL, flags)
def enqueue(out, q):
fileno = out.fileno()
while not terminating:
buf = b''
try:
buf = os.read(fileno, 1024)
if buf and len(buf)>0:
q.put(buf)
except OSError as e:
#print("OS error: {0}".format(e))
pass
if not buf:
q.put(buf)
# for line in iter(out.readline, b''):
# if len(line.strip()) > 0:
# print(line)
# q.put(line)
out.close()
print('Terminating')
return
def get_output(q):
out_str = bytes()
while True:
try:
incoming = q.get_nowait()
except Empty:
break
else:
if incoming is None:
break
else:
out_str += incoming
if out_str:
return out_str
else:
return b''
def explore(cmd="/bin/bash"):
global terminating
universal_newlines = False
p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT,
bufsize=0, shell=False, universal_newlines=universal_newlines)
#setNonBlocking(p.stdout)
outQueue = Queue()
outThread = Thread(target=enqueue, args=(p.stdout, outQueue))
outThread.daemon = True
outThread.start()
while True:
try:
someInput = input()
print('[In]:'+someInput)
someInput += '\n'
if not universal_newlines:
p.stdin.write(someInput.encode('utf-8'))
else:
p.stdin.write(someInput)
p.stdin.flush()
out = get_output(outQueue).decode('utf-8')
print('[Out]:'+out)
#p.communicate(someInput+'\n')
except KeyboardInterrupt:
print('Interrupting')
p.send_signal(signal.SIGINT)
terminating = True
outThread.join()
break
p.wait()
if __name__ == '__main__':
explore()
Example run:
ls
[In]:ls
[Out]:
[In]:
[Out]:explorer.py
__init__.py
^CInterrupting
Terminating
The second [In] was just an Enter from the user.
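Note also the one-step lag visible above: get_output() drains the queue immediately after the write, before bash has had time to respond, so each command's output only surfaces on the next iteration. A crude, hedged tweak is to pause briefly before draining:
# Hedged tweak: give the child a moment to produce output before draining.
import time
p.stdin.flush()
time.sleep(0.2)
out = get_output(outQueue).decode('utf-8')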
Update:
Tested the alternative using pexpect's popen_spawn module. Same result:
from pexpect.popen_spawn import PopenSpawn as Spawn
import signal
def explore(cmd="/bin/bash"):
p = Spawn(cmd)
while True:
try:
someInput = input()
print('[In]:'+someInput)
p.sendline(someInput)
out = p.read_nonblocking(size=1024, timeout=-1).decode('utf-8')
print('[Out]:'+out)
#p.communicate(someInput+'\n')
except KeyboardInterrupt:
print('Interrupting')
p.sendeof()
p.kill(signal.SIGINT)
break
if __name__ == '__main__':
explore()
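One note on the ssh password case from the question: ssh reads passwords from the controlling terminal (/dev/tty), not from stdin, so a pipe-based proxy (pexpect's popen_spawn included) generally cannot feed it a password. A pseudo-terminal is required, along the lines of the pty-based code further down this page; the minimal sketch is:
# Minimal sketch, POSIX only: pty.spawn gives the child a real controlling
# terminal, so programs that read from /dev/tty (ssh, passwd) work.
import pty
pty.spawn("/bin/bash")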
On some YouTube links youtube_dl takes hours trying to download the video, so I want to set a time limit on how long it tries to download a video. On macOS/Linux you can use signal or Interrupting Cow, but I run Windows and can't figure out how to stop this process after some time.
I've tried using some timeout code from other Stack Overflow posts, in particular:
#I got the code immediately below from a different stack overflow post:
from contextlib import contextmanager
import threading
import _thread
import subprocess
import requests  # both used by the loop below but missing from the original snippet
class TimeoutException(Exception):
def __init__(self, msg=''):
self.msg = msg
@contextmanager
def time_limit(seconds, msg=''):
timer = threading.Timer(seconds, lambda: _thread.interrupt_main())
timer.start()
try:
yield
except KeyboardInterrupt:
raise TimeoutException("Timed out for operation {}".format(msg))
finally:
# if the action ends in specified time, timer is canceled
timer.cancel()
#This I'm trying to have a timeout for.
if __name__ == '__main__':
for i in range(len(df)):
url = df.loc[i, 'url']
artist_name = df.loc[i, 'Artist']
track_name = df.loc[i, 'Title']
html = requests.get(url)
index_begin = html.text.find('href=\"https://www.youtube.com')
youtube_link = html.text[index_begin + 6: index_begin + 49]
print(youtube_link)
# Run youtube-dl to download the youtube song with the link:
new_track = artist_name + "--" + track_name
location = "SongMP3_files/" + new_track + ".%(ext)s"
process_call = ["youtube-dl", "--audio-format", "mp3", "-x", "-R 2", "--no-playlist", "-o", location, youtube_link]
try:
with time_limit(10, 'aahhh'):
subprocess.run(process_call)
except TimeoutException:
            print("didn't work")
I think you are looking for something like the following code. It should work on Windows as well.
from subprocess import Popen, PIPE
from threading import Timer
def run(cmd, timeout_sec):
proc = Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
timer = Timer(timeout_sec, proc.kill)
try:
timer.start()
stdout, stderr = proc.communicate()
finally:
timer.cancel()
run("sleep 1", 5)
run("sleep 5", 1)
I need to run an interactive Bash instance in a separate process in Python with its own dedicated TTY (I can't use pexpect).
I used this code snippet I commonly see used in similar programs:
master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
x = os.read(master, 1026)
print x
subprocess.Popen.kill(p)
os.close(master)
But when I run it I get the following output:
$ ./pty_try.py
bash: cannot set terminal process group (10790): Inappropriate ioctl for device
bash: no job control in this shell
Strace of the run shows some errors:
...
readlink("/usr/bin/python2.7", 0x7ffc8db02510, 4096) = -1 EINVAL (Invalid argument)
...
ioctl(3, SNDCTL_TMR_TIMEBASE or SNDRV_TIMER_IOCTL_NEXT_DEVICE or TCGETS, 0x7ffc8db03590) = -1 ENOTTY (Inappropriate ioctl for device)
...
readlink("./pty_try.py", 0x7ffc8db00610, 4096) = -1 EINVAL (Invalid argument)
The code snippet seems pretty straightforward. Is Bash not getting something it needs? What could be the problem here?
This is a solution for running an interactive command in a subprocess. It uses a pseudo-terminal to make stdout non-blocking (some commands also need a tty device, e.g. bash), and select to handle input and output to the subprocess.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import select
import termios
import tty
import pty
from subprocess import Popen
command = 'bash'
# command = 'docker run -it --rm centos /bin/bash'.split()
# save original tty setting then set it to raw mode
old_tty = termios.tcgetattr(sys.stdin)
tty.setraw(sys.stdin.fileno())
# open pseudo-terminal to interact with subprocess
master_fd, slave_fd = pty.openpty()
try:
    # use os.setsid() to make it run in a new process group, or bash job control will not be enabled
p = Popen(command,
preexec_fn=os.setsid,
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
universal_newlines=True)
while p.poll() is None:
r, w, e = select.select([sys.stdin, master_fd], [], [])
if sys.stdin in r:
d = os.read(sys.stdin.fileno(), 10240)
os.write(master_fd, d)
elif master_fd in r:
o = os.read(master_fd, 10240)
if o:
os.write(sys.stdout.fileno(), o)
finally:
# restore tty settings back
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_tty)
This is the solution that worked for me in the end (as suggested by qarma):
import ctypes
import os
import pty
import subprocess

libc = ctypes.CDLL('libc.so.6')
master, slave = pty.openpty()
p = subprocess.Popen(["/bin/bash", "-i"], preexec_fn=libc.setsid, stdin=slave, stdout=slave, stderr=slave)
os.close(slave)
... do stuff here ...
x = os.read(master, 1026)
print x
Here is a full object-oriented solution for running interactive shell commands with TTYs, using threads and queues for stdout and stderr IO handling. It took me a while to assemble from multiple sources, but it has worked well so far on Unix/Linux systems, including as part of a Juniper op script. Posting it here to save others the time of building something similar.
import pty
import re
import select
import threading
from datetime import datetime, timedelta
import os
import logging
import subprocess
import time
from queue import Queue, Empty
lib_logger = logging.getLogger("lib")
# The original post uses DEVICE_TIMEOUT without defining it; a default is
# assumed here (seconds to wait in open()/wait_for()).
DEVICE_TIMEOUT = 60
# Handler function to be run as a thread for pulling pty channels from an interactive shell
def _pty_handler(pty_master, logger, queue, stop):
poller = select.poll()
poller.register(pty_master, select.POLLIN)
while True:
# Stop handler if flagged
if stop():
logger.debug("Disabling pty handler for interactive shell")
break
fd_event = poller.poll(100)
for descriptor, event in fd_event:
# Read data from pipe and send to queue if there is data to read
if event == select.POLLIN:
data = os.read(descriptor, 1).decode("utf-8")
if not data:
break
# logger.debug("Reading in to handler queue: " + data)
queue.put(data)
# Exit handler if stdout is closing
elif event == select.POLLHUP:
logger.debug("Disabling pty handler for interactive shell")
break
# Function for reading outputs from the given queue by draining it and returning the output
def _get_queue_output(queue: Queue) -> str:
value = ""
try:
while True:
value += queue.get_nowait()
except Empty:
return value
# Helper function to create the needed list for popen and print the command run to the logger
def popen_command(command, logger, *args):
popen_list = list()
popen_list.append(command)
command_output = command
for arg in args:
popen_list.append(arg)
command_output += " " + arg
lib_logger.debug("Making Popen call using: " + str(popen_list))
logger.debug("")
logger.debug(command_output)
logger.debug("")
return popen_list
# Class for create an interactive shell and sending commands to it along with logging output to loggers
class InteractiveShell(object):
def __init__(self, command, logger, *args):
self.logger = logger
self.command = command
self.process = None
self.popen_list = popen_command(command, logger, *args)
self.master_stdout = None
self.slave_stdout = None
self.master_stderr = None
self.slave_stderr = None
self.stdout_handler = None
self.stderr_handler = None
self.stdout_queue = None
self.stderr_queue = None
self.stop_handlers = False
# Open interactive shell and setup all threaded IO handlers
def open(self, shell_prompt, timeout=DEVICE_TIMEOUT):
# Create PTYs
self.master_stdout, self.slave_stdout = pty.openpty()
self.master_stderr, self.slave_stderr = pty.openpty()
# Create shell subprocess
self.process = subprocess.Popen(self.popen_list, stdin=self.slave_stdout, stdout=self.slave_stdout,
stderr=self.slave_stderr, bufsize=0, start_new_session=True)
lib_logger.debug("")
lib_logger.debug("Started interactive shell for command " + self.command)
lib_logger.debug("")
# Create thread and queues for handling pty output and start them
self.stdout_queue = Queue()
self.stderr_queue = Queue()
self.stdout_handler = threading.Thread(target=_pty_handler, args=(self.master_stdout,
lib_logger,
self.stdout_queue,
lambda: self.stop_handlers))
self.stderr_handler = threading.Thread(target=_pty_handler, args=(self.master_stderr,
lib_logger,
self.stderr_queue,
lambda: self.stop_handlers))
self.stdout_handler.daemon = True
self.stderr_handler.daemon = True
lib_logger.debug("Enabling stderr handler for interactive shell " + self.command)
self.stderr_handler.start()
lib_logger.debug("Enabling stdout handler for interactive shell " + self.command)
self.stdout_handler.start()
# Wait for shell prompt
lib_logger.debug("Waiting for shell prompt: " + shell_prompt)
return self.wait_for(shell_prompt, timeout)
# Close interactive shell which should also kill all threaded IO handlers
def close(self):
# Wait 5 seconds before closing to let shell handle all input and outputs
time.sleep(5)
# Stop IO handler threads and terminate the process then wait another 5 seconds for cleanup to happen
self.stop_handlers = True
self.process.terminate()
time.sleep(5)
# Check for any additional output from the stdout handler
output = ""
while True:
data = _get_queue_output(self.stdout_queue)
if data != "":
output += data
else:
break
for line in iter(output.splitlines()):
self.logger.debug(line)
# Check for any additional output from the stderr handler
output = ""
while True:
data = _get_queue_output(self.stderr_queue)
if data != "":
output += data
else:
break
for line in iter(output.splitlines()):
self.logger.error(line)
# Cleanup PTYs
os.close(self.master_stdout)
os.close(self.master_stderr)
os.close(self.slave_stdout)
os.close(self.slave_stderr)
lib_logger.debug("Interactive shell command " + self.command + " terminated")
# Run series of commands given as a list of a list of commands and wait_for strings. If no wait_for is needed then
# only provide the command. Return if all the commands completed successfully or not.
# Ex:
# [
# ["ssh jsas#" + vnf_ip, r"jsas#.*:"],
# ["juniper123", r"jsas#.*\$"],
# ["sudo su", r".*jsas:"],
# ["juniper123", r"root#.*#"],
# ["usermod -p 'blah' jsas"]
# ]
def run_commands(self, commands_list):
shell_status = True
for command in commands_list:
shell_status = self.run(command[0])
if shell_status and len(command) == 2:
shell_status = self.wait_for(command[1])
# Break out of running commands if a command failed
if not shell_status:
break
return shell_status
# Run given command and return False if error occurs otherwise return True
def run(self, command, sleep=0):
# Check process to make sure it is still running and if not grab the stderr output
        if self.process.poll() is not None:
            self.logger.error("Interactive shell command " + self.command + " closed with return code: " +
                              str(self.process.returncode))
data = _get_queue_output(self.stderr_queue)
if data != "":
self.logger.error("Interactive shell error messages:")
for line in iter(data.splitlines()):
self.logger.error(line)
return False
# Write command to process and check to make sure a newline is in command otherwise add it
if "\n" not in command:
command += "\n"
os.write(self.master_stdout, command.encode("utf-8"))
if sleep:
time.sleep(sleep)
return True
# Wait for specific regex expression in output before continuing return False if wait time expires otherwise return
# True
def wait_for(self, this, timeout=DEVICE_TIMEOUT):
timeout = datetime.now() + timedelta(seconds=timeout)
output = ""
# Keep searching for output until timeout occurs
while timeout > datetime.now():
data = _get_queue_output(self.stdout_queue)
if data != "":
# Add to output line and check for match to regex given and if match then break and send output to
# logger
output += data
lib_logger.debug("Checking for " + this + " in data: ")
for line in iter(output.splitlines()):
lib_logger.debug(line)
if re.search(r"{}\s?$".format(this), output):
break
time.sleep(1)
# Send output to logger
for line in iter(output.splitlines()):
self.logger.debug(line)
# If wait time expired print error message and return False
if timeout < datetime.now():
self.logger.error("Wait time expired when waiting for " + this)
return False
return True
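A hypothetical usage sketch (the prompt regex, logger name, and command below are assumptions, not part of the original post):
# Hypothetical usage of InteractiveShell; the "$" prompt pattern is assumed.
import logging
logging.basicConfig(level=logging.DEBUG)
demo_logger = logging.getLogger("demo")

shell = InteractiveShell("/bin/bash", demo_logger)
if shell.open(r"\$", timeout=10):       # wait for a "$" prompt
    shell.run("echo hello")
    shell.wait_for("hello", timeout=5)
shell.close()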
I have server and console scripts which keep listening on a port for console and server requests.
In a UNIX environment I made both the server and console scripts continuously running daemons, which keeps them listening on their ports.
Is there any way in Windows to keep them running like UNIX daemons? I also want them to come up on reboot (they should be auto-started on reboot).
I read about Windows services and followed the code written here, but I am getting a 404 error on my webpage.
__version__ = "0.4"
__all__ = ["RequestHandler"]
import atexit
import BaseHTTPServer
import CGIHTTPServer
import copy
import os
import select
import SimpleHTTPServer
import sys
import time
import threading
import urllib
from signal import SIGTERM
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from SocketServer import ThreadingMixIn
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
pass
class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
have_fork = hasattr(os, 'fork')
have_popen2 = hasattr(os, 'popen2')
have_popen3 = hasattr(os, 'popen3')
rbufsize = 0
def do_POST(self):
if self.is_cgi():
self.run_cgi()
else:
self.send_error(501, "Can only POST to CGI scripts")
def send_head(self):
if self.is_cgi():
return self.run_cgi()
else:
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
def is_cgi(self):
splitpath = _url_collapse_path_split(self.path)
if splitpath[0] in self.cgi_directories:
self.cgi_info = splitpath
return True
return False
cgi_directories = ['/cgi-bin', '/htbin']
def is_executable(self, path):
return executable(path)
def is_python(self, path):
head, tail = os.path.splitext(path)
return tail.lower() in (".py", ".pyw")
def run_cgi(self):
path = self.path
dir, rest = self.cgi_info
i = path.find('/', len(dir) + 1)
while i >= 0:
nextdir = path[:i]
nextrest = path[i+1:]
scriptdir = self.translate_path(nextdir)
if os.path.isdir(scriptdir):
dir, rest = nextdir, nextrest
i = path.find('/', len(dir) + 1)
else:
break
i = rest.rfind('?')
if i >= 0:
rest, query = rest[:i], rest[i+1:]
else:
query = ''
i = rest.find('/')
if i >= 0:
script, rest = rest[:i], rest[i:]
else:
script, rest = rest, ''
scriptname = dir + '/' + script
scriptfile = self.translate_path(scriptname)
if not os.path.exists(scriptfile):
self.send_error(404, "No such CGI script (%r)" % scriptname)
return
if not os.path.isfile(scriptfile):
self.send_error(403, "CGI script is not a plain file (%r)" %
scriptname)
return
ispy = self.is_python(scriptname)
if not ispy:
if not (self.have_fork or self.have_popen2 or self.have_popen3):
self.send_error(403, "CGI script is not a Python script (%r)" %
scriptname)
return
if not self.is_executable(scriptfile):
self.send_error(403, "CGI script is not executable (%r)" %
scriptname)
return
# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
# XXX Much of the following could be prepared ahead of time!
env = {}
env['SERVER_SOFTWARE'] = self.version_string()
env['SERVER_NAME'] = self.server.server_name
env['GATEWAY_INTERFACE'] = 'CGI/1.1'
env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command
uqrest = urllib.unquote(rest)
env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname
if query:
env['QUERY_STRING'] = query
host = self.address_string()
if host != self.client_address[0]:
env['REMOTE_HOST'] = host
env['REMOTE_ADDR'] = self.client_address[0]
authorization = self.headers.getheader("authorization")
if authorization:
authorization = authorization.split()
if len(authorization) == 2:
import base64, binascii
env['AUTH_TYPE'] = authorization[0]
if authorization[0].lower() == "basic":
try:
authorization = base64.decodestring(authorization[1])
except binascii.Error:
pass
else:
authorization = authorization.split(':')
if len(authorization) == 2:
env['REMOTE_USER'] = authorization[0]
# XXX REMOTE_IDENT
if self.headers.typeheader is None:
env['CONTENT_TYPE'] = self.headers.type
else:
env['CONTENT_TYPE'] = self.headers.typeheader
length = self.headers.getheader('content-length')
if length:
env['CONTENT_LENGTH'] = length
referer = self.headers.getheader('referer')
if referer:
env['HTTP_REFERER'] = referer
accept = []
for line in self.headers.getallmatchingheaders('accept'):
if line[:1] in "\t\n\r ":
accept.append(line.strip())
else:
accept = accept + line[7:].split(',')
env['HTTP_ACCEPT'] = ','.join(accept)
ua = self.headers.getheader('user-agent')
if ua:
env['HTTP_USER_AGENT'] = ua
co = filter(None, self.headers.getheaders('cookie'))
if co:
env['HTTP_COOKIE'] = ', '.join(co)
# XXX Other HTTP_* headers
# Since we're setting the env in the parent, provide empty
# values to override previously set values
for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
env.setdefault(k, "")
os.environ.update(env)
self.send_response(200, "Script output follows")
decoded_query = query.replace('+', ' ')
if self.have_fork:
# Unix -- fork as we should
args = [script]
if '=' not in decoded_query:
args.append(decoded_query)
nobody = nobody_uid()
self.wfile.flush() # Always flush before forking
pid = os.fork()
if pid != 0:
# Parent
pid, sts = os.waitpid(pid, 0)
# throw away additional data [see bug #427345]
while select.select([self.rfile], [], [], 0)[0]:
if not self.rfile.read(1):
break
if sts:
self.log_error("CGI script exit status %#x", sts)
return
# Child
try:
try:
os.setuid(nobody)
except os.error:
pass
os.dup2(self.rfile.fileno(), 0)
os.dup2(self.wfile.fileno(), 1)
os.execve(scriptfile, args, os.environ)
except:
self.server.handle_error(self.request, self.client_address)
os._exit(127)
else:
# Non Unix - use subprocess
import subprocess
cmdline = [scriptfile]
if self.is_python(scriptfile):
interp = sys.executable
if interp.lower().endswith("w.exe"):
# On Windows, use python.exe, not pythonw.exe
interp = interp[:-5] + interp[-4:]
cmdline = [interp, '-u'] + cmdline
if '=' not in query:
cmdline.append(query)
self.log_message("command: %s", subprocess.list2cmdline(cmdline))
try:
nbytes = int(length)
except (TypeError, ValueError):
nbytes = 0
p = subprocess.Popen(cmdline,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
if self.command.lower() == "post" and nbytes > 0:
data = self.rfile.read(nbytes)
else:
data = None
# throw away additional data [see bug #427345]
while select.select([self.rfile._sock], [], [], 0)[0]:
if not self.rfile._sock.recv(1):
break
stdout, stderr = p.communicate(data)
self.wfile.write(stdout)
if stderr:
self.log_error('%s', stderr)
status = p.returncode
if status:
self.log_error("CGI script exit status %#x", status)
else:
self.log_message("CGI script exited OK")
def _url_collapse_path_split(path):
path_parts = []
for part in path.split('/'):
if part == '.':
path_parts.append('')
else:
path_parts.append(part)
# Filter out blank non trailing parts before consuming the '..'.
path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
if path_parts:
tail_part = path_parts.pop()
else:
tail_part = ''
head_parts = []
for part in path_parts:
if part == '..':
head_parts.pop()
else:
head_parts.append(part)
if tail_part and tail_part == '..':
head_parts.pop()
tail_part = ''
return ('/' + '/'.join(head_parts), tail_part)
nobody = None
def nobody_uid():
"""Internal routine to get nobody's uid"""
global nobody
if nobody:
return nobody
try:
import pwd
except ImportError:
return -1
try:
nobody = pwd.getpwnam('nobody')[2]
except KeyError:
nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
return nobody
def executable(path):
"""Test for executable file."""
try:
st = os.stat(path)
except os.error:
return False
return st.st_mode & 0111 != 0
Handler = RequestHandler
PORT = 7998
ADDRESS = "0.0.0.0"
httpd = ThreadedHTTPServer((ADDRESS, PORT), Handler)
print "serving at %s:%s" % (ADDRESS, PORT)
import os
import SocketServer
import BaseHTTPServer
import SimpleHTTPServer
import xmlrpclib
import SimpleXMLRPCServer
import socket
import httplib
import inspect
import win32service
import win32serviceutil
import win32api
import win32con
import win32event
import win32evtlogutil
class XMLRPCServerService(win32serviceutil.ServiceFramework):
_svc_name_ = "XMLRPCServerService"
_svc_display_name_ = "XMLRPCServerService"
_svc_description_ = "Tests Python service framework by receiving and echoing messages over a named pipe"
def __init__(self, args):
win32serviceutil.ServiceFramework.__init__(self, args)
self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
def SvcStop(self):
self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
self.ReportServiceStatus(win32service.SERVICE_STOPPED)
win32event.SetEvent(self.hWaitStop)
def SvcDoRun(self):
import servicemanager
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,servicemanager.PYS_SERVICE_STARTED,(self._svc_name_, ''))
self.timeout = 100
while 1:
rc = win32event.WaitForSingleObject(self.hWaitStop, self.timeout)
if rc == win32event.WAIT_OBJECT_0:
servicemanager.LogInfoMsg("XMLRPCServerService - STOPPED")
break
else:
httpd.serve_forever()
servicemanager.LogInfoMsg("XMLRPCServerService - is alive and well")
def ctrlHandler(ctrlType):
return True
if __name__ == '__main__':
win32api.SetConsoleCtrlHandler(ctrlHandler, True)
win32serviceutil.HandleCommandLine(XMLRPCServerService)
Any clues where I am going wrong? Or a good way to implement it (maybe without using a service)?
Strict Note:
Solution must be in Python 2.6 (Project requirements).
Updates:
I saw some weird thing in the log from python service.py debug:
127.0.0.1 - - [04/Apr/2014 09:41:04] command: C:\Python27\Lib\site-packages\win32\pythonservice.exe -u C:\CONSOLE-CGI\cgi-bin\login.py ""
Why is the CGI script being executed using pythonservice.exe?
What am I missing here?
More updates:
Code snippet from the daemon process Python script:
# Non Unix - use subprocess
import subprocess
cmdline = [scriptfile]
if self.is_python(scriptfile):
    # interp = sys.executable  # here it returns pythonservice.exe
    interp = "python.exe"  # if I hardcode it to python.exe all goes fine
    if interp.lower().endswith("w.exe"):  # On Windows, use python.exe, not pythonw.exe
        interp = interp[:-5] + interp[-4:]
    cmdline = [interp, '-u'] + cmdline
Any clues why that is?
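A hedged workaround, rather than hardcoding the name: under pythonservice.exe, sys.executable points at the service host, so the interpreter path can be derived from the install prefix instead (this assumes a standard CPython layout on Windows):
# Hedged sketch: build the interpreter path from the install prefix
# instead of trusting sys.executable inside a service.
import os
import sys
interp = os.path.join(sys.exec_prefix, "python.exe")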
You may need to redirect all the output, since the Windows scheduler has some issues with this in the pythonw case. The process starts properly, but nothing gets done and the server does not respond unless stdout and stderr are redirected.
import http.server
import socketserver
import sys
PORT = 1234
Handler = http.server.SimpleHTTPRequestHandler
if __name__ == '__main__':
sys.stdout = open('out.txt', 'w')
sys.stderr = open('err.txt', 'w')
with socketserver.TCPServer(("", PORT), Handler) as httpd:
print("serving at port %d" % PORT, flush=True)
httpd.serve_forever()
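On the start-on-reboot requirement from the question: if memory serves, pywin32 services can be installed with an automatic startup type, since win32serviceutil.HandleCommandLine accepts a --startup option; treat the exact flag spelling as an assumption to verify:
python service.py --startup auto install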
I want to execute a process, limit the execution time by some timeout in seconds, and grab the output produced by the process. And I want to do this on Windows, Linux and FreeBSD.
I have tried implementing this in three different ways:
cmd - Without timeout and subprocess.PIPE for output capture.
BEHAVIOUR: Operates as expected but does not support a timeout, and I need a timeout...
cmd_to - With timeout and subprocess.PIPE for output capture.
BEHAVIOUR: Blocks subprocess execution when output >= 2^16 bytes.
cmd_totf - With timeout and tempfile.NamedTemporaryfile for output capture.
BEHAVIOUR: Operates as expected but uses temporary files on disk.
These are available below for closer inspection.
As can be seen in the output below, the timeout code blocks the execution of the subprocess when using subprocess.PIPE and the output from the subprocess is >= 2^16 bytes.
The subprocess documentation states that this is expected when calling process.wait() and using subprocess.PIPE; however, no warnings are given when using process.poll(), so what is going wrong here?
I have a solution in cmd_totf which uses the tempfile module, but the tradeoff is that it writes the output to disk, something I would REALLY like to avoid.
So my questions are:
What am I doing wrong in cmd_to?
Is there a way to do what I want without using tempfiles, i.e. keeping the output in memory?
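The Timer pattern shown earlier on this page also applies here. A sketch under the same Python 2 assumptions as the question: communicate() drains stdout and stderr concurrently, so it never hits the 2^16-byte pipe limit, and a threading.Timer enforces the timeout by killing the child:
# Sketch: no temp files and no pipe deadlock; the Timer kills the child
# if it exceeds the limit, and communicate() buffers output in memory.
import subprocess
from threading import Timer

def cmd_timed(cmdline, timeout=60):
    p = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    timer = Timer(timeout, p.kill)
    timer.start()
    try:
        out, err = p.communicate()
    finally:
        timer.cancel()
    return (p.returncode, err, out)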
Script to generate a bunch of output ('exp_gen.py'):
#!/usr/bin/env python
import sys
output = "b"*int(sys.argv[1])
print output
Three different implementations (cmd, cmd_to, cmd_totf) of wrappers around subprocess.Popen:
#!/usr/bin/env python
import subprocess, time, tempfile
bufsize = -1
def cmd(cmdline, timeout=60):
"""
Execute cmdline.
    Uses the subprocess module and subprocess.PIPE.
"""
p = subprocess.Popen(
cmdline,
bufsize = bufsize,
shell = False,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
out, err = p.communicate()
returncode = p.returncode
return (returncode, err, out)
def cmd_to(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
    Uses the subprocess module and subprocess.PIPE.
"""
p = subprocess.Popen(
cmdline,
bufsize = bufsize,
shell = False,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
while p.poll() is None and seconds_passed < timeout:
seconds_passed = time.time() - t_begin
time.sleep(0.1)
#if seconds_passed > timeout:
#
# try:
# p.stdout.close() # If they are not closed the fds will hang around until
# p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
# p.terminate() # Important to close the fds prior to terminating the process!
# # NOTE: Are there any other "non-freed" resources?
# except:
# pass
#
# raise TimeoutInterrupt
out, err = p.communicate()
returncode = p.returncode
return (returncode, err, out)
def cmd_totf(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
    Uses the subprocess module and tempfile instead of subprocess.PIPE.
"""
output = tempfile.NamedTemporaryFile(delete=False)
error = tempfile.NamedTemporaryFile(delete=False)
p = subprocess.Popen(
cmdline,
bufsize = 0,
shell = False,
stdin = None,
stdout = output,
stderr = error
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
while p.poll() is None and seconds_passed < timeout:
seconds_passed = time.time() - t_begin
time.sleep(0.1)
#if seconds_passed > timeout:
#
# try:
# p.stdout.close() # If they are not closed the fds will hang around until
# p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
# p.terminate() # Important to close the fds prior to terminating the process!
# # NOTE: Are there any other "non-freed" resources?
# except:
# pass
#
# raise TimeoutInterrupt
p.wait()
returncode = p.returncode
fd = open(output.name)
out = fd.read()
fd.close()
fd = open(error.name)
err = fd.read()
fd.close()
error.close()
output.close()
return (returncode, err, out)
if __name__ == "__main__":
implementations = [cmd, cmd_to, cmd_totf]
bytes = ['65535', '65536', str(1024*1024)]
timeouts = [5]
for timeout in timeouts:
for size in bytes:
for i in implementations:
t_begin = time.time()
seconds_passed = 0
rc, err, output = i(['exp_gen.py', size], timeout)
seconds_passed = time.time() - t_begin
filler = ' '*(8-len(i.func_name))
print "[%s%s: timeout=%d, iosize=%s, seconds=%f]" % (repr(i.func_name), filler, timeout, size, seconds_passed)
Output from execution:
['cmd' : timeout=5, iosize=65535, seconds=0.016447]
['cmd_to' : timeout=5, iosize=65535, seconds=0.103022]
['cmd_totf': timeout=5, iosize=65535, seconds=0.107176]
['cmd' : timeout=5, iosize=65536, seconds=0.028105]
['cmd_to' : timeout=5, iosize=65536, seconds=5.116658]
['cmd_totf': timeout=5, iosize=65536, seconds=0.104905]
['cmd' : timeout=5, iosize=1048576, seconds=0.025964]
['cmd_to' : timeout=5, iosize=1048576, seconds=5.128062]
['cmd_totf': timeout=5, iosize=1048576, seconds=0.103183]
Despite all the warnings in the subprocess documentation, directly reading from process.stdout and process.stderr has provided a better solution.
By better I mean that I can read output from a process that exceeds 2^16 bytes without having to temporarily store the output on disk.
The code follows:
import fcntl
import os
import subprocess
import time

class TimeoutInterrupt(Exception):
    # Defined here for completeness; the original answer raises it
    # without showing its definition.
    pass
def nonBlockRead(output):
fd = output.fileno()
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    try:
        # read() may return None on a non-blocking fd with no data
        return output.read() or ''
    except (IOError, OSError):
        # nothing to read right now
        return ''
def cmd(cmdline, timeout=60):
"""
Execute cmdline, limit execution time to 'timeout' seconds.
Uses the subprocess module and subprocess.PIPE.
Raises TimeoutInterrupt
"""
p = subprocess.Popen(
cmdline,
        bufsize = 0,     # 0 (unbuffered) is best; 'bufsize' was undefined in this snippet
shell = False, # not really needed; it's disabled by default
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)
t_begin = time.time() # Monitor execution time
seconds_passed = 0
stdout = ''
stderr = ''
while p.poll() is None and seconds_passed < timeout: # Monitor process
time.sleep(0.1) # Wait a little
seconds_passed = time.time() - t_begin
# p.std* blocks on read(), which messes up the timeout timer.
# To fix this, we use a nonblocking read()
# Note: Not sure if this is Windows compatible
stdout += nonBlockRead(p.stdout)
stderr += nonBlockRead(p.stderr)
if seconds_passed >= timeout:
try:
p.stdout.close() # If they are not closed the fds will hang around until
p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
p.terminate() # Important to close the fds prior to terminating the process!
# NOTE: Are there any other "non-freed" resources?
except:
pass
raise TimeoutInterrupt
returncode = p.returncode
return (returncode, stdout, stderr)
Disclaimer: This answer is not tested on Windows or FreeBSD. It should work on FreeBSD, but note that the fcntl module is POSIX-only, so the non-blocking read will not work on Windows as written. Apart from that, I believe this is a working answer to your question; it works for me.
Here's code I just hacked to solve the problem on linux. It is a combination of several Stackoverflow threads and my own research in the Python 3 documents.
Main characteristics of this code:
Uses processes, not threads, for blocking I/O because they can be terminated more reliably with p.terminate()
Implements a retriggerable timeout watchdog that restarts counting whenever some output happens
Implements a long-term timeout watchdog to limit overall runtime
Can feed in stdin (although I only need to feed in one-time short strings)
Can capture stdout/stderr in the usual Popen means (Only stdout is coded, and stderr redirected to stdout; but can easily be separated)
It's almost realtime because it only checks every 0.2 seconds for output. But you could decrease this or remove the waiting interval easily
Lots of debugging printouts are still enabled to see what's happening when.
The only code dependency is enum as implemented here, but the code could easily be changed to work without. It's only used to distinguish the two timeouts - use separate exceptions if you like.
Here's the code - as usual - feedback is highly appreciated:
(Edit 29-Jun-2012 - the code is now actually working)
# Python module runcmd
# Implements a class to launch shell commands which
# are killed after a timeout. Timeouts can be reset
# after each line of output
#
# Use inside other script with:
#
# import runcmd
# (return_code, out) = runcmd.RunCmd(['ls', '-l', '/etc'],
# timeout_runtime,
# timeout_no_output,
# stdin_string).go()
#
import multiprocessing
import queue
import subprocess
import time
import enum
def timestamp():
return time.strftime('%Y%m%d-%H%M%S')
class ErrorRunCmd(Exception): pass
class ErrorRunCmdTimeOut(ErrorRunCmd): pass
class Enqueue_output(multiprocessing.Process):
def __init__(self, out, queue):
multiprocessing.Process.__init__(self)
self.out = out
self.queue = queue
self.daemon = True
def run(self):
try:
for line in iter(self.out.readline, b''):
#print('worker read:', line)
self.queue.put(line)
except ValueError: pass # Readline of closed file
self.out.close()
class Enqueue_input(multiprocessing.Process):
def __init__(self, inp, iterable):
multiprocessing.Process.__init__(self)
self.inp = inp
self.iterable = iterable
self.daemon = True
def run(self):
#print("writing stdin")
for line in self.iterable:
self.inp.write(bytes(line,'utf-8'))
self.inp.close()
#print("writing stdin DONE")
class RunCmd():
"""RunCmd - class to launch shell commands
Captures and returns stdout. Kills child after a given
amount (timeout_runtime) wallclock seconds. Can also
kill after timeout_retriggerable wallclock seconds.
This second timer is reset whenever the child does some
output
(return_code, out) = RunCmd(['ls', '-l', '/etc'],
timeout_runtime,
timeout_no_output,
stdin_string).go()
"""
Timeout = enum.Enum('No','Retriggerable','Runtime')
def __init__(self, cmd, timeout_runtime, timeout_retriggerable, stdin=None):
self.dbg = False
self.cmd = cmd
self.timeout_retriggerable = timeout_retriggerable
self.timeout_runtime = timeout_runtime
self.timeout_hit = self.Timeout.No
self.stdout = '--Cmd did not yield any output--'
self.stdin = stdin
def read_queue(self, q):
time_last_output = None
try:
bstr = q.get(False) # non-blocking
if self.dbg: print('{} chars read'.format(len(bstr)))
time_last_output = time.time()
self.stdout += bstr
except queue.Empty:
#print('queue empty')
pass
return time_last_output
def go(self):
if self.stdin:
pstdin = subprocess.PIPE
else:
pstdin = None
p = subprocess.Popen(self.cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=pstdin)
pin = None
if (pstdin):
pin = Enqueue_input(p.stdin, [self.stdin + '\n'])
pin.start()
q = multiprocessing.Queue()
pout = Enqueue_output(p.stdout, q)
pout.start()
try:
if self.dbg: print('Beginning subprocess with timeout {}/{} s on {}'.format(self.timeout_retriggerable, self.timeout_runtime, time.asctime()))
time_begin = time.time()
time_last_output = time_begin
seconds_passed = 0
self.stdout = b''
once = True # ensure loop's executed at least once
# some child cmds may exit very fast, but still produce output
while once or p.poll() is None or not q.empty():
once = False
if self.dbg: print('a) {} of {}/{} secs passed and overall {} chars read'.format(seconds_passed, self.timeout_retriggerable, self.timeout_runtime, len(self.stdout)))
tlo = self.read_queue(q)
if tlo:
time_last_output = tlo
now = time.time()
if now - time_last_output >= self.timeout_retriggerable:
self.timeout_hit = self.Timeout.Retriggerable
raise ErrorRunCmdTimeOut(self)
if now - time_begin >= self.timeout_runtime:
self.timeout_hit = self.Timeout.Runtime
raise ErrorRunCmdTimeOut(self)
if q.empty():
time.sleep(0.1)
# Final try to get "last-millisecond" output
self.read_queue(q)
finally:
self._close(p, [pout, pin])
return (self.returncode, self.stdout)
def _close(self, p, procs):
if self.dbg:
if self.timeout_hit != self.Timeout.No:
print('{} A TIMEOUT occured: {}'.format(timestamp(), self.timeout_hit))
else:
print('{} No timeout occured'.format(timestamp()))
for process in [proc for proc in procs if proc]:
try:
process.terminate()
except:
print('{} Process termination raised trouble'.format(timestamp()))
raise
try:
p.stdin.close()
except: pass
if self.dbg: print('{} _closed stdin'.format(timestamp()))
try:
p.stdout.close() # If they are not closed the fds will hang around until
except: pass
if self.dbg: print('{} _closed stdout'.format(timestamp()))
#p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception
try:
p.terminate() # Important to close the fds prior to terminating the process!
# NOTE: Are there any other "non-freed" resources?
except: pass
if self.dbg: print('{} _closed Popen'.format(timestamp()))
try:
self.stdout = self.stdout.decode('utf-8')
except: pass
self.returncode = p.returncode
if self.dbg: print('{} _closed all'.format(timestamp()))
Use with:
import runcmd
cmd = ['ls', '-l', '/etc']
worker = runcmd.RunCmd(cmd,
40, # limit runtime [wallclock seconds]
2, # limit runtime after last output [wallclk secs]
'' # stdin input string
)
(return_code, out) = worker.go()
if worker.timeout_hit != worker.Timeout.No:
print('A TIMEOUT occured: {}'.format(worker.timeout_hit))
else:
print('No timeout occured')
print("Running '{:s}' returned {:d} and {:d} chars of output".format(cmd, return_code, len(out)))
print('Output:')
print(out)
command - the first argument - should be a list of a command and its arguments. It is used for the Popen(shell=False) call, and the timeouts are in seconds. There's currently no code to disable the timeouts; set timeout_no_output to timeout_runtime to effectively disable the retriggerable timeout.
stdin_string can be any string which is to be sent to the command's standard input. Set to None if your command does not need any input. If a string is provided, a final '\n' is appended.