In a nutshell
I get a BrokenProcessPool exception when parallelizing my code with concurrent.futures. No further error is displayed. I want to find the cause of the error and ask for ideas of how to do that.
Full problem
I am using concurrent.futures to parallelize some code.
with ProcessPoolExecutor() as pool:
mapObj = pool.map(myMethod, args)
I end up with (and only with) the following exception:
concurrent.futures.process.BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anymore
Unfortunately, the program is complex and the error appears only after the program has run for 30 minutes. Therefore, I cannot provide a nice minimal example.
In order to find the cause of the issue, I wrapped the method that I run in parallel with a try-except-block:
def myMethod(*args):
try:
...
except Exception as e:
print(e)
The problem remained the same and the except block was never entered. I conclude that the exception does not come from my code.
My next step was to write a custom ProcessPoolExecutor class that is a child of the original ProcessPoolExecutor and allows me to replace some methods with cusomized ones. I copied and pasted the original code of the method _process_worker and added some print statements.
def _process_worker(call_queue, result_queue):
"""Evaluates calls from call_queue and places the results in result_queue.
...
"""
while True:
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
result_queue.put(os.getpid())
return
try:
r = call_item.fn(*call_item.args, **call_item.kwargs)
except BaseException as e:
print("??? Exception ???") # newly added
print(e) # newly added
exc = _ExceptionWithTraceback(e, e.__traceback__)
result_queue.put(_ResultItem(call_item.work_id, exception=exc))
else:
result_queue.put(_ResultItem(call_item.work_id,
result=r))
Again, the except block is never entered. This was to be expected, because I already ensured that my code does not raise an exception (and if everything worked well, the exception should be passed to the main process).
Now I am lacking ideas how I could find the error. The exception is raised here:
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
if self._broken:
raise BrokenProcessPool('A child process terminated '
'abruptly, the process pool is not usable anymore')
if self._shutdown_thread:
raise RuntimeError('cannot schedule new futures after shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
self._pending_work_items[self._queue_count] = w
self._work_ids.put(self._queue_count)
self._queue_count += 1
# Wake up queue management thread
self._result_queue.put(None)
self._start_queue_management_thread()
return f
The process pool is set to be broken here:
def _queue_management_worker(executor_reference,
processes,
pending_work_items,
work_ids_queue,
call_queue,
result_queue):
"""Manages the communication between this process and the worker processes.
...
"""
executor = None
def shutting_down():
return _shutdown or executor is None or executor._shutdown_thread
def shutdown_worker():
...
reader = result_queue._reader
while True:
_add_call_item_to_queue(pending_work_items,
work_ids_queue,
call_queue)
sentinels = [p.sentinel for p in processes.values()]
assert sentinels
ready = wait([reader] + sentinels)
if reader in ready:
result_item = reader.recv()
else: #THIS BLOCK IS ENTERED WHEN THE ERROR OCCURS
# Mark the process pool broken so that submits fail right now.
executor = executor_reference()
if executor is not None:
executor._broken = True
executor._shutdown_thread = True
executor = None
# All futures in flight must be marked failed
for work_id, work_item in pending_work_items.items():
work_item.future.set_exception(
BrokenProcessPool(
"A process in the process pool was "
"terminated abruptly while the future was "
"running or pending."
))
# Delete references to object. See issue16284
del work_item
pending_work_items.clear()
# Terminate remaining workers forcibly: the queues or their
# locks may be in a dirty state and block forever.
for p in processes.values():
p.terminate()
shutdown_worker()
return
...
It is (or seems to be) a fact that a process terminates, but I have no clue why. Are my thoughts correct so far? What are possible causes that make a process terminate without a message? (Is this even possible?) Where could I apply further diagnostics? Which questions should I ask myself in order to come closer to a solution?
I am using python 3.5 on 64bit Linux.
I think I was able to get as far as possible:
I changed the _queue_management_worker method in my changed ProcessPoolExecutor module such that the exit code of the failed process is printed:
def _queue_management_worker(executor_reference,
processes,
pending_work_items,
work_ids_queue,
call_queue,
result_queue):
"""Manages the communication between this process and the worker processes.
...
"""
executor = None
def shutting_down():
return _shutdown or executor is None or executor._shutdown_thread
def shutdown_worker():
...
reader = result_queue._reader
while True:
_add_call_item_to_queue(pending_work_items,
work_ids_queue,
call_queue)
sentinels = [p.sentinel for p in processes.values()]
assert sentinels
ready = wait([reader] + sentinels)
if reader in ready:
result_item = reader.recv()
else:
# BLOCK INSERTED FOR DIAGNOSIS ONLY ---------
vals = list(processes.values())
for s in ready:
j = sentinels.index(s)
print("is_alive()", vals[j].is_alive())
print("exitcode", vals[j].exitcode)
# -------------------------------------------
# Mark the process pool broken so that submits fail right now.
executor = executor_reference()
if executor is not None:
executor._broken = True
executor._shutdown_thread = True
executor = None
# All futures in flight must be marked failed
for work_id, work_item in pending_work_items.items():
work_item.future.set_exception(
BrokenProcessPool(
"A process in the process pool was "
"terminated abruptly while the future was "
"running or pending."
))
# Delete references to object. See issue16284
del work_item
pending_work_items.clear()
# Terminate remaining workers forcibly: the queues or their
# locks may be in a dirty state and block forever.
for p in processes.values():
p.terminate()
shutdown_worker()
return
...
Afterwards I looked up the meaning of the exit code:
from multiprocessing.process import _exitcode_to_name
print(_exitcode_to_name[my_exit_code])
whereby my_exit_code is the exit code that was printed in the block I inserted to the _queue_management_worker. In my case the code was -11, which means that I ran into a segmentation fault. Finding the reason for this issue will be a huge task but goes beyond the scope of this question.
If you are using macOS, there is a known issue with how some versions of macOS uses forking that's not considered fork-safe by Python in some scenarios. The workaround that worked for me is to use no_proxy environment variable.
Edit ~/.bash_profile and include the following (it might be better to specify list of domains or subnets here, instead of *)
no_proxy='*'
Refresh the current context
source ~/.bash_profile
My local versions the issue was seen and worked around are: Python 3.6.0 on
macOS 10.14.1 and 10.13.x
Sources:
Issue 30388
Issue 27126
Foreword
Okay, I have a really complex perfomance issue. I'm building a content managment system and one of the features should be generating tons of .docx files with different templates. I started with Webodt + Abiword. But then templates got too complex, so I had to swith my backend to Templated-docs + LibreOffice. So this is where my problems started.
I use:
Python 2.7.12
Django==1.8.2
templated-docs==0.2.9
LibreOffice 5.1.5.2
Ubuntu 16.04
The actual problem
I have an API which handles .docx render. I will show one of views, as an example, they are pretty similar:
#permission_classes((permissions.IsAdminUser,))
class BookDocxViewSet(mixins.RetrieveModelMixin, viewsets.GenericViewSet):
def retrieve(self, request, *args, **kwargs):
queryset = Pupils.objects.get(id=kwargs['pk'])
serializer = StudentSerializer(queryset)
context = dict(serializer.data)
doc = fill_template('crm/docs/book.ott', context, output_format='docx')
p = u'docs/books/%s/%s_%s_%s.doc' % (datetime.now().date(), context[u'surname'], context[u'name'], datetime.now().date())
with open(doc, 'rb') as f:
content = f.read()
path = default_storage.save(p, ContentFile(content))
f.close()
return response.Response(u'/media/' + path)
When I call it the first time, it creates a .docx file, saves it to my default_storage and then returns me a download link. But when I try to do it again, of do it with another method (which works with another template and context), my server just crashes without any logs. The last thing I see is either
Process finished with exit code 77 if I call it with a little delay (more then one second)
Process finished with exit code 139 (interrupted by signal 11: SIGSEGV) if call my method for the second time right away (in less than one second)
I tried to use debuger -- it said that my server crashes on this line:
doc = fill_template('crm/docs/book.ott', context, output_format='docx')
I bet what happens is:
When I call my method the first time templated_docs starts LibreOffice backend, and then does not stop it
When I call my method the second time templated_docs tries to start LibreOffice backend again, but it is already busy.
Questions
How do I debug LibreOffice to prove / refute my theory? (I guess I need to debug templated_docs instead)
Why do I get different exit codes depending of delay?
Is it enough base to oppen an issue on GitHub?
How do I fix that?
UPD
It is not an issue of REST Framework or not using FileResponce().
I already tried to test it with regular view.
def get_document(request, *args, **kwargs):
context = Pupils.objects.get(id=kwargs['pk']).__dict__
doc = fill_template('crm/docs/book.ott', context, output_format='docx')
p = u'%s_%s_%s' % (context[u'surname'], context[u'name'], datetime.now().date())
return FileResponse(doc, p)
And the problem is same.
UPD 2
Okay. This line is chashing my server:
# pylokit/lokit.py
self.lokit = lo.libreofficekit_hook(six.b(lo_path))
Okay, that was a bug in templated_docs. I was right, it happens because templated_docs is trying to start LibreOffice twice. As it said in pylokit documentation:
The use of _exit() instead of default exit() is required because in
some circumstances LibreOffice segfaults on process exit.
It means the process that used pylockt should be killed after. But we cannot kill Django server. So I decided to use multiprocessing:
# templated_docs/__init__.py
if source_extension[1:] != output_format:
lo_path = getattr(
settings,
'TEMPLATED_DOCS_LIBREOFFICE_PATH',
'/usr/lib/libreoffice/program/')
def f(conn):
with Office(lo_path) as lo:
conv_file = NamedTemporaryFile(delete=False,
suffix='.%s' % output_format)
with lo.documentLoad(str(dest_file.name)) as doc:
doc.saveAs(conv_file.name)
os.unlink(dest_file.name)
conn.send(conv_file.name)
conn.close()
parent_conn, child_conn = Pipe()
p = Process(target=f, args=(child_conn,))
p.start()
conv_file_name = parent_conn.recv()
p.join()
return conv_file_name
else:
return dest_file.name
I oppened an issue and made a pull request.
I am currently trying to get the percentage complete messages that are returned by the InfoMessage event from ADO (and a SQL server) when running the BACKUP command. (See my previous question for more details).
I have managed to connect to the SQL server and issue it SQL commands, and event get events back. However when I execute the the BACKUP command the cmd.Execute method blocks until the backup is complete.
But during this time I will get a single InfoMessage event call (which will have a message like "1 Percent Complete") and after that I won't receive any more events.
I have tried this using a stored procedure, where the stored procedure prints 3 messages, and even here I will get the first message and nothing else.
I suspect that I need to call pythoncom.PumpWaitingMessages(), but because the cmd.Execute() call blocks I never get anything of any use.
Can anyone work out how to get more that just a single InfoMessage event.
Below is the code that I'm currently using:
import win32com
import pythoncom
import adodbapi
import time
import win32gui
from win32com.client import gencache
gencache.EnsureModule('{2A75196C-D9EB-4129-B803-931327F72D5C}', 0, 2, 8)
defaultNamedOptArg=pythoncom.Empty
defaultNamedNotOptArg=pythoncom.Empty
defaultUnnamedArg=pythoncom.Empty
global connected
connected = False
class events():
def OnInfoMessage(self, pError, adStatus, pConnection):
print 'Info Message'
a = pError.QueryInterface(pythoncom.IID_IDispatch)
a = win32com.client.Dispatch(a)
print a.Description
print a.Number
print a.Source
#print 'B', adStatus
c = pConnection.QueryInterface(pythoncom.IID_IDispatch)
c = win32com.client.Dispatch(c)
print c.Errors.Count
print c.Errors.Item(0).Description
return 1
def OnCommitTransComplete(self, pError=defaultNamedNotOptArg, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg): pass
def OnWillExecute(self, Source=defaultNamedNotOptArg, CursorType=defaultNamedNotOptArg, LockType=defaultNamedNotOptArg, Options=defaultNamedNotOptArg
, adStatus=defaultNamedNotOptArg, pCommand=defaultNamedNotOptArg, pRecordset=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):
print 'Execute Event'
return Source
def OnDisconnect(self, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):
print 'Disconnected'
def OnExecuteComplete(self, RecordsAffected=defaultNamedNotOptArg, pError=defaultNamedNotOptArg, adStatus=defaultNamedNotOptArg, pCommand=defaultNamedNotOptArg
, pRecordset=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):
print 'Execute complete'
def OnWillConnect(self, ConnectionString=defaultNamedNotOptArg, UserID=defaultNamedNotOptArg, Password=defaultNamedNotOptArg, Options=defaultNamedNotOptArg
, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):
print 'About to connect'
def OnConnectComplete(self, pError=defaultNamedNotOptArg, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):
print 'Connected'
global connected
connected = True
def OnBeginTransComplete(self, TransactionLevel=defaultNamedNotOptArg, pError=defaultNamedNotOptArg, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg):pass
def OnRollbackTransComplete(self, pError=defaultNamedNotOptArg, adStatus=defaultNamedNotOptArg, pConnection=defaultNamedNotOptArg): pass
if __name__ == '__main__':
pythoncom.CoInitialize()
conn = win32com.client.DispatchWithEvents("ADODB.Connection", events)
conn.ConnectionString = 'Data Source=HPDX2250RAAZ\\SQLEXPRESS; Provider=SQLOLEDB; Integrated Security=SSPI'
conn.CommandTimeout = 30
conn.CursorLocation = 2
conn.Open(pythoncom.Empty,pythoncom.Empty,pythoncom.Empty,0x10)
while not connected:
#pythoncom.PumpWaitingMessages()
win32gui.PumpWaitingMessages()
time.sleep(0.1)
conn.BeginTrans()
conn.Errors.Clear()
cmd=win32com.client.Dispatch("ADODB.Command")
cmd.ActiveConnection=conn
cmd.CommandTimeout = 30 #v2.1 Simons
cmd.CommandText="EXECUTE [test].[dbo].[Test] "
print 'Execute'
cmd.Execute()
pythoncom.PumpWaitingMessages()
print 'Called'
print ''
print conn.Errors.Count
conn.RollbackTrans()
conn.Close()
I was having the same issue and what the issue is, if you are experiencing the same problem is the messages are basically being held up by the SQL Server engine itself. To get arround this you need to tell SQL not to wait till the end of processing to send the messages but to send them as they occur.
Try this on for size:
SET #message = 'My message...'
RAISERROR (#message, 10, 1) WITH NOWAIT
This should send the message and your front end should pick these up as the system goes along.
Hope this helps
I found a workaround that is compatible with pymssql and other drivers. I use the SQL from Is there a SQL script that I can use to determine the progress of a SQL Server backup or restore process? plus a background thread that each X seconds run that query. Now, for notification I use http://pydispatcher.sourceforge.net/ to get back the progress.
#This is rough extract from my actual code. Probably not work as is, but outline the idea
import dispatch #Decoupled send of messages, identical to django signals
def monitorBackup(self):
return self.selectSql(SQL_MONITOR)
def backup(sql):
con = self.getCon() #Get new connection, we are in another thread!
con.execute_query("HERE THE BACKUP SQL")
result = threading.Thread(target=partial(backup, sql))
result.start()
while result.isAlive():
time.sleep(5) # with the monitor SQL result, is possible to get a estimated time to complete and adjust this...
rows = self.monitorBackup()
if len(rows) > 0:
percentage = rows[0].Percent
self.send(
msg="%d %%" % percentage,
action="progress",
progress=percentage
)
Is there a Pythonic way to have only one instance of a program running?
The only reasonable solution I've come up with is trying to run it as a server on some port, then second program trying to bind to same port - fails. But it's not really a great idea, maybe there's something more lightweight than this?
(Take into consideration that program is expected to fail sometimes, i.e. segfault - so things like "lock file" won't work)
The following code should do the job, it is cross-platform and runs on Python 2.4-3.2. I tested it on Windows, OS X and Linux.
from tendo import singleton
me = singleton.SingleInstance() # will sys.exit(-1) if other instance is running
The latest code version is available singleton.py. Please file bugs here.
You can install tend using one of the following methods:
easy_install tendo
pip install tendo
manually by getting it from http://pypi.python.org/pypi/tendo
Simple, cross-platform solution, found in another question by zgoda:
import fcntl
import os
import sys
def instance_already_running(label="default"):
"""
Detect if an an instance with the label is already running, globally
at the operating system level.
Using `os.open` ensures that the file pointer won't be closed
by Python's garbage collector after the function's scope is exited.
The lock will be released when the program exits, or could be
released if the file pointer were closed.
"""
lock_file_pointer = os.open(f"/tmp/instance_{label}.lock", os.O_WRONLY)
try:
fcntl.lockf(lock_file_pointer, fcntl.LOCK_EX | fcntl.LOCK_NB)
already_running = False
except IOError:
already_running = True
return already_running
A lot like S.Lott's suggestion, but with the code.
This code is Linux specific. It uses 'abstract' UNIX domain sockets, but it is simple and won't leave stale lock files around. I prefer it to the solution above because it doesn't require a specially reserved TCP port.
try:
import socket
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
## Create an abstract socket, by prefixing it with null.
s.bind( '\0postconnect_gateway_notify_lock')
except socket.error as e:
error_code = e.args[0]
error_string = e.args[1]
print "Process already running (%d:%s ). Exiting" % ( error_code, error_string)
sys.exit (0)
The unique string postconnect_gateway_notify_lock can be changed to allow multiple programs that need a single instance enforced.
I don't know if it's pythonic enough, but in the Java world listening on a defined port is a pretty widely used solution, as it works on all major platforms and doesn't have any problems with crashing programs.
Another advantage of listening to a port is that you could send a command to the running instance. For example when the users starts the program a second time, you could send the running instance a command to tell it to open another window (that's what Firefox does, for example. I don't know if they use TCP ports or named pipes or something like that, 'though).
Never written python before, but this is what I've just implemented in mycheckpoint, to prevent it being started twice or more by crond:
import os
import sys
import fcntl
fh=0
def run_once():
global fh
fh=open(os.path.realpath(__file__),'r')
try:
fcntl.flock(fh,fcntl.LOCK_EX|fcntl.LOCK_NB)
except:
os._exit(0)
run_once()
Found Slava-N's suggestion after posting this in another issue (http://stackoverflow.com/questions/2959474). This one is called as a function, locks the executing scripts file (not a pid file) and maintains the lock until the script ends (normal or error).
Use a pid file. You have some known location, "/path/to/pidfile" and at startup you do something like this (partially pseudocode because I'm pre-coffee and don't want to work all that hard):
import os, os.path
pidfilePath = """/path/to/pidfile"""
if os.path.exists(pidfilePath):
pidfile = open(pidfilePath,"r")
pidString = pidfile.read()
if <pidString is equal to os.getpid()>:
# something is real weird
Sys.exit(BADCODE)
else:
<use ps or pidof to see if the process with pid pidString is still running>
if <process with pid == 'pidString' is still running>:
Sys.exit(ALREADAYRUNNING)
else:
# the previous server must have crashed
<log server had crashed>
<reopen pidfilePath for writing>
pidfile.write(os.getpid())
else:
<open pidfilePath for writing>
pidfile.write(os.getpid())
So, in other words, you're checking if a pidfile exists; if not, write your pid to that file. If the pidfile does exist, then check to see if the pid is the pid of a running process; if so, then you've got another live process running, so just shut down. If not, then the previous process crashed, so log it, and then write your own pid to the file in place of the old one. Then continue.
The best solution for this on windows is to use mutexes as suggested by #zgoda.
import win32event
import win32api
from winerror import ERROR_ALREADY_EXISTS
mutex = win32event.CreateMutex(None, False, 'name')
last_error = win32api.GetLastError()
if last_error == ERROR_ALREADY_EXISTS:
print("App instance already running")
Some answers use fctnl (included also in #sorin tendo package) which is not available on windows and should you try to freeze your python app using a package like pyinstaller which does static imports, it throws an error.
Also, using the lock file method, creates a read-only problem with database files( experienced this with sqlite3).
Here is my eventual Windows-only solution. Put the following into a module, perhaps called 'onlyone.py', or whatever. Include that module directly into your __ main __ python script file.
import win32event, win32api, winerror, time, sys, os
main_path = os.path.abspath(sys.modules['__main__'].__file__).replace("\\", "/")
first = True
while True:
mutex = win32event.CreateMutex(None, False, main_path + "_{<paste YOUR GUID HERE>}")
if win32api.GetLastError() == 0:
break
win32api.CloseHandle(mutex)
if first:
print "Another instance of %s running, please wait for completion" % main_path
first = False
time.sleep(1)
Explanation
The code attempts to create a mutex with name derived from the full path to the script. We use forward-slashes to avoid potential confusion with the real file system.
Advantages
No configuration or 'magic' identifiers needed, use it in as many different scripts as needed.
No stale files left around, the mutex dies with you.
Prints a helpful message when waiting
This may work.
Attempt create a PID file to a known location. If you fail, someone has the file locked, you're done.
When you finish normally, close and remove the PID file, so someone else can overwrite it.
You can wrap your program in a shell script that removes the PID file even if your program crashes.
You can, also, use the PID file to kill the program if it hangs.
For anybody using wxPython for their application, you can use the function wx.SingleInstanceChecker documented here.
I personally use a subclass of wx.App which makes use of wx.SingleInstanceChecker and returns False from OnInit() if there is an existing instance of the app already executing like so:
import wx
class SingleApp(wx.App):
"""
class that extends wx.App and only permits a single running instance.
"""
def OnInit(self):
"""
wx.App init function that returns False if the app is already running.
"""
self.name = "SingleApp-%s".format(wx.GetUserId())
self.instance = wx.SingleInstanceChecker(self.name)
if self.instance.IsAnotherRunning():
wx.MessageBox(
"An instance of the application is already running",
"Error",
wx.OK | wx.ICON_WARNING
)
return False
return True
This is a simple drop-in replacement for wx.App that prohibits multiple instances. To use it simply replace wx.App with SingleApp in your code like so:
app = SingleApp(redirect=False)
frame = wx.Frame(None, wx.ID_ANY, "Hello World")
frame.Show(True)
app.MainLoop()
Using a lock-file is a quite common approach on unix. If it crashes, you have to clean up manually. You could stor the PID in the file, and on startup check if there is a process with this PID, overriding the lock-file if not. (However, you also need a lock around the read-file-check-pid-rewrite-file). You will find what you need for getting and checking pid in the os-package. The common way of checking if there exists a process with a given pid, is to send it a non-fatal signal.
Other alternatives could be combining this with flock or posix semaphores.
Opening a network socket, as saua proposed, would probably be the easiest and most portable.
I'm posting this as an answer because I'm a new user and Stack Overflow won't let me vote yet.
Sorin Sbarnea's solution works for me under OS X, Linux and Windows, and I am grateful for it.
However, tempfile.gettempdir() behaves one way under OS X and Windows and another under other some/many/all(?) *nixes (ignoring the fact that OS X is also Unix!). The difference is important to this code.
OS X and Windows have user-specific temp directories, so a tempfile created by one user isn't visible to another user. By contrast, under many versions of *nix (I tested Ubuntu 9, RHEL 5, OpenSolaris 2008 and FreeBSD 8), the temp dir is /tmp for all users.
That means that when the lockfile is created on a multi-user machine, it's created in /tmp and only the user who creates the lockfile the first time will be able to run the application.
A possible solution is to embed the current username in the name of the lock file.
It's worth noting that the OP's solution of grabbing a port will also misbehave on a multi-user machine.
Building upon Roberto Rosario's answer, I come up with the following function:
SOCKET = None
def run_single_instance(uniq_name):
try:
import socket
global SOCKET
SOCKET = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
## Create an abstract socket, by prefixing it with null.
# this relies on a feature only in linux, when current process quits, the
# socket will be deleted.
SOCKET.bind('\0' + uniq_name)
return True
except socket.error as e:
return False
We need to define global SOCKET vaiable since it will only be garbage collected when the whole process quits. If we declare a local variable in the function, it will go out of scope after the function exits, thus the socket be deleted.
All the credit should go to Roberto Rosario, since I only clarify and elaborate upon his code. And this code will work only on Linux, as the following quoted text from https://troydhanson.github.io/network/Unix_domain_sockets.html explains:
Linux has a special feature: if the pathname for a UNIX domain socket
begins with a null byte \0, its name is not mapped into the
filesystem. Thus it won’t collide with other names in the filesystem.
Also, when a server closes its UNIX domain listening socket in the
abstract namespace, its file is deleted; with regular UNIX domain
sockets, the file persists after the server closes it.
Late answer, but for windows you can use:
from win32event import CreateMutex
from win32api import CloseHandle, GetLastError
from winerror import ERROR_ALREADY_EXISTS
import sys
class singleinstance:
""" Limits application to single instance """
def __init__(self):
self.mutexname = "testmutex_{D0E858DF-985E-4907-B7FB-8D732C3FC3B9}"
self.mutex = CreateMutex(None, False, self.mutexname)
self.lasterror = GetLastError()
def alreadyrunning(self):
return (self.lasterror == ERROR_ALREADY_EXISTS)
def __del__(self):
if self.mutex:
CloseHandle(self.mutex)
Usage
# do this at beginnig of your application
myapp = singleinstance()
# check is another instance of same program running
if myapp.alreadyrunning():
print ("Another instance of this program is already running")
sys.exit(1)
Here is a cross platform example that I've tested on Windows Server 2016 and Ubuntu 20.04 using Python 3.7.9:
import os
class SingleInstanceChecker:
def __init__(self, id):
if isWin():
ensure_win32api()
self.mutexname = id
self.lock = win32event.CreateMutex(None, False, self.mutexname)
self.running = (win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS)
else:
ensure_fcntl()
self.lock = open(f"/tmp/isnstance_{id}.lock", 'wb')
try:
fcntl.lockf(self.lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
self.running = False
except IOError:
self.running = True
def already_running(self):
return self.running
def __del__(self):
if self.lock:
try:
if isWin():
win32api.CloseHandle(self.lock)
else:
os.close(self.lock)
except Exception as ex:
pass
# ---------------------------------------
# Utility Functions
# Dynamically load win32api on demand
# Install with: pip install pywin32
win32api=winerror=win32event=None
def ensure_win32api():
global win32api,winerror,win32event
if win32api is None:
import win32api
import winerror
import win32event
# Dynamically load fcntl on demand
# Install with: pip install fcntl
fcntl=None
def ensure_fcntl():
global fcntl
if fcntl is None:
import fcntl
def isWin():
return (os.name == 'nt')
# ---------------------------------------
Here is it in use:
import time, sys
def main(argv):
_timeout = 10
print("main() called. sleeping for %s seconds" % _timeout)
time.sleep(_timeout)
print("DONE")
if __name__ == '__main__':
SCR_NAME = "my_script"
sic = SingleInstanceChecker(SCR_NAME)
if sic.already_running():
print("An instance of {} is already running.".format(SCR_NAME))
sys.exit(1)
else:
main(sys.argv[1:])
I use single_process on my gentoo;
pip install single_process
example:
from single_process import single_process
#single_process
def main():
print 1
if __name__ == "__main__":
main()
refer: https://pypi.python.org/pypi/single_process/
I keep suspecting there ought to be a good POSIXy solution using process groups, without having to hit the file system, but I can't quite nail it down. Something like:
On startup, your process sends a 'kill -0' to all processes in a particular group. If any such processes exist, it exits. Then it joins the group. No other processes use that group.
However, this has a race condition - multiple processes could all do this at precisely the same time and all end up joining the group and running simultaneously. By the time you've added some sort of mutex to make it watertight, you no longer need the process groups.
This might be acceptable if your process only gets started by cron, once every minute or every hour, but it makes me a bit nervous that it would go wrong precisely on the day when you don't want it to.
I guess this isn't a very good solution after all, unless someone can improve on it?
I ran into this exact problem last week, and although I did find some good solutions, I decided to make a very simple and clean python package and uploaded it to PyPI. It differs from tendo in that it can lock any string resource name. Although you could certainly lock __file__ to achieve the same effect.
Install with: pip install quicklock
Using it is extremely simple:
[nate#Nates-MacBook-Pro-3 ~/live] python
Python 2.7.6 (default, Sep 9 2014, 15:04:36)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.39)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from quicklock import singleton
>>> # Let's create a lock so that only one instance of a script will run
...
>>> singleton('hello world')
>>>
>>> # Let's try to do that again, this should fail
...
>>> singleton('hello world')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/nate/live/gallery/env/lib/python2.7/site-packages/quicklock/quicklock.py", line 47, in singleton
raise RuntimeError('Resource <{}> is currently locked by <Process {}: "{}">'.format(resource, other_process.pid, other_process.name()))
RuntimeError: Resource <hello world> is currently locked by <Process 24801: "python">
>>>
>>> # But if we quit this process, we release the lock automatically
...
>>> ^D
[nate#Nates-MacBook-Pro-3 ~/live] python
Python 2.7.6 (default, Sep 9 2014, 15:04:36)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.39)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from quicklock import singleton
>>> singleton('hello world')
>>>
>>> # No exception was thrown, we own 'hello world'!
Take a look: https://pypi.python.org/pypi/quicklock
linux example
This method is based on the creation of a temporary file automatically deleted after you close the application.
the program launch we verify the existence of the file;
if the file exists ( there is a pending execution) , the program is closed ; otherwise it creates the file and continues the execution of the program.
from tempfile import *
import time
import os
import sys
f = NamedTemporaryFile( prefix='lock01_', delete=True) if not [f for f in os.listdir('/tmp') if f.find('lock01_')!=-1] else sys.exit()
YOUR CODE COMES HERE
On a Linux system one could also ask
pgrep -a for the number of instances, the script
is found in the process list (option -a reveals the
full command line string). E.g.
import os
import sys
import subprocess
procOut = subprocess.check_output( "/bin/pgrep -u $UID -a python", shell=True,
executable="/bin/bash", universal_newlines=True)
if procOut.count( os.path.basename(__file__)) > 1 :
sys.exit( ("found another instance of >{}<, quitting."
).format( os.path.basename(__file__)))
Remove -u $UID if the restriction should apply to all users.
Disclaimer: a) it is assumed that the script's (base)name is unique, b) there might be race conditions.
Here's a good example for django with contextmanager and memcached:
https://docs.celeryproject.org/en/latest/tutorials/task-cookbook.html
Can be used to protect simultaneous operation on different hosts.
Can be used to manage multiple tasks.
Can also be changed for simple python scripts.
My modification of the above code is here:
import time
from contextlib import contextmanager
from django.core.cache import cache
#contextmanager
def memcache_lock(lock_key, lock_value, lock_expire):
timeout_at = time.monotonic() + lock_expire - 3
# cache.add fails if the key already exists
status = cache.add(lock_key, lock_value, lock_expire)
try:
yield status
finally:
# memcache delete is very slow, but we have to use it to take
# advantage of using add() for atomic locking
if time.monotonic() < timeout_at and status:
# don't release the lock if we exceeded the timeout
# to lessen the chance of releasing an expired lock owned by someone else
# also don't release the lock if we didn't acquire it
cache.delete(lock_key)
LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes
def main():
lock_name, lock_value = "lock_1", "locked"
with memcache_lock(lock_name, lock_value, LOCK_EXPIRE) as acquired:
if acquired:
# single instance code here:
pass
if __name__ == "__main__":
main()
Here is a cross-platform implementation, creating a temporary lock file using a context manager.
Can be used to manage multiple tasks.
import os
from contextlib import contextmanager
from time import sleep
class ExceptionTaskInProgress(Exception):
pass
# Context manager for suppressing exceptions
class SuppressException:
def __init__(self):
pass
def __enter__(self):
return self
def __exit__(self, *exc):
return True
# Context manager for task
class TaskSingleInstance:
def __init__(self, task_name, lock_path):
self.task_name = task_name
self.lock_path = lock_path
self.lock_filename = os.path.join(self.lock_path, self.task_name + ".lock")
if os.path.exists(self.lock_filename):
raise ExceptionTaskInProgress("Resource already in use")
def __enter__(self):
self.fl = open(self.lock_filename, "w")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.fl.close()
os.unlink(self.lock_filename)
# Here the task is silently interrupted
# if it is already running on another instance.
def main1():
task_name = "task1"
tmp_filename_path = "."
with SuppressException():
with TaskSingleInstance(task_name, tmp_filename_path):
print("The task `{}` has started.".format(task_name))
# The single task instance code is here.
sleep(5)
print("The task `{}` has completed.".format(task_name))
# Here the task is interrupted with a message
# if it is already running in another instance.
def main2():
task_name = "task1"
tmp_filename_path = "."
try:
with TaskSingleInstance(task_name, tmp_filename_path):
print("The task `{}` has started.".format(task_name))
# The single task instance code is here.
sleep(5)
print("Task `{}` completed.".format(task_name))
except ExceptionTaskInProgress as ex:
print("The task `{}` is already running.".format(task_name))
if __name__ == "__main__":
main1()
main2()
import sys,os
# start program
try: # (1)
os.unlink('lock') # (2)
fd=os.open("lock", os.O_CREAT|os.O_EXCL) # (3)
except:
try: fd=os.open("lock", os.O_CREAT|os.O_EXCL) # (4)
except:
print "Another Program running !.." # (5)
sys.exit()
# your program ...
# ...
# exit program
try: os.close(fd) # (6)
except: pass
try: os.unlink('lock')
except: pass
sys.exit()