Related
I've looked around quite a bit, but haven't found an error quite like this. When I execute my code (below), I get the exception ControllerFactory instance has no attribute 'startedConnecting'. I tried adding the method with the body just being pass, but that simply causes it to stall without transmitting anything, leading me to believe that the problem lies within the way I've set up the classes.
This code is based on code from the twisted website. It's meant to be able to transmit python files, which the server saves, then later transmit arguments for running the python file.
#!/usr/bin/env python
from twisted.internet import reactor, protocol
import argparse
file_header = "pfile:"
run_header = "runwith:"
class Controller(protocol.Protocol):
    """Protocol that pushes raw messages over its connected transport."""

    def sendMessage(self, message):
        """Write ``message`` to this protocol's transport."""
        self.transport.write(message)
class ControllerFactory(protocol.Factory):
    """Factory that hands out ``Controller`` protocols bound to itself."""

    def buildProtocol(self, addr):
        """Create a ``Controller`` whose ``factory`` attribute is this factory."""
        controller = Controller()
        controller.factory = self
        return controller
if __name__ == '__main__':
    # Command line: target address plus either a file to upload or run arguments.
    parser = argparse.ArgumentParser()
    for option in ("--address", "--file", "--args"):
        parser.add_argument(option)
    args = parser.parse_args()

    # One invocation may carry a file or arguments, never both.
    if args.file and args.args:
        raise Exception("Can't send file and args at same time.")

    reactor.connectTCP(args.address, 1337, ControllerFactory())
    # NOTE(review): reactor.run() blocks, so the statements below are only
    # reached once the reactor has stopped; `factory` is also never assigned
    # anywhere in this script.
    reactor.run()

    if args.file:
        print(args.file)
        a = open(args.file)
        factory.connectedProtocol.sendMessage(file_header + a.read())
        a.close()
    if args.args:
        print(args.args)
        factory.connectedProtocol.sendMessage(run_header + args.args)
Use twisted.internet.protocol.ClientFactory for clients (instead of twisted.internet.protocol.Factory). Or use something from twisted.internet.endpoints instead of twisted.internet.reactor.connectTCP.
Also, note that reactor.run() blocks. All of your code that follows that line will not run in any useful way.
I'd like to embed pylint in a program. The user enters python programs (in Qt, in a QTextEdit, although not relevant) and in the background I call pylint to check the text he enters. Finally, I print the errors in a message box.
There are thus two questions: First, how can I do this without writing the entered text to a temporary file and giving it to pylint ? I suppose at some point pylint (or astroid) handles a stream and not a file anymore.
And, more importantly, is it a good idea ? Would it cause problems for imports or other stuffs ? Intuitively I would say no since it seems to spawn a new process (with epylint) but I'm no python expert so I'm really not sure. And if I use this to launch pylint, is it okay too ?
Edit:
I tried tinkering with pylint's internals, even fought with it, but finally got stuck at some point.
Here is the code so far:
from astroid.builder import AstroidBuilder
from astroid.exceptions import AstroidBuildingException
from logilab.common.interface import implements
from pylint.interfaces import IRawChecker, ITokenChecker, IAstroidChecker
from pylint.lint import PyLinter
from pylint.reporters.text import TextReporter
from pylint.utils import PyLintASTWalker
class Validator():
    """Front end that lints source strings through a ``StringPyLinter``."""

    def __init__(self):
        self._messagesBuffer = InMemoryMessagesBuffer()
        self._validator = None
        self.initValidator()

    def initValidator(self):
        """Build the linter, load the default plugins and disable three messages."""
        linter = StringPyLinter(reporter=TextReporter(output=self._messagesBuffer))
        self._validator = linter
        linter.load_default_plugins()
        for message_id in ('W0704', 'I0020', 'I0021'):
            linter.disable(message_id)
        linter.prepare_import_path([])

    def destroyValidator(self):
        """Undo the import-path preparation done in ``initValidator``."""
        self._validator.cleanup_import_path()

    def check(self, string):
        """Lint ``string`` and return whatever the linter's ``check`` returns."""
        return self._validator.check(string)
class InMemoryMessagesBuffer():
    """Minimal write-only stream that keeps every written chunk in a list."""

    def __init__(self):
        self.content = list()

    def write(self, st):
        """Store one written chunk."""
        self.content.append(st)

    def messages(self):
        """Return the list of chunks written since the last reset."""
        return self.content

    def reset(self):
        """Forget everything written so far."""
        self.content = list()
class StringPyLinter(PyLinter):
    """PyLinter that sets its checkers up once and lints in-memory strings.

    ``get_astroid`` is redefined to build the tree with
    ``AstroidBuilder.string_build`` instead of reading a file.
    """

    def __init__(self, options=(), reporter=None, option_groups=(), pylintrc=None):
        super(StringPyLinter, self).__init__(options, reporter, option_groups, pylintrc)
        self._walker = self._used_checkers = None
        self._tokencheckers = self._rawcheckers = None
        self.initCheckers()

    def __del__(self):
        self.destroyCheckers()

    def initCheckers(self):
        """Prepare the checkers, open them and register the AST checkers."""
        self._walker = PyLintASTWalker(self)
        checkers = self.prepare_checkers()
        self._used_checkers = checkers
        self._tokencheckers = [
            chk for chk in checkers
            if implements(chk, ITokenChecker) and chk is not self
        ]
        self._rawcheckers = [chk for chk in checkers if implements(chk, IRawChecker)]
        # notify global begin
        for chk in checkers:
            chk.open()
            if implements(chk, IAstroidChecker):
                self._walker.add_checker(chk)

    def destroyCheckers(self):
        """Close the checkers in the reverse of the order they were opened."""
        self._used_checkers.reverse()
        for chk in self._used_checkers:
            chk.close()

    def check(self, string):
        """Lint ``string`` as a module named ``in_memory``."""
        module_name = "in_memory"
        self.set_current_module(module_name)
        tree = self.get_astroid(string, module_name)
        # NOTE(review): get_astroid() returns None after reporting a build
        # error; the tree is passed on without a None check.
        self.check_astroid_module(tree, self._walker, self._rawcheckers, self._tokencheckers)
        self._add_suppression_messages()
        self.set_current_module('')
        self.stats['statement'] = self._walker.nbstatements

    def get_astroid(self, string, modname):
        """return an astroid representation for a module"""
        try:
            return AstroidBuilder().string_build(string, modname)
        except SyntaxError as syntax_error:
            self.add_message('E0001', line=syntax_error.lineno, args=syntax_error.msg)
        except AstroidBuildingException as build_error:
            self.add_message('F0010', args=build_error)
        except Exception as error:
            import traceback
            traceback.print_exc()
            self.add_message('F0002', args=(error.__class__, error))
if __name__ == '__main__':
    # Smallest possible program to lint.
    code = """
a = 1
print(a)
"""
    validator = Validator()
    outcome = validator.check(code)
    print(outcome)
The traceback is the following:
Traceback (most recent call last):
File "validator.py", line 16, in <module>
main()
File "validator.py", line 13, in main
print(validator.check(code))
File "validator.py", line 30, in check
self._validator.check(string)
File "validator.py", line 79, in check
self.check_astroid_module(astroid, self._walker, self._rawcheckers, self._tokencheckers)
File "c:\Python33\lib\site-packages\pylint\lint.py", line 659, in check_astroid_module
tokens = tokenize_module(astroid)
File "c:\Python33\lib\site-packages\pylint\utils.py", line 103, in tokenize_module
print(module.file_stream)
AttributeError: 'NoneType' object has no attribute 'file_stream'
# And sometimes this is added :
File "c:\Python33\lib\site-packages\astroid\scoped_nodes.py", line 251, in file_stream
return open(self.file, 'rb')
OSError: [Errno 22] Invalid argument: '<?>'
I'll continue digging tomorrow. :)
I got it running.
the first one (NoneType …) is really easy and a bug in your code:
Encountering an exception can make get_astroid “fail”, i.e. send one syntax error message and return!
But for the second one… such bullshit in pylint’s/logilab’s API… Let me explain: Your astroid object here is of type astroid.scoped_nodes.Module.
It’s also created by a factory, AstroidBuilder, which sets astroid.file = '<?>'.
Unfortunately, the Module class has following property:
@property
def file_stream(self):
    """Return a binary stream opened on ``self.file``, or ``None`` if it is unset."""
    if self.file is not None:
        return open(self.file, 'rb')
    return None
And there’s no way to skip that except for subclassing (Which would render us unable to use the magic in AstroidBuilder), so… monkey patching!
We replace the ill-defined property with one that checks an instance for a reference to our code bytes (e.g. astroid._file_bytes) before engaging in above default behavior.
def _monkeypatch_module(module_class):
if module_class.file_stream.fget.__name__ == 'file_stream_patched':
return # only patch if patch isn’t already applied
old_file_stream_fget = module_class.file_stream.fget
def file_stream_patched(self):
if hasattr(self, '_file_bytes'):
return BytesIO(self._file_bytes)
return old_file_stream_fget(self)
module_class.file_stream = property(file_stream_patched)
That monkeypatching can be called just before calling check_astroid_module. But one more thing has to be done. See, there’s more implicit behavior: Some checkers expect and use astroid’s file_encoding field. So we now have this code in the middle of check:
astroid = self.get_astroid(string, modname)
if astroid is not None:
_monkeypatch_module(astroid.__class__)
astroid._file_bytes = string.encode('utf-8')
astroid.file_encoding = 'utf-8'
self.check_astroid_module(astroid, self._walker, self._rawcheckers, self._tokencheckers)
One could say that no amount of linting creates actually good code. Unfortunately pylint unites enormous complexity with a specialization of calling it on files. Really good code has a nice native API and wraps that with a CLI interface. Don’t ask me why file_stream exists if internally, Module gets built from but forgets the source code.
PS: i had to change sth else in your code: load_default_plugins has to come before some other stuff (maybe prepare_checkers, maybe sth. else)
PPS: i suggest subclassing BaseReporter and using that instead of your InMemoryMessagesBuffer
PPPS: this just got pulled (3.2014), and will fix this: https://bitbucket.org/logilab/astroid/pull-request/15/astroidbuilderstring_build-was/diff
4PS: this is now in the official version, so no monkey patching required: astroid.scoped_nodes.Module now has a file_bytes property (without leading underscore).
Working with an unlocatable stream may definitely cause problems in case of relative imports, since the location is then needed to find the actually imported module.
Astroid supports building an AST from a stream, but this is not used/exposed through Pylint, which is a level higher and designed to work with files. So while you may achieve this, it will need a bit of digging into the low-level APIs.
The easiest way is definitely to save the buffer to a file and then use the SA answer to start pylint programmatically if you wish (totally forgot this other account of mine found in other responses ;). Another option is to write a custom reporter to gain more control.
I am working on a quick python script using the cmd module that will allow the user to enter text commands followed by parameters in basic url query string format. The prompts will be answered with something like
commandname foo=bar&baz=brack
Using cmd, I can't seem to find which method to override to affect the way the argument line is handed off to all the do_* methods. I want to run urlparse.parse_qs on these values, and calling this upon line in every do_* method seems clumsy.
The precmd method gets the whole line, before the commandname is split off and interpreted, so this will not work for my purposes. I'm also not terribly familiar with how to place a decorator inside a class like this and haven't been able to pull it off without breaking the scope.
Basically, the python docs for cmd say the following
Repeatedly issue a prompt, accept input, parse an initial prefix off
the received input, and dispatch to action methods, passing them the
remainder of the line as argument.
I want to make a method that will do additional processing to that "remainder of the line" and hand that generated dictionary off to the member functions as the line argument, rather than interpreting them in every function.
Thanks!
You could potentially override the onecmd() method, as the following quick example shows. The onecmd() method there is basically a copy of the one from the original cmd.py, but adds a call to urlparse.parse_qs() before passing the arguments to a function.
import cmd
import urlparse
class myCmd(cmd.Cmd):
    """Command interpreter whose ``do_*`` methods receive a parsed query string."""

    def onecmd(self, line):
        """Interpret one input line and dispatch it to the matching ``do_*`` method.

        Mostly ripped from Python's cmd.py; the only addition is that the
        remainder of the line is run through ``parse_qs`` before it is handed
        to the ``do_*`` method, which therefore receives a dict.
        """
        try:
            from urllib.parse import parse_qs  # Python 3
        except ImportError:
            from urlparse import parse_qs  # Python 2

        # `command` rather than `cmd`, so the imported cmd module is not shadowed.
        command, arg, line = self.parseline(line)
        if not line:
            return self.emptyline()
        if command is None:
            return self.default(line)
        self.lastcmd = line
        if command == '':
            return self.default(line)
        try:
            func = getattr(self, 'do_' + command)
        except AttributeError:
            return self.default(line)
        # Parse only once a handler is known: for empty or unparsable lines
        # parseline() yields arg=None, which must not reach parse_qs.
        return func(parse_qs(arg))  # <- added call

    def do_foo(self, arg):
        """Print the parsed arguments."""
        print(arg)
my_cmd = myCmd()
my_cmd.cmdloop()
Sample output:
(Cmd) foo
{}
(Cmd) foo a b c
{}
(Cmd) foo a=b&c=d
{'a': ['b'], 'c': ['d']}
Is this what you are trying to achieve?
Here's another potential solution that uses a class decorator to modify a
cmd.Cmd subclass and basically apply a decorator function to all do_*
methods of that class:
import cmd
import urlparse
import types
# function decorator to add parse_qs to individual functions
def parse_qs_f(f):
    """Wrap ``f(self, arg)`` so it is called with ``parse_qs(arg)`` instead of ``arg``.

    The wrapper keeps the name and docstring of ``f``.
    """
    import functools
    try:
        from urllib.parse import parse_qs as _parse_query  # Python 3
    except ImportError:
        from urlparse import parse_qs as _parse_query  # Python 2

    # NOTE(review): cmd.Cmd's help output is built from the do_* docstrings,
    # hence functools.wraps - confirm against the cmd documentation.
    @functools.wraps(f)
    def f2(self, arg):
        return f(self, _parse_query(arg))
    return f2
# class decorator to iterate over all attributes of a class and apply
# the parse_qs_f decorator to all do_* methods
def parse_qs(cls):
    """Class decorator: wrap every ``do_*`` method defined on ``cls`` with ``parse_qs_f``.

    Returns ``cls`` itself, modified in place.
    """
    # Only methods defined directly on cls are wrapped. Going through dir()
    # would also wrap inherited handlers (e.g. a base class's do_help) and
    # hand them a dict instead of the string they expect.
    # callable() is used instead of a types.MethodType comparison because on
    # Python 3 a function looked up on its class is a plain function.
    for attr_name, attr in list(vars(cls).items()):
        if attr_name.startswith('do_') and callable(attr):
            setattr(cls, attr_name, parse_qs_f(attr))
    return cls
@parse_qs
class myCmd(cmd.Cmd):
    """Demo interpreter; the class decorator above rewrites its ``do_*`` methods."""

    def do_foo(self, args):
        """Print the argument as received from the decorator's wrapper."""
        print(args)
my_cmd = myCmd()
my_cmd.cmdloop()
I quickly cobbled this together and it appears to work as intended, however, I'm
open to suggestions on any pitfalls or how this solution could be improved.
I have a log file being written by another process which I want to watch for changes. Each time a change occurs I'd like to read the new data in to do some processing on it.
What's the best way to do this? I was hoping there'd be some sort of hook from the PyWin32 library. I've found the win32file.FindNextChangeNotification function but have no idea how to ask it to watch a specific file.
If anyone's done anything like this I'd be really grateful to hear how...
[Edit] I should have mentioned that I was after a solution that doesn't require polling.
[Edit] Curses! It seems this doesn't work over a mapped network drive. I'm guessing windows doesn't 'hear' any updates to the file the way it does on a local disk.
Did you try using Watchdog?
Python API library and shell utilities to monitor file system events.
Directory monitoring made easy with
A cross-platform API.
A shell tool to run commands in response to directory changes.
Get started quickly with a simple example in Quickstart...
If polling is good enough for you, I'd just watch if the "modified time" file stat changes. To read it:
os.stat(filename).st_mtime
(Also note that the Windows native change event solution does not work in all circumstances, e.g. on network drives.)
import os
class Monkey(object):
    """Polls one file's modification time and remembers the last value seen."""

    def __init__(self):
        self.filename = '/path/to/file'
        self._cached_stamp = 0

    def ook(self):
        """Re-read the file's mtime and record it when it differs from the cached one."""
        current = os.stat(self.filename).st_mtime
        if current == self._cached_stamp:
            return
        self._cached_stamp = current
        # File has changed, so do something...
If you want a multiplatform solution, then check QFileSystemWatcher.
Here an example code (not sanitized):
from PyQt4 import QtCore
@QtCore.pyqtSlot(str)
def directory_changed(path):
    """Slot for ``directoryChanged``: report that a watched directory changed."""
    print('Directory Changed!!!')
@QtCore.pyqtSlot(str)
def file_changed(path):
    """Slot for ``fileChanged``: report that a watched file changed."""
    print('File Changed!!!')
fs_watcher = QtCore.QFileSystemWatcher(['/path/to/files_1', '/path/to/files_2', '/path/to/files_3'])
fs_watcher.connect(fs_watcher, QtCore.SIGNAL('directoryChanged(QString)'), directory_changed)
fs_watcher.connect(fs_watcher, QtCore.SIGNAL('fileChanged(QString)'), file_changed)
It should not work on windows (maybe with cygwin ?), but for unix user, you should use the "fcntl" system call. Here is an example in Python. It's mostly the same code if you need to write it in C (same function names)
import time
import fcntl
import os
import signal
FNAME = "/HOME/TOTO/FILETOWATCH"
def handler(signum, frame):
    """Signal handler: report that ``FNAME`` was modified."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("File %s modified" % (FNAME,))
signal.signal(signal.SIGIO, handler)
fd = os.open(FNAME, os.O_RDONLY)
fcntl.fcntl(fd, fcntl.F_SETSIG, 0)
fcntl.fcntl(fd, fcntl.F_NOTIFY,
fcntl.DN_MODIFY | fcntl.DN_CREATE | fcntl.DN_MULTISHOT)
while True:
time.sleep(10000)
Check out pyinotify.
inotify replaces dnotify (from an earlier answer) in newer linuxes and allows file-level rather than directory-level monitoring.
For watching a single file with polling, and minimal dependencies, here is a fully fleshed-out example, based on answer from Deestan (above):
import os
import sys
import time
class Watcher(object):
    """Poll a single file's modification time and react when it changes.

    ``call_func_on_change`` (if given) is called with the extra positional
    and keyword arguments passed to the constructor each time a change is
    detected. Because the cached timestamp starts at 0, the first poll of an
    existing file normally counts as a change.
    """
    running = True  # set to False to make watch() stop after the current pass
    refresh_delay_secs = 1  # pause before each poll, in seconds

    # Constructor
    def __init__(self, watch_file, call_func_on_change=None, *args, **kwargs):
        self._cached_stamp = 0
        self.filename = watch_file
        self.call_func_on_change = call_func_on_change
        self.args = args
        self.kwargs = kwargs

    # Look for changes
    def look(self):
        """Compare the file's mtime with the cached one and fire the callback on change.

        Raises whatever ``os.stat`` raises (e.g. ``FileNotFoundError``).
        """
        stamp = os.stat(self.filename).st_mtime
        if stamp != self._cached_stamp:
            self._cached_stamp = stamp
            # File has changed, so do something...
            print('File changed')
            if self.call_func_on_change is not None:
                self.call_func_on_change(*self.args, **self.kwargs)

    # Keep watching in a loop
    def watch(self):
        """Poll until ``running`` becomes false or the user presses Ctrl-C."""
        while self.running:
            try:
                # Look for changes
                time.sleep(self.refresh_delay_secs)
                self.look()
            except KeyboardInterrupt:
                print('\nDone')
                break
            except FileNotFoundError:
                # Action on file not found
                pass
            except Exception:
                # Not a bare `except:` so SystemExit still ends the program
                # instead of being reported and swallowed here.
                print('Unhandled error: %s' % sys.exc_info()[0])
# Call this function each time a change happens
def custom_action(text):
    """Example change callback: echo ``text`` to standard output."""
    print(text)
watch_file = 'my_file.txt'
# watcher = Watcher(watch_file) # simple
watcher = Watcher(watch_file, custom_action, text='yes, changed') # also call custom action function
watcher.watch() # start the watch going
Well after a bit of hacking of Tim Golden's script, I have the following which seems to work quite well:
import os
import win32file
import win32con
path_to_watch = "." # look at the current directory
file_to_watch = "test.txt" # look for changes to a file called test.txt
def ProcessNewData(newData):
    """Handle a chunk of text newly appended to the watched file (here: print it)."""
    # print(...) with one argument works on Python 2 and 3; the 1-tuple keeps
    # the %-formatting correct even if newData is itself a tuple.
    print("Text added: %s" % (newData,))
# Set up the bits we'll need for output
# Set up the bits we'll need for output
ACTIONS = {
    1: "Created",
    2: "Deleted",
    3: "Updated",
    4: "Renamed from something",
    5: "Renamed to something",
}
FILE_LIST_DIRECTORY = 0x0001

# Handle on the directory that contains the file; change notifications are
# requested on this handle, not on the file itself.
hDir = win32file.CreateFile(
    path_to_watch,
    FILE_LIST_DIRECTORY,
    win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,
    None,
    win32con.OPEN_EXISTING,
    win32con.FILE_FLAG_BACKUP_SEMANTICS,
    None
)

# Open the file we're interested in. It is resolved against path_to_watch so
# the script also works when the watched directory is not the current one.
a = open(os.path.join(path_to_watch, file_to_watch), "r")
# Throw away any existing log data
a.read()

# Wait for new data and call ProcessNewData for each new chunk that's written
while 1:
    # Wait for a change to occur
    results = win32file.ReadDirectoryChangesW(
        hDir,
        1024,
        False,
        win32con.FILE_NOTIFY_CHANGE_LAST_WRITE,
        None,
        None
    )

    # For each change, check to see if it's updating the file we're interested in
    for action, changed_name in results:
        # print changed_name, ACTIONS.get(action, "Unknown")
        if changed_name == file_to_watch:
            newText = a.read()
            if newText != "":
                ProcessNewData(newText)
It could probably do with a load more error checking, but for simply watching a log file and doing some processing on it before spitting it out to the screen, this works well.
Thanks everyone for your input - great stuff!
Check my answer to a similar question. You could try the same loop in Python. This page suggests:
import time
# Follow `file` (an already opened file object) line by line, forever.
while 1:
    where = file.tell()  # remember the position before attempting a read
    line = file.readline()
    if not line:
        # Nothing new yet: wait a second, then go back to the remembered position.
        time.sleep(1)
        file.seek(where)
    else:
        print line, # already has newline
Also see the question tail() a file with Python.
This is another modification of Tim Golden's script that runs on unix types and adds a simple watcher for file modification by using a dict (file=>time).
usage: whateverName.py path_to_dir_to_watch
#!/usr/bin/env python
import os, sys, time
def files_to_timestamp(path):
    """Return ``{joined_path: mtime}`` for every entry of directory ``path``.

    Entries whose mtime cannot be read (deleted between the listing and the
    stat call, dangling symlinks, ...) are left out instead of aborting the
    whole snapshot.
    """
    stamps = {}
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        try:
            stamps[full_path] = os.path.getmtime(full_path)
        except OSError:
            # Treat an entry that cannot be stat-ed as absent.
            continue
    return stamps
if __name__ == "__main__":
    # Usage: whateverName.py path_to_dir_to_watch
    path_to_watch = sys.argv[1]
    print('Watching {}..'.format(path_to_watch))

    before = files_to_timestamp(path_to_watch)

    while 1:
        time.sleep(2)
        after = files_to_timestamp(path_to_watch)

        added = [f for f in after if f not in before]
        removed = [f for f in before if f not in after]
        # Compare the two snapshots rather than stat-ing the files again: a
        # fresh getmtime() can see a change that `after` missed, which would
        # be reported again on the next pass, and it fails on files that
        # disappeared after the snapshot was taken.
        modified = [f for f in before if f in after and after[f] != before[f]]

        if added:
            print('Added: {}'.format(', '.join(added)))
        if removed:
            print('Removed: {}'.format(', '.join(removed)))
        if modified:
            print('Modified: {}'.format(', '.join(modified)))

        before = after
Here is a simplified version of Kender's code that appears to do the same trick and does not import the entire file:
# Check file for new data.
import time
# Keep one handle open and poll it with readline(); each call resumes where
# the previous one stopped.
f = open(r'c:\temp\test.txt', 'r')
while True:
    line = f.readline()
    if not line:
        # Empty result: nothing to hand over this time, wait before retrying.
        time.sleep(1)
        print 'Nothing New'
    else:
        print 'Call Function: ', line
Well, since you are using Python, you can just open a file and keep reading lines from it.
f = open('file.log')
If the line read is not empty, you process it.
line = f.readline()
if line:
// Do what you want with the line
You may be missing that it is ok to keep calling readline at the EOF. It will just keep returning an empty string in this case. And when something is appended to the log file, the reading will continue from where it stopped, as you need.
If you are looking for a solution that uses events, or a particular library, please specify this in your question. Otherwise, I think this solution is just fine.
Simplest solution for me is using watchdog's tool watchmedo
From https://pypi.python.org/pypi/watchdog I now have a process that looks up the sql files in a directory and executes them if necessary.
watchmedo shell-command \
--patterns="*.sql" \
--recursive \
--command='~/Desktop/load_files_into_mysql_database.sh' \
.
As you can see in Tim Golden's article, pointed by Horst Gutmann, WIN32 is relatively complex and watches directories, not a single file.
I'd like to suggest you look into IronPython, which is a .NET python implementation.
With IronPython you can use all the .NET functionality - including
System.IO.FileSystemWatcher
Which handles single files with a simple Event interface.
This is an example of checking a file for changes. One that may not be the best way of doing it, but it sure is a short way.
Handy tool for restarting application when changes have been made to the source. I made this when playing with pygame so I can see effects take place immediately after file save.
When used in pygame make sure the stuff in the 'while' loop is placed in your game loop aka update or whatever. Otherwise your application will get stuck in an infinite loop and you will not see your game updating.
# Size of the source file at start-up; a different size later triggers a restart.
file_size_stored = os.stat('neuron.py').st_size
while True:
    try:
        file_size_current = os.stat('neuron.py').st_size
    except OSError:
        # stat failed (e.g. the file is momentarily missing): try again.
        # Only OSError is ignored, so Ctrl-C and real bugs still surface.
        continue
    if file_size_stored != file_size_current:
        restart_program()
In case you wanted the restart code which I found on the web. Here it is. (Not relevant to the question, though it could come in handy)
def restart_program(): #restart application
    """Replace the running process with a new run of the same command line."""
    interpreter = sys.executable
    os.execl(interpreter, interpreter, *sys.argv)
Have fun making electrons do what you want them to do.
Seems that no one has posted fswatch. It is a cross-platform file system watcher. Just install it, run it and follow the prompts.
I've used it with python and golang programs and it just works.
ACTIONS = {
1 : "Created",
2 : "Deleted",
3 : "Updated",
4 : "Renamed from something",
5 : "Renamed to something"
}
FILE_LIST_DIRECTORY = 0x0001
class myThread(threading.Thread):
    """Daemon thread that runs ``startMonitor`` for one file."""

    def __init__(self, threadID, fileName, directory, origin):
        super(myThread, self).__init__()
        self.daemon = True
        self.threadID = threadID
        self.fileName = fileName
        self.dir = directory
        self.originalFile = origin

    def run(self):
        """Thread body: start monitoring with the stored file, directory and origin."""
        startMonitor(self.fileName, self.dir, self.originalFile)
def startMonitor(fileMonitoring, dirPath, originalFile):
    """Watch ``dirPath`` and react to "Updated" events for ``fileMonitoring``.

    fileMonitoring -- path of the file of interest, compared against
                      ``os.path.join(dirPath, <changed name>)``
    dirPath        -- directory handed to ReadDirectoryChangesW
    originalFile   -- not used in the visible part of the body
                      (the reaction itself is elided in this snippet)
    """
    hDir = win32file.CreateFile(
        dirPath,
        FILE_LIST_DIRECTORY,
        win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,
        None,
        win32con.OPEN_EXISTING,
        win32con.FILE_FLAG_BACKUP_SEMANTICS,
        None
    )
    # Normalise once so that "./x" vs "x" or case differences do not matter.
    watched = os.path.normcase(os.path.normpath(fileMonitoring))
    # Wait for new data and call ProcessNewData for each new chunk that's
    # written
    while 1:
        # Wait for a change to occur
        results = win32file.ReadDirectoryChangesW(
            hDir,
            1024,
            False,
            win32con.FILE_NOTIFY_CHANGE_LAST_WRITE,
            None,
            None
        )
        # For each change, check to see if it's updating the file we're
        # interested in
        for action, file_M in results:
            full_filename = os.path.join(dirPath, file_M)
            # print file_M, ACTIONS.get(action, "Unknown")
            changed = os.path.normcase(os.path.normpath(full_filename))
            # Compare the paths themselves (not merely their lengths, which
            # would match any other file with an equally long name).
            if changed == watched and action == 3:  # 3 is "Updated" in ACTIONS
                #copy to main file
                ...
Since I have it installed globally, my favorite approach is to use nodemon. If your source code is in src, and your entry point is src/app.py, then it's as easy as:
nodemon -w 'src/**' -e py,html --exec python src/app.py
... where -e py,html lets you control what file types to watch for changes.
Here's an example geared toward watching input files that write no more than one line per second but usually a lot less. The goal is to append the last line (most recent write) to the specified output file. I've copied this from one of my projects and just deleted all the irrelevant lines. You'll have to fill in or change the missing symbols.
from PyQt5.QtCore import QFileSystemWatcher, QSettings, QThread
from ui_main_window import Ui_MainWindow # Qt Creator gen'd
class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window that appends the last line of each changed file to a destination file."""

    def __init__(self, parent=None):
        QMainWindow.__init__(self, parent)
        Ui_MainWindow.__init__(self)
        self._fileWatcher = QFileSystemWatcher()
        self._fileWatcher.fileChanged.connect(self.fileChanged)

    def fileChanged(self, filepath):
        """Append the last line of ``filepath`` to its mapped destination file.

        The destination is looked up in ``self._filemap[filepath]['dest file']``
        (``_filemap`` is not set up in this snippet). An empty source file is
        ignored.
        """
        QThread.msleep(300)  # Reqd on some machines, give chance for write to complete
        # ^^ About to test this, may need more sophisticated solution
        lastLine = None
        with open(filepath) as source:
            # Stream through the file; only the final line is kept.
            for lastLine in source:
                pass
        if lastLine is None:
            return  # empty file: there is no last line to copy
        destPath = self._filemap[filepath]['dest file']
        with open(destPath, 'a') as out_file:  # a= append
            out_file.write(lastLine)
Of course, the encompassing QMainWindow class is not strictly required, ie. you can use QFileSystemWatcher alone.
Just to put this out there since no one mentioned it: there's a Python module in the Standard Library named filecmp which has this cmp() function that compares two files.
Just make sure you don't do from filecmp import cmp to not overshadow the built-in cmp() function in Python 2.x. That's okay in Python 3.x, though, since there's no such built-in cmp() function anymore.
Anyway, this is how its use looks like:
import filecmp
filecmp.cmp(path_to_file_1, path_to_file_2, shallow=True)
The argument shallow defaults to True. If the argument's value is True, then only the metadata of the files are compared; however, if the argument's value is False, then the contents of the files are compared.
Maybe this information will be useful to someone.
watchfiles (https://github.com/samuelcolvin/watchfiles) is a Python API and CLI that uses the Notify (https://github.com/notify-rs/notify) library written in Rust.
The rust implementation currently (2022-10-09) supports:
Linux / Android: inotify
macOS: FSEvents or kqueue, see features
Windows: ReadDirectoryChangesW
FreeBSD / NetBSD / OpenBSD / DragonflyBSD: kqueue
All platforms: polling
Binaries available on PyPI (https://pypi.org/project/watchfiles/) and conda-forge (https://github.com/conda-forge/watchfiles-feedstock).
You can also use a simple library called repyt, here is an example:
repyt ./app.py
Related to @4Oh4's solution: a smooth change to watch a list of files:
import os
import sys
import time
class Watcher(object):
    """Poll the modification times of several files and react when one changes.

    For each changed file its content is printed and ``call_func_on_change``
    (if given) is called with the extra positional and keyword arguments
    passed to the constructor. Files start with a cached timestamp of 0, so
    the first poll of an existing file normally counts as a change.
    """
    running = True  # set to False to make watch() stop after the current pass
    refresh_delay_secs = 1  # pause before each poll, in seconds

    # Constructor
    def __init__(self, watch_files, call_func_on_change=None, *args, **kwargs):
        self._cached_stamp = 0
        self._cached_stamp_files = {}  # path -> last mtime seen
        self.filenames = watch_files
        self.call_func_on_change = call_func_on_change
        self.args = args
        self.kwargs = kwargs

    # Look for changes
    def look(self):
        """Check every watched file once.

        Raises whatever ``os.stat``/``open`` raise (e.g. ``FileNotFoundError``);
        files after the failing one are not checked in that pass.
        """
        for path in self.filenames:
            stamp = os.stat(path).st_mtime
            if stamp != self._cached_stamp_files.setdefault(path, 0):
                self._cached_stamp_files[path] = stamp
                # File has changed, so do something...
                # `with` closes the handle; it used to be left open on every change.
                with open(path, 'r') as file_to_read:
                    value = file_to_read.read()
                print("value from file", value)
                if self.call_func_on_change is not None:
                    self.call_func_on_change(*self.args, **self.kwargs)

    # Keep watching in a loop
    def watch(self):
        """Poll until ``running`` becomes false or the user presses Ctrl-C."""
        while self.running:
            try:
                # Look for changes
                time.sleep(self.refresh_delay_secs)
                self.look()
            except KeyboardInterrupt:
                print('\nDone')
                break
            except FileNotFoundError:
                # Action on file not found
                pass
            except Exception as e:
                print(e)
                print('Unhandled error: %s' % sys.exc_info()[0])
# Call this function each time a change happens
def custom_action(text):
    """Example change callback: echo ``text`` to standard output."""
    print(text)
    # pass
watch_files = ['/Users/mexekanez/my_file.txt', '/Users/mexekanez/my_file1.txt']
# watcher = Watcher(watch_file) # simple
if __name__ == "__main__":
watcher = Watcher(watch_files, custom_action, text='yes, changed') # also call custom action function
watcher.watch() # start the watch going
The best and simplest solution is to use pygtail:
https://pypi.python.org/pypi/pygtail
from pygtail import Pygtail
import sys
while True:
for line in Pygtail("some.log"):
sys.stdout.write(line)
import inotify.adapters
from datetime import datetime
LOG_FILE='/var/log/mysql/server_audit.log'
def main():
    """After each inotify event on LOG_FILE, print the entries newer than the newest one seen."""
    newest_seen = datetime.now()
    while True:
        # Block until the first event on the log file arrives.
        watcher = inotify.adapters.Inotify()
        watcher.add_watch(LOG_FILE)
        next(watcher.event_gen(yield_nones=False), None)
        del watcher
        # Re-read the whole log; the first comma-separated field is the timestamp.
        with open(LOG_FILE, 'r') as log:
            for raw_line in log:
                entry = raw_line.split(',')
                entry_time = datetime.strptime(entry[0], '%Y%m%d %H:%M:%S')
                if entry_time > newest_seen:
                    newest_seen = entry_time
                    print(entry)
if __name__ == '__main__':
main()
The easiest solution would be to read the same file twice, some interval apart, and compare the two versions. You could try something like this:
while True:
    # Capturing the two instances models.py after certain interval of time
    print("Looking for changes in " + app_name.capitalize() + " models.py\nPress 'CTRL + C' to stop the program")
    # The file is only read, so plain 'r' is enough ('r+' demands write access).
    with open(app_name.capitalize() + '/filename', 'r') as app_models_file:
        filename_content = app_models_file.read()
    time.sleep(5)
    with open(app_name.capitalize() + '/filename', 'r') as app_models_file_1:
        filename_content_1 = app_models_file_1.read()
    # Comparing models.py after certain interval of time
    if filename_content != filename_content_1:
        print("You made a change in " + app_name.capitalize() + " filename.\n")
        answer = str(input("Do something with the file?(y/n):"))
        if answer == 'y':
            pass  # Do Something
        elif answer == 'n':
            pass  # pass or do something
        else:
            print("Invalid input")
If you're using windows, create this POLL.CMD file
@echo off
rem Usage: poll <source> <destination>
rem Once per second, copy the files xcopy /m selects from %1 to %2.
:top
rem The find filter only hides the "File(s) copied" summary line.
xcopy /m /y %1 %2 | find /v "File(s) copied"
timeout /T 1 > nul
goto :top
then you can type "poll dir1 dir2" and it will copy all the files from dir1 to dir2 and check for updates once per second.
The "find" is optional, just to make the console less noisy.
This is not recursive. Maybe you could make it recursive using /e on the xcopy.
I don't know any Windows specific function. You could try getting the MD5 hash of the file every second/minute/hour (depends on how fast you need it) and compare it to the last hash. When it differs you know the file has been changed and you read out the newest lines.
I'd try something like this.
try:
    f = open(filePath)
except IOError:
    print("No such file: %s" % filePath)
    raw_input("Press Enter to close window")
    # There is no file object to work with, so stop here instead of failing
    # later with a NameError on `f`.
    raise SystemExit(1)
try:
    # Skip what is already in the file: only lines appended from now on are analysed.
    f.seek(0, 2)
    while True:
        line = f.readline()
        try:
            if not line:
                # Nothing new yet: wait a second and retry.
                time.sleep(1)
            else:
                functionThatAnalisesTheLine(line)
        except Exception:
            # handle the exception somehow (for example, log the trace) and raise the same exception again
            raw_input("Press Enter to close window")
            raise  # bare raise keeps the original traceback
finally:
    f.close()
The loop checks if there is a new line(s) since last time file was read - if there is, it's read and passed to the functionThatAnalisesTheLine function. If not, script waits 1 second and retries the process.
I have a log file being written by another process which I want to watch for changes. Each time a change occurs I'd like to read the new data in to do some processing on it.
What's the best way to do this? I was hoping there'd be some sort of hook from the PyWin32 library. I've found the win32file.FindNextChangeNotification function but have no idea how to ask it to watch a specific file.
If anyone's done anything like this I'd be really grateful to hear how...
[Edit] I should have mentioned that I was after a solution that doesn't require polling.
[Edit] Curses! It seems this doesn't work over a mapped network drive. I'm guessing windows doesn't 'hear' any updates to the file the way it does on a local disk.
Did you try using Watchdog?
Python API library and shell utilities to monitor file system events.
Directory monitoring made easy with
A cross-platform API.
A shell tool to run commands in response to directory changes.
Get started quickly with a simple example in Quickstart...
If polling is good enough for you, I'd just watch if the "modified time" file stat changes. To read it:
os.stat(filename).st_mtime
(Also note that the Windows native change event solution does not work in all circumstances, e.g. on network drives.)
import os
class Monkey(object):
    """Detect changes to a file by polling its modification time.

    Args:
        filename: Path of the file to watch. Defaults to the placeholder
            path used in the original example.
    """

    def __init__(self, filename='/path/to/file'):
        # Start at 0 so the first ook() on an ordinary file reports a change.
        self._cached_stamp = 0
        self.filename = filename

    def ook(self):
        """Check the file once.

        Returns:
            True if the modification time differs from the one seen on the
            previous call, False otherwise.

        Raises:
            OSError: if the file cannot be stat'ed (for example, it is missing).
        """
        stamp = os.stat(self.filename).st_mtime
        if stamp == self._cached_stamp:
            return False
        self._cached_stamp = stamp
        # File has changed, so do something...
        return True
If you want a multiplatform solution, then check QFileSystemWatcher.
Here an example code (not sanitized):
from PyQt4 import QtCore
@QtCore.pyqtSlot(str)
def directory_changed(path):
    """Slot called with the path of a watched directory that changed."""
    print('Directory Changed!!!')


@QtCore.pyqtSlot(str)
def file_changed(path):
    """Slot called with the path of a watched file that changed."""
    print('File Changed!!!')


# NOTE(review): signals are presumably only delivered while a Qt event loop
# (e.g. QCoreApplication.exec_()) is running - confirm in the host application.
fs_watcher = QtCore.QFileSystemWatcher(['/path/to/files_1', '/path/to/files_2', '/path/to/files_3'])

fs_watcher.connect(fs_watcher, QtCore.SIGNAL('directoryChanged(QString)'), directory_changed)
fs_watcher.connect(fs_watcher, QtCore.SIGNAL('fileChanged(QString)'), file_changed)
This will not work on Windows (except perhaps under Cygwin), but Unix users can use the "fcntl" system call. Here is an example in Python; the code is mostly the same if you need to write it in C (the function names are identical).
import time
import fcntl
import os
import signal
# NOTE(review): per fcntl(2), F_NOTIFY (dnotify) expects the descriptor to
# refer to a directory - confirm FNAME points at one.
FNAME = "/HOME/TOTO/FILETOWATCH"


def handler(signum, frame):
    """SIGIO handler: report that the watched path was modified."""
    print("File %s modified" % (FNAME,))


signal.signal(signal.SIGIO, handler)
fd = os.open(FNAME, os.O_RDONLY)
# Signal number 0 selects the default notification signal (SIGIO).
fcntl.fcntl(fd, fcntl.F_SETSIG, 0)
# DN_MULTISHOT keeps the notification armed after the first event.
fcntl.fcntl(fd, fcntl.F_NOTIFY,
            fcntl.DN_MODIFY | fcntl.DN_CREATE | fcntl.DN_MULTISHOT)

# Idle; the work happens in the signal handler.
while True:
    time.sleep(10000)
Check out pyinotify.
inotify replaces dnotify (from an earlier answer) in newer linuxes and allows file-level rather than directory-level monitoring.
For watching a single file with polling, and minimal dependencies, here is a fully fleshed-out example, based on answer from Deestan (above):
import os
import sys
import time
class Watcher(object):
    """Poll one file's modification time and run a callback when it changes.

    Class attributes:
        running: watch() loops while this is true.
        refresh_delay_secs: seconds slept before each check.
    """

    running = True
    refresh_delay_secs = 1

    # Constructor
    def __init__(self, watch_file, call_func_on_change=None, *args, **kwargs):
        """Store the file to watch and the optional callback.

        Args:
            watch_file: Path of the file to poll.
            call_func_on_change: Optional callable invoked on each change.
            *args, **kwargs: Passed through to the callback.
        """
        # Start at 0 so the first look() on an ordinary file reports a change.
        self._cached_stamp = 0
        self.filename = watch_file
        self.call_func_on_change = call_func_on_change
        self.args = args
        self.kwargs = kwargs

    # Look for changes
    def look(self):
        """Check the file once; print and fire the callback if its mtime moved.

        Raises:
            OSError: if the file cannot be stat'ed (e.g. FileNotFoundError).
        """
        stamp = os.stat(self.filename).st_mtime
        if stamp != self._cached_stamp:
            self._cached_stamp = stamp
            # File has changed, so do something...
            print('File changed')
            if self.call_func_on_change is not None:
                self.call_func_on_change(*self.args, **self.kwargs)

    # Keep watching in a loop
    def watch(self):
        """Poll until `running` is false or the user presses Ctrl+C."""
        while self.running:
            try:
                # Look for changes
                time.sleep(self.refresh_delay_secs)
                self.look()
            except KeyboardInterrupt:
                print('\nDone')
                break
            except FileNotFoundError:
                # Action on file not found
                pass
            except Exception:
                # Only ordinary errors are reported and survived; SystemExit
                # and friends must propagate so the process can terminate.
                print('Unhandled error: %s' % sys.exc_info()[0])
# Call this function each time a change happens
def custom_action(text):
    """Example change callback: print the given text."""
    print(text)


watch_file = 'my_file.txt'

# Guarded so that importing this module does not start the endless watch loop.
if __name__ == "__main__":
    # watcher = Watcher(watch_file)  # simple
    watcher = Watcher(watch_file, custom_action, text='yes, changed')  # also call custom action function
    watcher.watch()  # start the watch going
Well after a bit of hacking of Tim Golden's script, I have the following which seems to work quite well:
import os
import win32file
import win32con
path_to_watch = "."  # look at the current directory
file_to_watch = "test.txt"  # look for changes to a file called test.txt


def ProcessNewData(newData):
    """Handle a chunk of text that was appended to the watched file."""
    print("Text added: %s" % newData)


# Set up the bits we'll need for output
ACTIONS = {
    1: "Created",
    2: "Deleted",
    3: "Updated",
    4: "Renamed from something",
    5: "Renamed to something"
}
FILE_LIST_DIRECTORY = 0x0001

# Directory handle that ReadDirectoryChangesW listens on.
hDir = win32file.CreateFile(
    path_to_watch,
    FILE_LIST_DIRECTORY,
    win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,
    None,
    win32con.OPEN_EXISTING,
    win32con.FILE_FLAG_BACKUP_SEMANTICS,
    None
)

# Open the file we're interested in; `with` closes it if the loop is interrupted
with open(file_to_watch, "r") as a:
    # Throw away any existing log data
    a.read()

    # Wait for new data and call ProcessNewData for each new chunk that's written
    while 1:
        # Wait for a change to occur
        results = win32file.ReadDirectoryChangesW(
            hDir,
            1024,
            False,
            win32con.FILE_NOTIFY_CHANGE_LAST_WRITE,
            None,
            None
        )

        # For each change, check to see if it's updating the file we're interested in
        for action, changed_name in results:
            # print(changed_name, ACTIONS.get(action, "Unknown"))
            if changed_name == file_to_watch:
                # The handle stays at the previous EOF, so this reads only new text.
                newText = a.read()
                if newText != "":
                    ProcessNewData(newText)
It could probably do with a load more error checking, but for simply watching a log file and doing some processing on it before spitting it out to the screen, this works well.
Thanks everyone for your input - great stuff!
Check my answer to a similar question. You could try the same loop in Python. This page suggests:
import time
# NOTE: `file` is assumed to be an already-open file object supplied by the
# surrounding code.
while 1:
    # Remember the position so a failed read can be retried from the same spot.
    where = file.tell()
    line = file.readline()
    if not line:
        time.sleep(1)
        file.seek(where)
    else:
        print(line, end='')  # already has newline
Also see the question tail() a file with Python.
This is another modification of Tim Golden's script that runs on Unix-like systems and adds a simple watcher for file modification by using a dict (file => time).
usage: whateverName.py path_to_dir_to_watch
#!/usr/bin/env python
import os, sys, time
def files_to_timestamp(path):
    """Snapshot the modification times of the entries directly inside *path*.

    Args:
        path: Directory to list (not recursive).

    Returns:
        dict mapping each entry's joined path to its mtime. Entries that
        cannot be stat'ed (e.g. removed between listing and stat) are omitted.

    Raises:
        OSError: if *path* itself cannot be listed.
    """
    stamps = {}
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        try:
            stamps[full_path] = os.path.getmtime(full_path)
        except OSError:
            # The entry vanished (or is a dangling link); treat it as absent
            # rather than crashing the watcher.
            continue
    return stamps
if __name__ == "__main__":
    # Usage: script.py path_to_dir_to_watch
    path_to_watch = sys.argv[1]
    print('Watching {}..'.format(path_to_watch))

    before = files_to_timestamp(path_to_watch)

    while 1:
        time.sleep(2)
        after = files_to_timestamp(path_to_watch)

        added = [f for f in after if f not in before]
        removed = [f for f in before if f not in after]
        # Compare the two snapshots rather than stat'ing again: a fresh stat
        # can raise if the file was just deleted, and would report the same
        # change twice because `before` is replaced by `after` below.
        modified = [f for f in before if f in after and after[f] != before[f]]

        if added:
            print('Added: {}'.format(', '.join(added)))
        if removed:
            print('Removed: {}'.format(', '.join(removed)))
        if modified:
            print('Modified: {}'.format(', '.join(modified)))

        before = after
Here is a simplified version of Kender's code that appears to do the same trick and does not import the entire file:
# Check file for new data.
import time
# Reads from the start of the file, then keeps polling for appended lines.
with open(r'c:\temp\test.txt', 'r') as f:
    while True:
        line = f.readline()
        if not line:
            # readline() returns '' at EOF; wait and try again.
            time.sleep(1)
            print('Nothing New')
        else:
            print('Call Function: ', line)
Well, since you are using Python, you can just open a file and keep reading lines from it.
f = open('file.log')
If the line read is not empty, you process it.
line = f.readline()
if line:
    # Do what you want with the line
    pass
You may be missing that it is ok to keep calling readline at the EOF. It will just keep returning an empty string in this case. And when something is appended to the log file, the reading will continue from where it stopped, as you need.
If you are looking for a solution that uses events, or a particular library, please specify this in your question. Otherwise, I think this solution is just fine.
Simplest solution for me is using watchdog's tool watchmedo
From https://pypi.python.org/pypi/watchdog I now have a process that looks up the sql files in a directory and executes them if necessary.
watchmedo shell-command \
--patterns="*.sql" \
--recursive \
--command='~/Desktop/load_files_into_mysql_database.sh' \
.
As you can see in Tim Golden's article, pointed by Horst Gutmann, WIN32 is relatively complex and watches directories, not a single file.
I'd like to suggest you look into IronPython, which is a .NET python implementation.
With IronPython you can use all the .NET functionality - including
System.IO.FileSystemWatcher
Which handles single files with a simple Event interface.
This is an example of checking a file for changes. One that may not be the best way of doing it, but it sure is a short way.
Handy tool for restarting application when changes have been made to the source. I made this when playing with pygame so I can see effects take place immediately after file save.
When used in pygame make sure the stuff in the 'while' loop is placed in your game loop aka update or whatever. Otherwise your application will get stuck in an infinite loop and you will not see your game updating.
# NOTE: comparing sizes misses edits that leave the file length unchanged.
file_size_stored = os.stat('neuron.py').st_size

# When embedding in a game, put the loop body inside the game loop instead of
# running this `while` on its own.
while True:
    try:
        file_size_current = os.stat('neuron.py').st_size
    except OSError:
        # The file may be missing for a moment while an editor saves it.
        pass
    else:
        if file_size_stored != file_size_current:
            # Errors from the restart itself are no longer silently swallowed.
            restart_program()
In case you wanted the restart code which I found on the web. Here it is. (Not relevant to the question, though it could come in handy)
def restart_program():  # restart application
    """Replace the current process with a fresh run of the same script.

    Re-executes the running interpreter with the original command-line
    arguments; on success this call does not return.
    """
    interpreter = sys.executable
    os.execl(interpreter, interpreter, *sys.argv)
Have fun making electrons do what you want them to do.
Seems that no one has posted fswatch. It is a cross-platform file system watcher. Just install it, run it and follow the prompts.
I've used it with python and golang programs and it just works.
ACTIONS = {
1 : "Created",
2 : "Deleted",
3 : "Updated",
4 : "Renamed from something",
5 : "Renamed to something"
}
FILE_LIST_DIRECTORY = 0x0001
class myThread(threading.Thread):
    """Daemon thread that runs startMonitor() for one file.

    Args:
        threadID: Caller-chosen identifier, stored as-is.
        fileName: Passed to startMonitor as the file to monitor.
        directory: Passed to startMonitor as the directory to watch.
        origin: Passed to startMonitor as the original file.
    """

    def __init__(self, threadID, fileName, directory, origin):
        super(myThread, self).__init__()
        self.threadID = threadID
        self.fileName = fileName
        # Daemon threads do not keep the interpreter alive at shutdown.
        self.daemon = True
        self.dir = directory
        self.originalFile = origin

    def run(self):
        """Thread body: hand the stored arguments to startMonitor()."""
        startMonitor(self.fileName, self.dir, self.originalFile)
def startMonitor(fileMonitoring, dirPath, originalFile):
    """Block forever, reacting to last-write changes of one file in *dirPath*.

    Args:
        fileMonitoring: Path of the file to react to.
            # assumes this is a path to the file, not a bare name - TODO confirm
        dirPath: Directory handed to ReadDirectoryChangesW.
        originalFile: Not used by the visible part of this function.
    """
    hDir = win32file.CreateFile(
        dirPath,
        FILE_LIST_DIRECTORY,
        win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,
        None,
        win32con.OPEN_EXISTING,
        win32con.FILE_FLAG_BACKUP_SEMANTICS,
        None
    )
    # Normalise once so files are matched by path; the previous length
    # comparison matched any file whose path happened to be equally long.
    target = os.path.normcase(os.path.abspath(fileMonitoring))
    # Wait for new data and call ProcessNewData for each new chunk that's
    # written
    while 1:
        # Wait for a change to occur
        results = win32file.ReadDirectoryChangesW(
            hDir,
            1024,
            False,
            win32con.FILE_NOTIFY_CHANGE_LAST_WRITE,
            None,
            None
        )
        # For each change, check to see if it's updating the file we're
        # interested in
        for action, file_M in results:
            full_filename = os.path.join(dirPath, file_M)
            #print file, ACTIONS.get (action, "Unknown")
            changed = os.path.normcase(os.path.abspath(full_filename))
            # Action 3 is "Updated" in the ACTIONS table.
            if changed == target and action == 3:
                #copy to main file
                ...
Since I have it installed globally, my favorite approach is to use nodemon. If your source code is in src, and your entry point is src/app.py, then it's as easy as:
nodemon -w 'src/**' -e py,html --exec python src/app.py
... where -e py,html lets you control what file types to watch for changes.
Here's an example geared toward watching input files that write no more than one line per second but usually a lot less. The goal is to append the last line (most recent write) to the specified output file. I've copied this from one of my projects and just deleted all the irrelevant lines. You'll have to fill in or change the missing symbols.
from PyQt5.QtCore import QFileSystemWatcher, QSettings, QThread
from ui_main_window import Ui_MainWindow # Qt Creator gen'd
class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window that appends the last line of each changed file to a
    destination file looked up in `self._filemap`."""

    def __init__(self, parent=None):
        QMainWindow.__init__(self, parent)
        Ui_MainWindow.__init__(self)
        self._fileWatcher = QFileSystemWatcher()
        self._fileWatcher.fileChanged.connect(self.fileChanged)

    def fileChanged(self, filepath):
        """Slot for QFileSystemWatcher.fileChanged.

        Args:
            filepath: Path of the file that changed; also the key into
                `self._filemap` (populated elsewhere).
        """
        QThread.msleep(300)  # Reqd on some machines, give chance for write to complete
        # ^^ About to test this, may need more sophisticated solution
        lastLine = None
        with open(filepath) as in_file:
            # Stream the file and keep only the final line, without building
            # a list of every line.
            for lastLine in in_file:
                pass
        if lastLine is None:
            # Empty file: nothing to forward (previously an IndexError).
            return
        destPath = self._filemap[filepath]['dest file']
        with open(destPath, 'a') as out_file:  # a= append
            out_file.writelines([lastLine])
Of course, the encompassing QMainWindow class is not strictly required, ie. you can use QFileSystemWatcher alone.
Just to put this out there since no one mentioned it: there's a Python module in the Standard Library named filecmp which has this cmp() function that compares two files.
Just make sure you don't do from filecmp import cmp to not overshadow the built-in cmp() function in Python 2.x. That's okay in Python 3.x, though, since there's no such built-in cmp() function anymore.
Anyway, this is what its use looks like:
import filecmp
filecmp.cmp(path_to_file_1, path_to_file_2, shallow=True)
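The argument shallow defaults to True. If it is True, the files' os.stat() signatures (file type, size, and modification time) are compared first, and the files are taken to be equal when the signatures are identical; only if the signatures differ are the sizes and contents compared. If it is False, the files are compared by size and contents regardless of their signatures.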
Maybe this information will be useful to someone.
watchfiles (https://github.com/samuelcolvin/watchfiles) is a Python API and CLI that uses the Notify (https://github.com/notify-rs/notify) library written in Rust.
The rust implementation currently (2022-10-09) supports:
Linux / Android: inotify
macOS: FSEvents or kqueue, see features
Windows: ReadDirectoryChangesW
FreeBSD / NetBSD / OpenBSD / DragonflyBSD: kqueue
All platforms: polling
Binaries available on PyPI (https://pypi.org/project/watchfiles/) and conda-forge (https://github.com/conda-forge/watchfiles-feedstock).
You can also use a simple library called repyt, here is an example:
repyt ./app.py
Related to @4Oh4's solution: a small change so that it watches a list of files:
import os
import sys
import time
class Watcher(object):
    """Poll several files' modification times and run a callback on change.

    Class attributes:
        running: watch() loops while this is true.
        refresh_delay_secs: seconds slept before each check.
    """

    running = True
    refresh_delay_secs = 1

    # Constructor
    def __init__(self, watch_files, call_func_on_change=None, *args, **kwargs):
        """Store the files to watch and the optional callback.

        Args:
            watch_files: Iterable of file paths to poll.
            call_func_on_change: Optional callable invoked for each changed file.
            *args, **kwargs: Passed through to the callback.
        """
        # Kept for compatibility; look() does not read it.
        self._cached_stamp = 0
        # Last seen mtime per path.
        self._cached_stamp_files = {}
        self.filenames = watch_files
        self.call_func_on_change = call_func_on_change
        self.args = args
        self.kwargs = kwargs

    # Look for changes
    def look(self):
        """Check every file once; print its content and fire the callback
        for each file whose mtime moved since the previous check."""
        for path in self.filenames:
            try:
                stamp = os.stat(path).st_mtime
            except FileNotFoundError:
                # A missing file must not stop the remaining files from
                # being checked.
                continue
            previous = self._cached_stamp_files.setdefault(path, 0)
            if stamp != previous:
                self._cached_stamp_files[path] = stamp
                # File has changed, so do something...
                with open(path, 'r') as file_to_read:
                    value = file_to_read.read()
                print("value from file", value)
                if self.call_func_on_change is not None:
                    self.call_func_on_change(*self.args, **self.kwargs)

    # Keep watching in a loop
    def watch(self):
        """Poll until `running` is false or the user presses Ctrl+C."""
        while self.running:
            try:
                # Look for changes
                time.sleep(self.refresh_delay_secs)
                self.look()
            except KeyboardInterrupt:
                print('\nDone')
                break
            except FileNotFoundError:
                # Action on file not found
                pass
            except Exception as e:
                print(e)
                print('Unhandled error: %s' % sys.exc_info()[0])
# Call this function each time a change happens
def custom_action(text):
    """Example change callback: print the given text."""
    print(text)


watch_files = ['/Users/mexekanez/my_file.txt', '/Users/mexekanez/my_file1.txt']

# watcher = Watcher(watch_files)  # simple

if __name__ == "__main__":
    watcher = Watcher(watch_files, custom_action, text='yes, changed')  # also call custom action function
    watcher.watch()  # start the watch going
The best and simplest solution is to use pygtail:
https://pypi.python.org/pypi/pygtail
import sys
import time

from pygtail import Pygtail

while True:
    # Emit whatever was appended to the log since the previous pass.
    for line in Pygtail("some.log"):
        sys.stdout.write(line)
    # Pause between passes so an idle log does not spin the CPU.
    time.sleep(1)
import inotify.adapters
from datetime import datetime
LOG_FILE='/var/log/mysql/server_audit.log'
def main():
    """Print audit-log entries newer than the last one seen, forever.

    Blocks on an inotify event for LOG_FILE, then rescans the file and prints
    every entry whose leading timestamp is later than the newest one printed
    so far (initially: the start time of this function).
    """
    start_time = datetime.now()
    while True:
        i = inotify.adapters.Inotify()
        i.add_watch(LOG_FILE)
        # Block until the first event, then stop listening.
        for event in i.event_gen(yield_nones=False):
            break
        # NOTE(review): the watcher is dropped before the file is read,
        # presumably so that our own open/read does not generate further
        # events - confirm before restructuring.
        del i
        with open(LOG_FILE, 'r') as f:
            for line in f:
                entry = line.split(',')
                try:
                    entry_time = datetime.strptime(entry[0],
                                                   '%Y%m%d %H:%M:%S')
                except ValueError:
                    # Skip lines without a parsable timestamp (e.g. a line
                    # still being written) instead of killing the watcher.
                    continue
                if entry_time > start_time:
                    start_time = entry_time
                    print(entry)
if __name__ == '__main__':
main()
The easiest solution is to read the same file twice, some interval apart, and compare the two versions. You could try something like this:
# NOTE: `app_name` must be defined (and `time` imported) by the surrounding code.
while True:
    # Capturing the two instances models.py after certain interval of time
    print("Looking for changes in " + app_name.capitalize() + " models.py\nPress 'CTRL + C' to stop the program")
    # Read-only mode is enough; 'r+' would fail on files we cannot write to.
    with open(app_name.capitalize() + '/filename', 'r') as app_models_file:
        filename_content = app_models_file.read()
    time.sleep(5)
    with open(app_name.capitalize() + '/filename', 'r') as app_models_file_1:
        filename_content_1 = app_models_file_1.read()
    # Comparing models.py after certain interval of time
    if filename_content != filename_content_1:
        print("You made a change in " + app_name.capitalize() + " filename.\n")
        cmd = str(input("Do something with the file?(y/n):"))
        if cmd == 'y':
            # Do Something
            pass
        elif cmd == 'n':
            # pass or do something
            pass
        else:
            print("Invalid input")
If you're using windows, create this POLL.CMD file
@echo off
rem Copy changed files from %1 to %2 once per second.
:top
xcopy /m /y %1 %2 | find /v "File(s) copied"
timeout /T 1 > nul
goto :top
then you can type "poll dir1 dir2" and it will copy all the files from dir1 to dir2 and check for updates once per second.
The "find" is optional, just to make the console less noisy.
This is not recursive. Maybe you could make it recursive using /e on the xcopy.
I don't know any Windows specific function. You could try getting the MD5 hash of the file every second/minute/hour (depends on how fast you need it) and compare it to the last hash. When it differs you know the file has been changed and you read out the newest lines.
I'd try something like this.
import os
import time

# Follow a file and hand every newly appended line to a callback.
# NOTE: `filePath` and `functionThatAnalisesTheLine` are placeholders that must
# be defined before this snippet runs. Written for Python 3 (`input`, `print()`).
try:
    f = open(filePath)
except IOError:
    print("No such file: %s" % filePath)
    input("Press Enter to close window")
    # Stop here: there is no file object to follow.
    raise SystemExit(1)

try:
    # Jump to the end so only lines appended from now on are processed
    # (avoids loading the whole existing file into memory just to skip it).
    f.seek(0, os.SEEK_END)
    while True:
        line = f.readline()
        try:
            if not line:
                # readline() returns '' at EOF; wait before polling again.
                time.sleep(1)
            else:
                functionThatAnalisesTheLine(line)
        except Exception:
            # handle the exception somehow (for example, log the trace) and raise the same exception again
            input("Press Enter to close window")
            # Bare raise keeps the original traceback intact.
            raise
finally:
    f.close()
The loop checks if there is a new line(s) since last time file was read - if there is, it's read and passed to the functionThatAnalisesTheLine function. If not, script waits 1 second and retries the process.