I solved my last problem, but now I get the following error:
Traceback (most recent call last):
File "C:\Users\Qihua Huang\AppData\Local\Programs\Python\Python310\Lib\site-packages\win32\lib\pywintypes.py", line 115, in <module>
__import_pywin32_system_module__("pywintypes", globals())
File "C:\Users\Qihua Huang\AppData\Local\Programs\Python\Python310\Lib\site-packages\win32\lib\pywintypes.py", line 104, in __import_pywin32_system_module__
old_mod = sys.modules[modname]
KeyError: 'pywintypes'
I opened pywintypes.py and inserted print statements to track down the error:
# Magic utility that "redirects" to pywintypesxx.dll
import importlib.util, importlib.machinery, sys, os
def __import_pywin32_system_module__(modname, globs):
    """Locate the pywin32 system DLL for ``modname`` and merge it into ``globs``.

    The DLL file name is built from ``modname``, the running Python's
    major/minor version and a debug-build suffix. Once a path has been
    found, the DLL is loaded as an extension module named ``modname`` and
    its namespace is copied into ``globs``.

    Raises:
        ImportError: if no candidate location contains the DLL.
        KeyError: if ``modname`` is not already present in ``sys.modules``
            when the lookup of the "old" module is made (this is the error
            shown in the traceback above).
    """
    # This has been through a number of iterations. The problem: how to
    # locate pywintypesXX.dll when it may be in a number of places, and how
    # to avoid ever loading it twice. This problem is compounded by the
    # fact that the "right" way to do this requires win32api, but this
    # itself requires pywintypesXX.
    # And the killer problem is that someone may have done 'import win32api'
    # before this code is called. In that case Windows will have already
    # loaded pywintypesXX as part of loading win32api - but by the time
    # we get here, we may locate a different one. This appears to work, but
    # then starts raising bizarre TypeErrors complaining that something
    # is not a pywintypes type when it clearly is!
    # So in what we hope is the last major iteration of this, we now
    # rely on a _win32sysloader module, implemented in C but not relying
    # on pywintypesXX.dll. It then can check if the DLL we are looking for
    # lib is already loaded.
    # See if this is a debug build.
    suffix = "_d" if "_d.pyd" in importlib.machinery.EXTENSION_SUFFIXES else ""
    filename = "%s%d%d%s.dll" % (modname,sys.version_info[0],sys.version_info[1],suffix)
    if hasattr(sys, "frozen"):
        # If we are running from a frozen program (py2exe, McMillan, freeze)
        # then we try and load the DLL from our sys.path
        # XXX - This path may also benefit from _win32sysloader? However,
        # MarkH has never seen the DLL load problem with py2exe programs...
        for look in sys.path:
            # If the sys.path entry is a (presumably) .zip file, use the
            # directory
            if os.path.isfile(look):
                look = os.path.dirname(look)
            found = os.path.join(look, filename)
            if os.path.isfile(found):
                break
        else:
            # The loop finished without a `break`: no sys.path entry had the DLL.
            raise ImportError(
                "Module '%s' isn't in frozen sys.path %s" % (modname, sys.path)
            )
    else:
        # First see if it already in our process - if so, we must use that.
        from win32 import _win32sysloader
        found = _win32sysloader.GetModuleFilename(filename)
        if found is None:
            # We ask Windows to load it next. This is in an attempt to
            # get the exact same module loaded should pywintypes be imported
            # first (which is how we are here) or if, eg, win32api was imported
            # first thereby implicitly loading the DLL.
            # Sadly though, it doesn't quite work - if pywintypesxx.dll
            # is in system32 *and* the executable's directory, on XP SP2, an
            # import of win32api will cause Windows to load pywintypes
            # from system32, where LoadLibrary for that name will
            # load the one in the exe's dir.
            # That shouldn't really matter though, so long as we only ever
            # get one loaded.
            found = _win32sysloader.LoadModule(filename)
        if found is None:
            # Windows can't find it - which although isn't relevent here,
            # means that we *must* be the first win32 import, as an attempt
            # to import win32api etc would fail when Windows attempts to
            # locate the DLL.
            # This is most likely to happen for "non-admin" installs, where
            # we can't put the files anywhere else on the global path.
            # If there is a version in our Python directory, use that
            if os.path.isfile(os.path.join(sys.prefix, filename)):
                found = os.path.join(sys.prefix, filename)
        if found is None:
            # Not in the Python directory? Maybe we were installed via
            # easy_install...
            if os.path.isfile(os.path.join(os.path.dirname(__file__), filename)):
                found = os.path.join(os.path.dirname(__file__), filename)
        # NOTE(review): the print() calls and the hard-coded assignment below
        # are the poster's debugging additions. The post lost its indentation,
        # so their nesting level here is an assumption - confirm against the
        # poster's file. The assignment replaces whatever path the search
        # above produced, so the two fallback lookups that follow are skipped.
        print(found)
        found='C:\\Users\Qihua Huang\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\pywin32_system32\\pywintypes310.dll'
        print(found)
        # There are 2 site-packages directories - one "global" and one "user".
        # We could be in either, or both (but with different versions!). Factors include
        # virtualenvs, post-install script being run or not, `setup.py install` flags, etc.
        # In a worst-case, it means, say 'python -c "import win32api"'
        # will not work but 'python -c "import pywintypes, win32api"' will,
        # but it's better than nothing.
        # We prefer the "user" site-packages if it exists...
        if found is None:
            import site
            maybe = os.path.join(site.USER_SITE, "pywin32_system32", filename)
            print(maybe)
            if os.path.isfile(maybe):
                found = maybe
            print(found)
        # Or the "global" site-packages.
        if found is None:
            import sysconfig
            maybe = os.path.join(
                sysconfig.get_paths()["platlib"], "pywin32_system32", filename
            )
            print(maybe)
            if os.path.isfile(maybe):
                found = maybe
            print(found)
        if found is None:
            # give up in disgust.
            raise ImportError("No system module '%s' (%s)" % (modname, filename))
    # After importing the module, sys.modules is updated to the DLL we just
    # loaded - which isn't what we want. So we update sys.modules to refer to
    # this module, and update our globals from it.
    # This lookup requires `modname` to be registered in sys.modules already;
    # it is the line that raises the KeyError in the traceback above.
    old_mod = sys.modules[modname]
    # Load the DLL.
    loader = importlib.machinery.ExtensionFileLoader(modname, found)
    spec = importlib.machinery.ModuleSpec(name=modname, loader=loader, origin=found)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    # Check the sys.modules[] behaviour we describe above is true...
    assert sys.modules[modname] is mod
    # as above - re-reset to the *old* module object then update globs.
    sys.modules[modname] = old_mod
    globs.update(mod.__dict__)
__import_pywin32_system_module__("pywintypes", globals())
The output that followed before error was:
C:\Users\*user*\AppData\Local\Programs\Python\Python310\Lib\site-packages\win32\lib\pywintypes310.dll
C:\Users\*user*\AppData\Local\Programs\Python\Python310\Lib\site-packages\pywin32_system32\pywintypes310.dll
Related
I want to get all the functions and classes in module: __main__ of the source code directory: /tmp/rebound/rebound.
When I use the pyclbr.readmodule_ex API:
source_code_data = pyclbr.readmodule_ex(source_code_module, path=source_code_path)
I pass it the module and its path:
DEBUG:root:Source code module: __main__, Source code path: ['/tmp/rebound/rebound/rebound']
I then get this error:
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/importlib/util.py", line 69, in _find_spec_from_path
raise ValueError('{}.__spec__ is None'.format(name))
ValueError: __main__.__spec__ is None
I then tried to use the function that is not supposed to be used by the public: _readmodule:
source_code_data = pyclbr._readmodule(source_code_module, source_code_path, )
But I could not decide what should be the value of the parameter: inpackage.
Upon tracing the code via debugger, I spotted a mistake:
def _find_spec_from_path(name, path=None):
    """Return the spec for the specified module.
    First, sys.modules is checked to see if the module was already imported. If
    so, then sys.modules[name].__spec__ is returned. If that happens to be
    set to None, then ValueError is raised. If the module is not in
    sys.modules, then sys.meta_path is searched for a suitable spec with the
    value of 'path' given to the finders. None is returned if no spec could
    be found.
    Dotted names do not have their parent packages implicitly imported. You will
    most likely need to explicitly import all parent packages in the proper
    order for a submodule to get the correct spec.
    """
    # `path` is only consulted when `name` is absent from sys.modules.
    if name not in sys.modules:
        return _find_spec(name, path)
    else:
        # The name is already registered, so `path` is ignored from here on.
        module = sys.modules[name]
        # A None entry in sys.modules yields None rather than a spec.
        if module is None:
            return None
        try:
            spec = module.__spec__
        except AttributeError:
            # `from None` hides the AttributeError from the traceback.
            raise ValueError('{}.__spec__ is not set'.format(name)) from None
        else:
            if spec is None:
                raise ValueError('{}.__spec__ is None'.format(name))
            return spec
This is the function in the module: python3.8/importlib/util.py and it evaluates __main__ as a built-in module as it falls in the else block.
How do I differentiate __main__ of my target source code to read from the built-in __main__? In other words, how do I read the module __main__ of the codebase: rebound?
TL;DR
Try:
source_code_data = pyclbr.readmodule_ex("rebound.__main__", path=source_code_path)
Explanation
As you already know: _find_spec_from_path will search for name in sys.modules and
__main__ is always present there.
If you inspect sys.modules.keys() you'll notice that it contains dot separated module names.
Example from Ipython shell:
'IPython.display',
'IPython.extensions',
'IPython.extensions.storemagic',
'IPython.lib',
'IPython.lib.backgroundjobs',
'IPython.lib.clipboard',
'IPython.lib.display',
'IPython.lib.pretty',
'IPython.lib.security',
'IPython.paths',
And if you realize you are looking for rebound.__main__ and not __main__ it becomes obvious. In order to step into if block the name can't be in sys.modules. The last remark would be that _find_spec_from_path has no bugs.
# python3.8/importlib/util.py
def _find_spec_from_path(name, path=None):
# ...
if name not in sys.modules:
return _find_spec(name, path)
else:
#...
Let's say I have a file script.py located at path = "foo/bar/script.py". I'm looking for a way in Python to programmatically execute script.py from within my main Python program through a function execute_script(). However, I've got a few requirements that seem to prevent me from employing a naive approach involving importlib or exec():
script.py should get executed in a "fresh-looking" Python environment as if it were run through $ python script.py. That is, all relevant globals like __name__, __file__, sys.modules, sys.path and the working directory should be set accordingly and as little information as possible should leak from my main program into the file's execution. (It is okay, though, if script.py could find out through the inspect module that it wasn't executed through $ python script.py directly.)
I need access to the result of the execution, i.e. execute_script() should return the module given by script.py with all its variables, functions and classes. (This prevents starting a new Python interpreter in a subprocess.)
execute_script() must internally use open() to read in script.py. This is so that I can use the pyfakefs package to mock out the file system during unit tests. (This prevents a simple solution involving importlib.)
execute_script() must not (permanently) modify any global state in my main program like sys.path or sys.modules.
If possible, script.py should not be able to affect my main program's global state. (At the very least it should not be able to affect sys.path and sys.modules in my main program.)
I need to be able to modify the sys.path that script.py sees. execute_script() should therefore accept an optional list of system paths as argument.
Stack traces and handling of errors occurring during the execution of script.py should work as usual. (This makes a solution involving exec() difficult.)
The solution should be as future-proof as possible and not depend on implementation details of the Python interpreter.
I'd be very grateful for any ideas!
I just came across the fact that exec() also accepts code objects (that can be obtained e.g. from compile()) and have come up with an approach that seems to fulfill nearly all requirements. "nearly" because with the exception of sys.path and sys.modules the script can still affect the global state of the main program. Moreover, it also gets to see all modules that are imported before execute_script() is called. For the time being I'm happy with this, though.
Here is the full code including tests:
import os
import sys
from typing import List
# The type of module objects, obtained from an already imported module.
module = os.__class__


def create_module(name: str, file: str) -> module:
    """Create an empty module object that resembles a main module.

    Args:
        name: Value for the new module's ``__name__``.
        file: Value for the new module's ``__file__``.

    Returns:
        A new module object with ``__annotations__``, ``__builtins__`` and
        ``__file__`` set in addition to the attributes every module has.
    """
    mod = module(name)
    # Instances of `module` automatically come with properties __doc__,
    # __loader__, __name__, __package__ and __spec___. Let's add some
    # more properties that main modules usually come with:
    mod.__annotations__ = {}
    # __builtins__ doesn't show up in dir() but still exists
    mod.__builtins__ = __builtins__
    mod.__file__ = file
    return mod


def exec_script(path: str, working_dir: str, syspath: List[str] = None) -> module:
    """
    Execute a Python script as if it were executed using `$ python
    <path>` from inside the given working directory. `path` can either
    be an absolute path or a path relative to `working_dir`.

    If `syspath` is provided, a copy of it will be used as `sys.path`
    during execution. Otherwise, `sys.path` will be set to
    `sys.path[1:]` which – assuming that `sys.path` has not been
    modified so far – removes the working directory from the time when
    the current Python program was started. Either way, the directory
    containing the script at `path` will always be added at position 0
    in `sys.path` afterwards, so as to simulate execution via `$ python
    <path>`.

    `sys.modules`, `sys.path` and the current working directory are
    restored to their previous objects/values when this function exits,
    including when the script raises.

    Args:
        path: Absolute path of the script, or a path relative to
            `working_dir`.
        working_dir: Directory that is made the current working directory
            while the script runs.
        syspath: Optional list of entries to use as `sys.path` (after the
            script's own directory) during execution.

    Returns:
        The module object (named ``__main__``) in whose namespace the
        script was executed.

    Raises:
        OSError: if the script cannot be read or `working_dir` cannot be
            entered.
        Exception: anything raised by the script itself propagates.
    """
    if os.path.isabs(path):
        abs_path = path
    else:
        abs_path = os.path.join(os.path.abspath(working_dir), path)

    # Read bytes rather than text: compile() then decodes the source the way
    # the interpreter does (UTF-8 by default, or the file's coding cookie)
    # instead of using the platform's locale encoding.
    with open(abs_path, "rb") as f:
        source = f.read()

    if sys.version_info < (3, 9):
        # Prior to Python 3.9, the __file__ variable inside the main
        # module always contained the path exactly as it was given to `$
        # python`, no matter whether it is relative or absolute and/or a
        # symlink.
        the__file__ = path
    else:
        # Starting from Python 3.9, __file__ inside the main module is
        # always an absolute path.
        the__file__ = abs_path

    # The filename passed to compile() will be used in stack traces and
    # error messages. It normally agrees with __file__.
    code = compile(source, filename=the__file__, mode="exec")
    the_module = create_module(name="__main__", file=the__file__)

    # According to
    # https://docs.python.org/3/tutorial/modules.html#the-module-search-path
    # if the script is a symlink, the symlink is followed before the
    # directory containing the script is added to sys.path.
    if os.path.islink(abs_path):
        link_target = os.readlink(abs_path)
        if not os.path.isabs(link_target):
            # A relative link target is relative to the link's directory.
            link_target = os.path.normpath(
                os.path.join(os.path.dirname(abs_path), link_target)
            )
        sys_path_dir = os.path.dirname(link_target)
    else:
        sys_path_dir = os.path.dirname(abs_path)

    if syspath is None:
        syspath = sys.path[1:]

    sysmodules_backup = sys.modules
    syspath_backup = sys.path
    cwd_backup = os.getcwd()
    try:
        sys.modules = sys.modules.copy()
        sys.modules["__main__"] = the_module
        # The concatenation automatically creates a copy of syspath.
        sys.path = [sys_path_dir] + syspath
        os.chdir(working_dir)
        # For code inside a module, global and local variables are given by
        # the *same* dictionary
        namespace = the_module.__dict__
        exec(code, namespace, namespace)
    finally:
        # Undo every global change even if the script (or chdir) failed.
        os.chdir(cwd_backup)
        sys.modules = sysmodules_backup
        sys.path = syspath_backup
    return the_module
#################
##### Tests #####
#################
# Make sure to install pyfakefs via pip!
import unittest
import pyfakefs
# NOTE(review): this attribute access needs the `pyfakefs.fake_filesystem_unittest`
# submodule to have been imported already - confirm that a plain
# `import pyfakefs` is sufficient for the pyfakefs version in use.
class Test_exec_script(pyfakefs.fake_filesystem_unittest.TestCase):
    """Tests for exec_script(), run against a pyfakefs fake file system."""

    def setUp(self):
        self.setUpPyfakefs()
        self.fs.create_file(
            "/folder/script.py",
            contents="\n".join(
                [
                    "import os",
                    "import sys",
                    "",
                    "cwd = os.getcwd()",
                    "sysmodules = sys.modules",
                    "syspath = sys.path",
                    "",
                    "sys.modules['test_module'] = 'bar'",
                    "sys.path.append('/some/path')",
                ]
            ),
        )
        self.fs.create_symlink("/folder2/symlink.py", "/folder/script.py")

    @staticmethod
    def _expected_file(path, working_dir):
        """Return the __file__ value exec_script() sets for this interpreter.

        exec_script() uses the path as given before Python 3.9 and the
        absolute path from Python 3.9 on, so the expectation has to follow
        the same rule.
        """
        if sys.version_info < (3, 9) or os.path.isabs(path):
            return path
        return os.path.join(os.path.abspath(working_dir), path)

    #
    # __name__
    #
    def test__name__is_set_correctly(self):
        module = exec_script("script.py", "/folder")
        assert module.__name__ == "__main__"

    #
    # __file__
    #
    def test_relative_path_works_and__file__shows_it(self):
        module = exec_script("script.py", "/folder")
        assert module.__file__ == self._expected_file("script.py", "/folder")

    def test_absolute_path_works_and__file__shows_it(self):
        module = exec_script("/folder/script.py", "/folder")
        assert module.__file__ == "/folder/script.py"

    def test__file__doesnt_follow_symlink(self):
        module = exec_script("symlink.py", "/folder2")
        assert module.__file__ == self._expected_file("symlink.py", "/folder2")

    #
    # working dir
    #
    def test_working_directory_is_set_and_reset_correctly(self):
        os.chdir("/")
        module = exec_script("/folder/script.py", "/folder")
        assert module.cwd == "/folder"
        assert os.getcwd() == "/"

    #
    # sys.modules
    #
    def test__main__module_is_set_correctly(self):
        module = exec_script("/folder/script.py", "/folder")
        assert module.sysmodules["__main__"] == module

    def test_script_cannot_modify_our_sys_modules(self):
        sysmodules_backup = sys.modules.copy()
        exec_script("/folder/script.py", "/folder")
        assert sys.modules == sysmodules_backup

    #
    # sys.path
    #
    def test_script_cannot_modify_our_sys_path(self):
        syspath_backup = sys.path.copy()
        exec_script("/folder/script.py", "/folder")
        assert sys.path == syspath_backup

    def test_sys_path_is_set_up_correctly(self):
        syspath_backup = sys.path[:]
        module = exec_script("/folder/script.py", "/folder")
        assert module.syspath[0] == "/folder"
        assert module.syspath[1:] == syspath_backup[1:] + ["/some/path"]

    def test_symlink_is_followed_before_adding_base_dir_to_sys_path(self):
        module = exec_script("symlink.py", "/folder2")
        assert module.syspath[0] == "/folder"
if __name__ == "__main__":
unittest.main()
I hope the following question is not too long, but otherwise I cannot explain my problem and what I want:
Learned from How to use importlib to import modules from arbitrary sources? (my question of yesterday)
I have written a specific loader for a new file type (.xxx).
(In fact the xxx is an encrypted version of a pyc to protect code from being stolen).
I would like just to add an import hook for the new file type "xxx" without affecting the other types (.py, .pyc, .pyd) in any way.
Now, the loader is ModuleLoader, inheriting from importlib.machinery.SourcelessFileLoader.
Using sys.path_hooks the loader shall be added as a hook:
myFinder = importlib.machinery.FileFinder
loader_details = (ModuleLoader, ['.xxx'])
sys.path_hooks.append(myFinder.path_hook(loader_details))
Note: This is activated once by calling modloader.activateLoader()
Upon loading a module named test (which is a test.xxx) I get:
>>> import modloader
>>> modloader.activateLoader()
>>> import test
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: No module named 'test'
>>>
However, when I delete content of sys.path_hooks before adding the hook:
sys.path_hooks = []
sys.path.insert(0, '.') # current directory
sys.path_hooks.append(myFinder.path_hook(loader_details))
it works:
>>> modloader.activateLoader()
>>> import test
using xxx class
in xxxLoader exec_module
in xxxLoader get_code: .\test.xxx
ANALYZING ...
GENERATE CODE OBJECT ...
2 0 LOAD_CONST 0
3 LOAD_CONST 1 ('foo2')
6 MAKE_FUNCTION 0
9 STORE_NAME 0 (foo2)
12 LOAD_CONST 2 (None)
15 RETURN_VALUE
>>>>>> test
<module 'test' from '.\\test.xxx'>
The module is imported correctly after conversion of the files content to a code object.
However I cannot load the same module from a package: import pack.test
Note: __init__.py is of course as an empty file in pack directory.
>>> import pack.test
Traceback (most recent call last):
File "<frozen importlib._bootstrap>", line 2218, in _find_and_load_unlocked
AttributeError: 'module' object has no attribute '__path__'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: No module named 'pack.test'; 'pack' is not a package
>>>
Worse still, I can no longer load plain *.py modules from that package; I get the same error as above:
>>> import pack.testpy
Traceback (most recent call last):
File "<frozen importlib._bootstrap>", line 2218, in _find_and_load_unlocked
AttributeError: 'module' object has no attribute '__path__'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: No module named 'pack.testpy'; 'pack' is not a package
>>>
As I understand it, sys.path_hooks is traversed until the last entry has been tried. So why does the first variant (without deleting sys.path_hooks) not recognize the new extension "xxx", while the second variant (deleting sys.path_hooks) does?
It looks like the machinery is throwing an exception rather than traversing further to the next entry, when an entry of sys.path_hooks is not able to recognize "xxx".
And why is the second version working for py, pyc and xxx modules in the current directory, but not working in the package pack? I would expect that py and pyc is not even working in the current dir, because sys.path_hooks contains only a hook for "xxx"...
The short answer is that the default PathFinder in sys.meta_path isn't meant to have new file extensions and importers added in the same paths it already supports. But there's still hope!
Quick Breakdown
sys.path_hooks is consumed by the importlib._bootstrap_external.PathFinder class.
When an import happens, each entry in sys.meta_path is asked to find a matching spec for the requested module. The PathFinder in particular will then take the contents of sys.path and pass it to the factory functions in sys.path_hooks. Each factory function has a chance to either raise an ImportError (basically the factory saying "nope, I don't support this path entry") or return a finder instance for that path. The first successfully returned finder is then cached in sys.path_importer_cache. From then on PathFinder will only ask those cached finder instances if they can provide the requested module.
If you look at the contents of sys.path_importer_cache, you'll see all of the directory entries from sys.path have been mapped to FileFinder instances. Non-directory entries (zip files, etc) will be mapped to other finders.
Thus, if you append a new factory created via FileFinder.path_hook to sys.path_hooks, your factory will only be invoked if the previous FileFinder hook didn't accept the path. This is unlikely, since FileFinder will work on any existing directory.
Alternatively, if you insert your new factory to sys.path_hooks ahead of the existing factories, the default hook will only be used if your new factory doesn't accept the path. And again, since FileFinder is so liberal with what it will accept, this would lead to only your loader being used, as you've already observed.
Making it Work
So you can either try to adjust that existing factory to also support your file extension and importer (which is difficult as the importers and extension string tuples are held in a closure), or do what I ended up doing, which is add a new meta path finder.
So eg. from my own project,
import sys
from importlib.abc import FileLoader
from importlib.machinery import FileFinder, PathFinder
from os import getcwd
from os.path import basename
from sibilant.module import prep_module, exec_module
SOURCE_SUFFIXES = [".lspy", ".sibilant"]
# Module-local counterparts of sys.path_importer_cache and sys.path_hooks.
_path_importer_cache = {}
_path_hooks = []


class SibilantPathFinder(PathFinder):
    """
    An overridden PathFinder which will hunt for sibilant files in
    sys.path. Uses storage in this module to avoid conflicts with the
    original PathFinder
    """

    # Inside the method bodies below, the bare names `_path_hooks` and
    # `_path_importer_cache` refer to the module-level list/dict above; the
    # methods of the same name are only reachable through `cls`.

    @classmethod
    def invalidate_caches(cls):
        """Call invalidate_caches() on every cached finder that has one."""
        for finder in _path_importer_cache.values():
            if hasattr(finder, 'invalidate_caches'):
                finder.invalidate_caches()

    @classmethod
    def _path_hooks(cls, path):
        """Return a finder for `path` from the first hook accepting it.

        Hooks signal "not my kind of path" by raising ImportError. Returns
        None when no module-local hook accepts the path.
        """
        for hook in _path_hooks:
            try:
                return hook(path)
            except ImportError:
                continue
        return None

    @classmethod
    def _path_importer_cache(cls, path):
        """Return the cached finder for `path`, creating it on first use.

        An empty `path` stands for the current working directory. A result
        of None (no hook accepted the path) is cached as well.
        """
        if path == '':
            try:
                path = getcwd()
            except FileNotFoundError:
                # Don't cache the failure as the cwd can easily change to
                # a valid directory later on.
                return None
        try:
            finder = _path_importer_cache[path]
        except KeyError:
            finder = cls._path_hooks(path)
            _path_importer_cache[path] = finder
        return finder
class SibilantSourceFileLoader(FileLoader):
    """File loader that reads a source file and hands it to sibilant."""

    def create_module(self, spec):
        """Return None so the import machinery creates the module itself."""
        return None

    def get_source(self, fullname):
        """Return the file's contents for `fullname`, decoded as UTF-8."""
        source_path = self.get_filename(fullname)
        raw_bytes = self.get_data(source_path)
        return raw_bytes.decode("utf8")

    def exec_module(self, module):
        """Prepare `module` and execute its source inside it.

        Only the base name of the source file is passed on as `filename`.
        """
        module_name = module.__name__
        text = self.get_source(module_name)
        short_name = basename(self.get_filename(module_name))
        prep_module(module)
        # This is the module-level exec_module imported from sibilant.module,
        # not this method.
        exec_module(module, text, filename=short_name)
def _get_lspy_file_loader():
    """Return the (loader class, suffix list) pair for sibilant sources."""
    loader_details = (SibilantSourceFileLoader, SOURCE_SUFFIXES)
    return loader_details


def _get_lspy_path_hook():
    """Return a FileFinder path hook that recognises sibilant sources."""
    loader_details = _get_lspy_file_loader()
    return FileFinder.path_hook(loader_details)


def _install():
    """Build and return an installer function that only acts once."""
    done = False

    def install():
        nonlocal done
        if done:
            # A previous call has already registered the hook and finder.
            return
        _path_hooks.append(_get_lspy_path_hook())
        sys.meta_path.append(SibilantPathFinder)
        done = True

    return install


# Swap the factory for the installer it produced, then run it.
_install = _install()
_install()
The SibilantPathFinder overrides PathFinder and replaces only those methods which reference sys.path_hooks and sys.path_importer_cache with similar implementations which instead look in a _path_hooks and _path_importer_cache which are local to this module.
During import, the existing PathFinder will try to find a matching module. If it cannot, then my injected SibilantPathFinder will re-traverse the sys.path and try to find a match with one of my own file extensions.
Figuring More Out
I ended up delving into the source for the _bootstrap_external module
https://github.com/python/cpython/blob/master/Lib/importlib/_bootstrap_external.py
The _install function and the PathFinder.find_spec method are the best starting points to seeing why things work the way they do.
@obriencj's analysis of the situation is correct. But I came up with a different solution to this problem that doesn't require putting anything in sys.meta_path. Instead, it installs a special hook in sys.path_hooks that acts almost as a sort of middleware between the PathFinder in sys.meta_path and the hooks in sys.path_hooks: rather than just using the first hook that says "I can handle this path!", it tries all matching hooks in order until it finds one that actually returns a useful ModuleSpec from its find_spec method:
import os
import sys
from importlib.abc import PathEntryFinder


@PathEntryFinder.register
class MetaFileFinder:
    """
    A 'middleware', if you will, between the PathFinder sys.meta_path hook,
    and sys.path_hooks hooks--particularly FileFinder.

    The hook returned by FileFinder.path_hook is rather 'promiscuous' in that
    it will handle *any* directory. So if one wants to insert another
    FileFinder.path_hook into sys.path_hooks, that will totally take over
    importing for any directory, and previous path hooks will be ignored.

    This class provides its own sys.path_hooks hook as follows: If inserted
    on sys.path_hooks (it should be inserted early so that it can supersede
    anything else). Its find_spec method then calls each hook on
    sys.path_hooks after itself and, for each hook that can handle the given
    sys.path entry, it calls the hook to create a finder, and calls that
    finder's find_spec. So each sys.path_hooks entry is tried until a spec is
    found or all finders are exhausted.
    """

    class hook:
        """
        Use this little internal class rather than a function with a closure
        or a classmethod or anything like that so that it's easier to
        identify our hook and skip over it while processing sys.path_hooks.
        """

        def __init__(self, basepath=None):
            # None means "handle every directory" and must stay None:
            # os.path.abspath(None) raises TypeError.
            if basepath is None:
                self.basepath = None
            else:
                self.basepath = os.path.abspath(basepath)

        def __call__(self, path):
            """Return a MetaFileFinder for `path` or raise ImportError."""
            if not os.path.isdir(path):
                raise ImportError('only directories are supported', path=path)
            elif not self.handles(path):
                raise ImportError(
                    'only directories under {} are supported'.format(
                        self.basepath), path=path)

            return MetaFileFinder(path)

        def handles(self, path):
            """
            Return whether this hook will handle the given path, depending on
            what its basepath is.
            """
            if self.basepath is None:
                return True

            path = os.path.abspath(path)
            try:
                common = os.path.commonpath([self.basepath, path])
            except ValueError:
                # commonpath() refuses paths without a common root, e.g.
                # different drives on Windows; such a path is not under
                # basepath.
                return False
            return common == self.basepath

    def __init__(self, path):
        self.path = path
        # Maps each sys.path_hooks entry to the finder it produced for
        # self.path, or to None if the hook raised ImportError.
        self._finder_cache = {}

    def __repr__(self):
        return '{}({!r})'.format(self.__class__.__name__, self.path)

    def find_spec(self, fullname, target=None):
        """Ask the finder of every other sys.path_hooks entry for a spec.

        Returns the first spec that has a loader. A spec without a loader
        is only returned when it comes from the last entry of
        sys.path_hooks. Returns None if nothing is found.
        """
        if not sys.path_hooks:
            return None

        last = len(sys.path_hooks) - 1

        for idx, hook in enumerate(sys.path_hooks):
            if isinstance(hook, self.__class__.hook):
                # Skip our own hooks.
                continue

            finder = None
            try:
                if hook in self._finder_cache:
                    finder = self._finder_cache[hook]
                    if finder is None:
                        # We've tried this finder before and got an ImportError
                        continue
            except TypeError:
                # The hook is unhashable
                pass

            if finder is None:
                try:
                    finder = hook(self.path)
                except ImportError:
                    pass

                try:
                    self._finder_cache[hook] = finder
                except TypeError:
                    # The hook is unhashable for some reason so we don't bother
                    # caching it
                    pass

            if finder is not None:
                spec = finder.find_spec(fullname, target)
                if (spec is not None and
                        (spec.loader is not None or idx == last)):
                    # If no __init__.<suffix> was found by any Finder,
                    # we may be importing a namespace package (which
                    # FileFinder.find_spec returns in this case). But we
                    # only want to return the namespace ModuleSpec if we've
                    # exhausted every other finder first.
                    return spec

        # Module spec not found through any of the finders
        return None

    def invalidate_caches(self):
        """Invalidate the caches of all finders created so far."""
        for finder in self._finder_cache.values():
            # A cached None marks a hook that rejected self.path.
            if finder is not None and hasattr(finder, 'invalidate_caches'):
                finder.invalidate_caches()

    @classmethod
    def install(cls, basepath=None):
        """
        Install the MetaFileFinder in the front sys.path_hooks, so that
        it can support any existing sys.path_hooks and any that might
        be appended later.

        If given, only support paths under and including basepath. In this
        case it's not necessary to invalidate the entire
        sys.path_importer_cache, but only any existing entries under basepath.
        """
        if basepath is not None:
            basepath = os.path.abspath(basepath)

        hook = cls.hook(basepath)
        sys.path_hooks.insert(0, hook)
        if basepath is None:
            sys.path_importer_cache.clear()
        else:
            for path in list(sys.path_importer_cache):
                if hook.handles(path):
                    del sys.path_importer_cache[path]
This is still, depressingly, far more complicated than should be necessary. I feel like on Python 2, before the import system rewrite, it was much simpler to do this since less of the support for the built-in module types (.py, etc.) was built on top of the import hooks themselves, so it was harder to break importing normal modules by adding hooks to import new module types. I'm going to start a discussion on python-ideas to see if there's any way we can improve this situation.
I came up with yet an alternative tweak. I won't say it is beautiful as it does a closure on an already existing one, but at least short :)
It adds loaders to the default FileLoader objects through a new hook. The original path_hook_for_FileFinder is wrapped in a closure and the loaders are injected into the FileFinder objects returned by the original hook.
After the new hook added the path_importer_cache is cleared as that is already filled with the original FileFinder objects. Those could also be updated dynamically, but I did not bother for now.
Disclaimer: not extensively tested yet. It does what I need in the easiest possible way I know, but the import system is complicated enough to produce funny side-effects for a tweak like this.
import sys
import importlib.machinery
def extend_path_hook_for_FileFinder(*loader_details):
    """Make the default FileFinder path hook recognise additional loaders.

    The entry of sys.path_hooks named ``path_hook_for_FileFinder`` is
    replaced, at the same position, by a wrapper that calls the original
    hook and appends the extra (suffix, loader) pairs to the returned
    finder.

    Args:
        *loader_details: ``(loader, suffixes)`` pairs, in the same form
            that FileFinder.path_hook() accepts.

    Raises:
        ValueError: if sys.path_hooks has no ``path_hook_for_FileFinder``
            entry.
    """
    # Not every hook is a plain function, so look the name up defensively.
    for orig_pos, orig_hook in enumerate(sys.path_hooks):
        if getattr(orig_hook, '__name__', None) == 'path_hook_for_FileFinder':
            break
    else:
        raise ValueError(
            'no path_hook_for_FileFinder entry found in sys.path_hooks')

    # Flatten once up front; the pairs are the same for every finder.
    extra_loaders = [
        (suffix, loader)
        for loader, suffixes in loader_details
        for suffix in suffixes
    ]

    def extended_path_hook_for_FileFinder(path):
        orig_finder = orig_hook(path)
        # NOTE: `_loaders` is a private attribute of the finder object; this
        # relies on it being a mutable list of (suffix, loader) pairs.
        orig_finder._loaders.extend(extra_loaders)
        return orig_finder

    sys.path_hooks[orig_pos] = extended_path_hook_for_FileFinder
MY_SUFFIXES = ['.pymy']


class MySourceFileLoader(importlib.machinery.SourceFileLoader):
    """Loader registered for the MY_SUFFIXES extensions; adds no behaviour."""


loader_detail = (MySourceFileLoader, MY_SUFFIXES)
extend_path_hook_for_FileFinder(loader_detail)
# empty cache as it is already filled with simple FileFinder
# objects for the most common path elements
sys.path_importer_cache.clear()
# sys.path_importer_cache is a plain dict and has no invalidate_caches();
# the function that resets the finders' internal caches lives on importlib.
importlib.invalidate_caches()
Is there a way to either:
Always have -t enabled by default (warn about inconsistent tab usage)
Be able to enable it programmatically on startup (eg. in a sitecustomize.py module)
It would need to work for embedded Python too (so aliasing python or similar solutions will be of no use). Use of sitecustomize.py allows us to hook into embedded Python instances, so this seems like a good place for it.
I thought the warnings module would provide a way to turn this warning on but I don't see anything.
For reference:
usage: python [option] ... [-c cmd | -m mod | file | -] [arg] ...
Options and arguments (and corresponding environment variables):
...
-t : issue warnings about inconsistent tab usage (-tt: issue errors)
...
Any suggestions on how this might be done?
Thanks.
There is no such option.
You can either
wrap the interpreter call inside a bash script
define an alias
The only solution I was able to come to involves import hooks. While this is somewhat beyond what I'd hoped to have to do, I felt it was a good excuse for me to learn how they work.
This solution doesn't check for "inconsistent whitespace" it just checks for tabs, but it would be easy to extend.
Here is the result:
import sys
import imp
import warnings
class TabCheckImporter(object):
    """Meta-path finder and loader that warns about tabs in module source.

    One instance plays both roles: ``find_module`` records the search path
    and returns the instance itself, and ``load_module`` locates the module
    with ``imp``, scans its source for tab characters, then performs the
    real import through ``imp.load_module``.
    """

    def find_module(self, fullname, path=None):
        """Remember *path* for the following load and claim the import.

        Returns ``self`` unconditionally, so this object is used as the
        loader for every module it is asked about.
        """
        # Save the path so we know where to look in load_module
        self.path = path
        return self

    def load_module(self, name):
        """Import *name*, warning first if its source contains a tab.

        Returns the module, reusing the entry in ``sys.modules`` when one
        exists. Errors raised by ``imp.find_module`` or ``imp.load_module``
        (for example ``ImportError`` when nothing is found) propagate.
        """
        # Check if it was already imported
        module = sys.modules.get(name)
        if module is not None:
            return module
        # imp.find_module() does not handle dotted names: for a submodule
        # the search path saved by find_module is the one to look in, and
        # only the last component of the name must be searched for.
        file_, pathname, description = imp.find_module(
            name.rpartition(".")[2], self.path)
        try:
            # Packages and builtins have no open file, and compiled or
            # extension modules are binary, so only plain source is scanned.
            if file_ is not None and description[2] == imp.PY_SOURCE:
                self._warn_on_tab(name, file_, pathname)
            # Import the module
            module = imp.load_module(name, file_, pathname, description)
        finally:
            if file_:
                file_.close()
        sys.modules[name] = module
        return module

    def _warn_on_tab(self, name, file_, pathname):
        """Emit an ImportWarning if the open source *file_* contains a tab.

        The file is rewound afterwards so the caller can still hand it to
        ``imp.load_module``. A source that cannot be read produces a
        "could not be checked" warning instead of aborting the import.
        """
        try:
            content = file_.read()
        except (IOError, OSError, UnicodeError):
            content = None
        # The scan consumed the file; go back to the start so the loader
        # does not see an already-exhausted file object.
        file_.seek(0)
        if content is None:
            warnings.warn("Module '{0}' could not be checked".format(name),
                          ImportWarning)
            return
        tab = content.find("\t")
        if tab > -1:
            # 1-based line number of the first tab character.
            lineno = content[:tab].count("\n") + 1
            warnings.warn_explicit(
                "module '{0}' contains a tab character".format(name),
                ImportWarning,
                pathname,
                lineno)
# Register the hook: rebind sys.meta_path to a new list that ends with a
# TabCheckImporter instance (an empty/falsy meta_path is treated as []).
sys.meta_path = (sys.meta_path if sys.meta_path else []) + [
    TabCheckImporter(),
]
# Enable ImportWarnings so that every occurrence is reported.
warnings.simplefilter(action="always", category=ImportWarning)
Importing this file (replacing ->| with a literal tab):
# File: test_tabbed.py
if True:
->| print "This line starts with a tab"
Yields this output:
$ python -c 'import hook; import test_normal; import test_tabbed;'
test_tabbed.py:3: ImportWarning: module 'test_tabbed' contains a tab character
print "This line starts with a tab"
I'm working on an auto-reload feature for WHIFF
http://whiff.sourceforge.net
(so you have to restart the HTTP server less often, ideally never).
I have the following code to reload a package module "location"
if a file is added to the package directory. It doesn't work on Windows XP.
How can I fix it? I think the problem is that getmtime(dir) doesn't
change on Windows when the directory content changes?
I'd really rather not compare an os.listdir(dir) with the last directory
content every time I access the package...
# Package reload check: runs only when no reload is already pending and
# `location` has a __path__ attribute.
if not do_reload and hasattr(location, "__path__"):
# Only the first entry of __path__ is examined.
path0 = location.__path__[0]
if os.path.exists(path0):
# The directory mtime is truncated to whole seconds by int() before comparing.
dir_mtime = int( os.path.getmtime(path0) )
# fn_mtime is defined outside this fragment; the surrounding text describes
# it as the mtime recorded at the last (re)load.
if fn_mtime<dir_mtime:
print "dir change: reloading package root", location
do_reload = True
# NOTE(review): the new timestamp is stored in md_mtime, while fn_mtime (the
# value compared above) is not updated in this fragment -- confirm intended.
md_mtime = dir_mtime
In the code the "fn_mtime" is the recorded mtime from the last (re)load.
... added comment: I came up with the following work around, which I think
may work, but I don't care for it too much since it involves code generation.
I dynamically generate a code fragment to load a module and if it fails
it tries again after a reload. Not tested yet.
# Source template for a throwaway function f(); the %(parent)s and %(child)s
# placeholders are filled in by %-formatting before the text is exec'd.
GET_MODULE_FUNCTION = """
def f():
    import %(parent)s
    try:
        from %(parent)s import %(child)s
    except ImportError:
        # one more time...
        reload(%(parent)s)
        from %(parent)s import %(child)s
    return %(child)s
"""


def my_import(partname, parent):
    """Import ``partname`` from the module ``parent`` and bind it on ``parent``.

    The import is attempted once; on ImportError the parent is reloaded and
    the import is retried. The result is stored with
    ``setattr(parent, partname, ...)`` and returned.

    Raises ImportError if the name still cannot be imported after the
    reload, or if the generated source is not valid Python (for example
    when ``partname`` is not an identifier).

    NOTE: both names are interpolated into source text that is exec'd, so
    they must never come from untrusted input.
    """
    parentname = parent.__name__
    defn = GET_MODULE_FUNCTION % {"parent": parentname, "child": partname}
    # Run the generated code in its own namespace: exec() cannot rebind a
    # function's local variables on Python 3, so f is fetched from the dict.
    namespace = {}
    try:
        namespace["reload"] = reload  # builtin on Python 2
    except NameError:
        from importlib import reload as _reload
        namespace["reload"] = _reload
    try:
        exec(defn, namespace)  # defines namespace["f"]
    except SyntaxError:
        raise ImportError("bad function name " + repr(partname) + "?")
    partmodule = namespace["f"]()
    setattr(parent, partname, partmodule)
    return partmodule
Other suggestions welcome. I'm not happy about this...
Long time no see. I'm not sure exactly what you're doing, but the equivalent of your code:
# Quoted from the question: source template for a function f(). The
# %(parent)s and %(child)s placeholders are filled in by %-formatting.
GET_MODULE_FUNCTION = """
def f():
import %(parent)s
try:
from %(parent)s import %(child)s
except ImportError:
# one more time...
reload(%(parent)s)
from %(parent)s import %(child)s
return %(child)s
"""
to be execed with:
# Fill the template's %(parent)s / %(child)s placeholders with the real names.
defn = GET_MODULE_FUNCTION % {"parent": parentname, "child": partname}
# Execute the generated source text, which contains the definition of f().
exec(defn)
is (per the docs), assuming parentname names a package and partname names a module in that package (if partname is a top-level name of the parentname package, such as a function or class, you'll have to use a getattr at the end):
import sys
def f(parentname, partname):
    """Return the module ``parentname.partname``, reloading the parent once.

    The dotted name is imported directly. If that raises ImportError, the
    parent module is reloaded and the import is tried a second time; a
    failure on the second attempt propagates to the caller.
    """
    name = '%s.%s' % (parentname, partname)
    try:
        __import__(name)
    except ImportError:
        # __import__ returns the top-level package for a dotted name, so
        # fetch the parent from sys.modules to reload the right module.
        __import__(parentname)
        parent = sys.modules[parentname]
        try:
            reload_module = reload  # builtin on Python 2
        except NameError:
            from importlib import reload as reload_module
        reload_module(parent)
        __import__(name)
    return sys.modules[name]
without exec or anything weird, just call this f appropriately.
you can try using getatime() instead.
I'm not understanding your question completely...
Are you calling getmtime() on a directory or an individual file?
There are two things about your first code snippet that concern me:
You cast the float from getmtime to int. Depending on how frequently this code is run, you might get unreliable results.
At the end of the code you assign dir_mtime to a variable md_mtime. fn_mtime, which you check against, seems not to be updated.