Python equivalent of Matlab addpath

Does Python have an equivalent to Matlab's addpath? I know about sys.path.append, but that only seems to work for Python files/modules, not for general files.
Suppose I have a file config.txt in C:\Data and the current working directory is something else, say D:\my_project.
I would like to have code similar to:
def foo():
    with open('config.txt') as f:
        print(f.read())

def main():
    addpath(r'C:\Data')
    foo()
Obviously I could pass the path to foo here, but that is very difficult in the actual use case.

You can't add multiple search paths the way you would in MATLAB.
You can use os.chdir to change directories, and access files and sub-directories from that directory:
import os

def foo():
    with open('config.txt') as f:
        print(f.read())

def main():
    os.chdir(r'C:\Data')
    foo()
To manage multiple directories, you can use a context manager that returns to the previous working directory when the context exits:
import contextlib
import os

@contextlib.contextmanager
def working_directory(path):
    prev_cwd = os.getcwd()
    os.chdir(path)
    yield
    os.chdir(prev_cwd)

def foo():
    with open('config.txt') as f:
        print(f.read())

def main():
    with working_directory(r'C:\Data'):
        foo()
    # back in the previous working directory

No, it doesn't. Python doesn't work this way. Files are loaded from the current working directory or a specific, resolved path. There is no such thing as a set of pre-defined paths for loading arbitrary files.
Keeping data and program logic separate is an important concept in Python (and in most other programming languages besides MATLAB). A key principle of Python is "Explicit is better than implicit." Making sure the data file you want to load is specified explicitly is much safer, more reliable, and less error-prone.
So although others have shown how you can hack together some workarounds, I would very, very strongly advise you not to use this approach. It is going to make maintaining your code much harder.
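For what it's worth, here is a minimal sketch of the explicit alternative, assuming the caller knows where the data lives (the directory and the load_config helper are just illustrative names, not part of the original question):
from pathlib import Path

# Hypothetical explicit version: the caller builds the full path and
# passes it down to the function that needs it.
def load_config(config_path):
    with open(config_path) as f:
        return f.read()

def main():
    config_path = Path(r'C:\Data') / 'config.txt'
    print(load_config(config_path))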

You can combine os.chdir with your own open function to check all the paths you want:
import os

class FileOpener(object):
    def __init__(self):
        self.paths = []

    def add_path(self, path):
        self.paths.append(path)

    def __open_path(self, path, *args, **kwargs):
        old_path = os.getcwd()
        try:
            os.chdir(path)
            return open(*args, **kwargs)
        except OSError:
            return None
        finally:
            os.chdir(old_path)

    def open(self, *args, **kwargs):
        for path in self.paths + [os.getcwd()]:
            f = self.__open_path(path, *args, **kwargs)
            if f is not None:
                return f
        raise IOError("no such file")

my_file_opener = FileOpener()
my_file_opener.add_path("C:/Data")
my_file_opener.add_path("C:/Blah")

# Checks C:/Data, C:/Blah and then the current working directory,
# returns the first file named "some_file" it finds, and raises an
# IOError otherwise.
my_file_opener.open("some_file")
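Note that os.chdir changes the working directory for the whole process, which can surprise other code running at the same time. As a hedged variation on the same idea (not from the original answer), you could build full paths with os.path.join instead of changing directories:
import os

class PathSearchOpener(object):
    """Hypothetical chdir-free variant: join each search path with the
    file name and open the first candidate that exists."""
    def __init__(self, paths=None):
        self.paths = list(paths) if paths else []

    def add_path(self, path):
        self.paths.append(path)

    def open(self, filename, *args, **kwargs):
        for directory in self.paths + [os.getcwd()]:
            candidate = os.path.join(directory, filename)
            if os.path.isfile(candidate):
                return open(candidate, *args, **kwargs)
        raise IOError("no such file: %s" % filename)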

Sorry, I made a mistake.
I guess you can use this to solve it (as a Stack Overflow newbie, I hope it helps):
import sys
sys.path.append(r'path')

def foo(prefix):
    path = prefix + 'config.txt'
    with open(path) as f:
        print(f.read())

def main():
    foo('C:\\Data\\')
--- update ---
import os

class changeFileDir:
    def __init__(self, path):
        self.path = os.path.expanduser(path)

    def __enter__(self):
        self.savedPath = os.getcwd()
        os.chdir(self.path)

    def __exit__(self, etype, value, traceback):
        os.chdir(self.savedPath)

with changeFileDir(r'C:\Data'):
    foo()

Related

How do I load a Python module from a string while preserving debug?

I'm interested in loading a Python module that has its source embedded in a C extension. It should be possible to do something with Python's importlib machinery like importlib.util.spec_from_file_location so that the source code will appear if you are debugging. How would I implement an importlib.util.spec_from_string?
Here's how to define a loader that takes the module's source from a string, and then creates and loads the module into sys.modules. It could be useful if the module's source is not in a file. If there is already a file then use https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
Although inspect.getsource(module) works for a subclass of importlib.abc.InspectLoader (for which it would only be necessary to define get_source), tracebacks and pdb don't appear to be willing to display the source code until you inherit from SourceLoader.
import sys
import importlib.abc, importlib.util

class StringLoader(importlib.abc.SourceLoader):
    def __init__(self, data):
        self.data = data

    def get_source(self, fullname):
        return self.data

    def get_data(self, path):
        return self.data.encode("utf-8")

    def get_filename(self, fullname):
        return "<not a real path>/" + fullname + ".py"

module_name = "testmodule"
with open("testmodule.py", "r") as module:
    loader = StringLoader(module.read())

spec = importlib.util.spec_from_loader(module_name, loader, origin="built-in")
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
As a quick fix, you can dump the source into a temporary module file, import it using exec, and delete the temp module when you're done.
Here's a toy example:
dummy_src = """
print("imported!")
x = 5
"""

with open("temp.py", "w") as f:
    f.write(dummy_src)

exec("import temp")
print(temp.x)
Output:
imported!
5

Relative path of file does not get resolved

Apparently Python resolves every relative file reference in relation to the file that was called first.
I have the following file structure
src
|-- myunittests.py
|-- subfolder1
    |-- __init__.py
    |-- printFileContent.py
    |-- subfolder2
        |-- __init__.py
        |-- file
myunittests.py will test the behavior of functions inside printFileContent:
from subfolder1.printFileContent import printFileContent
printFileContent()
printFileContent prints the content of a file contained inside the subfolder:
def printFileContent():
    with open("./subfolder2/file") as file:
        for line in file:
            print(line)

if __name__ == "__main__":
    printFileContent()
file just contains some text.
Question:
Running python3 printFileContent.py inside subfolder1 correctly outputs the file content.
But running python3 myunittests.py raises an error saying that the file could not be found.
Is there a way to solve this problem? (Is there a way to tell Python that files referred to by relative paths should be resolved relative to the file they are used in?)
Constraints:
Changing the content of printFileContent.py is not an option (it is a generated file).
Such calls to printFileContent occur at arbitrary places throughout the code (a unittest file calls a dialog, which calls printFileContent, and vice versa, inside subfolder2).
When does this behavior occur?
When file is an icon that is used inside printFileContent.py while printFileContent.py is called from myunittests.py.
Side question:
Is there a proper term for this behavior that would help in explaining it and reading up on its pitfalls?
If you cannot modify printFileContent.py, you can save the current directory, go to the directory of subfolder1 and then come back to the original directory:
import os
from subfolder1 import printFileContent

# Save the current directory (absolute path)
cdir = os.path.abspath(os.path.curdir)

# Change to subfolder1's directory, then call printFileContent
os.chdir(os.path.dirname(printFileContent.__file__))
printFileContent.printFileContent()

# Go back to the original directory
os.chdir(cdir)
If you have to do this a lot, you can wrap the behavior in a class usable with a with statement, so that it's easier to use and more robust (you won't forget to chdir back):
import os

class TmpDirChanger:
    def __init__(self, tmpPath):
        self.currentDir = os.path.abspath(os.path.curdir)
        os.chdir(tmpPath)

    def __enter__(self): pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        # change back to the original dir
        os.chdir(self.currentDir)

with TmpDirChanger('path/to/some/dir'):
    do_something()
If you can modify printFileContent.py, it is less tricky:
import os

def printFileContent():
    # This gives you the path to subfolder1
    sfolder1 = os.path.dirname(__file__)
    # This gives you the path to "file" in subfolder2
    name = os.path.join(sfolder1, 'subfolder2', 'file')
    with open(name) as file:
        for line in file:
            print(line)
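If you can modify the file, another sketch of the same fix using pathlib (available since Python 3.4) anchors the path to the module's own location rather than the current working directory:
from pathlib import Path

def printFileContent():
    # Resolve "file" relative to this module's directory, not the
    # current working directory of whoever imported us.
    name = Path(__file__).resolve().parent / 'subfolder2' / 'file'
    with open(name) as file:
        for line in file:
            print(line)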

Python open filename from custom PATH

Similar to the system path, I want to offer some convenience in my code allowing a user to specify a file name that could be in one of a handful of paths.
Say I had two or more config paths
['~/.foo-config/', '/usr/local/myapp/foo-config/']
And my user wants to open bar (a.k.a. bar.baz).
Is there a convenient built-in way to let open('bar') or open('bar.baz') automatically search these paths for that file in left-to-right order of precedence? E.g., will temporarily adjusting my sys.path to contain only these directories do this for me?
Else, how would you suggest implementing a PATH-like searching open-wrapper?
As other people already mentioned: sys.path only affects the module search path, i.e. it's relevant for importing Python modules, but not at all for open().
I would suggest separating the logic for searching the paths in order of precedence and opening the file, because that way it's easier to test and read.
I would do something like this:
import os

PATHS = ['~/.foo-config/', '/usr/local/myapp/foo-config/']

def find_first(filename, paths):
    for directory in paths:
        # expanduser so entries like '~/.foo-config/' work
        full_path = os.path.join(os.path.expanduser(directory), filename)
        if os.path.isfile(full_path):
            return full_path

def main():
    filename = 'file.txt'
    path = find_first(filename, PATHS)
    if path:
        with open(path) as f:
            print(f)
    else:
        print("File {} not found in any of the directories".format(filename))

if __name__ == '__main__':
    main()
open doesn't get into that kind of logic. If you want, write a wrapper function that uses os.path.join to join each member of sys.path to the parameter filename, and tries to open them in order, handling the error that occurs when no such file is found.
I'll add that, as another user stated, this is kind of a misuse of sys.path, but this function would work for any list of paths. Indeed, maybe the nicest option is to use the environment variables suggested by another user to specify a colon-delimited list of config directories, which you then parse and use within your search function.
Environment variables
Say your app is named foo. In the README, tell the user to use the FOO_PATH environment variable to specify the extra paths.
Then inside your app do something like:
import os

for path in os.environ.get("FOO_PATH", ".").split(";"):
    lookfor(os.path.join(path, "somefile.txt"))
You could wrap it in a generic function:
def open_foo(fname):
    for path in os.environ.get("FOO_PATH", ".").split(";"):
        path_to_test = os.path.join(path, fname)
        if os.path.exists(path_to_test):
            return open(path_to_test)
    raise Exception("No file found on FOO_PATH")
Then you could use it just like the normal open:
with open_foo("my_config.txt") as f:
    print(f.read())
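One caveat: the separator for PATH-like variables is platform dependent (";" on Windows, ":" on Linux/macOS), and os.pathsep gives the right one. A hedged variant of the same helper (FOO_PATH is still just the example variable name used above):
import os

def open_foo(fname):
    # Split FOO_PATH on the platform's path-list separator.
    for path in os.environ.get("FOO_PATH", ".").split(os.pathsep):
        candidate = os.path.join(path, fname)
        if os.path.exists(candidate):
            return open(candidate)
    raise FileNotFoundError("no file named %r found on FOO_PATH" % fname)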
Extract from Python Standard Library documentation for open built-in function:
open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
...file is either a string or bytes object giving the pathname (absolute or relative to the current working directory) of the file to be opened ...
Explicitly, open does not do anything to automagically find a file: if the path is not absolute, it is only searched for in the current directory.
So you will have to use a custom function or a custom class for that. For example:
import os

class path_opener(object):
    def __init__(self, path=['.']):
        self.path = path

    def set(self, path):
        self.path = path

    def append(self, path):
        self.path.append(path)

    def extend(self, path):
        self.path.extend(path)

    def find(self, file):
        for folder in self.path:
            path = os.path.join(folder, file)
            if os.path.isfile(path):
                return path
        raise FileNotFoundError()

    def open(self, file, *args, **kwargs):
        return open(self.find(file), *args, **kwargs)
That means that a file opener keeps its own path, is initialized by default with the current path, has methods to set, append to or extend its path, and will normally raise a FileNotFoundError if a file is not found in any of the directories listed in its path.
Usage :
o = path_opener(['~/.foo-config/', '/usr/local/myapp/foo-config/'])
with o.open('foo') as fd:
...

Matching MD5 Hashes from another script

OK, so I'm trying to create a script that does the following: it searches a directory for files with known hashes. Here is my first script:
Hash.py
import hashlib
from functools import partial

# call another python script
execfile("knownHashes.py")

def md5sum(filename):
    with open(filename, mode='rb') as f:
        d = hashlib.md5()
        for buf in iter(partial(f.read, 128), b''):
            d.update(buf)
    return d.hexdigest()

print "Hash of is: "
print(md5sum('photo.jpg'))

if md5List == md5sum:
    print "Match"
knownHashes.py
print ("Call worked\n")
md5List = "01071709f67193b295beb7eab6e66646" + "5d41402abc4b2a76b9719d911017c592"
The problem at the moment is that I manually have to type in the file I want to find out the hash of where it says photo.jpg. Also, I haven't got the md5List to work yet.
I want the script to eventually work like this:
python hash.py <directory>
1 match
cookies.jpg matches hash
So how can I get the script to search a directory rather than manually type in what file to hash? Also, how can I fix the md5List because that is wrong?
You can get a list of files in the current working directory using the following. This is the directory that you run the script from.
import os

# Get a list of files in the working directory
files_list = os.listdir(os.getcwd())
You can iterate through the list using a for loop:
for file in files_list:
    # do something with each file
    pass
As equinoxel also mentioned below, you can use os.walk() as well.
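To tie this back to the question, here is a rough sketch (not a drop-in replacement for your script; the known digests are just the two example values from knownHashes.py) that walks a directory given on the command line and reports files whose MD5 matches a known hash:
import hashlib
import os
import sys
from functools import partial

# The two example digests from knownHashes.py, kept in a set so
# membership tests are cheap.
KNOWN_HASHES = {
    "01071709f67193b295beb7eab6e66646",
    "5d41402abc4b2a76b9719d911017c592",
}

def md5sum(filename):
    d = hashlib.md5()
    with open(filename, mode='rb') as f:
        for buf in iter(partial(f.read, 128), b''):
            d.update(buf)
    return d.hexdigest()

def main(directory):
    matches = 0
    for root, dirs, files in os.walk(directory):
        for name in files:
            path = os.path.join(root, name)
            if md5sum(path) in KNOWN_HASHES:
                matches += 1
                print("{} matches hash".format(name))
    print("{} match(es)".format(matches))

if __name__ == "__main__":
    main(sys.argv[1])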
This simple little gist should solve most of your problems. It's understandable if you don't like using OOP for this problem, but I believe all of the important conceptual pieces are here in a pretty clean, concise representation. Let me know if you have any questions.
import os

class PyGrep:
    def __init__(self, directory):
        self.directory = directory

    def grab_all_files_with_ending(self, file_ending):
        """Will return absolute paths to all files with the given file ending in self.directory"""
        walk_results = os.walk(self.directory)
        file_check = lambda walk: len(walk[2]) > 0
        ending_prelim = lambda walk: file_ending in " ".join(walk[2])
        relevant_results = (entry for entry in walk_results
                            if file_check(entry) and ending_prelim(entry))
        return (self.grab_files_from_os_walk(result, file_ending) for result in relevant_results)

    def grab_files_from_os_walk(self, os_walk_tuple, file_ending):
        format_check = lambda file_name: file_ending in file_name
        directory, subfolders, file_paths = os_walk_tuple
        return [os.path.join(directory, file_path) for file_path in file_paths if format_check(file_path)]
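For illustration, usage might look something like this (the directory and file ending are placeholders):
grep = PyGrep("C:/Data")

# grab_all_files_with_ending yields one list of matching paths per
# directory visited by os.walk.
for file_batch in grep.grab_all_files_with_ending(".jpg"):
    for path in file_batch:
        print(path)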

How to determine whether a specified file is placed inside a specified folder?

Let's say I have two paths: the first one, file_path (which may be a file or folder path), and the second one, folder_path (which may only be a folder path). I want to determine whether the object located at file_path is inside the object located at folder_path.
I have an idea of doing this:
import os
...

def is_inside(file_path, folder_path):
    full_file_path = os.path.realpath(file_path)
    full_folder_path = os.path.realpath(folder_path)
    return full_file_path.startswith(full_folder_path)
but I'm afraid there are some pitfalls in this approach. Also I think there must be a prettier way to do this.
The solution must work on Linux, but it would be great if you could propose a cross-platform trick.
Use os.path.commonprefix. Here's an example based on your idea.
import os.path as _osp

def is_inside(file_path, folder_path):
    full_file_path = _osp.realpath(file_path)
    full_folder_path = _osp.realpath(folder_path)
    return _osp.commonprefix([full_file_path, full_folder_path]) == full_folder_path
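A quick usage sketch with made-up paths, including one character-level pitfall of commonprefix worth knowing about:
print(is_inside('/home/user/project/data/config.txt', '/home/user/project'))  # True
print(is_inside('/home/user/elsewhere/config.txt', '/home/user/project'))     # False

# Caveat: commonprefix compares character by character, so a sibling
# folder whose name merely starts with folder_path would also match.
print(is_inside('/home/user/project2/config.txt', '/home/user/project'))      # True (surprising)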
Parse the file name from the file path and do
os.path.exists(full_folder_path + '/' + file_name)
