Bazel read temporary dynamically added file - python

I have a Python script that downloads some JSON data and then uploads it somewhere else. The project is built and run using Bazel and has the following simplified structure.
-scripts/
-exported_file_1.json
-main.py
-tests/
-BUILD
My issue is that when it comes to reading the exported files and loading them in memory, those files cannot be found using:
def get_filepath(filename):
    """Return the absolute path of ``filename`` located beside this module.

    Args:
        filename: Name (or relative path) of a file expected in the same
            directory as this source file.

    Returns:
        The absolute path built by joining this module's directory with
        ``filename``. The file is not checked for existence.
    """
    # __file__ can be relative (for example when the script is started as
    # "python main.py" on older interpreters). Resolving it first keeps the
    # result independent of the current working directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, filename)
If I manually add a JSON file to the project structure, declare it in the BUILD file so that it is visible, and run a bazel build, then it works fine:
# Executable Python target built from main.py.
py_binary(
    name = "main",
    srcs = [
        "main.py",
    ],
    # Only the files named here are declared to the binary; a file that is
    # not listed (or not present when the build runs) is not included.
    data = ["scripts/exported_file_1.json"],
    python_version = "PY2",
    visibility = ["//visibility:public"],
    deps = [],
)
But how would one handle the case when your files are added dynamically?

Perhaps using glob might work?
Something like:
import os
import glob
def get_json_files():
    """Return the paths of all ``*.json`` files located beside this module.

    Returns:
        A list of absolute paths, one per ``.json`` file found directly in
        the directory containing this source file (no recursion). The list
        is empty when there are none.
    """
    # Resolve the directory first: with a relative __file__, dirname() can be
    # "" and the pattern would turn into "/*.json", i.e. the filesystem root.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # Escape the directory part so characters such as "[" in the path are
    # matched literally instead of being read as glob syntax.
    pattern = os.path.join(glob.escape(module_dir), "*.json")
    return glob.glob(pattern)


for json_file in get_json_files():
    print(f"found file: {json_file}")

Using glob in the BUILD file will allow the binary to find all .json files in the scripts directory:
# Executable Python target; every .json file under scripts/ is attached as data.
py_binary(
    name = "main",
    srcs = [
        "scripts/main.py",
    ],
    # The glob only matches .json files that are already present in scripts/
    # when the target is built; files downloaded later are not picked up.
    data = glob(["scripts/*.json"]),
    python_version = "PY2",
    visibility = ["//visibility:public"],
    deps = [],
)
This would still require the JSON files to be downloaded and present in the scripts/ directory when bazel run :main is executed.

Related

python yaml path after deployment

So this is a question about how to handle settings files and relative paths in python (probably also something about best practice).
I have written a small project that I want to deploy in a Docker image. Everything is set up, except that when I try to run the Python task (through cron) I get the error: settings/settings.yml not found.
tree .
├───settings
│ └───settings/settings.yml
└───main.py
And am referencing the yml file as
# NOTE: this path is relative, so it is resolved against the process's current
# working directory rather than against the location of this file.
with open('settings/settings.yml', 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
I can see this is what is causing the problem but am unsure about how to fix it. I wish to reference the main file basically by using the entry_points from setuptools in the future so my quick fix with cd'ing before python main.py will not be a lasting solution.
Instead of hardcoding a path as a string, you can find the directories and build the file path with os.path. For example:
import os
import yaml
# Build the path from this file's own location so that it does not depend on
# the directory the process was started from.
current_dir = os.path.dirname(os.path.abspath(__file__))
settings_dir = os.path.join(current_dir, "settings")
filename = "settings.yml"
settings_path = os.path.join(settings_dir, filename)
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6; a settings
# file only needs plain YAML types.
with open(settings_path, "r") as infile:
    settings_data = yaml.safe_load(infile)
This way it can be run in any file system and the python file can be called from any directory.

Python - How to change configuration files in data package during installation

As the title states, I am writing a library that contains a data package with several configuration files.
The configuration files contains hard-coded paths to other configuration files, that I would like to change during installation time, so the new hard-coded paths will point to where the library is actually installed.
I tried different approaches that work well in the Windows environment, but not on Unix-based platforms (e.g. Ubuntu).
My setup.py code:
import atexit
import os
import sys
import fileinput
import fnmatch
import glob
from setuptools import setup
from setuptools.command.develop import develop
from setuptools.command.install import install
from setuptools.command.egg_info import egg_info
LIB_NAME = "namsim"
NAMSIM_DATA_DIRECTORY = "data"
NAMSIM_CONF_DIRECTORY = "default_namsim_conf"


def post_install_operations(lib_path):
    """Rewrite the ``STUB_PATH`` placeholder in the installed ``.xml`` configs.

    Walks the ``data/default_namsim_conf`` directory below ``lib_path`` and,
    in every ``*.xml`` file found there, replaces the literal text
    ``STUB_PATH`` with the forward-slash form of that configuration
    directory.

    Args:
        lib_path: Directory of the installed library, or ``None`` when it
            could not be located.

    Returns:
        None. Matching files are modified in place.
    """
    # Nothing to do when the library was not found.
    if not lib_path:
        return
    # Only act on a real installation. Debian/Ubuntu system interpreters
    # install into "dist-packages" instead of "site-packages", so accept
    # both; any other location (e.g. the source tree during packaging) is
    # left untouched.
    if 'site-packages' not in lib_path and 'dist-packages' not in lib_path:
        return

    # Normalise to forward slashes so that the substituted path has the same
    # form on every platform (on Windows os.sep is a backslash).
    conf_dir_path = os.path.join(lib_path, NAMSIM_DATA_DIRECTORY, NAMSIM_CONF_DIRECTORY)
    conf_dir_path = conf_dir_path.replace(os.sep, '/')

    stub_name = 'STUB_PATH'
    file_pattern = "*.xml"
    for path, _dirs, files in os.walk(conf_dir_path):
        for filename in fnmatch.filter(files, file_pattern):
            full_file_path = os.path.join(path, filename)
            print(full_file_path)
            with open(full_file_path, 'r') as conf_file:
                file_data = conf_file.read()
            if stub_name not in file_data:
                # No placeholder present: leave the file as it is.
                continue
            with open(full_file_path, 'w') as conf_file:
                conf_file.write(file_data.replace(stub_name, conf_dir_path))
def post_install_decorator(command_subclass):
    """A decorator for classes subclassing one of the setuptools commands.

    It replaces the class's ``run()`` with a wrapper that first calls the
    original ``run()``, then searches ``sys.path`` for a directory containing
    an entry named ``LIB_NAME`` and passes that entry's path to
    ``post_install_operations`` so the configuration paths get rewritten.

    Args:
        command_subclass: A class exposing a ``run(self)`` method.

    Returns:
        The same class object, with ``run`` replaced by the wrapper.
    """
    import functools  # local import: only this wrapper needs it

    orig_run = command_subclass.run

    def find_module_path():
        """Return the LIB_NAME path under the first matching sys.path entry, else None."""
        for p in sys.path:
            if os.path.isdir(p) and LIB_NAME in os.listdir(p):
                return os.path.join(p, LIB_NAME)
        return None

    @functools.wraps(orig_run)
    def modified_run(self):
        result = orig_run(self)
        lib_path = find_module_path()
        # The library may not be reachable from any sys.path entry; skip the
        # rewrite in that case instead of handing None to the next step.
        if lib_path is not None:
            post_install_operations(lib_path)
        return result

    command_subclass.run = modified_run
    return command_subclass
# The decorator must be applied with "@"; as a "#" comment it has no effect
# and run() is never patched.
@post_install_decorator
class CustomDevelopCommand(develop):
    """``develop`` command whose ``run()`` is patched by the decorator."""
# The decorator must be applied with "@"; as a "#" comment it has no effect
# and run() is never patched.
@post_install_decorator
class CustomInstallCommand(install):
    """``install`` command whose ``run()`` is patched by the decorator."""
# The decorator must be applied with "@"; as a "#" comment it has no effect
# and run() is never patched.
@post_install_decorator
class CustomEggInfoCommand(egg_info):
    """``egg_info`` command whose ``run()`` is patched by the decorator."""
# NOTE(review): `all_done` is not defined anywhere in the visible script; as
# written this line would raise NameError before setup() is reached. Define
# the callback first or drop the registration -- TODO confirm intent.
atexit.register(all_done)
# Package metadata and the patched command classes.
setup(
    name="namsim",
    version="1.0.0",
    author="Barak David",
    license="MIT",
    keywords="Name similarity mock-up library.",
    packages=['namsim', 'namsim.wrapper', 'namsim.data'],
    # The keyword is "package_data" ("package_date" is not a setuptools
    # option), and its keys are dotted package names as listed in
    # `packages` above.
    package_data={'namsim.data': ['default_namsim_conf/*']},
    include_package_data=True,
    cmdclass={
        'develop': CustomDevelopCommand,
        'install': CustomInstallCommand,
        'egg_info': CustomEggInfoCommand,
    },
)
Picture of my library source tree:
To be clear, the original namsim_config.xml contains the text:
STUB_PATH/conf/multiplier_config.xml
My goal is for the text to be changed after installation to:
{actual lib installation path}/conf/multiplier_config.xml
Some additional information:
I tried the above code on both python 2.7 and 3.x platforms.
On Windows I get the expected result, in contrast to Unix based platforms.
I use the "python setup.py sdist" command on Windows to create the library, and I install the resulting tar.gz on the different platforms.
I also tried using the atexit module to change the configurations before process termination, but I got the same result.
Thank you.

How do I call data I included in a python package?

I have a python package with this file structure:
package
- bin
clean_spam_ratings.py
- spam_module
- data
spam_ratings.csv
__init__.py
spam_ratings_functions.py
Contents of clean_spam_ratings.py:
import spam_module

# `path_to_spam_ratings_csv` is the value being asked about; it is not
# defined in this snippet.
with open(path_to_spam_ratings_csv, 'r') as fin:
    spam_module.spam_ratings_functions(fin)
What should I set path_to_spam_ratings_csv to?
If you are in a module, then you can get the absolute path for the directory that contains that module via:
os.path.dirname(__file__)
You can use then that to construct the path to your csv file. For example, if you are in spam_ratings_functions.py, use:
# NOTE(review): ".." steps up one level from this module's directory before
# entering data/ -- confirm that this matches where data/ sits relative to
# spam_ratings_functions.py.
path_to_spam_ratings_csv = os.path.join(os.path.dirname(__file__), "..", "data", "spam_ratings.csv")

Include .pyd module files in py2exe compilation

I'm trying to compile a python script. On executing the exe I got:-
C:\Python27\dist>visualn.exe
Traceback (most recent call last):
File "visualn.py", line 19, in <module>
File "MMTK\__init__.pyc", line 39, in <module>
File "Scientific\Geometry\__init__.pyc", line 30, in <module>
File "Scientific\Geometry\VectorModule.pyc", line 9, in <module>
File "Scientific\N.pyc", line 1, in <module>
ImportError: No module named Scientific_numerics_package_id
I can see the file Scientific_numerics_package_id.pyd at the location "C:\Python27\Lib\site-packages\Scientific\win32". I want to include this module file into the compilation. I tried to copy the above file in the "dist" folder but no good. Any idea?
Update:
Here is the script:
from MMTK import *
from MMTK.Proteins import Protein
from Scientific.Visualization import VRML2; visualization_module = VRML2
# Load the structure and take its centre; every camera is placed
# `distance_away` units from that centre along the first or third coordinate.
protein = Protein('3CLN.pdb')
center, inertia = protein.centerAndMomentOfInertia()
distance_away = 8.0

front_cam = visualization_module.Camera(
    position=[center[0], center[1], center[2] + distance_away],
    description="Front",
)
right_cam = visualization_module.Camera(
    position=[center[0] + distance_away, center[1], center[2]],
    orientation=(Vector(0, 1, 0), 3.14159*0.5),
    description="Right",
)
back_cam = visualization_module.Camera(
    position=[center[0], center[1], center[2] - distance_away],
    orientation=(Vector(0, 1, 0), 3.14159),
    description="Back",
)
left_cam = visualization_module.Camera(
    position=[center[0] - distance_away, center[1], center[2]],
    orientation=(Vector(0, 1, 0), 3.14159*1.5),
    description="Left",
)

# Build the graphics objects for the chosen model and show them with all
# four cameras.
model_name = 'vdw'
graphics = protein.graphicsObjects(
    graphics_module=visualization_module,
    model=model_name,
)
visualization_module.Scene(
    graphics,
    cameras=[front_cam, right_cam, back_cam, left_cam],
).view()
Py2exe lets you specify additional Python modules (both .py and .pyd) via the includes option:
setup(
    ...
    # "includes" lists additional modules for py2exe to bundle.
    options={"py2exe": {"includes": ["Scientific.win32.Scientific_numerics_package_id"]}}
)
EDIT. The above should work if Python is able to
import Scientific.win32.Scientific_numerics_package_id
There is a way to work around this type of issue that I have used a number of times. To add extra files to the py2exe result, you can extend the media collector and use your own custom version of it. The following code is an example:
import glob
from py2exe.build_exe import py2exe as build_exe
def get_py2exe_extension():
    """Return an extension class of py2exe.

    Returns:
        ``MediaCollector``, a subclass of ``build_exe`` whose
        ``copy_extensions`` step additionally copies the files found in the
        listed modules' directories into the collection directory.
    """
    # Imported locally because the body below uses both names while the
    # surrounding snippet only imports glob and py2exe.
    import os
    from distutils import log

    class MediaCollector(build_exe):
        """Extension that copies Scientific_numerics_package_id missing data."""

        def _add_module_data(self, module_name):
            """Add the data from a given path.

            Args:
                module_name: Dotted name of the module whose directory
                    contents should be copied.
            """
            # Create the media subdir where the
            # Python files are collected.
            media = module_name.replace('.', os.path.sep)
            full = os.path.join(self.collect_dir, media)
            if not os.path.exists(full):
                self.mkpath(full)

            # Copy the media files to the collection dir.
            # Also add the copied file to the list of compiled
            # files so it will be included in zipfile.
            module = __import__(module_name, None, None, [''])
            # NOTE(review): only packages expose __path__; a plain module or
            # a single .pyd would raise AttributeError here -- confirm the
            # names passed in are packages.
            for path in module.__path__:
                for f in glob.glob(path + '/*'):  # does not like os.path.sep
                    log.info('Copying file %s', f)
                    name = os.path.basename(f)
                    if not os.path.isdir(f):
                        self.copy_file(f, os.path.join(full, name))
                        self.compiled_files.append(os.path.join(media, name))
                    else:
                        self.copy_tree(f, os.path.join(full, name))

        def copy_extensions(self, extensions):
            """Copy the missing extensions."""
            build_exe.copy_extensions(self, extensions)
            for module in ['Scientific_numerics_package_id',]:
                self._add_module_data(module)

    return MediaCollector
I'm not sure which module Scientific_numerics_package_id is, so I've assumed that you can import it like that. The copy_extensions method goes through the module names that you are having problems with and copies all their data into the collection directory for you. Once you have that, in order to use the new media collector you just have to do something like the following:
cmdclass['py2exe'] = get_py2exe_extension()
So that the correct extension is used. You might need to touch the code a little but this should be a good starting point for what you need.
I encountered a similar problem with py2exe, and the only solution I could find was to use another tool to convert Python to an exe: PyInstaller.
It is a very easy tool to use and, more importantly, it works!
UPDATE
As I understand from your comments below, running your script from the command line does not work either, due to an import error. (My recommendation is to first check your code from the command line, and then try to convert it to an EXE.)
It looks like PYTHONPATH problem.
PYTHONPATH is a list of paths (similar to the Windows PATH) that Python programs use to find imported modules.
If your script runs from your IDE, that means PYTHONPATH is set correctly in the IDE, so all imported modules are found.
In order to set PYTHONPATH you can use :
import sys
# `pathname` stands for the directory to make importable; it is a
# placeholder in this example.
sys.path.append(pathname)
or use the following code that add the all folders under path parameter to PYTHONPATH:
import os
import sys
def add_tree_to_pythonpath(path):
    """
    Function: add_tree_to_pythonpath
    Description: Append every sub-directory below path to sys.path.
                 Directories are visited depth-first in os.listdir order.
                 Entries named ".bzr" or "dll" (any case) are skipped along
                 with everything beneath them. path itself is not added.
    Parameters: path - Parent path to start from
    Return: None (sys.path is extended in place)
    """
    for entry in os.listdir(path):
        # Ignore bzr and dll directories (optional to NOT include specific folders)
        if entry == ".bzr" or entry.lower() == "dll":
            continue
        pathname = os.path.join(path, entry)
        if not os.path.isdir(pathname):
            continue
        # Avoid piling up duplicates when the function runs more than once.
        if pathname not in sys.path:
            sys.path.append(pathname)
        # It is a directory, recurse into it
        add_tree_to_pythonpath(pathname)
def startup():
    """
    Function: startup
    Description: Prepare the import path, then hand over to main
    Parameters: None
    Return: None
    """
    # Climb two levels up from the current working directory.
    base_path = os.getcwd()
    for _ in range(2):
        base_path = os.path.normpath(os.path.join(base_path, ".."))
    # Register every directory under that ancestor on PYTHONPATH.
    add_tree_to_pythonpath(base_path)
    # Run the program itself.
    main()


startup()
The ImportError was resolved by following the suggestions from "Gil.I" and "Janne Karila": setting PYTHONPATH and using the includes option. Before that, however, I had to create an __init__.py file in the win32 folder of both modules.
BTW I still got another error for the above script - link

How to make tar backup using python

I have directories such as /home/user1 and /home/user2.
I want to loop through every user's home directory, make a tar.gz file of it, and store it in the /backups directory.
I am new to Python, so I am not sure how to start.
This should work:
import os
import tarfile
home = '/home/'
# NOTE(review): the question mentions /backups; adjust this if that is the
# intended destination.
backup_dir = '/backup/'
# One archive is written per entry of `home` that is a directory.
home_dirs = [name for name in os.listdir(home) if os.path.isdir(os.path.join(home, name))]
for directory in home_dirs:
    full_dir = os.path.join(home, directory)
    archive_path = os.path.join(backup_dir, directory + '.tar.gz')
    # The context manager closes the archive even if add() raises, so a
    # failure part-way through does not leak the open file.
    with tarfile.open(archive_path, 'w:gz') as tar:
        tar.add(full_dir)
python write string directly to tarfile
and http://docs.python.org/library/tarfile.html#tar-examples

Categories

Resources