I am currently creating files using the code below. I want to create a directory named after the current timestamp in the cwd, save the directory location to a variable, and then create the file in the newly created directory. Does anyone have ideas on how this can be done?
def filecreation(list, filename):
    """Write every string in ``list`` to ``filename``, replacing existing content.

    The items are written back to back; no separators or newlines are added.
    """
    with open(filename, 'w') as out:
        out.writelines(item for item in list)
def main():
    """Write the two sample items to ``list.txt`` in the current directory."""
    items = ['1', '2']
    filecreation(items, "list.txt")


if __name__ == '__main__':
    main()
You mean, something like this?
import os, datetime
# The directory is named after the current local time (YYYY-MM-DD_HH-MM-SS)
# and lives directly under the current working directory.
stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
mydir = os.path.join(os.getcwd(), stamp)
os.makedirs(mydir)
with open(os.path.join(mydir, 'filename.txt'), 'w') as d:
    pass  # ... etc ...
Complete function
import errno
import os
from datetime import datetime
def filecreation(list, filename):
    """Write the strings in ``list`` to ``filename`` in a timestamped directory.

    The directory is created under the current working directory and is
    named after the current local time (``YYYY-MM-DD_HH-MM-SS``).  If it
    already exists it is reused; any other ``OSError`` raised while creating
    it propagates to the caller.
    """
    stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    mydir = os.path.join(os.getcwd(), stamp)
    try:
        os.makedirs(mydir)
    except OSError as exc:
        already_exists = exc.errno == errno.EEXIST
        if not already_exists:
            raise  # anything but "directory exists" is a real failure
    target = os.path.join(mydir, filename)
    with open(target, 'w') as out:
        out.writelines(list)
Update: check errno.EEXIST constant instead of hard-coding the error number
Related
I'm trying to get a message that file not found when it doesn't match the file_name*.txt pattern in specific directory.
When calling the script with file_name*.txt argument, all works fine. While entering invalid name file_*.txt throws:
File "etl.py", line 14, in main
path, file = file_path.get_source_file_path()
ValueError: too many values to unpack (expected 2)
Why is this happening?
import fnmatch
import os
class FilePrep:
    """Locate a file inside a directory by shell-style wildcard pattern."""

    def __init__(self, path_dir, file_name):
        self.path = path_dir
        self.file_name = file_name

    def get_source_file_path(self):
        """Return ``(full_path, name)`` for the first matching directory entry.

        "First" follows the order in which ``os.listdir`` reports entries.
        When nothing matches, the plain string ``"file not found"`` is
        returned instead of a 2-tuple.
        """
        matches = [
            entry
            for entry in os.listdir(self.path)
            if fnmatch.fnmatch(entry, self.file_name)
        ]
        if not matches:
            return "file not found"
        source_file_name = str(matches[0])
        return os.path.join(self.path, source_file_name), source_file_name
main.py
import file_prep
import xml.etree.ElementTree as element_tree
import pandas
import sys
def main():
    """Resolve the file named on the command line and print its full path."""
    # Example: python3 etl.py /home/user/projects/python/folder/ file_name*.xml
    source_dir, pattern = sys.argv[1], sys.argv[2]
    locator = file_prep.FilePrep(source_dir, pattern)
    path, _name = locator.get_source_file_path()
    print(path)
Your problem is in this line in main.py:
path, file = file_path.get_source_file_path()
Here you are unpacking the return value of get_source_file_path() into two variables. This works fine if the file exists (because you actually return two values), but not when the file does not exist, since in that case you return only one value: "file not found".
To fix this I would raise an error instead of returning a string message in case of failure. Your code can become:
import fnmatch
import os
class FileNotFoundError(Exception):
    """Signals that no directory entry matched the requested pattern."""
class FilePrep:
    """Locate a file inside a directory by shell-style wildcard pattern."""

    def __init__(self, path_dir, file_name):
        self.path = path_dir
        self.file_name = file_name

    def get_source_file_path(self):
        """Return ``(full_path, name)`` for the first matching directory entry.

        Raises ``FileNotFoundError`` when no entry of the directory matches
        the pattern.
        """
        candidates = (
            entry
            for entry in os.listdir(self.path)
            if fnmatch.fnmatch(entry, self.file_name)
        )
        first = next(candidates, None)
        if first is None:
            raise FileNotFoundError(f"file {self.file_name} not found")
        source_file_name = str(first)
        return os.path.join(self.path, source_file_name), source_file_name
import file_prep
import xml.etree.ElementTree as element_tree
import pandas
import sys
def main():
    """Print the full path of the matching file, or the lookup error message."""
    source_dir = sys.argv[1]
    pattern = sys.argv[2]
    # Example: python3 etl.py /home/user/projects/python/folder/ file_name*.xml
    locator = file_prep.FilePrep(source_dir, pattern)
    try:
        path, _name = locator.get_source_file_path()
    except file_prep.FileNotFoundError as e:
        print(e)
    else:
        print(path)
The previous answers somewhat got it the wrong way, saying that in case of failure "only one" value is returned. While that's true, it doesn't explain why the error message says there are "too many values to unpack", since 1 is not more than 2. The reason is that that one value is a string, which, due to the multi-assignment, will be treated as an iterable. Since it has 14 characters, that's 14 values, and that's too many.
The issue is this following line:
except IndexError:
return "file not found"
When you hit that except condition your return from your function call returns only one variable, that string. You could modify your condition to do the following:
except IndexError:
source_file_name = "file not found"
However this won't work as you try to use this file name later on, a better approach would be to raise an error properly as shown in this answer by Matteo here.
When you call the function here, it's expected that two values will be returned
path, file = file_path.get_source_file_path()
When an exception occurs here, only one value is being returned which is causing the error
try:
source_file_name = str(source_file[0])
except IndexError:
return "file not found"
There are two ways to fix this.
Make it so only one value is expected to be returned and you modify the return at the bottom to only pass one value
You modify the exception so it returns two values
I'm trying to rewrite some code for learning purposes and got stuck with implementing try/except part into the code.
Class FilePrep takes two arguments (file_name and path_dir); the loop checks whether the file exists and returns the entire path.
How do I properly implement the handler part so that the error message is clear, rather than throwing "list index out of range"?
import xml.etree.ElementTree as element_tree
import fnmatch
import os
import errno
class FilePrep:
    """Locate a file inside a directory by shell-style wildcard pattern."""

    def __init__(self, path_dir, file_name):
        self.path = path_dir
        self.file_name = file_name

    def get_source_file_path(self):
        """Return the full path of the first matching directory entry.

        Raises ``IndexError`` when nothing matches, because the first
        element of the (then empty) match list is requested.
        """
        matches = []
        for entry in os.listdir(self.path):
            try:
                if fnmatch.fnmatch(entry, self.file_name):
                    matches.append(entry)
            except IndexError:
                print("file not found")
        first_match = str(matches[0])
        return os.path.join(self.path, first_match)
Function.py
import file_prep
# Look up the first file matching the pattern and show its full path.
file_path = file_prep.FilePrep('path', 'file_name*.xml')
resolved = file_path.get_source_file_path()
print(resolved)
The main problem is in the line below:
source_file_old_name = str(source_file[0])
you can use below solution:
try:
source_file_old_name = str(source_file[0])
except IndexError:
return ""
file_path = os.path.join(self.path, source_file_old_name)
return file_path
Your try/except-block is placed at the wrong place.
Actually, the error occurs, when you're trying to access source_file[0], which is an empty list ([]) in case, that no file exists which matches the specified filename.
Therefore, I suggest changing it to the following implementation, which checks the length of the list that should contain the matched files. If it is empty, a FileNotFoundError is raised, like so:
if not source_file:
raise FileNotFoundError(f"No files matching '{self.file_name}'")
This results in the following class:
import xml.etree.ElementTree as element_tree
import fnmatch
import os
import errno
class FilePrep:
    """Locate a file inside a directory by shell-style wildcard pattern."""

    def __init__(self, path_dir, file_name):
        self.path = path_dir
        self.file_name = file_name

    def get_source_file_path(self):
        """Return the full path of the first matching directory entry.

        Raises ``FileNotFoundError`` when no entry of the directory matches
        the pattern.
        """
        pattern = self.file_name
        matches = [name for name in os.listdir(self.path)
                   if fnmatch.fnmatch(name, pattern)]
        if matches:
            return os.path.join(self.path, str(matches[0]))
        raise FileNotFoundError(f"No files matching '{self.file_name}'")
**Update 1/8/2019 0945 EST
I have passed the script through the function given by bhakta0007 but received a path error "The system cannot find the path specified:".
After review, I added the below statement to the end of the script to pass the list through the function and the code works.
for f in fList:
excel_csv(fList)
I have added an answer to the question below.
I have a small script that I run to convert excel files to .csv. Currently , I have to repeat the script with the paths hardcoded in. The current paths have the exact same structure with the exceptions of a 3 digit identifier which I would like to create a list that I can call from. Below is my code. You will see I have variables that have the paths and I pass these variables where needed.I have looked into os.path, glob, and pathlib, but I can't find a good solution for the problem.
Original Code
import os
import glob
import pandas as pd
import shutil
# Folder holding the workbooks to convert (facility 261 is hard-coded).
Target_Path = os.path.join(os.path.dirname('//fs/Unprocessed/261/Edlog/Working/'))
# Destination for the generated CSV files.
Move_Path = os.path.join(os.path.dirname('//fs/Unprocessed/261/Edlog/ToProcess/'))
# Destination for the workbooks after conversion.
Process_Path = os.path.join(os.path.dirname('//fs/Unprocessed/261/Edlog/Processed/'))
# All relative glob patterns below are resolved against Target_Path.
os.chdir(Target_Path)
# NOTE(review): indentation was lost in this paste, so the exact extent of the
# try/finally bodies cannot be determined from here — confirm before editing.
try:
for f in glob.glob('*.xls'):
# The CSV name is the text before the first dot of the workbook name.
out = f.split('.')[0]+'.csv'
df = pd.read_excel(f,)
df.to_csv(out, index=False)
finally:
for f in glob.glob('*.xlsx'):
out = f.split('.')[0]+'.csv'
df = pd.read_excel(f,)
df.to_csv(out, index=False)
# Conversion is reported as complete when there are exactly as many CSV files
# as workbooks in the working folder.
xlsCounter = len(glob.glob1(Target_Path,"*.xls"))
xlsxCounter = len(glob.glob1(Target_Path,"*.xlsx"))
csvcounter = len(glob.glob1(Target_Path,"*.csv"))
if csvcounter == xlsCounter + xlsxCounter :
print('Complete Convert')
else:
print('Failed Convert')
# CSV output goes to Move_Path; the source workbooks go to Process_Path.
for files in glob.glob('*.csv'):
shutil.move(files, Move_Path)
for files in glob.glob('*.xls'):
shutil.move(files, Process_Path)
for files in glob.glob('*.xlsx'):
shutil.move(files, Process_Path)
# The move is reported as complete only when the working folder is empty.
if len(os.listdir(Target_Path) ) == 0:
print('Complete Move')
else:
print('Failed Move')
I have used the function created from Bhakta0007, but received "The system cannot find the path specified:" error.
-Revisions added-
I added in a "For" clause at the end of the script and passed the list through the function and was able to run the script successfully in all directories.
I also used an fstring for the "Facility" instead of .format(facility)
Below is the working Code
import os
import glob
import pandas as pd
import shutil
def excel_csv(facility):
    """Convert and file away the Excel workbooks of one or more facilities.

    ``facility`` is either a single facility identifier (``str`` or ``int``)
    or an iterable of identifiers.  For each identifier, every ``*.xls`` and
    ``*.xlsx`` workbook in ``//fs/Unprocessed/<id>/Edlog/Working`` is written
    out as a CSV next to it; the CSV files are then moved to ``ToProcess``
    and the workbooks to ``Processed``.

    Side effect: the process working directory is changed to the ``Working``
    folder of each facility in turn and is left at the last one handled.

    Raises ``OSError`` if a facility's ``Working`` folder cannot be entered.
    """
    # A bare identifier is treated as a one-element list so that both
    # excel_csv('261') and excel_csv(['261', '262']) do what they say.
    if isinstance(facility, (str, int)):
        facility = [facility]
    for facility_id in facility:
        _process_facility(facility_id)


def _process_facility(facility_id):
    """Convert, verify and move the workbooks of a single facility."""
    Target_Path = os.path.dirname(f'//fs/Unprocessed/{facility_id}/Edlog/Working/')
    Move_Path = os.path.dirname(f'//fs/Unprocessed/{facility_id}/Edlog/ToProcess/')
    Process_Path = os.path.dirname(f'//fs/Unprocessed/{facility_id}/Edlog/Processed/')

    # The relative glob patterns below are resolved against Target_Path.
    os.chdir(Target_Path)

    workbooks = glob.glob('*.xls') + glob.glob('*.xlsx')
    for workbook in workbooks:
        # splitext only strips the extension, so "a.b.xls" becomes "a.b.csv"
        # rather than colliding with other files on "a.csv".
        out = os.path.splitext(workbook)[0] + '.csv'
        pd.read_excel(workbook).to_csv(out, index=False)

    # One CSV per workbook is expected in the working folder.
    if len(glob.glob('*.csv')) == len(workbooks):
        print('Complete Convert')
    else:
        print('Failed Convert')

    for csv_name in glob.glob('*.csv'):
        shutil.move(csv_name, Move_Path)
    for workbook in workbooks:
        shutil.move(workbook, Process_Path)

    # Anything left behind means at least one file was not moved.
    if not os.listdir(Target_Path):
        print('Complete Move')
    else:
        print('Failed Move')


fList = ['261', '262', '278', '300']

if __name__ == '__main__':
    excel_csv(fList)
import os
import glob
import pandas as pd
import shutil
def process(folders):
    """Run the conversion steps inside each facility's ``Working`` folder.

    ``folders`` is an iterable of facility identifiers; each one is
    substituted into the ``//fs/Unprocessed/<id>/Edlog/...`` paths.

    Side effect: changes the process working directory to each ``Working``
    folder in turn.  Raises ``OSError`` if a folder cannot be entered.
    """
    for folder in folders:
        Target_Path = os.path.dirname('//fs/Unprocessed/{}/Edlog/Working/'.format(folder))
        Move_Path = os.path.dirname('//fs/Unprocessed/{}/Edlog/ToProcess/'.format(folder))
        Process_Path = os.path.dirname('//fs/Unprocessed/{}/Edlog/Processed/'.format(folder))
        os.chdir(Target_Path)
        # <Rest of our code> — the convert / verify / move steps go here and
        # use Target_Path, Move_Path and Process_Path.


fList = [261, 262, 278, 300]

if __name__ == '__main__':
    process(fList)
Suppose I have the following BUILD file
# Library target for foo.py; `data` lists the files it needs at run time
# (here data.json, exported by the //bar package).
py_library(
    name = "foo",
    srcs = ["foo.py"],
    data = ["//bar:data.json"],
)
How should I refer to the data.json in foo.py file? I wanted to have something like below, what should I use for some_path?
with open(os.path.join(some_path, "bar/data.json"), 'r') as fp:
data = json.load(fp)
I couldn't find much general documentation about *.runfiles online -- any pointer will be appreciated!
Short answer: os.path.dirname(__file__)
Here is the full example:
$ ls
bar/ BUILD foo.py WORKSPACE
$ cat BUILD
py_binary(
name = "foo",
srcs = ["foo.py"],
data = ["//bar:data.json"],
)
$ cat foo.py
import json
import os
ws = os.path.dirname(__file__)
with open(os.path.join(ws, "bar/data.json"), 'r') as fp:
print(json.load(fp))
$ cat bar/BUILD
exports_files(["data.json"])
$ bazel run :foo
Edit: it doesn't work well when your package is in a subdirectory. You may need to go back using os.path.dirname.
Here is a function that should return the path to the runfiles root for any py_binary in all the cases that I'm aware of:
import os
import re
import sys
def find_runfiles():
    """Find the runfiles tree (useful when _not_ run from a zip file).

    Returns the path of the ``.runfiles`` directory for the running program,
    located from ``sys.argv[0]``.  Raises ``RuntimeError`` when neither
    lookup below succeeds.
    """
    stub_filename = os.path.abspath(sys.argv[0])

    # Case 1: a "<program>.runfiles" directory sits next to the launcher.
    module_space = stub_filename + '.runfiles'
    if os.path.isdir(module_space):
        return module_space

    # Case 2: the launcher path itself runs through a ".runfiles" directory;
    # the greedy match keeps everything up to the last such component.
    matchobj = re.match(r"(.*\.runfiles)", stub_filename)
    if matchobj:
        return matchobj.group(1)

    raise RuntimeError('Cannot find .runfiles directory for %s' %
                       sys.argv[0])
For the example in your question you could use it like so:
with open(os.path.join(find_runfiles(), "name_of_workspace/bar/data.json"), 'r') as fp:
data = json.load(fp)
Note that this function won't help if you build zipped executables of your python apps (using subpar, probably); for those you will need some more code. This next snippet includes get_resource_filename() and get_resource_directory(), which will work for both regular py_binary and .par binaries:
import atexit
import os
import re
import shutil
import sys
import tempfile
import zipfile
def get_resource_filename(path):
    """Return a filesystem path at which the resource ``path`` can be opened.

    Lookup order:
      1. If the main module lives inside a zip file, the member is extracted
         into a fresh temporary directory (removed at interpreter exit).
      2. ``path`` itself, if it exists as given.
      3. ``path`` relative to the runfiles tree.

    Raises ``ResourceNotFoundError`` when none of these yields a file.
    """
    main_file = sys.modules.get("__main__").__file__
    zip_path = get_zip_path(main_file)
    if zip_path:
        tmpdir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
        archive = BetterZipFile(zip_path)
        archive.extract(member=path, path=tmpdir)
        return os.path.join(tmpdir, path)

    if os.path.exists(path):
        return path

    path_in_runfiles = os.path.join(find_runfiles(), path)
    if not os.path.exists(path_in_runfiles):
        raise ResourceNotFoundError
    return path_in_runfiles
def get_resource_directory(path):
    """Find or extract an entire subtree and return its location.

    When the main module lives inside a zip file, every archive member whose
    name starts with ``path`` is extracted into a fresh temporary directory
    (removed at interpreter exit).  Otherwise ``path`` is tried as given and
    then relative to the runfiles tree.

    Raises ``ResourceNotFoundError`` when the subtree cannot be located.
    """
    main_file = sys.modules.get("__main__").__file__
    zip_path = get_zip_path(main_file)
    if zip_path:
        tmpdir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
        archive = BetterZipFile(zip_path)
        members = [name for name in archive.namelist() if name.startswith(path)]
        archive.extractall(members=members, path=tmpdir)
        return os.path.join(tmpdir, path)

    if os.path.exists(path):
        return path

    path_in_runfiles = os.path.join(find_runfiles(), path)
    if not os.path.exists(path_in_runfiles):
        raise ResourceNotFoundError
    return path_in_runfiles
def get_zip_path(path):
    """If path is inside a zip file, return the zip file's path.

    ``path`` and each of its parent directories are tested in turn with
    ``zipfile.is_zipfile``.  Returns ``None`` when no component is a zip
    file, including for empty and relative paths.
    """
    current = path
    while current and current != os.path.sep:
        if zipfile.is_zipfile(current):
            return current
        parent = os.path.dirname(current)
        # dirname() reaches a fixed point at the top of the path; stop there
        # instead of walking the same component forever.
        if parent == current:
            return None
        current = parent
    return None
class ResourceNotFoundError(RuntimeError):
    """Raised when a requested resource cannot be located."""
def find_runfiles():
    """Find the runfiles tree (useful when _not_ run from a zip file).

    The lookup starts from the absolute form of ``sys.argv[0]``: first a
    sibling ``<program>.runfiles`` directory is tried, then the longest
    prefix of the program path that ends in ``.runfiles``.  Raises
    ``RuntimeError`` when neither exists.
    """
    program = os.path.abspath(sys.argv[0])

    sibling = program + '.runfiles'
    if os.path.isdir(sibling):
        return sibling

    enclosing = re.match(r"(.*\.runfiles)", program)
    if enclosing is None:
        raise RuntimeError('Cannot find .runfiles directory for %s' %
                           sys.argv[0])
    return enclosing.group(1)
class BetterZipFile(zipfile.ZipFile):
    """Shim around ZipFile that preserves permissions on extract."""

    def extract(self, member, path=None, pwd=None):
        """Extract ``member`` and re-apply the mode recorded in the archive.

        ``member`` may be a member name or a ``ZipInfo``; ``path`` defaults
        to the current working directory.  Returns the path of the extracted
        file.  When the archive stores no permission bits for the member,
        the permissions chosen by the normal extraction are left untouched.
        """
        if not isinstance(member, zipfile.ZipInfo):
            member = self.getinfo(member)

        ret_val = zipfile.ZipFile.extract(self, member, path, pwd)

        # The high 16 bits of external_attr carry the Unix st_mode for
        # archives written on Unix; keep only the permission bits.
        mode = (member.external_attr >> 16) & 0o7777
        if mode:
            # A zero mode means "not recorded"; applying it would strip all
            # access from the extracted file.
            os.chmod(ret_val, mode)
        return ret_val
Using this second code snippet, your example would look like:
with open(get_resource_filename("name_of_workspace/bar/data.json"), 'r') as fp:
data = json.load(fp)
I need to setup some test conditions to simulate a filled up disk. I created the following to simply write garbage to the disk:
#!/usr/bin/python
import os
import sys
import mmap
def freespace(p):
    """
    Returns the number of free bytes on the drive that ``p`` is on

    The figure is the space available to unprivileged users.
    """
    s = os.statvfs(p)
    # f_bavail is counted in fragment-size (f_frsize) units; f_bsize is only
    # the preferred I/O block size and may differ from it.
    return s.f_frsize * s.f_bavail
# Script entry point: argv[1] is the path whose filesystem is polled for free
# space, argv[2] is the file that receives the filler bytes.
if __name__ == '__main__':
drive_path = sys.argv[1]
output_path = sys.argv[2]
output_file = open(output_path, 'w')
# Append one byte at a time while the filesystem still reports free space.
while freespace(drive_path) > 0:
output_file.write("!")
# Python 2 print statement.
print freespace(drive_path)
# NOTE(review): indentation was lost in this paste, so it is unclear whether
# flush() runs on every iteration or only once after the loop — confirm.
output_file.flush()
output_file.close()
As far as I can tell by looking at the return value from freespace, the write method does not write the data to the file until it is closed, thereby making the while condition invalid.
Is there a way I can write the data directly to the file? Or another solution perhaps?
This is untested but I imagine something along these lines will be the quickest way to fill the disk easily
import sys
import errno
# Start with large chunks so that most of the disk fills quickly.
write_str = "!" * 1024 * 1024 * 5  # 5MB
output_path = sys.argv[1]
with open(output_path, "w") as f:
    while True:
        try:
            f.write(write_str)
            f.flush()
        except IOError as err:
            if err.errno != errno.ENOSPC:
                raise
            # No room for a chunk of this size: halve it and retry so the
            # remaining space is filled too; give up once a single byte
            # no longer fits.
            write_str_len = len(write_str)
            if write_str_len <= 1:
                break
            # Floor division keeps the slice index an int on Python 3.
            write_str = write_str[:write_str_len // 2]
    # NOTE(review): leaving the `with` block flushes any data still buffered,
    # which may itself fail with ENOSPC on a full disk — confirm.
You could try/catch a disk full exception on write.