Problem with file cleaning process?

Problem with file cleaning process? - python

I am working on very large file system. My task is to clean the system with some given parameters. Below program fragment can give a idea.
import DirectoryWalker
extentions_to_delete = list([".rar",".doc",".URL",".js",".EXE",".mht",".css",".txt", ".cache", ".xml"])
extentions_to_copy = list([".jpg",".BMP",".GIF",".jpeg",".gif",".bmp",".png",".JPG"])
dw = DirectoryWalker.DirectoryWalker("/media/08247451247443AA/home/crap/")
def copy_advice(key, files):
for ext in extentions_to_copy:
if(ext == key):
print str(len(files)) + " Files of type " + key + " should be coppied to the target folder."
for file in files:
copy_to = "/media/08247451247443AA/home/crap-pics/"
moved = dw.move_file_to(file, copy_to, True)
if not moved:
print file + " : not moved"
walks = dw.get_all_file_types()
for key in DirectoryWalker.Walk.store.keys():
files = DirectoryWalker.Walk.store[key]
copy_advice(key, files)
In the DirectoryWalker following code is written. Walk is a simple class which have a store object.
def get_all_file_types(self):
extentions = []
for dirpath,dirnames,filenames in os.walk(self.dir_name):
for file in filenames:
extentions.append(Walk(dirpath +"/"+ file))
return extentions
def move_file_to(self, file_path, copy_to, rename_if_exists= False):
file_name = os.path.split(file_path)[1]
target_file_name = copy_to + file_name;
coppied = False
if not os.path.isfile(target_file_name):
coppied = True
try:
os.rename(file_path, target_file_name)
except OSError:
coppied = False
print "Oops! Unable to rename : " + file_path + " to target : " + target_file_name
if rename_if_exists:
coppied = True
file_name = "new_"+ file_name
try:
os.rename(file_path, target_file_name)
except OSError:
coppied = False
print "Oops! Unable to rename : " + file_path + " to target : " + target_file_name
return coppied
The Walk class
class Walk:
store = dict([])
def __init__(self, filename):
self.file_ext = os.path.splitext(filename)[-1]
self.file_name = filename
if not (Walk.store.has_key(self.file_ext)):
Walk.store[self.file_ext] = list()
Walk.store[self.file_ext].append(self.file_name)
But when program executed, it only moves almost 10400 files. But manual calculation suggest, there should be 13400 files in the file system. Please let me know, what I am doing wrong?
Update Solutions
After a careful investigations, I come out with result that there are many ambiguous file names in the target file system and those files were missing.

To answer your question, why not start with a simpler piece of code to test?
import os
all_files = []
for root, dirs, files in os.walk('/media/08247451247443AA/home/crap/'):
all_files.extend(files)
print len(all_files)
As a side note, could you replace the Walk class with a defaultdict?

After a careful investigations, I come out with result that there are many ambiguous file names in the target file system and those files were missing.

Related

Python - How to create a csv, if csv already exists [duplicate]

Does Python have any built-in functionality to add a number to a filename if it already exists?
My idea is that it would work the way certain OS's work - if a file is output to a directory where a file of that name already exists, it would append a number or increment it.
I.e: if "file.pdf" exists it will create "file2.pdf", and next time "file3.pdf".

I ended up writing my own simple function for this. Primitive, but gets the job done:
def uniquify(path):
filename, extension = os.path.splitext(path)
counter = 1
while os.path.exists(path):
path = filename + " (" + str(counter) + ")" + extension
counter += 1
return path

In a way, Python has this functionality built into the tempfile module. Unfortunately, you have to tap into a private global variable, tempfile._name_sequence. This means that officially, tempfile makes no guarantee that in future versions _name_sequence even exists -- it is an implementation detail.
But if you are okay with using it anyway, this shows how you can create uniquely named files of the form file#.pdf in a specified directory such as /tmp:
import tempfile
import itertools as IT
import os
def uniquify(path, sep = ''):
def name_sequence():
count = IT.count()
yield ''
while True:
yield '{s}{n:d}'.format(s = sep, n = next(count))
orig = tempfile._name_sequence
with tempfile._once_lock:
tempfile._name_sequence = name_sequence()
path = os.path.normpath(path)
dirname, basename = os.path.split(path)
filename, ext = os.path.splitext(basename)
fd, filename = tempfile.mkstemp(dir = dirname, prefix = filename, suffix = ext)
tempfile._name_sequence = orig
return filename
print(uniquify('/tmp/file.pdf'))

I was trying to implement the same thing in my project but #unutbu's answer seemed too 'heavy' for my needs so I came up with following code finally:
import os
index = ''
while True:
try:
os.makedirs('../hi'+index)
break
except WindowsError:
if index:
index = '('+str(int(index[1:-1])+1)+')' # Append 1 to number in brackets
else:
index = '(1)'
pass # Go and try create file again
Just in case someone stumbled upon this and requires something simpler.

If all files being numbered isn't a problem, and you know beforehand the name of the file to be written, you could simply do:
import os
counter = 0
filename = "file{}.pdf"
while os.path.isfile(filename.format(counter)):
counter += 1
filename = filename.format(counter)

recently I encountered the same thing and here is my approach:
import os
file_name = "file_name.txt"
if os.path.isfile(file_name):
expand = 1
while True:
expand += 1
new_file_name = file_name.split(".txt")[0] + str(expand) + ".txt"
if os.path.isfile(new_file_name):
continue
else:
file_name = new_file_name
break

Let's say you already have those files:
This function generates the next available non-already-existing filename, by adding a _1, _2, _3, ... suffix before the extension if necessary:
import os
def nextnonexistent(f):
fnew = f
root, ext = os.path.splitext(f)
i = 0
while os.path.exists(fnew):
i += 1
fnew = '%s_%i%s' % (root, i, ext)
return fnew
print(nextnonexistent('foo.txt')) # foo_3.txt
print(nextnonexistent('bar.txt')) # bar_1.txt
print(nextnonexistent('baz.txt')) # baz.txt

Since the tempfile hack A) is a hack and B) still requires a decent amount of code anyway, I went with a manual implementation. You basically need:
A way to Safely create a file if and only if it does not exist (this is what the tempfile hack affords us).
A generator for filenames.
A wrapping function to hide the mess.
I defined a safe_open that can be used just like open:
def iter_incrementing_file_names(path):
"""
Iterate incrementing file names. Start with path and add " (n)" before the
extension, where n starts at 1 and increases.
:param path: Some path
:return: An iterator.
"""
yield path
prefix, ext = os.path.splitext(path)
for i in itertools.count(start=1, step=1):
yield prefix + ' ({0})'.format(i) + ext
def safe_open(path, mode):
"""
Open path, but if it already exists, add " (n)" before the extension,
where n is the first number found such that the file does not already
exist.
Returns an open file handle. Make sure to close!
:param path: Some file name.
:return: Open file handle... be sure to close!
"""
flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY
if 'b' in mode and platform.system() == 'Windows':
flags |= os.O_BINARY
for filename in iter_incrementing_file_names(path):
try:
file_handle = os.open(filename, flags)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
else:
return os.fdopen(file_handle, mode)
# Example
with safe_open("some_file.txt", "w") as fh:
print("Hello", file=fh)

I haven't tested this yet but it should work, iterating over possible filenames until the file in question does not exist at which point it breaks.
def increment_filename(fn):
fn, extension = os.path.splitext(path)
n = 1
yield fn + extension
for n in itertools.count(start=1, step=1)
yield '%s%d.%s' % (fn, n, extension)
for filename in increment_filename(original_filename):
if not os.isfile(filename):
break

This works for me.
The initial file name is 0.yml, if it exists, it will add one until meet the requirement
import os
import itertools
def increment_filename(file_name):
fid, extension = os.path.splitext(file_name)
yield fid + extension
for n in itertools.count(start=1, step=1):
new_id = int(fid) + n
yield "%s%s" % (new_id, extension)
def get_file_path():
target_file_path = None
for file_name in increment_filename("0.yml"):
file_path = os.path.join('/tmp', file_name)
if not os.path.isfile(file_path):
target_file_path = file_path
break
return target_file_path

import os
class Renamer():
def __init__(self, name):
self.extension = name.split('.')[-1]
self.name = name[:-len(self.extension)-1]
self.filename = self.name
def rename(self):
i = 1
if os.path.exists(self.filename+'.'+self.extension):
while os.path.exists(self.filename+'.'+self.extension):
self.filename = '{} ({})'.format(self.name,i)
i += 1
return self.filename+'.'+self.extension

I found that the os.path.exists() conditional function did what I needed. I'm using a dictionary-to-csv saving as an example, but the same logic could work for any file type:
import os
def smart_save(filename, dict):
od = filename + '_' # added underscore before number for clarity
for i in np.arange(0,500,1): # I set an arbitrary upper limit of 500
d = od + str(i)
if os.path.exists(d + '.csv'):
pass
else:
with open(d + '.csv', 'w') as f: #or any saving operation you need
for key in dict.keys():
f.write("%s,%s\n"%(key, dictionary[key]))
break
Note: this appends a number (starting at 0) to the file name by default, but it's easy to shift that around.

This function validates if the file name exists using regex expresion and recursion
def validate_outfile_name(input_path):
filename, extension = os.path.splitext(input_path)
if os.path.exists(input_path):
output_path = ""
pattern = '\([0-9]\)'
match = re.search(pattern, filename)
if match:
version = filename[match.start() + 1]
try: new_version = int(version) + 1
except: new_version = 1
output_path = f"{filename[:match.start()]}({new_version}){extension}"
output_path = validate_outfile_name(output_path)
else:
version = 1
output_path = f"{filename}({version}){extension}"
return output_path
else:
return input_path

I've implemented a similar solution with pathlib:
Create file-names that match the pattern path/<file-name>-\d\d.ext. Perhaps this solution can help...
import pathlib
from toolz import itertoolz as itz
def file_exists_add_number(path_file_name, digits=2):
pfn = pathlib.Path(path_file_name)
parent = pfn.parent # parent-dir of file
stem = pfn.stem # file-name w/o extension
suffix = pfn.suffix # NOTE: extension starts with '.' (dot)!
try:
# search for files ending with '-\d\d.ext'
last_file = itz.last(parent.glob(f"{stem}-{digits * '?'}{suffix}"))
except:
curr_no = 1
else:
curr_no = int(last_file.stem[-digits:]) + 1
# int to string and add leading zeros
curr_no = str(last_no).zfill(digits)
path_file_name = parent / f"{stem}-{curr_no}{suffix}"
return str(path_file_name)
Pls note: That solution starts at 01 and will only find file-pattern containing -\d\d!

def create_file():
counter = 0
filename = "file"
while os.path.isfile(f"dir/{filename}{counter}.txt"):
counter += 1
print(f"{filename}{counter}.txt")

A little bit later but there is still something like this should work properly, mb it will be useful for someone.
You can use built-in iterator to do this ( image downloader as example for you ):
def image_downloader():
image_url = 'some_image_url'
for count in range(10):
image_data = requests.get(image_url).content
with open(f'image_{count}.jpg', 'wb') as handler:
handler.write(image_data)
Files will increment properly. Result is:
image.jpg
image_0.jpg
image_1.jpg
image_2.jpg
image_3.jpg
image_4.jpg
image_5.jpg
image_6.jpg
image_7.jpg
image_8.jpg
image_9.jpg

Easy way for create new file if this name in your folder
if 'sample.xlsx' in os.listdir('testdir/'):
i = 2
while os.path.exists(f'testdir/sample ({i}).xlsx'):
i += 1
wb.save(filename=f"testdir/sample ({i}).xlsx")
else:
wb.save(filename=f"testdir/sample.xlsx")

os.rename deleting files python 3

Python novice, my simple script gets a given directory and renames all files sequentially, however it is deleting the files but the print is showing the files names getting renamed, not sure where its going wrong here.
Also, in what order does it retrieve these files?
import os
path = os.path.abspath("D:\Desktop\gp")
i = 0
for file_name in os.listdir(path):
try:
print (file_name + " - " + str(i))
os.rename(os.path.join(path,file_name), str(i))
except WindowsError:
os.remove(str(i))
os.rename(os.path.join(path,file_name), str(i))
i += 1
print(str(i) + " files.")
Edit
Below is the solution with working code, retrieves all files in a dir by creation date and assigns them a iterated number while retaining file extension.
import os
def sorted_dir(folder):
def getctime(name):
path = os.path.join(folder, name)
return os.path.getctime(path)
return sorted(os.listdir(path), key=getctime)
path = os.path.abspath("D:\Path\Here")
i = 0
for file_name in sorted_dir(path):
_, ext = os.path.splitext(file_name)
print (file_name + " - " + str(i)+ext)
os.rename(os.path.join(path,file_name), os.path.join(path, str(i) + ext))
i += 1
print(str(i-1) + " files.")

The problem is that you're using an absolute path for the source, but a relative path for the destination. So the files aren't getting deleted, they're just getting moved into the current working directory.
To fix it so they get renamed into the same directory they were already in, you can do the same thing on the destination you do on the source:
os.rename(os.path.join(path,file_name), os.path.join(path, str(i)))
From a comment, it sounds like you may want to preserve the extensions on these files. To do that:
_, ext = os.path.splitext(file_name)
os.rename(os.path.join(path,file_name), os.path.join(path, str(i) + ext))

traverse directory structure in python recursively without os.walk

I am trying to write a python2 function that will recursively traverse through the whole directory structure of a given directory, and print out the results.
All without using os.walk
This is what I have got so far:
test_path = "/home/user/Developer/test"
def scanning(sPath):
output = os.path.join(sPath, 'output')
if os.path.exists(output):
with open(output) as file1:
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(sPath):
path = os.path.join(sPath, name)
if os.path.isdir(path):
print "'", name, "'"
print_directory_contents(path)
scanning(test_path)
This is what I currently get, the script doesn't enter the new folder:
' test2'
'new_folder'
The issue is that it does not go further down than one directory. I would also like to able to indicate visually what is a directory, and what is a file

Try this:
import os
test_path = "YOUR_DIRECTORY"
def print_directory_contents(dir_path):
for child in os.listdir(dir_path):
path = os.path.join(dir_path, child)
if os.path.isdir(path):
print("FOLDER: " + "\t" + path)
print_directory_contents(path)
else:
print("FILE: " + "\t" + path)
print_directory_contents(test_path)
I worked on windows, verify if still working on unix.
Adapted from:
http://codegists.com/snippet/python/print_directory_contentspy_skobnikoff_python

Try this out with recursion
it is much simple and less code
import os
def getFiles(path="/var/log", files=[]):
if os.path.isfile(path):
return files.append(path)
for item in os.listdir(path):
item = os.path.join(path, item)
if os.path.isfile(item):
files.append(item)
else:
files = getFiles(item, files)
return files
for f in getFiles("/home/afouda/test", []):
print(f)

Try using a recursive function,
def lastline(fil):
with open(fil) as f:
for li in f.readlines():
if li.startswith("Final Value:"):
print(li)
## If it still doesnt work try putting 'dirs=[]' here
def lookforfiles(basepath):
contents = os.listdir(basepath)
dirs = []
i = 0
while i <= len(contents):
i += 1
for n in contents:
f = os.path.join(basepath, n)
if os.path.isfile(f):
lastline(f)
print("\n\nfile %s" % n)
elif os.path.isdir(f):
print("Adding dir")
if f in dirs:
pass
else:
dirs.append(f)
else:
for x in dirs:
print("dir %s" % x)
lookforfiles(x)
sorry if this doesn't fit your example precisely but I had a hard time understanding what you were trying to do.

This question is a duplicate of Print out the whole directory tree.
TL;TR: Use os.listdir.

Move file to a folder or make a renamed copy if it exists in the destination folder

I have a piece of code i wrote for school:
import os
source = "/home/pi/lab"
dest = os.environ["HOME"]
for file in os.listdir(source):
if file.endswith(".c")
shutil.move(file,dest+"/c")
elif file.endswith(".cpp")
shutil.move(file,dest+"/cpp")
elif file.endswith(".sh")
shutil.move(file,dest+"/sh")
what this code is doing is looking for files in a source directory and then if a certain extension is found the file is moved to that directory. This part works. If the file already exists in the destination folder of the same name add 1 at end of the file name, and before the extension and if they are multiples copies do "1++".
Like this: test1.c,test2.c, test3.c
I tried using os.isfile(filename) but this only looks at the source directory. and I get a true or false.

To test if the file exists in the destination folder you should os.path.join the dest folder with the file name
import os
import shutil
source = "/home/pi/lab"
dest = os.environ["HOME"]
# Avoid using the reserved word 'file' for a variable - renamed it to 'filename' instead
for filename in os.listdir(source):
# os.path.splitext does exactly what its name suggests - split the name and extension of the file including the '.'
name, extension = os.path.splitext(filename)
if extension == ".c":
dest_filename = os.path.join(dest, filename)
if not os.path.isfile(dest_filename):
# We copy the file as is
shutil.copy(os.path.join(source, filename) , dest)
else:
# We rename the file with a number in the name incrementing the number until we find one that is not used.
# This should be moved to a separate function to avoid code duplication when handling the different file extensions
i = 0
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
while os.path.isfile(dest_filename):
i += 1
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
shutil.copy(os.path.join(source, filename), dest_filename)
elif extension == ".cpp"
...
# Handle other extensions
If you want to have put the renaming logic in a separate function using glob and re this is one way:
import glob
import re
...
def rename_file(source_filename, source_ext):
filename_pattern = os.path.join(dest, "%s[0-9]*%s"
% (source_filename, source_ext))
# Contains file such as 'a1.c', 'a2.c', etc...
existing_files = glob.glob(filename_pattern)
regex = re.compile("%s([0-9]*)%s" % (source_filename, source_ext))
# Retrieve the max of the index used for this file using regex
max_index = max([int(match.group(1))
for match in map(regex.search, existing_files)
if match])
source_full_path = os.path.join(source, "%s%s"
% (source_filename, source_ext))
# Rebuild the destination filename with the max index + 1
dest_full_path = os.path.join(dest, "%s%d%s"
% (source_filename,
(max_index + 1),
source_ext))
shutil.copy(source_full_path, dest_full_path)
...
# If the file already exists i.e. replace the while loop in the else statement
rename_file(name, extension)

I din't test the code. But something like this should do the job:-
i = 0
filename = "a.txt"
while True:
if os.isfile(filename):
i+= 1
break
if i:
fname, ext = filename.split('.')
filename = fname + str(i) + '.' + ext

python function not repeating

I have a python tool to 'touch' (utime) a file, then move to another folder. However, if the file already exists in the destination folder, it is silently overwritten. I would like to check for a file of the same name in the destination folder and, if it exists, rename the one I am moving to its name plus '-n' at the end, where n is a number starting at '1' and, if the file with '-1' at the end already exists, '-2' etc.
For example, say in the source folder is a file 'foo.txt', but there is one 'foo.txt' in the destination folder as well. This function should return '(absolute path)/foo-1.txt'.
So, I have made a function to check for these circumstances and return the modified string, so I can use rename later and not overwrite. However, currently, if the file exists, it returns nothing. Following is the function code, assuming these vars:
fileName - input filepath, absolute path. e.g. /Users/foo/sourceFolder/bar.txt
index - iterator variable, set to '1' at the start of each file being opened (externally to function)
def checkExists(fileName):
global index
print "checkExists(" + fileName + ")"
if exists(fileName):
splitPath = split(newFile)
splitName = splitext(splitPath[1])
newSplitName = splitName[0] + "-" + str(index)
index += 1
newName = splitPath[0] + "/" + newSplitName + splitName[1]
print "newName = " + newName
else:
print "(else) fileName = " + fileName
print "(else) fileName = " + str(type(fileName))
print ""
return fileName
checkExists(newName)
Now it seems that the inner call for checkExists() at the end is not running.
I hope I have been clear in my explanation.
IAmThePiGuy
P.S. I do not want to hear about potential race problems with utime, I know that the files in the source directory will not be accessed by anything else.

Your problem is that, if the file exists, you don't return anything. I think you're intending to use recursion to check the new filename, but you don't return the result of that call. Here's a stab:
def checkExists(fileName, index=0):
print "checkExists(" + fileName + ")"
if exists(fileName):
splitPath = split(newFile)
splitName = splitext(splitPath[1])
newSplitName = splitName[0] + "-" + str(index)
index += 1
newName = splitPath[0] + "/" + newSplitName + splitName[1]
print "newName = " + newName
return checkExists(newName, index) # recurse
else:
print "(else) fileName = " + fileName
print "(else) fileName = " + str(type(fileName))
print ""
return fileName
I also took the liberty of moving the recursive call closer to the generation of newName and removing the global variable and replacing that with recursion as well.

Here's an iterative approach to a similar problem:
def copyfile(path, dstdir, verbose=True, dryrun=False):
"""Copy `path` file to `dstdir` directory incrementing name if necessary."""
filename = os.path.basename(path)
basename, ext = os.path.splitext(filename)
for i in itertools.count(2):
destpath = os.path.join(dstdir, filename)
if not os.path.exists(destpath):
if verbose:
print(path, '->', destpath)
if not dryrun:
shutil.copyfile(path, destpath)
return
# increment filename
filename = "%s_%02d%s" % (basename, i, ext)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Problem with file cleaning process? - python

To answer your question, why not start with a simpler piece of code to test? import os all_files = [] for root, dirs, files in os.walk('/media/08247451247443AA/home/crap/'): all_files.extend(files) print len(all_files) As a side note, could you replace the Walk class with a defaultdict?

After a careful investigations, I come out with result that there are many ambiguous file names in the target file system and those files were missing.

Related

Python - How to create a csv, if csv already exists [duplicate]

os.rename deleting files python 3

traverse directory structure in python recursively without os.walk

Move file to a folder or make a renamed copy if it exists in the destination folder

python function not repeating

Categories

Resources