Related
With pyminizip I am able to zip a single file with a password in Python:
# Path of the single file to protect. The author reports (below) that pointing
# this at a folder instead fails with "OSError: error in opening ... for reading".
filepath=r"C:\Users\xxx\Desktop\myFolder\file.txt"
import pyminizip
# Arguments as passed here: source path, None, output archive name, password, 0.
# NOTE(review): presumably the 2nd argument is a path prefix inside the archive
# and the last one is the compression level — confirm against the pyminizip docs.
pyminizip.compress(filepath, None,"output.zip", "password", 0)
But how do I zip the whole folder 'myFolder' into a zip file with password?
I tried removing the filename from the path but it gives the error
OSError: error in opening C:\Users\xxx\Desktop\myFolder for reading
EDIT :
The link below has a function that zips a whole directory, but it won't add a password.
https://www.calazan.com/how-to-zip-an-entire-directory-with-python/
If anyone can let me know if it is possible to add a password to an existing zip file, that will solve my problem. Is that possible?
I was finally able to encrypt the whole directory (including the full subfolder structure and all files) using a library called 'pyzipper', suggested by Anupam Chaplot.
Here is the solution :
def zip_folderPyzipper(folder_path, output_path, password=b'PASSWORD'):
    """Zip an entire folder into an AES-encrypted, password-protected archive.

    The folder itself is included in the archive (entry names start with the
    folder's own name) and empty subfolders are included as well.

    Relies on ``os``, ``sys``, ``zipfile`` and ``pyzipper`` being imported at
    module level.

    Args:
        folder_path: Directory to archive.
        output_path: Path of the zip file to create.
        password: Archive password as ``bytes`` (a ``str`` is encoded as
            UTF-8). Defaults to the value the snippet originally hard-coded.

    On an I/O or zip error the message is printed and the process exits with
    status 1.
    """
    if isinstance(password, str):
        password = password.encode('utf-8')
    # Entry names are computed relative to the parent so that the folder's own
    # name is kept as the first path component inside the archive.
    parent_folder = os.path.dirname(folder_path) or os.curdir
    try:
        # The context manager closes the archive even when adding a file
        # fails, and never touches an archive object that failed to open.
        with pyzipper.AESZipFile(output_path, 'w',
                                 compression=pyzipper.ZIP_DEFLATED,
                                 encryption=pyzipper.WZ_AES) as zip_file:
            zip_file.setpassword(password)
            for root, folders, files in os.walk(folder_path):
                # Folders first so that empty ones get an entry too.
                for entry_name in folders + files:
                    absolute_path = os.path.join(root, entry_name)
                    # os.path.relpath works with any path separator, unlike
                    # stripping a hard-coded '\\' prefix.
                    relative_path = os.path.relpath(absolute_path, parent_folder)
                    print("Adding '%s' to archive." % absolute_path)
                    zip_file.write(absolute_path, relative_path)
        print("'%s' created successfully." % output_path)
    except (OSError, zipfile.BadZipFile) as message:
        print(message)
        sys.exit(1)
Since I am new to Python, I can't explain the code in detail. Here are the references:
https://pypi.org/project/pyzipper/
https://www.calazan.com/how-to-zip-an-entire-directory-with-python/
To extract the Generated ZIP file in windows :
Right Click - > Unzip(Encripted)
If you directly click Extract All option, then it will give error
Try this:
First, please check here for pyminizip. After that, try it.
import pyminizip as pyzip
# Compression level handed to pyminizip.
compression = 8
# NOTE(review): this call passes four arguments, whereas the compress() call in
# the question passes five (None comes second) — confirm which signature the
# installed pyminizip version expects.
pyzip.compress("test.txt", "test.zip", "Pswrd", compression)
Here is how to copy a directory with all its subdirectories and files, compress the copy, and encrypt the resulting zip without needing a separate key file. In this example the machine's MAC address is what authorizes decryption; it is up to you to change or improve the script, for example to use a real password.
But the essentials work very well.
After a lot of research, testing and thinking, I created this effective solution
my setup:
Python 3.8 64:bits on windows 7 64:bits
Usage terminology:
First step, we need to import the cryptography module
check for support or other is here https://cryptography.io/en/latest/installation/
command:
pip install cryptography
Then we will use the fernet object resulting from this module
https://cryptography.io/en/latest/fernet/
with password
https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet
and shutil:
https://docs.python.org/3/library/shutil.html
file second.py:
import os
import re, uuid
import string
import shutil
import zlib
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
import base64
import zipfile
class zipy:
    """Copy a directory into a ``sauvegarde`` folder, zip that copy and encrypt the zip.

    Archiving and encryption are delegated to ``myZip``.
    """

    def __init__(self, pathDir=None):
        """Index the entries of the directory to back up.

        If ``pathDir`` is None or is not an existing directory, the current
        working directory is used. The process working directory is changed
        to the selected root.
        """
        if pathDir is not None and os.path.isdir(pathDir):
            root = pathDir.replace(os.sep, '/')
            self.root = root if root.endswith('/') else root + '/'
        else:
            self.root = (os.getcwd() + os.sep).replace(os.sep, '/')
        os.chdir(self.root)

        self.name = 'sauvegarde'
        self.dirSauvegarde = (self.root + self.name).replace(os.sep, '/')

        lectureDossier = os.listdir(self.root)
        print(lectureDossier)

        # Maps entry name -> normalized absolute path ('/'-terminated for folders).
        self.path_system = {}
        for element in lectureDossier:
            if os.path.isdir(element):
                if element != '__pycache__':
                    self.path_system[element] = (self.root + element + '/').replace(os.sep, '/')
            elif os.path.isfile(element):
                self.path_system[element] = (self.root + element).replace(os.sep, '/')

        self.zipi = myZip(self.dirSauvegarde)

    def save(self):
        """Copy every indexed entry into the backup folder, archive it, then remove the copy."""
        self.createDir(self.dirSauvegarde)
        for element in self.path_system:
            # path_system is keyed by entry *name*, so the backup folder has to
            # be recognized by its name; comparing with its full path never
            # matched and let the folder be copied into itself.
            if element == self.name:
                continue
            chemin_src = self.root + element
            chemin_dest = (self.dirSauvegarde + os.sep + element).replace(os.sep, '/')
            if os.path.isdir(chemin_src):
                self.copyDir(chemin_src, chemin_dest)
            else:
                self.copyFile(chemin_src, chemin_dest)
        self.zipi.zip(zip_exist=True)
        self.delDir(self.dirSauvegarde)

    def copyDir(self, src, dest):
        """Recursively copy ``src`` to ``dest``; a failure is reported and skipped."""
        try:
            shutil.copytree(src, dest, dirs_exist_ok=True)
        except OSError as error:  # shutil.Error is a subclass of OSError
            print("Could not copy directory '%s': %s" % (src, error))

    def copyFile(self, src, dest):
        """Copy the file ``src`` to ``dest``; a failure is reported and skipped."""
        try:
            shutil.copyfile(src, dest)
        except OSError as error:
            print("Could not copy file '%s': %s" % (src, error))

    def createDir(self, dirPath):
        """Make sure ``dirPath`` exists, removing any previous directory at that path first."""
        if os.path.isdir(dirPath):
            self.delDir(dirPath)
        os.makedirs(dirPath, exist_ok=True)

    def delDir(self, dir):
        """Remove the directory ``dir`` (best effort); do nothing if it is not a directory."""
        if not os.path.isdir(dir):
            return
        if os.listdir(dir):
            print('rmtree')
            shutil.rmtree(dir, ignore_errors=True)
        else:
            try:
                os.rmdir(dir)
            except OSError:
                # Best effort, matching the ignore_errors=True branch above.
                pass

    def decrypt(self):
        """Decrypt the backup archive in place."""
        self.zipi.unzip()
class myZip:
    """Build the zip archive of one directory and encrypt/decrypt it in place."""

    def __init__(self, dir):
        """Remember the directory to archive; the archive path is ``dir + '.zip'``."""
        self.pathDir = dir
        self.nom = os.path.basename(dir)
        self.pathZip = self.pathDir + '.zip'
        self.crypt = Encryptor()

    def zip(self, zip_exist=False):
        """Archive ``pathDir``, encrypt the archive and store the key in its comment.

        Args:
            zip_exist: When true, an archive left over from a previous run is
                deleted before the new one is built.
        """
        if zip_exist and os.path.isfile(self.pathZip):
            try:
                os.remove(self.pathZip)
            except OSError:
                # Best effort: make_archive below writes to the same path anyway.
                pass
        shutil.make_archive(os.path.splitext(self.pathZip)[0], 'zip', self.pathDir)
        key = self.crypt.key_create()
        # The archive is encrypted in place (source and destination are the same file).
        self.crypt.file_encrypt(key, self.pathZip, self.pathZip)
        self.crypt.key_write(self.pathZip, key)

    def unzip(self):
        """Decrypt the archive in place if this machine passes the MAC-address check."""
        if self.crypt.checkPass(self.pathZip):
            key = self.crypt.key_load(self.pathZip)
            self.crypt.file_decrypt(key, self.pathZip, self.pathZip)
        else:
            print('pas ok adresse mac erroner')
class Encryptor:
    """Derive a Fernet key from this machine's MAC address and (de)crypt files with it.

    NOTE(review): key_write() stores the derived key itself, unencrypted, in
    the archive comment, so anyone holding the file can read the key — the
    MAC-address check is not a real protection. Confirm this is acceptable.
    """

    # Separator between key and salt inside the archive comment.
    _SEPARATOR = b' byMe '
    # NOTE(review): 100 PBKDF2 iterations is very low; raising it would make
    # checkPass() reject archives written with the current value.
    _KDF_ITERATIONS = 100

    def __init__(self):
        # Salt of the most recently created or loaded key (bytes), or None.
        self.salto = None

    def _kdf(self, salt):
        """Return a fresh PBKDF2-HMAC-SHA256 instance for ``salt`` (instances are single-use)."""
        return PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=salt,
            iterations=self._KDF_ITERATIONS,
        )

    def key_create(self):
        """Generate a new random salt and return the Fernet key derived from the MAC address."""
        password = self.getMac().encode("utf-8")
        self.salto = os.urandom(16)
        # The salt and key are deliberately not printed: they are secrets.
        return base64.urlsafe_b64encode(self._kdf(self.salto).derive(password))

    def key_write(self, pathZip, key):
        """Store ``key`` and the current salt in the comment of the archive at ``pathZip``."""
        with zipfile.ZipFile(pathZip, 'a') as zip:
            zip.comment = key + self._SEPARATOR + self.salto

    def key_load(self, pathZip):
        """Read key and salt back from the archive comment; return the key.

        Sets ``self.salto`` to the stored salt.

        Raises:
            ValueError: If the comment does not contain the key/salt separator.
        """
        # Read-only access is enough to fetch the comment.
        with zipfile.ZipFile(pathZip, 'r') as archive:
            comment = archive.comment
        # The key is URL-safe base64 and cannot contain the separator, so the
        # first occurrence is the real one; partition keeps a salt that happens
        # to contain the separator bytes intact.
        key, separator, salt = comment.partition(self._SEPARATOR)
        if not separator:
            raise ValueError("no key stored in the comment of %r" % (pathZip,))
        self.salto = salt
        return key

    def checkPass(self, pathZip):
        """Return True if the stored key matches the one derived from this machine's MAC address."""
        from cryptography.exceptions import InvalidKey

        key = base64.urlsafe_b64decode(self.key_load(pathZip))
        mdp = self.getMac().encode("utf-8")
        try:
            self._kdf(self.salto).verify(mdp, key)
        except InvalidKey:
            return False
        return True

    def file_encrypt(self, key, original_file, encrypted_file):
        """Encrypt ``original_file`` with ``key`` and write the token to ``encrypted_file``.

        The input is read completely before the output is opened, so both
        paths may name the same file.
        """
        with open(original_file, 'rb') as source:
            plaintext = source.read()
        token = Fernet(key).encrypt(plaintext)
        with open(encrypted_file, 'wb') as target:
            target.write(token)

    def file_decrypt(self, key, encrypted_file, decrypted_file):
        """Decrypt ``encrypted_file`` with ``key`` and write the result to ``decrypted_file``.

        The input is read completely before the output is opened, so both
        paths may name the same file.
        """
        with open(encrypted_file, 'rb') as source:
            token = source.read()
        plaintext = Fernet(key).decrypt(token)
        with open(decrypted_file, 'wb') as target:
            target.write(plaintext)

    def getMac(self):
        """Return the value of ``uuid.getnode()`` as 12 lowercase hex digits."""
        return '%012x' % uuid.getnode()
Use like this:
file : main.py
from second import zipy
# With no argument, the script backs up the current working directory;
# otherwise it backs up the given directory and writes the zip inside it.
dd = zipy("E:/path")
# Equivalent: zipy("E:/path/"), or zipy() for the current directory.
# When you pass an argument, give an absolute path.
# Copy the directory, zip the copy and encrypt the zip. To use a real password
# instead of the MAC address, modify second.py.
dd.save()
# Decrypt the zip
dd.decrypt()
Here's a snippet with pyminizip: gets a list of files and zips the whole thing.
import pyminizip
import os
def get_paths_recursively(src_root_path):
    """Return the paths of all non-hidden regular files below ``src_root_path``.

    Files whose name starts with '.' are skipped (files inside dot-directories
    are not). Returns an empty list when ``src_root_path`` is None.
    """
    files = []
    if src_root_path is None:
        return files
    for root, _directories, filenames in os.walk(src_root_path):
        for filename in filenames:
            if filename.startswith('.'):
                continue
            full_file_name = os.path.join(root, filename)
            if os.path.isfile(full_file_name):
                files.append(full_file_name)
    return files
def pyminizip_zipper(folder_path, output_path, password):
    """Zip every non-hidden file below ``folder_path`` into a password-protected archive.

    Each file is stored under a prefix equal to its directory relative to the
    parent of ``folder_path``, so the folder's own name is kept in the archive.

    Args:
        folder_path: Directory whose files are archived.
        output_path: Path of the zip file to create.
        password: Password handed to pyminizip.
    """
    paths = get_paths_recursively(folder_path)
    # A bare relative folder name has an empty dirname; str.replace('', './')
    # would then insert './' between every character of each path, so the
    # prefixes are computed with os.path.relpath instead.
    parent = os.path.dirname(folder_path) or os.curdir
    roots = [
        os.path.join(os.curdir, os.path.relpath(os.path.dirname(path), parent))
        for path in paths
    ]
    pyminizip.compress_multiple(paths, roots, output_path, password, 5)
The code below creates an md5/metadata fingerprint, but it crashes on files with some unknown corruption (e.g., files that can be copied, and mostly even opened, but that cannot be hashed or zipped).
Question: How can I make this code skip or ignore any and all problem files and just process the rest? Imagine 1 million files on 8 TB. Otherwise I leave it running with no real-time monitoring of progress, and two days later I find out that nothing got hashed because a couple of problem files made the code hang.
Part of the code (see full code below):
def createBasicInfoListFromDisk():
    """Walk ``walk_dir`` and record path, size and modification time of every file.

    One entry per file is appended to the module-level lists
    ``diskCompareListDetails`` and ``onlyFileNameOnDisk``; ``driveLetter`` is
    overwritten from each visited path.
    """
    global diskCompareListDetails, onlyFileNameOnDisk, driveLetter,walk_dir
    walk_dir = os.path.abspath(walk_dir)
    for root, subdirs, files in os.walk(walk_dir, topdown=True, onerror=None, followlinks=True ):
        for filename in files:
            file_path = os.path.join(root, filename)
            # Split the path at ':' into the drive letter and the remainder.
            temp = file_path.split(":")
            driveLetter = temp[0]
            filePathWithoutDriveLetter = temp[1]
            fileSize = os.path.getsize(file_path)
            # This is the call named in the traceback quoted below; nothing in
            # this loop catches the OSError, so one bad file ends the scan.
            mod_on = get_last_write_time(file_path)
            print('\t- file %s (full path: %s)' % (filename, file_path))
            print('FileName : {filename} is of size {size} and was modified on{mdt}'.format(filename=file_path,size=fileSize,mdt=mod_on ))
            # Quoted CSV fragment: "path","size","modification time"
            diskCompareListDetails.append("\"" + filePathWithoutDriveLetter+"\",\""+str(fileSize) + "\",\"" + mod_on +'"')
            onlyFileNameOnDisk.append("\""+filePathWithoutDriveLetter+"\"")
    return
Error:
FileName : T:\problemtest\problemfile.doc is of size 27136 and was modified on2010-10-10 13:58:32
Traceback (most recent call last):
File "t:\scripts\test.py", line 196, in <module>
createBasicInfoListFromDisk()
File "t:\scripts\test.py", line 76, in createBasicInfoListFromDisk
mod_on = get_last_write_time(file_path)
File "t:\scripts\test.py", line 61, in get_last_write_time
convert_time_to_human_readable = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(st.st_mtime))
OSError: [Errno 22] Invalid argument
Full code:
import os
import sys
import time
import datetime
import difflib
import decimal
import hashlib
from pip._vendor.distlib.compat import raw_input
csvListDetails = list()
csvCompareListDetails = list()
diskCompareListDetails = list()
onlyFileNameOnDisk = list()
addedFiles = list()
removedFiles = list()
driveLetter =""
finalFilesToChange=list()
finalFilesToDelete=list()
changedFiles=list()
csvfilewithPath="md5.csv"
import shutil
walk_dir=""
def findAndReadCSVFile(fileName):
    """Load a previously written md5.csv into the module-level lists.

    For every data row, ``csvCompareListDetails`` receives a
    '"FullName","Length","LastWriteTime"' string and ``csvListDetails``
    receives the list of individually re-quoted fields. The header line and
    rows with fewer than four fields are skipped.

    Args:
        fileName: Path of the CSV file.

    Returns:
        1 if the file was found and read, 0 otherwise.
    """
    global csvListDetails
    global csvCompareListDetails
    try:
        inputFileHandler = open(fileName, "rt", encoding='utf-8')
    except OSError:
        print("\n md5.csv not found. Will create a new one.")
        return 0

    # The context manager closes the handle on every exit path, including errors.
    with inputFileHandler:
        try:
            update_time = get_last_write_time(fileName)
        except OSError:
            print("\n md5.csv not found. Will create a new one.")
            return 0
        print("\n Found md5.csv, last updated on: %s" % update_time)

        next(inputFileHandler, None)  # skip the header line
        for line in inputFileHandler:
            rowItem = line.replace("\n", "").split('","')
            if len(rowItem) < 4:
                # Blank or truncated line: indexing it would raise IndexError.
                continue
            csvCompareListDetails.append('"' + rowItem[3] + ',"' + rowItem[2] + '","' + rowItem[1] + '"')
            csvListDetails.append(['"' + field.replace('"', '') + '"' for field in rowItem])
    return 1
def get_last_write_time(filename):
    """Return the modification time of ``filename`` as a local 'YYYY-MM-DD HH:MM:SS' string."""
    modified_at = time.localtime(os.stat(filename).st_mtime)
    return time.strftime("%Y-%m-%d %H:%M:%S", modified_at)
def createBasicInfoListFromDisk():
    """Walk ``walk_dir`` and record path, size and modification time of every file.

    One entry per readable file is appended to the module-level lists
    ``diskCompareListDetails`` and ``onlyFileNameOnDisk``; ``driveLetter`` is
    overwritten from each visited path. A file whose metadata cannot be read
    is reported and skipped, so a single bad file no longer aborts the scan.
    """
    global diskCompareListDetails, onlyFileNameOnDisk, driveLetter, walk_dir
    walk_dir = os.path.abspath(walk_dir)
    for root, subdirs, files in os.walk(walk_dir, topdown=True, onerror=None, followlinks=True):
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                fileSize = os.path.getsize(file_path)
                mod_on = get_last_write_time(file_path)
            except (OSError, OverflowError, ValueError) as error:
                # E.g. "[Errno 22] Invalid argument" for an unrepresentable timestamp.
                print('\t- SKIPPED file %s (full path: %s): %s' % (filename, file_path, error))
                continue
            # Split the path at ':' into the drive letter and the remainder.
            temp = file_path.split(":")
            driveLetter = temp[0]
            filePathWithoutDriveLetter = temp[1]
            print('\t- file %s (full path: %s)' % (filename, file_path))
            print('FileName : {filename} is of size {size} and was modified on{mdt}'.format(filename=file_path, size=fileSize, mdt=mod_on))
            # Quoted CSV fragment: "path","size","modification time"
            diskCompareListDetails.append("\"" + filePathWithoutDriveLetter + "\",\"" + str(fileSize) + "\",\"" + mod_on + '"')
            onlyFileNameOnDisk.append("\"" + filePathWithoutDriveLetter + "\"")
    return
def compareLogAndDiskLists():
    """Diff the CSV entries against the on-disk entries.

    Fills the module-level ``addedFiles`` (entries only on disk) and
    ``removedFiles`` (entries only in the CSV), each without the diff marker.
    """
    global addedFiles, removedFiles
    delta = difflib.unified_diff(
        csvCompareListDetails,
        diskCompareListDetails,
        fromfile='file1',
        tofile='file2',
        lineterm='',
        n=0,
    )
    # The first two diff lines are the '---' / '+++' file headers.
    entries = list(delta)[2:]
    added = []
    removed = []
    for entry in entries:
        marker = entry[0]
        if marker == '+':
            added.append(entry[1:])
        elif marker == '-':
            removed.append(entry[1:])
    addedFiles = added
    removedFiles = removed
    return
def displayInfoForUserInput():
    """Summarize the pending work and ask the user whether to continue.

    Appends the quoted names of new or changed files to ``finalFilesToChange``
    and of obsolete CSV rows to ``finalFilesToDelete`` (module-level lists),
    prints the counts and total size, then prompts. Any answer other than
    Y/YES (case-insensitive) exits the process.
    """
    global finalFilesToChange, finalFilesToDelete
    # Sets turn the per-entry membership tests from O(n) into O(1), which
    # matters with hundreds of thousands of entries.
    removedLookup = set(removedFiles)
    onDiskLookup = set(onlyFileNameOnDisk)

    changedOrNewFileCount = 0
    noLongerExistingFilesCount = 0
    totalSizeOfChange = 0

    for line in addedFiles:
        if line in removedLookup:
            continue
        changedOrNewFileCount += 1
        elements = line.replace("\n", "").split('","')
        totalSizeOfChange += int(elements[1].replace('"', ''))
        finalFilesToChange.append(elements[0] + '"')

    for line in removedFiles:
        quotedName = line.split('","')[0] + '"'
        if quotedName not in onDiskLookup:
            noLongerExistingFilesCount += 1
        finalFilesToDelete.append(quotedName)

    GBModSz = decimal.Decimal(totalSizeOfChange) / decimal.Decimal('1073741824')
    print("\n New or modified files on drive: {} (need to hash)".format(changedOrNewFileCount))
    print (" Obsolete lines in md5.csv (files modified or not on drive): {} (lines to delete)".format(noLongerExistingFilesCount))
    print (" {} files ({:.2f} GB) needs to be hashed.".format(changedOrNewFileCount, GBModSz))

    # The built-in input() replaces raw_input imported from pip's private
    # vendored package, which is not a supported API.
    userInput = input("\n Proceed with hash? (Y/N, Yes/No) ")
    if userInput.strip().upper() in ("Y", "YES"):
        print("Continuing Processing...")
    else:
        print("You opted not to continue, Exiting")
        sys.exit()
    return
def processFiles(foundFile):
    """Hash every file listed in ``finalFilesToChange``.

    For each readable file, ``[md5 hex digest, last write time, size, quoted
    name]`` is appended to the module-level ``changedFiles``. A file that
    cannot be opened, read or stat'ed is reported and skipped, so one bad
    file no longer aborts the run.

    Args:
        foundFile: 1 if an existing md5.csv was found; it is then archived
            under a timestamped name before hashing starts.
    """
    global changedFiles
    if foundFile == 1:
        oldFileName = walk_dir + "/md5.csv"
        shutil.copy(oldFileName, getTargetFileName(oldFileName))

    BLOCKSIZE = 1048576 * 4  # read in 4 MiB chunks to bound memory use
    for fileToHash in finalFilesToChange:
        fileToUse = driveLetter + ":" + fileToHash.replace('"', '')
        try:
            hasher = hashlib.new('md5')
            with open(fileToUse, 'rb') as afile:
                for buf in iter(lambda: afile.read(BLOCKSIZE), b''):
                    hasher.update(buf)
            fileDetails = [
                hasher.hexdigest(),
                get_last_write_time(fileToUse),
                os.path.getsize(fileToUse),
                fileToHash,
            ]
        except (OSError, OverflowError, ValueError) as error:
            print("Skipping unreadable file {}: {}".format(fileToUse, error))
            continue
        changedFiles.append(fileDetails)
    return
def getTargetFileName(oldFileName):
    """Build the archive name for an existing md5.csv.

    The name lives in ``walk_dir`` and embeds the old file's last write time
    and the current time.
    """
    generated_on = (
        get_last_write_time(oldFileName)
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
    )
    archived_on = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return "".join([
        walk_dir,
        "/generated_on_",
        generated_on,
        "__archived_on_",
        archived_on,
        "__md5.csv",
    ])
def writeCSVFile(fileName):
    """Write the merged md5 listing to ``fileName``.

    Rows from ``csvListDetails`` are kept unless their name is listed in
    ``finalFilesToDelete`` or ``finalFilesToChange``; the freshly hashed rows
    from ``changedFiles`` are appended after them. An I/O error is reported
    on stdout rather than raised.
    """
    # One set lookup per row instead of two linear list scans.
    skippedNames = set(finalFilesToDelete) | set(finalFilesToChange)
    try:
        # The context manager closes the file even if a write fails midway.
        with open(fileName, "wt", encoding='utf-8') as outputFileHandler:
            outputFileHandler.write("\"md5Hash\",\"LastWriteTime\",\"Length\",\"FullName\"\n")
            for details in csvListDetails:
                if details[3] in skippedNames:
                    continue
                outputFileHandler.write("{},{},{},{}\n".format(details[0], details[1], details[2], details[3]))
            for details in changedFiles:
                # The name (details[3]) is already quoted; the other fields are not.
                outputFileHandler.write("\"{}\",\"{}\",\"{}\",{}\n".format(details[0], details[1], details[2], details[3]))
    except OSError as e:
        print("ERROR :")
        print("File {} is either not writable or some other error: {}".format(fileName, e))
    return
# Script entry point: scan a drive, compare it with md5.csv, hash what changed.
if __name__ == '__main__':
    walk_dir = raw_input("\n Enter drive or directory to scan: ")
    # The listing is kept as md5.csv inside the scanned directory.
    csvfilewithPath=walk_dir+"/md5.csv"
    print("\n Drive to scan: " + walk_dir)
    foundFile = 0
    # 1. Load the previous listing, if any.
    foundFile=findAndReadCSVFile(csvfilewithPath)
    # 2. Collect name, size and modification time of the files on disk.
    createBasicInfoListFromDisk()
    # 3. Work out which entries were added or removed.
    compareLogAndDiskLists()
    # 4. Show a summary and ask for confirmation (exits if declined).
    displayInfoForUserInput()
    # 5. Hash the new or changed files, then 6. write the updated listing.
    processFiles(foundFile)
    writeCSVFile(csvfilewithPath)
Trying this fix, no luck:
def get_last_write_time(filename):
    """Return the file's modification time as 'YYYY-MM-DD HH:MM:SS', or "ERROR" on OSError."""
    try:
        modified_at = time.localtime(os.stat(filename).st_mtime)
        return time.strftime("%Y-%m-%d %H:%M:%S", modified_at)
    except OSError:
        return "ERROR"
def createBasicInfoListFromDisk():
I agree with IMCoins and I'm very curious as to why the except isn't catching the error.
So the first thing I would do is go to the place where the OSError is being raised and try to catch it explicitly.
def get_last_write_time(filename):
    """Return the file's modification time as a local 'YYYY-MM-DD HH:MM:SS' string.

    Returns the string "ERROR" instead of raising when the file cannot be
    stat'ed or its timestamp cannot be converted.
    """
    try:
        st = os.stat(filename)
        # time.localtime() itself can fail on a corrupt timestamp
        # (e.g. "[Errno 22] Invalid argument"), so it sits inside the try too.
        convert_time_to_human_readable = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(st.st_mtime))
    except (OSError, OverflowError, ValueError):
        return "ERROR"  # or whatever string you want to add
    return convert_time_to_human_readable
Updated answer, for updated post.
As stated earlier, an except statement with an exception type specified catches every exception of that type. So, in order to do what you want, I'm afraid the possible answers are either:
To make a method that identifies corrupted files, and handles it properly.
Make try, except statement that encapsulate every part of your code where there could be an error.
Let me warn you about the second solution though, as sometimes, there are system errors that you do not want to avoid. I believe you should print the exception that you catch, in order to identify further problems you may encounter.
Just so you know, as you may not : your error is not in a try, except statement. Your error is in (if I copied and pasted properly in my editor) line 196, createBasicinfoListFromDisk(), then line 76, mod_on = get_last_write_time(file_path)
As you also mentioned you are using python 3.x, I suggest you are looking into the suppress function (https://docs.python.org/3/library/contextlib.html#contextlib.suppress).
I hope it helped you.
I'm working on a script to create epub from html files, but when I check my epub I have the following error : Mimetype entry missing or not the first in archive
The Mimetype is present, but it's not the first file in the epub. Any idea how to put it in first place in any case using Python ?
Sorry, I don't have the time right now to give a detailed explanation, but here's a (relatively) simple epub processing program I wrote a while ago that shows how to do that.
epubpad.py
#! /usr/bin/env python
''' Pad the the ends of paragraph lines in an epub file with a single space char
Written by PM 2Ring 2013.05.12
'''
import sys, re, zipfile
def bold(s):
    """Return ``s`` wrapped in the ANSI escape codes that switch bold on and off."""
    bold_on = "\x1b[1m"
    reset = "\x1b[0m"
    return bold_on + "%s" % s + reset
def report(attr, val):
    """Print ``attr`` followed by a colon in bold, then ``val`` in single quotes."""
    print "%s '%s'" % (bold(attr + ':'), val)
def fixepub(oldname, newname):
    """Copy the epub ``oldname`` to ``newname``, padding text lines with a trailing space.

    Entries are written in the order given by the source archive's name list.
    The 'mimetype' entry is written uncompressed (ZIP_STORED); every other
    entry is deflated. In (x)html entries, each non-empty line that does not
    end in '>' or '}' gets one space appended.
    """
    oldz = zipfile.ZipFile(oldname, 'r')
    nlist = oldz.namelist()
    #print '\n'.join(nlist) + '\n'
    # Only a warning: the copy loop below keeps the source order, so the output
    # has 'mimetype' first only if the input did.
    if nlist[0] != 'mimetype':
        print bold('Warning!!!'), "First file is '%s', not 'mimetype" % nlist[0]
    #get the name of the contents file from the container
    container = 'META-INF/container.xml'
    # container should be in nlist
    s = oldz.read(container)
    p = re.compile(r'full-path="(.*?)"')
    a = p.search(s)
    contents = a.group(1)
    #report("Contents file", contents)
    # Directory part of the contents file name, including the trailing '/'.
    i = contents.find('/')
    if i>=0:
        dirname = contents[:i+1]
    else:
        #No directory separator in contents name!
        dirname = ''
    report("dirname", dirname)
    # Read creator and title from the contents file; they are only reported.
    s = oldz.read(contents)
    #print s
    p = re.compile(r'<dc:creator.*>(.*)</dc:creator>')
    a = p.search(s)
    creator = a.group(1)
    report("Creator", creator)
    p = re.compile(r'<dc:title>(.*)</dc:title>')
    a = p.search(s)
    title = a.group(1)
    report("Title", title)
    #Find the names of all xhtml & html text files
    p = re.compile(r'\.[x]?htm[l]?')
    htmnames = [i for i in nlist if p.search(i) and i.find('wrap')==-1]
    #Pattern for end of lines that don't need padding
    eolp = re.compile(r'[>}]$')
    newz = zipfile.ZipFile(newname, 'w', zipfile.ZIP_DEFLATED)
    for fname in nlist:
        print fname,
        s = oldz.read(fname)
        if fname == 'mimetype':
            # The entry is first written to a file named 'mimetype' in the
            # current directory, then added from disk without compression.
            # That temporary file is not removed afterwards.
            f = open(fname, 'w')
            f.write(s)
            f.close()
            newz.write(fname, fname, zipfile.ZIP_STORED)
            print ' * stored'
            continue
        if fname in htmnames:
            print ' * text',
            #Pad lines that are (hopefully) inside paragraphs...
            newlines = []
            for line in s.splitlines():
                if len(line)==0 or eolp.search(line):
                    newlines.append(line)
                else:
                    newlines.append(line + ' ')
            s = '\n'.join(newlines)
        newz.writestr(fname, s)
        print
    newz.close()
    oldz.close()
def main():
    """Command-line entry point: ``epubpad.py OLDNAME [NEWNAME]``.

    Without NEWNAME the output name is derived from OLDNAME by inserting
    '_P' and replacing spaces with underscores.
    """
    # `len(...) > 1 and sys.argv[1]` yields False when the argument is missing.
    oldname = len(sys.argv) > 1 and sys.argv[1]
    if not oldname:
        print 'No filename given!'
        raise SystemExit
    newname = len(sys.argv) > 2 and sys.argv[2]
    if not newname:
        if oldname.rfind('.') == -1:
            # No extension at all: just append the marker.
            newname = oldname + '_P'
        else:
            newname = oldname.replace('.epub', '_P.epub')
        newname = newname.replace(' ', '_')
    print "Processing '%s' to '%s' ..." % (oldname, newname)
    fixepub(oldname, newname)
if __name__ == '__main__':
    main()
FWIW, I wrote this program to process files for my simple e-reader that annoyingly joins paragraphs together if they don't end with white space.
The solution I've found:
delete the previous mimetype file
when creating the new archive create an new mimetype file before adding anything else : zipFile.writestr("mimetype", "application/epub+zip")
Why does it work : the mimetype is the same for all epub : "application/epub+zip", no need to use the original file.
I am working on very large file system. My task is to clean the system with some given parameters. Below program fragment can give a idea.
import DirectoryWalker
# Extensions (case-sensitive, with the leading dot) of files meant to be deleted.
extentions_to_delete = list([".rar",".doc",".URL",".js",".EXE",".mht",".css",".txt", ".cache", ".xml"])
# Extensions (case-sensitive, with the leading dot) of files to move to the target folder.
extentions_to_copy = list([".jpg",".BMP",".GIF",".jpeg",".gif",".bmp",".png",".JPG"])
# Walker rooted at the directory to clean up.
dw = DirectoryWalker.DirectoryWalker("/media/08247451247443AA/home/crap/")
def copy_advice(key, files):
    """Move ``files`` to the pictures folder when ``key`` is one of ``extentions_to_copy``.

    Prints how many files of that type were found and reports every file
    that ``dw.move_file_to`` could not move.
    """
    for ext in extentions_to_copy:
        if ext != key:
            continue
        print(str(len(files)) + " Files of type " + key + " should be coppied to the target folder.")
        for file in files:
            copy_to = "/media/08247451247443AA/home/crap-pics/"
            if not dw.move_file_to(file, copy_to, True):
                print(file + " : not moved")
# Walking the tree fills Walk.store as a side effect; the returned list itself
# is not used below.
walks = dw.get_all_file_types()
# Walk.store maps each extension to the list of files that have it.
for key in DirectoryWalker.Walk.store.keys():
    files = DirectoryWalker.Walk.store[key]
    copy_advice(key, files)
In the DirectoryWalker following code is written. Walk is a simple class which have a store object.
def get_all_file_types(self):
    """Return one ``Walk`` object for every file found below ``self.dir_name``."""
    walked = []
    for dirpath, _dirnames, filenames in os.walk(self.dir_name):
        walked.extend(Walk(dirpath + "/" + name) for name in filenames)
    return walked
def move_file_to(self, file_path, copy_to, rename_if_exists= False):
    """Move ``file_path`` into the directory ``copy_to``.

    An existing file is never overwritten. If the target name is taken and
    ``rename_if_exists`` is true, the file name is prefixed with "new_"
    (repeatedly, if needed) until a free name is found; if it is false, the
    file is left where it is.

    Args:
        file_path: Path of the file to move.
        copy_to: Destination directory.
        rename_if_exists: Whether to pick a new name on a collision.

    Returns:
        True if the file was moved, False otherwise.
    """
    file_name = os.path.split(file_path)[1]
    target_file_name = os.path.join(copy_to, file_name)

    if os.path.exists(target_file_name):
        if not rename_if_exists:
            return False
        # Recompute the target for every new candidate name. Previously the
        # "new_" name was built but never applied, so the rename overwrote
        # the existing file or failed.
        while os.path.exists(target_file_name):
            file_name = "new_" + file_name
            target_file_name = os.path.join(copy_to, file_name)

    # Exactly one rename attempt: a second attempt after a successful move
    # would fail (the source is gone) and report the file as not moved.
    try:
        os.rename(file_path, target_file_name)
    except OSError:
        print("Oops! Unable to rename : " + file_path + " to target : " + target_file_name)
        return False
    return True
The Walk class
class Walk:
    """Record of one file; constructing it registers the file in ``Walk.store``."""

    # Shared by all instances: maps a file extension (with its leading dot,
    # '' when the file has none) to the list of file names having it.
    store = {}

    def __init__(self, filename):
        """Remember ``filename`` and add it to the shared per-extension index."""
        self.file_ext = os.path.splitext(filename)[-1]
        self.file_name = filename
        # setdefault replaces dict.has_key(), which no longer exists in Python 3.
        Walk.store.setdefault(self.file_ext, []).append(self.file_name)
But when the program is executed, it moves only about 10400 files, whereas a manual count suggests there should be 13400 files in the file system. Please let me know what I am doing wrong.
Update Solutions
After a careful investigation, I found that many files in the source tree share the same file name, so they collided in the target folder, and those are the files that went missing.
To answer your question, why not start with a simpler piece of code to test?
import os
# Collect the bare file names from every directory below the root ...
all_files = []
for root, dirs, files in os.walk('/media/08247451247443AA/home/crap/'):
    all_files.extend(files)
# ... and print how many there are, to compare with the expected total.
print len(all_files)
As a side note, could you replace the Walk class with a defaultdict?
After a careful investigation, I found that many files in the source tree share the same file name, so they collided in the target folder, and those are the files that went missing.
Can someone please provide me with an explanation of the code, especially the use of MAXVERSIONS and the statements following the line "for f in files:"?
I want to understand what xrange(MAXVERSIONS) means. What is the use of the index, i.e.
for index in xrange(MAXVERSIONS): backup = '%s.%2.2d' % (destpath, index)
The code:
#!/usr/bin/env python
import sys,os, shutil, filecmp
MAXVERSIONS=100
BAKFOLDER = '.bak'
def backup_files(tree_top, bakdir_name=BAKFOLDER):
    """Copy every file below ``tree_top`` into a backup folder as a numbered version.

    Backups are named '<file>.NN' with NN from 00 up to MAXVERSIONS-1.

    tree_top    -- root directory to back up
    bakdir_name -- absolute path of one central backup folder, or a relative
                   name for a backup folder created inside every directory
    """
    top_dir = os.path.basename(tree_top)
    tree_top += os.sep
    for dir, subdirs, files in os.walk(tree_top):
        if os.path.isabs(bakdir_name):
            # Central backup folder: rebuild the directory's relative path under it.
            relpath = dir.replace(tree_top,'')
            backup_dir = os.path.join(bakdir_name, top_dir, relpath)
        else:
            # Per-directory backup folder next to the files.
            backup_dir = os.path.join(dir, bakdir_name)
        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)
        # Editing subdirs in place keeps os.walk from descending into backup folders.
        subdirs[:] = [d for d in subdirs if d != bakdir_name]
        for f in files:
            filepath = os.path.join(dir, f)
            destpath = os.path.join(backup_dir, f)
            # Try the version suffixes .00, .01, ... in order.
            for index in xrange(MAXVERSIONS):
                backup = '%s.%2.2d' % (destpath, index)
                abspath = os.path.abspath(filepath)
                if index > 0:
                    old_backup = '%s.%2.2d' % (destpath, index-1)
                    # Stop as soon as the previous version does not exist.
                    if not os.path.exists(old_backup): break
                    abspath = os.path.abspath(old_backup)
                    try:
                        # The previous version has the same content as the
                        # file: do not store it again under this number.
                        if os.path.isfile(abspath) and filecmp.cmp(abspath, filepath, shallow=False):
                            continue
                    except OSError:
                        pass
                try:
                    # Existing versions are never overwritten.
                    if not os.path.exists(backup):
                        print 'Copying %s to %s...' % (filepath, backup)
                        shutil.copy(filepath, backup)
                except (OSError, IOError), e:
                    # Copy errors are ignored; the loop continues with the next index.
                    pass
# Command line: script [directory] [backup directory]
if __name__=="__main__":
    if len(sys.argv)<2:
        sys.exit("Usage: %s [directory] [backup directory]" % sys.argv[0])
    # Expand environment variables and '~', then make the path absolute.
    tree_top = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[1])))
    if len(sys.argv)>=3:
        bakfolder = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[2])))
    else:
        # No backup directory given: use the relative default name.
        bakfolder = BAKFOLDER
    # Nothing is done when the first argument is not a directory.
    if os.path.isdir(tree_top):
        backup_files(tree_top, bakfolder)
The script tries to recursively copy the contents of a directory (defaults to current directory) to a backup directory (defaults to .bak in the current directory);
for each filename.ext, it creates a duplicate named filename.ext.00; if filename.ext.00 already exists, it creates filename.ext.01 instead, and so on.
xrange(MAXVERSIONS) lazily yields the numbers 0..(MAXVERSIONS-1) one at a time instead of building a full list, so MAXVERSIONS controls how many version suffixes to try, i.e. how many old versions of a file to keep.