Zip directories from listed ones - python

I am relatively new to Python, I am trying to make a script which will only zip subfolders that I define in a list, with their content of course. I tried modifying various codes from Stack Overflow and whatever I found on internet but either I zip all subfolders, or I zip subfolders that I want but without content.
List could be full path to subfolder or can I define the path to the root folder and then specify subfolders?
This is the idea: 3 subfolders 1,2,3 and I want to zip only subfolders 1 and 3. I added the last code that I was modifying but I just can't return the list in a function.
Folder
|- SubFolder1
| |- file1.txt
| |- file2.txt
|- SubFolder2
| |- file1.txt
| |- file2.txt
|- SubFolder3
| |- file1.txt
| |- file2.txt
The code:
import os
import zipfile
list=["SubFolder1", "SubFolder3"]
def zipdir(path, ziph):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
for file in files:
ziph.write(os.path.join(root, file))
if __name__ == '__main__':
zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir(**list**, zipf)
zipf.close()

I modified your code and think this way it should work as expected if I understand your request correctly. Please check out this thread too, I think it's pretty much what you are looking for ;-)
1) os.walk goes through each of your subfolders, too. So just wait until each subfolder becomes the "root" (here is an example).
2) Check whether the folder is contained in the list. If it is, zip all files within this folder
3) Recreate the subfolder structure relative to your original root-folder
import os
import zipfile
rootpath = r'C:\path\to\your\rootfolder'
list=["SubFolder1", "SubFolder3"]
def zipdir(path, ziph, dirlist):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
if os.path.basename(root) in dirlist:
for file in files:
file_path = os.path.join(root, file)
relative_path = os.path.relpath(file_path, path)
ziph.write(file_path, relative_path)
if __name__ == '__main__':
zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir(rootpath, zipf, list)
zipf.close()

Related

Python zip multiple folders [duplicate]

How can I create a zip archive of a directory structure in Python?
The easiest way is to use shutil.make_archive. It supports both zip and tar formats.
import shutil
shutil.make_archive(output_filename, 'zip', dir_name)
If you need to do something more complicated than zipping the whole directory (such as skipping certain files), then you'll need to dig into the zipfile module as others have suggested.
As others have pointed out, you should use zipfile. The documentation tells you what functions are available, but doesn't really explain how you can use them to zip an entire directory. I think it's easiest to explain with some example code:
import os
import zipfile
def zipdir(path, ziph):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
for file in files:
ziph.write(os.path.join(root, file),
os.path.relpath(os.path.join(root, file),
os.path.join(path, '..')))
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
zipdir('tmp/', zipf)
To add the contents of mydirectory to a new zip file, including all files and subdirectories:
import os
import zipfile
zf = zipfile.ZipFile("myzipfile.zip", "w")
for dirname, subdirs, files in os.walk("mydirectory"):
zf.write(dirname)
for filename in files:
zf.write(os.path.join(dirname, filename))
zf.close()
How can I create a zip archive of a directory structure in Python?
In a Python script
In Python 2.7+, shutil has a make_archive function.
from shutil import make_archive
make_archive(
'zipfile_name',
'zip', # the archive format - or tar, bztar, gztar
root_dir=None, # root for archive - current working dir if None
base_dir=None) # start archiving from here - cwd if None too
Here the zipped archive will be named zipfile_name.zip. If base_dir is farther down from root_dir it will exclude files not in the base_dir, but still archive the files in the parent dirs up to the root_dir.
I did have an issue testing this on Cygwin with 2.7 - it wants a root_dir argument, for cwd:
make_archive('zipfile_name', 'zip', root_dir='.')
Using Python from the shell
You can do this with Python from the shell also using the zipfile module:
$ python -m zipfile -c zipname sourcedir
Where zipname is the name of the destination file you want (add .zip if you want it, it won't do it automatically) and sourcedir is the path to the directory.
Zipping up Python (or just don't want parent dir):
If you're trying to zip up a python package with a __init__.py and __main__.py, and you don't want the parent dir, it's
$ python -m zipfile -c zipname sourcedir/*
And
$ python zipname
would run the package. (Note that you can't run subpackages as the entry point from a zipped archive.)
Zipping a Python app:
If you have python3.5+, and specifically want to zip up a Python package, use zipapp:
$ python -m zipapp myapp
$ python myapp.pyz
This function will recursively zip up a directory tree, compressing the files, and recording the correct relative filenames in the archive. The archive entries are the same as those generated by zip -r output.zip source_dir.
import os
import zipfile
def make_zipfile(output_filename, source_dir):
relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
for root, dirs, files in os.walk(source_dir):
# add directory (needed for empty dirs)
zip.write(root, os.path.relpath(root, relroot))
for file in files:
filename = os.path.join(root, file)
if os.path.isfile(filename): # regular files only
arcname = os.path.join(os.path.relpath(root, relroot), file)
zip.write(filename, arcname)
Use shutil, which is part of python standard library set.
Using shutil is so simple(see code below):
1st arg: Filename of resultant zip/tar file,
2nd arg: zip/tar,
3rd arg: dir_name
Code:
import shutil
shutil.make_archive('/home/user/Desktop/Filename','zip','/home/username/Desktop/Directory')
Modern Python (3.6+) using the pathlib module for concise OOP-like handling of paths, and pathlib.Path.rglob() for recursive globbing. As far as I can tell, this is equivalent to George V. Reilly's answer: zips with compression, the topmost element is a directory, keeps empty dirs, uses relative paths.
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile
from os import PathLike
from typing import Union
def zip_dir(zip_name: str, source_dir: Union[str, PathLike]):
src_path = Path(source_dir).expanduser().resolve(strict=True)
with ZipFile(zip_name, 'w', ZIP_DEFLATED) as zf:
for file in src_path.rglob('*'):
zf.write(file, file.relative_to(src_path.parent))
Note: as optional type hints indicate, zip_name can't be a Path object (would be fixed in 3.6.2+).
With python 3.9, pathlib & zipfile module you can create a zip files from anywhere in the system.
def zip_dir(dir: Union[Path, str], filename: Union[Path, str]):
"""Zip the provided directory without navigating to that directory using `pathlib` module"""
# Convert to Path object
dir = Path(dir)
with zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED) as zip_file:
for entry in dir.rglob("*"):
zip_file.write(entry, entry.relative_to(dir))
It is neat, typed, and has less code.
For adding compression to the resulting zip file, check out this link.
You need to change:
zip = zipfile.ZipFile('Python.zip', 'w')
to
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
I've made some changes to code given by Mark Byers. Below function will also adds empty directories if you have them. Examples should make it more clear what is the path added to the zip.
#!/usr/bin/env python
import os
import zipfile
def addDirToZip(zipHandle, path, basePath=""):
"""
Adding directory given by \a path to opened zip file \a zipHandle
#param basePath path that will be removed from \a path when adding to archive
Examples:
# add whole "dir" to "test.zip" (when you open "test.zip" you will see only "dir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
zipHandle.close()
# add contents of "dir" to "test.zip" (when you open "test.zip" you will see only it's contents)
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir', 'dir')
zipHandle.close()
# add contents of "dir/subdir" to "test.zip" (when you open "test.zip" you will see only contents of "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')
zipHandle.close()
# add whole "dir/subdir" to "test.zip" (when you open "test.zip" you will see only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir')
zipHandle.close()
# add whole "dir/subdir" with full path to "test.zip" (when you open "test.zip" you will see only "dir" and inside it only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir')
zipHandle.close()
# add whole "dir" and "otherDir" (with full path) to "test.zip" (when you open "test.zip" you will see only "dir" and "otherDir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
addDirToZip(zipHandle, 'otherDir')
zipHandle.close()
"""
basePath = basePath.rstrip("\\/") + ""
basePath = basePath.rstrip("\\/")
for root, dirs, files in os.walk(path):
# add dir itself (needed for empty dirs
zipHandle.write(os.path.join(root, "."))
# add files
for file in files:
filePath = os.path.join(root, file)
inZipPath = filePath.replace(basePath, "", 1).lstrip("\\/")
#print filePath + " , " + inZipPath
zipHandle.write(filePath, inZipPath)
Above is a simple function that should work for simple cases. You can find more elegant class in my Gist:
https://gist.github.com/Eccenux/17526123107ca0ac28e6
I have another code example that may help, using python3, pathlib and zipfile.
It should work in any OS.
from pathlib import Path
import zipfile
from datetime import datetime
DATE_FORMAT = '%y%m%d'
def date_str():
"""returns the today string year, month, day"""
return '{}'.format(datetime.now().strftime(DATE_FORMAT))
def zip_name(path):
"""returns the zip filename as string"""
cur_dir = Path(path).resolve()
parent_dir = cur_dir.parents[0]
zip_filename = '{}/{}_{}.zip'.format(parent_dir, cur_dir.name, date_str())
p_zip = Path(zip_filename)
n = 1
while p_zip.exists():
zip_filename = ('{}/{}_{}_{}.zip'.format(parent_dir, cur_dir.name,
date_str(), n))
p_zip = Path(zip_filename)
n += 1
return zip_filename
def all_files(path):
"""iterator returns all files and folders from path as absolute path string
"""
for child in Path(path).iterdir():
yield str(child)
if child.is_dir():
for grand_child in all_files(str(child)):
yield str(Path(grand_child))
def zip_dir(path):
"""generate a zip"""
zip_filename = zip_name(path)
zip_file = zipfile.ZipFile(zip_filename, 'w')
print('create:', zip_filename)
for file in all_files(path):
print('adding... ', file)
zip_file.write(file)
zip_file.close()
if __name__ == '__main__':
zip_dir('.')
print('end!')
To retain the folder hierarchy under the parent directory to be archived:
import glob
import os
import zipfile
with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
for fp in glob(os.path.join(parent, "**/*")):
base = os.path.commonpath([parent, fp])
zipf.write(fp, arcname=fp.replace(base, ""))
If you want, you could change this to use pathlib for file globbing.
If you want a functionality like the compress folder of any common graphical file manager you can use the following code, it uses the zipfile module. Using this code you will have the zip file with the path as its root folder.
import os
import zipfile
def zipdir(path, ziph):
# Iterate all the directories and files
for root, dirs, files in os.walk(path):
# Create a prefix variable with the folder structure inside the path folder.
# So if a file is at the path directory will be at the root directory of the zip file
# so the prefix will be empty. If the file belongs to a containing folder of path folder
# then the prefix will be that folder.
if root.replace(path,'') == '':
prefix = ''
else:
# Keep the folder structure after the path folder, append a '/' at the end
# and remome the first character, if it is a '/' in order to have a path like
# folder1/folder2/file.txt
prefix = root.replace(path, '') + '/'
if (prefix[0] == '/'):
prefix = prefix[1:]
for filename in files:
actual_file_path = root + '/' + filename
zipped_file_path = prefix + filename
zipf.write( actual_file_path, zipped_file_path)
zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('/tmp/justtest/', zipf)
zipf.close()
So many answers here, and I hope I might contribute with my own version, which is based on the original answer (by the way), but with a more graphical perspective, also using context for each zipfile setup and sorting os.walk(), in order to have a ordered output.
Having these folders and them files (among other folders), I wanted to create a .zip for each cap_ folder:
$ tree -d
.
├── cap_01
|    ├── 0101000001.json
|   ├── 0101000002.json
| ├── 0101000003.json
|
├── cap_02
| ├── 0201000001.json
| ├── 0201000002.json
| ├── 0201001003.json
|
├── cap_03
| ├── 0301000001.json
| ├── 0301000002.json
| ├── 0301000003.json
|
├── docs
| ├── map.txt
| ├── main_data.xml
|
├── core_files
├── core_master
├── core_slave
Here's what I applied, with comments for better understanding of the process.
$ cat zip_cap_dirs.py
""" Zip 'cap_*' directories. """
import os
import zipfile as zf
for root, dirs, files in sorted(os.walk('.')):
if 'cap_' in root:
print(f"Compressing: {root}")
# Defining .zip name, according to Capítulo.
cap_dir_zip = '{}.zip'.format(root)
# Opening zipfile context for current root dir.
with zf.ZipFile(cap_dir_zip, 'w', zf.ZIP_DEFLATED) as new_zip:
# Iterating over os.walk list of files for the current root dir.
for f in files:
# Defining relative path to files from current root dir.
f_path = os.path.join(root, f)
# Writing the file on the .zip file of the context
new_zip.write(f_path)
Basically, for each iteration over os.walk(path), I'm opening a context for zipfile setup and afterwards, iterating iterating over files, which is a list of files from root directory, forming the relative path for each file based on the current root directory, appending to the zipfile context which is running.
And the output is presented like this:
$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03
To see the contents of each .zip directory, you can use less command:
$ less cap_01.zip
Archive: cap_01.zip
Length Method Size Cmpr Date Time CRC-32 Name
-------- ------ ------- ---- ---------- ----- -------- ----
22017 Defl:N 2471 89% 2019-09-05 08:05 7a3b5ec6 cap_01/0101000001.json
21998 Defl:N 2471 89% 2019-09-05 08:05 155bece7 cap_01/0101000002.json
23236 Defl:N 2573 89% 2019-09-05 08:05 55fced20 cap_01/0101000003.json
-------- ------- --- -------
67251 7515 89% 3 files
You probably want to look at the zipfile module; there's documentation at http://docs.python.org/library/zipfile.html.
You may also want os.walk() to index the directory structure.
To give more flexibility, e.g. select directory/file by name use:
import os
import zipfile
def zipall(ob, path, rel=""):
basename = os.path.basename(path)
if os.path.isdir(path):
if rel == "":
rel = basename
ob.write(path, os.path.join(rel))
for root, dirs, files in os.walk(path):
for d in dirs:
zipall(ob, os.path.join(root, d), os.path.join(rel, d))
for f in files:
ob.write(os.path.join(root, f), os.path.join(rel, f))
break
elif os.path.isfile(path):
ob.write(path, os.path.join(rel, basename))
else:
pass
For a file tree:
.
├── dir
│   ├── dir2
│   │   └── file2.txt
│   ├── dir3
│   │   └── file3.txt
│   └── file.txt
├── dir4
│   ├── dir5
│   └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip
You can e.g. select only dir4 and root.txt:
cwd = os.getcwd()
files = [os.path.join(cwd, f) for f in ['dir4', 'root.txt']]
with zipfile.ZipFile("selective.zip", "w" ) as myzip:
for f in files:
zipall(myzip, f)
Or just listdir in script invocation directory and add everything from there:
with zipfile.ZipFile("listdir.zip", "w" ) as myzip:
for f in os.listdir():
if f == "listdir.zip":
# Creating a listdir.zip in the same directory
# will include listdir.zip inside itself, beware of this
continue
zipall(myzip, f)
Here is a variation on the answer given by Nux that works for me:
def WriteDirectoryToZipFile( zipHandle, srcPath, zipLocalPath = "", zipOperation = zipfile.ZIP_DEFLATED ):
basePath = os.path.split( srcPath )[ 0 ]
for root, dirs, files in os.walk( srcPath ):
p = os.path.join( zipLocalPath, root [ ( len( basePath ) + 1 ) : ] )
# add dir
zipHandle.write( root, p, zipOperation )
# add files
for f in files:
filePath = os.path.join( root, f )
fileInZipPath = os.path.join( p, f )
zipHandle.write( filePath, fileInZipPath, zipOperation )
Try the below one .it worked for me.
import zipfile, os
zipf = "compress.zip"
def main():
directory = r"Filepath"
toZip(directory)
def toZip(directory):
zippedHelp = zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED )
list = os.listdir(directory)
for file_list in list:
file_name = os.path.join(directory,file_list)
if os.path.isfile(file_name):
print file_name
zippedHelp.write(file_name)
else:
addFolderToZip(zippedHelp,file_list,directory)
print "---------------Directory Found-----------------------"
zippedHelp.close()
def addFolderToZip(zippedHelp,folder,directory):
path=os.path.join(directory,folder)
print path
file_list=os.listdir(path)
for file_name in file_list:
file_path=os.path.join(path,file_name)
if os.path.isfile(file_path):
zippedHelp.write(file_path)
elif os.path.isdir(file_name):
print "------------------sub directory found--------------------"
addFolderToZip(zippedHelp,file_name,path)
if __name__=="__main__":
main()
Say you want to Zip all the folders(sub directories) in the current directory.
for root, dirs, files in os.walk("."):
for sub_dir in dirs:
zip_you_want = sub_dir+".zip"
zip_process = zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED)
zip_process.write(file_you_want_to_include)
zip_process.close()
print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))
Zip a file or a tree (a directory and its sub-directories).
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as zf:
paths = [Path(tree_path)]
while paths:
p = paths.pop()
if p.is_dir():
paths.extend(p.iterdir())
if skip_empty_dir:
continue
zf.write(p)
To append to an existing archive, pass mode='a', to create a fresh archive mode='w' (the default in the above). So let's say you want to bundle 3 different directory trees under the same archive.
make_zip(path_to_tree1, path_to_arch, mode='w')
make_zip(path_to_tree2, path_to_arch, mode='a')
make_zip(path_to_file3, path_to_arch, mode='a')
A solution using pathlib.Path, which is independent of the OS used:
import zipfile
from pathlib import Path
def zip_dir(path: Path, zip_file_path: Path):
"""Zip all contents of path to zip_file"""
files_to_zip = [
file for file in path.glob('*') if file.is_file()]
with zipfile.ZipFile(
zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
for file in files_to_zip:
print(file.name)
zip_f.write(file, file.name)
current_dir = Path.cwd()
zip_dir = current_dir / "test"
tools.zip_dir(
zip_dir, current_dir / 'Zipped_dir.zip')
The obvious way to go would be to go with shutil, Like the second top answer says so, But if you still wish to go with ZipFile for some reason, And if you are getting some trouble doing that (Like ERR 13 in Windows etc), You can use this fix:
import os
import zipfile
def retrieve_file_paths(dirName):
filePaths = []
for root, directories, files in os.walk(dirName):
for filename in files:
filePath = os.path.join(root, filename)
filePaths.append(filePath)
return filePaths
def main(dir_name, output_filename):
filePaths = retrieve_file_paths(dir_name)
zip_file = zipfile.ZipFile(output_filename+'.zip', 'w')
with zip_file:
for file in filePaths:
zip_file.write(file)
main("my_dir", "my_dir_archived")
This one recursively iterates through every sub-folder/file in your given folder, And writes them to a zip file instead of attempting to directly zip a folder.
Here's a modern approach, using pathlib, and a context manager. Puts the files directly in the zip, rather than in a subfolder.
def zip_dir(filename: str, dir_to_zip: pathlib.Path):
with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
# Use glob instead of iterdir(), to cover all subdirectories.
for directory in dir_to_zip.glob('**'):
for file in directory.iterdir():
if not file.is_file():
continue
# Strip the first component, so we don't create an uneeded subdirectory
# containing everything.
zip_path = pathlib.Path(*file.parts[1:])
# Use a string, since zipfile doesn't support pathlib directly.
zipf.write(str(file), str(zip_path))
I prepared a function by consolidating Mark Byers' solution with Reimund and Morten Zilmer's comments (relative path and including empty directories). As a best practice, with is used in ZipFile's file construction.
The function also prepares a default zip file name with the zipped directory name and '.zip' extension. Therefore, it works with only one argument: the source directory to be zipped.
import os
import zipfile
def zip_dir(path_dir, path_file_zip=''):
if not path_file_zip:
path_file_zip = os.path.join(
os.path.dirname(path_dir), os.path.basename(path_dir)+'.zip')
with zipfile.ZipFile(path_file_zip, 'wb', zipfile.ZIP_DEFLATED) as zip_file:
for root, dirs, files in os.walk(path_dir):
for file_or_dir in files + dirs:
zip_file.write(
os.path.join(root, file_or_dir),
os.path.relpath(os.path.join(root, file_or_dir),
os.path.join(path_dir, os.path.pardir)))
# import required python modules
# You have to install zipfile package using pip install
import os,zipfile
# Change the directory where you want your new zip file to be
os.chdir('Type your destination')
# Create a new zipfile ( I called it myfile )
zf = zipfile.ZipFile('myfile.zip','w')
# os.walk gives a directory tree. Access the files using a for loop
for dirnames,folders,files in os.walk('Type your directory'):
zf.write('Type your Directory')
for file in files:
zf.write(os.path.join('Type your directory',file))
Well, after reading the suggestions I came up with a very similar way that works with 2.7.x without creating "funny" directory names (absolute-like names), and will only create the specified folder inside the zip.
Or just in case you needed your zip to contain a folder inside with the contents of the selected directory.
def zipDir( path, ziph ) :
"""
Inserts directory (path) into zipfile instance (ziph)
"""
for root, dirs, files in os.walk( path ) :
for file in files :
ziph.write( os.path.join( root, file ) , os.path.basename( os.path.normpath( path ) ) + "\\" + file )
def makeZip( pathToFolder ) :
"""
Creates a zip file with the specified folder
"""
zipf = zipfile.ZipFile( pathToFolder + 'file.zip', 'w', zipfile.ZIP_DEFLATED )
zipDir( pathToFolder, zipf )
zipf.close()
print( "Zip file saved to: " + pathToFolder)
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )
Function to create zip file.
def CREATEZIPFILE(zipname, path):
#function to create a zip file
#Parameters: zipname - name of the zip file; path - name of folder/file to be put in zip file
zipf = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED)
zipf.setpassword(b"password") #if you want to set password to zipfile
#checks if the path is file or directory
if os.path.isdir(path):
for files in os.listdir(path):
zipf.write(os.path.join(path, files), files)
elif os.path.isfile(path):
zipf.write(os.path.join(path), path)
zipf.close()
One thing completely missed by previous answers is the fact that using os.path.join() can easily return POSIX-incompatible paths when you run the code on Windows. The resulting archive will contain files with literal backslashes in their names when processing it with any common archive software on Linux, which is not what you want. Use path.as_posix() for the arcname parameter instead!
import zipfile
from pathlib import Path
with zipfile.ZipFile("archive.zip", "w", zipfile.ZIP_DEFLATED) as zf:
for path in Path("include_all_of_this_folder").rglob("*"):
zf.write(path, path.as_posix())

Glob pattern: Exclude file only from top directory

Lets take a below example as my folder structure,
|---GCD.txt
|---Azure.png
|---AWS.txt
|---foo/
| |--- app.txt
| |--- bar.txt
GCD.txt, azure.png, AWS.txt files are at root folder. I don't know the folder name(New GUID every time).
Now I want to write a glob pattern such that text file(*.txt) only from root folder should skip, not from sub-folders. So expected behavior should be,
|---Azure.png
|---foo/
| |--- app.txt
| |--- bar.txt
GCD.txt and AWS.txt should be skipped.
My attempts:
.*.txt
./*.txt
*.txt
None of the above pattern helped. Am I missing something.
I don't think there's an expression that can be passed to glob that will do this. Here's how it could be done though:
from os import walk
from os.path import join
from pathlib import Path
files_of_interest = []
base_directory = '.'
def istxt(filename):
return Path(filename).suffix.lower() in {'.txt'}
for root, _, files in walk(base_directory):
if root == base_directory:
for file in files:
if not istxt(file):
files_of_interest.append(join(root, file))
else:
for file in files:
if istxt(file):
files_of_interest.append(join(root, file))
print(files_of_interest)

How to zip all sub folders and their files into a single zip file of a directory [duplicate]

How can I create a zip archive of a directory structure in Python?
The easiest way is to use shutil.make_archive. It supports both zip and tar formats.
import shutil
shutil.make_archive(output_filename, 'zip', dir_name)
If you need to do something more complicated than zipping the whole directory (such as skipping certain files), then you'll need to dig into the zipfile module as others have suggested.
As others have pointed out, you should use zipfile. The documentation tells you what functions are available, but doesn't really explain how you can use them to zip an entire directory. I think it's easiest to explain with some example code:
import os
import zipfile
def zipdir(path, ziph):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
for file in files:
ziph.write(os.path.join(root, file),
os.path.relpath(os.path.join(root, file),
os.path.join(path, '..')))
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
zipdir('tmp/', zipf)
To add the contents of mydirectory to a new zip file, including all files and subdirectories:
import os
import zipfile
zf = zipfile.ZipFile("myzipfile.zip", "w")
for dirname, subdirs, files in os.walk("mydirectory"):
zf.write(dirname)
for filename in files:
zf.write(os.path.join(dirname, filename))
zf.close()
How can I create a zip archive of a directory structure in Python?
In a Python script
In Python 2.7+, shutil has a make_archive function.
from shutil import make_archive
make_archive(
'zipfile_name',
'zip', # the archive format - or tar, bztar, gztar
root_dir=None, # root for archive - current working dir if None
base_dir=None) # start archiving from here - cwd if None too
Here the zipped archive will be named zipfile_name.zip. If base_dir is farther down from root_dir it will exclude files not in the base_dir, but still archive the files in the parent dirs up to the root_dir.
I did have an issue testing this on Cygwin with 2.7 - it wants a root_dir argument, for cwd:
make_archive('zipfile_name', 'zip', root_dir='.')
Using Python from the shell
You can do this with Python from the shell also using the zipfile module:
$ python -m zipfile -c zipname sourcedir
Where zipname is the name of the destination file you want (add .zip if you want it, it won't do it automatically) and sourcedir is the path to the directory.
Zipping up Python (or just don't want parent dir):
If you're trying to zip up a python package with a __init__.py and __main__.py, and you don't want the parent dir, it's
$ python -m zipfile -c zipname sourcedir/*
And
$ python zipname
would run the package. (Note that you can't run subpackages as the entry point from a zipped archive.)
Zipping a Python app:
If you have python3.5+, and specifically want to zip up a Python package, use zipapp:
$ python -m zipapp myapp
$ python myapp.pyz
This function will recursively zip up a directory tree, compressing the files, and recording the correct relative filenames in the archive. The archive entries are the same as those generated by zip -r output.zip source_dir.
import os
import zipfile
def make_zipfile(output_filename, source_dir):
relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
for root, dirs, files in os.walk(source_dir):
# add directory (needed for empty dirs)
zip.write(root, os.path.relpath(root, relroot))
for file in files:
filename = os.path.join(root, file)
if os.path.isfile(filename): # regular files only
arcname = os.path.join(os.path.relpath(root, relroot), file)
zip.write(filename, arcname)
Use shutil, which is part of python standard library set.
Using shutil is so simple(see code below):
1st arg: Filename of resultant zip/tar file,
2nd arg: zip/tar,
3rd arg: dir_name
Code:
import shutil
shutil.make_archive('/home/user/Desktop/Filename','zip','/home/username/Desktop/Directory')
Modern Python (3.6+) using the pathlib module for concise OOP-like handling of paths, and pathlib.Path.rglob() for recursive globbing. As far as I can tell, this is equivalent to George V. Reilly's answer: zips with compression, the topmost element is a directory, keeps empty dirs, uses relative paths.
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile
from os import PathLike
from typing import Union
def zip_dir(zip_name: str, source_dir: Union[str, PathLike]):
src_path = Path(source_dir).expanduser().resolve(strict=True)
with ZipFile(zip_name, 'w', ZIP_DEFLATED) as zf:
for file in src_path.rglob('*'):
zf.write(file, file.relative_to(src_path.parent))
Note: as optional type hints indicate, zip_name can't be a Path object (would be fixed in 3.6.2+).
With python 3.9, pathlib & zipfile module you can create a zip files from anywhere in the system.
def zip_dir(dir: Union[Path, str], filename: Union[Path, str]):
"""Zip the provided directory without navigating to that directory using `pathlib` module"""
# Convert to Path object
dir = Path(dir)
with zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED) as zip_file:
for entry in dir.rglob("*"):
zip_file.write(entry, entry.relative_to(dir))
It is neat, typed, and has less code.
For adding compression to the resulting zip file, check out this link.
You need to change:
zip = zipfile.ZipFile('Python.zip', 'w')
to
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
I've made some changes to code given by Mark Byers. Below function will also adds empty directories if you have them. Examples should make it more clear what is the path added to the zip.
#!/usr/bin/env python
import os
import zipfile
def addDirToZip(zipHandle, path, basePath=""):
"""
Adding directory given by \a path to opened zip file \a zipHandle
#param basePath path that will be removed from \a path when adding to archive
Examples:
# add whole "dir" to "test.zip" (when you open "test.zip" you will see only "dir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
zipHandle.close()
# add contents of "dir" to "test.zip" (when you open "test.zip" you will see only it's contents)
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir', 'dir')
zipHandle.close()
# add contents of "dir/subdir" to "test.zip" (when you open "test.zip" you will see only contents of "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')
zipHandle.close()
# add whole "dir/subdir" to "test.zip" (when you open "test.zip" you will see only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir')
zipHandle.close()
# add whole "dir/subdir" with full path to "test.zip" (when you open "test.zip" you will see only "dir" and inside it only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir')
zipHandle.close()
# add whole "dir" and "otherDir" (with full path) to "test.zip" (when you open "test.zip" you will see only "dir" and "otherDir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
addDirToZip(zipHandle, 'otherDir')
zipHandle.close()
"""
basePath = basePath.rstrip("\\/") + ""
basePath = basePath.rstrip("\\/")
for root, dirs, files in os.walk(path):
# add dir itself (needed for empty dirs
zipHandle.write(os.path.join(root, "."))
# add files
for file in files:
filePath = os.path.join(root, file)
inZipPath = filePath.replace(basePath, "", 1).lstrip("\\/")
#print filePath + " , " + inZipPath
zipHandle.write(filePath, inZipPath)
Above is a simple function that should work for simple cases. You can find more elegant class in my Gist:
https://gist.github.com/Eccenux/17526123107ca0ac28e6
I have another code example that may help, using python3, pathlib and zipfile.
It should work in any OS.
from pathlib import Path
import zipfile
from datetime import datetime
DATE_FORMAT = '%y%m%d'
def date_str():
"""returns the today string year, month, day"""
return '{}'.format(datetime.now().strftime(DATE_FORMAT))
def zip_name(path):
"""returns the zip filename as string"""
cur_dir = Path(path).resolve()
parent_dir = cur_dir.parents[0]
zip_filename = '{}/{}_{}.zip'.format(parent_dir, cur_dir.name, date_str())
p_zip = Path(zip_filename)
n = 1
while p_zip.exists():
zip_filename = ('{}/{}_{}_{}.zip'.format(parent_dir, cur_dir.name,
date_str(), n))
p_zip = Path(zip_filename)
n += 1
return zip_filename
def all_files(path):
"""iterator returns all files and folders from path as absolute path string
"""
for child in Path(path).iterdir():
yield str(child)
if child.is_dir():
for grand_child in all_files(str(child)):
yield str(Path(grand_child))
def zip_dir(path):
"""generate a zip"""
zip_filename = zip_name(path)
zip_file = zipfile.ZipFile(zip_filename, 'w')
print('create:', zip_filename)
for file in all_files(path):
print('adding... ', file)
zip_file.write(file)
zip_file.close()
if __name__ == '__main__':
zip_dir('.')
print('end!')
To retain the folder hierarchy under the parent directory to be archived:
import glob
import os
import zipfile
with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
for fp in glob(os.path.join(parent, "**/*")):
base = os.path.commonpath([parent, fp])
zipf.write(fp, arcname=fp.replace(base, ""))
If you want, you could change this to use pathlib for file globbing.
If you want a functionality like the compress folder of any common graphical file manager you can use the following code, it uses the zipfile module. Using this code you will have the zip file with the path as its root folder.
import os
import zipfile
def zipdir(path, ziph):
# Iterate all the directories and files
for root, dirs, files in os.walk(path):
# Create a prefix variable with the folder structure inside the path folder.
# So if a file is at the path directory will be at the root directory of the zip file
# so the prefix will be empty. If the file belongs to a containing folder of path folder
# then the prefix will be that folder.
if root.replace(path,'') == '':
prefix = ''
else:
# Keep the folder structure after the path folder, append a '/' at the end
# and remome the first character, if it is a '/' in order to have a path like
# folder1/folder2/file.txt
prefix = root.replace(path, '') + '/'
if (prefix[0] == '/'):
prefix = prefix[1:]
for filename in files:
actual_file_path = root + '/' + filename
zipped_file_path = prefix + filename
zipf.write( actual_file_path, zipped_file_path)
zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('/tmp/justtest/', zipf)
zipf.close()
So many answers here, and I hope I might contribute with my own version, which is based on the original answer (by the way), but with a more graphical perspective, also using context for each zipfile setup and sorting os.walk(), in order to have a ordered output.
Having these folders and them files (among other folders), I wanted to create a .zip for each cap_ folder:
$ tree -d
.
├── cap_01
|    ├── 0101000001.json
|   ├── 0101000002.json
| ├── 0101000003.json
|
├── cap_02
| ├── 0201000001.json
| ├── 0201000002.json
| ├── 0201001003.json
|
├── cap_03
| ├── 0301000001.json
| ├── 0301000002.json
| ├── 0301000003.json
|
├── docs
| ├── map.txt
| ├── main_data.xml
|
├── core_files
├── core_master
├── core_slave
Here's what I applied, with comments for better understanding of the process.
$ cat zip_cap_dirs.py
""" Zip 'cap_*' directories. """
import os
import zipfile as zf
for root, dirs, files in sorted(os.walk('.')):
if 'cap_' in root:
print(f"Compressing: {root}")
# Defining .zip name, according to Capítulo.
cap_dir_zip = '{}.zip'.format(root)
# Opening zipfile context for current root dir.
with zf.ZipFile(cap_dir_zip, 'w', zf.ZIP_DEFLATED) as new_zip:
# Iterating over os.walk list of files for the current root dir.
for f in files:
# Defining relative path to files from current root dir.
f_path = os.path.join(root, f)
# Writing the file on the .zip file of the context
new_zip.write(f_path)
Basically, for each iteration over os.walk(path), I'm opening a context for zipfile setup and afterwards, iterating iterating over files, which is a list of files from root directory, forming the relative path for each file based on the current root directory, appending to the zipfile context which is running.
And the output is presented like this:
$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03
To see the contents of each .zip directory, you can use less command:
$ less cap_01.zip
Archive: cap_01.zip
Length Method Size Cmpr Date Time CRC-32 Name
-------- ------ ------- ---- ---------- ----- -------- ----
22017 Defl:N 2471 89% 2019-09-05 08:05 7a3b5ec6 cap_01/0101000001.json
21998 Defl:N 2471 89% 2019-09-05 08:05 155bece7 cap_01/0101000002.json
23236 Defl:N 2573 89% 2019-09-05 08:05 55fced20 cap_01/0101000003.json
-------- ------- --- -------
67251 7515 89% 3 files
You probably want to look at the zipfile module; there's documentation at http://docs.python.org/library/zipfile.html.
You may also want os.walk() to index the directory structure.
To give more flexibility, e.g. select directory/file by name use:
import os
import zipfile
def zipall(ob, path, rel=""):
basename = os.path.basename(path)
if os.path.isdir(path):
if rel == "":
rel = basename
ob.write(path, os.path.join(rel))
for root, dirs, files in os.walk(path):
for d in dirs:
zipall(ob, os.path.join(root, d), os.path.join(rel, d))
for f in files:
ob.write(os.path.join(root, f), os.path.join(rel, f))
break
elif os.path.isfile(path):
ob.write(path, os.path.join(rel, basename))
else:
pass
For a file tree:
.
├── dir
│   ├── dir2
│   │   └── file2.txt
│   ├── dir3
│   │   └── file3.txt
│   └── file.txt
├── dir4
│   ├── dir5
│   └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip
You can e.g. select only dir4 and root.txt:
cwd = os.getcwd()
files = [os.path.join(cwd, f) for f in ['dir4', 'root.txt']]
with zipfile.ZipFile("selective.zip", "w" ) as myzip:
for f in files:
zipall(myzip, f)
Or just listdir in script invocation directory and add everything from there:
with zipfile.ZipFile("listdir.zip", "w" ) as myzip:
for f in os.listdir():
if f == "listdir.zip":
# Creating a listdir.zip in the same directory
# will include listdir.zip inside itself, beware of this
continue
zipall(myzip, f)
Here is a variation on the answer given by Nux that works for me:
def WriteDirectoryToZipFile( zipHandle, srcPath, zipLocalPath = "", zipOperation = zipfile.ZIP_DEFLATED ):
basePath = os.path.split( srcPath )[ 0 ]
for root, dirs, files in os.walk( srcPath ):
p = os.path.join( zipLocalPath, root [ ( len( basePath ) + 1 ) : ] )
# add dir
zipHandle.write( root, p, zipOperation )
# add files
for f in files:
filePath = os.path.join( root, f )
fileInZipPath = os.path.join( p, f )
zipHandle.write( filePath, fileInZipPath, zipOperation )
Try the below one .it worked for me.
import zipfile, os
zipf = "compress.zip"
def main():
directory = r"Filepath"
toZip(directory)
def toZip(directory):
zippedHelp = zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED )
list = os.listdir(directory)
for file_list in list:
file_name = os.path.join(directory,file_list)
if os.path.isfile(file_name):
print file_name
zippedHelp.write(file_name)
else:
addFolderToZip(zippedHelp,file_list,directory)
print "---------------Directory Found-----------------------"
zippedHelp.close()
def addFolderToZip(zippedHelp,folder,directory):
path=os.path.join(directory,folder)
print path
file_list=os.listdir(path)
for file_name in file_list:
file_path=os.path.join(path,file_name)
if os.path.isfile(file_path):
zippedHelp.write(file_path)
elif os.path.isdir(file_name):
print "------------------sub directory found--------------------"
addFolderToZip(zippedHelp,file_name,path)
if __name__=="__main__":
main()
Say you want to Zip all the folders(sub directories) in the current directory.
for root, dirs, files in os.walk("."):
for sub_dir in dirs:
zip_you_want = sub_dir+".zip"
zip_process = zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED)
zip_process.write(file_you_want_to_include)
zip_process.close()
print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))
Zip a file or a tree (a directory and its sub-directories).
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as zf:
paths = [Path(tree_path)]
while paths:
p = paths.pop()
if p.is_dir():
paths.extend(p.iterdir())
if skip_empty_dir:
continue
zf.write(p)
To append to an existing archive, pass mode='a', to create a fresh archive mode='w' (the default in the above). So let's say you want to bundle 3 different directory trees under the same archive.
make_zip(path_to_tree1, path_to_arch, mode='w')
make_zip(path_to_tree2, path_to_arch, mode='a')
make_zip(path_to_file3, path_to_arch, mode='a')
A solution using pathlib.Path, which is independent of the OS used:
import zipfile
from pathlib import Path
def zip_dir(path: Path, zip_file_path: Path):
"""Zip all contents of path to zip_file"""
files_to_zip = [
file for file in path.glob('*') if file.is_file()]
with zipfile.ZipFile(
zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
for file in files_to_zip:
print(file.name)
zip_f.write(file, file.name)
current_dir = Path.cwd()
zip_dir = current_dir / "test"
tools.zip_dir(
zip_dir, current_dir / 'Zipped_dir.zip')
The obvious way to go would be to go with shutil, Like the second top answer says so, But if you still wish to go with ZipFile for some reason, And if you are getting some trouble doing that (Like ERR 13 in Windows etc), You can use this fix:
import os
import zipfile
def retrieve_file_paths(dirName):
filePaths = []
for root, directories, files in os.walk(dirName):
for filename in files:
filePath = os.path.join(root, filename)
filePaths.append(filePath)
return filePaths
def main(dir_name, output_filename):
filePaths = retrieve_file_paths(dir_name)
zip_file = zipfile.ZipFile(output_filename+'.zip', 'w')
with zip_file:
for file in filePaths:
zip_file.write(file)
main("my_dir", "my_dir_archived")
This one recursively iterates through every sub-folder/file in your given folder, And writes them to a zip file instead of attempting to directly zip a folder.
Here's a modern approach, using pathlib, and a context manager. Puts the files directly in the zip, rather than in a subfolder.
def zip_dir(filename: str, dir_to_zip: pathlib.Path):
with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
# Use glob instead of iterdir(), to cover all subdirectories.
for directory in dir_to_zip.glob('**'):
for file in directory.iterdir():
if not file.is_file():
continue
# Strip the first component, so we don't create an uneeded subdirectory
# containing everything.
zip_path = pathlib.Path(*file.parts[1:])
# Use a string, since zipfile doesn't support pathlib directly.
zipf.write(str(file), str(zip_path))
I prepared a function by consolidating Mark Byers' solution with Reimund and Morten Zilmer's comments (relative path and including empty directories). As a best practice, with is used in ZipFile's file construction.
The function also prepares a default zip file name with the zipped directory name and '.zip' extension. Therefore, it works with only one argument: the source directory to be zipped.
import os
import zipfile
def zip_dir(path_dir, path_file_zip=''):
if not path_file_zip:
path_file_zip = os.path.join(
os.path.dirname(path_dir), os.path.basename(path_dir)+'.zip')
with zipfile.ZipFile(path_file_zip, 'wb', zipfile.ZIP_DEFLATED) as zip_file:
for root, dirs, files in os.walk(path_dir):
for file_or_dir in files + dirs:
zip_file.write(
os.path.join(root, file_or_dir),
os.path.relpath(os.path.join(root, file_or_dir),
os.path.join(path_dir, os.path.pardir)))
# import required python modules
# You have to install zipfile package using pip install
import os,zipfile
# Change the directory where you want your new zip file to be
os.chdir('Type your destination')
# Create a new zipfile ( I called it myfile )
zf = zipfile.ZipFile('myfile.zip','w')
# os.walk gives a directory tree. Access the files using a for loop
for dirnames,folders,files in os.walk('Type your directory'):
zf.write('Type your Directory')
for file in files:
zf.write(os.path.join('Type your directory',file))
Well, after reading the suggestions I came up with a very similar way that works with 2.7.x without creating "funny" directory names (absolute-like names), and will only create the specified folder inside the zip.
Or just in case you needed your zip to contain a folder inside with the contents of the selected directory.
def zipDir( path, ziph ) :
"""
Inserts directory (path) into zipfile instance (ziph)
"""
for root, dirs, files in os.walk( path ) :
for file in files :
ziph.write( os.path.join( root, file ) , os.path.basename( os.path.normpath( path ) ) + "\\" + file )
def makeZip( pathToFolder ) :
"""
Creates a zip file with the specified folder
"""
zipf = zipfile.ZipFile( pathToFolder + 'file.zip', 'w', zipfile.ZIP_DEFLATED )
zipDir( pathToFolder, zipf )
zipf.close()
print( "Zip file saved to: " + pathToFolder)
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )
Function to create zip file.
def CREATEZIPFILE(zipname, path):
#function to create a zip file
#Parameters: zipname - name of the zip file; path - name of folder/file to be put in zip file
zipf = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED)
zipf.setpassword(b"password") #if you want to set password to zipfile
#checks if the path is file or directory
if os.path.isdir(path):
for files in os.listdir(path):
zipf.write(os.path.join(path, files), files)
elif os.path.isfile(path):
zipf.write(os.path.join(path), path)
zipf.close()
One thing completely missed by previous answers is the fact that using os.path.join() can easily return POSIX-incompatible paths when you run the code on Windows. The resulting archive will contain files with literal backslashes in their names when processing it with any common archive software on Linux, which is not what you want. Use path.as_posix() for the arcname parameter instead!
import zipfile
from pathlib import Path
with zipfile.ZipFile("archive.zip", "w", zipfile.ZIP_DEFLATED) as zf:
for path in Path("include_all_of_this_folder").rglob("*"):
zf.write(path, path.as_posix())

Create text file in multiple directories - Python

How would I create foo.txt in multiple directories using python? For example, let's say I have this tree
example
--- folder1
--- folder2
--- folder3
How would I put foo.txt in each folder? I've tried this code but it didn't work.
import os
for root, dirs, files in os.walk("/", topdown=False):
for name in dirs:
f = open(name + '/file.txt', 'w')
f.write('ex')
You have to join path parts together, including the root part (here /)
import os
for root, dirs, files in os.walk("/", topdown=False):
for name in dirs:
with open(os.path.join(root,name,'file.txt'), 'w') as f:
f.write('ex')
Aside:
if you don't run this code as root you'll get an exception on protected directories and it will stop. Maybe consider starting a little lower than / (for instance your home directory) or catch the write exceptions to be able to continue.
this code is "dangerous" because it will create a file in all directories (you have to run it as root to do so)
use a with block so the file is automatically closed when exiting from the block
You could make a function that writes to a given path and then loop through it. e.g. something like:
def write_txt_file(dir, filename, text):
d = dir + filename + ".txt"
outF = open(d, "w")
outF.write(text)
outF.close()
dirs = ["P:/example/folder1/", "P:/example/folder2/", "P:/example/folder3/"]
for dir in dirs:
write_txt_file(dir, "foo", "Hi there!")

How to create a zip archive of a directory?

How can I create a zip archive of a directory structure in Python?
The easiest way is to use shutil.make_archive. It supports both zip and tar formats.
import shutil
shutil.make_archive(output_filename, 'zip', dir_name)
If you need to do something more complicated than zipping the whole directory (such as skipping certain files), then you'll need to dig into the zipfile module as others have suggested.
As others have pointed out, you should use zipfile. The documentation tells you what functions are available, but doesn't really explain how you can use them to zip an entire directory. I think it's easiest to explain with some example code:
import os
import zipfile
def zipdir(path, ziph):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
for file in files:
ziph.write(os.path.join(root, file),
os.path.relpath(os.path.join(root, file),
os.path.join(path, '..')))
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
zipdir('tmp/', zipf)
To add the contents of mydirectory to a new zip file, including all files and subdirectories:
import os
import zipfile
zf = zipfile.ZipFile("myzipfile.zip", "w")
for dirname, subdirs, files in os.walk("mydirectory"):
zf.write(dirname)
for filename in files:
zf.write(os.path.join(dirname, filename))
zf.close()
How can I create a zip archive of a directory structure in Python?
In a Python script
In Python 2.7+, shutil has a make_archive function.
from shutil import make_archive
make_archive(
'zipfile_name',
'zip', # the archive format - or tar, bztar, gztar
root_dir=None, # root for archive - current working dir if None
base_dir=None) # start archiving from here - cwd if None too
Here the zipped archive will be named zipfile_name.zip. If base_dir is farther down from root_dir it will exclude files not in the base_dir, but still archive the files in the parent dirs up to the root_dir.
I did have an issue testing this on Cygwin with 2.7 - it wants a root_dir argument, for cwd:
make_archive('zipfile_name', 'zip', root_dir='.')
Using Python from the shell
You can do this with Python from the shell also using the zipfile module:
$ python -m zipfile -c zipname sourcedir
Where zipname is the name of the destination file you want (add .zip if you want it, it won't do it automatically) and sourcedir is the path to the directory.
Zipping up Python (or just don't want parent dir):
If you're trying to zip up a python package with a __init__.py and __main__.py, and you don't want the parent dir, it's
$ python -m zipfile -c zipname sourcedir/*
And
$ python zipname
would run the package. (Note that you can't run subpackages as the entry point from a zipped archive.)
Zipping a Python app:
If you have python3.5+, and specifically want to zip up a Python package, use zipapp:
$ python -m zipapp myapp
$ python myapp.pyz
This function will recursively zip up a directory tree, compressing the files, and recording the correct relative filenames in the archive. The archive entries are the same as those generated by zip -r output.zip source_dir.
import os
import zipfile
def make_zipfile(output_filename, source_dir):
relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
for root, dirs, files in os.walk(source_dir):
# add directory (needed for empty dirs)
zip.write(root, os.path.relpath(root, relroot))
for file in files:
filename = os.path.join(root, file)
if os.path.isfile(filename): # regular files only
arcname = os.path.join(os.path.relpath(root, relroot), file)
zip.write(filename, arcname)
Use shutil, which is part of python standard library set.
Using shutil is so simple(see code below):
1st arg: Filename of resultant zip/tar file,
2nd arg: zip/tar,
3rd arg: dir_name
Code:
import shutil
shutil.make_archive('/home/user/Desktop/Filename','zip','/home/username/Desktop/Directory')
Modern Python (3.6+) using the pathlib module for concise OOP-like handling of paths, and pathlib.Path.rglob() for recursive globbing. As far as I can tell, this is equivalent to George V. Reilly's answer: zips with compression, the topmost element is a directory, keeps empty dirs, uses relative paths.
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile
from os import PathLike
from typing import Union
def zip_dir(zip_name: str, source_dir: Union[str, PathLike]):
src_path = Path(source_dir).expanduser().resolve(strict=True)
with ZipFile(zip_name, 'w', ZIP_DEFLATED) as zf:
for file in src_path.rglob('*'):
zf.write(file, file.relative_to(src_path.parent))
Note: as optional type hints indicate, zip_name can't be a Path object (would be fixed in 3.6.2+).
With python 3.9, pathlib & zipfile module you can create a zip files from anywhere in the system.
def zip_dir(dir: Union[Path, str], filename: Union[Path, str]):
"""Zip the provided directory without navigating to that directory using `pathlib` module"""
# Convert to Path object
dir = Path(dir)
with zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED) as zip_file:
for entry in dir.rglob("*"):
zip_file.write(entry, entry.relative_to(dir))
It is neat, typed, and has less code.
For adding compression to the resulting zip file, check out this link.
You need to change:
zip = zipfile.ZipFile('Python.zip', 'w')
to
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
I've made some changes to code given by Mark Byers. Below function will also adds empty directories if you have them. Examples should make it more clear what is the path added to the zip.
#!/usr/bin/env python
import os
import zipfile
def addDirToZip(zipHandle, path, basePath=""):
"""
Adding directory given by \a path to opened zip file \a zipHandle
#param basePath path that will be removed from \a path when adding to archive
Examples:
# add whole "dir" to "test.zip" (when you open "test.zip" you will see only "dir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
zipHandle.close()
# add contents of "dir" to "test.zip" (when you open "test.zip" you will see only it's contents)
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir', 'dir')
zipHandle.close()
# add contents of "dir/subdir" to "test.zip" (when you open "test.zip" you will see only contents of "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')
zipHandle.close()
# add whole "dir/subdir" to "test.zip" (when you open "test.zip" you will see only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir', 'dir')
zipHandle.close()
# add whole "dir/subdir" with full path to "test.zip" (when you open "test.zip" you will see only "dir" and inside it only "subdir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir/subdir')
zipHandle.close()
# add whole "dir" and "otherDir" (with full path) to "test.zip" (when you open "test.zip" you will see only "dir" and "otherDir")
zipHandle = zipfile.ZipFile('test.zip', 'w')
addDirToZip(zipHandle, 'dir')
addDirToZip(zipHandle, 'otherDir')
zipHandle.close()
"""
basePath = basePath.rstrip("\\/") + ""
basePath = basePath.rstrip("\\/")
for root, dirs, files in os.walk(path):
# add dir itself (needed for empty dirs
zipHandle.write(os.path.join(root, "."))
# add files
for file in files:
filePath = os.path.join(root, file)
inZipPath = filePath.replace(basePath, "", 1).lstrip("\\/")
#print filePath + " , " + inZipPath
zipHandle.write(filePath, inZipPath)
Above is a simple function that should work for simple cases. You can find more elegant class in my Gist:
https://gist.github.com/Eccenux/17526123107ca0ac28e6
I have another code example that may help, using python3, pathlib and zipfile.
It should work in any OS.
from pathlib import Path
import zipfile
from datetime import datetime
DATE_FORMAT = '%y%m%d'
def date_str():
"""returns the today string year, month, day"""
return '{}'.format(datetime.now().strftime(DATE_FORMAT))
def zip_name(path):
"""returns the zip filename as string"""
cur_dir = Path(path).resolve()
parent_dir = cur_dir.parents[0]
zip_filename = '{}/{}_{}.zip'.format(parent_dir, cur_dir.name, date_str())
p_zip = Path(zip_filename)
n = 1
while p_zip.exists():
zip_filename = ('{}/{}_{}_{}.zip'.format(parent_dir, cur_dir.name,
date_str(), n))
p_zip = Path(zip_filename)
n += 1
return zip_filename
def all_files(path):
"""iterator returns all files and folders from path as absolute path string
"""
for child in Path(path).iterdir():
yield str(child)
if child.is_dir():
for grand_child in all_files(str(child)):
yield str(Path(grand_child))
def zip_dir(path):
"""generate a zip"""
zip_filename = zip_name(path)
zip_file = zipfile.ZipFile(zip_filename, 'w')
print('create:', zip_filename)
for file in all_files(path):
print('adding... ', file)
zip_file.write(file)
zip_file.close()
if __name__ == '__main__':
zip_dir('.')
print('end!')
To retain the folder hierarchy under the parent directory to be archived:
import glob
import os
import zipfile
with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
for fp in glob(os.path.join(parent, "**/*")):
base = os.path.commonpath([parent, fp])
zipf.write(fp, arcname=fp.replace(base, ""))
If you want, you could change this to use pathlib for file globbing.
If you want a functionality like the compress folder of any common graphical file manager you can use the following code, it uses the zipfile module. Using this code you will have the zip file with the path as its root folder.
import os
import zipfile
def zipdir(path, ziph):
# Iterate all the directories and files
for root, dirs, files in os.walk(path):
# Create a prefix variable with the folder structure inside the path folder.
# So if a file is at the path directory will be at the root directory of the zip file
# so the prefix will be empty. If the file belongs to a containing folder of path folder
# then the prefix will be that folder.
if root.replace(path,'') == '':
prefix = ''
else:
# Keep the folder structure after the path folder, append a '/' at the end
# and remome the first character, if it is a '/' in order to have a path like
# folder1/folder2/file.txt
prefix = root.replace(path, '') + '/'
if (prefix[0] == '/'):
prefix = prefix[1:]
for filename in files:
actual_file_path = root + '/' + filename
zipped_file_path = prefix + filename
zipf.write( actual_file_path, zipped_file_path)
zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('/tmp/justtest/', zipf)
zipf.close()
So many answers here, and I hope I might contribute with my own version, which is based on the original answer (by the way), but with a more graphical perspective, also using context for each zipfile setup and sorting os.walk(), in order to have a ordered output.
Having these folders and them files (among other folders), I wanted to create a .zip for each cap_ folder:
$ tree -d
.
├── cap_01
|    ├── 0101000001.json
|   ├── 0101000002.json
| ├── 0101000003.json
|
├── cap_02
| ├── 0201000001.json
| ├── 0201000002.json
| ├── 0201001003.json
|
├── cap_03
| ├── 0301000001.json
| ├── 0301000002.json
| ├── 0301000003.json
|
├── docs
| ├── map.txt
| ├── main_data.xml
|
├── core_files
├── core_master
├── core_slave
Here's what I applied, with comments for better understanding of the process.
$ cat zip_cap_dirs.py
""" Zip 'cap_*' directories. """
import os
import zipfile as zf
for root, dirs, files in sorted(os.walk('.')):
if 'cap_' in root:
print(f"Compressing: {root}")
# Defining .zip name, according to Capítulo.
cap_dir_zip = '{}.zip'.format(root)
# Opening zipfile context for current root dir.
with zf.ZipFile(cap_dir_zip, 'w', zf.ZIP_DEFLATED) as new_zip:
# Iterating over os.walk list of files for the current root dir.
for f in files:
# Defining relative path to files from current root dir.
f_path = os.path.join(root, f)
# Writing the file on the .zip file of the context
new_zip.write(f_path)
Basically, for each iteration over os.walk(path), I'm opening a context for zipfile setup and afterwards, iterating iterating over files, which is a list of files from root directory, forming the relative path for each file based on the current root directory, appending to the zipfile context which is running.
And the output is presented like this:
$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03
To see the contents of each .zip directory, you can use less command:
$ less cap_01.zip
Archive: cap_01.zip
Length Method Size Cmpr Date Time CRC-32 Name
-------- ------ ------- ---- ---------- ----- -------- ----
22017 Defl:N 2471 89% 2019-09-05 08:05 7a3b5ec6 cap_01/0101000001.json
21998 Defl:N 2471 89% 2019-09-05 08:05 155bece7 cap_01/0101000002.json
23236 Defl:N 2573 89% 2019-09-05 08:05 55fced20 cap_01/0101000003.json
-------- ------- --- -------
67251 7515 89% 3 files
You probably want to look at the zipfile module; there's documentation at http://docs.python.org/library/zipfile.html.
You may also want os.walk() to index the directory structure.
To give more flexibility, e.g. select directory/file by name use:
import os
import zipfile
def zipall(ob, path, rel=""):
basename = os.path.basename(path)
if os.path.isdir(path):
if rel == "":
rel = basename
ob.write(path, os.path.join(rel))
for root, dirs, files in os.walk(path):
for d in dirs:
zipall(ob, os.path.join(root, d), os.path.join(rel, d))
for f in files:
ob.write(os.path.join(root, f), os.path.join(rel, f))
break
elif os.path.isfile(path):
ob.write(path, os.path.join(rel, basename))
else:
pass
For a file tree:
.
├── dir
│   ├── dir2
│   │   └── file2.txt
│   ├── dir3
│   │   └── file3.txt
│   └── file.txt
├── dir4
│   ├── dir5
│   └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip
You can e.g. select only dir4 and root.txt:
cwd = os.getcwd()
files = [os.path.join(cwd, f) for f in ['dir4', 'root.txt']]
with zipfile.ZipFile("selective.zip", "w" ) as myzip:
for f in files:
zipall(myzip, f)
Or just listdir in script invocation directory and add everything from there:
with zipfile.ZipFile("listdir.zip", "w" ) as myzip:
for f in os.listdir():
if f == "listdir.zip":
# Creating a listdir.zip in the same directory
# will include listdir.zip inside itself, beware of this
continue
zipall(myzip, f)
Here is a variation on the answer given by Nux that works for me:
def WriteDirectoryToZipFile( zipHandle, srcPath, zipLocalPath = "", zipOperation = zipfile.ZIP_DEFLATED ):
basePath = os.path.split( srcPath )[ 0 ]
for root, dirs, files in os.walk( srcPath ):
p = os.path.join( zipLocalPath, root [ ( len( basePath ) + 1 ) : ] )
# add dir
zipHandle.write( root, p, zipOperation )
# add files
for f in files:
filePath = os.path.join( root, f )
fileInZipPath = os.path.join( p, f )
zipHandle.write( filePath, fileInZipPath, zipOperation )
Try the below one .it worked for me.
import zipfile, os
zipf = "compress.zip"
def main():
directory = r"Filepath"
toZip(directory)
def toZip(directory):
zippedHelp = zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED )
list = os.listdir(directory)
for file_list in list:
file_name = os.path.join(directory,file_list)
if os.path.isfile(file_name):
print file_name
zippedHelp.write(file_name)
else:
addFolderToZip(zippedHelp,file_list,directory)
print "---------------Directory Found-----------------------"
zippedHelp.close()
def addFolderToZip(zippedHelp,folder,directory):
path=os.path.join(directory,folder)
print path
file_list=os.listdir(path)
for file_name in file_list:
file_path=os.path.join(path,file_name)
if os.path.isfile(file_path):
zippedHelp.write(file_path)
elif os.path.isdir(file_name):
print "------------------sub directory found--------------------"
addFolderToZip(zippedHelp,file_name,path)
if __name__=="__main__":
main()
Say you want to Zip all the folders(sub directories) in the current directory.
for root, dirs, files in os.walk("."):
for sub_dir in dirs:
zip_you_want = sub_dir+".zip"
zip_process = zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED)
zip_process.write(file_you_want_to_include)
zip_process.close()
print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))
Zip a file or a tree (a directory and its sub-directories).
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as zf:
paths = [Path(tree_path)]
while paths:
p = paths.pop()
if p.is_dir():
paths.extend(p.iterdir())
if skip_empty_dir:
continue
zf.write(p)
To append to an existing archive, pass mode='a', to create a fresh archive mode='w' (the default in the above). So let's say you want to bundle 3 different directory trees under the same archive.
make_zip(path_to_tree1, path_to_arch, mode='w')
make_zip(path_to_tree2, path_to_arch, mode='a')
make_zip(path_to_file3, path_to_arch, mode='a')
A solution using pathlib.Path, which is independent of the OS used:
import zipfile
from pathlib import Path
def zip_dir(path: Path, zip_file_path: Path):
"""Zip all contents of path to zip_file"""
files_to_zip = [
file for file in path.glob('*') if file.is_file()]
with zipfile.ZipFile(
zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
for file in files_to_zip:
print(file.name)
zip_f.write(file, file.name)
current_dir = Path.cwd()
zip_dir = current_dir / "test"
tools.zip_dir(
zip_dir, current_dir / 'Zipped_dir.zip')
The obvious way to go would be to go with shutil, Like the second top answer says so, But if you still wish to go with ZipFile for some reason, And if you are getting some trouble doing that (Like ERR 13 in Windows etc), You can use this fix:
import os
import zipfile
def retrieve_file_paths(dirName):
filePaths = []
for root, directories, files in os.walk(dirName):
for filename in files:
filePath = os.path.join(root, filename)
filePaths.append(filePath)
return filePaths
def main(dir_name, output_filename):
filePaths = retrieve_file_paths(dir_name)
zip_file = zipfile.ZipFile(output_filename+'.zip', 'w')
with zip_file:
for file in filePaths:
zip_file.write(file)
main("my_dir", "my_dir_archived")
This one recursively iterates through every sub-folder/file in your given folder, And writes them to a zip file instead of attempting to directly zip a folder.
Here's a modern approach, using pathlib, and a context manager. Puts the files directly in the zip, rather than in a subfolder.
def zip_dir(filename: str, dir_to_zip: pathlib.Path):
with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
# Use glob instead of iterdir(), to cover all subdirectories.
for directory in dir_to_zip.glob('**'):
for file in directory.iterdir():
if not file.is_file():
continue
# Strip the first component, so we don't create an uneeded subdirectory
# containing everything.
zip_path = pathlib.Path(*file.parts[1:])
# Use a string, since zipfile doesn't support pathlib directly.
zipf.write(str(file), str(zip_path))
I prepared a function by consolidating Mark Byers' solution with Reimund and Morten Zilmer's comments (relative path and including empty directories). As a best practice, with is used in ZipFile's file construction.
The function also prepares a default zip file name with the zipped directory name and '.zip' extension. Therefore, it works with only one argument: the source directory to be zipped.
import os
import zipfile
def zip_dir(path_dir, path_file_zip=''):
if not path_file_zip:
path_file_zip = os.path.join(
os.path.dirname(path_dir), os.path.basename(path_dir)+'.zip')
with zipfile.ZipFile(path_file_zip, 'wb', zipfile.ZIP_DEFLATED) as zip_file:
for root, dirs, files in os.walk(path_dir):
for file_or_dir in files + dirs:
zip_file.write(
os.path.join(root, file_or_dir),
os.path.relpath(os.path.join(root, file_or_dir),
os.path.join(path_dir, os.path.pardir)))
# import required python modules
# You have to install zipfile package using pip install
import os,zipfile
# Change the directory where you want your new zip file to be
os.chdir('Type your destination')
# Create a new zipfile ( I called it myfile )
zf = zipfile.ZipFile('myfile.zip','w')
# os.walk gives a directory tree. Access the files using a for loop
for dirnames,folders,files in os.walk('Type your directory'):
zf.write('Type your Directory')
for file in files:
zf.write(os.path.join('Type your directory',file))
Well, after reading the suggestions I came up with a very similar way that works with 2.7.x without creating "funny" directory names (absolute-like names), and will only create the specified folder inside the zip.
Or just in case you needed your zip to contain a folder inside with the contents of the selected directory.
def zipDir( path, ziph ) :
"""
Inserts directory (path) into zipfile instance (ziph)
"""
for root, dirs, files in os.walk( path ) :
for file in files :
ziph.write( os.path.join( root, file ) , os.path.basename( os.path.normpath( path ) ) + "\\" + file )
def makeZip( pathToFolder ) :
"""
Creates a zip file with the specified folder
"""
zipf = zipfile.ZipFile( pathToFolder + 'file.zip', 'w', zipfile.ZIP_DEFLATED )
zipDir( pathToFolder, zipf )
zipf.close()
print( "Zip file saved to: " + pathToFolder)
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )
Function to create zip file.
def CREATEZIPFILE(zipname, path):
#function to create a zip file
#Parameters: zipname - name of the zip file; path - name of folder/file to be put in zip file
zipf = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED)
zipf.setpassword(b"password") #if you want to set password to zipfile
#checks if the path is file or directory
if os.path.isdir(path):
for files in os.listdir(path):
zipf.write(os.path.join(path, files), files)
elif os.path.isfile(path):
zipf.write(os.path.join(path), path)
zipf.close()
One thing completely missed by previous answers is the fact that using os.path.join() can easily return POSIX-incompatible paths when you run the code on Windows. The resulting archive will contain files with literal backslashes in their names when processing it with any common archive software on Linux, which is not what you want. Use path.as_posix() for the arcname parameter instead!
import zipfile
from pathlib import Path
with zipfile.ZipFile("archive.zip", "w", zipfile.ZIP_DEFLATED) as zf:
for path in Path("include_all_of_this_folder").rglob("*"):
zf.write(path, path.as_posix())

Categories

Resources