Zip the folder and its content in python(2.2.1) [duplicate] - python

Once I have all the files I require in a particular folder, I would like my python script to zip the folder contents.
Is this possible?
And how could I go about doing it?

On python 2.7 you might use: shutil.make_archive(base_name, format[, root_dir[, base_dir[, verbose[, dry_run[, owner[, group[, logger]]]]]]]).
base_name archive name minus extension
format format of the archive
root_dir directory to compress.
For example
shutil.make_archive(target_file, format="bztar", root_dir=compress_me)

Adapted version of the script is:
#!/usr/bin/env python
from __future__ import with_statement
from contextlib import closing
from zipfile import ZipFile, ZIP_DEFLATED
import os
def zipdir(basedir, archivename):
assert os.path.isdir(basedir)
with closing(ZipFile(archivename, "w", ZIP_DEFLATED)) as z:
for root, dirs, files in os.walk(basedir):
#NOTE: ignore empty directories
for fn in files:
absfn = os.path.join(root, fn)
zfn = absfn[len(basedir)+len(os.sep):] #XXX: relative path
z.write(absfn, zfn)
if __name__ == '__main__':
import sys
basedir = sys.argv[1]
archivename = sys.argv[2]
zipdir(basedir, archivename)
Example:
C:\zipdir> python -mzipdir c:\tmp\test test.zip
It creates 'C:\zipdir\test.zip' archive with the contents of the 'c:\tmp\test' directory.

Here is a recursive version
def zipfolder(path, relname, archive):
paths = os.listdir(path)
for p in paths:
p1 = os.path.join(path, p)
p2 = os.path.join(relname, p)
if os.path.isdir(p1):
zipfolder(p1, p2, archive)
else:
archive.write(p1, p2)
def create_zip(path, relname, archname):
archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED)
if os.path.isdir(path):
zipfolder(path, relname, archive)
else:
archive.write(path, relname)
archive.close()

Both jfs's solution and Kozyarchuk's solution could work for the OP's use case, however:
jfs's solution zips all of the files in a source folder and stores them in the zip at the root level (not preserving the original source folder within the structure of the zip).
Kozyarchuk's solution inadvertently puts the newly-created zip file into itself since it is a recursive solution (e.g. creating new zip file "myzip.zip" with this code will result in the archive "myzip.zip" itself containing an empty file "myzip.zip")
Thus, here is a solution that will simply add a source folder (and any subfolders to any depth) to a zip archive. This is motivated by the fact that you cannot pass a folder name to the built-in method ZipFile.write() -- the function below, add_folder_to_zip(), offers a simple method to add a folder and all of its contents to a zip archive. Below code works for Python2 and Python3.
import zipfile
import os
def add_folder_to_zip(src_folder_name, dst_zip_archive):
""" Adds a folder and its contents to a zip archive
Args:
src_folder_name (str): Source folder name to add to the archive
dst_zip_archive (ZipFile): Destination zip archive
Returns:
None
"""
for walk_item in os.walk(src_folder_name):
for file_item in walk_item[2]:
# walk_item[2] is a list of files in the folder entry
# walk_item[0] is the folder entry full path
fn_to_add = os.path.join(walk_item[0], file_item)
dst_zip_archive.write(fn_to_add)
if __name__ == '__main__':
zf = zipfile.ZipFile('myzip.zip', mode='w')
add_folder_to_zip('zip_this_folder', zf)
zf.close()

Related

Edit identical line in several files [duplicate]

I need to iterate through all .asm files inside a given directory and do some actions on them.
How can this be done in a efficient way?
Python 3.6 version of the above answer, using os - assuming that you have the directory path as a str object in a variable called directory_in_str:
import os
directory = os.fsencode(directory_in_str)
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
Or recursively, using pathlib:
from pathlib import Path
pathlist = Path(directory_in_str).glob('**/*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Use rglob to replace glob('**/*.asm') with rglob('*.asm')
This is like calling Path.glob() with '**/' added in front of the given relative pattern:
from pathlib import Path
pathlist = Path(directory_in_str).rglob('*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Original answer:
import os
for filename in os.listdir("/path/to/dir/"):
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
This will iterate over all descendant files, not just the immediate children of the directory:
import os
for subdir, dirs, files in os.walk(rootdir):
for file in files:
#print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".asm"):
print (filepath)
You can try using glob module:
import glob
for filepath in glob.iglob('my_dir/*.asm'):
print(filepath)
and since Python 3.5 you can search subdirectories as well:
glob.glob('**/*.txt', recursive=True) # => ['2.txt', 'sub/3.txt']
From the docs:
The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but *, ?, and character ranges expressed with [] will be correctly matched.
Since Python 3.5, things are much easier with os.scandir() and 2-20x faster (source):
with os.scandir(path) as it:
for entry in it:
if entry.name.endswith(".asm") and entry.is_file():
print(entry.name, entry.path)
Using scandir() instead of listdir() can significantly increase the
performance of code that also needs file type or file attribute
information, because os.DirEntry objects expose this information if
the operating system provides it when scanning a directory. All
os.DirEntry methods may perform a system call, but is_dir() and
is_file() usually only require a system call for symbolic links;
os.DirEntry.stat() always requires a system call on Unix but only
requires one for symbolic links on Windows.
Python 3.4 and later offer pathlib in the standard library. You could do:
from pathlib import Path
asm_pths = [pth for pth in Path.cwd().iterdir()
if pth.suffix == '.asm']
Or if you don't like list comprehensions:
asm_paths = []
for pth in Path.cwd().iterdir():
if pth.suffix == '.asm':
asm_pths.append(pth)
Path objects can easily be converted to strings.
Here's how I iterate through files in Python:
import os
path = 'the/name/of/your/path'
folder = os.fsencode(path)
filenames = []
for file in os.listdir(folder):
filename = os.fsdecode(file)
if filename.endswith( ('.jpeg', '.png', '.gif') ): # whatever file types you're using...
filenames.append(filename)
filenames.sort() # now you have the filenames and can do something with them
NONE OF THESE TECHNIQUES GUARANTEE ANY ITERATION ORDERING
Yup, super unpredictable. Notice that I sort the filenames, which is important if the order of the files matters, i.e. for video frames or time dependent data collection. Be sure to put indices in your filenames though!
You can use glob for referring the directory and the list :
import glob
import os
#to get the current working directory name
cwd = os.getcwd()
#Load the images from images folder.
for f in glob.glob('images\*.jpg'):
dir_name = get_dir_name(f)
image_file_name = dir_name + '.jpg'
#To print the file name with path (path will be in string)
print (image_file_name)
To get the list of all directory in array you can use os :
os.listdir(directory)
I'm not quite happy with this implementation yet, I wanted to have a custom constructor that does DirectoryIndex._make(next(os.walk(input_path))) such that you can just pass the path you want a file listing for. Edits welcome!
import collections
import os
DirectoryIndex = collections.namedtuple('DirectoryIndex', ['root', 'dirs', 'files'])
for file_name in DirectoryIndex(*next(os.walk('.'))).files:
file_path = os.path.join(path, file_name)
I really like using the scandir directive that is built into the os library. Here is a working example:
import os
i = 0
with os.scandir('/usr/local/bin') as root_dir:
for path in root_dir:
if path.is_file():
i += 1
print(f"Full path is: {path} and just the name is: {path.name}")
print(f"{i} files scanned successfully.")
Get all the .asm files in a directory by doing this.
import os
path = "path_to_file"
file_type = '.asm'
for filename in os.listdir(path=path):
if filename.endswith(file_type):
print(filename)
print(f"{path}/{filename}")
# do something below
I don't understand why some answers are complicated. This is how I would do it with Python 2.7. Replace DIRECTORY_TO_LOOP with the directory you want to use.
import os
DIRECTORY_TO_LOOP = '/var/www/files/'
for root, dirs, files in os.walk(DIRECTORY_TO_LOOP, topdown=False):
for name in files:
print(os.path.join(root, name))

How can I extract all .zip extension in a folder without retaining directory using python?

Here is my code I don't know how can I loop every .zip in a folder, please help me: I want all contents of 5 zip files to extracted in one folder, not including its directory name
import os
import shutil
import zipfile
my_dir = r"C:\\Users\\Guest\\Desktop\\OJT\\scanner\\samples_raw"
my_zip = r"C:\\Users\\Guest\\Desktop\\OJT\\samples\\001-100.zip"
with zipfile.ZipFile(my_zip) as zip_file:
zip_file.setpassword(b"virus")
for member in zip_file.namelist():
filename = os.path.basename(member)
# skip directories
if not filename:
continue
# copy file (taken from zipfile's extract)
source = zip_file.open(member)
target = file(os.path.join(my_dir, filename), "wb")
with source, target:
shutil.copyfileobj(source, target)
repeated question, please refer below link.
How to extract zip file recursively in Pythonn
What you are looking for is glob. Which can be used like this:
#<snip>
import glob
#assuming all your zip files are in the directory below.
for my_zip in glob.glob(r"C:\\Users\\Guest\\Desktop\\OJT\\samples\\*.zip"):
with zipfile.ZipFile(my_zip) as zip_file:
zip_file.setpassword(b"virus")
for member in zip_file.namelist():
#<snip> rest of your code here.

Iterating over files in a directory and applying a user defined function automatically [duplicate]

I need to iterate through all .asm files inside a given directory and do some actions on them.
How can this be done in a efficient way?
Python 3.6 version of the above answer, using os - assuming that you have the directory path as a str object in a variable called directory_in_str:
import os
directory = os.fsencode(directory_in_str)
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
Or recursively, using pathlib:
from pathlib import Path
pathlist = Path(directory_in_str).glob('**/*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Use rglob to replace glob('**/*.asm') with rglob('*.asm')
This is like calling Path.glob() with '**/' added in front of the given relative pattern:
from pathlib import Path
pathlist = Path(directory_in_str).rglob('*.asm')
for path in pathlist:
# because path is object not string
path_in_str = str(path)
# print(path_in_str)
Original answer:
import os
for filename in os.listdir("/path/to/dir/"):
if filename.endswith(".asm") or filename.endswith(".py"):
# print(os.path.join(directory, filename))
continue
else:
continue
This will iterate over all descendant files, not just the immediate children of the directory:
import os
for subdir, dirs, files in os.walk(rootdir):
for file in files:
#print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".asm"):
print (filepath)
You can try using glob module:
import glob
for filepath in glob.iglob('my_dir/*.asm'):
print(filepath)
and since Python 3.5 you can search subdirectories as well:
glob.glob('**/*.txt', recursive=True) # => ['2.txt', 'sub/3.txt']
From the docs:
The glob module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but *, ?, and character ranges expressed with [] will be correctly matched.
Since Python 3.5, things are much easier with os.scandir() and 2-20x faster (source):
with os.scandir(path) as it:
for entry in it:
if entry.name.endswith(".asm") and entry.is_file():
print(entry.name, entry.path)
Using scandir() instead of listdir() can significantly increase the
performance of code that also needs file type or file attribute
information, because os.DirEntry objects expose this information if
the operating system provides it when scanning a directory. All
os.DirEntry methods may perform a system call, but is_dir() and
is_file() usually only require a system call for symbolic links;
os.DirEntry.stat() always requires a system call on Unix but only
requires one for symbolic links on Windows.
Python 3.4 and later offer pathlib in the standard library. You could do:
from pathlib import Path
asm_pths = [pth for pth in Path.cwd().iterdir()
if pth.suffix == '.asm']
Or if you don't like list comprehensions:
asm_paths = []
for pth in Path.cwd().iterdir():
if pth.suffix == '.asm':
asm_pths.append(pth)
Path objects can easily be converted to strings.
Here's how I iterate through files in Python:
import os
path = 'the/name/of/your/path'
folder = os.fsencode(path)
filenames = []
for file in os.listdir(folder):
filename = os.fsdecode(file)
if filename.endswith( ('.jpeg', '.png', '.gif') ): # whatever file types you're using...
filenames.append(filename)
filenames.sort() # now you have the filenames and can do something with them
NONE OF THESE TECHNIQUES GUARANTEE ANY ITERATION ORDERING
Yup, super unpredictable. Notice that I sort the filenames, which is important if the order of the files matters, i.e. for video frames or time dependent data collection. Be sure to put indices in your filenames though!
You can use glob for referring the directory and the list :
import glob
import os
#to get the current working directory name
cwd = os.getcwd()
#Load the images from images folder.
for f in glob.glob('images\*.jpg'):
dir_name = get_dir_name(f)
image_file_name = dir_name + '.jpg'
#To print the file name with path (path will be in string)
print (image_file_name)
To get the list of all directory in array you can use os :
os.listdir(directory)
I'm not quite happy with this implementation yet, I wanted to have a custom constructor that does DirectoryIndex._make(next(os.walk(input_path))) such that you can just pass the path you want a file listing for. Edits welcome!
import collections
import os
DirectoryIndex = collections.namedtuple('DirectoryIndex', ['root', 'dirs', 'files'])
for file_name in DirectoryIndex(*next(os.walk('.'))).files:
file_path = os.path.join(path, file_name)
I really like using the scandir directive that is built into the os library. Here is a working example:
import os
i = 0
with os.scandir('/usr/local/bin') as root_dir:
for path in root_dir:
if path.is_file():
i += 1
print(f"Full path is: {path} and just the name is: {path.name}")
print(f"{i} files scanned successfully.")
Get all the .asm files in a directory by doing this.
import os
path = "path_to_file"
file_type = '.asm'
for filename in os.listdir(path=path):
if filename.endswith(file_type):
print(filename)
print(f"{path}/{filename}")
# do something below
I don't understand why some answers are complicated. This is how I would do it with Python 2.7. Replace DIRECTORY_TO_LOOP with the directory you want to use.
import os
DIRECTORY_TO_LOOP = '/var/www/files/'
for root, dirs, files in os.walk(DIRECTORY_TO_LOOP, topdown=False):
for name in files:
print(os.path.join(root, name))

Python zip a sub folder and not the entire folder path

I have a program to zip all the contents in a folder. I did not write this code but I found it somewhere online and I am using it. I intend to zip a folder for example say, C:/folder1/folder2/folder3/ . I want to zip folder3 and all its contents in a file say folder3.zip. With the below code, once i zip it, the contents of folder3.zip wil be folder1/folder2/folder3/and files. I do not want the entire path to be zipped and i only want the subfolder im interested to zip (folder3 in this case). I tried some os.chdir etc, but no luck.
def makeArchive(fileList, archive):
"""
'fileList' is a list of file names - full path each name
'archive' is the file name for the archive with a full path
"""
try:
a = zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED)
for f in fileList:
print "archiving file %s" % (f)
a.write(f)
a.close()
return True
except: return False
def dirEntries(dir_name, subdir, *args):
# Creates a list of all files in the folder
'''Return a list of file names found in directory 'dir_name'
If 'subdir' is True, recursively access subdirectories under 'dir_name'.
Additional arguments, if any, are file extensions to match filenames. Matched
file names are added to the list.
If there are no additional arguments, all files found in the directory are
added to the list.
Example usage: fileList = dirEntries(r'H:\TEMP', False, 'txt', 'py')
Only files with 'txt' and 'py' extensions will be added to the list.
Example usage: fileList = dirEntries(r'H:\TEMP', True)
All files and all the files in subdirectories under H:\TEMP will be added
to the list. '''
fileList = []
for file in os.listdir(dir_name):
dirfile = os.path.join(dir_name, file)
if os.path.isfile(dirfile):
if not args:
fileList.append(dirfile)
else:
if os.path.splitext(dirfile)[1][1:] in args:
fileList.append(dirfile)
# recursively access file names in subdirectories
elif os.path.isdir(dirfile) and subdir:
print "Accessing directory:", dirfile
fileList.extend(dirEntries(dirfile, subdir, *args))
return fileList
You can call this by makeArchive(dirEntries(folder, True), zipname).
Any ideas as to how to solve this problem? I am uing windows OS annd python 25, i know in python 2.7 there is shutil make_archive which helps but since i am working on 2.5 i need another solution :-/
You'll have to give an arcname argument to ZipFile.write() that uses a relative path. Do this by giving the root path to remove to makeArchive():
def makeArchive(fileList, archive, root):
"""
'fileList' is a list of file names - full path each name
'archive' is the file name for the archive with a full path
"""
a = zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED)
for f in fileList:
print "archiving file %s" % (f)
a.write(f, os.path.relpath(f, root))
a.close()
and call this with:
makeArchive(dirEntries(folder, True), zipname, folder)
I've removed the blanket try:, except:; there is no use for that here and only serves to hide problems you want to know about.
The os.path.relpath() function returns a path relative to root, effectively removing that root path from the archive entry.
On python 2.5, the relpath function is not available; for this specific usecase the following replacement would work:
def relpath(filename, root):
return filename[len(root):].lstrip(os.path.sep).lstrip(os.path.altsep)
and use:
a.write(f, relpath(f, root))
Note that the above relpath() function only works for your specific case where filepath is guaranteed to start with root; on Windows the general case for relpath() is a lot more complex. You really want to upgrade to Python 2.6 or newer if at all possible.
ZipFile.write has an optional argument arcname. Use this to remove parts of the path.
You could change your method to be:
def makeArchive(fileList, archive, path_prefix=None):
"""
'fileList' is a list of file names - full path each name
'archive' is the file name for the archive with a full path
"""
try:
a = zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED)
for f in fileList:
print "archiving file %s" % (f)
if path_prefix is None:
a.write(f)
else:
a.write(f, f[len(path_prefix):] if f.startswith(path_prefix) else f)
a.close()
return True
except: return False
Martijn's approach using os.path is much more elegant, though.

How to add multiple files into a single zip folder

Actually i am writting a script which writes two files into a desktop, let it be as "a.txt" and "b.txt"....... so after writing into a desktop i have to read this files and zip into a folder....
can anyone help on this....i know how to zip a folder but dono how to add two files in to a zip
Reading from folder i know its like this
def zipdir(basedir, archivename):
assert os.path.isdir(basedir)
with closing(ZipFile(archivename, "w", ZIP_DEFLATED)) as z:
for root, dirs, files in os.walk(basedir):
for fn in files:
absfn = os.path.join(root, fn)
zfn = absfn[len(basedir)+len(os.sep):]
z.write(absfn, zfn)
if __name__ == '__main__':
import sys
basedir = sys.argv[1]
archivename = sys.argv[2]
zipdir(basedir, archivename)
The code which now i using is
import zipfile
zip = zipfile.ZipFile('Python.zip', 'a')
zip.write('fields.txt')
zip.write('grp.txt')
zip.close()
This is creating file of those two plus some extra folder which contains all files.......
you need to open the zip file with "a" -append parameter. Then you can use the write parameter without overwriting the file.
source: 12.4.1
EDIT:
zip.write('file.pdf','/folder/file.pdf')
The easiest wayh is to use shutil library. put all the files you want to zip in a single directoty(folder)
import shutil
shutil.make_archive(output_filename_dont_add_.zip, 'zip', directory_to_download)
Remember if you work with ipython you can use relative address for directory_to_download

Categories

Resources