How to recursively zip multiple folders as individual .zip files? - python

I have a set of folders (named *.pages), which I want to zip into their individual .zip files, e.g. "example1.pages" into "example1.pages.zip", "example2.pages" into "example2.pages.zip", etc. I also want to include the contents of the individual .pages folders.
Currently, the script zips all the .pages files into a single file with nested directories.
I'm not sure how to proceed, and I believe I'm missing something when performing the zipfile functions.
Any help will be most appreciated!
import os
import zipfile
start_path = "MY/DIRECTORY/HERE"
def zipdir(ziph):
    """Add every ``*.pages`` directory found under ``start_path`` to ``ziph``.

    All matches are written into the single archive passed in as ``ziph``;
    a summary of visited directories and written entries is printed.
    """
    visited = 0
    written = 0
    for path, dirs, files in os.walk(start_path):
        print('Directory: {:s}'.format(path))
        visited += 1
        for name in dirs:
            if not name.endswith(".pages"):
                continue
            print('\nAttempting to zip: \'{}\''.format(name))
            ziph.write(os.path.join(path, name))
            print('Done')
            written += 1
    print('\nProcessed {} files in {} directories.'.format(written, visited))
if __name__ == '__main__':
    # The context manager closes (and finalises) the archive even when
    # zipdir() raises part-way through.
    with zipfile.ZipFile("NAME/OF/INDIVIDUAL/ZIP/FILE.zip", 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipdir(zipf)

You only open a single zip file and add everything there. If you want one zip per file, you need to create zip files in the loop as you scan the files.
import os
import zipfile
start_path = "MY/DIRECTORY/HERE"
start_path = '.'
def zipdir(start_path):
    """Create ``<name>.pages.zip`` next to every ``*.pages`` item under *start_path*.

    A ``.pages`` item may be a plain file or a folder.  A folder is archived
    together with everything inside it; entries are stored relative to the
    folder's parent, so the archive unpacks to ``<name>.pages/...``.

    Args:
        start_path: Directory to scan recursively.

    Returns:
        None.  Progress and a final summary are printed.
    """
    dir_count = 0
    file_count = 0
    for path, dirs, files in os.walk(start_path):
        print('Directory: {:s}'.format(path))
        dir_count += 1
        bundles = [d for d in dirs if d.endswith(".pages")]
        # Prune the bundles from the walk: their contents go into the
        # archive and must not be scanned for further matches.
        dirs[:] = [d for d in dirs if not d.endswith(".pages")]
        documents = [f for f in files if f.endswith(".pages")]
        for name in bundles + documents:
            item_path = os.path.join(path, name)
            print('\nAttempting to zip: \'{}\''.format(item_path))
            with zipfile.ZipFile(item_path + '.zip', 'w', zipfile.ZIP_DEFLATED) as ziph:
                if os.path.isdir(item_path):
                    for root, _subdirs, members in os.walk(item_path):
                        # Writing the directory itself keeps empty folders.
                        ziph.write(root, os.path.relpath(root, path))
                        for member in members:
                            member_path = os.path.join(root, member)
                            ziph.write(member_path, os.path.relpath(member_path, path))
                else:
                    ziph.write(item_path, name)
            print('Done')
            file_count += 1
    print('\nProcessed {} files in {} directories.'.format(file_count, dir_count))
if __name__ == '__main__':
    # Scan only when run as a script, not when imported.
    zipdir(start_path)

You can also adapt the code by @tdelaney and use the shutil module as follows:
import os
import shutil
reports_path = os.getcwd()
def zipdir(reports_path):
    """Zip every folder found under *reports_path* into ``<folder>.zip`` beside it.

    The archive is created next to the folder it was made from, so folders
    with the same name at different depths do not overwrite each other and
    the result does not depend on the current working directory.

    Args:
        reports_path: Directory to scan recursively.
    """
    for path, dirs, _files in os.walk(reports_path):
        for d in dirs:
            folder_path = os.path.join(path, d)
            print('Compressing ' + d)
            # base_name is the folder's own path: make_archive appends
            # ".zip", giving "<path>/<d>.zip" with the folder's contents.
            shutil.make_archive(folder_path, 'zip', folder_path)
            print("Done")
if __name__ == '__main__':
    # Compress the folders below the chosen directory when run as a script.
    zipdir(reports_path)

Related

Python - copy specific file from subfolder to destination, get filename from text file

I want to get my script to read a list of names from a list(txt), then search for those in a selected folder with subfolders, then copy and paste those files to another selected folder. My script running without error but no result.
My script:
import os
import os.path
import shutil
textFile = "D:\\Test\\list.txt"
sourceFolder = "D:\\Test"
destinationFolder = "D:\\"
# One wanted file name per line of the list file.
with open(textFile, "r") as tx:
    filesToFind = [row.strip() for row in tx]
# NOTE(review): os.walk yields a *list* of names as its third item, so the
# membership test below compares a list against the wanted strings.
for root, dirs, filename in os.walk(sourceFolder):
    if filename not in filesToFind:
        continue
    f = os.path.join(root, filename)
    shutil.copy(f, destinationFolder)
I haven't tested it, but I think this will work - change this:
# Original loop: "filename" is the whole list of names in "root".
for root, dirs, filename in os.walk(sourceFolder):
    if filename not in filesToFind:
        continue
    f = os.path.join(root, filename)
    shutil.copy(f, destinationFolder)
To this:
# Corrected loop: test each name in the directory individually.
for root, dirs, filenames in os.walk(sourceFolder):
    for filename in filenames:
        if filename not in filesToFind:
            continue
        f = os.path.join(root, filename)
        shutil.copy(f, destinationFolder)
# Same code using glob #
## More efficient and also tested one ##
## One more feature added- checks file name given present or not ##
import os
import os.path
import shutil
import glob
textFile = "D:\\Test\\list.txt"
# Backslash doubled: "D:\Test" relied on "\T" being an unknown escape.
sourceFolder = "D:\\Test"
destinationFolder = "D:\\"
# Read the wanted names up front so the list file is closed promptly.
with open(textFile, "r") as listing:
    f = listing.readlines()
for i in f:
    wanted = i.strip()
    if not wanted:
        # A blank line would turn the pattern into a directory match.
        continue
    ListFile = glob.glob(os.path.join(sourceFolder, "**", wanted), recursive=True)
    if ListFile:
        # Only the first match is copied when the name occurs more than once.
        first_match = ListFile[0]
        print(first_match, destinationFolder, os.path.basename(first_match))
        destinationfile = os.path.join(destinationFolder, os.path.basename(first_match))
        shutil.copyfile(first_match, destinationfile)
    else:
        print(wanted, "-File not found")

python How do I import multiple .txt files in a folder to add characters to each .txt file?

There are text files of various names in the folder 'a'. I want to read all of these text files and add the letter 'b' to each text file. What should I do?
cwd = os.getcwd()
input_dir = os.path.join(cwd, "my .txt files dir")
# Matches ordered by length first, then alphabetically.
sorts = sorted(glob(input_dir), key=lambda x: (len(x), x))
for f in sorts:
    data = "add text"
    # NOTE(review): this opens input_dir itself, not the path held in f.
    with open(input_dir, 'a') as f:
        f.write(data)
Append data to file:
- first: get all file in folder a.
- second: find extension with .txt.
- third: open it and do something('append', or 'rewrite').
Demo:
import os
# your .txt files dir
path = 'a'
# append data what you want
appendData = 'b'
# Only the top-level file names are needed, so take just the first result of
# os.walk() instead of walking the whole tree; a missing folder yields no files.
_top, _subdirs, fileNames = next(os.walk(path), (path, [], []))
fileNames.sort(key=len)
fileNums = len(fileNames)
# your dst file extension
fileExt = '.txt'
# Alternative: take the extension from the first file name instead:
# fileExt = os.path.splitext(fileNames[0])[1]
for fileName in fileNames:
    if not fileName.endswith(fileExt):
        continue
    fileFullPath = os.path.join(path, fileName)
    with open(fileFullPath, 'a') as f:
        f.write(appendData)
Like the others said, this is an easy question whose answer can easily be found on Google. Anyway, here's how to do it:
from os import listdir
from os.path import isfile, isdir, join
# List the folder once and split the entries into files and directories.
entries = listdir("files")
files = [name for name in entries if isfile(join("files", name))]
directories = [name for name in entries if isdir(join("files", name))]
print(files)
for file_name in files:
    try:
        # "with" closes the handle even if the write fails.
        with open(join("files", file_name), "a") as handle:
            handle.write("b")
    except IOError as err:
        print("Could not open file because : ", err)
Replace "files" with the directory where your files are, or with the path to that directory, like "directory0/directory1/directory_with_files".
Avoid opening files with
f = open(input_dir, 'a')
f.close()
Instead
with open(input_dir, 'a') as inputFile:
Do something
Also what you want is
import os
import glob # We will use this module to open only .txt files
path = 'your/path'
# Append "b" to every .txt file directly inside "path".
for filename in glob.glob(os.path.join(path, '*.txt')):
    with open(filename, 'a') as inputFile:
        inputFile.write('b')

How would I exclude directories from os.listdir results?

I'm making a script that will encode files within a directory using b64/b16 and I'm using os.listdir to do so, but it also lists directories which causes problems since now it's trying to encode directories as if it were a file.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob
#C:\\Users\\Fedora\\Desktop\\Win 10
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
# Every directory entry is treated as a file to encode.
files = list(dirs)
filecount = len(files)
fileprogress = 0
# Encoding layers, applied in this order.
encoders = (
    base64.b16encode,
    base64.b64encode,
    base64.b16encode,
    base64.b64encode,
    base64.b16encode,
)
for x in files:
    os.system("cls")
    fileprogress += 1
    print("File " + str(fileprogress) + "/" + str(filecount))
    print("Encrypting " + x + "...")
    with open(path + "\\" + x, "rb") as inputfile:
        data = inputfile.read()
    for encode in encoders:
        data = encode(data)
    with open(path + "\\" + x + ".crypt", "wb") as outputfile:
        outputfile.write(data)
use filter
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same folder that the isfile() test below is joined against.
dirs = os.listdir(filepath)
# list(...) so the result can be counted and iterated more than once
# (filter() alone is a one-shot iterator on Python 3).
files = list(filter(lambda x: os.path.isfile(os.path.join(filepath, x)), dirs))
or list comprehension with os.path.isfile()
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same folder that the isfile() test below is joined against.
dirs = os.listdir(filepath)
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]
You can use os.path.isdir function to check if the current file is a directory.
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.
Instead of using os.listdir() you can use os.walk, which returns separate lists for files and directories
python-oswalk-example
import os
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# A separate loop name keeps "path" pointing at the starting folder.
for (current_path, dirs, files) in os.walk(path):
    print(current_path)
    print(dirs)
    print(files)
pythoncentral os-walk
#Import the os module, for the os.walk function
import os
#Set the directory you want to start from
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# Print each directory followed by an indented list of its files.
for dirName, subdirList, fileList in os.walk(path):
    print('Found directory: %s' % dirName)
    for fname in fileList:
        print('\t%s' % fname)

I have a ".txt "file which consists of various filenames and I want to search each filename in a folder where these files are actually kept

Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds a filename
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my textfile (ant1.aiq,ant2.aiq and so on) with filenames which are present at some specific place(R:\Sample) and extract matching files into some other place (R:\sample\wsa).
I have an idea that I need to use functions like os.walk() and fnmatch.fnmatch(), shutil.copy() but I am not able to implement them
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
# Names of the regular files that sit directly in the source folder.
existingFiles = {
    f for f in os.listdir(sourceDir)
    if os.path.isfile(os.path.join(sourceDir, f))
}
infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname in existingFiles:
            # Move (not copy) the listed file into the target folder.
            shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
I hope this will suffice:
import os
def match_files(url, file_read, dest):
    """Move every file named in *file_read* from *url* into *dest*.

    Args:
        url: Source directory whose direct entries are matched.
        file_read: Path of a text file with one file name per line.
        dest: Destination directory.

    Returns:
        The list of file names that were moved, in listing order.
    """
    file_list = os.listdir(url)
    print(file_list)
    print("Current working directory is " + os.getcwd())
    available = set(file_list)
    match = []
    # Text mode: os.listdir() returns str names, so the lines read from the
    # list must be str as well (bytes would never compare equal).
    with open(file_read, 'r') as f:
        for line in f:
            file_name = line.strip()
            if file_name not in available:
                continue
            match.append(file_name)
            # Paths are joined explicitly, so the working directory is left
            # alone; changing into "url" would break a relative "url".
            os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
            # Already moved: a repeated line must not be renamed again.
            available.discard(file_name)
    print(match)
    return match
Here, url is the source directory (folder) in which you want to match files, file_read is the name of the file (with path) that lists the file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. these files won't remain in url after running the code.
Alternatively you could use the glob module which allows you to enter in a expression for the file name\extension which will then return a list that you can loop over.
I'd use this module if the source directory can have files with the same extension that you want to exclude from being looped over
Also, I'm assuming that the file name list is not large, so storing it in a list won't be an issue
eg (I haven't tested the below )
from glob import glob
import os
import shutil
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
# Strip each line: readlines() keeps the trailing newline, which would stop
# any listed name from ever matching a real file name.
with open(in_file_list) as reader:
    data = [line.strip() for line in reader]
for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file
        # shutil.move(row, os.path.join(dst, file_name))

Flatten complex directory structure in Python

I want to move files from a complex directory structure to just one place. For example i have this deep hierarchy:
foo/
foo2/
1.jpg
2.jpg
...
I want it to be:
1.jpg
2.jpg
...
My current solution:
def move(destination):
    """Move the files selected by the path filter below into *destination*.

    Relies on the ``directory`` context manager and ``glob_recursive``
    helper defined elsewhere.
    """
    for_removal = os.path.join(destination, '\\')

    def is_in_parent(x):
        # Keep only paths that contain the marker built above.
        return x.find(for_removal) > -1

    with directory(destination):
        files_to_move = filter(is_in_parent, glob_recursive(path='.'))
        for file in files_to_move:
            shutil.move(file, destination)
Definitions: directory and glob_recursive. Note, that my code only moves files to their common parent directory, not an arbitrary destination.
How can i move all files from a complex hierarchy to a single place succinctly and elegantly?
I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination
import os
import itertools
import shutil
def move(destination):
    """Move every file in the subdirectories of *destination* up into it.

    Files already at the top level are left alone: the first ``os.walk``
    result (the top directory itself) is skipped.
    """
    below_top = itertools.islice(os.walk(destination), 1, None)
    # Collect everything first, then move, so the walk never sees its own moves.
    pending = [
        os.path.join(root, filename)
        for root, _dirs, files in below_top
        for filename in files
    ]
    for source in pending:
        shutil.move(source, destination)
Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:
def move(destination):
    """Move every file in the subdirectories of *destination* up into it.

    The first ``os.walk`` result is the top directory itself and is
    discarded, so files already in place are not touched.
    """
    walker = os.walk(destination)
    next(walker, None)  # drop the entry for the top directory
    all_files = []
    for root, _dirs, files in walker:
        all_files.extend(os.path.join(root, filename) for filename in files)
    for filename in all_files:
        shutil.move(filename, destination)
this would do, it also renames files if they collide (I commented out the actual move and replaced with a copy):
import os
import sys
import string
import shutil
#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    """Yield file paths below *path*, or *path* itself if it is not a directory."""
    if not os.path.isdir(path):
        yield path
        return
    for root, dirs, files in os.walk(path):
        for name in files:
            yield os.path.join(root, name)
destination = "./newdir/"
fromdir = "./test/"
for f in getfiles(fromdir):
    # os.path.basename replaces string.split(), which no longer exists on
    # Python 3, and copes with either path separator.
    filename = os.path.basename(f)
    if os.path.isfile(os.path.join(destination, filename)):
        # Name collision: fold the path relative to fromdir into the name,
        # e.g. "a/b.txt" becomes "a_b.txt".
        filename = os.path.relpath(f, fromdir).replace(os.sep, "_")
    # To move instead of copy:
    # os.rename(f, os.path.join(destination, filename))
    shutil.copy(f, os.path.join(destination, filename))
Run recursively through directory, move the files and launch move for directories:
import shutil
import os
def move(destination, depth=None):
    """Recursively move every file below *destination* directly into it.

    Args:
        destination: Directory that receives the files.
        depth: Position of the folder currently being scanned, relative to
            *destination*: ``None`` for the top level, a relative path
            string, or a sequence of path components.  Callers normally
            leave it at the default; it is filled in by the recursion.
    """
    if not depth:
        parts = []
    elif isinstance(depth, str):
        parts = [depth]
    else:
        parts = list(depth)
    current = os.path.join(destination, *parts)
    for entry in os.listdir(current):
        entry_path = os.path.join(current, entry)
        if os.path.isdir(entry_path):
            move(destination, parts + [entry])
        elif parts and os.path.isfile(entry_path):
            # Top-level files (no parts) are already where they belong.
            shutil.move(entry_path, destination)
import os.path, shutil
def move(src, dest):
    """Move every path found under *src* into *dest*, skipping what is already there.

    Relies on ``glob_recursive`` defined elsewhere.
    NOTE(review): assumes glob_recursive yields file paths - confirm.
    """
    def not_in_dest(x):
        # True when x does NOT already sit directly inside dest.  The
        # original predicate returned samefile(x, dest), i.e. the opposite.
        return not os.path.samefile(os.path.dirname(x) or os.curdir, dest)

    # Materialise the selection before moving anything.
    files_to_move = [x for x in glob_recursive(path=src) if not_in_dest(x)]
    for f in files_to_move:
        shutil.move(f, dest)
Source for glob_recursive. Does not change name of file, if they collide.
samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.
def splitPath(p):
    """Split path *p* into a list of its components, outermost first.

    Splitting stops as soon as ``os.path.split`` returns an empty head or
    an empty tail; the tail from that last split is the first element.
    """
    components = []
    remaining = p
    while True:
        head, tail = os.path.split(remaining)
        components.append(tail)
        if not head or not tail:
            break
        remaining = head
    components.reverse()
    return components
def safeprint(s):
    """Print *s*, falling back to a UTF-8 round trip if the console rejects it."""
    try:
        print(s)
    except UnicodeEncodeError:
        encoded = s.encode('utf8')
        if sys.version_info < (3,):
            print(encoded)
        else:
            print(encoded.decode(sys.stdout.encoding))
def flatten(root, doit):
    """Move all files below *root* into *root* and remove the emptied folders.

    Args:
        root: Top directory to flatten.
        doit: When true the moves and removals are performed; otherwise the
            same messages are printed but nothing on disk is changed.

    Directories are visited bottom-up (``topdown=False``).  A summary of the
    counted files and folders is printed at the end.
    """
    SEP = "¦"
    REPL = "?"
    folderCount = 0
    fileCount = 0
    if not doit:
        print("Simulating:")
    for path, dirs, files in os.walk(root, topdown=False):
        if path == root:
            continue
        # Hierarchy-based prefix: each path component after the first, with
        # SEP replaced inside it, followed by SEP.
        prefix = "".join(
            element.replace(SEP, REPL) + SEP for element in splitPath(path)[1:]
        )
        for f in files:
            newName = prefix + f.replace(SEP, REPL)
            safeprint("Moved: " + newName)
            if doit:
                shutil.move(os.path.join(path, f), os.path.join(root, f))
                # Uncomment, if you want filenames to be based on folder hierarchy.
                #shutil.move(os.path.join(path, f), os.path.join(root, newName))
            fileCount += 1
        safeprint("Removed: " + path)
        if doit:
            os.rmdir(path)
        folderCount += 1
    print("Done." if doit else "Simulation complete.")
    print("Moved files:", fileCount)
    print("Removed folders:", folderCount)
directory_path = r"C:\Users\jd\Documents\myFtpData"
flatten(directory_path, True)
Adding on to the answers, I believe my answer will satisfy all your needs, the other answers fail when there is a subdirectory and file with the same filename as the upper directory.
This was SOLVED here, Also look at my Github Repo for Structured File Copy and Flattened File Copy:
import os, fnmatch, shutil
PATTERN = '*.txt'  # fnmatch-style wildcard pattern (not a regex) used to select files
INPUT_FOLDER = os.path.abspath("A")  # or os.getcwd()
include_input_foldername = False
# Suffix of the output folder name, set when the input folder name is included.
if include_input_foldername:
    prepend = "_included"
else:
    prepend = ""
OUTPUT_FOLDER = f"Structured_Copy_{os.path.basename(INPUT_FOLDER)}{prepend}"
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    *pattern* is an ``fnmatch`` wildcard pattern (e.g. ``'*.txt'``), not a
    regular expression.
    """
    return [
        os.path.join(root, name)
        for root, dirs, files in os.walk(path)
        for name in fnmatch.filter(files, pattern)
    ]
all_files = find(PATTERN, INPUT_FOLDER)
# Relative paths are taken from the parent of the input folder when its own
# name should appear in the copy, otherwise from the input folder itself.
if include_input_foldername:
    relative_base = os.path.dirname(INPUT_FOLDER)
else:
    relative_base = INPUT_FOLDER
for each_path in all_files:
    relative_path = os.path.relpath(each_path, relative_base)
    flattened_relative_fullpath = os.path.join(OUTPUT_FOLDER, relative_path)
    # Recreate the source sub-folder inside the output folder before copying.
    os.makedirs(os.path.dirname(flattened_relative_fullpath), exist_ok=True)
    shutil.copy(each_path, flattened_relative_fullpath)
    print(f"Copied {each_path} to {flattened_relative_fullpath}")
print(f"Finished Copying {len(all_files)} Files from : {INPUT_FOLDER} to : {OUTPUT_FOLDER}")

Categories

Resources