ZipFile script that zips all desired file contents - python

So I'm dealing with a script that needs to zip all files into a single folder that share the same name. So, for example, the folder structure looks like this...
001.flt
001.hdr
001.prj
002.flt
002.hdr
002.prj
. .
.
700.flt
700.hdr
700.prj
In order to get a file to zip, I have a script that can handle a single file but does not recognize ["*.flt", "*.hdr", "*.prj"]
Is there a workaround for getting the script to recognize the file names based on their names and group them by name as well? I would like each individual zip file to contain file contents but zip it as
001.zip, 002.zip....
meaning the zip file contains the different file extensions
001.zip(
001.hdr,
001.prj,
001.flt
)
'''
import zipfile, sys, os, glob
inDir = r"\test\DEM"
outDir = r"\test\DEM_out"
filetype = "*.flt"
def zipfiletypeInDir(inDir, outDir):
    """Create one zip archive in *outDir* for every ``filetype`` match in *inDir*.

    Each file matching the module-level ``filetype`` pattern is handed to
    ``zipfiletype`` together with an output path named after the file's
    base name (``001.flt`` -> ``001.zip``).

    Returns False when *inDir* does not exist, True otherwise.
    """
    # Check that input directory exists
    if not os.path.exists(inDir):
        print("Input directory %s does not exist!" % inDir)
        return False
    # Create the output directory up front: opening an archive inside a
    # missing directory would otherwise fail on the very first file.
    if not os.path.isdir(outDir):
        os.makedirs(outDir)
    print("Zipping filetype(s) in folder %s to output folder %s" % (inDir, outDir))
    # Loop through "filetype" in input directory
    for inShp in glob.glob(os.path.join(inDir, filetype)):
        # Build the filename of the output zip file
        base_name = os.path.splitext(os.path.basename(inShp))[0]
        outZip = os.path.join(outDir, base_name + ".zip")
        # Zip the "filetype"
        zipfiletype(inShp, outZip)
    return True
def zipfiletype(infiletype, newZipFN):
    """Zip *infiletype* together with every file that shares its base name.

    For ``/data/001.flt`` the archive receives ``001.flt``, ``001.hdr``,
    ``001.prj`` and any other ``001.*`` file found next to it. Files are
    stored under their bare file name (no directory part).

    Parameters:
        infiletype: path of one component file, e.g. ``/data/001.flt``.
        newZipFN: path of the zip archive to create.

    Returns False when an existing archive could not be removed, True
    once the archive has been written.
    """
    print('Starting to Zip ' + infiletype + ' to ' + newZipFN)
    # Delete output zipfile if it already exists
    if os.path.exists(newZipFN):
        print('Deleting' + newZipFN)
        os.remove(newZipFN)
    # Output zipfile still exists, exit
    if os.path.exists(newZipFN):
        print('Unable to Delete' + newZipFN)
        return False
    # Match on the base name so every extension is picked up. The path is
    # used as-is (no lower-casing) so it still resolves on case-sensitive
    # filesystems, and glob.escape keeps characters such as '[' in a
    # directory or file name from being read as wildcards.
    pattern = glob.escape(os.path.splitext(infiletype)[0]) + ".*"
    # The context manager closes the archive even if a write fails
    with zipfile.ZipFile(newZipFN, 'w', zipfile.ZIP_DEFLATED) as zipobj:
        for infile in sorted(glob.glob(pattern)):
            # Skip .zip files so an archive written next to its inputs
            # is never added to itself
            if os.path.splitext(infile)[1].lower() != ".zip":
                print("Zipping %s" % infile)
                zipobj.write(infile, os.path.basename(infile))
    return True
if __name__ == "__main__":
    # Script entry point: zip everything from inDir into outDir, then report.
    zipfiletypeInDir(inDir, outDir)
    print("done!")

If the possible duplicate I provided doesn't answer your question....
One way would be to iterate over all the file names and make a dictionary grouping all the files with the same name.
In [54]: import collections, os, zipfile
In [55]: zips = collections.defaultdict(list)
In [55]:
In [56]: for f in os.listdir():
...: name, ext = os.path.splitext(f)
...: zips[name].append(f)
Then iterate over the dictionary; creating a new zip file for each key and adding each key's files to it.
In [57]: outdir = r'zips'
In [58]: for k,v in zips.items():
...: zname = k+'.zip'
...: fpath = os.path.join(outdir,zname)
...: #print(fpath)
...: with zipfile.ZipFile(fpath, 'w') as z:
...: for name in v:
...: z.write(name)

I found what I was looking for. This script identifies the names of the files and groups them by name using an iterator.
# Group the files of a single directory into separate zip archives,
# one archive per shared base name (001.flt, 001.hdr, ... -> 001.zip).
import fnmatch, os, glob, zipfile

# edit data folders for in and out variables
path = r"D:\Users\in_path"
out_path = r"C:\Users\out_path"

# Collect every base name exactly once, so each archive is written a single
# time instead of being recreated for every file that shares the name.
names = sorted({
    os.path.splitext(entry)[0]
    for entry in os.listdir(path)
    if os.path.isfile(os.path.join(path, entry))
})

for name in names:
    print(name)
    # archive that receives every file called '<name>.*'
    zip_path = os.path.join(out_path, name + '.zip')
    # Build the pattern from the full input path so the working directory
    # never has to change; glob.escape keeps characters such as '[' in a
    # path or name from being read as wildcards.
    pattern = os.path.join(glob.escape(path), glob.escape(name) + '.*')
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for member in glob.glob(pattern):
            # store the file under its bare name, without the directory part
            archive.write(member, os.path.basename(member))
    print('All files written to %s' % zip_path)

Related

How to rename all files to include the directory name?

I'm trying to use a For loop in the code below to go through a list of files and rename them with the file directory's name.
import re  # add this to your other imports
import os

# os.walk yields (dirpath, dirnames, filenames) tuples, so unpack them
# instead of treating each result as a path string.
for dirpath, _dirnames, filenames in os.walk("."):
    # name of the directory that holds the files being renamed
    folder = os.path.basename(os.path.abspath(dirpath))
    print(folder)
    for f_new in filenames:
        match = re.search(r'(Position)(\d+)', f_new)
        if match is None:
            # not a "Position<number>" file; leave it untouched
            continue
        # int() drops the zero padding: "Position014" -> "Position14"
        name = match.group(1) + str(int(match.group(2)))
        print(name)  # just for demonstration
        # keep the original extension so the renamed file stays usable
        dst = folder + '_' + name + os.path.splitext(f_new)[1]
        print(dst)
        # rename the file that was actually found, inside its own directory
        os.rename(os.path.join(dirpath, f_new), os.path.join(dirpath, dst))
Use pathlib
Path.rglob: This is like calling Path.glob() with '**/' added in front of the given relative pattern:
.parent or .parents[0]: An immutable sequence providing access to the logical ancestors of the path
If you want different parts of the path, index parents[] differently
file.parents[0].stem returns 'test1' or 'test2' depending on the file
file.parents[1].stem returns 'photos'
file.parents[2].stem returns 'stack_overflow'
.stem: The final path component, without its suffix
.suffix: The file extension of the final component
.rename: Rename this file or directory to the given target
The following code finds only .tiff files. Use *.* to get all files.
If you only want the first 10 characters of file_name:
file_name = file_name[:10]
from pathlib import Path

# set path to files
p = Path('e:/PythonProjects/stack_overflow/photos/')
# get all files in subdirectories with a tiff extension
files = list(p.rglob('*.tiff'))
# print files example
[WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test.tiff')]
# iterate through files
for file in files:
    file_path = file.parent  # directory that contains the file
    # .name keeps the whole directory name; .stem would cut a directory
    # called 'run.1' down to 'run'
    dir_name = file_path.name
    file_name = file.stem  # file name without its extension
    suffix = file.suffix  # file extension, including the leading dot
    file_name_new = f'{dir_name}_{file_name}{suffix}'  # make the new file name
    file.rename(file_path / file_name_new)  # rename the file in place
# output files renamed
[WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test1_test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test2_test.tiff')]

python How do I import multiple .txt files in a folder to add characters to each .txt file?

There are text files of various names in the folder 'a'. I want to read all of these text files and add the letter 'b' to each text file. What should I do?
import os
from glob import glob

cwd = os.getcwd()
input_dir = os.path.join(cwd, "my .txt files dir")
# Match the .txt files inside the folder; globbing the folder path itself
# would only ever return the folder.
sorts = sorted(glob(os.path.join(input_dir, "*.txt")), key=lambda x: (len(x), x))
data = "add text"
for txt_path in sorts:
    # open each matched file (not the folder) in append mode; the context
    # manager closes it even if the write fails
    with open(txt_path, 'a') as f:
        f.write(data)
Append data to file:
- first: get all file in folder a.
- second: find extension with .txt.
- third: open it and do something('append', or 'rewrite').
Demo:
import os

# your .txt files dir
path = 'a'
# append data what you want
appendData = 'b'
# your dst file extension
fileExt = '.txt'
# # Extract extension from filename
# fileExt = os.path.splitext(fileNames[0])[1]

# Only the top level of ``path`` is needed, so list it directly instead of
# walking the whole tree; a missing folder simply yields no files.
if os.path.isdir(path):
    fileNames = [
        entry for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry))
    ]
else:
    fileNames = []
fileNames.sort(key=len)
fileNums = len(fileNames)

for fileName in fileNames:
    if fileName.endswith(fileExt):
        fileFullPath = os.path.join(path, fileName)
        # append mode keeps the existing content and adds to the end
        with open(fileFullPath, 'a') as f:
            f.write(appendData)
Like the others said, this is an easy question whose answer could easily be found on Google. Anyway, here's how to do it:
from os import listdir
from os.path import isfile, isdir, join

# split the entries of the "files" folder into plain files and sub-directories
files = [file for file in listdir("files") if isfile(join("files", file))]
directories = [directory for directory in listdir("files") if isdir(join("files", directory))]
print(files)
for file_name in files:
    try:
        # the context manager closes the file even when the write fails
        with open(join("files", file_name), "a") as file:
            file.write("b")
    except IOError as err:
        print("Could not open file because : ", err)
Replace "files" with the directory where your files are, or with the path to that directory, like "directory0/directory1/directory_with_files".
Avoid opening files with
f = open(input_dir, 'a')
f.close()
Instead
with open(input_dir, 'a') as inputFile:
Do something
Also what you want is
import os
import glob  # We will use this module to open only .txt files

path = 'your/path'
# append 'b' to every .txt file directly inside ``path``
for filename in glob.glob(os.path.join(path, '*.txt')):
    with open(filename, 'a') as inputFile:
        inputFile.write('b')

Need help removing specific file extension from a folder

I am currently writing a python code to access each directory and delete the specific type of file extension specified. However, I only want the code to delete the files if there are two files with same name but different file extensions are present.
ie. I only want mytext.txt to delete if mytext.txt and mytext.docx are present both in the same folder, if only mytext.txt is present then I want the code to skip that specific folder.
I have added the following lines to remove the files with extension no matter the condition:
# Remove every .txt entry, whether or not a same-named file exists
for item in test:
    if not item.endswith('.txt'):
        continue
    os.remove(os.path.join(pathforRemove, item))
If 'f1.txt', 'f2.png', 'f2.txt', 'f3.png', 'f4.txt'
are your files:
import os
from collections import defaultdict

test = ['f1.txt', 'f2.png', 'f2.txt', 'f3.png', 'f4.txt']
# construct a filename to extensions map
fname_to_ext = defaultdict(set)
# splitext returns (name, '') for a name without a dot, where slicing at
# rfind('.') would chop off the last character instead
pairs = [os.path.splitext(s) for s in test]
for fname, ext in pairs:
    fname_to_ext[fname].add(ext)
for fname, exts in fname_to_ext.items():
    # delete the .txt only when the same name exists with another extension
    if len(exts) > 1 and '.txt' in exts:
        print('deleting: ', fname + '.txt')
        # os.remove(os.path.join(pathforRemove, fname + '.txt'))
This prints:
deleting: f2.txt
You can slightly modify your code to check whether this condition holds by storing the names in a dict whose values are lists. Then we gather all the values from the dictionary that have a length of at least two and keep the files that end in '.txt'. Once we have all those files we delete them.
import os
from collections import defaultdict

recs = defaultdict(list)
for item in test:
    name = os.path.basename(item)
    # splitext strips only the final extension, so 'my.notes.txt' is grouped
    # with 'my.notes.docx' rather than with every file that starts with 'my.'
    no_ext = os.path.splitext(name)[0]
    recs[no_ext].append(name)
# keep the .txt files whose base name occurs at least twice
to_delete = [val for v in recs.values() for val in v if len(v) >= 2 and val.endswith('.txt')]
for item in to_delete:
    os.remove(os.path.join(pathforRemove, item))
You may try this code snippet to see if it fulfills your request:
import os

rootDir = '/test-dir-traverse'
extensionToBeRetained = 'docx'
extensionToBeRemoved = 'txt'

for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    # set lookup replaces the inner loop over every file in the folder
    namesInDir = set(fileList)
    for fnameToBeRemoved in fileList:
        print('\t%s' % fnameToBeRemoved)
        stem, ext = os.path.splitext(fnameToBeRemoved)
        # compare the real extension, so a name that merely ends in the
        # letters 'txt' is not treated as a .txt file
        if ext != '.' + extensionToBeRemoved:
            continue
        if stem + '.' + extensionToBeRetained in namesInDir:
            # format the message before printing it; calling .format on the
            # result of print() fails on Python 3
            print('Deleting file : {}'.format(fnameToBeRemoved))
You can adjust file extensions and change further.

Python: program to sort files according to entries in csv file

import os, unicodecsv as csv

# open and store the csv file
IDs = {}
with open('labels.csv', 'rb') as csvfile:
    timeReader = csv.reader(csvfile, delimiter=',')
    # build dictionary with associated IDs
    for row in timeReader:
        IDs[row[0]] = row[1]

# move files
path = 'train/'
tmpPath = 'train2/'
for oldname in os.listdir(path):
    # ignore files in path which aren't in the csv file
    if oldname in IDs:
        try:
            os.rename(os.path.join(path, oldname), os.path.join(tmpPath, IDs[oldname]))
        except OSError:
            # catch only filesystem failures; a bare except would also hide
            # programming errors and keyboard interrupts
            print('File ' + oldname + ' could not be renamed to ' + IDs[oldname] + '!')
I am trying to sort my files according to this csv file. But the file contains many ids with same name. Is there a way to move files with same name to 1 folder or adding a number in front of a file if the file with same name already exist in directory?
Example-
id name
001232131hja1.jpg golden_retreiver
0121221122ld.jpg black_hound
0232113222kl.jpg golden_retreiver
0213113jjdsh.jpg alsetian
05hkhdsk1233a.jpg black_hound
I actually want to move all the files having id corresponding to golden_retreiver to one folder and so on.
Based on what you describe, here is my approach:
import csv
import os

SOURCE_ROOT = 'train'
DEST_ROOT = 'train2'

with open('labels.csv') as infile:
    next(infile)  # Skip the header row
    reader = csv.reader(infile)
    seen = set()
    for row in reader:
        if not row:
            # blank line in the csv; nothing to move
            continue
        dogid, breed = row
        # Create a new directory if needed
        if breed not in seen:
            breed_dir = os.path.join(DEST_ROOT, breed)
            # tolerate a directory left over from an earlier run
            if not os.path.isdir(breed_dir):
                os.makedirs(breed_dir)
            seen.add(breed)
        # NOTE(review): assumes the id column holds names without the
        # '.jpg' extension - confirm against the real labels.csv
        src = os.path.join(SOURCE_ROOT, dogid + '.jpg')
        dest = os.path.join(DEST_ROOT, breed, dogid + '.jpg')
        try:
            os.rename(src, dest)
        except OSError as e:
            # OSError exists on every platform; WindowsError does not
            print(e)
Notes
For every line in the data file, I create the breed directory at the destination. I use the set seen to make sure that I only create each directory once.
After that, it is a trivial matter of moving files into place.
One possible move error: the file does not exist in the source dir. In that case, the code just prints out the error and ignores it.

Check if a file exists using a text file

I have a folder (Molecules) with many sdf files (M00001.sdf, M00002.sdf and so on) representing different molecules. I also have a csv where each row represents a molecule (M00001, M00002 etc).
I'm writing code to get the files in the Molecules folder whose names appear as a row in the csv file.
First attempt
import os
path_to_files = '/path_to_folder/Molecules' # path to Molecules folder
for files in os.listdir(path_to_files):
names = os.path.splitext(files)[0] # get the basename (molecule name)
with open('molecules.csv') as ligs: # Open the csv file of molecules names
for hits in ligs:
if names == hits:
print names, hits
else:
print 'File is not here'
However this returns nothing on the command line (literally nothing). What is wrong with this code?
I am not sure that this is the best way (I only know that the following code works for my data) but if your molecule.csv has the standard csv format, i.e. "molecule1,molecule2,molecule3 ...", you can try to rearrange your code in this way:
import os
import csv

path_to_files = '/path_to_folder/Molecules'  # path to Molecules folder

# Read the csv once and keep every cell in a set, instead of re-opening and
# re-scanning the file for each entry in the folder.
with open('molecules.csv', 'r') as ligs:
    known_hits = set(hit.strip() for row in csv.reader(ligs) for hit in row)

for files in os.listdir(path_to_files):
    names = os.path.basename(files)
    # strip only a trailing ".sdf"; str.replace would also cut ".sdf" out of
    # the middle of a name
    if names.endswith(".sdf"):
        names = names[:-len(".sdf")]
    # one message per file rather than one per csv cell
    if names in known_hits:
        print(names)
    else:
        print('File is not here')
See csv File Reading and Writing for csv module
I solved the problem with a rather brute approach
import os
import csv
import shutil

# Replace both placeholders with real paths before running the script.
path_to_files = None  # path to Molecules folder
new_path = None  # new folder to save files

# create the folder to store the molecules, tolerating an earlier run
if not os.path.isdir(new_path):
    os.makedirs(new_path)

# One molecule name per line; a set gives constant-time membership tests and
# the context manager closes the file once it has been read.
with open('molecules.csv', 'r') as hits:
    ligands = set(line.strip() for line in hits)

for files in os.listdir(path_to_files):
    molecule_name, extension = os.path.splitext(files)
    # copy only real .sdf files whose molecule name is listed in the csv
    if extension != '.sdf' or molecule_name not in ligands:
        continue
    old_file = os.path.join(path_to_files, files)
    new_file = os.path.join(new_path, files)
    shutil.copy(old_file, new_file)

Categories

Resources