Avoid duplicate file names in a folder in Python

Avoid duplicate file names in a folder in Python - python

I want to download some files and save them in a folder and there may be some duplication in file names, so I want to avoid this to happen.
I think it needs an auto-naming system but now i don't know how to make it.
I used shutil and urllib2 to write my function.
This is a part of my code :
path = 'C:/DL/Others/'+filename+file_ext
with open(path, 'wb') as fp:
shutil.copyfileobj(req, fp)
As you know we can check that if a file exists or not by os.path.exists('path').
I wanna to rename my files and save them to avoid duplicated names using a pattern, for example by adding a number to file name.So if there was 4 files with same name, "fname", I want 4 files in this pattern :
fname - fname(1) - fname(2) - fname(3)

Something like this is probably reasonable:
path = 'c:/DL/Others/%s%s' % (filename, file_ext)
uniq = 1
while os.path.exists(path):
path = 'c:/DL/Others/%s_%d%s' % (filename, uniq, file_ext)
uniq += 1
If the original path doesn't exist you get no _1, but if it does exist it'll count up until it finds one that's free.

Track each filename's count as you create it:
fname_counts = {}
# ... whatever generates filename and file_ext goes here...
if filename + file_ext in fname_counts:
fname_counts[filename + file_ext] += 1
else:
fname_counts[filename + file_ext] = 0
# now check if it's a dupe when you create the path
if fname_counts[filename + file_ext]:
path = 'C:/DL/Others/%s_%s.%s' % (filename, fname_counts[filename + file_ext], file_ext)
else:
path = 'C:/DL/Others/' + filename + file_ext
Example at work with two duplicates ("test.txt"):
>>> filenames_and_exts = [('test', '.txt'), ('test', '.txt'), ('test2', '.txt'), ('test', '.cfg'), ('different_name', '.txt')]
>>> fname_counts = {}
>>> for filename, file_ext in filenames_and_exts:
if filename + file_ext in fname_counts:
fname_counts[filename + file_ext] += 1
else:
fname_counts[filename + file_ext] = 0
if fname_counts[filename + file_ext]:
path = 'C:/DL/Others/%s_%s%s' % (filename, fname_counts[filename + file_ext], file_ext)
else:
path = 'C:/DL/Others/' + filename + file_ext
print path
C:/DL/Others/test.txt
C:/DL/Others/test_1.txt
C:/DL/Others/test2.txt
C:/DL/Others/test.cfg
C:/DL/Others/different_name.txt

Related

Losing absolute paths after returning them from function

I have this function here that renames (if necessary) all files in a given folder and returns all filenames as absolute paths:
I forgot to mention: filepath is always an absolute path.
import re
import os
# This function renames all files in a given
# folder and return them as absolute paths
def rename_get_Files(filepath):
files = os.listdir(filepath)
files_list = []
counter = 1
files_renamed = 0
for filename in files:
# If the file does not start with 'Offer_Pic_' > rename it and add to the files_list
if not re.search('Offer_Pic_.+', filename):
file_name, file_extension = os.path.splitext(filepath+filename)
print(file_extension)
new_filename = "Offer_Pic_" + str(counter) + file_extension
old_filename = filepath + filename
new_filename = filepath + new_filename
# rename() function will
# rename all the files
os.rename(old_filename, new_filename)
counter += 1
print(f'This is the new filename: ' + new_filename)
files_list.append(new_filename)
files_renamed += 1
else:
# Append the absolute paths of all already correctly named files to the files_list
files_list.append(os.path.abspath(filename))
print(files_list)
print(f'We have renamed ' + str(files_renamed) + ' files.')
return files_list
However, when I call the function from another one to use these absolute paths:
pictures = rename_get_Files(filepath)
print(pictures)
... it returns the paths being inside the script's working directory.
Because of that, the next function of my script crashes because it assumes that the files are in the working directory of the script - which they are not as they were not moved from their initial location (filepath).
Please help me to keep the absolute file paths.
I tried to regain the absolute path of the pictures, but the wrong one (inside script dir keeps being returned)
for pic in pictures:
abs_path_pic = os.path.abspath(pic)
print(pic)
print(abs_path_pic)
pictureBox.send_keys(abs_path_pic)

I found the error: initially no absolute path was returned by os.
I added to the end of the function:
path = os.path.abspath(filepath)
files_list = [entry.path for entry in os.scandir(path) if entry.is_file()]
The whole function now looks like this:
# This function renames all files in a given folder and return them as absolute paths
def rename_get_files(filepath):
print(f'this is the given filepath: {filepath}')
files = os.listdir(filepath)
files_list = []
print(f'these are our initial files: {files}')
counter = 1
files_renamed = 0
for filename in files:
# If the file does not start with 'Offer_Pic_' > rename it and add to the files_list
if not re.search('Offer_Pic_.+', filename):
file_name, file_extension = os.path.splitext(filepath+filename)
print(file_extension)
new_filename = "Offer_Pic_" + str(counter) + file_extension
old_filename = filepath + "/" + filename
print(old_filename)
new_filename = filepath + "/" + new_filename
print(new_filename)
# rename() function will
# rename all the files
os.rename(old_filename, new_filename)
counter += 1
print(f'This is the new filename: ' + new_filename)
files_list.append(os.path.abspath(new_filename))
files_renamed += 1
else:
# Append the absolute paths of all already correctly named files to the files_list
files_list.append(os.path.abspath(filename))
print(f'We have renamed ' + str(files_renamed) + ' files.')
path = os.path.abspath(filepath)
files_list = [entry.path for entry in os.scandir(path) if entry.is_file()]
print(f'this is our files_list, no files were renamed: {files_list}')
return files_list
Now it works perfectly! Thank you all for your help!

How to rename files after being copied based on creation time?

After copying files from a user provided source and destination, the files need to be renamed based on the creation time. The files have multiple extension types that need to be preserved while being renamed and a record original filenames. The issue arises when attempting to rename the file after it has been copied to the destination. All attempted solutions creates an exception in callback. What's the best approach here?
def filter_copy(self):
current_dir = self.Source.get()
destination_dir = self.Destination.get()
extensions = (['.avi', '.mpg'])
os.chdir(current_dir)
for root, dir, files in os.walk('.'):
for file in files:
if os.path.splitext(file)[1] in extensions:
src = os.path.join(root, file)
time.sleep(0.1)
logging.info("Copying the file %s..." % file)
print("Copying the file %s..." % file)
try:
with open('OriginalFilesReceipt.txt', 'w') as f:
f.write(file)
except FileNotFoundError:
logging.info("The directory does not exist to create to generate a receipt")
shutil.copy(src, destination_dir)
for names in os.listdir('.'):
t = os.path.getmtime(names)
v = datetime.datetime.fromtimestamp(t)
x = v.strftime('%Y%m%d-%H%M%S')
os.rename(names, x)

Found a solution based on the work from yzhong52 here: https://gist.github.com/tvdsluijs/1640613a32416b1197fc36b45e7b4999
The main error that was being received should have been obvious. The working directory wasn't the destination directory. The solution for the original question:
os.chdir(destination_dir)
for names in os.listdir('.'):
#split into filename and extension
filename, extension = os.path.splitext(names)
if (extension in extensions):
create_time = os.path.getctime(names)
format_time = datetime.datetime.fromtimestamp(create_time)
format_time_string = format_time.strftime("%Y-%m-%d %H.%M.%S")
newfile = format_time_string + extension
#if other file with same timestamp
if(newfile in newfilesDictionary.keys()):
index = newfilesDictionary[newfile] + 1
newfilesDictionary[newfile] = index
newfile = format_time_string + '-' + str(index) + extension
else:
newfilesDictionary[newfile] = 0
os.rename(names, newfile)
num_files = num_files + 1
print(names.rjust(35) + ' => ' + newfile.ljust(35))

Unexpected end of data when zipping zip files in Python

Good day.
I wrote a little Python program to help me easily create .cbc files for Calibre, which is just a renamed .zip file with a text file called comics.txt for TOC purposes. Each chapter is another zip file.
The issue is that the last zip file zipped always has the error "Unexpected end of data". The file itself is not corrupt, if I unzip it and rezip it it works perfectly. Playing around it seems that the problem is that Python doesn't close the last zip file after zipping it, since I can't delete the last zip while the program is still running since it's still open in Python. Needless to say, Calibre doesn't like the file and fails to convert it unless I manually rezip the affected chapters.
The code is as follows, checking the folders for not-image files, zipping the folders, zipping the zips while creating the text file, and "changing" extension.
import re, glob, os, zipfile, shutil, pathlib, gzip, itertools
Folders = glob.glob("*/")
items = len(Folders)
cn_list = []
cn_list_filtered = []
dirs_filtered = []
ch_id = ["c", "Ch. "]
subdir_im = []
total = 0
Dirs = next(os.walk('.'))[1]
for i in range(0, len(Dirs)):
for items in os.listdir("./" + Dirs[i]):
if items.__contains__('.png') or items.__contains__('.jpg'):
total+=1
else:
print(items + " not an accepted format.")
subdir_im.append(total)
total = 0
for fname in Folders:
if re.search(ch_id[0] + r'\d+' + r'[\S]' + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[0] + r'\d+', fname):
cn = re.findall(ch_id[0] + "(\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+' + '[\S]' + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+[\S]\d+)", fname)[0]
cn_list.append(cn)
elif re.search(ch_id[1] + r'\d+', fname):
cn = re.findall(ch_id[1] + "(\d+)", fname)[0]
cn_list.append(cn)
else:
print('Warning: File found without proper filename format.')
cn_list_filtered = set(cn_list)
cn_list_filtered = sorted(cn_list_filtered)
cwd = os.getcwd()
Dirs = Folders
subdir_zi = []
total = 0
for i in range(0, len(cn_list_filtered)):
for folders in Dirs:
if folders.__contains__(ch_id[0] + cn_list_filtered[i] + " ")\
or folders.__contains__(ch_id[1] + cn_list_filtered[i] + " "):
print('Zipping folder ', folders)
namezip = "Chapter " + cn_list_filtered[i] + ".zip"
current_zip = zipfile.ZipFile(namezip, "a")
for items in os.listdir(folders):
if items.__contains__('.png') or items.__contains__('.jpg'):
current_zip.write(folders + "/" + items, items)
total+=1
subdir_zi.append(total)
total = 0
print('Folder contents in order:', subdir_im, ' Total:', sum(subdir_im))
print("Number of items per zip: ", subdir_zi, ' Total:', sum(subdir_zi))
if subdir_im == subdir_zi:
print("All items in folders have been successfully zipped")
else:
print("Warning: File count in folders and zips do not match. Please check the affected chapters")
zips = glob.glob("*.zip")
namezip2 = os.path.basename(os.getcwd()) + ".zip"
zipfinal = zipfile.ZipFile(namezip2, "a")
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
Data = []
for i in range (0,len(cn_list_filtered),1):
Datai = ("Chapter " + cn_list_filtered[i] + ".zip" + ":Chapter " + cn_list_filtered[i] + "\r\n")
Data.append(Datai)
Dataok = ''.join(Data)
with zipfile.ZipFile(namezip2, 'a') as myzip:
myzip.writestr("comics.txt", Dataok)
zipfinal.close()
os.rename(namezip2, namezip2 + ".cbc")
os.system("pause")
I am by no means a programmer, that is just a Frankenstein monster code I eventually managed to put together by checking threads, but this last issue has me stumped.
Some solutions I tried are:
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[i].close()
Fails with:
zips[i].close()
AttributeError: 'str' object has no attribute 'close'
and:
for i in range(0, len(zips), 1):
zipfinal.write(zips[i],zips[i])
zips[len(zips)].close()
Fails with:
zips[len(zips)].close()
IndexError: list index out of range
Thanks for the help.

This solved my issue:
def generate_zip(file_list, file_name=None):
zip_buffer = io.BytesIO()
zf = zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED)
for file in file_list:
print(f"Filename: {file[0]}\nData: {file[1]}")
zf.writestr(file[0], file[1])
**zf.close()**
with open(file_name, 'wb') as f:
f.write(zip_buffer.getvalue())
f.close()

Batch rename files in Python 2.7

I'm trying to rename files in a directory so that it the new name is the original name followed by a space + "17-" + an incrementally increasing number.
The code below is just renaming files from 151, upward. How do I keep the original name, adding the text "17-" and the numbers?
import os
path = 'C:\Users\dcs\Desktop\Test direct'
files = os.listdir(path)
i = 151
for file in files:
os.rename(os.path.join(path, file), os.path.join(path, str(i)+'.TIF'))
i = i+1

Simply by writing that concatenation expression. If I understand your details correctly, your new loop body would be
new_name = file + " 17-" + str(i) + ".TIF"
os.rename(os.path.join(path, file),
os.path.join(path, new_name) )
i += 1
This would change file "ABC" into file "ABC 17-151.TIF"; the next would contain "17-152", and so on.
FACEPALM
file is a built-in type. Change the loop index.
for fname in files:
new_name = fname + " 17-" + str(i) + ".TIF"
os.rename(os.path.join(path, fname), new_name)
i += 1

If I understand Prune's suggestion above, which I obviously don't, it would look like:
import os
path = 'C:\Users\dcs\Desktop\Test direct'
files = os.listdir(path)
i = 151
#The part that I want to strip the extensions from
for file in files:
new_name = file[:-3]
#The part that words correctly (thanks Prune)
for fname in files:
new_name = fname + " 17-" + str(i) + ".TIF"
os.rename(os.path.join(path, fname), new_name)
i += 1
However the first part, meant to strip the file of it's extension isn't working.

Converting multiple gz file within subdirectories into csv

I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within it. If possible, I would also like to combine all the CSV within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os
subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
print "Working On " + dirlist
total = len(dirlist)
counter = 0
for dir in dirlist:
print "Working On " + dir
f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
file_content = f.read()
f.close()
print "25% Complete"
filename = '/' + str(subdirlist) + '/temp.txt'
target = open(filename, 'w')
target.write(file_content)
target.close()
print "50% Complete!"
csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
in_txt = csv.reader(open(filename, "rb"), delimiter = '\t')
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
os.remove(filename)
os.remove('/' + str(subdirlist) + '/' + dir)
counter+=1
print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
print "SubDirectory Converted!"
print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
subcounter+=1
print "All Files Converted!"
Thanks in advance

To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce # in Python 3.x only
def get_filelist(root, extensions=None):
"""Return a list of files (path and name) within a supplied root directory.
To filter by extension(s), provide a list of strings, e.g.
get_filelist(root, ["zip", "csv"])
"""
return reduce(lambda x, y: x+y,
[[sep.join([item[0], name]) for name in item[2]
if (extensions is None or
name.split(".")[-1] in extensions)]
for item in walk(root)])

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Avoid duplicate file names in a folder in Python - python

Related

Losing absolute paths after returning them from function

How to rename files after being copied based on creation time?

Unexpected end of data when zipping zip files in Python

Batch rename files in Python 2.7

Converting multiple gz file within subdirectories into csv

Categories

Resources