Python: simple batch rename files in windows folder - python

Trying to create a simple code, to batch rename a folder in windows.
Musts:
change every number , like "file02.txt", turn 02 into 0002
maybe work for every file format, like jpg, png, txt, docx and so on (becuse I'm not sure what will be in the folder, this code might be used for image sequences...)
Is this possible?
I did test versions, combination of the little knowledge I have, but it gets me confused.
my code so far:
import os
import sys
folder_path = os.listdir(raw_input("Insert folder path: "))
print "Files in folder: %s" % folder_path
# a split tool
def mysplit(s):
head = s.rstrip('0123456789')
tail = s[len(head):]
return head, tail
# a function to make a new name with needed 0000
def new_filename(filename):
file_name_part, ext = os.path.splitext(filename) # file01 and .ext
original_name, number = mysplit(file_name_part) # 01 and file
add_zero = number.rjust(4, "0") # add 0001
new_name = original_name + add_zero + ext
print new_name
# new_name comes like this ['file0001.txt'] but seperate, not in a list? Why?
for current_file_n in folder_path:
new = new_filename(current_file_n)
print list([new]) # trying to make the str into a list....
re_name = os.renames(current_file_n, new)
print re_name
print "Renamed files: %s" % folder_path
The desired outcome is the same as the beginning list, but collated with zeros,like this: ['file0001.txt', 'file0002.txt', 'file0003.txt'......'file0015.txt']
I've got errors like windows error: can't find file, and another error; can't connect str and list?
I need an explanation of what I'm doing wrong as simple as possible, or is there another method that I can use that will give me the desired outcome?

As martineau said your indentation is messed up.
Here's the working code:
import os
import sys
# a split tool
def mysplit(s):
head = s.rstrip('0123456789')
tail = s[len(head):]
return head, tail
# a function to make a new name with needed 0000
def new_filename(filename):
file_name_part, ext = os.path.splitext(filename) # file01 and .ext
original_name, number = mysplit(file_name_part) # 01 and file
add_zero = number.rjust(4, "0") # add 0001
new_name = original_name + add_zero + ext
return new_name
# new_name comes like this ['file0001.txt'] but seperate, not in a list? Why?
if __name__ == '__main__':
folder_path = os.listdir(raw_input("Insert folder path: "))
print "Files in folder: %s" % folder_path
renamed_files = []
for current_file_n in folder_path:
new = new_filename(current_file_n)
renamed_files.append(new) # Add renamed file's name to a list
try:
os.renames(current_file_n, new) #It doesn't return anything
print new
except:
print "Unexpected error while renaming %s:%s"%(new, sys.exc_info()[0])
print "Renamed files: %s" % renamed_files
Hope this helps

Your code can be simplified a lot by using regular expression substitution. re.sub() can take a replacement function. In this case adding leading zeroes to the first number found in the filename.
import os, re
def renumber_files(directory, zeroes=4):
os.chdir(directory)
for filename in os.listdir(directory):
new_name = re.sub(r'\d+', lambda m: m.group().zfill(zeroes), filename, count=1)
os.rename(filename, new_name)
renumber_files(raw_input("Insert folder path: "))
This works because re.sub() can take a callable as the replacement argument.
Signature: re.sub(pattern, repl, string, count=0, flags=0)
Return the string obtained by replacing the leftmost non-overlapping
occurrences of the pattern in string by the replacement repl. repl
can be either a string or a callable; if a string, backslash escapes
in it are processed. If it is a callable, it's passed the match
object and must return a replacement string to be used.
In the lambda m.group() returns a string matching the pattern \d+. For instance "1", "564645" or "005".
The next step, str.zfill(4), turns those into "0001", "564645", or "0005".

Related

Removing white space after looping a list

So, I've tried stripping from several of the variables, and I know their is no white space previously to the return statement, so I tried striping the variable in the return statement but the white space is still there...
Something easy I'm sure or maybe it would be best to re-write the loop?
def main():
file = input("File name:")
extension(file)
def extension(s):
split = (s.split("."))
join_s = (''.join(split[1]))
image_e = ['jpg', 'gif', 'jpeg', 'png']
for i in image_e:
print(image_e)
if join_s in image_e:
return print("Image/", join_s)
else:
return print("Application/", join_s)
main()
Output looks something like this:
Image/ jpg
Edit: One of the comments had asked why I used return and it was because if I just used print it would display the print 3-4 different times, is there a reason why I shouldn't use return in this situation or why it exactly does display it 4 times in row? (Assuming because of the loop.)
It looks like you want to generate a content type string. This will do it:
import os
def extension(s):
ext = s.rsplit('.')[1] # split on the *last* period
if ext in ('jpg', 'gif', 'jpeg', 'png'):
return f'Image/{ext}'
else:
return f'Application/{ext}'
file = input('File name: ')
content_type = extension(file)
print(content_type)
Output:
File name: test.jpg
Image/jpg
Looks like you want to determine the mimetype from a given filename.
import mimetypes
filename = "somefilename.png"
guessed_type, encoding = mimetypes.guess_type(filename)
guessed_type:
image/png
Python has many features/functions available to you via the standard libraries.
Here are some other methods:
os
import os
​
filename = "somefilename.png"
base, ext = os.path.splitext(filename)
('somefilename', '.png')
pathlib
from pathlib import Path
filename = "somefilename.png"
f = Path(filename)
f.suffix
'.png'
For strings python has .startswith() and .endswith() methods, which can optionally take an iterable, so you can write this without splitting the string:
filename = "somefilename.png"
image_exts = ('jpg', 'gif', 'jpeg', 'png')
if filename.endswith(image_exts):
ext = filename.split(".")[-1]
print(f"Image/{ext}")

In Python how to whitelist certain characters in a filename?

To secure uploaded image names, I'd like to strip out image's filenames from anything but string.ascii_letters , string.digits, dot and (one) whitespace.
So I'm wondering what is the best method to check a text against other characters?
import re
import os
s = 'asodgnasAIDID12313%*(#&(!$ 1231'
result = re.sub('[^a-zA-Z\d\. ]|( ){2,}','',s )
if result =='' or os.path.splitext(result)[0].isspace():
print "not a valid name"
else:
print "valid name"
EDIT:
changed it so it will also whitelist only one whitespace + added import re
Not sure if it's what you need but give it a try:
import sys, os
fileName, fileExtension = os.path.splitext('image 11%%22.jpg')
fileExtension = fileExtension.encode('ascii', 'ignore')
fileName = fileName.encode('ascii', 'ignore')
if fileExtension[1:] in ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'tga']:
fileName = ''.join(e for e in fileName if e.isalnum())
print fileName+fileExtension
#image1122.jpg
else:
print "Extension not supported"
isalnum()
https://docs.python.org/2/library/stdtypes.html#str.isalnum
I wouldn't use regex for this. The only tricky requirement is the single space, but that can be done, too.
import string
whitelist = set(string.ascii_letters + string.digits)
good_filename = "herearesomelettersand123numbers andonespace"
bad_filename = "symbols&#! and more than one space"
def strip_filename(fname, whitelist):
"""Strips a filename
Removes any character from string `fname` and removes all but one
whitespace.
"""
whitelist.add(" ")
stripped = ''.join([ch for ch in fname if ch in whitelist])
split = stripped.split()
result = " ".join([split[0], ''.join(split[1:])])
return result
Then call it with:
good_sanitized = strip_filename(good_filename, whitelist)
bad_sanitized = strip_filename(bad_filename, whitelist)
print(good_sanitized)
# 'herearesomelettersand123numbers andonespace'
print(bad_sanitized)
# 'symbols andmorethanonespace'

Python case insensitive file name?

I need to load a file given it's name, but the name I get is case insensitive. "A.txt" could actually be "a.txt". How to do this the fast way (not generate all possible names and try each)?
You could list the directory the file's in (os.listdir), and see if there are matches for your filename. The matching can be done by lower-casing both filenames and comparing.
You can't do it without taking a directory listing and taking both the item you're looking for and each in the directory to a common case for comparison. The filesystem is case sensitive and that's all there is to it.
Here is a function (well, two) that I wrote to do it completely, matching a filename in an insensitive manner, recursively: http://portableapps.hg.sourceforge.net/hgweb/portableapps/development-toolkit/file/775197d56e86/utils.py#l78.
def path_insensitive(path):
"""
Get a case-insensitive path for use on a case sensitive system.
>>> path_insensitive('/Home')
'/home'
>>> path_insensitive('/Home/chris')
'/home/chris'
>>> path_insensitive('/HoME/CHris/')
'/home/chris/'
>>> path_insensitive('/home/CHRIS')
'/home/chris'
>>> path_insensitive('/Home/CHRIS/.gtk-bookmarks')
'/home/chris/.gtk-bookmarks'
>>> path_insensitive('/home/chris/.GTK-bookmarks')
'/home/chris/.gtk-bookmarks'
>>> path_insensitive('/HOME/Chris/.GTK-bookmarks')
'/home/chris/.gtk-bookmarks'
>>> path_insensitive("/HOME/Chris/I HOPE this doesn't exist")
"/HOME/Chris/I HOPE this doesn't exist"
"""
return _path_insensitive(path) or path
def _path_insensitive(path):
"""
Recursive part of path_insensitive to do the work.
"""
if path == '' or os.path.exists(path):
return path
base = os.path.basename(path) # may be a directory or a file
dirname = os.path.dirname(path)
suffix = ''
if not base: # dir ends with a slash?
if len(dirname) < len(path):
suffix = path[:len(path) - len(dirname)]
base = os.path.basename(dirname)
dirname = os.path.dirname(dirname)
if not os.path.exists(dirname):
dirname = _path_insensitive(dirname)
if not dirname:
return
# at this point, the directory exists but not the file
try: # we are expecting dirname to be a directory, but it could be a file
files = os.listdir(dirname)
except OSError:
return
baselow = base.lower()
try:
basefinal = next(fl for fl in files if fl.lower() == baselow)
except StopIteration:
return
if basefinal:
return os.path.join(dirname, basefinal) + suffix
else:
return
This is a simple recursive function to to the search Eli suggests above:
def find_sensitive_path(dir, insensitive_path):
insensitive_path = insensitive_path.strip(os.path.sep)
parts = insensitive_path.split(os.path.sep)
next_name = parts[0]
for name in os.listdir(dir):
if next_name.lower() == name.lower():
improved_path = os.path.join(dir, name)
if len(parts) == 1:
return improved_path
else:
return find_sensitive_path(improved_path, os.path.sep.join(parts[1:]))
return None
Make a directory listing; and create a dictionary containing a mapping of upper-case filenames to their actual-case filenames. Then, make your input upper-case, and look for it in the dictionary.

Passing a relative path in a function

Can someone tell me if the following function declaration is the correct way to pass a relative path to a function? The call is only taking one variable. When I include a second variable (absolute path), my function does not work.
def extract(tar_url, extract_path='.'):
The call that does not work:
extract(chosen, path)
This works, but does not extract:
extract(chosen)
Full Code:
def do_fileExtract(self, line):
defaultFolder = "Extracted"
if not defaultFolder.endswith(':') and not os.path.exists('c:\\Extracted'):
os.mkdir('c:\\Extracted')
raw_input("PLACE .tgz FILES in c:\Extracted AT THIS TIME!!! PRESS ENTER WHEN FINISHED!")
else:
pass
def extract(tar_url, extract_path='.'):
print tar_url
tar = tarfile.open(tar_url, 'r')
for item in tar:
tar.extract(item, extract_path)
if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
extract(item.name, "./" + item.name[:item.name.rfind('/')])
userpath = "Extracted"
directory = os.path.join("c:\\", userpath)
os.chdir(directory)
path=os.getcwd() #Set log path here
dirlist=os.listdir(path)
files = [fname for fname in os.listdir(path)
if fname.endswith(('.tgz','.tar'))]
for item in enumerate(files):
print "%d- %s" % item
try:
idx = int(raw_input("\nEnter the file's number:\n"))
except ValueError:
print "You fail at typing numbers."
try:
chosen = files[idx]
except IndexError:
print "Try a number in range next time."
newDir = raw_input('\nEnter a name to create a folder a the c: root directory:\n')
selectDir = os.path.join("c:\\", newDir)
path=os.path.abspath(selectDir)
if not newDir.endswith(':') and not os.path.exists(selectDir):
os.mkdir(selectDir)
try:
extract(chosen, path)
print 'Done'
except:
name = os.path.basename(sys.argv[0])
print chosen
It looks like you missed an escape character in "PLACE .tgz FILES in c:\Extracted AT THIS TIME!!! PRESS ENTER WHEN FINISHED!"
I don't think raw_input sees the prompt string as a raw string, just the user input.
But this shouldn't affect the functionality of your program.
Are you on Unix or windows? I was under the impression that the on Unix you use / forward slash instead of \\ backslash as a separator.
I tested some code on this file:
http://simkin.asu.edu/geowall/mars/merpano0.tar.gz
The following code:
>>> from os import chdir
>>> import tarfile
>>> chdir(r'C:\Users\Acer\Downloads')
>>> tar_url = 'merpano0.tar.gz'
>>> print tar_url
merpano0.tar.gz
>>> tar = tarfile.open(tar_url, 'r')
>>> extract_path = 'C:\\Users\\Acer\\Downloads\\test\\'
>>> for item in tar:
tar.extract(item, extract_path)
executed cleanly with no problems on my end. In the test directory I got a single folder with some files, exactly as in the original tar file. Can you explain what you're doing differently in your code that might be bugging up?

Extracting extension from filename in Python

Is there a function to extract the extension from a filename?
Use os.path.splitext:
>>> import os
>>> filename, file_extension = os.path.splitext('/path/to/somefile.ext')
>>> filename
'/path/to/somefile'
>>> file_extension
'.ext'
Unlike most manual string-splitting attempts, os.path.splitext will correctly treat /a/b.c/d as having no extension instead of having extension .c/d, and it will treat .bashrc as having no extension instead of having extension .bashrc:
>>> os.path.splitext('/a/b.c/d')
('/a/b.c/d', '')
>>> os.path.splitext('.bashrc')
('.bashrc', '')
New in version 3.4.
import pathlib
print(pathlib.Path('yourPath.example').suffix) # '.example'
print(pathlib.Path("hello/foo.bar.tar.gz").suffixes) # ['.bar', '.tar', '.gz']
I'm surprised no one has mentioned pathlib yet, pathlib IS awesome!
import os.path
extension = os.path.splitext(filename)[1]
import os.path
extension = os.path.splitext(filename)[1][1:]
To get only the text of the extension, without the dot.
For simple use cases one option may be splitting from dot:
>>> filename = "example.jpeg"
>>> filename.split(".")[-1]
'jpeg'
No error when file doesn't have an extension:
>>> "filename".split(".")[-1]
'filename'
But you must be careful:
>>> "png".split(".")[-1]
'png' # But file doesn't have an extension
Also will not work with hidden files in Unix systems:
>>> ".bashrc".split(".")[-1]
'bashrc' # But this is not an extension
For general use, prefer os.path.splitext
worth adding a lower in there so you don't find yourself wondering why the JPG's aren't showing up in your list.
os.path.splitext(filename)[1][1:].strip().lower()
Any of the solutions above work, but on linux I have found that there is a newline at the end of the extension string which will prevent matches from succeeding. Add the strip() method to the end. For example:
import os.path
extension = os.path.splitext(filename)[1][1:].strip()
You can find some great stuff in pathlib module (available in python 3.x).
import pathlib
x = pathlib.PurePosixPath("C:\\Path\\To\\File\\myfile.txt").suffix
print(x)
# Output
'.txt'
With splitext there are problems with files with double extension (e.g. file.tar.gz, file.tar.bz2, etc..)
>>> fileName, fileExtension = os.path.splitext('/path/to/somefile.tar.gz')
>>> fileExtension
'.gz'
but should be: .tar.gz
The possible solutions are here
Although it is an old topic, but i wonder why there is none mentioning a very simple api of python called rpartition in this case:
to get extension of a given file absolute path, you can simply type:
filepath.rpartition('.')[-1]
example:
path = '/home/jersey/remote/data/test.csv'
print path.rpartition('.')[-1]
will give you: 'csv'
Just join all pathlib suffixes.
>>> x = 'file/path/archive.tar.gz'
>>> y = 'file/path/text.txt'
>>> ''.join(pathlib.Path(x).suffixes)
'.tar.gz'
>>> ''.join(pathlib.Path(y).suffixes)
'.txt'
Surprised this wasn't mentioned yet:
import os
fn = '/some/path/a.tar.gz'
basename = os.path.basename(fn) # os independent
Out[] a.tar.gz
base = basename.split('.')[0]
Out[] a
ext = '.'.join(basename.split('.')[1:]) # <-- main part
# if you want a leading '.', and if no result `None`:
ext = '.' + ext if ext else None
Out[] .tar.gz
Benefits:
Works as expected for anything I can think of
No modules
No regex
Cross-platform
Easily extendible (e.g. no leading dots for extension, only last part of extension)
As function:
def get_extension(filename):
basename = os.path.basename(filename) # os independent
ext = '.'.join(basename.split('.')[1:])
return '.' + ext if ext else None
You can use a split on a filename:
f_extns = filename.split(".")
print ("The extension of the file is : " + repr(f_extns[-1]))
This does not require additional library
filename='ext.tar.gz'
extension = filename[filename.rfind('.'):]
Extracting extension from filename in Python
Python os module splitext()
splitext() function splits the file path into a tuple having two values – root and extension.
import os
# unpacking the tuple
file_name, file_extension = os.path.splitext("/Users/Username/abc.txt")
print(file_name)
print(file_extension)
Get File Extension using Pathlib Module
Pathlib module to get the file extension
import pathlib
pathlib.Path("/Users/pankaj/abc.txt").suffix
#output:'.txt'
Even this question is already answered I'd add the solution in Regex.
>>> import re
>>> file_suffix = ".*(\..*)"
>>> result = re.search(file_suffix, "somefile.ext")
>>> result.group(1)
'.ext'
This is a direct string representation techniques :
I see a lot of solutions mentioned, but I think most are looking at split.
Split however does it at every occurrence of "." .
What you would rather be looking for is partition.
string = "folder/to_path/filename.ext"
extension = string.rpartition(".")[-1]
Another solution with right split:
# to get extension only
s = 'test.ext'
if '.' in s: ext = s.rsplit('.', 1)[1]
# or, to get file name and extension
def split_filepath(s):
"""
get filename and extension from filepath
filepath -> (filename, extension)
"""
if not '.' in s: return (s, '')
r = s.rsplit('.', 1)
return (r[0], r[1])
you can use following code to split file name and extension.
import os.path
filenamewithext = os.path.basename(filepath)
filename, ext = os.path.splitext(filenamewithext)
#print file name
print(filename)
#print file extension
print(ext)
A true one-liner, if you like regex.
And it doesn't matter even if you have additional "." in the middle
import re
file_ext = re.search(r"\.([^.]+)$", filename).group(1)
See here for the result: Click Here
Well , i know im late
that's my simple solution
file = '/foo/bar/whatever.ext'
extension = file.split('.')[-1]
print(extension)
#output will be ext
try this:
files = ['file.jpeg','file.tar.gz','file.png','file.foo.bar','file.etc']
pen_ext = ['foo', 'tar', 'bar', 'etc']
for file in files: #1
if (file.split(".")[-2] in pen_ext): #2
ext = file.split(".")[-2]+"."+file.split(".")[-1]#3
else:
ext = file.split(".")[-1] #4
print (ext) #5
get all file name inside the list
splitting file name and check the penultimate extension, is it in the pen_ext list or not?
if yes then join it with the last extension and set it as the file's extension
if not then just put the last extension as the file's extension
and then check it out
You can use endswith to identify the file extension in python
like bellow example
for file in os.listdir():
if file.endswith('.csv'):
df1 =pd.read_csv(file)
frames.append(df1)
result = pd.concat(frames)
For funsies... just collect the extensions in a dict, and track all of them in a folder. Then just pull the extensions you want.
import os
search = {}
for f in os.listdir(os.getcwd()):
fn, fe = os.path.splitext(f)
try:
search[fe].append(f)
except:
search[fe]=[f,]
extensions = ('.png','.jpg')
for ex in extensions:
found = search.get(ex,'')
if found:
print(found)
This method will require a dictonary, list, or set. you can just use ".endswith" using built in string methods. This will search for name in list at end of file and can be done with just str.endswith(fileName[index]). This is more for getting and comparing extensions.
https://docs.python.org/3/library/stdtypes.html#string-methods
Example 1:
dictonary = {0:".tar.gz", 1:".txt", 2:".exe", 3:".js", 4:".java", 5:".python", 6:".ruby",7:".c", 8:".bash", 9:".ps1", 10:".html", 11:".html5", 12:".css", 13:".json", 14:".abc"}
for x in dictonary.values():
str = "file" + x
str.endswith(x, str.index("."), len(str))
Example 2:
set1 = {".tar.gz", ".txt", ".exe", ".js", ".java", ".python", ".ruby", ".c", ".bash", ".ps1", ".html", ".html5", ".css", ".json", ".abc"}
for x in set1:
str = "file" + x
str.endswith(x, str.index("."), len(str))
Example 3:
fileName = [".tar.gz", ".txt", ".exe", ".js", ".java", ".python", ".ruby", ".c", ".bash", ".ps1", ".html", ".html5", ".css", ".json", ".abc"];
for x in range(0, len(fileName)):
str = "file" + fileName[x]
str.endswith(fileName[x], str.index("."), len(str))
Example 4
fileName = [".tar.gz", ".txt", ".exe", ".js", ".java", ".python", ".ruby", ".c", ".bash", ".ps1", ".html", ".html5", ".css", ".json", ".abc"];
str = "file.txt"
str.endswith(fileName[1], str.index("."), len(str))
Examples 5, 6, 7 with output
Example 8
fileName = [".tar.gz", ".txt", ".exe", ".js", ".java", ".python", ".ruby", ".c", ".bash", ".ps1", ".html", ".html5", ".css", ".json", ".abc"];
exts = []
str = "file.txt"
for x in range(0, len(x)):
if str.endswith(fileName[1]) == 1:
exts += [x]
The easiest way to get is to use mimtypes, below is the example:
import mimetypes
mt = mimetypes.guess_type("file name")
file_extension = mt[0]
print(file_extension)
Here if you want to extract the last file extension if it has multiple
class functions:
def listdir(self, filepath):
return os.listdir(filepath)
func = functions()
os.chdir("C:\\Users\Asus-pc\Downloads") #absolute path, change this to your directory
current_dir = os.getcwd()
for i in range(len(func.listdir(current_dir))): #i is set to numbers of files and directories on path directory
if os.path.isfile((func.listdir(current_dir))[i]): #check if it is a file
fileName = func.listdir(current_dir)[i] #put the current filename into a variable
rev_fileName = fileName[::-1] #reverse the filename
currentFileExtension = rev_fileName[:rev_fileName.index('.')][::-1] #extract from beginning until before .
print(currentFileExtension) #output can be mp3,pdf,ini,exe, depends on the file on your absolute directory
Output is mp3, even works if has only 1 extension name
I'm definitely late to the party, but in case anyone wanted to achieve this without the use of another library:
file_path = "example_tar.tar.gz"
file_name, file_ext = [file_path if "." not in file_path else file_path.split(".")[0], "" if "." not in file_path else file_path[file_path.find(".") + 1:]]
print(file_name, file_ext)
The 2nd line is basically just the following code but crammed into one line:
def name_and_ext(file_path):
if "." not in file_path:
file_name = file_path
else:
file_name = file_path.split(".")[0]
if "." not in file_path:
file_ext = ""
else:
file_ext = file_path[file_path.find(".") + 1:]
return [file_name, file_ext]
Even though this works, it might not work will all types of files, specifically .zshrc, I would recomment using os's os.path.splitext function, example below:
import os
file_path = "example.tar.gz"
file_name, file_ext = os.path.splitext(file_path)
print(file_name, file_ext)
Cheers :)
# try this, it works for anything, any length of extension
# e.g www.google.com/downloads/file1.gz.rs -> .gz.rs
import os.path
class LinkChecker:
#staticmethod
def get_link_extension(link: str)->str:
if link is None or link == "":
return ""
else:
paths = os.path.splitext(link)
ext = paths[1]
new_link = paths[0]
if ext != "":
return LinkChecker.get_link_extension(new_link) + ext
else:
return ""
def NewFileName(fichier):
cpt = 0
fic , *ext = fichier.split('.')
ext = '.'.join(ext)
while os.path.isfile(fichier):
cpt += 1
fichier = '{0}-({1}).{2}'.format(fic, cpt, ext)
return fichier

Categories

Resources