How to rename a bunch of files using python? - python

I am a complete beginner in python. I need to rename a bunch of files with dates in the name. The names all look like:
front 7.25.16
left 7.25.16
right 7.25.16
I would like them to start with the date rather than front, left, or right, so that front 7.25.16 becomes 7.25.16 front.
I have tried using regular expressions and os.walk and I have run into troubles with both. Right now I am just trying to print the file names to prove os.walk is working. Right now my code looks like this:
import re, shutil, os
K = re.compile(r"(\d+.\d+.\d+)")
RE_Date = K.search("front 7.25.16")
for root, dirs, filenames in os.walk("path"):
for filename in filenames:
print ("the filename is: " + filename)
print ("")
Any advice would be greatly appreciated.

Check this example to rename file as per your need.
import os

# Sample names in the "<label> <date><ext>" layout from the question.
filenames = ["front 7.25.16.jpg", "left 7.25.16.jpg", "right 7.25.16.jpg"]
for file_name in filenames:
    # Split once on the first space: label before it, date + extension after.
    label, _, dated = file_name.partition(' ')
    # splitext peels off only the final ".jpg", leaving the dotted date intact.
    date, ext = os.path.splitext(dated)
    new_name = '{} {}{}'.format(date, label, ext)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(new_name)
output:
7.25.16 front.jpg
7.25.16 left.jpg
7.25.16 right.jpg
In your code you can use os.rename to rename the files:
import os
import re

# "<label> <date><rest>", e.g. "front 7.25.16" or "front 7.25.16.jpg".
# Matching the date explicitly avoids mistaking its last ".16" for a file
# extension when the name has no real extension.
NAME_RE = re.compile(r"^(\S+) (\d+\.\d+\.\d+)(.*)$")

for root, dirs, filenames in os.walk("path"):
    for file_name in filenames:
        match = NAME_RE.match(file_name)
        if match is None:
            # Not in the expected layout (or already renamed); leave it alone.
            continue
        label, date, rest = match.groups()
        new_name = '{} {}{}'.format(date, label, rest)
        # os.walk yields bare names, so both ends of the rename need `root`.
        file_path = os.path.join(root, file_name)
        new_path = os.path.join(root, new_name)
        os.rename(file_path, new_path)

Related

How to change certain characters of files within a folder

I have a folder named "animals"
Inside the folder I have the following files:
"cat.PNG", "dog.PNG", "horse.PNG", "sheep.PNG"
I know the following code will change the files to lowercase
files = os.listdir('.')
for f in files:
new = f.lower()
os.rename(f, new)
But how would I change this if I wanted the file type to be lower and the name of the animal to be upper of every file?
The cleanest way (which works for any directory and any extension too):
for f in os.listdir(source_dir):
    # Split "name.EXT" into stem and extension (the extension keeps its dot).
    name, ext = os.path.splitext(f)
    # Upper-case the stem and lower-case the extension, as the question asks.
    # Both paths are joined with source_dir so the rename does not depend on
    # the current working directory.
    os.rename(os.path.join(source_dir, f),
              os.path.join(source_dir, name.upper() + ext.lower()))
split name into radix+extension
convert extension to lowercase
perform rename with full path
A really simple solution would be the following:
for f in files:
    # Strings are immutable: str.replace returns a new string, so its result
    # has to be kept rather than discarded.
    new = f.upper().replace(".PNG", ".png")
    os.rename(f, new)
You can split the file name, do each operation individually, then rejoin them.
files = os.listdir('.')
for f in files:
    # Split the filename by '.'
    split_filename = f.split('.')
    if len(split_filename) < 2:
        # No dot, so there is no extension to separate; leave the entry alone.
        continue
    # Everything before the last dot is the name; the last piece is the
    # extension (a single string, not a list slice).
    filename = ".".join(split_filename[:-1])
    extension = split_filename[-1]
    # Do each operation
    filename = filename.upper()
    extension = extension.lower()
    # Rejoin the filename
    new_filename = filename + '.' + extension
    # Rename the file: the original name is the source, the rebuilt name the target
    os.rename(f, new_filename)
# NOTE(review): the loop header appears to have been lost from this snippet;
# iterating over the current directory is assumed here.
for f in os.listdir('.'):
    # splitext copes with names that contain several dots or none at all;
    # the extension it returns already includes the leading dot.
    base, ext = os.path.splitext(f)
    new_name = f'{base.upper()}{ext.lower()}'
    os.rename(f, new_name)
You can use split and join, see this example:
file_names = ["cat.PNG", "dog.PNG", "horse.PNG", "sheep.PNG"]
for file_name in file_names:
    # Split on the last dot only, so a name with extra dots still unpacks
    # into exactly two parts.
    name, extension = file_name.rsplit('.', 1)
    print('.'.join([name.upper(), extension.lower()]))

Strange behaviour with os.path.join()

I've noticed odd behaviour with Python's os.path.join() when adding a year and a filename to a path. Here's my code:
#!/usr/bin/env python
import os
#------------------------------------------------
def file_walk(root, ext):
    """Return the paths of all files under *root* whose path ends with *ext*.

    Each returned path is ``os.path.join`` of the directory reported by
    ``os.walk`` and the file name, so it starts with *root*.
    """
    # Walk file with me, Laura Palmer!
    fList = []
    for current, dirs, files in os.walk(root):
        for entry in files:
            fname = os.path.join(current, entry)  # this works fine, yeah!
            # Keep only real files carrying the requested suffix.
            if os.path.isfile(fname) and fname.endswith(ext):
                fList.append(fname)
    return fList
myFolder = r"d:\temp\test"
myExt = ".html"
myYear = "2019"
allfiles = file_walk(myFolder, myExt)
for theFile in allfiles:
sourceFile = theFile
destinFile = os.path.join(myFolder, myYear, theFile)
print sourceFile
print destinFile
print
myFile = "bookmarks_06_05_2019.html"
print os.path.join(myFolder, myYear, myFile)
# EoF
As strings, they work fine (see last line), but as paths, not so well :(
Output I'm getting from print destinFile
d:\temp\test\bookmarks_01_26_2018.html
d:\temp\test\bookmarks_05_06_2014.html
d:\temp\test\bookmarks_06_05_2019.html
I'm expecting the following:
d:\temp\test\2019\bookmarks_01_26_2018.html
d:\temp\test\2019\bookmarks_05_06_2014.html
d:\temp\test\2019\bookmarks_06_05_2019.html
Can anyone point me in the right direction of where I'm going wrong?
theFile is an absolute file path. If all you want from it is the base name, use:
destinFile = os.path.join(myFolder, myYear, os.path.basename(theFile))
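Note that os.path.join discards every component that comes before the last absolute argument and joins only that argument with the relative ones after it. Because theFile is absolute, myFolder and myYear were thrown away, which is why the result didn't have the 2019 component.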

How would I exclude directories from os.listdir results?

I'm making a script that will encode files within a directory using b64/b16 and I'm using os.listdir to do so, but it also lists directories which causes problems since now it's trying to encode directories as if it were a file.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob
#C:\\Users\\Fedora\\Desktop\\Win 10
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
files = []
filecount = 0
fileprogress = 0
for file in dirs:
files.append(file)
filecount = filecount + 1
for x in files:
os.system("cls")
fileprogress = fileprogress + 1
print("File " + str(fileprogress) + "/" + str(filecount))
print("Encrypting " + x + "...")
inputfile = open(path + "\\" + x, "rb")
data = inputfile.read()
inputfile.close()
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
outputfile = open(path + "\\" + x + ".crypt", "wb")
outputfile.write(data)
outputfile.close()
use filter
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same directory that the isfile() check below joins against.
dirs = os.listdir(filepath)
# list(...) keeps the result reusable and countable: on Python 3, filter()
# returns a one-shot iterator rather than a list.
files = list(filter(lambda x: os.path.isfile(os.path.join(filepath, x)), dirs))
or list comprehension with os.path.isfile()
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same directory that the isfile() check below joins against.
dirs = os.listdir(filepath)
# Keep only the entries that are regular files; directories are dropped.
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]
You can use os.path.isdir function to check if the current file is a directory.
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.
Instead of using os.listdir() you can use os.walk, which returns separate lists for files and directories.
python-oswalk-example
import os

path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# Use a separate loop name so the starting `path` is not overwritten by the
# directory currently being visited.
for (dirpath, dirs, files) in os.walk(path):
    print(dirpath)  # directory being visited
    print(dirs)     # names of its sub-directories
    print(files)    # names of the files directly inside it
pythoncentral os-walk
# Import the os module, for the os.walk function
import os

# Set the directory you want to start from
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for dirName, subdirList, fileList in os.walk(path):
    print('Found directory: %s' % dirName)
    # List the files of this directory, indented under its heading.
    for fname in fileList:
        print('\t%s' % fname)

Python: How to use output of "listfiles" to delete/ move/ etc. files

Background:
My goal is to find duplicate files in two different folders (without subfolders). To do that, I use the following Python script:
### Check whether all archives still exist or whether data was deleted during the check
def listfiles(path):
    """Return the "_GIS.7z" files found by walking *path*.

    Each entry is the file name joined onto its directory with the text of
    *path* removed, so results from two different starting folders can be
    compared with each other.
    """
    files = []
    for dirName, subdirList, fileList in os.walk(path):
        # Drop the starting folder from the directory name.
        rel_dir = dirName.replace(path, '')
        for fname in fileList:
            if fname.endswith("_GIS.7z"):
                files.append(os.path.join(rel_dir, fname))
    return files
x = listfiles(root)
y = listfiles(backupfolderGIS)
#q = [filename for filename in x if filename not in y]
files_only_in_x = set(x) - set(y)
files_only_in_y = set(y) - set(x)
files_only_in_either = set(x) ^ set(y)
files_in_both = set(x) & set(y)
all_files = set(x) | set(y)
print "Alle Datein:"
print all_files
print " "
print "Nur im Zwischenspeicher:"
print files_only_in_x
print " "
print "Nur im Backupordner:"
print files_only_in_y
print " "
print "Nur einem von beiden Ordnern:"
print files_only_in_either
print " "
print "In beiden Ordnern:"
print files_in_both
print " "
The relevant output variable/list is files_in_both (folders); it shows me the duplicates. If I print it, it looks like set(['NameoftheProject_GIS.7z', 'NameofanotherProject_GIS.7z']).
Question:
How can I use this output/information (the duplicate files in the directories) to delete or move them? For example, the files NameoftheProject_GIS.7z and NameofanotherProject_GIS.7z in the folder backupfolderGIS, as listed in files_in_both.
os.walk recursively checks all folders and subfolders starting from the root dir you pass, you want to check two different folders (without subfolders) so just search each folder with glob, if you want to move you can use shutil.move:
from glob import iglob
from os import path
from shutil import move

pt1, pt2 = "/path_1", "/path_2"
# Build each pattern from the folder variables so the search and the later
# move always refer to the same directories.
names_1 = set(map(path.basename, iglob(path.join(pt1, "*_GIS.7z"))))
names_2 = map(path.basename, iglob(path.join(pt2, "*_GIS.7z")))
# Base names that occur in both folders.
dupe = names_1.intersection(names_2)
for fle in dupe:
    # move(src, dest)
    move(path.join(pt1, fle), "wherever")
Or to delete use os.remove:
import os

for fle in dupe:
    # Delete the copy that lives in the first folder.
    os.remove(os.path.join(pt1, fle))
If you want to move/delete the file from pt2 then pass that to path.join in place of pt1.
You could also use str.endswith with os.listdir:
dupe = set(fname for fname in os.listdir(pt1) if fname.endswith("_GIS.7z")).intersection(fname for fname in os.listdir(pt2) if fname.endswith("_GIS.7z"))
To avoid repeating you can put it in a function:
from shutil import move
from os import path, listdir


def listfiles(path, end):
    """Return the set of names in directory *path* that end with *end*."""
    return set(fname for fname in listdir(path) if fname.endswith(end))


# Names present in both folders; the copy in pt1 is the one that gets moved.
for fle in listfiles(pt1, "_GIS.7z").intersection(listfiles(pt2, "_GIS.7z")):
    move(path.join(pt1, fle), "wherever")
Now if you did want to check all folders for files with the same basename and do something with each duplicated name, you would need to keep a record of the full paths. You can group all common files by basename using a defaultdict:
from os import path, walk
from collections import defaultdict


def listfiles(pth, end):
    """Group every file under *pth* whose name ends with *end* by base name.

    Returns a ``defaultdict(list)`` mapping each matching file name to the
    list of full paths at which a file of that name was found.
    """
    files = defaultdict(list)
    for dirName, subdirList, fileList in walk(pth):
        for fname in fileList:
            if fname.endswith(end):
                files[fname].append(path.join(dirName, fname))
    return files
You will get a dict where the keys are the basenames and the values are lists of the full paths of the files with that name. Any list with more than one value means you have at least two files with the same name, but remember that having the same basename does not mean the files are actually the same.

Pulling Files and Timestamps from a Directory and Subdirectories

I have a working script that will print all files in a given directory. I would like help making it do two additional things:
(1) Also be able to print the date_created or time stamp for each file.
(2) Do all of the above not only for files in the given directory, but in all subdirectories as well.
Here is the working script:
from os import listdir
from os.path import isfile, join
from sys import argv
script, filename = argv
mypath = os.getcwd()
allfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
output = open(filename, 'w')
for i in allfiles:
string = "%s" %i
output.write(string + "\n")
output.close()
print "Directory printed."
I would hope to be able to print something like (filename + ", " + timestamp + "\n"), or some substitute.
Thanks!
http://docs.python.org/2/library/os.html and http://docs.python.org/2/library/stat.html have you covered.
os.walk will give you the recursive directory walking
stat will give you file timestamps (atime,ctime,mtime)
This snippet walks through files in a directory + subdirectories and prints out created and modified timestamps.
import os
import time


def walk_files(directory_path):
    """Print the ctime and mtime of every file under *directory_path*.

    Sub-directories are visited recursively via ``os.walk``. Nothing is
    returned; three lines are printed per file.
    """
    # Walk through files in directory_path, including subdirectories
    for root, _, filenames in os.walk(directory_path):
        for filename in filenames:
            # os.path.join picks the right separator and avoids a doubled
            # slash when `root` already ends with one.
            file_path = os.path.join(root, filename)
            # NOTE: per the os.path docs, getctime is the creation time on
            # Windows but the last metadata change on Unix-like systems.
            created = os.path.getctime(file_path)
            modified = os.path.getmtime(file_path)
            # Process stuff for the file here, for example...
            print("File: %s" % file_path)
            print(" Created: %s" % time.ctime(created))
            print(" Last modified: %s" % time.ctime(modified))


walk_files('/path/to/directory/')

Categories

Resources