Loop over files in subdir and put output in other subdir - python

I want to perform an action on all files in a subdir and put the out put in another dir. For example, in /Pictures/ there are subdirs /January, /February/ etc and in them imgages. I want to perform actions on the images and put the output to /Processed/ and its subdirs /January, /Februady etc.
I imagine it to be solved something like this, but I really could use some help:
import os
path = '/Pictures/'
outpath = '/Processed/'
for subdir, dirs, files in os.walk(path):
#do something with files and send out put to corresponding output dir

This should give you the basic structure :
import os
path = 'Pictures/' # NOTE: Without starting '/' !
outpath = 'Processed/'
for old_dir, _, filenames in os.walk(path):
new_dir = old_dir.replace(path, outpath, 1)
if not os.path.exists(new_dir):
print "Creating %s" % new_dir
os.makedirs(new_dir)
for filename in filenames:
old_path = os.path.join(old_dir, filename)
new_path = os.path.join(new_dir, filename)
print "Processing : %s -> %s" % (old_path, new_path)
# do something with new_path
It creates the same subfolder structure in 'Processed/' as in 'Pictures/' and it iterates over every filename.
For every file in your folders, you get the new_path variable :
old_path is 'Pictures/1/test.jpg', new_path will be 'Processed/1/test.jpg'

This basically walks through all folders of a directory, gets its files; perform some action with performFunction() and write to the same file. You can modify this to write to different path!
def walkDirectory(directory, filePattern):
for path, dirs, files in os.walk(os.path.abspath(directory),followlinks=True):
for filename in fnmatch.filter(files, filePattern):
try:
filepath = os.path.join(path, filename)
with open(filepath) as f:
s = f.read()
s = performFunction()
with open(filepath, "w") as f:
print filepath
f.write(s)
f.flush()
f.close()
except:
import traceback
print traceback.format_exc()
Hope it helps!

Related

Why does this Python (using 3.7) renamer not allow directories to start with a number?

I have created the following renamer (below) to replace periods from the filename and directory name, which seems to work fine for filenames but doesn't work for directory names if they start with an integer. No errors are raised. If there are no integers in any directory names, then it works fine for directories. Otherwise, it simply renames the files but not the directories. Can anybody tell me why and how to get around this?
Any help is much appreciated.
import os
def Replace_Filename(Root_Folder):
for Root, Dirs, Files in os.walk(Root_Folder):
for File in Files:
print(File)
Fname, Fext = os.path.splitext(File)
print(Fname)
print(Fext)
Replaced = Fname.replace(".","_")
print(Replaced)
New_Fname = Replaced + Fext
print(New_Fname)
F_path = os.path.join(Root, File)
print(F_path)
New_Fpath = os.path.join(Root, New_Fname)
print(New_Fpath)
os.rename(F_path, New_Fpath)
def Replace_Dirname(Root_Folder):
for Root, Dirs, Files in os.walk(Root_Folder):
for Dir in Dirs:
print(Dir)
New_Dname = Dir.replace(".","_")
print(New_Dname)
D_Path = os.path.join(Root, Dir)
print(D_Path)
New_Dpath = os.path.join(Root, New_Dname)
print(New_Dpath)
os.rename(D_Path, New_Dpath)
Root_Folder = "D:\\Practicerename-Copy"
Replace_Filename(Root_Folder)
Replace_Dirname(Root_Folder)

How to copy and rename all files in a directory using Python?

I am trying to copy all jpeg files from a directory (with multiple subdirectories) to a single directory. There are multiple files with the same name, so I am trying to rename the files using the name of the parent directory. For example: c:\images\tiger\image_00001.jpg will be moved to a new folder and renamed to c:\images\allimages\tiger_image_00001.jpg. I tried the code below, but nothing happens. The folder gets created, but the files do not move. This is what I have so far:
import os
path = 'source/'
os.mkdir('source/allimages/')
extensions = ['.jpeg']
for folder, _, filenames in os.walk(path):
for filename in filenames:
if folder == path or folder == os.path.join(path, 'allimages'):
continue
folder = folder.strip(path)
extension = os.path.splitext(os.path.splitext(filename)[0])[-1].lower()
if extension in extensions:
infilename = os.path.join(path, folder, filename)
newname = os.path.join(path, 'all_files', "{}-{}".format(folder.strip('./')))
os.rename(infilename, newname)
I would recommend having a function dedicated to resolving a unique filename. A while loop should do the trick. This should work.
import os
import shutil
def resolve_path(filename, destination_dir):
dest = os.path.join(destination_dir, filename)
*base_parts, extension = filename.split('.')
base_name = '.'.join(base_parts)
duplicate_num = 1
while os.path.exists(dest):
new_base = base_name + str(duplicate_num).zfill(5)
new_filename = "{}.{}".format(new_base, extension)
dest = os.path.join(destination_dir, new_filename)
duplicate_num += 1
return dest
That is such that the following is the result....
>>> with open('/path/to/file.extension', 'w') as f:
>>> pass # just create the file
>>> resolve_path('file.extension', '/path/to/')
'/path/to/file00001.extension'
Then put it together with traversing the source...
def consolidate(source, destination, extension='.jpg'):
if not os.path.exists(destination):
os.makedirs(destination)
for root, dirs, files in os.walk(source):
for f in files:
if f.lower().endswith(extension):
source_path = os.path.join(root, f)
destination_path = resolve_path(f, destination)
shutil.copyfile(source_path, destination_path)
You're calling splitext on its own output, which doesn't get what you want:
In [4]: os.path.splitext(os.path.splitext('foo.bar')[0])[-1]
Out[4]: ''
You just want extension = os.path.splitext(filename)[-1].lower(), or if you don't want the dot, then extension = os.path.splitext(filename)[-1].lower()[1:].
(Edited) more seriously, there's a problem with folder.strip(path): this will remove all characters in path from folder. For instance 'source/rescue'.strip('source/') == ''. What you want is folder.replace(path, '').

Recursively name all .pyc.py files to .py files

I'm trying to recursively rename all .pyc.py files to .py files.
My code:
import os,sys
def main():
ffolder = raw_input("folder >> ")
folder = 'C:\Users\Account Name\Desktop\Disney\toontown\\'+ ffolder +''
for filename in os.listdir(folder):
infilename = os.path.join(folder,filename)
if not os.path.isfile(infilename): continue
oldbase = os.path.splitext(filename)
newname = infilename.replace('.pyc.py', '.py')
output = os.rename(infilename, newname)
while True:
main()
It works fine but it requires me to type in each folder name. How do I make it do it on its own?
Use os.walk to recursively traverse the directory tree.
import os
import fnmatch
for dirpath, dirnames, filenames in os.walk(folder):
for f in filenames:
if f.endswith('.pyc.py'):
os.rename(os.path.join(dirpath, f), os.path.join(dirpath, f[:-7] + '.py'))
I think os.walk is the solution here. It can recursively find files in subdirectories. Try this:
import os,sys
def main():
ffolder = raw_input("folder >> ")
root = 'C:\Users\Account Name\Desktop\Disney\toontown\\'+ ffolder +''
for folder, subdirs, files in os.walk(root):
for filename in files:
infilename = os.path.join(folder,filename)
if not os.path.isfile(infilename): continue
oldbase = os.path.splitext(filename)
newname = infilename.replace('.pyc.py', '.py')
output = os.rename(infilename, newname)
while True:
main()
Take a look at os.walk. [docs]
You'll want to call your rename function (doRename(dir) for example) on a directory. Have your function walk through the directory renaming all files then call itself with any directories it finds.

Renaming filenames using python

I need to simply add the word "_Manual" onto the end of all the files i have in a specific directory
Here is the script i am using at the moment - i have no experience with python so this script is a frankenstine of other scripts i had lying around!
It doesn't give any error messages but it also doesnt work..
folder = "C:\Documents and Settings\DuffA\Bureaublad\test"
import os, glob
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
filename_split = os.path.splitext(filename) # filename and extensionname (extension in [1])
filename_zero = filename_split[0]
os.rename(filename_zero, filename_zero + "_manual")
I am now using
folder = "C:\Documents and Settings\DuffA\Bureaublad\test"
import os # glob is unnecessary
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
fullpath = os.path.join(root, filename)
filename_split = os.path.splitext(fullpath) # filename and extensionname (extension in [1])
filename_zero, fileext = filename_split
print fullpath, filename_zero + "_manual" + fileext
os.rename(fullpath, filename_zero + "_manual" + fileext)
but it still doesnt work..
it doesnt print anything and nothing gets changed in the folder!
os.rename requires a source and destination filename. The variable filename contains your current filename (e.g., "something.txt"), whereas your split separates that into something and txt. As the source file to rename, you then only specify something, which fails silently.
Instead, you want to rename the file given in filename, but as you walk into subfolders as well, you need to make sure to use the absolute path. For this you can use os.path.join(root, filename).
So in the end you get something like this:
os.rename(os.path.join(root, filename),
os.path.join(root, filename_zero + "_manual" + filename_split[1]))
This would rename dir1/something.txt into dir1/something_manual.txt.
folder = r"C:\Documents and Settings\DuffA\Bureaublad\test"
import os, glob
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
filename_split = os.path.splitext(filename) # filename and extensionname (extension in [1])
filename_zero = filename_split[0]
os.rename(os.path.join(root, filename), os.path.join(root, filename_zero + "_manual" + filename_split[1]))
In your code, you are trying to rename filename_zero, which is the filename without extension and therefore does not exist as a real path. You have to specify the full path to os.rename like above.
I. e. it does nothing? Let's see:
folder = "C:\Documents and Settings\DuffA\Bureaublad\test"
import os # glob is unnecessary
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
fullpath = os.path.join(root, filename)
filename_split = os.path.splitext(fullpath) # filename and extensionname (extension in [1])
filename_zero, fileext = filename_split
os.rename(fullpath, filename_zero + "_manual" + fileext)
might do the trick, as you have to work with the full path. but I don't understand why there was no exception when the files could not be found...
EDIT to put the change to a more prominent place:
You as well seem to have your path wrong.
Use
folder = r"C:\Documents and Settings\DuffA\Bureaublad\test"
to prevent that the \t is turned into a tab character.
for root, dirs, filenames in os.walk(folder):
for filename in filenames:
os.rename(os.path.join(root,filename),
os.path.join(root,'%s_manual%s' % os.path.splitext(filename)))
you should add a control in your code, to verify that the filename to rename hasn't already '_manual' in its string name

Python recursive folder read

I have a C++/Obj-C background and I am just discovering Python (been writing it for about an hour).
I am writing a script to recursively read the contents of text files in a folder structure.
The problem I have is the code I have written will only work for one folder deep. I can see why in the code (see #hardcoded path), I just don't know how I can move forward with Python since my experience with it is only brand new.
Python Code:
import os
import sys
rootdir = sys.argv[1]
for root, subFolders, files in os.walk(rootdir):
for folder in subFolders:
outfileName = rootdir + "/" + folder + "/py-outfile.txt" # hardcoded path
folderOut = open( outfileName, 'w' )
print "outfileName is " + outfileName
for file in files:
filePath = rootdir + '/' + file
f = open( filePath, 'r' )
toWrite = f.read()
print "Writing '" + toWrite + "' to" + filePath
folderOut.write( toWrite )
f.close()
folderOut.close()
Make sure you understand the three return values of os.walk:
for root, subdirs, files in os.walk(rootdir):
has the following meaning:
root: Current path which is "walked through"
subdirs: Files in root of type directory
files: Files in root (not in subdirs) of type other than directory
And please use os.path.join instead of concatenating with a slash! Your problem is filePath = rootdir + '/' + file - you must concatenate the currently "walked" folder instead of the topmost folder. So that must be filePath = os.path.join(root, file). BTW "file" is a builtin, so you don't normally use it as variable name.
Another problem are your loops, which should be like this, for example:
import os
import sys
walk_dir = sys.argv[1]
print('walk_dir = ' + walk_dir)
# If your current working directory may change during script execution, it's recommended to
# immediately convert program arguments to an absolute path. Then the variable root below will
# be an absolute path as well. Example:
# walk_dir = os.path.abspath(walk_dir)
print('walk_dir (absolute) = ' + os.path.abspath(walk_dir))
for root, subdirs, files in os.walk(walk_dir):
print('--\nroot = ' + root)
list_file_path = os.path.join(root, 'my-directory-list.txt')
print('list_file_path = ' + list_file_path)
with open(list_file_path, 'wb') as list_file:
for subdir in subdirs:
print('\t- subdirectory ' + subdir)
for filename in files:
file_path = os.path.join(root, filename)
print('\t- file %s (full path: %s)' % (filename, file_path))
with open(file_path, 'rb') as f:
f_content = f.read()
list_file.write(('The file %s contains:\n' % filename).encode('utf-8'))
list_file.write(f_content)
list_file.write(b'\n')
If you didn't know, the with statement for files is a shorthand:
with open('filename', 'rb') as f:
dosomething()
# is effectively the same as
f = open('filename', 'rb')
try:
dosomething()
finally:
f.close()
If you are using Python 3.5 or above, you can get this done in 1 line.
import glob
# root_dir needs a trailing slash (i.e. /root/dir/)
for filename in glob.iglob(root_dir + '**/*.txt', recursive=True):
print(filename)
As mentioned in the documentation
If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories.
If you want every file, you can use
import glob
for filename in glob.iglob(root_dir + '**/**', recursive=True):
print(filename)
Agree with Dave Webb, os.walk will yield an item for each directory in the tree. Fact is, you just don't have to care about subFolders.
Code like this should work:
import os
import sys
rootdir = sys.argv[1]
for folder, subs, files in os.walk(rootdir):
with open(os.path.join(folder, 'python-outfile.txt'), 'w') as dest:
for filename in files:
with open(os.path.join(folder, filename), 'r') as src:
dest.write(src.read())
TL;DR: This is the equivalent to find -type f to go over all files in all folders below and including the current one:
for currentpath, folders, files in os.walk('.'):
for file in files:
print(os.path.join(currentpath, file))
As already mentioned in other answers, os.walk() is the answer, but it could be explained better. It's quite simple! Let's walk through this tree:
docs/
└── doc1.odt
pics/
todo.txt
With this code:
for currentpath, folders, files in os.walk('.'):
print(currentpath)
The currentpath is the current folder it is looking at. This will output:
.
./docs
./pics
So it loops three times, because there are three folders: the current one, docs, and pics. In every loop, it fills the variables folders and files with all folders and files. Let's show them:
for currentpath, folders, files in os.walk('.'):
print(currentpath, folders, files)
This shows us:
# currentpath folders files
. ['pics', 'docs'] ['todo.txt']
./pics [] []
./docs [] ['doc1.odt']
So in the first line, we see that we are in folder ., that it contains two folders namely pics and docs, and that there is one file, namely todo.txt. You don't have to do anything to recurse into those folders, because as you see, it recurses automatically and just gives you the files in any subfolders. And any subfolders of that (though we don't have those in the example).
If you just want to loop through all files, the equivalent of find -type f, you can do this:
for currentpath, folders, files in os.walk('.'):
for file in files:
print(os.path.join(currentpath, file))
This outputs:
./todo.txt
./docs/doc1.odt
The pathlib library is really great for working with files. You can do a recursive glob on a Path object like so.
from pathlib import Path
for elem in Path('/path/to/my/files').rglob('*.*'):
print(elem)
import glob
import os
root_dir = <root_dir_here>
for filename in glob.iglob(root_dir + '**/**', recursive=True):
if os.path.isfile(filename):
with open(filename,'r') as file:
print(file.read())
**/** is used to get all files recursively including directory.
if os.path.isfile(filename) is used to check if filename variable is file or directory, if it is file then we can read that file.
Here I am printing file.
If you want a flat list of all paths under a given dir (like find . in the shell):
files = [
os.path.join(parent, name)
for (parent, subdirs, files) in os.walk(YOUR_DIRECTORY)
for name in files + subdirs
]
To only include full paths to files under the base dir, leave out + subdirs.
I've found the following to be the easiest
from glob import glob
import os
files = [f for f in glob('rootdir/**', recursive=True) if os.path.isfile(f)]
Using glob('some/path/**', recursive=True) gets all files, but also includes directory names. Adding the if os.path.isfile(f) condition filters this list to existing files only
For my taste os.walk() is a little too complicated and verbose. You can do the accepted answer cleaner by:
all_files = [str(f) for f in pathlib.Path(dir_path).glob("**/*") if f.is_file()]
with open(outfile, 'wb') as fout:
for f in all_files:
with open(f, 'rb') as fin:
fout.write(fin.read())
fout.write(b'\n')
use os.path.join() to construct your paths - It's neater:
import os
import sys
rootdir = sys.argv[1]
for root, subFolders, files in os.walk(rootdir):
for folder in subFolders:
outfileName = os.path.join(root,folder,"py-outfile.txt")
folderOut = open( outfileName, 'w' )
print "outfileName is " + outfileName
for file in files:
filePath = os.path.join(root,file)
toWrite = open( filePath).read()
print "Writing '" + toWrite + "' to" + filePath
folderOut.write( toWrite )
folderOut.close()
os.walk does recursive walk by default. For each dir, starting from root it yields a 3-tuple (dirpath, dirnames, filenames)
from os import walk
from os.path import splitext, join
def select_files(root, files):
"""
simple logic here to filter out interesting files
.py files in this example
"""
selected_files = []
for file in files:
#do concatenation here to get full path
full_path = join(root, file)
ext = splitext(file)[1]
if ext == ".py":
selected_files.append(full_path)
return selected_files
def build_recursive_dir_tree(path):
"""
path - where to begin folder scan
"""
selected_files = []
for root, dirs, files in walk(path):
selected_files += select_files(root, files)
return selected_files
I think the problem is that you're not processing the output of os.walk correctly.
Firstly, change:
filePath = rootdir + '/' + file
to:
filePath = root + '/' + file
rootdir is your fixed starting directory; root is a directory returned by os.walk.
Secondly, you don't need to indent your file processing loop, as it makes no sense to run this for each subdirectory. You'll get root set to each subdirectory. You don't need to process the subdirectories by hand unless you want to do something with the directories themselves.
Try this:
import os
import sys
for root, subdirs, files in os.walk(path):
for file in os.listdir(root):
filePath = os.path.join(root, file)
if os.path.isdir(filePath):
pass
else:
f = open (filePath, 'r')
# Do Stuff
If you prefer an (almost) Oneliner:
from pathlib import Path
lookuppath = '.' #use your path
filelist = [str(item) for item in Path(lookuppath).glob("**/*") if Path(item).is_file()]
In this case you will get a list with just the paths of all files located recursively under lookuppath.
Without str() you will get PosixPath() added to each path.
This worked for me:
import glob
root_dir = "C:\\Users\\Scott\\" # Don't forget trailing (last) slashes
for filename in glob.iglob(root_dir + '**/*.jpg', recursive=True):
print(filename)
# do stuff
If just the file names are not enough, it's easy to implement a Depth-first search on top of os.scandir():
stack = ['.']
files = []
total_size = 0
while stack:
dirname = stack.pop()
with os.scandir(dirname) as it:
for e in it:
if e.is_dir():
stack.append(e.path)
else:
size = e.stat().st_size
files.append((e.path, size))
total_size += size
The docs have this to say:
The scandir() function returns directory entries along with file attribute information, giving better performance for many common use cases.

Categories

Resources