How to check if multiple files exist in different directories - python

I know how to use python to check to see if a file exists, but what I am after is trying to see if multiple files of the same name exist throughout my working directory. Take for instance:
gamedata/areas/
# i have 2 folders in this directory
# testarea and homeplace
1. gamedata/areas/testarea/
2. gamedata/areas/homeplace/
Each folder of homeplace and testarea for instance contains a file called 'example'
Is there a pythonic way to use 'os' or similiar to check to see if the file 'example' can be found in both testarea and homeplace?
Although is their a way to do this without manually and statically using
os.path.isfile()
because throughout the life of the program new directories will be made, and I don't want to constantly go back into the code to change it.

You can check in every directory bellow gamedata/areas/:
This only goes down one level, you could extend it to go down as many levels as you want.
from os import listdir
from os.path import isdir, isfile, join
base_path = "gamedata/areas/"
files = listdir(base_path)
only_directories = [path for path in files if isdir(join(base_path,path))]
for directory_path in only_directories:
dir_path = join(base_path, directory_path)
for file_path in listdir(dir_path):
full_file_path = join(base_path, dir_path, file_path)
is_file = isfile(full_file_path)
is_example = "example" in file_path
if is_file and is_example:
print "Found One!!"
Hope it helps!

Maybe something like
places = ["testarea", "homeplace"]
if all(os.path.isfile(os.path.join("gamedata/areas/", x, "example") for x in places)):
print("Missing example")
If the condition is false, this doesn't tell you which subdirectory does not contain the file example, though. You can update places as necessary.

As I mentioned in the comments, os.walk is your friend:
import os
ROOT="gamedata/areas"
in_dirs = [path for (path, dirs, filenames)
in os.walk(ROOT)
if 'example' in filenames]
in_dirs will be a list of subdirectories where example is found

Related

Recursively find and copy files from many folders

I have some files in an array that I want to recursively search from many folders
An example of the filename array is ['A_010720_X.txt','B_120720_Y.txt']
Example of folder structure is as below which I can also provide as an array e.g ['A','B'] and ['2020-07-01','2020-07-12']. The "DL" remains the same for all.
C:\A\2020-07-01\DL
C:\B\2020-07-12\DL
etc
I have tried to use shutil but it doesn't seem to work effectively for my requirement as I can only pass in a full file name and not a wildcard. The code I have used with shutil which works but without wildcards and with absolute full file name and path e.g the code below will only give me A_010720_X.txt
I believe the way to go would be using glob or pathlib which i have not used before or cannot find some good examples similar to my use case
import shutil
filenames_i_want = ['A_010720_X.txt','B_120720_Y.txt']
RootDir1 = r'C:\A\2020-07-01\DL'
TargetFolder = r'C:\ELK\LOGS\ATH\DEST'
for root, dirs, files in os.walk((os.path.normpath(RootDir1)), topdown=False):
for name in files:
if name in filenames_i_want:
print ("Found")
SourceFolder = os.path.join(root,name)
shutil.copy2(SourceFolder, TargetFolder)
I think this should do what you need assuming they are all .txt files.
import glob
import shutil
filenames_i_want = ['A_010720_X.txt','B_120720_Y.txt']
TargetFolder = r'C:\ELK\LOGS\ATH\DEST'
all_files = []
for directory in ['A', 'B']:
files = glob.glob('C:\{}\*\DL\*.txt'.format(directory))
all_files.append(files)
for file in all_files:
if file in filenames_i_want:
shutil.copy2(file, TargetFolder)

Ignoring folders when organizing files

I am fairly new to python, and trying to write a program that organizes files based on their extensions
import os
import shutil
newpath1 = r'C:\Users\User1\Documents\Downloads\Images'
if not os.path.exists(newpath1): # check to see if they already exist
os.makedirs(newpath1)
newpath2 = r'C:\Users\User1\Documents\Downloads\Documents'
if not os.path.exists(newpath2):
os.makedirs(newpath2)
newpath3 = r'C:\Users\User1\Documents\Downloads\Else'
if not os.path.exists(newpath3):
os.makedirs(newpath3)
source_folder = r"C:\Users\User1\Documents\Downloads" # the location of the files we want to move
files = os.listdir(source_folder)
for file in files:
if file.endswith(('.JPG', '.png', '.jpg')):
shutil.move(os.path.join(source_folder,file), os.path.join(newpath1,file))
elif file.endswith(('.pdf', '.pptx')):
shutil.move(os.path.join(source_folder,file), os.path.join(newpath2,file))
#elif file is folder:
#do nothing
else:
shutil.move(os.path.join(source_folder,file), os.path.join(newpath3,file))
I want it to move files based on their extensions. However, I am trying to figure out how to stop the folders from moving. Any help would be greatly appreciated.
Also, for some reason, not every file is being moved, even though they have the same extension.
As with most path operations, I recommend using the pathlib module. Pathlib is available since Python 3.4 and has portable (multi platform), high-level API for file system operations.
I recommend using the following methods on Path objects, to determine their type:
Path.is_file()
Path.is_dir()
import shutil
from pathlib import Path
# Using class for nicer grouping of target directories
# Note that pathlib.Path enables Unix-like path construction, even on Windows
class TargetPaths:
IMAGES = Path.home().joinpath("Documents/Downloads/Images")
DOCUMENTS = Path.home().joinpath("Documents/Downloads/Documents")
OTHER = Path.home().joinpath("Documents/Downloads/Else")
__ALL__ = (IMAGES, DOCUMENTS, OTHER)
for target_dir in TargetPaths.__ALL__:
if not target_dir.is_dir():
target_dir.mkdir(exist_ok=True)
source_folder = Path.home().joinpath("Documents/Downloads") # the location of the files we want to move
# Get absolute paths to the files in source_folder
# files is a generator (only usable once)
files = (path.absolute() for path in source_folder.iterdir() if path.is_file())
def move(source_path, target_dir):
shutil.move(str(source_path), str(target_dir.joinpath(file.name))
for path in files:
if path.suffix in ('.JPG', '.png', '.jpg'):
move(path, TargetPaths.IMAGES)
elif path.suffix in ('.pdf', '.pptx'):
move(path, TargetPaths.DOCUMENTS)
else:
move(path, TargetPaths.OTHER)
See here
In particular, the os.walk command. This command returns a 3-tuple with the dirpath, dirname, and filename.
In your case, you should use [x[0] for x in os.walk(dirname)]

Moving Files: Matching Partial File/Directory Criteria (lastName, firstName) - Glob, Shutil

EDIT: ANSWER Below is the answer to the question. I will leave all subsequent text there just to show you how difficult I made such an easy task..
from pathlib import Path
import shutil
base = "C:/Users/Kenny/Documents/Clients"
for file in Path("C:/Users/Kenny/Documents/Scans").iterdir():
name = file.stem.split('-')[0].rstrip()
subdir = Path(base, name)
if subdir.exists():
dest = Path(subdir, file.name)
shutil.move(file, dest)
Preface:
I'm trying to write code that will move hundreds of PDF files from a :/Scans folder into another directory based on the matching client's name. This question is linked below - a very kind person, Elis Byberi, helped assist me in correcting my original code. I'm encountering another problem though..
To see our discussion and a similar question discussed:
-Python- Move All PDF Files in Folder to NewDirectory Based on Matching Names, Using Glob or Shutil
Python move files from directories that match given criteria to new directory
Question: How can you move all of the named files in :/Scans to their appropriately matched folder in :/Clients.
Background: Here is a breakdown of my file folders to give you a better idea of what I'm trying to do.
Within :/Scans folder I have thousands of PDF files, manually renamed (I tried writing a program to auto-rename.. didn't work) based on client and content, such that the folder encloses PDFs labeled as follows:
lastName, firstName - [contentVariable]
(repeat the above 100,000x)
Within the :/C drive of my computer I have a folder named 'Clients' with sub-folders for each and every client, named similar to the pattern above, as 'lastName, firstName'
EDIT: The code below will move the entire Scans folder to the Clients folder, which is close, but not exactly what I need to be doing. I only need to move the files within Scans to the corresponding Client fold names.
import glob
import shutil
import os
source = "C:/Users/Kenny/Documents/Scans"
dest = "C:/Users/Kenny/Documents/Clients"
os.chdir("C:/Users/Kenny/Documents/Clients")
pattern = '*,*'
for x in glob.glob(pattern):
fileName = os.path.join(source, x)
print(fileName)
shutil.move(source, dest)
EDIT 2 - CLOSE!: The code below will move all the files in Scans to the Clients folder, which is close, but not exactly what I need to be doing. I need to get each file into the correct corresponding file folder within the Clients folder.
This is a step forward from moving the entire Scans folder I would think.
source = "C:/Users/Kenny/Documents/Scans"
dest = "C:/Users/Kenny/Documents/Clients"
for (dirpath, dirnames, filenames) in walk(source):
for file in filenames:
shutil.move(path.join(dirpath,file), dest)
I have the following code below as well, and I am aware it does not do what I want it to do, so I am definitely missing something..
import glob
import shutil
import os
path = "C:/Users/Kenny/Documents/Scans"
dirs = os.listdir(path)
for file in dirs:
print(file)
dest_dir = "C:/Users/Kenny/Documents/Clients/{^w, $w}?"
for file in glob.glob(r'C:Users/Kenny/Documents/Clients/{^w, $w}?'):
print(file)
shutil.move(file, dest_dir)
1) Should I use os.scandir instead of os.listdir ?
2) Am I moving in the correct direction if I modify the code as such:
import glob
import shutil
import os
path = "C:/Users/Kenny/Documents/Scans"
dirs = os.scandir(path)
for file in dirs:
print(file)
dest_dir = "C:/Users/Kenny/Documents/Clients/*"
for file in glob.glob(r'C:Users/Kenny/Documents/Clients, *'):
dest_dir = os.path.join(file, glob.glob)
shutil.move(file, dest_dir)
Note within the 'for file in glob.glob(r'C:Users/Kenny/Documents/Clients/{^w, $w}?' I have tried replacing 'Clients/{^w, $w}?' with just 'Clients/*'
For the above, I only need the file in :/Scans, written as, "lastName, firstName - [content]" to be matched and moved to /Clients/[lastName, firstName] --- the [content] does not matter. But there are both greedy and nongreedy expressions... which is why I'm unsure about using * or {^w, $w}? -- because we have clients with the same last names, but different first names.
The following error is generated when running the first command:
Error 1
Error 2
The following error (though, there is no error?) is generated when running the second command:
Error 3
EDIT/POSSIBLE ANSWER
Have not yet tested this but, fnmatch(filename, pattern), or, fnmatch.translate(pattern) can be used to test whether the filename string matches the pattern string, returning True or False.
From here perhaps you could write a conditional statement..
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
shutil.move(source, destination)
or
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
shutil.move(file.join(eachFile, source), destination)
I have not tested the two aforesaid codes. I have no idea if they work, but editing allows others to see how my train of thought is progressing.

Visiting multiple folders with extensions

I'm working on something here, and I'm completely confused. Basically, I have the script in my directory, and that script has to run on multiple folders with a particular extension. Right now, I have it up and running on a single folder. Here's the structure, I have a main folder say, Python, inside that I have multiple folders all with the same .ext, and inside each sub-folder I again have few folders, inside which I have the working file.
Now, I want the script to visit the whole path say, we are inside the main folder 'python', inside which we have folder1.ext->sub-folder1->working-file, come out of this again go back to the main folder 'Python' and start visiting the second directory.
Now there are so many things in my head, the glob module, os.walk, or the for loop. I'm getting the logic wrong. I desperately need some help.
Say, Path=r'\path1'
How do I start about? Would greatly appreciate any help.
I'm not sure if this is what you want, but this main function with a recursive helper function gets a dictionary of all of the files in a main directory:
import os, os.path
def getFiles(path):
'''Gets all of the files in a directory'''
sub = os.listdir(path)
paths = {}
for p in sub:
print p
pDir = os.path.join(path, p)
if os.path.isdir(pDir):
paths.update(getAllFiles(pDir, paths))
else:
paths[p] = pDir
return paths
def getAllFiles(mainPath, paths = {}):
'''Helper function for getFiles(path)'''
subPaths = os.listdir(mainPath)
for path in subPaths:
pathDir = os.path.join(path, p)
if os.path.isdir(pathDir):
paths.update(getAllFiles(pathDir, paths))
else:
paths[path] = pathDir
return paths
This returns a dictionary of the form {'my_file.txt': 'C:\User\Example\my_file.txt', ...}.
Since you distinguish first level directories from its sub-directories, you could do something like this:
# this is a generator to get all first level directories
dirs = (d for d in os.listdir(my_path) if os.path.isdir(d)
and os.path.splitext(d)[-1] == my_ext)
for d in dirs:
for root, sub_dirs, files in os.walk(d):
for f in files:
# call your script on each file f
You could use Formic (disclosure: I am the author). Formic allows you to specify one multi-directory glob to match your files so eliminating directory walking:
import formic
fileset = formic.FileSet(include="*.ext/*/working-file", directory=r"path1")
for file_name in fileset:
# Do something with file_name
A couple of points to note:
/*/ matches every subdirectory, while /**/ recursively descends into every subdirectory, their subdirectories and so on. Some options:
If the working file is precisely one directory below your *.ext, then use /*/
If the working file is at any depth under *.ext, then use /**/ instead.
If the working file is at least one directory, then you might use /*/**/
Formic starts searching in the current working directory. If this is the correct directory, you can omit the directory=r"path1"
I am assuming the working file is literally called working-file. If not, substitute a glob that matches it, like *.sh or script-*.

Make multiple directories based on a list

Hi
I would like to make multiple new dir's in a set root dir each one named based on a list of names
e.g.
List looks like this
Folder_1
Folder_x
Folder_y
is there an easy way to do this in python?
import os
root_path = '/whatever/your/root/path/is/'
folders = ['Folder_1','Folder_x','Folder_y']
for folder in folders:
os.mkdir(os.path.join(root_path,folder))
Here's one way to do it using a flexible custom function. Note that it uses os.makedirs() instead of os.mkdir() which means that it will also create the root folder if necessary, as well as allowing the subfolder paths to contain intermediate-level directories if desired.
The code also uses functools.partial() to create a temporary local function named concat_path() to use with the built-in map() function to concatenate the root directory's name with each subfolder's. It then uses os.makedirs() on each of those to create the subfolder path.
import os
from functools import partial
def makefolders(root_dir, subfolders):
concat_path = partial(os.path.join, root_dir)
for subfolder in map(concat_path, subfolders):
os.makedirs(subfolder, exist_ok=True) # Python 3.2+
if __name__=='__main__':
root_dir = '/path/to/root/folder'
subfolders = ('Numbers/Folder_1', 'Letters/Folder_x', 'Letters/Folder_y')
makefolders(root_dir, subfolders)
Make folder name as desired
import os
root_path = '/home/sagnik'
folders= [None] * 201
for x in range(0,201):
print(str(x))
folders[x] ="folder"+str(x)
Create folders
for folder in folders:
os.mkdir(os.path.join(root_path,folder))
os.mkdir(name_of_dir)
is your friend.
os.path.join to combine your root dir and name, and os.mkdir to create the directories. Looping over things is easily enough done with for.
import os
root_dir = 'root_path\\whateverYouWant\\'
list_ = ['Folder_1', 'Folder_x', 'Folder_y']
for folder in list_:
os.makedirs(root_dir + folder)
I was in the same situation too and finally got a small working output, try it.
I had two files, first the program file and second a .txt file containing the list of folder names.
import os
f = open('folder.txt', 'r')
for g in f:
os.mkdir(g)
f.close()
import os
dir_names = ["ABC1", "ABC2", "ABC3"]
#Create three folders on Desktop
#dir_path = os.path.expanduser("~/Desktop")
dir_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
for folder in dir_names:
try:
if not os.mkdir(os.path.join(dir_path,folder)):
print(folder)
except:
print("Folder already exists")
break
from os import makedirs
makedirs('1/2/3/4/5/6/7/8/4/4/5/5/5/5/5/5/5/55/5/5/5/5')
By this, you'll more than you want.

Categories

Resources