how to avoid searching a folder - python

how do I avoid searching a folder? This script goes through every folder a searches it for your file, how do I avoid searching Applications? or only search the folders I tell it to. I've been trying for at least 3 hours
from PIL import Image
user_path = ("/Users/" + getpass.getuser())
FileName = input("file name, please, including the exsention: ")
print("working?")
for folder, sub_folder, files in os.walk(user_path):
print(f"folder is {folder}")
for sub_fold in sub_folder:
print(f"sub folder is {sub_fold}")
for f in files:
print(f"file: {f}")
if FileName == f:
print("file found")
print(os.path.abspath(os.path.join(root, name)))

Create array with folders excluded.
When loops entering into folder check is that folder name are in array created above. If it is just ignore.

I made a sample code. Please check and respond to me.
import os
ext = ["a", "b", "c"] # I assume these are unnecessary folders.
for folder, sub_folder, files in os.walk(user_path):
print(f"folder is {folder}")
for sub_fold in sub_folder:
if sub_fold in ext:
continue
else:
print(f"sub folder is {sub_fold}")
for f in files:
print(f"file: {f}")
if FileName == f:
print("file found")
print(os.path.abspath(os.path.join(root, name)))

os.walk walks the entire directory tree, presenting the current directory, its immediate subfolders and its immediate files on each iteration. As long as you are walking top-down (the default) you can stop a subfolder from being iterated by removing it from the folders list. In this example, I made the blacklist a canned list in the source, but you could prompt for it if you'd like. On each folder iteration all you need to do is see if the wanted filename is in the list of file names in that iteration.
from PIL import Image
import getpass
import os
# blacklist folders paths relative to user_path
blacklist = ["Applications", "Downloads"]
# get user root and fix blacklist
# user_path = ("/Users/" + getpass.getuser())
user_path = os.path.expanduser("~")
blacklist = [os.path.join(user_path, name) for name in blacklist]
FileName = input("file name, please, including the exsention: ")
print("working?")
for folder, sub_folders, files in os.walk(user_path):
# eliminate top level folders and their subfolders with inplace
# remove of subfolders
if folder in blacklist:
del sub_folders[:]
continue
# in-place remove of blacklisted folders below top level
for sub_folder in sub_folders[:]:
if os.path.join(folder, sub_folder) in blacklist:
sub_folders.remove(sub_folder)
if FileName in files:
print("file found")
print(os.path.abspath(os.path.join(folder, FileName)))

Related

How to Copy Sub Folders and files into New folder using Python

Very new to iterating over folder structures in python (and python!)
All the answers I've found on this site seem very confusing to implement to my situation. Hoping someone can assist.
I have a folder called Downloads. ( 1st level )
This folder is stored at "C:\Users\myusername\Desktop\downloads"
Within this folder I have the following subfolders. (2nd level)
Folder path example: C:\Users\myusername\Desktop\downloads\2020-03-13
2020-03-13
2020-03-13
2020-03-15... etc
Within each of these 2nd level folders I have another level of folders with pdf files.
So if I take 2020-03-13 it has a number of folders below: - 3rd level
22105853
22108288
22182889
Example path for third level:
C:\Users\myusername\Desktop\downloads\2020-03-13\22105853
All I am trying to do is create a new folder at the Downloads (1st)level and copy all the folders at the third level into it. Eliminating the second level structure basically.
Desired result.
C:\Users\myusername\Desktop\r3\downloads\NEWFOLDER\22105853
C:\Users\myusername\Desktop\r3\downloads\NEWFOLDER\22108288
C:\Users\myusername\Desktop\r3\downloads\NEWFOLDER\22182889
I started the code below and managed to recreate the file structure within a new file called Downloads: But stuck now and hoping someone can help me.
save_dir='C:\\Users\\myusername\\Desktop\\downloads\\'
localpath = os.path.join(save_dir, 'Repository')
if not os.path.exists(localpath):
try:
os.mkdir(localpath, mode=777)
print('MAKE_DIR: ' + localpath)
except OSError:
print("directory error occurred")
for root, dirs, files in os.walk(save_dir):
for dir in dirs:
path = os.path.join(localpath, dir)
if '-' not in path and not os.path.exists(path):
#(Checking for '-' to not create folders at sceond level)
os.mkdir(path, mode=777)
print(path)
This code snippet should work:
import os
from distutils.dir_util import copy_tree
root_dir = 'path/to/your/rootdir'
try:
os.mkdir('path/to/your/rootdir/dirname')
except:
pass
for folder_name in os.listdir(root_dir):
path = root_dir + folder_name
for folder_name in os.listdir(path):
copy_tree(path + folder_name, 'path/to/your/rootdir/dirname')
just replace the directory names with the names you need
Using copy_tree is probably the best way to do it, however I prefer check if there are strange files or folders in wrong place and then create folders or copy the files.
Here is another way to do that.
However be careful if you will create the repository folder inside the root folder and than you will iterate over the root folder, in listdir you will have also the Repository folder.
import os
import shutil
def main_copy():
save_dir='C:\\Users\\myusername\\Desktop\\downloads'
localpath = os.path.join(save_dir, 'Repository')
if not os.path.exists(localpath):
try:
os.mkdir(localpath, mode=777)
print('MAKE_DIR: ' + localpath)
except OSError:
print("directory error occurred")
return
for first_level in os.listdir(save_dir):
subffirstlevel = os.path.join(save_dir, first_level)
# skip repository folder
if subffirstlevel == localpath: continue
# skip eventually files
if os.path.isfile(subffirstlevel): continue
for folder_name in os.listdir(subffirstlevel):
subf = os.path.join(subffirstlevel, folder_name)
# skip eventually files
if os.path.isfile(subf): continue
newsubf = os.path.join(localpath, folder_name)
if not os.path.exists(newsubf):
try:
os.mkdir(newsubf, mode=777)
print('MAKE_DIR: ' + newsubf)
except OSError:
print("directory error occurred")
continue
for file_name in os.listdir(subf):
filename = os.path.join(subf, file_name)
if os.path.isfile(filename):
shutil.copy(filename, os.path.join(newsubf, file_name))
print("copy ", file_name)

python how to collect a specific file from a list of folders and save

I have many folders in a master folder as given below. Each folder contains a .JPG file. I would like to extract all these files and store them in this master folder.
Inside each folder
My present code:
import os
import glob
os.chdir('Master folder')
extension = 'JPG'
jpg_files= [i for i in glob.glob('*.{}'.format(extension))]
This did not work.
To find the images in your tree, I would use os.walk. Below you can find a complete example to 'find and move' function that move all the files to your given path, and create a new filename for duplicate filenames.
The simple 'find and replace' function will also check with function add_index_to_filepath whether or not the file already exists, add an index (n) to the path. For example: if image.jpg would exists, it turns the next one into turn into image (1).jpg and the following one into image (2).jpg and so on.
import os
import re
import shutil
def add_index_to_filepath(path):
'''
Check if a file exists, and append '(n)' if true.
'''
# If the past exists, go adjust it
if os.path.exists(path):
# pull apart your path and filenames
folder, file = os.path.split(path)
filename, extension = os.path.splitext(file)
# discover the current index, and correct filename
try:
regex = re.compile(r'\(([0-9]*)\)$')
findex = regex.findall(filename)[0]
filename = regex.sub('({})'.format(int(findex) + 1), filename)
except IndexError:
filename = filename + ' (1)'
# Glue your path back together.
new_path = os.path.join(folder, '{}{}'.format(filename, extension))
# Recursivly call your function, go keep verifying if it exists.
return add_index_to_filepath(new_path)
return path
def find_and_move_files(path, extension_list):
'''
Walk through a given path and move the files from the sub-dir to the path.
Upper-and lower-case are ignored. Duplicates get a new filename.
'''
files_moved = []
# First walk through the path, to list all files.
for root, dirs, files in os.walk(path, topdown=False):
for file in files:
# Is your extension wanted?
extension = os.path.splitext(file)[-1].lower()
if extension in extension_list:
# Perpare your old an new path, and move
old_path = os.path.join(root, file)
new_path = add_index_to_filepath(os.path.join(path, file))
if new_path in files_moved:
shutil.move(old_path, new_path)
# Lets keep track of what we moved to return it in the end
files_moved.append(new_path)
return files_moved
path = '.' # your filepath for the master-folder
extensions = ['.jpg', '.jpeg'] # There are some variations of a jpeg-file extension.
found_files = find_and_move_files(path, extensions)

How to extract sub-folder path?

I have about 100 folders and in each folder files that should be read and analyzed.
I can read the files from their subfolders, but I want to start processing at e.g. the 10th folder until the end. And I need the exact folder path.
How can I do this?
To clarify my question, I extracted a sample from my code:
rootDir = 'D:/PhD/result/Pyradiomic_input/'
for (path, subdirs, files) in os.walk(rootDir):
sizefile=len(path)
if "TCGA-" in path :
print(path)
The output is:
D:/PhD/result/Pyradiomic_input/TCGA-02-0006
D:/PhD/result/Pyradiomic_input/TCGA-02-0009
D:/PhD/result/Pyradiomic_input/TCGA-02-0011
D:/PhD/result/Pyradiomic_input/TCGA-02-0027
D:/PhD/result/Pyradiomic_input/TCGA-02-0046
D:/PhD/result/Pyradiomic_input/TCGA-02-0069
Now my question is how can I start working from e.g. D:/PhD/result/Pyradiomic_input/TCGA-02-0046 until the end, instead of starting from the top? I tried some ideas but they did not work.
You could set a flag to capture when you hit a specific directory
rootDir = 'D:/PhD/result/Pyradiomic_input/'
first_folder = 'TCGA-02-0046'
process = False
for (path, subdirs, files) in os.walk(rootDir):
sizefile=len(path)
if "TCGA-" in path :
print(path)
if first_folder in path:
process = True
if process:
#process folder
If you want a specific folder to indicate the script should stop processing
rootDir = 'D:/PhD/result/Pyradiomic_input/'
first_folder = 'TCGA-02-0046'
last_folder = 'TCGA-02-0099'
process = False
for (path, subdirs, files) in os.walk(rootDir):
sizefile=len(path)
if "TCGA-" in path :
print(path)
if first_folder in path:
process = True
if last_folder in path:
break
if process:
#process folder
You can also set a list of directories that you want to process
rootDir = 'D:/PhD/result/Pyradiomic_input/'
process_dirs = ['TCGA-02-0046', ...]
process = False
for (path, subdirs, files) in os.walk(rootDir):
sizefile=len(path)
if "TCGA-" in path :
print(path)
if any(d in path for d in process_dirs):
#process folder
You can simply skip the values you aren't interested in. Here a bit simplified:
counter = 0
# mocking the file operations
for path in ["/dir-1", "/dir-2", "/dir-3", "/dir-4", "/dir-5"]:
# skip the first two paths
if counter < 2:
counter += 1
continue
# do something
print(path)
Alternatively you could collect the paths first, like this:
paths = []
# mocking the file operations
for path in ["/dir-1", "/dir-2", "/dir-3", "/dir-4", "/dir-5"]:
# collect paths in array
paths.append(path)
# skip the first two elements
paths = paths[2:]
for path in paths:
# do something
print(path)
The second version can become a bit shorter if you use generator expressions, but I favor readability.

How to get the files with the biggest size in the folders, change their name and save to a different folder

I need to get files with the biggest size in different folders, change their name to folder name that they belong to and save to a new folder. I have something like this and I got stuck:
import os
# Core settings
rootdir = 'C:\\Users\\X\\Desktop\\humps'
to_save = 'C:\\Users\\X\\Desktop\\new'
for root, dirs, files in os.walk(rootdir):
new_list = []
for file in files:
if file.endswith(".jpg"):
try:
print(file)
os.chdir(to_save)
add_id = root.split("humps\\")[1]
add_id = add_id.split("\\")[0]
file_name = os.path.join(root,file)
new_list.append(file_name)
bigfile = max(new_list, key=lambda x: x.stat().st_size)
except:
pass
To make it more clear: Let's say the name of the sub-folder is "elephant" and there are different elephant photos and subfolders in in this elephant folder. I want to go through those photos and subfolders and find the elephant foto with the biggest size, name it as elephant and save it to my target folder. Also repaet it for other sub folders such as lion, puma etc.
How I could achieve what I want ?
To find biggest file and save to another location
import os
import shutil
f_list = []
root = "path/to/directory"
root = os.path.abspath(root)
for folder, subfolders, files in os.walk(root):
for file in files:
filePath = os.path.join(folder, file)
f_list.append(filePath)
bigest_file = max(f_list,key=os.path.getsize)
new_path = "path/where/you/want/to/save"
shutil.copy(biggest_file,new_path)
if you want only images then add one more condition in loop
for folder, subfolders, files in os.walk(root):
for file in files:
if file.endswith(".jpg"):
filePath = os.path.join(folder, file)
f_list.append(filePath)
To get all folders biggest file
root = "demo"
root = os.path.abspath(root)
def test(path):
big_files = []
all_paths = [x[0] for x in os.walk(path)]
for paths in all_paths:
f_list = filter(os.path.isfile, os.listdir(paths))
if len(f_list) > 0:
big_files.append((paths,max(f_list,key=os.path.getsize)))
return big_files
print test(root)
How to get the files with the biggest size in the folders, change their name and save to a different folder
Basically you already have a good description of what you need to do. You just need to follow it step by step:
get all files in some search directory
filter for relevant files ("*.jpg")
get their sizes
find the maximum
copy to new directory with name of search directory
IMO it's an important skill to be able to break down a task into smaller tasks. Then, you just need to implement the smaller tasks and combine:
def iterate_files_recursively(directory="."):
for entry in os.scandir(directory):
if entry.is_dir():
for file in iterate_files_recursively(entry.path):
yield file
else:
yield entry
files = iterate_files_recursively(subfolder_name)
I'd use os.scandir because it avoids building up a (potentially) huge list of files in memory and instead allows me (via a generator) to work one file at a time. Note that starting with 3.6 you can use the result of os.scandir as a context manager (with syntax).
images = itertools.filterfalse(lambda f: not f.path.endswith('.jpg'), files)
Filtering is relatively straightforward except for the IMO strange choice of ìtertools.filterfalse to only keep elements for which its predicate returns False.
biggest = max(images, key=(lambda img: img.stat().st_size))
This is two steps in one: Get the maximum with the builtin max function, and use the file size as "key" to establish an order. Note that this raises a ValueError if you don't have any images ... so you might want to supply default=None or handle that exception.
shutil.copy(biggest.path, os.path.join(target_directory, subfolder_name + '.jpg')
shutil.copy copies the file and some metadata. Instead of hardcoding path separators, please use os.path.join!
Now all of this assumes that you know the subfolder_name. You can scan for those easily, too:
def iterate_directories(directory='.'):
for entry in os.scandir(directory):
if entry.is_dir():
yield entry
Here's some code that does what you want. Instead of using the old os.walk function, it uses modern pathlib functions.
The heart of this code is the recursive biggest function. It scans all the files and directories in folder, saving the matching file names to the files list, and recursively searching any directories it finds. It then returns the path of the largest file that it finds, or None if no matching files are found.
from pathlib import Path
import shutil
def filesize(path):
return path.stat().st_size
def biggest(folder, pattern):
''' Find the biggest file in folder that matches pattern
Search recursively in all subdirectories
'''
files = []
for f in folder.iterdir():
if f.is_file():
if f.match(pattern):
files.append(f)
elif f.is_dir():
found = biggest(f, pattern)
if found:
files.append(found)
if files:
return max(files, key=filesize)
def copy_biggest(src, dest, pattern):
''' Find the biggest file in each folder in src that matches pattern
and copy it to dest, using the folder's name as the new file name
'''
for path in src.iterdir():
if path.is_dir():
found = biggest(path, pattern)
if found:
newname = dest / path
print(path, ':', found, '->', newname)
shutil.copyfile(found, newname)
You can call it like this:
rootdir = r'C:\Users\X\Desktop\humps'
to_save = r'C:\Users\X\Desktop\new'
copy_biggest(Path(rootdir), Path(to_save), '*.jpg')
Note that the copied files will have the same name as the top-level folder in rootdir that they were found in, with no file extension. If you want to give them a .jpg extension, you can change
newname = dest / path
to
newname = (dest / path).with_suffix('.jpg')
The shutil module on older versions of Python 3 doesn't understand pathlib paths. But that's easy enough to remedy. In the copy_biggest function, replace
shutil.copyfile(found, newname)
with
shutil.copyfile(str(found), str(newname))

How to remove all empty files within folder and its sub folders?

I am trying to remove all empty files in a folder, and there are folders within the folder so it needs to check inside those folders too:
e.g
remove all empty files within C:\folder1\folder1 and C:\folder1\folder2 etc
import sys
import os
def main():
getemptyfiles(sys.argv[1])
def getemptyfiles(rootdir):
for root, dirs, files in os.walk(rootdir):
for d in ['RECYCLER', 'RECYCLED']:
if d in dirs:
dirs.remove(d)
for f in files:
fullname = os.path.join(root, f)
try:
if os.path.getsize(fullname) == 0:
print fullname
os.remove(fullname)
except WindowsError:
continue
This will work with a bit of adjusting:
The os.remove() statement could fail so you might want to wrap it with try...except as well. WindowsError is platform specific. Filtering the traversed directories is not strictly necessary but helpful.
The for loop uses dir to find all files, but not directories, in the current directory and all subfolders recursively. Then the second line checks to see if the length of each file is less than 1 byte before deleting it.
cd /d C:\folder1
for /F "usebackq" %%A in (`dir/b/s/a-d`) do (
if %%~zA LSS 1 del %%A
)
import os
while(True):
path = input("Enter the path")
if(os.path.isdir(path)):
break
else:
print("Entered path is wrong!")
for root,dirs,files in os.walk(path):
for name in files:
filename = os.path.join(root,name)
if os.stat(filename).st_size == 0:
print(" Removing ",filename)
os.remove(filename)
I do first remove empty files, afterwards by following this answer (https://stackoverflow.com/a/6215421/2402577), I have removed the empty folders.
In addition, I added topdown=False in os.walk() to walk from leaf to roo since the default behavior of os.walk() is to walk from root to leaf.
So empty folders that also contains empty folders or files are removed as well.
import os
def remove_empty_files_and_folders(dir_path) -> None:
for root, dirnames, files in os.walk(dir_path, topdown=False):
for f in files:
full_name = os.path.join(root, f)
if os.path.getsize(full_name) == 0:
os.remove(full_name)
for dirname in dirnames:
full_path = os.path.realpath(os.path.join(root, dirname))
if not os.listdir(full_path):
os.rmdir(full_path)
I hope this can help you
#encoding = utf-8
import os
docName = []
def listDoc(path):
docList = os.listdir(path)
for doc in docList:
docPath = os.path.join(path,doc)
if os.path.isfile(docPath):
if os.path.getsize(docPath)==o:
os.remove(docPath)
if os.path.isdir(docPath):
listDoc(docPath)
listDoc(r'C:\folder1')

Categories

Resources