Copying files with specific name to a specific folder using python - python

I have the following problem (I am on macOS):
12 usb flash drives are mounted in /Volumes and they have names from cam0 to cam11. Each of the drives have a following structure cam0/DCIM/100HDDVR/C00_0001.mp4, cam1/DCIM/100HDDVR/C01_0001.mp4, etc. In each 100HDDVR folder there will be multiple files so for cam0 for example it will be: C00_0001.mp4, C00_0002.mp4, C00_0003.mp4, etc.
Now I would like to copy it to the desktop, lets say, where a new folder will be created called: recording[adds today date] then create a subfolder shoot1 which will be containing files C00_0001.mp4 through C11_0001.mp4 and create subfolder shoot2 that will be containing C00_0002.mp4 through C11_0002.mp4 and so on until all the files from the flash drives are copied.
So far I managed to copy all files from cam0/DCIM/100HDDVR to a new folder recordings/cam0 but it is not automated enough and I am struggling to update it.
def copyy(self):
    """Prepare the per-camera paths, run the copy in a worker thread, report status.

    For each camera index 0..11 this sets three attributes on ``self``:
    ``source_direcotry<N>`` (the drive's ``DCIM/100HDDVR`` folder under
    ``/Volumes/CAM<N>``), ``path<N>`` (``recording/CAM<N>``) and ``cam<N>``
    (``path<N>`` joined onto ``self.Destination.get()``). ``copyy2`` reads
    those attributes, so their (misspelled) names are kept as they are.
    """
    camera_count = 12

    self.status['text'] = "Files are being copyied, have patience ;)"
    # Repaint only after the label text has changed; presumably `root` is the
    # Tk root window - TODO confirm against the rest of the file.
    root.update_idletasks()

    destination = self.Destination.get()
    for index in range(camera_count):
        relative_path = "recording/CAM%d" % index
        setattr(self, 'source_direcotry%d' % index,
                '/Volumes/CAM%d/DCIM/100HDDVR' % index)
        setattr(self, 'path%d' % index, relative_path)
        setattr(self, 'cam%d' % index, os.path.join(destination, relative_path))

    self.p.start(1)
    self.work_thread = threading.Thread(target=self.copyy2, args=())
    self.work_thread.start()
    # NOTE(review): join() blocks this method until the copy has finished, so
    # the caller gains no concurrency from the thread; kept as in the original.
    self.work_thread.join()
    self.p.stop()
    self.status['text'] = "Files have been copyied"
def copyy2(self):
    """Copy every camera's source folder to its destination folder.

    Reads ``self.source_direcotry<N>`` and ``self.cam<N>`` for N in 0..11, in
    that order, and passes each pair to ``shutil.copytree``. Any exception
    raised by ``copytree`` propagates and stops the remaining copies.
    """
    for index in range(12):
        source = getattr(self, 'source_direcotry%d' % index)
        target = getattr(self, 'cam%d' % index)
        shutil.copytree(source, target)
On top of that, how would you tackle this problem so that it also works on Windows, where you can't see the names of mounted flash drives and the drive letters are always different? Is that even possible?
I hope that I managed to describe it clear enough and thanks in advance for any tips.

I would do it the other way around:
(1) get the paths of all files under /Volumes
(2) keep only the files whose names end with the suffix you want: [0001.mp4, 0002.mp4, etc.]
(3) copy these files to a subdirectory on your desktop.
For (1) you can use :
def get_filepaths(directory, extension=None):
    """
    Return the full paths of all non-hidden files found under ``directory``.

    The tree rooted at ``directory`` is walked with ``os.walk``; every file
    whose name does not start with "." is collected as ``root/filename``.

    :param directory: directory to search; an ``AssertionError`` is raised
        if it is not an existing directory.
    :param extension: optional suffix; when truthy, only paths that end with
        it are returned.
    :return: list of file paths in the order ``os.walk`` produced them.
    """
    import os
    assert os.path.isdir(directory), "The 'directory' parameter is not a directory (%s)"%(directory)
    file_paths = []  # List which will store all of the full filepaths.
    # Walk the tree.
    for root, directories, files in os.walk(directory):
        for filename in files:
            # os.walk already yields bare file names, so the hidden-file check
            # needs no extra basename call.
            if filename.startswith("."):
                continue
            # Join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename)
            if extension and not filepath.endswith(extension):
                continue
            file_paths.append(filepath)
    return file_paths
For (2) and (3) you can use the same function in a loop:
# this is a sketch - not tested against real drives and has to be customized
def copy_shoots(nb_shoot, source_root="/Volumes/", destination_root="."):
    """Group the recordings of each shoot into its own folder.

    For shoot numbers 1..nb_shoot, every file under ``source_root`` whose
    path ends with ``_<4-digit shoot number>.mp4`` (for example
    ``C00_0001.mp4``) is copied into
    ``<destination_root>/recording_<today's date>/shoot_<number>``.

    :param nb_shoot: number of shoots to collect.
    :param source_root: directory under which the drives are mounted.
    :param destination_root: directory in which the recording folder is created.
    """
    import datetime
    import os
    import shutil

    # Read the date once so every shoot lands in the same recording folder.
    current_date = datetime.date.today().isoformat()
    for shoot_n in range(1, nb_shoot + 1):
        # 1) looking for files to copy
        suffix = "_%04d.mp4" % shoot_n
        paths_for_the_shoot = get_filepaths(source_root, suffix)
        # 2) creating a new dir (makedirs also creates the parent folder)
        new_dir_name = os.path.join(
            destination_root, "recording_%s" % current_date, "shoot_%s" % shoot_n
        )
        os.makedirs(new_dir_name, exist_ok=True)
        # 3) copy the files
        for path in paths_for_the_shoot:
            shutil.copy(path, new_dir_name)

Related

How to Iterate over several directory levels and move files based on condition

I would like some help to loop through some directories and subdirectories and extracting data. I have a directory with three levels, with the third level containing several .csv.gz files. The structure is like this
I need to access level 2 (where subfolders are) of each folder and check the existence of a specific folder (in my example, this will be subfolder 3; I left the other folders empty for this example, but in real cases they will have data). If checking returns True, then I want to change the name of files within the target subfolder3 and transfer all files to another folder.
Below is my code. It is quite cumbersome and there are probably better ways of doing it. I tried using os.walk() and this is the closest I got to a solution, but it won't move the files.
import os
import shutil
# Walk parent_dir; for every directory whose path ends with target_dir, try to
# move each non-hidden file it contains towards destination_dir.
def organizer(parent_dir, target_dir, destination_dir):
for root, dirs, files in os.walk(parent_dir):
if root.endswith(target_dir):
target = root
# NOTE: this loop variable reuses the name `files` from the os.walk tuple.
for files in os.listdir(target):
if not files.startswith("."):
# this is to change the name of the file
# (expects names with at least three dot-separated parts, e.g. name.csv.gz)
fullname = files.split(".")
just_name = fullname[0]
csv_extension = fullname[1]
gz_extension = fullname[2]
subject_id = target
#make a new name
origin = subject_id + "/"+ just_name + "." + csv_extension + "." + gz_extension
#make a path based on this new name
# NOTE(review): origin already carries the whole source directory path, so the
# joined result is not a flat path inside destination_dir; when origin is
# absolute, os.path.join discards destination_dir altogether.
new_name = os.path.join(destination_dir, origin)
#move file from origin folder to destination folder and rename the file
shutil.move(origin, new_name)
Any suggestions on how to make this work and/or make it more efficient?
Simply enough, you can use the built-in os module: os.walk(path) yields the root, the directories and the files found at each level.
import os
for root, _, files in os.walk(path):
#your code here
for your problem, do this
import os

for root, dirs, files in os.walk(parent_directory):
    for file in files:
        # extract the data from the "file"
        pass
check this for more information os.walk()
and if you want to get the name of the file, you can use os.path.basename(path)
you can even check for just the gzipped csv files you're looking for using built-in fnmatch module
import fnmatch
import os


def find_csv_files(path):
    """Return the paths of all ``*.csv.gz`` files found under ``path``.

    The tree is walked with ``os.walk``; file names are matched with a
    shell-style glob (``fnmatch``), not a regular expression. The result is
    de-duplicated and sorted.
    """
    result = set()
    for root, _, files in os.walk(path):
        for name in fnmatch.filter(files, "*.csv.gz"):
            result.add(os.path.join(root, name))
    return sorted(result)
Ok, guys, I was finally able to find a solution. Here it is. Not the cleanest one, but it works in my case. Thanks for the help.
def organizer(parent_dir, target_dir, destination_dir, prefix_index=5):
    """Move and rename the files of every ``target_dir`` folder under ``parent_dir``.

    For each directory whose path ends with ``target_dir``, every non-hidden
    entry named like ``<name>.<ext1>.<ext2>`` is moved to
    ``destination_dir/<prefix>_<name>.<ext1>.<ext2>``, where ``<prefix>`` is
    component number ``prefix_index`` of the source path split on "/".

    :param parent_dir: root of the tree to search.
    :param target_dir: suffix a directory path must end with to be processed.
    :param destination_dir: existing directory that receives the renamed files.
    :param prefix_index: which "/"-separated component of the source path is
        used as the name prefix (default 5, the value the original hard-coded).
    """
    for root, dirs, files in os.walk(parent_dir):
        if not root.endswith(target_dir):
            continue
        for entry in os.listdir(root):
            # skip hidden entries such as .DS_Store
            if entry.startswith("."):
                continue
            parts = entry.split(".")
            if len(parts) < 3:
                # Not of the form name.ext1.ext2 (e.g. a sub-directory):
                # skip it instead of failing with IndexError mid-run.
                continue
            # Plain "/" concatenation is kept so that prefix_index counts the
            # same components as before.
            origin = root + "/" + entry
            prefix = origin.split("/")[prefix_index]
            # make a new name
            new_name = prefix + "_" + parts[0] + "." + parts[1] + "." + parts[2]
            # make a path based on this new name
            new_path = os.path.join(destination_dir, new_name)
            # move file from origin folder to destination folder and rename it
            shutil.move(origin, new_path)
I guess the problem was that I was passing a variable holding the renamed file (in my example, I wrongly called this variable origin) as the origin path to shutil.move(). Since this path does not exist, the files weren't moved.

Python's re.findAll() function won't work as expected

I'm trying to create a python script that will find all the files from a working directory with a certain name pattern.
I have stored all files in a list and then I have tried applying the re.findall method on the list to obtain only a list of files with that name pattern.
I have written this code:
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
all_files = []
# Recursively read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
for filename in filenames:
all_files.append(filename)
# NOTE: findall() is given the whole list here rather than a single string;
# this is the line that raises the TypeError quoted below.
found_files = fileRegex.findall(all_files)
I get this error at the last line of the code:
TypeError: expected string or bytes-like object
I have also tried re.findall(all_files) instead of using the 'fileRegex' created prior to that line. Same error. Please tell me what am I doing wrong. Thank you so much for reading my post!
Edit(second question):
I have followed the answers and it's now working fine. I'm trying to create an archive with the files that match that pattern after I've found them. The archive was created however the way I wrote the code the whole path to the file gets included in the archive (all the folders from / up to the file). I just want the file to be included in the final .zip not the whole directories and subdirectories that make the path to it.
Here is the code. The generation of the .zipfile is at the bottom. Please give me a tip how could I solve this I've tried many things but none worked. Thanks:
# Project properties:
# Recursively read the contents of the 'Main' folder which contains files with different names.
# Select only the files whose name begin with letter A and contain digits in it. Use regexes for this.
# Archive these files in a folder named 'Created_Archive' in the project directory. Give the archive a name of your choosing.
# Files that you should find are:
# Aerials3.txt, Albert0512.txt, Alberto1341.txt
########################################################################################################################################
import os
import re
import zipfile
from pathlib import Path
# Get to the proper working directory
working_directory = os.getcwd()
if working_directory != "/home/paul/Desktop/Python_Tutorials/Projects/Files_And_Archive":
working_directory = "/home/paul/Desktop/Python_Tutorials/Projects/Files_And_Archive"
os.chdir(working_directory)
# Remove a files.zip left in the working directory, if one exists.
check_archive = Path(os.getcwd() + "/" + "files.zip")
if check_archive.is_file():
print("Yes. Deleting it and creating it.")
os.unlink(os.getcwd() + "/" + "files.zip")
else:
print("No. Creating it.")
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]+.*')
found_files = []
# Create the zipfile object that we will use to create our archive
fileArchive = zipfile.ZipFile('files.zip', 'a')
# Recursively read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
for filename in filenames:
if fileRegex.match(filename):
# The stored value is the full path: folderName + "/" + filename.
found_files.append(folderName + "/" + filename)
# Check all files have been found and create the archive. If the archive already exists
# delete it.
for file in found_files:
print(file)
# NOTE(review): write() receives no arcname, so the archive member is named
# after the path in `file` - presumably why the directories show up in the
# zip; confirm against the zipfile.ZipFile.write documentation.
fileArchive.write(file, compress_type=zipfile.ZIP_DEFLATED)
fileArchive.close()
re.findall works on strings, not on lists, so it is better to apply fileRegex.match to each item of the list and keep the ones that actually match:
found_files = [s for s in all_files if fileRegex.match(s)]
regex works on strings not lists. the following works
import re
import os
# Create the regex object that we will use to find our files
# fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
fileRegex = re.compile(r'.*\.py')
all_files = []
found_files = []
working_directory = r"C:\Users\michael\PycharmProjects\work"
# Recursively read the contents of the working directory:
for folderName, subfolders, filenames in os.walk(working_directory):
    for filename in filenames:
        all_files.append(filename)
        # The pattern is applied to one file name (a string) at a time.
        if fileRegex.search(filename):
            found_files.append(filename)
print('all files\n', all_files)
print('\nfound files\n', found_files)
re.findall doesn't take a list of strings. You need re.match .
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
all_files = []
# Recursively read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
    for filename in filenames:
        all_files.append(filename)
# Apply the pattern to each file name individually and keep the matches.
found_files = [file_name for file_name in all_files if fileRegex.match(file_name)]

How to get the files with the biggest size in the folders, change their name and save to a different folder

I need to get files with the biggest size in different folders, change their name to folder name that they belong to and save to a new folder. I have something like this and I got stuck:
import os
# Core settings
rootdir = 'C:\\Users\\X\\Desktop\\humps'
to_save = 'C:\\Users\\X\\Desktop\\new'
# For every directory under rootdir, collect the paths of its .jpg files and
# try to pick the biggest one.
for root, dirs, files in os.walk(rootdir):
new_list = []
for file in files:
if file.endswith(".jpg"):
try:
print(file)
os.chdir(to_save)
# Text after "humps\" up to the next backslash in the current directory path.
add_id = root.split("humps\\")[1]
add_id = add_id.split("\\")[0]
file_name = os.path.join(root,file)
new_list.append(file_name)
# NOTE(review): new_list holds plain path strings, which have no .stat()
# method; the resulting AttributeError is swallowed by the bare except below.
bigfile = max(new_list, key=lambda x: x.stat().st_size)
except:
pass
To make it clearer: let's say the name of the sub-folder is "elephant" and there are different elephant photos and subfolders in this elephant folder. I want to go through those photos and subfolders, find the elephant photo with the biggest size, name it elephant and save it to my target folder. I also want to repeat this for other sub-folders such as lion, puma, etc.
How I could achieve what I want ?
To find biggest file and save to another location
import os
import shutil

f_list = []
root = "path/to/directory"
root = os.path.abspath(root)
# Collect the full path of every file below root.
for folder, subfolders, files in os.walk(root):
    for file in files:
        filePath = os.path.join(folder, file)
        f_list.append(filePath)
# max() raises ValueError when no file was found under root.
biggest_file = max(f_list, key=os.path.getsize)
new_path = "path/where/you/want/to/save"
shutil.copy(biggest_file, new_path)
if you want only images then add one more condition in loop
for folder, subfolders, files in os.walk(root):
for file in files:
if file.endswith(".jpg"):
filePath = os.path.join(folder, file)
f_list.append(filePath)
To get all folders biggest file
root = "demo"
root = os.path.abspath(root)
def test(path):
    """Return ``(directory, file name)`` pairs for the biggest file of each directory.

    Every directory under ``path`` (including ``path`` itself) that directly
    contains at least one regular file contributes one tuple: the directory
    path and the bare name of its largest file.
    """
    big_files = []
    for directory, _subdirs, names in os.walk(path):
        # Resolve names against their own directory; os.listdir-style bare
        # names would otherwise be checked relative to the current directory.
        sizes = {
            name: os.path.getsize(os.path.join(directory, name))
            for name in names
            if os.path.isfile(os.path.join(directory, name))
        }
        if sizes:
            big_files.append((directory, max(sizes, key=sizes.get)))
    return big_files
print(test(root))
How to get the files with the biggest size in the folders, change their name and save to a different folder
Basically you already have a good description of what you need to do. You just need to follow it step by step:
get all files in some search directory
filter for relevant files ("*.jpg")
get their sizes
find the maximum
copy to new directory with name of search directory
IMO it's an important skill to be able to break down a task into smaller tasks. Then, you just need to implement the smaller tasks and combine:
def iterate_files_recursively(directory="."):
    """Yield an ``os.DirEntry`` for every non-directory entry under ``directory``.

    Sub-directories are descended into recursively; entries for which
    ``is_dir()`` is false are yielded as they are found.
    """
    # The with-block closes the scandir iterator even when the caller stops
    # consuming the generator early.
    with os.scandir(directory) as entries:
        for entry in entries:
            if entry.is_dir():
                yield from iterate_files_recursively(entry.path)
            else:
                yield entry
files = iterate_files_recursively(subfolder_name)
I'd use os.scandir because it avoids building up a (potentially) huge list of files in memory and instead allows me (via a generator) to work one file at a time. Note that starting with 3.6 you can use the result of os.scandir as a context manager (with syntax).
images = itertools.filterfalse(lambda f: not f.path.endswith('.jpg'), files)
Filtering is relatively straightforward, except for the (IMO strange) choice of itertools.filterfalse, which keeps only the elements for which its predicate returns False.
biggest = max(images, key=(lambda img: img.stat().st_size))
This is two steps in one: Get the maximum with the builtin max function, and use the file size as "key" to establish an order. Note that this raises a ValueError if you don't have any images ... so you might want to supply default=None or handle that exception.
shutil.copy(biggest.path, os.path.join(target_directory, subfolder_name + '.jpg'))
shutil.copy copies the file and some metadata. Instead of hardcoding path separators, please use os.path.join!
Now all of this assumes that you know the subfolder_name. You can scan for those easily, too:
def iterate_directories(directory='.'):
    """Yield an ``os.DirEntry`` for each immediate sub-directory of ``directory``."""
    # The with-block closes the scandir iterator once iteration ends or is abandoned.
    with os.scandir(directory) as entries:
        for entry in entries:
            if entry.is_dir():
                yield entry
Here's some code that does what you want. Instead of using the old os.walk function, it uses modern pathlib functions.
The heart of this code is the recursive biggest function. It scans all the files and directories in folder, saving the matching file names to the files list, and recursively searching any directories it finds. It then returns the path of the largest file that it finds, or None if no matching files are found.
from pathlib import Path
import shutil
def filesize(path):
    """Return ``path.stat().st_size`` for the given path object."""
    return path.stat().st_size
def biggest(folder, pattern):
    ''' Find the biggest file in folder that matches pattern
        Search recursively in all subdirectories

        folder is a pathlib path; pattern is passed to Path.match().
        Returns the path of the largest matching file, or None when no
        file matches anywhere below folder.
    '''
    candidates = []
    for entry in folder.iterdir():
        if entry.is_file():
            if entry.match(pattern):
                candidates.append(entry)
        elif entry.is_dir():
            # Each sub-directory contributes only its own biggest match.
            found = biggest(entry, pattern)
            if found is not None:
                candidates.append(found)
    return max(candidates, key=lambda p: p.stat().st_size, default=None)
def copy_biggest(src, dest, pattern):
    ''' Find the biggest file in each folder in src that matches pattern
        and copy it to dest, using the folder's name as the new file name

        src and dest are pathlib paths. Folders without a matching file are
        skipped. Each copy is reported with print().
    '''
    for path in src.iterdir():
        if path.is_dir():
            found = biggest(path, pattern)
            if found:
                # Use only the folder's own name: joining the whole `path`
                # onto dest would drop dest entirely when path is absolute.
                newname = dest / path.name
                print(path, ':', found, '->', newname)
                shutil.copyfile(found, newname)
You can call it like this:
rootdir = r'C:\Users\X\Desktop\humps'
to_save = r'C:\Users\X\Desktop\new'
copy_biggest(Path(rootdir), Path(to_save), '*.jpg')
Note that the copied files will have the same name as the top-level folder in rootdir that they were found in, with no file extension. If you want to give them a .jpg extension, you can change
newname = dest / path
to
newname = (dest / path).with_suffix('.jpg')
The shutil module on older versions of Python 3 doesn't understand pathlib paths. But that's easy enough to remedy. In the copy_biggest function, replace
shutil.copyfile(found, newname)
with
shutil.copyfile(str(found), str(newname))

limit no of filepaths using os.walk where huge files in a directory

I have an application. One method which will allow a directory path and returns list of file paths under given directory using os.walk
I would like to read a limited number of files (some threshold value, e.g. 20 file paths) from a directory that contains a huge number of files and store them in a Queue. Here I can check each file path against its status in a database.
Next time when i call the same method with same directory, it should return next set of file paths by excluding already returned file paths.
Scenario:
Lets assume, D:/Sample_folder has 1000 no of files.
my_dir = "D:/Sample_folder"
def read_files(directory):
    """Return the paths of all files under ``directory``, walking it recursively.

    Paths are built as ``root/filename`` and listed in ``os.walk`` order.
    """
    file_paths = []
    for root, directories, files in os.walk(directory):
        file_paths.extend(os.path.join(root, filename) for filename in files)
    return file_paths
read_files(my_dir) == which will give first 100 no of files in first turn
Next turn, it should give remaining set of 100 files
like so...
Any ideas or sample scripts for this.
Assuming you already have files populated, this should do.
try:
    import queue  # Python 3
except ImportError:
    import Queue as queue  # Python 2

paths = queue.Queue()
current_list = []
for path in files:
    current_list.append(path)
    # Enqueue a batch as soon as it holds 100 paths.
    if len(current_list) == 100:
        paths.put(current_list)
        current_list = []
# Enqueue the final, partially filled batch; without this the last paths
# (or all of them when there are fewer than 100) would never be queued.
if current_list:
    paths.put(current_list)
EDIT:
Here is a possible solution using a class, but it doesn't add much code. The main idea is to pop off an element each time it gets accessed. So the workflow is to make a FileListIter object, then call .next() on it to return a list of the next 100 files to do something with and then the object forgets them. You can call .has_next() to check if you're out of files. If you pass an argument to next like .next(2), then it will instead give back the first 2 files in a list.
CODE:
import os
class FileListIter(object):
    """Hand out the file paths found under a directory in successive batches.

    ``self.files`` holds the paths that have not been returned yet.
    """

    def __init__(self, directory):
        """Collect the path of every file under ``directory`` via ``os.walk``."""
        file_paths = []
        for root, directories, files in os.walk(directory):
            for filename in files:
                file_paths.append(os.path.join(root, filename))
        self.files = file_paths

    def next(self, n=100):
        """Return the next ``n`` paths (default 100) and forget them.

        Fewer than ``n`` paths (possibly none) are returned once the list
        runs out.
        """
        ret, self.files = self.files[:n], self.files[n:]
        return ret

    def has_next(self):
        """Return True while there are paths that have not been handed out."""
        return len(self.files) > 0
d = '/home/rob/stack_overflow'
files_gen = FileListIter(d)  # <-- this makes an object
# Consume the paths two at a time until none are left.
while files_gen.has_next():
    file_subset = files_gen.next(2)
    print(file_subset)

Python: Loop through directory, check if certain amount of files is in there, if not; copy 2 files from other directory and one file based on name

I'm still in the learning proces of python.
I'm trying to make a script that does the following:
Loop through directories based on today's date (so if I run it tomorrow, it'll look for the folders with tomorrow's date in their name).
Check if there are .pdf files in them.
If there aren't any .pdf files in them: copy two standard ones from another directory, plus one chosen by the name of the Excel file. (So let's say the Excel file is called Excelfile45; then it should copy the PDF file called "45".) EDIT: It can also be based on the directory name if that is an easier way of doing things.
So this is I got this far:
import os, fnmatch
def findDir(path, filter):
    """Yield the full path of every file under ``path`` whose name matches ``filter``.

    ``filter`` is a shell-style pattern handed to ``fnmatch.filter`` (for
    example ``'*.pdf'``); the tree is walked with ``os.walk``.
    """
    for root, dirs, files in os.walk(path):
        for file in fnmatch.filter(files, filter):
            yield os.path.join(root, file)
# Print every PDF found below C:\new.
for pdfFile in findDir(r'C:\new', '*.pdf'):
    print(pdfFile)
Its runs through the directories and looks for PDF's in them. But now I've got no clue on how to continue.
Any help is greatly appreciated!
Also my apologies for any grammar / spelling errors.
Your specs are pretty vague, so I had to assume a lot of things. I think this code achieves what you want, but you may have to tweak it a bit (for example date format in the directory name).
I assumed a directory structure like this:
c:\new (base dir)
daily_2014_12_14
daily_2014_12_15
...
standard
And the code:
import os
import fnmatch
import time
import shutil
import re
# directories
# Raw string: in a normal literal the "\n" of "C:\new" is a newline character.
base_dir = r"C:\new"
standard_dir = os.path.join(base_dir, "standard")
# find files in directory. based on yours, but modified to return a list.
def find_dir(path, name_filter):
    """Return a list with the full path of every file under ``path`` matching ``name_filter``.

    ``name_filter`` is a shell-style pattern handed to ``fnmatch.filter``.
    """
    result = []
    for root, dirs, files in os.walk(path):
        result.extend(
            os.path.join(root, filename)
            for filename in fnmatch.filter(files, name_filter)
        )
    return result
# getting today's directory. you can rearrange year-month-day as you want.
def todays_dir():
    """Return the path of today's folder: ``base_dir/daily_<YYYY_MM_DD>``."""
    date_str = time.strftime("%Y_%m_%d")
    return os.path.join(base_dir, "daily_" + date_str)
# copy a file from one directory to another
def copy(filename, from_dir, to_dir):
    """Copy ``filename`` from ``from_dir`` to ``to_dir``, keeping its name.

    The copy is done with ``shutil.copyfile``.
    """
    from_file = os.path.join(from_dir, filename)
    to_file = os.path.join(to_dir, filename)
    shutil.copyfile(from_file, to_file)
# main logic
today_dir = todays_dir()
pdfs = find_dir(today_dir, "*.pdf")
excels = find_dir(today_dir, "*.xls")
if not pdfs:
    # No PDFs yet: copy the two standard ones.
    copy("standard1.pdf", standard_dir, today_dir)
    copy("standard2.pdf", standard_dir, today_dir)
    if len(excels) == 1:
        # Take the number from the file name only; the directory part of the
        # path contains the date digits and must not be picked up.
        excel_name = os.path.splitext(os.path.basename(excels[0]))[0]
        numbers = re.findall(r"\d+", excel_name)
        if numbers:
            copy(numbers[-1] + ".pdf", standard_dir, today_dir)
Also: I agree with Iplodman's comment. Show us a bit more effort next time.

Categories

Resources