Creating folders by checking numbers at beginning of a string with python - python

I have a list of strings which contains files like below
filename = [ '000101 FL - Project Title Page.DOC',
'014200 FL - References.DOC',
'095446 FL - Fabric-Wrapped Ceiling Panels.DOC',
'142113 FL - ELECTRIC TRACTION FREIGHT ELEVATORS.DOC']
I want to check if a folder with a name consisting of Div + first two numbers in each string exist, such as Div00, Div01, Div09, Div14 in this case. If not I would like to create this folder. Then store the name of the file in this folder.
In pseudocode I believe it would be similar to
for file in filenames
if 'Div' + file[0][0] not a folder
make folder 'Div' + file[0][0]
add file to folder
else
add file to folder Div + file[0][0]
There will be multiple files starting with the same two numbers this is why I want to sort them into folder.
Let me know if you need any clarification.

Use os.mkdir to create a directory and shutil.copy2 to copy a file,
import os
import shutil
filenames = [ '000101 FL - Project Title Page.DOC']
for filename in filenames:
folder = 'Div' + filename[:2] # 'Div00'
# Create the folder if doesn't exist
if not os.path.exists(folder):
os.makedirs(folder)
# Copy the file to `folder`
if os.path.isfile(filename):
shutil.copy2(filename, folder) # metadata is copied as well

You can just check if there is a folder and make it if it does not exist
if not os.path.exists(dirName):
os.makedirs(dirName)

Try something like this:
import os
import shutil
for file in filenames
dir_name = "Div%s" % file[0:2]
if not os.path.isdir(dir_name)
os.makedirs(dir_name)
shutil.copy(file, dir_name)

Related

How to Iterate over several directory levels and move files based on condition

I would like some help to loop through some directories and subdirectories and extracting data. I have a directory with three levels, with the third level containing several .csv.gz files. The structure is like this
I need to access level 2 (where subfolders are) of each folder and check the existence of a specific folder (in my example, this will be subfolder 3; I left the other folders empty for this example, but in real cases they will have data). If checking returns True, then I want to change the name of files within the target subfolder3 and transfer all files to another folder.
Bellow is my code. It is quite cumbersome and there is probably better ways of doing it. I tried using os.walk() and this is the closest I got to a solution but it won't move the files.
import os
import shutil
def organizer(parent_dir, target_dir, destination_dir):
for root, dirs, files in os.walk(parent_dir):
if root.endswith(target_dir):
target = root
for files in os.listdir(target):
if not files.startswith("."):
# this is to change the name of the file
fullname = files.split(".")
just_name = fullname[0]
csv_extension = fullname[1]
gz_extension = fullname[2]
subject_id = target
#make a new name
origin = subject_id + "/"+ just_name + "." + csv_extension + "." + gz_extension
#make a path based on this new name
new_name = os.path.join(destination_dir, origin)
#move file from origin folder to destination folder and rename the file
shutil.move(origin, new_name)
Any suggestions on how to make this work and / or more eficient?
simply enough, you can use the built-in os module, with os.walk(path) returns you root directories and files found
import os
for root, _, files in os.walk(path):
#your code here
for your problem, do this
import os
for root, dirs, files in os.walk(parent_directory);
for file in files:
#exctract the data from the "file"
check this for more information os.walk()
and if you want to get the name of the file, you can use os.path.basename(path)
you can even check for just the gzipped csv files you're looking for using built-in fnmatch module
import fnmathch, os
def find_csv_files(path):
result = []
for root, _, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, "*.csv.gz"): # find csv.gz using regex paterns
result.append(os.path.join(root, name))
return list(set(results)) #to get the unique paths if for some reason duplicated
Ok, guys, I was finally able to find a solution. Here it is. Not the cleanest one, but it works in my case. Thanks for the help.
def organizer(parent_dir, target_dir, destination_dir):
for root, dirs, files in os.walk(parent_dir):
if root.endswith(target_dir):
target = root
for files in os.listdir(target):
#this one because I have several .DS store files in the folder which I don't want to extract
if not files.startswith("."):
fullname = files.split(".")
just_name = fullname[0]
csv_extension = fullname[1]
gz_extension = fullname[2]
origin = target + "/" + files
full_folder_name = origin.split("/")
#make a new name
new_name = full_folder_name[5] + "_"+ just_name + "." + csv_extension + "." + gz_extension
#make a path based on this new name
new_path = os.path.join(destination_dir, new_name)
#move file from origin folder to destination folder and rename the file
shutil.move(origin, new_path)
The guess the problem was that was passing a variable that was a renamed file (in my example, I wrongly called this variable origin) as the origin path to shutil.move(). Since this path does not exist, then the files weren't moved.

Trying to pass files from a subfolder to a new folder after Filtering

I am trying to make a code that allows me to copy files from various subfolders to a new folder. The main problem and why it wasn't a straight forward thing to deal with since the beginning is that all the files start the same and end the same, just 1 letter changes in the file that I need.
I keep getting an error saying: "FileNotFoundError: [Errno 2] No such file or directory: '20210715 10.1_Plate_R_p00_0_B04f00d0.TIF'"
So far this is what I have:
import os,fnmatch, shutil
mainFolder = r"E:\Screening\202010715\20210715 Screening"
dst = r"C:\Users\**\Dropbox (**)\My PC\Desktop\Screening Data\20210715"
for subFolder in os.listdir(mainFolder):
sf = mainFolder+"/"+subFolder
id os.path.isdir(sf):
subset = fnmatch.filter(os.listdir(sf), "*_R_*")
for files in subset:
if '_R_' in files:
shutil.copy(files, dst)
PLEASE HELP!
The issue is that you aren't providing the full path to the file to shutil.copy, so it assumes the file exists in the folder you are running the script from. Another issue the code has, is that the subset variable gets replaced in each loop before you are able to copy the files you matched in the next loop.
import os, fnmatch, shutil
mainFolder = r"E:\Screening\202010715\20210715 Screening"
dst = r"C:\Users\**\Dropbox (**)\My PC\Desktop\Screening Data\20210715"
matches = [] # To store the matches
for subFolder in os.listdir(mainFolder):
sf = mainFolder + "/" + subFolder
if os.path.isdir(sf):
matches.extend([f'{sf}/{file}' for file in fnmatch.filter(os.listdir(sf), "*_R_*")]) # Append the full path to the file
for files in matches:
shutil.copy(files, dst)

Python OS - using 'os.listdir' to view a directory that returns a list of folders. How do I view the contents of the subfolders?

I have been set a challenge to browse a directory that contains mostly empty folders - the flag(answer) is in one of them. I have used os module to see all the names of the folders - they are all named 'folder-' plus a number between 1 and 200. How do I view what is inside them?
You should use os.walk() instead of litdir() like as
import os
import os.path
for dirpath, dirnames, filenames in os.walk("."):
for file in filenames:
print(file)
import os
def getListOfFiles(dirName):
# create a list of file and sub directories
# names in the given directory
File_list = os.listdir(dirName)
Files = list()
# Iterate over all the entries
for entry in File_list:
# Create full path
fullPath = os.path.join(dirName, entry)
# If entry is a directory then get the list of files in this directory
if os.path.isdir(fullPath):
Files = Files + getListOfFiles(fullPath)
else:
Files.append(fullPath)
return Files
#Call the above function to create a list of files in a directory tree i.e.
dirName = 'C:/Users/huzi95s/Desktop/Django';
# Get the list of all files in directory tree at given path
listOfFiles = getListOfFiles(dirName)

Python - how to change directory

I am doing a school assignment where I have to take input from a user and save it to a text file.
My file structure will be something like:
- Customer register
- Customer ID
- .txt files 1-5
It can be saved in the python folder and I can make the folders like this:
os.makedirs("Customer register/Customer ID")
My question is, how do I set the path the text files are to be stored in, in the directory when I don't know the directory? So that no matter where the program is run it is saved in the "Customer ID" folder I create (but on the computer the program is run on)?
Also, how do I make this work on both windows and mac?
I also want to program to be able to be executed several times, and check if the folder is there and save to the "Customer ID" folder if it already exists. Is there a way to do that?
EDIT:
This is the code I am trying to use:
try:
dirs = os.makedirs("Folder")
path = os.getcwd()
os.chdir(path + "/Folder")
print (os.getcwd())
except:
if os.path.exists:
path = os.getcwd()
unique_filename = str(uuid.uuid4())
customerpath = os.getcwd()
os.chdir(customerpath + "/Folder/" + unique_filename)
I am able to create a folder and change the directory (everything in "try" works as I want).
When this folder is created I want to create a second folder with a random generated folder name (used for saving customer files). I can't get this to work in the same way.
Error:
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\Users\48736\PycharmProjects\tina/Folder/979b9026-b2f6-4526-a17a-3b53384f60c4'
EDIT 2:
try:
os.makedirs("Folder")
path = os.getcwd()
os.chdir(path + "/Folder")
print (os.getcwd())
except:
if os.path.exists:
path = os.getcwd()
os.chdir(os.path.join(path, 'Folder'))
print(os.getcwd())
def userId(folderid):
try:
if not os.path.exists(folderid):
os.makedirs(folderid)
except:
if os.path.exists(folderid):
os.chdir(path + "/Folder/" + folderid)
userId(str(uuid.uuid4()))
print(os.getcwd())
So I can now create a folder, change directory to the folder I have created and create a new folder with a unique filename within that folder.
But I can't change the directory again to the folder with the unique filename.
Any suggestions?
I have tried:
os.chdir(path + "/Folder/" + folderid)
os.chdir(path, 'Folder', folderid)
os.chdir(os.path.join(path, 'Folder', folderid))
But is still just stays in: C:\Users\47896\PycharmProjects\tina\Folder
You can use relative paths in your create directory command, i.e.
os.makedirs("./Customer register/Customer ID")
to create folder in project root (=where the primary caller is located) or
os.makedirs("../Customer register/Customer ID") in parent directory.
You can, of course, traverse the files tree as you need.
For specific options mentioned in your question, please, see makedirs documentation at Python 3 docs
here is solution
import os
import shutil
import uuid
path_on_system = os.getcwd() # directory where you want to save data
path = r'Folder' # your working directory
dir_path = os.path.join(path_on_system, path)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
file_name = str(uuid.uuid4()) # file which you have created
if os.path.exists(file_name) and os.path.exists(dir_path):
shutil.move(file_name,os.path.join(dir_path,file_name))
else:
print(" {} does not exist".format(file_name))

Python's re.findAll() function won't work as expected

I'm trying to create a python script that will find all the files from a working directory with a certain name pattern.
I have stored all files in a list and then I have tried applying the re.findall method on the list to obtain only a list of files with that name pattern.
I have written this code:
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
all_files = []
# Recursevly read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
for filename in filenames:
all_files.append(filename)
found_files = fileRegex.findall(all_files)
I get this error at the last line of the code:
TypeError: expected string or bytes-like object
I have also tried re.findall(all_files) instead of using the 'fileRegex' created prior to that line. Same error. Please tell me what am I doing wrong. Thank you so much for reading my post!
Edit(second question):
I have followed the answers and it's now working fine. I'm trying to create an archive with the files that match that pattern after I've found them. The archive was created however the way I wrote the code the whole path to the file gets included in the archive (all the folders from / up to the file). I just want the file to be included in the final .zip not the whole directories and subdirectories that make the path to it.
Here is the code. The generation of the .zipfile is at the bottom. Please give me a tip how could I solve this I've tried many things but none worked. Thanks:
# Project properties:
# Recursively read the contents of the 'Main' folder which contains files with different names.
# Select only the files whose name begin with letter A and contain digits in it. Use regexes for this.
# Archive these files in a folder named 'Created_Archive' in the project directory. Give the archive a name of your choosing.
# Files that you should find are:
# Aerials3.txt, Albert0512.txt, Alberto1341.txt
########################################################################################################################################
import os
import re
import zipfile
from pathlib import Path
# Get to the proper working directory
working_directory = os.getcwd()
if working_directory != "/home/paul/Desktop/Python_Tutorials/Projects/Files_And_Archive":
working_directory = "/home/paul/Desktop/Python_Tutorials/Projects/Files_And_Archive"
os.chdir(working_directory)
check_archive = Path(os.getcwd() + "/" + "files.zip")
if check_archive.is_file():
print("Yes. Deleting it and creating it.")
os.unlink(os.getcwd() + "/" + "files.zip")
else:
print("No. Creating it.")
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]+.*')
found_files = []
# Create the zipfile object that we will use to create our archive
fileArchive = zipfile.ZipFile('files.zip', 'a')
# Recursevly read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
for filename in filenames:
if fileRegex.match(filename):
found_files.append(folderName + "/" + filename)
# Check all files have been found and create the archive. If the archive already exists
# delete it.
for file in found_files:
print(file)
fileArchive.write(file, compress_type=zipfile.ZIP_DEFLATED)
fileArchive.close()
re.findAll works on strings not on lists, so its better you use r.match over the list to filter the ones that actually matches:
found_files = [s for s in all_files if fileRegex.match(s)]
regex works on strings not lists. the following works
import re
import os
# Create the regex object that we will use to find our files
# fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
fileRegex = re.compile(r'.*\.py')
all_files = []
found_files = []
working_directory = r"C:\Users\michael\PycharmProjects\work"
# Recursevly read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory):
for filename in filenames:
all_files.append(filename)
if fileRegex.search(filename):
found_files.append(filename)
print('all files\n', all_files)
print('\nfound files\n', found_files)
re.findall doesn't take a list of strings. You need re.match .
# Create the regex object that we will use to find our files
fileRegex = re.compile(r'A[0-9]*[a-z]*[0-9]*.*')
all_files = []
# Recursively read the contents of the working_dir/Main folder #:
for folderName, subfolders, filenames in os.walk(working_directory + "/Main"):
for filename in filenames:
all_files.append(filename)
found_files = [file_name for file_name in all_files if fileRegex.match(file_name)]

Categories

Resources