remove parent folders in dataset - python

I want to remove parent folders in this dataset.
for example I want to change the directory from "images256/w/waiting_room/" to "images256/wating_room/" for all of parent's folders what you see I've deleted "w" parent folder from directory.

Since you flagged this question with python, I assume you want to use python for this. You can use the shutil package for this
import os
import shutil
base_folder = "images256"
inter_folder = "w"
child_folder = "waiting_room"
source_folder = os.path.join(base_folder, inter_folder, child_folder)
destination_folder = os.path.join(base_folder, child_folder)
for file_name in os.listdir(source_folder):
source = os.path.join(source_folder, file_name)
destination = os.path.join(destination_folder, file_name)
shutil.move(source, destination)
print('Moved:', file_name)
shutil.rmtree(source_folder)
You can also use shutil.copytree(src,dest) but since we are talking about datasets, that may be large, moving the files will be faster and you won't run out of space. Be sure to only have files inside your source folder, otherwise maybe rewrite it to a recursive function to move all contents in deeper nodes.

Related

Move files one by one to newly created directories for each file with Python 3

What I have is an initial directory with a file inside D:\BBS\file.x and multiple .txt files in the work directory D:\
What I am trying to do is to copy the folder BBS with its content and incrementing it's name by number, then copy/move each existing .txt file to the newly created directory to make it \BBS1, \BBS2, ..., BBSn (depends on number of the txt).
Visual example of the Before and After:
Initial view of the \WorkFolder
Desired view of the \WorkFolder
Right now I have reached only creating of a new directory and moving txt in it but all at once, not as I would like to. Here's my code:
from pathlib import Path
from shutil import copy
import shutil
import os
wkDir = Path.cwd()
src = wkDir.joinpath('BBS')
count = 0
for content in src.iterdir():
addname = src.name.split('_')[0]
out_folder = wkDir.joinpath(f'!{addname}')
out_folder.mkdir(exist_ok=True)
out_path = out_folder.joinpath(content.name)
copy(content, out_path)
files = os.listdir(wkDir)
for f in files:
if f.endswith(".txt"):
shutil.move(f, out_folder)
I kindly request for assistance with incrementing and copying files one by one to the newly created directory for each as mentioned.
Not much skills with python in general. Python3 OS Windows
Thanks in advance
Now, I understand what you want to accomplish. I think you can do it quite easily by only iterating over the text files and for each one you copy the BBS folder. After that you move the file you are currently at. In order to get the folder_num, you may be able to just access the file name's characters at the particular indexes (e.g. f[4:6]) if the name is always of the pattern TextXX.txt. If the prefix "Text" may vary, it is more stable to use regular expressions like in the following sample.
Also, the function shutil.copytree copies a directory with its children.
import re
import shutil
from pathlib import Path
wkDir = Path.cwd()
src = wkDir.joinpath('BBS')
for f in os.listdir(wkDir):
if f.endswith(".txt"):
folder_num = re.findall(r"\d+", f)[0]
target = wkDir.joinpath(f"{src.name}{folder_num}")
# copy BBS
shutil.copytree(src, target)
# move .txt file
shutil.move(f, target)

Ignoring folders when organizing files

I am fairly new to python, and trying to write a program that organizes files based on their extensions
import os
import shutil
newpath1 = r'C:\Users\User1\Documents\Downloads\Images'
if not os.path.exists(newpath1): # check to see if they already exist
os.makedirs(newpath1)
newpath2 = r'C:\Users\User1\Documents\Downloads\Documents'
if not os.path.exists(newpath2):
os.makedirs(newpath2)
newpath3 = r'C:\Users\User1\Documents\Downloads\Else'
if not os.path.exists(newpath3):
os.makedirs(newpath3)
source_folder = r"C:\Users\User1\Documents\Downloads" # the location of the files we want to move
files = os.listdir(source_folder)
for file in files:
if file.endswith(('.JPG', '.png', '.jpg')):
shutil.move(os.path.join(source_folder,file), os.path.join(newpath1,file))
elif file.endswith(('.pdf', '.pptx')):
shutil.move(os.path.join(source_folder,file), os.path.join(newpath2,file))
#elif file is folder:
#do nothing
else:
shutil.move(os.path.join(source_folder,file), os.path.join(newpath3,file))
I want it to move files based on their extensions. However, I am trying to figure out how to stop the folders from moving. Any help would be greatly appreciated.
Also, for some reason, not every file is being moved, even though they have the same extension.
As with most path operations, I recommend using the pathlib module. Pathlib is available since Python 3.4 and has portable (multi platform), high-level API for file system operations.
I recommend using the following methods on Path objects, to determine their type:
Path.is_file()
Path.is_dir()
import shutil
from pathlib import Path
# Using class for nicer grouping of target directories
# Note that pathlib.Path enables Unix-like path construction, even on Windows
class TargetPaths:
IMAGES = Path.home().joinpath("Documents/Downloads/Images")
DOCUMENTS = Path.home().joinpath("Documents/Downloads/Documents")
OTHER = Path.home().joinpath("Documents/Downloads/Else")
__ALL__ = (IMAGES, DOCUMENTS, OTHER)
for target_dir in TargetPaths.__ALL__:
if not target_dir.is_dir():
target_dir.mkdir(exist_ok=True)
source_folder = Path.home().joinpath("Documents/Downloads") # the location of the files we want to move
# Get absolute paths to the files in source_folder
# files is a generator (only usable once)
files = (path.absolute() for path in source_folder.iterdir() if path.is_file())
def move(source_path, target_dir):
shutil.move(str(source_path), str(target_dir.joinpath(file.name))
for path in files:
if path.suffix in ('.JPG', '.png', '.jpg'):
move(path, TargetPaths.IMAGES)
elif path.suffix in ('.pdf', '.pptx'):
move(path, TargetPaths.DOCUMENTS)
else:
move(path, TargetPaths.OTHER)
See here
In particular, the os.walk command. This command returns a 3-tuple with the dirpath, dirname, and filename.
In your case, you should use [x[0] for x in os.walk(dirname)]

Moving Files: Matching Partial File/Directory Criteria (lastName, firstName) - Glob, Shutil

EDIT: ANSWER Below is the answer to the question. I will leave all subsequent text there just to show you how difficult I made such an easy task..
from pathlib import Path
import shutil
base = "C:/Users/Kenny/Documents/Clients"
for file in Path("C:/Users/Kenny/Documents/Scans").iterdir():
name = file.stem.split('-')[0].rstrip()
subdir = Path(base, name)
if subdir.exists():
dest = Path(subdir, file.name)
shutil.move(file, dest)
Preface:
I'm trying to write code that will move hundreds of PDF files from a :/Scans folder into another directory based on the matching client's name. This question is linked below - a very kind person, Elis Byberi, helped assist me in correcting my original code. I'm encountering another problem though..
To see our discussion and a similar question discussed:
-Python- Move All PDF Files in Folder to NewDirectory Based on Matching Names, Using Glob or Shutil
Python move files from directories that match given criteria to new directory
Question: How can you move all of the named files in :/Scans to their appropriately matched folder in :/Clients.
Background: Here is a breakdown of my file folders to give you a better idea of what I'm trying to do.
Within :/Scans folder I have thousands of PDF files, manually renamed (I tried writing a program to auto-rename.. didn't work) based on client and content, such that the folder encloses PDFs labeled as follows:
lastName, firstName - [contentVariable]
(repeat the above 100,000x)
Within the :/C drive of my computer I have a folder named 'Clients' with sub-folders for each and every client, named similar to the pattern above, as 'lastName, firstName'
EDIT: The code below will move the entire Scans folder to the Clients folder, which is close, but not exactly what I need to be doing. I only need to move the files within Scans to the corresponding Client fold names.
import glob
import shutil
import os
source = "C:/Users/Kenny/Documents/Scans"
dest = "C:/Users/Kenny/Documents/Clients"
os.chdir("C:/Users/Kenny/Documents/Clients")
pattern = '*,*'
for x in glob.glob(pattern):
fileName = os.path.join(source, x)
print(fileName)
shutil.move(source, dest)
EDIT 2 - CLOSE!: The code below will move all the files in Scans to the Clients folder, which is close, but not exactly what I need to be doing. I need to get each file into the correct corresponding file folder within the Clients folder.
This is a step forward from moving the entire Scans folder I would think.
source = "C:/Users/Kenny/Documents/Scans"
dest = "C:/Users/Kenny/Documents/Clients"
for (dirpath, dirnames, filenames) in walk(source):
for file in filenames:
shutil.move(path.join(dirpath,file), dest)
I have the following code below as well, and I am aware it does not do what I want it to do, so I am definitely missing something..
import glob
import shutil
import os
path = "C:/Users/Kenny/Documents/Scans"
dirs = os.listdir(path)
for file in dirs:
print(file)
dest_dir = "C:/Users/Kenny/Documents/Clients/{^w, $w}?"
for file in glob.glob(r'C:Users/Kenny/Documents/Clients/{^w, $w}?'):
print(file)
shutil.move(file, dest_dir)
1) Should I use os.scandir instead of os.listdir ?
2) Am I moving in the correct direction if I modify the code as such:
import glob
import shutil
import os
path = "C:/Users/Kenny/Documents/Scans"
dirs = os.scandir(path)
for file in dirs:
print(file)
dest_dir = "C:/Users/Kenny/Documents/Clients/*"
for file in glob.glob(r'C:Users/Kenny/Documents/Clients, *'):
dest_dir = os.path.join(file, glob.glob)
shutil.move(file, dest_dir)
Note within the 'for file in glob.glob(r'C:Users/Kenny/Documents/Clients/{^w, $w}?' I have tried replacing 'Clients/{^w, $w}?' with just 'Clients/*'
For the above, I only need the file in :/Scans, written as, "lastName, firstName - [content]" to be matched and moved to /Clients/[lastName, firstName] --- the [content] does not matter. But there are both greedy and nongreedy expressions... which is why I'm unsure about using * or {^w, $w}? -- because we have clients with the same last names, but different first names.
The following error is generated when running the first command:
Error 1
Error 2
The following error (though, there is no error?) is generated when running the second command:
Error 3
EDIT/POSSIBLE ANSWER
Have not yet tested this but, fnmatch(filename, pattern), or, fnmatch.translate(pattern) can be used to test whether the filename string matches the pattern string, returning True or False.
From here perhaps you could write a conditional statement..
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
shutil.move(source, destination)
or
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
shutil.move(file.join(eachFile, source), destination)
I have not tested the two aforesaid codes. I have no idea if they work, but editing allows others to see how my train of thought is progressing.

Writing zipfile in Python 3.6 without absolute path

I am trying to write a zip file using Python's zipfile module that starts at a certain subfolder but still maintains the tree structure from that subfolder. For example, if I pass "C:\Users\User1\OneDrive\Documents", the zip file will contain everything from Documents onward, with all of Documents' subfolders maintained within Documents. I have the following code:
import zipfile
import os
import datetime
def backup(src, dest):
"""Backup files from src to dest."""
base = os.path.basename(src)
now = datetime.datetime.now()
newFile = f'{base}_{now.month}-{now.day}-{now.year}.zip'
# Set the current working directory.
os.chdir(dest)
if os.path.exists(newFile):
os.unlink(newFile)
newFile = f'{base}_{now.month}-{now.day}-{now.year}_OVERWRITE.zip'
# Write the zipfile and walk the source directory tree.
with zipfile.ZipFile(newFile, 'w') as zip:
for folder, _ , files in os.walk(src):
print(f'Working in folder {os.path.basename(folder)}')
for file in files:
zip.write(os.path.join(folder, file),
arcname=os.path.join(
folder[len(os.path.dirname(folder)) + 1:], file),
compress_type=zipfile.ZIP_DEFLATED)
print(f'\n---------- Backup of {base} to {dest} successful! ----------\n')
I know I have to use the arcname parameter for zipfile.write(), but I can't figure out how to get it to maintain the tree structure of the original directory. The code as it is now writes every subfolder to the first level of the zip file, if that makes sense. I've read several posts suggesting I use os.path.relname() to chop off the root, but I can't seem to figure out how to do it properly. I am also aware that this post looks similar to others on Stack Overflow. I have read those other posts and cannot figure out how to solve this problem.
The arcname parameter will set the exact path within the zip file for the file you are adding. You issue is when you are building the path for arcname you are using the wrong value to get the length of the prefix to remove. Specifically:
arcname=os.path.join(folder[len(os.path.dirname(folder)) + 1:], file)
Should be changed to:
arcname=os.path.join(folder[len(src):], file)

How to check if multiple files exist in different directories

I know how to use python to check to see if a file exists, but what I am after is trying to see if multiple files of the same name exist throughout my working directory. Take for instance:
gamedata/areas/
# i have 2 folders in this directory
# testarea and homeplace
1. gamedata/areas/testarea/
2. gamedata/areas/homeplace/
Each folder of homeplace and testarea for instance contains a file called 'example'
Is there a pythonic way to use 'os' or similiar to check to see if the file 'example' can be found in both testarea and homeplace?
Although is their a way to do this without manually and statically using
os.path.isfile()
because throughout the life of the program new directories will be made, and I don't want to constantly go back into the code to change it.
You can check in every directory bellow gamedata/areas/:
This only goes down one level, you could extend it to go down as many levels as you want.
from os import listdir
from os.path import isdir, isfile, join
base_path = "gamedata/areas/"
files = listdir(base_path)
only_directories = [path for path in files if isdir(join(base_path,path))]
for directory_path in only_directories:
dir_path = join(base_path, directory_path)
for file_path in listdir(dir_path):
full_file_path = join(base_path, dir_path, file_path)
is_file = isfile(full_file_path)
is_example = "example" in file_path
if is_file and is_example:
print "Found One!!"
Hope it helps!
Maybe something like
places = ["testarea", "homeplace"]
if all(os.path.isfile(os.path.join("gamedata/areas/", x, "example") for x in places)):
print("Missing example")
If the condition is false, this doesn't tell you which subdirectory does not contain the file example, though. You can update places as necessary.
As I mentioned in the comments, os.walk is your friend:
import os
ROOT="gamedata/areas"
in_dirs = [path for (path, dirs, filenames)
in os.walk(ROOT)
if 'example' in filenames]
in_dirs will be a list of subdirectories where example is found

Categories

Resources