Let's say I have some paths like these:
C:/Test/path_i_need/test2/test3/test4
C:/Test/test2/path_i_need/test3
C:/Test/test2/test3/path_i_need/test4
How can I extract the path that I need in each of these scenarios using Python? For example:
C:/Test/path_i_need
C:/Test/test2/path_i_need
C:/Test/test2/test3/path_i_need
So basically I don't know how many subfolders come before path_i_need or after it; I only need the path up to and including path_i_need, and I don't care what comes after it.
You could do a DFS (depth-first search) from the root directory until you find all the paths you're looking for:
from os import listdir, path
ROOT_DIR = "./example"
FLAG = "example1"
found_dirs = []


def find_dirs(p):
    """Depth-first search below ``p`` for entries named ``FLAG``.

    Every match is appended (as ``p``-relative joined path) to the
    module-level ``found_dirs`` list. A matching entry is recorded but not
    descended into; every other sub-directory is searched recursively.

    Args:
        p: Directory to start searching from.

    Raises:
        OSError: If ``p`` (or a nested directory) cannot be listed.
    """
    for subdir in listdir(p):
        curdir = path.join(p, subdir)
        if subdir == FLAG:
            found_dirs.append(curdir)
        # ``elif`` is the Python spelling; the original ``elsif`` does not parse.
        elif path.isdir(curdir):
            find_dirs(curdir)
find_dirs(ROOT_DIR)
Try this, without using os module or any imports:
# Sample input: one path per line.
paths = """
C:/Test/path_i_need/test2/test3/test4
C:/Test/test2/path_i_need/test3
C:/Test/test2/test3/path_i_need/test4
""".strip().split('\n')

need_this_path = 'path_i_need'
len_that_which_i_need = len(need_this_path)

# Cut each path right after the first occurrence of the wanted name.
extracted_paths = []
for p in paths:
    cut = p.index(need_this_path) + len_that_which_i_need
    extracted_paths.append(p[:cut])

print('\n'.join(extracted_paths))
Outputs:
C:/Test/path_i_need
C:/Test/test2/path_i_need
C:/Test/test2/test3/path_i_need
Related
I have a data structure format as below:
Dataset:
training-
-Cat
-dog
-monkey
I would like to transfer/move 10 percent of the files from each folder of the training dataset to a validation dataset. How can I do this using Python? It should automatically create the directories as well.
Dataset:
validation-
-Cat
-dog
-monkey
You can try:
import os
source = 'C:/.../training/'
destination = 'C:/.../validation/'

# The destination has to exist before anything can be renamed into it.
destination_missing = not os.path.exists(destination)
if destination_missing:
    os.makedirs(destination)

# Note: this renames EVERY entry of the source directory into the
# destination; no sampling takes place.
allfiles = os.listdir(source)
for f in allfiles:
    old_location = source + f
    new_location = destination + f
    os.rename(old_location, new_location)
Try this; it should help. It has only been tested on Ubuntu, not on Windows, so you may need to modify it if the path strings are different on Windows.
Tested on : Python = 3.6.13, numpy = 1.19.2
from glob import glob
import os
import numpy as np
import shutil
def copy_folder(src, dst, percent_keep=0.1):
    """Copy a random sample of the files under ``src`` into ``dst/validation``.

    The directory tree below ``src`` is walked recursively. For every folder
    that directly contains files, ``percent_keep`` of those files (rounded
    down) are picked at random, without repetition, and copied to the
    matching sub-folder of ``<dst>/validation``. The part of ``src`` after
    the last ``"training/"`` is used as that sub-folder; a ``src`` without
    ``"training/"`` in it maps to ``<dst>/validation`` itself.

    Args:
        src: Folder to sample from (normally the training folder or one of
            its sub-folders).
        dst: Folder in which the ``validation`` tree is created.
        percent_keep: Fraction (0..1) of the files of each folder to copy.
    """
    entries = glob(f"{src}/*")
    folders = [entry for entry in entries if os.path.isdir(entry)]
    all_files = [entry for entry in entries if os.path.isfile(entry)]

    print(f"There are {len(folders)} folders in {src.split('training')[-1]}")
    print(f"There are {len(all_files)} files in {src.split('training')[-1]}")

    for folder in folders:
        # iterate through subfolders
        copy_folder(folder, dst, percent_keep)

    if not all_files:
        return

    # Path to be attached to the validation path. When "training/" is not
    # part of src, the plain split would hand back the whole (possibly
    # absolute) src, and os.path.join would then silently discard dst.
    _, marker, tail = src.rpartition("training/")
    remaining_path = tail if marker else ""
    new_path = os.path.join(dst, "validation", remaining_path)
    if not os.path.exists(new_path):
        os.makedirs(new_path)

    # replace=False: every file is picked at most once, so the requested
    # share of distinct files is really copied. The size is capped because
    # sampling without replacement cannot return more items than exist.
    sample_size = min(len(all_files), int(len(all_files) * percent_keep))
    keep_files = np.random.choice(all_files, sample_size, replace=False)

    print(f"Copying {len(keep_files)} random files")
    for index, file in enumerate(keep_files):
        print(f"\rCopying {index+1} / {len(keep_files)}", end="")
        shutil.copyfile(file, os.path.join(new_path, os.path.basename(file)))
    print("")


if __name__ == "__main__":
    src = "/home/user/Dataset/training"  # Should be path to training folder
    # should be path of directory one below training folder
    # (lets say root) it will attach validation folder later in code
    dst = "/home/user/Dataset/"
    copy_folder(src, dst, 0.1)
If you don't want to use numpy to select the random files to copy to the validation folder, use the random library.
Something like:
# random.sample draws WITHOUT replacement, so no file is selected twice
# (random.choices may return the same file several times).
keep_files = random.sample(all_files, k=int(len(all_files) * percent_keep))
If you don't want to use shutil or glob, you can use the os library:
os.listdir() # instead of glob
os.rename() # instead of shutil (maybe something different, not tested)
If you don't want random samples, use:
# Deterministic alternative: take the leading share of the file list.
sample_size = int(len(all_files) * percent_keep)
keep_files = all_files[:sample_size]
Currently I have this list:
# Raw strings keep the backslashes literal; in a normal string literal
# "\U" starts a unicode escape and "\t" / "\a" are control characters.
log_list = [
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\JuniorLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\SeniorLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\BasicLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\PrimaryLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\IntermediateLogFile.txt',
]
I tried using
# NOTE(review): this splits on the letter "s", not on a path separator, so
# which index holds the file name depends on how many "s" characters the
# directory part happens to contain. It also iterates `log_files`, while
# the list in the question is named `log_list`.
log_files_split = [i.split("s")[4] for i in log_files]
but the code doesn't work on other file paths. How can I always get an output list that contains only the text file names, as shown below?
['JuniorLogFile.txt', 'SeniorLogFile.txt', 'BasicLogFile.txt', 'PrimaryLogFile.txt', 'IntermediateLogFile.txt']
Simply you can use os package as follows -
import ntpath
import os

log_list = [
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\JuniorLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\SeniorLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\BasicLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\PrimaryLogFile.txt',
    r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\IntermediateLogFile.txt'
]

# ntpath always applies Windows path rules (both "\" and "/" separate
# components), so the result is the same on every operating system.
# Extracting a name is pure string work: no existence check is made, because
# that would silently drop every path that is not present on this machine.
file_names = [ntpath.basename(file_name) for file_name in log_list]
print(f'File names - {file_names}')
import ntpath
def path_leaf(path):
    """Return the last component of ``path`` using Windows path rules.

    A trailing separator is tolerated: when the split leaves an empty tail,
    the last component of the head is returned instead.
    """
    head, tail = ntpath.split(path)
    if tail:
        return tail
    return ntpath.basename(head)


paths = [r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\JuniorLogFile.txt', r'C:\Users\Me\Downloads\archive108\archive\tools\Fors\SeniorLogFile.txt']
pathlist = list(map(path_leaf, paths))
The pathlist variable will then contain only the text file names.
I have a package of the following format:
Electricity
|___ __main__.py
|
|__ Electricity
| |___ general_functions
| |___ regression_calcs
| | |___ create_calcs.py
| |
| |____ run_calcs.py
|
|
|
|__ Data_Input
|___ regression_vals
|__ regression_vals.csv
run_calcs.py runs the code in regression_calcs, which requires data from Data_Input/Regression_vals.
What is the most pythonic way to find the number of ../ (number of times to go up a folder), until Data_Input is found?
This is because right now I'm running the scripts in Electricity/Electricity/run_calcs.py (for testing). Eventually I will be running in Electricity/__main__.py.
it will be for df = pd.read_csv(f'{filepath}Data_Input/regression_vals/regression_vals.csv')
where filepath = '../'*n
Inside your files within regression_calcs:
from os import listdir
from os.path import abspath, basename, dirname, isdir, join

# Directory that contains 'Data_Input'; stays None when no ancestor of this
# file has such a sub-directory, so check it before building paths from it.
filepath = None

# Parent of the .py that is running. abspath() guards against a relative
# __file__, whose dirname would be '' and could not be listed.
par_dir = dirname(abspath(__file__))
while True:
    # the parent contains the desired directory
    if isdir(join(par_dir, 'Data_Input')):
        filepath = par_dir
        break
    # back out to another parent otherwise
    parent = dirname(par_dir)
    if parent == par_dir:
        # Reached the file-system root without a match: stop instead of
        # re-checking the root forever.
        break
    par_dir = parent
Of course this only works if you have a single '/Data_Input/' directory!
What I eventually used (a mix between avix & pstatic's answer):
import os, unipath
def rel_location():
    """Return the relative prefix that leads from this file's folder up to
    the folder containing 'Input_Data'.

    Starting at the directory of this file and walking up at most five
    levels, the first directory that has an entry named 'Input_Data' wins.

    Returns:
        '' if this file's own directory contains 'Input_Data', otherwise
        '../' repeated once per level that had to be climbed.

    Raises:
        FileNotFoundError: If none of the five inspected directories
            contains 'Input_Data'.
    """
    num_tries = 5
    # abspath() so that a relative __file__ (e.g. 'run_calcs.py') does not
    # degrade into the unlistable directory name ''.
    path = os.path.abspath(__file__)
    for num_up_folder in range(num_tries):
        path = os.path.dirname(path)
        if 'Input_Data' in os.listdir(path):
            return '../' * num_up_folder
    # Only reached when the loop ran out of tries. (Comparing the loop index
    # with num_tries after the loop can never be true, so a failed search
    # previously went unnoticed.)
    raise FileNotFoundError("The directory 'Input_Data' could not be found in the 5"
                            " directories above this file's location. ")
You can use Unipath.
# Start from the full path of the CSV file.
# NOTE(review): `Path` is presumably unipath.Path - no import is shown here.
path = Path("/Electricity/Data_Input/regression_vals/regression_vals.csv")
# First step up: drops the file name.
path = path.parent
# Second step up: drops 'regression_vals'.
path = path.parent
And now path refers to /Electricity/Data_Input directory.
Here is an alternate implementation using pathlib and directly returning a Path object for the desired directory.
from pathlib import Path
def get_path_to_rel_location(directory_to_find, start=None, num_tries=5):
    """Search upwards for ``directory_to_find`` and return its path.

    The starting directory itself is checked first, then up to
    ``num_tries`` of its ancestors, nearest first.

    Args:
        directory_to_find: Name of the entry to look for.
        start: Directory to start from; defaults to the current working
            directory.
        num_tries: Maximum number of ancestor levels to climb.

    Returns:
        ``Path`` of the first match, i.e. ``<ancestor> / directory_to_find``.

    Raises:
        FileNotFoundError: If no inspected directory contains the entry.
    """
    path = Path.cwd() if start is None else Path(start)
    # +1: the starting directory plus ``num_tries`` levels above it.
    for _ in range(num_tries + 1):
        candidate = path / directory_to_find
        if candidate.exists():
            return candidate
        path = path.parent
    # Reached only when every try failed. (Testing the loop index against
    # num_tries after the loop can never succeed, so a failed search used to
    # return a non-existent path instead of raising.)
    raise FileNotFoundError(f"The directory {directory_to_find} could not be found in the start"
                            f" directory or the {num_tries} directories above it.")
# Example usage
path = get_path_to_rel_location("Input_Data")
os.scandir is useful for stuff like this.
def find_my_cousin(me, cousin_name):
    """Find a file or directory named `cousin_name`. Start searching at `me`,
    and traverse directly up the file tree until found.

    Args:
        me: File or directory to start from. If it is not a directory, the
            search starts in the directory that contains it. Relative paths
            are resolved against the current working directory.
        cousin_name: Exact entry name to look for.

    Returns:
        Absolute path of the first match, or None when the file-system root
        has been searched without a match.
    """
    # Work on an absolute path: with a relative one the parent chain becomes
    # '.', '..', '../..', ... and never stops changing, so the root check
    # below would never trigger.
    me = os.path.abspath(me)
    folder = me if os.path.isdir(me) else os.path.dirname(me)
    removed = 0
    while True:
        with os.scandir(folder) as ls:
            for f in ls:
                if f.name == cousin_name:
                    print(
                        "{} is your cousin, {} times removed, and she lives at {}"
                        .format(f.name, removed, f.path)
                    )
                    return f.path
        parent_folder = os.path.dirname(folder)
        if parent_folder == folder:  # Stop if we hit the file system root
            return None
        folder = parent_folder
        removed += 1
This answer is a modified version of A H's answer just with Micah Culpepper's exit condition and simplified.
import os
# Begin in the directory that holds this file and climb until a directory
# listing contains "Input_Data".
path = os.path.dirname(os.path.abspath(__file__))
while True:
    if "Input_Data" in os.listdir(path):
        break
    parent = os.path.dirname(path)
    # dirname() of the file-system root is the root itself: nothing left to try.
    if path == parent:
        raise FileNotFoundError("could not find Input_Data")
    path = parent
Hi After 2 hours of googling and searching I am failing to find, or build a simple piece of code for the following setup:
3 folders, 2 files:
/home/folderA/text2.txt
/home/folderB/
/home/folderB/folder1
/home/folderB/text1.txt
Moving the "Unknown" content of folderB to folderA.
All the attempts at solving this issue are either deemed too trivial and redirected to "look up shutil." or yielding half a page of code for a specific setup with different demands, rendering it impossible for my miserable pythonskills to develop a sufficient (elegant) result.
In python I have a procedure to identify the existence of folderB, but its contents are unknown and varying to me. In light of full disclosure; here is my XYproblem:
#!/usr/bin/python
# Python 2 script (print statements). NOTE(review): the indentation of this
# listing has been lost, so the nesting of the statements below is not shown.
from glob import glob
# path1: every entry of the current directory.
path1 =glob('*')
# path2: directories two levels down; each match ends with the path
# separator because the pattern ends with '/'.
path2 = glob('*/*/')
print path1
print path2
print len(path2)
a = len(path2)
for i in range(0,a):
# The last character of path2[i] is the separator, so index len-2 is the
# final character of the folder name and len-3 the one before it.
print len(path2[i]), path2[i], path2[i][len(path2[i])-2]
print(path2[i][len(path2[i])-2].isdigit())
# True when the folder name ends in a space followed by a digit.
if path2[i][len(path2[i])-3]==" " and path2[i][len(path2[i])-2].isdigit():
print('yay')
# Same path with a "0" inserted in front of that final digit.
newpath =path2[i][:len(path2[i])-2]+"0"+path2[i][(len(path2[i])-2):]
print(newpath)
import os
print(os.path.isdir(newpath))
if os.path.isdir(newpath): #if it is true, the new folder with the " 0n" already exists
import shutil
# newpath0 is a wildcard pattern string; it is handed to shutil.copy as-is
# and is never expanded with glob().
newpath0=path2[i]+ '*/'
print(newpath0, "headsup", newpath)
shutil.copy(newpath0,newpath)
#shutil.move(
But for the sake of those who seek an efficient, simple solution to this problem please keep it to the simplified, and hypothetical case of "folderX" :)
-edit-
Because the folderA indeed already exists, and has the same (potential) hierarchy as folderB. That is why
shutil.rmtree('folderA')
is not an option; folderA should remain intact.
So, ironically, temporarily removing the contents of folderA to put the contents of folderB in it, yields essentially the exact same problem as I am trying to solve.
-edit2-
Thanks for the effort, I keep on getting:
SyntaxError: invalid syntax
at the
def mv(from_folder, to_folder)
^
Now I appologize for my exceptional level of retardedness but I currently lack the clarity to comprehend how you envisioned your solution;
So after looking up the def mv(.. function I came to the conclusion that you might mean that from_folder should be replaced by a string which contains folderB.
So I tried 2 options:
your original
defining:
stringA='folderA', stringB=folderB
and substituting from_folder with stringB and to_folder with stringA in the first 3 rows of your code.
Both yield the same error.
*Note: import os and import shutil have already been performed within the active if block. Just to ensure that this was not causing the problem, I also tried it with explicit import os and import shutil statements immediately above the def.
the code I so far have hence looks like:
#!/usr/bin/python
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the statements below is not shown.
from glob import glob
# path1: every entry of the current directory.
path1 =glob('*')
# path2: directories two levels down; each match ends with the path
# separator because the pattern ends with '/'.
path2 = glob('*/*/')
#print path1
#print path2
#print len(path2)
a = len(path2)
for i in range(0,a):
#print len(path2[i]), path2[i], path2[i][len(path2[i])-2]
#print(path2[i][len(path2[i])-2].isdigit())
# True when the folder name ends in a space followed by a digit.
if path2[i][len(path2[i])-3]==" " and path2[i][len(path2[i])-2].isdigit():
#print('yay')
# Same path with a "0" inserted in front of that final digit.
newpath =path2[i][:len(path2[i])-2]+"0"+path2[i][(len(path2[i])-2):]
#print(newpath)
import os
#print(os.path.isdir(newpath))
if os.path.isdir(newpath): #if it is true, the new folder with the " 0n" already exists
import shutil
newpath0=path2[i]+ '*/'
#print(newpath0, "hier", newpath, ' en path2[i] ', path2[i])
#shutil.copy(newpath0,newpath)
#from here chose from_folder = path2[i] and to_folder=newpath
stringb = path2[i]
stringa = newpath
print(stringb,' ', stringa)
print('reached')
# NOTE(review): the statements below only DEFINE mv; nothing in this listing
# calls mv(stringb, stringa), so its body (including the second
# print('reached')) never runs.
def mv(stringb, stringa):
root_src_dir = stringb
print('reached')
root_dst_dir = stringa
for src_dir, dirs, files in os.walk(root_src_dir):
# Mirror the source sub-directory below the destination root.
dst_dir = src_dir.replace(root_src_dir, root_dst_dir)
if not os.path.exists(dst_dir):
os.mkdir(dst_dir)
for file_ in files:
src_file = os.path.join(src_dir, file_)
dst_file = os.path.join(dst_dir, file_)
# An existing destination file is deleted before the move.
if os.path.exists(dst_file):
os.remove(dst_file)
shutil.move(src_file, dst_dir)
But it refuses to print the second "reached", or in other words, it does not define
import shutil
# Recursively copy the folderB tree to folderA.
shutil.copytree('folderB', 'folderA')
Just make sure folderA doesn't exist before running the command.
If for some reason the folder already exists beforehand and you have no control over that, just do
# WARNING: rmtree deletes folderA together with everything already in it
# before the copy is made.
shutil.rmtree('folderA')
shutil.copytree('folderB', 'folderA')
Thanks for all the help! I've tried a different approach;
#!/usr/bin/python
# Python 2 script (print statement; range() result used as a list).
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the statements below is not shown.
from glob import glob
# path1: every entry of the current directory.
path1 =glob('*')
# path2: directories two levels down; each match ends with the path
# separator because the pattern ends with '/'.
path2 = glob('*/*/')
print path1
print('hi')
#print path2
#print len(path2)
a = len(path2)
for i in range(0,a):
#print len(path2[i]), path2[i], path2[i][len(path2[i])-2]
#print(path2[i][len(path2[i])-2].isdigit())
# True when the folder name ends in a space followed by a digit.
if path2[i][len(path2[i])-3]==" " and path2[i][len(path2[i])-2].isdigit():
#print('yay')
# Same path with a "0" inserted in front of that final digit.
newpath =path2[i][:len(path2[i])-2]+"0"+path2[i][(len(path2[i])-2):]
#print(newpath)
import os
#print(os.path.isdir(newpath))
if os.path.isdir(newpath): #if it is true, the new folder with the " 0n" already exists
import shutil
newpath0=path2[i]+ '*/'
#print(newpath0, "hi", newpath, ' en path2[i] ', path2[i])
#shutil.copy(newpath0,newpath)
#from here chose from_folder = path2[i] and to_folder=newpath
#stringb = path2[i]
#stringa = newpath
#print(stringb,' ', stringa)
print('reached')
subb = os.listdir(path2[i]) #subb is defined here as a list of all the subfolders and subfiles in folderB
#print(subb,len(subb))
print(newpath) #newpath = folderA
for c in range(0,(len(subb))): #This creates an index running for running through all the entries in subb
# The list is rebuilt from range() on every pass; only entry c is filled
# in and used below. Item assignment works because range() returns a list
# in Python 2.
completesubb=range(len(subb)) #This line defines an array which will contain the full path to all the subfolders and subfiles within folderB
# path2[i] already ends with the separator, so plain concatenation yields
# the full path.
completesubb[c] = path2[i]+subb[c]#here the full path to the subfolders and subfiles within folderB is created,(with 1 sub-file/folder per entry in the array)
print completesubb[c]
completesuba=range(len(subb)) #This will not be required
#move_file(completesubb[c],newpath) #gave an error
shutil.move(completesubb[c],newpath) #moves everythin inside folderB to inside folderA
Now all that is left for me to do is improve the recognizing procedure for "folderA" and folderB but that's another story :) Thnx everyone, as soon as my rep reaches 15 you'll get an automated raise ;)
Try this
import os
import shutil
def mv(from_folder, to_folder):
    """Move every file below ``from_folder`` into ``to_folder``, merging trees.

    The directory structure of ``from_folder`` is recreated below
    ``to_folder`` (which may already exist and keeps its other content).
    A file that already exists at the destination is replaced. The source
    directories themselves are left behind, emptied of their files.

    Args:
        from_folder: Root of the tree whose files are moved.
        to_folder: Root of the tree that receives them.
    """
    root_src_dir = from_folder
    root_dst_dir = to_folder
    for src_dir, _dirs, files in os.walk(root_src_dir):
        # Map the source directory onto the destination by its position
        # relative to the source root. A textual replace() of the root would
        # rewrite every occurrence of that text (with relative roots
        # 'b' -> 'a', the sub-folder 'b/b' would become 'a/a').
        rel_dir = os.path.relpath(src_dir, root_src_dir)
        dst_dir = os.path.normpath(os.path.join(root_dst_dir, rel_dir))
        # makedirs also creates missing parents of the destination root.
        if not os.path.isdir(dst_dir):
            os.makedirs(dst_dir)
        for file_ in files:
            src_file = os.path.join(src_dir, file_)
            dst_file = os.path.join(dst_dir, file_)
            # Remove a stale copy first so the move cannot fail on it.
            if os.path.exists(dst_file):
                os.remove(dst_file)
            shutil.move(src_file, dst_file)
Then
>>>mv('path/to/folderB', 'path/to/folderA')
I want to create a simple Python script that looks into folders and subfolders and creates a playlist named after the folder containing the mp3s. But so far I have only come across Python modules that work on Linux, or I could not figure out how to install them (pymad).
It's just for my Android mobile, so I figured that the m3u format should do it. I don't care about any metadata other than the names of the mp3 files themselves.
I actually just looked at http://en.wikipedia.org/wiki/M3U and saw that it is quite easy to write m3u files... should be able to do it with simple python write to text file`
Here is my solution
import os
import glob
def write_playlists(root):
    """Write one ``<folder name>.m3u`` into every folder below ``root``
    (``root`` included) that directly contains mp3 files.

    Each playlist lists the mp3 file names of its own folder, one per line,
    in sorted order. Folders without mp3 files get no playlist. The
    extension check ignores case, and the process working directory is not
    changed.

    Args:
        root: Top of the directory tree to scan.
    """
    for path, _subdirs, files in os.walk(root):
        songs = sorted(name for name in files if name.lower().endswith(".mp3"))
        if not songs:
            continue
        playlist = os.path.join(path, os.path.split(path)[1] + ".m3u")
        # ``with`` closes the file even if a write fails.
        with open(playlist, "w") as _m3u:
            _m3u.writelines(song + "\n" for song in songs)


if __name__ == "__main__":
    # Run as a script: build playlists for the current directory tree.
    write_playlists(os.getcwd())
I wrote up some code which will return a list of all nested playlist candidates based on your criteria:
import os
def getNestedFolderList(path):
    """Return ``path`` followed by every directory nested below it.

    Input: a path to a folder.
    Output: list with ``path`` first, then the paths of all folders found
    underneath it, each sub-tree listed depth-first.
    """
    folders = [path]
    for entry in os.listdir(path):
        entryPath = os.path.join(path, entry)
        if os.path.isdir(entryPath):
            folders.extend(getNestedFolderList(entryPath))
    return folders
def getFolderPlaylist(path):
    """Describe the playlist candidate for a single folder.

    Input: a path to a folder.
    Output: ``(folderName, path, mp3s)`` if the folder directly contains mp3
    files, else ``None``. ``folderName`` is the real name of the folder
    (also for ``'.'`` or a path with a trailing separator), ``path`` is
    returned as given, and ``mp3s`` is the sorted list of file names.
    """
    # Only regular files whose extension is .mp3 (any case) count. A plain
    # substring test would also accept names such as 'mp3_notes.txt' or a
    # sub-folder called 'mp3s'.
    mp3s = sorted(
        item for item in os.listdir(path)
        if item.lower().endswith('.mp3')
        and os.path.isfile(os.path.join(path, item))
    )
    if not mp3s:
        return None
    # abspath() resolves '.' and strips trailing separators, for which
    # basename() alone would yield '.' or ''.
    folderName = os.path.basename(os.path.abspath(path))
    return (folderName, path, mp3s)
def getFolderPlaylists(path):
    """Collect the playlist candidates of ``path`` and all folders below it.

    Input: a path to a folder.
    Output: list with one ``getFolderPlaylist`` result for every nested
    folder (``path`` included) for which that result is not ``None``.
    """
    rv = []
    for folderPath in getNestedFolderList(path):
        folderPlaylist = getFolderPlaylist(folderPath)
        if folderPlaylist:
            rv.append(folderPlaylist)
    return rv


# Function-call form of print: valid in Python 3 and, with a single
# argument, prints the same thing in Python 2.
print(getFolderPlaylists('.'))