Python code to merge multiple .wav files from multiple folders gets hung up - python

I have a bunch of wave files from an outdoor bird recorder that are broken up into 1-hour segments. Each day's worth of audio is in a single folder and I have 30 days' worth of folders. I am trying to iterate through the folders and merge each day's audio into one file and export it with the folder name, but each time I try to run it, either the print statements indicate that each for loop runs to completion before the merge code can be reached, or it runs properly and the merge code throws a write error.
import wave
import os
# Question script: tries to merge the .wav files found in each sub-folder of
# `path` into one output .wav per folder.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# of the loops and of the `with` blocks below cannot be read off the text.
#creates an empty object for the first folder name
rootfiles= ""
#sets the path for the starting location
# NOTE(review): "\S" is not a recognised escape sequence, so the backslash is
# kept as-is; a raw string (r"...") would state that intent explicitly.
path = "I:\SwiftOne_000"
#lists all folders in the directory "path"
dir_list = os.listdir(path)
print("Files and directories in '", path, "' :")
#iterates through folders in path
for i in dir_list:
#adds file name to original path
rootfiles = ( path + "\\" + i)
prefix = i
# define outfiles for waves
# NOTE(review): out_name is assigned but never read in this listing.
out_name = prefix
print("first loop completed")
# NOTE(review): rootfiles is a str, so this loop iterates over its characters,
# not over the files inside the folder.
for x in rootfiles:
myfiles= []
paths = rootfiles
# NOTE(review): ext is assigned but never used to filter the entries below.
ext = (".wav")
#print(paths)
dir_lists = os.listdir(paths)
#print(dir_lists)
#print("Files and directories in '", paths, "' :")
print("second loop completed")
for x in dir_lists:
# every directory entry is appended, whatever its type or extension
myfiles.append( paths + "\\" + x)
#print (myfiles)
# NOTE(review): the literal text "prefix" is part of the output file name here,
# in addition to the value of the prefix variable.
outfile= "D:\SwiftD\prefix" + prefix + ".wav"
wav_files = myfiles
print("third loop completed")
from contextlib import closing
with closing(wave.open(outfile, 'wb')) as output:
# find sample rate from first file
# (all header parameters of the first input are copied to the output)
with closing(wave.open(wav_files[0])) as w:
output.setparams(w.getparams())
# write each file to output
for infile in wav_files:
with closing(wave.open(infile)) as w:
# reads all frames of the input in one call and appends them to the output
output.writeframes(w.readframes(w.getnframes()))

I think you want something like this, assuming your folder structure is:
- Swift (directory)
- Day1 (directory)
- File1
- File2
- File3
import os
import wave

src = r'I:\SwiftOne_000'
output_folder = r'I:\OutputFolder'


def collect_wav_files(root):
    """Map each immediate sub-directory of *root* to its .wav file paths.

    Args:
        root: Directory whose sub-directories each hold the segments to merge.

    Returns:
        A dict ``{sub-directory name: [full path, ...]}``. Each list is sorted
        by path because ``os.listdir`` returns entries in arbitrary order;
        this assumes the segment names sort chronologically (TODO confirm for
        the recorder's naming scheme).
    """
    input_data = {}
    for d_name in sorted(os.listdir(root)):
        d_path = os.path.join(root, d_name)
        if not os.path.isdir(d_path):
            continue
        input_data[d_name] = sorted(
            os.path.join(d_path, f)
            for f in os.listdir(d_path)
            # The extension check is case-insensitive (".WAV" also matches).
            if f.lower().endswith('.wav') and os.path.isfile(os.path.join(d_path, f))
        )
    return input_data


def merge_wav_files(paths, out_path, chunk_frames=65536):
    """Concatenate the audio frames of *paths*, in order, into *out_path*.

    The header parameters of the first input are copied to the output; the
    inputs are not checked for matching parameters.

    Args:
        paths: Input .wav file paths, in the order they should be merged.
        out_path: Path of the .wav file to create.
        chunk_frames: Number of frames copied per read, so a long recording
            is never held in memory as a whole.

    Returns:
        True if an output file was written, False if *paths* was empty
        (no file is created in that case: closing a wave writer whose
        parameters were never set raises ``wave.Error``).
    """
    if not paths:
        return False
    with wave.open(out_path, 'wb') as output:
        for index, path in enumerate(paths):
            with wave.open(path, 'rb') as data:
                if index == 0:
                    output.setparams(data.getparams())
                while True:
                    frames = data.readframes(chunk_frames)
                    if not frames:
                        break
                    output.writeframes(frames)
    return True


if __name__ == '__main__':
    input_data = collect_wav_files(src)
    print(input_data)
    os.makedirs(output_folder, exist_ok=True)
    for d_name, paths in input_data.items():
        merge_wav_files(paths, os.path.join(output_folder, f'{d_name}.wav'))
There are a few issues with your code. It is better to use os.path.join to concatenate paths rather than constructing the string yourself, as it makes the code platform independent (although you probably don't care). os.listdir returns both files and folders, so you should check the type with os.path.isfile or os.path.isdir to be sure. The file extension isn't always in lower case, so your extension check might not work; using .lower() means you can always check for .wav.
I'm pretty sure you don't need contextlib.closing, as the object returned by wave.open can be used directly in a with block, which will already take care of closing it for you.
You are using the outfile variable to write to the file, however, you overwrite this each time you loop around the third loop, so you will only ever get one file corresponding to the last directory.
Without seeing the stack trace, I'm not sure what the write error is likely to be.

Related

How to Iterate over several directory levels and move files based on condition

I would like some help to loop through some directories and subdirectories and extracting data. I have a directory with three levels, with the third level containing several .csv.gz files. The structure is like this
I need to access level 2 (where subfolders are) of each folder and check the existence of a specific folder (in my example, this will be subfolder 3; I left the other folders empty for this example, but in real cases they will have data). If checking returns True, then I want to change the name of files within the target subfolder3 and transfer all files to another folder.
Below is my code. It is quite cumbersome and there are probably better ways of doing it. I tried using os.walk(), and this is the closest I got to a solution, but it won't move the files.
import os
import shutil
# Question version of organizer: walks parent_dir and, for every directory
# whose path ends with target_dir, tries to move its non-hidden entries to
# destination_dir. This is the version reported as not moving the files.
# NOTE(review): the indentation of this listing has been lost.
def organizer(parent_dir, target_dir, destination_dir):
for root, dirs, files in os.walk(parent_dir):
if root.endswith(target_dir):
target = root
# NOTE(review): this rebinds `files` (the list from os.walk) to a single name.
for files in os.listdir(target):
# skip hidden entries (names starting with ".")
if not files.startswith("."):
# this is to change the name of the file
# expects at least three dot-separated parts; fullname[2] raises IndexError
# otherwise
fullname = files.split(".")
just_name = fullname[0]
csv_extension = fullname[1]
gz_extension = fullname[2]
subject_id = target
#make a new name
# origin is the target directory path joined with the rebuilt file name
origin = subject_id + "/"+ just_name + "." + csv_extension + "." + gz_extension
#make a path based on this new name
# NOTE(review): origin already contains the source directory; if it is an
# absolute path, os.path.join discards destination_dir and returns origin.
new_name = os.path.join(destination_dir, origin)
#move file from origin folder to destination folder and rename the file
shutil.move(origin, new_name)
Any suggestions on how to make this work and/or make it more efficient?
simply enough, you can use the built-in os module, with os.walk(path) returns you root directories and files found
import os
for root, _, files in os.walk(path):
#your code here
for your problem, do this
import os

# Visit every file below parent_directory, at any depth.
for root, dirs, files in os.walk(parent_directory):
    for file in files:
        # extract the data from the "file"; `file` is only the bare name, so
        # join it with `root` to get a path that can be opened
        file_path = os.path.join(root, file)
check this for more information os.walk()
and if you want to get the name of the file, you can use os.path.basename(path)
you can even check for just the gzipped csv files you're looking for using built-in fnmatch module
import fnmatch
import os


def find_csv_files(path):
    """Return every ``*.csv.gz`` file found below *path*, at any depth.

    Args:
        path: Directory to search recursively.

    Returns:
        A sorted list of unique file paths, each built by joining the
        directory it was found in with the file name.
    """
    result = set()
    for root, _, files in os.walk(path):
        # fnmatch uses shell-style wildcards, not regular expressions.
        for name in fnmatch.filter(files, "*.csv.gz"):
            result.add(os.path.join(root, name))
    return sorted(result)
Ok, guys, I was finally able to find a solution. Here it is. Not the cleanest one, but it works in my case. Thanks for the help.
def organizer(parent_dir, target_dir, destination_dir, subject_level=5):
    """Move the entries of every *target_dir* folder into *destination_dir*.

    Walks *parent_dir*; for each directory whose path ends with *target_dir*,
    every entry not starting with "." is moved to *destination_dir* and
    renamed to ``<prefix>_<original name>``.

    Args:
        parent_dir: Root of the tree to search.
        target_dir: Suffix that a directory path must end with to be processed.
        destination_dir: Folder receiving the renamed entries; created if
            it does not exist.
        subject_level: Index, within the ``os.sep``-split source path, of the
            component used as the name prefix. The default of 5 keeps the
            depth that was hard-coded before; it depends on how deep
            *parent_dir* sits in the file system.

    Raises:
        IndexError: If a source path has no component at *subject_level*.
    """
    os.makedirs(destination_dir, exist_ok=True)
    for root, dirs, files in os.walk(parent_dir):
        if not root.endswith(target_dir):
            continue
        for filename in os.listdir(root):
            # Skip hidden entries such as .DS_Store.
            if filename.startswith("."):
                continue
            origin = os.path.join(root, filename)
            prefix = origin.split(os.sep)[subject_level]
            # Keep the complete original name: rebuilding it from exactly
            # three dot-separated parts fails or truncates for other names.
            new_path = os.path.join(destination_dir, prefix + "_" + filename)
            # Move from the source folder to the destination under the new name.
            shutil.move(origin, new_path)
I guess the problem was that I was passing a path built from the renamed file name (the variable I wrongly called origin) as the source path to shutil.move(). Since that path does not exist, the files weren't moved.

Need help checking for multiple underscores in a collection of files - see code below

I am working on a project where I need to sort .jpg files and folders that contain .jpg files. I have other scripts that are functional which I intend to incorporate into this python script later. First though, I've implemented in the first script below to count the number of underscores in a file and take action based on the result and this works successfully. I need help on creating logic that will go through .jpg image files and if the files have more than one underscore the program will move the files into an error folder. Also any feedback on how to optimize this script would be greatly appreciated!
from pathlib import Path
import shutil, os, time, glob
# Question script: walks the working folder and either moves .jpg files into
# per-file folders or moves entries into a timestamped error folder.
# NOTE(review): the indentation of this listing has been lost, so the exact
# nesting of the if / elif / else branches cannot be read off the text.
# timestamp used in the error folder name
timestr = time.strftime("%Y%m%d-%H%M%S")
folder = 'D:\\test\\testing'
working_folder = 'DitaTest1'
full_path = Path(os.path.join(folder, working_folder))
test_path = folder + '\\' + working_folder
for file_path in full_path.iterdir():
file_name = file_path.name
# number of underscores in the entry's name
result = file_name.count('_')
if file_path.is_file():
os.chdir(test_path)
# NOTE(review): this globs every *.jpg in the folder, not only file_path, and
# `result` is not consulted on this branch.
for file in glob.glob("*.jpg"):
# destination folder: the file name with spaces replaced by "_" and ".jpg"
# removed
dst=test_path+"\\"+file.replace(" ","_").replace(".jpg","") # .replace("Angle","").replace("Front","").replace("Side","")
os.mkdir(dst)
# print(dst)
shutil.move(file,dst)
# reached only for entries that are not regular files
elif result != 1:
if not file_path.is_file():
shutil.move(os.path.join(folder, working_folder, file_name), os.path.join(folder, working_folder + ' - dir-ERRORS_' + timestr, file_name))
else:
print('Ignored operation')
You need to explain more so that we can understand it better but from what I have read,
Your if logic seems to be wrong: if you want to check the number of underscores, you shouldn't put that logic in the elif. You should try something like this instead.
# Route each file in full_path by the number of underscores in its name:
# exactly one underscore -> a .jpg is moved into a folder named after it;
# any other count -> the file is moved to the timestamped error folder.
error_dir = os.path.join(folder, working_folder + ' - dir-ERRORS_' + timestr)
# Snapshot the listing first, because entries are moved while we loop.
for file_path in list(full_path.iterdir()):
    if file_path.is_dir():
        continue
    file_name = file_path.name
    result = file_name.count('_')
    if result == 1:
        # Handle only the current file; re-globbing every *.jpg here would
        # move files whose own underscore count was never checked.
        if file_name.lower().endswith('.jpg'):
            dst = os.path.join(test_path, os.path.splitext(file_name)[0].replace(" ", "_"))
            # exist_ok avoids a crash when the script is run a second time
            os.makedirs(dst, exist_ok=True)
            # print(dst)
            shutil.move(str(file_path), dst)
    else:
        # shutil.move does not create missing parent folders
        os.makedirs(error_dir, exist_ok=True)
        shutil.move(str(file_path), os.path.join(error_dir, file_name))
What this code does is, iterate over the folder and if it finds a folder it will just pass and when it finds a file it will check if result == 1. If it is it will move it to your desired folder, otherwise it will move it to the error folder. If I made a mistake let me know.

Need help creating a txt file in a folder which, is in the same directory

I'm pretty new to Python. I have a homework problem where we need to analyze corpora and then compare them. We also have to save each file as a .txt file after it has been processed with an attribute, the size.
So I need to create a .txt file in a separate folder called trigram-models.
This folder is in the same directory as my python file. I think i have to use the os module but i'm not sure how.
Here is my code:
from langdetect import read_trigrams, trigram_table, write_trigrams
import os
# Question version of make_profiles: builds one trigram profile per file found
# in ./training and writes it out through write_trigrams.
# NOTE(review): the indentation of this listing has been lost, so it is not
# visible whether `return` sits inside or after the second loop.
# NOTE(review): datafolder and profilefolder are never used in the body; the
# input folder is hard-coded as './training'.
def make_profiles(datafolder, profilefolder, size):
filelist = []
for file in os.listdir('./training'):
filelist.append(file)
print(filelist)
for file in filelist:
filen = "./training/"+file
print("fi", filen)
# NOTE(review): maketable is assigned but never read afterwards.
maketable = trigram_table(filen, size)
readdata = read_trigrams(filen)
#print("re", readdata)
# output name: the part of the file name before the first "-", then the size
splitname = str(file).split('-')
# newname has no directory component, so it does not point into profilefolder
newname = splitname[0] + "." + str(size) + '.txt'
endtable = write_trigrams(readdata, newname)
return (endtable)
make_profiles("./training", "./trigram-models", 20)
To create a directory, I would use the following format which relies on a try / catch and prevents an error if the directory already exists:
# Create dirName and report whether it was created or already existed.
dirName = 'tempDir'
try:
    # Create target Directory
    os.mkdir(dirName)
    print("Directory " , dirName , " Created ")
except FileExistsError:
    # os.mkdir raises FileExistsError when the directory is already there
    print("Directory " , dirName , " already exists")
To change your directory you can use the following:
os.chdir(directoryLocation)
I recommend reading chapter 8 in automating the boring stuff with python.
I hope this helps. If you have any questions please don't hesitate to ask.
First of all, be sure to indent all the code in your method for it to be appropriately enclosed.
You are also passing relative paths (datafolder, profilefolder) for your folders as method arguments, so you should use them inside the method instead.
Lastly, to create a file in a folder, I would recommend using the following algorithm:
# str.join accepts a single iterable, so '/'.join(a, b) raises TypeError;
# os.path.join builds the path from separate components.
file_path = os.path.join(profilefolder, newname)
with open(file_path, 'w') as ouf:
    ouf.write(endtable)
You will probably need to replace "endtable" with a string representation of your data.
Hope it helps.
To clarify toti08's answer: os.abspath does not exist, so you should replace it with os.path.abspath.
filelist = [os.path.abspath(f) for f in os.listdir(data_folder)]
instead of
filelist = [os.abspath(f) for f in os.listdir(data_folder)]
Your function is not using the argument profileFolder, where you specify the name of the output directory. So first of all you should use this information for creating a folder before processing your files.
So first thing would be to create this output directory.
Second is to save your files there, and to do that you need to append the file name to the output directory. Something like this:
def make_profiles(data_folder, output_folder, size):
    """Sketch: prepare one output file name per input file in *data_folder*.

    Args:
        data_folder: Folder containing the input files.
        output_folder: Folder for the generated .txt files; created if missing.
        size: Value embedded in each output file name.

    Returns:
        ``endtable``. The per-file processing from the question
        (trigram_table / read_trigrams / write_trigrams) is left out of this
        sketch, so the value stays None until that code is added.
    """
    filelist = []
    for file in os.listdir(data_folder):
        filelist.append(file)
    # Create output folder
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    # Defined up front so the return below works while the processing is omitted.
    endtable = None
    for file in filelist:
        # Build the input path from data_folder instead of a hard-coded folder.
        filen = os.path.join(data_folder, file)
        #print("fi", filen)
        splitname = str(file).split('-')
        # Create new file by appending name to output_folder
        newname = os.path.join(output_folder, splitname[0] + "." + str(size) + '.txt')
    return (endtable)


if __name__ == "__main__":
    make_profiles('./training', './trigram-models', 20)
Note that you can also specify the relative folder name (i.e. "trigram-models" only) and then create the output directory by appending this name to the current path:
output_folder = os.path.join(os.getcwd(), output_folder)
Also (not related to the question) this piece of code could be optimized:
filelist = []
for file in os.listdir(data_folder):
filelist.append(file)
os.listdir already returns a list, so you could directly write:
filelist = os.listdir(data_folder)
But since you're interested in the absolute path of each file you could better do:
filelist = [os.path.abspath(f) for f in os.listdir(data_folder)]
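where you basically take each file returned by os.listdir and you append its absolute path to your file list. Doing this you could avoid the line filen = "./training/"+file. Note, however, that os.listdir returns bare file names and os.path.abspath resolves them against the current working directory, so this only gives the right paths when data_folder is the working directory; otherwise use os.path.abspath(os.path.join(data_folder, f)).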
So in the end your code should look something like this:
def make_profiles(data_folder, output_folder, size):
    """Sketch: prepare one output file name per input file in *data_folder*.

    Args:
        data_folder: Folder containing the input files.
        output_folder: Folder for the generated .txt files; created if missing.
        size: Value embedded in each output file name.

    Returns:
        ``endtable``, which stays None until the elided processing code
        (marked "[...add other pieces of code]") assigns it.
    """
    # os.listdir yields bare names; join them with data_folder before making
    # them absolute, otherwise they are resolved against the working directory.
    filelist = [os.path.abspath(os.path.join(data_folder, f)) for f in os.listdir(data_folder)]
    # Create output folder
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    # Defined up front so the return below works while the processing is elided.
    endtable = None
    for file in filelist:
        # Split only the file name: splitting the whole absolute path would
        # make os.path.join below discard output_folder.
        splitname = os.path.basename(file).split('-')
        # [...add other pieces of code]
        # Create new file by appending name to output_folder
        newname = os.path.join(output_folder, splitname[0] + "." + str(size) + '.txt')
        # [...add other pieces of code]
    return (endtable)


if __name__ == "__main__":
    make_profiles('./training', './trigram-models', 20)

How to get the files with the biggest size in the folders, change their name and save to a different folder

I need to get files with the biggest size in different folders, change their name to folder name that they belong to and save to a new folder. I have something like this and I got stuck:
import os
# Core settings
# Question script: walks rootdir, collects the .jpg paths of each folder and
# tries to pick the largest one.
# NOTE(review): the indentation of this listing has been lost, so it is not
# visible which statements sit inside the try block.
rootdir = 'C:\\Users\\X\\Desktop\\humps'
to_save = 'C:\\Users\\X\\Desktop\\new'
for root, dirs, files in os.walk(rootdir):
# the candidate list is reset for every directory visited
new_list = []
for file in files:
if file.endswith(".jpg"):
try:
print(file)
os.chdir(to_save)
# first path component after "humps\"; the [1] index raises IndexError when
# root does not contain "humps\"
add_id = root.split("humps\\")[1]
add_id = add_id.split("\\")[0]
file_name = os.path.join(root,file)
new_list.append(file_name)
# NOTE(review): new_list holds str paths, and str has no .stat() method, so
# this key function raises AttributeError.
bigfile = max(new_list, key=lambda x: x.stat().st_size)
# NOTE(review): the bare except hides every error raised inside the try block.
except:
pass
To make it clearer: let's say the name of the sub-folder is "elephant" and there are different elephant photos and subfolders in this elephant folder. I want to go through those photos and subfolders, find the elephant photo with the biggest size, name it elephant and save it to my target folder. I also want to repeat this for the other sub-folders, such as lion, puma, etc.
How I could achieve what I want ?
To find biggest file and save to another location
import os
import shutil


def find_biggest_file(root):
    """Return the path of the largest file below *root*, or None.

    Args:
        root: Directory to search recursively.

    Returns:
        The absolute path of the file with the greatest size, or None when
        no file is found (``max`` on an empty list would raise ValueError).
    """
    f_list = []
    for folder, subfolders, files in os.walk(os.path.abspath(root)):
        for file in files:
            filePath = os.path.join(folder, file)
            f_list.append(filePath)
    return max(f_list, key=os.path.getsize, default=None)


if __name__ == "__main__":
    root = "path/to/directory"
    new_path = "path/where/you/want/to/save"
    biggest_file = find_biggest_file(root)
    if biggest_file is not None:
        shutil.copy(biggest_file, new_path)
if you want only images then add one more condition in loop
for folder, subfolders, files in os.walk(root):
for file in files:
if file.endswith(".jpg"):
filePath = os.path.join(folder, file)
f_list.append(filePath)
To get all folders biggest file
root = "demo"
root = os.path.abspath(root)


def test(path="."):
    """Return the biggest file of every folder below *path*.

    Args:
        path: Directory to search recursively, including *path* itself.
            Defaults to the current directory.

    Returns:
        A list of ``(folder path, file name)`` tuples, one per folder that
        contains at least one regular file.
    """
    big_files = []
    for folder, _, files in os.walk(path):
        # Names must be joined with their folder before they are tested or
        # measured; bare names would be resolved against the working directory.
        f_list = [name for name in files if os.path.isfile(os.path.join(folder, name))]
        if f_list:
            biggest = max(f_list, key=lambda name: os.path.getsize(os.path.join(folder, name)))
            big_files.append((folder, biggest))
    return big_files


print(test(root))
How to get the files with the biggest size in the folders, change their name and save to a different folder
Basically you already have a good description of what you need to do. You just need to follow it step by step:
get all files in some search directory
filter for relevant files ("*.jpg")
get their sizes
find the maximum
copy to new directory with name of search directory
IMO it's an important skill to be able to break down a task into smaller tasks. Then, you just need to implement the smaller tasks and combine:
def iterate_files_recursively(directory="."):
    """Yield an ``os.DirEntry`` for every non-directory entry below *directory*.

    Args:
        directory: Directory to scan; sub-directories are scanned recursively.

    Yields:
        One ``os.DirEntry`` per entry that is not a directory.
    """
    # The with-block closes the scandir iterator even if the caller stops
    # consuming the generator early.
    with os.scandir(directory) as entries:
        for entry in entries:
            if entry.is_dir():
                yield from iterate_files_recursively(entry.path)
            else:
                yield entry
files = iterate_files_recursively(subfolder_name)
I'd use os.scandir because it avoids building up a (potentially) huge list of files in memory and instead allows me (via a generator) to work one file at a time. Note that starting with 3.6 you can use the result of os.scandir as a context manager (with syntax).
images = itertools.filterfalse(lambda f: not f.path.endswith('.jpg'), files)
Filtering is relatively straightforward except for the IMO strange choice of itertools.filterfalse to only keep elements for which its predicate returns False.
biggest = max(images, key=(lambda img: img.stat().st_size))
This is two steps in one: Get the maximum with the builtin max function, and use the file size as "key" to establish an order. Note that this raises a ValueError if you don't have any images ... so you might want to supply default=None or handle that exception.
# Copy the biggest image into target_directory, named after its sub-folder.
shutil.copy(biggest.path, os.path.join(target_directory, subfolder_name + '.jpg'))
shutil.copy copies the file and some metadata. Instead of hardcoding path separators, please use os.path.join!
Now all of this assumes that you know the subfolder_name. You can scan for those easily, too:
def iterate_directories(directory='.'):
    """Yield an ``os.DirEntry`` for each immediate sub-directory of *directory*.

    Args:
        directory: Directory to scan (not recursive).

    Yields:
        One ``os.DirEntry`` per entry that is a directory.
    """
    # The with-block closes the scandir iterator once the scan ends.
    with os.scandir(directory) as entries:
        for entry in entries:
            if entry.is_dir():
                yield entry
Here's some code that does what you want. Instead of using the old os.walk function, it uses modern pathlib functions.
The heart of this code is the recursive biggest function. It scans all the files and directories in folder, saving the matching file names to the files list, and recursively searching any directories it finds. It then returns the path of the largest file that it finds, or None if no matching files are found.
from pathlib import Path
import shutil


def filesize(path):
    """Return the size in bytes of the file at *path* (a ``pathlib.Path``)."""
    return path.stat().st_size


def biggest(folder, pattern):
    ''' Find the biggest file in folder that matches pattern
    Search recursively in all subdirectories

    Returns the path of the biggest matching file, or None if nothing matches.
    '''
    files = []
    for f in folder.iterdir():
        if f.is_file():
            if f.match(pattern):
                files.append(f)
        elif f.is_dir():
            # Each sub-directory contributes only its own biggest match.
            found = biggest(f, pattern)
            if found:
                files.append(found)
    if files:
        return max(files, key=filesize)
    return None


def copy_biggest(src, dest, pattern):
    ''' Find the biggest file in each folder in src that matches pattern
    and copy it to dest, using the folder's name as the new file name

    src and dest are pathlib.Path objects; dest is created if it is missing.
    '''
    dest.mkdir(parents=True, exist_ok=True)
    for folder in src.iterdir():
        if folder.is_dir():
            found = biggest(folder, pattern)
            if found:
                # Use only the folder's own name: joining dest with the full
                # (absolute) folder path would discard dest entirely.
                path = folder.name
                newname = dest / path
                print(folder, ':', found, '->', newname)
                shutil.copyfile(found, newname)
You can call it like this:
rootdir = r'C:\Users\X\Desktop\humps'
to_save = r'C:\Users\X\Desktop\new'
copy_biggest(Path(rootdir), Path(to_save), '*.jpg')
Note that the copied files will have the same name as the top-level folder in rootdir that they were found in, with no file extension. If you want to give them a .jpg extension, you can change
newname = dest / path
to
newname = (dest / path).with_suffix('.jpg')
The shutil module on older versions of Python 3 doesn't understand pathlib paths. But that's easy enough to remedy. In the copy_biggest function, replace
shutil.copyfile(found, newname)
with
shutil.copyfile(str(found), str(newname))

How to rename all files and subfolders within a directory using Python

I am doing a small project using a book about Python. One of the examples they show is how to rename all of the files and directories right underneath the root folder, however I want to go a step further and change files within sub directories.
I figured using os.walk would be my best bet but I can only get it to change file names not subdirectories. I would like to also change subdirectory names as I go through os.walk.
I am using a book and online resources to accomplish this, any help would be awesome.
Here is what I have right now based on the book. I have tried to include os.listdir but the regex won't work like that.
import os
import re
import shutil

#regex that matches files with dates
# NOTE: the separators between month, day and year are a bare ".", which in a
# regular expression matches any single character, not only a literal dot.
wrongFormat = re.compile(r"""^(.*?)
((0|1)?\d).
((0|1|2|3)?\d).
((19|20)\d\d)
(.*?)$
""", re.VERBOSE)


def rename_dated_entries(path='.', rename_dirs=False):
    """Rename entries below *path* whose names contain a date.

    A name matching ``wrongFormat`` is rewritten with "_" between the month,
    day and year parts; the text before and after the date is kept.

    Args:
        path: Root directory to walk.
        rename_dirs: When True, sub-directory names are renamed as well as
            file names. Defaults to False (files only).

    Returns:
        A list of ``(old absolute path, new absolute path)`` tuples, one per
        rename performed.
    """
    renamed = []
    # When directories are renamed too, walk bottom-up so that a directory is
    # renamed only after everything inside it has been visited.
    for folder, dirs, files in os.walk(path, topdown=not rename_dirs):
        candidates = files + dirs if rename_dirs else files
        #Get full, absolute file paths.
        absWorkingDir = os.path.abspath(folder)
        for incorrectDt in candidates:
            dt = wrongFormat.search(incorrectDt)
            #Skip file without a date
            if dt is None:
                continue
            #Split filename into parts
            beforePart = dt.group(1)
            monthPart = dt.group(2)
            dayPart = dt.group(4)
            yearPart = dt.group(6)
            afterPart = dt.group(8)
            #Form the new date format
            correctFormat = beforePart + monthPart + "_" + dayPart + "_" + yearPart + afterPart
            # Names already in the target format also match; nothing to do.
            if correctFormat == incorrectDt:
                continue
            source = os.path.join(absWorkingDir, incorrectDt)
            target = os.path.join(absWorkingDir, correctFormat)
            #rename files
            print ('Renaming "%s" to "%s"...' % (source, target))
            shutil.move(source, target)
            renamed.append((source, target))
    return renamed


#Loop over the files in the working directory
path = '.'
if __name__ == "__main__":
    rename_dated_entries(path)

Categories

Resources