Finding the latest file in each subdirectory - python

I have a folder structure similar to the one below, and I need to pull the latest .jpg files from each subdirectory:
+ C:\\myfiles
+ parentdir1
+ subdir1
+ somename1.jpg
+ somename2.jpg
+ ...
+ subdir2
+ somename3.jpg
+ somename4.jpg
+ ...
+ ...
+ parentdir2
+ subdir1
+ somename5.jpg
+ somename6.jpg
+ ...
+ subdir2
+ somename7.jpg
+ somename8.jpg
+ ...
+ ...
+ parentdir3
+ subdir1
+ somename9.jpg
+ somename10.jpg
+ ...
+ subdir2
+ somename11.jpg
+ somename12.jpg
+ ...
+ ...
+ ...
I don't know any of the names of the folders or files but I need to access the last 2 .jpg files in each subdir.
For the sake of making this simple, let's just assume I need to print the last 2 files created in the subdir.
I wrote a script that searches all subdirs in a given parentdir, but I actually need to iterate through all parentdirs as well:
import os
# Question script: for each entry under `path`, print the two collected paths
# with the greatest os.path.getctime value.
# NOTE(review): indentation was lost in this listing, so the loop nesting
# described below is inferred -- confirm against the original post.
path = 'C:\\myfiles'
filelist = []
# Index-based loop over the entries directly under `path`; os.listdir(path) is
# called again on every iteration to look up entry i.
for i in range(len(os.listdir(path))):
subpath = path + '\\' + os.listdir(path)[i]
for root, dirs, files in os.walk(subpath):
# NOTE(review): this lists `subpath` on every os.walk() step instead of using
# `files`, yet joins the names onto `root`; for a `root` below `subpath` the
# joined paths presumably do not exist.
for file in os.listdir(subpath):
filelist.append(os.path.join(root, file))
# Ascending sort, so the entries with the largest getctime are at the end.
sorted_filelist = sorted(filelist, key=os.path.getctime)
# No '.jpg' filter is applied anywhere above, and [-2] raises IndexError when
# fewer than two paths were collected.
print('the latest jpg file in ' + root + ' is: ' + sorted_filelist[-1])
print('the 2nd last jpg file in ' + root + ' is: ' + sorted_filelist[-2])
filelist.clear()

I think this will do what you want. Note that I sort the files by their last modification times, rather than their creation time because I think that's the way to determine which are "most recent".
import glob
import os
N_MOST_RECENT = 2
path = 'C:\\myfiles'

# Visit exactly two directory levels below `path`: every parent directory,
# then every subdirectory inside it.
for parent_name in os.listdir(path):
    parent_path = os.path.join(path, parent_name)
    if not os.path.isdir(parent_path):
        continue
    for child_name in os.listdir(parent_path):
        child_path = os.path.abspath(os.path.join(parent_path, child_name))
        if not os.path.isdir(child_path):
            continue
        # Order the .jpg files oldest-to-newest by last-modification time.
        jpg_by_mtime = sorted(glob.iglob(os.path.join(child_path, '*.jpg')),
                              key=os.path.getmtime)
        # Keep only the bare file names of the N newest files.
        newest_names = [os.path.basename(jpg_path)
                        for jpg_path in jpg_by_mtime[-N_MOST_RECENT:]]
        print(f'{N_MOST_RECENT} most recent .jpg files in "{child_path}":\n'
              f' {newest_names}')

Try iterating through the parent directory, and then through all the sub-directories, using os.listdir().
import os
parent_dir = 'path/to/parent/dir'

# Print the two most recently created .jpg files of every subdirectory
# directly under parent_dir.
for subdir in os.listdir(parent_dir):
    # os.listdir() returns bare names; anchor each one to parent_dir so the
    # checks below do not resolve against the current working directory.
    subdir_path = os.path.join(parent_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue
    # Newest first (largest getctime first), so the first two entries are the
    # two latest files.
    sorted_filelist = sorted(
        [os.path.join(subdir_path, f) for f in os.listdir(subdir_path)
         if os.path.splitext(f)[1] == '.jpg'],
        key=os.path.getctime, reverse=True)
    print(sorted_filelist[:2])

Related

How to move and rename multiple files to a specific folder?

I have a small problem with a tool I built in Python.
This tool works classifying files by filenames and making folders by a word in every filename and then moving all the files to the corresponding folder.
Files:
09052019_6_filetype1_currenttime_randomnumber.xml
09052019_2_filetype2_currenttime_randomnumber.xml
09052019_9_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
Actual results:
filetype1_Status_6(folder)
09052019_6_filetype1_currenttime_randomnumber.xml
filetype2_Status_2(folder)
09052019_2_filetype2_currenttime_randomnumber.xml
filetype3_Status_9(folder)
09052019_9_filetype3_currenttime_randomnumber.xml
filetype3_Status_1(folder)
09052019_1_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
Code Version 1.0
#!/usr/bin/python3
# v1.0
# Importing modules
import os
import shutil
import sys
# Path of input and output files
src = input('Input files: ')
dest = input('Destination files: ')
# Make `dest` the working directory: the relative folder names built in
# classify() are created and resolved there.
os.chdir(dest)
# Move every entry of `src` into a folder named '<topic>_Status_<status>',
# where status and topic are the 2nd and 3rd '_'-separated fields of the name.
# NOTE(review): indentation was lost in this listing; nesting is inferred.
def classify():
for f in os.listdir(src):
splitname = f.split('_')
# NOTE(review): a name with fewer than three '_'-separated fields raises
# IndexError on the lookups below.
status = splitname[1]
topic = splitname[2]
foldername = topic + '_' + 'Status_' + status
# Create the target folder the first time this topic/status pair is seen.
if not os.path.exists(foldername):
os.mkdir(foldername)
# The file keeps its original name inside the folder.
shutil.move(os.path.join(src, f), foldername)
print('Sorting out files, please wait...')
classify()
print('¡DONE!')
Improvement
But in the v2.0 I would like to "improve" it a little more, just keeping the same usability but changing filenames from original name to "Message_*.xml" and it works but only moving one file, not all of them.
Current results:
filetype1_Status_6(folder)
Message_.xml
filetype2_Status_2(folder)
Message.xml
filetype3_Status_9(folder)
Message_.xml
filetype3_Status_1(folder)
Message_.xml
Expected results:
filetype1_Status_6(folder)
Message_.xml
filetype2_Status_2(folder)
Message.xml
filetype3_Status_9(folder)
Message_.xml
filetype3_Status_1(folder)
Message_.xml
Message_1.xml
Code Version 2.0
#!/usr/bin/python3
# v2.0
# Importing modules
import os
import shutil
import sys
# Path of input and output files
src = input('Input files: ')
dest = input('Destination files: ')
# Make `dest` the working directory: the relative folder names built in
# classify() are created and resolved there.
os.chdir(dest)
# Copy every entry of `src` into a folder named '<topic>_Status_<status>',
# storing it under the fixed name 'Message_.xml'.
# NOTE(review): indentation was lost in this listing; nesting is inferred.
def classify():
for f in os.listdir(src):
splitname = f.split('_')
status = splitname[1]
topic = splitname[2]
foldername = topic + '_' + 'Status_' + status
# The target name depends only on the folder, so every file routed to the
# same folder is copied to the same path and replaces the previous copy.
# The '\\' separator is Windows-specific.
newFileName = foldername + '\\' + 'Message_' + '.xml'
if not os.path.exists(foldername):
os.mkdir(foldername)
# copy (not move, as in v1.0): the source file stays in `src`.
shutil.copy(os.path.join(src, f), newFileName)
print('Sorting out files, please wait...')
classify()
print('¡DONE!')
You are naming everything Message_ so you will never get multiple files. You need to parse the names in the folder and then increment the filenames accordingly.
# Make sure the destination folder exists before probing for free names.
if not os.path.exists(foldername):
    os.mkdir(foldername)

# Probe Message_0.xml, Message_1.xml, ... and stop at the first name that is
# not already taken by a file in the folder.
msgInt = 0
msgName = 'Message_' + str(msgInt)
newFileName = foldername + '\\' + msgName + '.xml'
while os.path.isfile(newFileName):
    msgInt += 1
    msgName = 'Message_' + str(msgInt)
    newFileName = foldername + '\\' + msgName + '.xml'

shutil.copy(os.path.join(src, f), newFileName)
Now if you already have Message_0.xml in your folder, you will get a Message_1.xml instead, and so on.

Error writing to folder in python os.makedirs()

I am trying to download a .zip file from ftp site, (works independent of the error), I am creating a folder in a directory with the current date in the name. I want the downloaded zip file to be placed in the newly created folder. my code is below.
import os
import urllib
import datetime
# Python 2 script (print statements, urllib.urlretrieve): create a dated
# folder and download the zip archive from `situs` to `path1`.
now = datetime.datetime.now()
situs = "ftp://pbcgis:sigcbp#ftp.co.palm-beach.fl.us/CWGIS/SITUS_PUB.zip"
# os.path.join() receives one already-concatenated string in both calls
# below, so it has nothing to join and inserts no path separators.
path = os.path.join(r"Y:\JH_Data_Dump\SITUS\PBC_SITUS" + str(now.month) + "_" + str(now.day) + "_" + str(now.year))
# NOTE(review): `path` and "PBC_SITUS..." are concatenated without a
# separator, so path1 names a sibling of the new folder, not a file inside it.
path1 = os.path.join(path + "PBC_SITUS" + str(now.month) + "_" + str(now.day) + "_" + str(now.year) +".zip")
print "Creating new directory..."
os.makedirs(path)
print "beginning PBC SITUS Download..."
urllib.urlretrieve(situs, path1)
I get no errors and the file downloads successfully, but it's not placing the .zip into my newly created folder; it's placing it in the same directory as the folder, not inside it.
You use os.path.join incorrectly. Path segments - directories and filename - are distinct arguments. They are joined using path separator, either \ or /.
# A bare drive letter ("Y:") is drive-relative on Windows, and os.path.join()
# adds no separator after it; give the drive root its own trailing backslash.
path = os.path.join('Y:\\', "PBC_SITUS123")
path1 = os.path.join(path, "PBC_SITUS123" + ".zip")
will result in Y:\PBC_SITUS123\PBC_SITUS123.zip
I figured out why, I was missing a "\" in the path1 string
it should read:
path1 = os.path.join(path + r"\PBC_SITUS" + str(now.month) + "_" + str(now.day) + "_" + str(now.year) +".zip")

How to check if folder has subfolders and then list the directories with listdir()?

I want to write a file searching code where I don't know if the directory I'm searching in has subdirectories and I want to check that so I don't get an error like this:
[Error 267]The directory name is invalid: 'C:/Path/To/Directory'.
I wrote code like this: if it finds the file, it breaks and stops the program; if not, it goes down a layer, and so on.
# Python 2 script (raw_input, print statements): look for `filename` in the
# entries of `path`, then one and two levels further down.
# NOTE(review): indentation was lost in this listing, so it cannot be told
# here whether each `else:` pairs with the `if` or with the `for` above it.
filename = raw_input('> ')
path = 'C:/Path/Of/Directory/You/Want/To/Search/In'
fldr = os.listdir(path)
for f in fldr:
p = path + '/' + f
# os.listdir() is called on every entry without checking that it is a
# directory; presumably this is the call that raises the "directory name is
# invalid" error when the entry is a file.
sfldr = os.listdir(p)
if os.path.exists(p + '/' + filename):
print 'Found file!!', p + '/' + filename
# `break` only leaves the loop that directly encloses it.
break
else:
# Second level: same pattern, one directory deeper.
for sf in sfldr:
pp = p + '/' + sf
ssfldr = os.listdir(pp)
if os.path.exists(pp + '/' + filename):
print 'Found file!!', pp + '/' + filename
break
else:
# Third level: the hard-coded depth limit of this search.
for ssf in ssfldr:
ppp = pp + '/' + ssf
# Listed but never used afterwards in this excerpt.
sssfldr = os.listdir(ppp)
if os.path.exists(ppp + '/' + filename):
print 'Found file!!', ppp + '/' + filename
break
The easy-to-notice error is that when the directory doesn't have 3 layers of subfolders, the program just breaks and gives an error message. So if I could check whether the folder has subfolders before entering it, that would be neat.
Use os.scandir(). Provides better speed over os.walk()
Link to docs here!
Alternatively, use glob:
>>> from glob import glob
>>> paths = glob('*/')
>>> paths
['bin/', 'content/', 'include/', 'lib/', 'output/']
>>>

How to move files in multiple directories into another directory with the same subdirectories

I have
import shutil
# Move the first `split_index` entries of every train/<sub_dir> into
# valid/<sub_dir>.
# NOTE(review): `path`, `split_index`, `os` and `distutils` are not defined in
# this excerpt -- presumably set up outside it or in the elided part.
for sub_dir in os.listdir(path + "train"):
src_files = os.listdir(path+"train/" + sub_dir)
...
for file in src_files[0:split_index]:
original = path+"train/" + sub_dir+ "/" + file
# Create valid/<sub_dir> before moving into it.
# NOTE(review): distutils was removed in Python 3.12;
# os.makedirs(..., exist_ok=True) is the standard-library alternative.
distutils.dir_util.mkpath(path + "valid/" + sub_dir)
destination = path + "valid/" + sub_dir+"/"
shutil.move(original, destination)
I have these two directory structures:
train/abc
train/def
train/ghi
valid/
I need to move some portion of files in train/ to valid/ and retain the original directory structure. After the move, valid/ should look the same as train/:
valid/abc
valid/def
valid/ghi
The above code isn't quite right. How can I modify it to make it right?
distutils.dir_util.mkpath(path + "valid/" + sub_dir)
Added this part and it works now.

Converting multiple gz file within subdirectories into csv

I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within it. If possible, I would also like to combine all the CSV within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os
# Python 2 script (print statements): intended to unzip every .gz file in
# each subdirectory of testloop and rewrite it as a CSV file.
# NOTE(review): indentation was lost in this listing; nesting is inferred.
subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
print "Working On " + dirlist
# `dirlist` is one name string from os.listdir(), so this is the length of
# that name, not a file count.
total = len(dirlist)
counter = 0
# NOTE(review): iterating a string yields its characters, so `dir` is a
# single character here rather than a file inside the subdirectory.
for dir in dirlist:
print "Working On " + dir
# NOTE(review): str(subdirlist) is the text form of the whole list, not a
# directory path; the same expression is reused for every path below.
f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
file_content = f.read()
f.close()
print "25% Complete"
# The decompressed content goes through a temporary text file first.
filename = '/' + str(subdirlist) + '/temp.txt'
target = open(filename, 'w')
target.write(file_content)
target.close()
print "50% Complete!"
# Re-read the temporary file as tab-delimited rows and write them out with
# the csv writer's default settings.
csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
in_txt = csv.reader(open(filename, "rb"), delimiter = '\t')
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
# Remove the temporary file and the original compressed file.
os.remove(filename)
os.remove('/' + str(subdirlist) + '/' + dir)
counter+=1
print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
print "SubDirectory Converted!"
# subcounter is printed before it is incremented, so the count starts at 0.
print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
subcounter+=1
print "All Files Converted!"
Thanks in advance
To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce # in Python 3.x only
def get_filelist(root, extensions=None):
    """Return a list of files (path and name) within a supplied root directory.

    The tree below *root* is traversed with ``os.walk``, so files in
    arbitrarily nested subdirectories are included.

    To filter by extension(s), provide a list of strings, e.g.
    get_filelist(root, ["zip", "csv"])

    Args:
        root: Directory to search.
        extensions: Optional collection of extensions, given without the
            leading dot. ``None`` (the default) disables filtering.

    Returns:
        A list of ``dirpath + sep + filename`` strings in ``os.walk`` order.
        The list is empty when *root* does not exist or holds no matching
        files.
    """
    def wanted(name):
        if extensions is None:
            return True
        # rpartition leaves `dot` empty for names without a ".", so an
        # extension-less file called "csv" is not mistaken for a ".csv" file.
        _, dot, ext = name.rpartition(".")
        return bool(dot) and ext in extensions

    # A single flat comprehension replaces reduce(): it returns [] instead of
    # raising TypeError when walk() yields nothing, and it avoids rebuilding
    # the accumulated list for every directory visited.
    return [sep.join([dirpath, name])
            for dirpath, _dirnames, filenames in walk(root)
            for name in filenames
            if wanted(name)]

Categories

Resources