I have a working script that will print all files in a given directory. I would like help making it do two additional things:
(1) Also be able to print the date created or a timestamp for each file.
(2) Do all of the above not only for files in the given directory, but in all subdirectories as well.
Here is the working script:
from os import listdir, getcwd
from os.path import isfile, join
from sys import argv

script, filename = argv

mypath = getcwd()
allfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
output = open(filename, 'w')
for i in allfiles:
    string = "%s" % i
    output.write(string + "\n")
output.close()
print "Directory printed."
I would hope to be able to print something like (filename + ", " + timestamp + "\n"), or some substitute.
Thanks!
http://docs.python.org/2/library/os.html and http://docs.python.org/2/library/stat.html have you covered.
os.walk will give you the recursive directory walking
stat will give you file timestamps (atime,ctime,mtime)
This snippet walks through files in a directory + subdirectories and prints out created and modified timestamps.
import os
import time

def walk_files(directory_path):
    # Walk through files in directory_path, including subdirectories
    for root, _, filenames in os.walk(directory_path):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            created = os.path.getctime(file_path)
            modified = os.path.getmtime(file_path)
            # Process stuff for the file here, for example...
            print "File: %s" % file_path
            print "    Created: %s" % time.ctime(created)
            print "    Last modified: %s" % time.ctime(modified)

walk_files('/path/to/directory/')
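To get the output format you asked about, the same walk can write a "filename, timestamp" line per file into your output file. A minimal sketch combining the walk with your original script (the output filename still comes from sys.argv; formatting the creation time with time.ctime is just one option):

import os
import time
from sys import argv

script, filename = argv

output = open(filename, 'w')
# walk the current directory and all subdirectories
for root, _, filenames in os.walk(os.getcwd()):
    for name in filenames:
        file_path = os.path.join(root, name)
        created = time.ctime(os.path.getctime(file_path))
        output.write("%s, %s\n" % (file_path, created))
output.close()
print "Directory printed."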
Related
I have some text files in a folder and I want to search rows 4 to 20 of each of them for names, then copy the files containing one of those names to a different folder. With my code I only get an empty result file, even though I know the keywords are in the files. What could be the problem with this code in Python 3?
from os import system, listdir, path
import codecs

FILE = open('C:\\Users\\Admin\\Desktop\\Test\\Result.txt', 'w')
desktop_dir = path.join('C:\\Users\\Admin\\Desktop\\test\\')
for fn in listdir(desktop_dir):
    fn_w_path = path.join(desktop_dir, fn)
    if path.isfile(fn_w_path):
        with open(fn_w_path, "r") as filee:
            for line in filee.readlines():
                for word in line.lower().split():
                    if word in {'James',
                                'Tim',
                                'Tom',
                                'Ian',
                                'William',
                                'Dennis',}:
                        FILE.write(word + "\n")
FILE.close()
import os
import shutil

for root, dirs, files in os.walk("test_dir1", topdown=False):
    for name in files:
        current_file = os.path.join(root, name)
        destination = current_file.replace("test_dir1", "test_dir2")
        print("Found file: %s" % current_file)
        print("File copy to: %s" % destination)
        shutil.copy(current_file, destination)
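As a side note on the question's snippet: line.lower().split() lowercases every word, while the set contains capitalized names, so no word can ever match and the result file stays empty. A minimal sketch of the comparison with both sides lowercased (fn_w_path and FILE as in the question):

names = {'james', 'tim', 'tom', 'ian', 'william', 'dennis'}
with open(fn_w_path, "r") as filee:
    for line in filee:
        for word in line.lower().split():
            if word in names:
                FILE.write(word + "\n")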
I'm making a script that will encode files within a directory using b64/b16, and I'm using os.listdir to do so, but it also lists directories, which causes problems because the script then tries to encode directories as if they were files.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob

#C:\\Users\\Fedora\\Desktop\\Win 10
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
files = []
filecount = 0
fileprogress = 0
for file in dirs:
    files.append(file)
    filecount = filecount + 1
for x in files:
    os.system("cls")
    fileprogress = fileprogress + 1
    print("File " + str(fileprogress) + "/" + str(filecount))
    print("Encrypting " + x + "...")
    inputfile = open(path + "\\" + x, "rb")
    data = inputfile.read()
    inputfile.close()
    data = base64.b16encode(data)
    data = base64.b64encode(data)
    data = base64.b16encode(data)
    data = base64.b64encode(data)
    data = base64.b16encode(data)
    outputfile = open(path + "\\" + x + ".crypt", "wb")
    outputfile.write(data)
    outputfile.close()
Use filter:

filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(filepath)
files = filter(lambda x: os.path.isfile(os.path.join(filepath, x)), dirs)
Or use a list comprehension with os.path.isfile():

filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(filepath)
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]
You can use the os.path.isdir function to check whether the current entry is a directory.
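For example, a minimal sketch of the question's loop that skips directories (same path variable as in the question):

for x in os.listdir(path):
    full_path = os.path.join(path, x)
    if os.path.isdir(full_path):
        continue  # skip directories, only encode regular files
    # ... encode full_path as in the question ...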
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.
Instead of using os.listdir() you can use os.walk, which will return separate lists for files and directories:
python-oswalk-example
import os

path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for root, dirs, files in os.walk(path):
    print(root)
    print(dirs)
    print(files)
pythoncentral os-walk
# Import the os module, for the os.walk function
import os

# Set the directory you want to start from
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for dirName, subdirList, fileList in os.walk(path):
    print('Found directory: %s' % dirName)
    for fname in fileList:
        print('\t%s' % fname)
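A minimal sketch of how the question's encode loop could consume the os.walk output, building full paths with os.path.join so files in subfolders are handled as well (encode_file is a hypothetical helper standing in for the b16/b64 steps from the question):

import os

path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for dirName, subdirList, fileList in os.walk(path):
    for fname in fileList:
        full_path = os.path.join(dirName, fname)
        encode_file(full_path)  # hypothetical helper wrapping the question's b16/b64 encoding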
Background:
My goal is to find duplicate files in two different folders (without subfolders). To do that, I use the following Python script:
### Check whether all archives are still present or whether data was deleted during the check
import os

def listfiles(path):
    files = []
    for dirName, subdirList, fileList in os.walk(path):
        dir = dirName.replace(path, '')
        for fname in fileList:
            if fname.endswith("_GIS.7z"):
                files.append(os.path.join(dir, fname))
    return files

x = listfiles(root)
y = listfiles(backupfolderGIS)

#q = [filename for filename in x if filename not in y]
files_only_in_x = set(x) - set(y)
files_only_in_y = set(y) - set(x)
files_only_in_either = set(x) ^ set(y)
files_in_both = set(x) & set(y)
all_files = set(x) | set(y)

print "All files:"
print all_files
print " "
print "Only in the temporary folder:"
print files_only_in_x
print " "
print "Only in the backup folder:"
print files_only_in_y
print " "
print "Only in one of the two folders:"
print files_only_in_either
print " "
print "In both folders:"
print files_in_both
print " "
The relevant output variable/list is files_in_both; it shows me the duplicates. If I print it, it looks like set(['NameoftheProject_GIS.7z', 'NameofanotherProject_GIS.7z']).
Question:
How can I use this output/information (the duplicate files in the directories) to delete or move them? For example, the files NameoftheProject_GIS.7z and NameofanotherProject_GIS.7z from the list files_in_both, located in the folder backupfolderGIS.
os.walk recursively checks all folders and subfolders starting from the root dir you pass. You want to check two different folders (without subfolders), so just search each folder with glob. If you want to move the duplicates, you can use shutil.move:
from glob import iglob
from os import path
from shutil import move
pt1, pt2 = "/path_1", "/path_2"
dupe = set(map(path.basename, iglob(path.join(pt1, "*_GIS.7z")))).intersection(
    map(path.basename, iglob(path.join(pt2, "*_GIS.7z"))))

for fle in dupe:
    # move(src, dest)
    move(path.join(pt1, fle), "wherever")
Or to delete, use os.remove:

import os

for fle in dupe:
    os.remove(path.join(pt1, fle))
If you want to move or delete the files from pt2 instead, pass pt2 to path.join in place of pt1.
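For example, a minimal sketch of the move variant for pt2 (same dupe set and move import as above):

for fle in dupe:
    move(path.join(pt2, fle), "wherever")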
You could also use str.endswith with os.listdir:

dupe = set(fname for fname in os.listdir(pt1) if fname.endswith("_GIS.7z")).intersection(
    fname for fname in os.listdir(pt2) if fname.endswith("_GIS.7z"))
To avoid repeating yourself, you can put it in a function:

from shutil import move
from os import path, listdir

def listfiles(pth, end):
    return set(fname for fname in listdir(pth) if fname.endswith(end))

for fle in listfiles(pt1, "_GIS.7z").intersection(listfiles(pt2, "_GIS.7z")):
    move(path.join(pt1, fle), "wherever")
Now, if you wanted to check all folders for files with the same basename and do something with the duplicated names, you would need to keep a record of the full paths. You can group all common files by basename using a defaultdict:
from os import path, walk
from collections import defaultdict

def listfiles(pth, end):
    files = defaultdict(list)
    for dirName, subdirList, fileList in walk(pth):
        for fname in fileList:
            if fname.endswith(end):
                files[fname].append(path.join(dirName, fname))
    return files
You will get a dict where the keys are the basenames and the values are lists of files with the full path to each. Any list with more than one value means you have at least two files with the same name, but remember that having the same basename does not mean the files are actually the same.
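For example, a minimal sketch (using the defaultdict version of listfiles above, with a hypothetical starting folder) that keeps only the basenames that occur more than once:

grouped = listfiles("/path_1", "_GIS.7z")  # hypothetical root folder
dupes = {name: paths for name, paths in grouped.items() if len(paths) > 1}
for name, paths in dupes.items():
    print name, paths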
I'm currently trying to develop a catch-all script that, when pointed to a folder, will look through all of the subfolders, and anything that is a .gdb will be moved to a new folder. Some GDBs are zipped and have a folder which then contains the GDB inside. If the GDB folders have the same name once extracted, they should be given the suffix _2, _3, _4 and then moved to the folder with the other GDBs, so they won't conflict with any existing names.
Everything works except the renaming: it will rename one OK and then start placing folders inside another GDB, before eventually failing with WindowsError: [Error 183] Cannot create a file when that file already exists.
import shutil, errno, re, os, zipfile, os.path, sys

def unzip(source_filename, dest_dir):
    with zipfile.ZipFile(source_filename) as zf:
        for member in zf.infolist():
            words = member.filename.split('/')
            path = dest_dir
            for word in words[:-1]:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir, ''): continue
                path = os.path.join(path, word)
            zf.extract(member, path)

results = []
input_path = sys.argv[1]
output_path = sys.argv[2]

if input_path not in output_path:
    for path, subdirs, files in os.walk(input_path):
        for f in files:
            if f.endswith(".zip") or f.endswith(".ZIP"):
                unzip(os.path.join(path, f), path)

if input_path not in output_path:
    for path, subdirs, files in os.walk(input_path):
        counter = 2
        for dir_name in subdirs:
            if os.path.join(path, dir_name).endswith(".gdb") or os.path.join(path, dir_name).endswith(".GDB"):
                if os.path.exists(output_path + "\\" + dir_name):
                    print "Moving " + os.path.join(path, dir_name) + " To " + output_path + "\\" + str(counter) + dir_name
                    os.rename(os.path.join(path, dir_name), output_path + "\\" + dir_name[:-4] + "_" + str(counter) + ".gdb")
                    counter + 1
                else:
                    print "Moving " + os.path.join(path, dir_name) + " To " + output_path
                    shutil.move(os.path.join(path, dir_name), output_path)
else:
    print "########################################"
    print "########################################"
    print "Please select a different output path"
    print "The output path should not be contained"
    print " In the input path"
    print "########################################"
    print "########################################"
Any help anyone could give would be greatly appreciated. Cheers!
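For the renaming step, a minimal sketch (same variable names as in the script above, not a full rewrite) of one way to pick an unused target name: keep bumping the counter until the destination does not exist, then do a single shutil.move:

src = os.path.join(path, dir_name)
dest = os.path.join(output_path, dir_name)
counter = 2
# bump the numeric suffix until the destination name is free
while os.path.exists(dest):
    dest = os.path.join(output_path, dir_name[:-4] + "_" + str(counter) + ".gdb")
    counter += 1
shutil.move(src, dest)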
I am trying to sort a large number of files based off of their file extension. A lot of the files are .doc, .docx, .xls, etc.
This is what I was thinking in my head, but if there is a simpler way to do things, let me know! I do have multiple files with the same extension, so I don't want it to create a new folder for that extension every time and overwrite the previous file. I also have a much larger list, but for this example I don't believe all of them are needed. The OS is MacOS.
import os, shutil

extList = ['.doc', '.docx', '.xls']
for ext in extList:
    os.mkdir(path + '/' + ext + '_folder')
    for file in os.listdir(filepath):
        if file.endswith(ext):
            print(file)
            shutil.copyfile(file + '/' + ext + '_folder' + file)
Also, if I run into a file that I do not have on my list, I would like it to go into a folder named 'noextlist'.
Here is what I was able to create quickly
import os, re, shutil

DocFolder = r'...'  # Your doc folder path
DocxFolder = r'...'  # Your docx folder path
XlsFolder = r'...'  # Your xls folder path
MiscFolder = r'...'  # Your misc folder path

for root, dirs, files in os.walk(r'...'):  # Your folder path you want to sort
    for file in files:
        if file.endswith(".doc"):
            sourceFolder = os.path.join(root, file)
            print(sourceFolder)
            shutil.copy2(sourceFolder, DocFolder)
        elif file.endswith(".docx"):
            sourceFolder = os.path.join(root, file)
            print(sourceFolder)
            shutil.copy2(sourceFolder, DocxFolder)
        elif file.endswith(".xls"):
            sourceFolder = os.path.join(root, file)
            print(sourceFolder)
            shutil.copy2(sourceFolder, XlsFolder)
        else:
            sourceFolder = os.path.join(root, file)
            print(sourceFolder)
            shutil.copy2(sourceFolder, MiscFolder)
Edit: The main piece here is the for root, dirs, files in os.walk(...) loop. This allows the program to traverse the provided path, visiting all files including the ones in the subfolders, and sort them accordingly.
import errno
import shutil
from os import listdir, mkdir
from os.path import splitext, join

# set for fast lookup
extList = set(['.doc', '.docx', '.xls'])
# source path
filepath = ...
# dest path
path = ...

for f in listdir(filepath):
    # extract extension from file name
    ext = splitext(f)[1]
    if ext in extList:
        dir_ = join(path, "{}_folder".format(ext))
    else:
        # files with an unlisted extension go to the fallback folder
        dir_ = join(path, "noextlist_folder")
    try:
        mkdir(dir_)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise  # raise if any other error than "already exists"
    dest = join(dir_, f)
    shutil.copy2(join(filepath, f), dest)
If I understand correctly, you like your solution but you need a way to rename files with duplicate names so that the extras don't disappear. You can check if the destination file already exists and construct a variant name by adding _1, _2, etc. to the filename until you find something unused.
newpathname = path + '/' + ext + '_folder' + "/" + file
base, extension = os.path.splitext(newpathname)
n = 0
while os.path.exists(newpathname):
    n += 1
    newpathname = "%s_%d%s" % (base, n, extension)
shutil.copyfile(filepath + "/" + file, newpathname)
But your code has some other glitches, so here's a rewritten scanner. It uses os.walk() to descend into several levels of subdirectories (you don't say if that's needed or not), and it collects files of all extensions in one pass. And it constructs variant names as before.
import os, shutil
from os.path import join as joinpath

extList = ['.doc', '.docx', '.xls']

# path is the destination root and filepath the source folder, as in the question
# Make sure the destination directories exist
for ext in extList:
    extdir = joinpath(path, ext[1:] + "_folder")
    if not os.path.exists(extdir):
        os.mkdir(extdir)

for dirname, _dirs, files in os.walk(filepath):
    for file in files:
        base, ext = os.path.splitext(file)
        if ext not in extList:
            continue
        destpath = joinpath(path, ext[1:] + "_folder")
        n = 0
        newpathname = joinpath(destpath, file)
        # If the new name is in use, find an unused variant
        while os.path.exists(newpathname):
            n += 1
            newfile = "%s_%d%s" % (base, n, ext)
            newpathname = joinpath(destpath, newfile)
        shutil.copy(joinpath(dirname, file), newpathname)  # or other copy method