How would I exclude directories from os.listdir results? - python

I'm making a script that will encode files within a directory using b64/b16 and I'm using os.listdir to do so, but it also lists directories which causes problems since now it's trying to encode directories as if it were a file.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob
#C:\\Users\\Fedora\\Desktop\\Win 10
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
files = []
filecount = 0
fileprogress = 0
for file in dirs:
files.append(file)
filecount = filecount + 1
for x in files:
os.system("cls")
fileprogress = fileprogress + 1
print("File " + str(fileprogress) + "/" + str(filecount))
print("Encrypting " + x + "...")
inputfile = open(path + "\\" + x, "rb")
data = inputfile.read()
inputfile.close()
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
outputfile = open(path + "\\" + x + ".crypt", "wb")
outputfile.write(data)
outputfile.close()

use filter
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
files = filter(lambda x:os.path.isfile(os.path.join(filepath, x)), dirs)
or list comprehension with os.path.isfile()
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
dirs = os.listdir(path)
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]

You can use os.path.isdir function to check if the current file is a directory.
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.

Instead of using os.listdir() your can use os.walk which will return separate list for files and directories
python-oswalk-example
import os
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for (path, dirs, files) in os.walk(path):
print path
print dirs
print files
pythoncentral os-walk
#Import the os module, for the os.walk function
import os
#Set the directory you want to start from
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
for dirName, subdirList, fileList in os.walk(path):
print('Found directory: %s' % dirName)
for fname in fileList:
print('\t%s' % fname)

Related

Show path tree with files

import os
path = "G:\krunker\mod"
abcde = open("path.txt", "w")
for dirpath, dirnames, filenames in os.walk(path):
directory_level = dirpath.replace(path, "")
directory_level = directory_level.count(os.sep)
indent = " " * 4
print("{}{}/".format(indent*directory_level, os.path.basename(dirpath)), file=abcde)
for f in filenames:
print("{}{}".format(indent*(directory_level+1), f), file=abcde)
abcde.close()
I want it to print the files in every single folder of the path but it does only on the last
The indentation is not correct. The second for loop also has to be inside the first for loop.
Correct code:
import os
path = "/home/user/my_folder/tools"
abcde = open("path.txt", "w")
for dirpath, dirnames, filenames in os.walk(path):
directory_level = dirpath.replace(path, "")
directory_level = directory_level.count(os.sep)
indent = " " * 4
print("{}{}/".format(indent*directory_level, os.path.basename(dirpath)), file=abcde)
for f in filenames:
print("{}{}".format(indent*(directory_level+1), f), file=abcde)
abcde.close()
A part of path.txt content:
tools/
.gitignore
README.md
__init__.py
requirements3.txt
test.py
path.txt
.git/
description
hooks/
commit-msg.sample
info/
exclude
refs/
heads/
master
You can also use a recursive function which gets recursivly all subfolder's content until there's any subfolder:
from os import walk
output = open("path.txt", "w")
def listFiles(path, indent):
for (openedPath, folders, files) in walk(path):
for file in files:
output.write("\t" * (indent) + file + "\n")
for folder in folders:
output.write("\t" * (indent) + folder + "/\n")
listFiles(path + "/" + folder, indent + 1)
break
source = "/my/path/to/my/folder"
print(source + "/")
listFiles(source, 1)
There's an example with a little code project folder.
/my/path/to/my/folder/
input.txt
main.py
output/
error.cpp
trying.cpp
logo.cpp
You can use this kind of code to simplify
import os
folder = r"C:\path\to\find\files"
x = [os.path.join(r,file) for r,d,f in os.walk(folder) for file in f # If you want specific files if file.endswith(".txt")]
y = [os.path.join(r,folder) for r,d,f in os.walk(folder) for folder in d]
print(x) #For files in main directory and subdirectories
print(y) #For files in main directory and subdirectories

Python - copy specific file from subfolder to destination, get filename from text file

I want to get my script to read a list of names from a list(txt), then search for those in a selected folder with subfolders, then copy and paste those files to another selected folder. My script running without error but no result.
My script:
import os
import os.path
import shutil
textFile = ("D:\\Test\\list.txt")
sourceFolder = ("D:\\Test")
destinationFolder = ("D:\\")
filesToFind = []
with open(textFile, "r") as tx:
for row in tx:
filesToFind.append(row.strip())
for root, dirs, filename in os.walk(sourceFolder):
if filename in filesToFind:
f = os.path.join(root, filename)
shutil.copy(f, destinationFolder)
Haven’t test it but I think this will work - change this:
for root, dirs, filename in os.walk(sourceFolder):
if filename in filesToFind:
f = os.path.join(root, filename)
shutil.copy(f, destinationFolder)
To this:
for root, dirs, filenames in os.walk(sourceFolder):
for filename in filenames:
if filename in filesToFind:
f = os.path.join(root, filename)
shutil.copy(f, destinationFolder)
# Same code using glob #
## More efficient and also tested one ##
## One more feature added- checks file name given present or not ##
import os
import os.path
import shutil
import glob
textFile = ("D:\\Test\\list.txt")
sourceFolder = ("D:\Test")
destinationFolder = ("D:\\")
f = open(textFile, "r").readlines()
for i in f:
ListFile= glob.glob(os.path.join(sourceFolder,"**",i.strip()),recursive=True)
if len(ListFile):
print(ListFile[0],destinationFolder,os.path.basename(ListFile[0]))
destinationfile=os.path.join(destinationFolder,os.path.basename(ListFile[0]))
shutil.copyfile(ListFile[0],destinationfile)
else:
print(i,"-File not found")

Organizing data by filetype

I am trying to sort a large number of files based off of their file extension. A lot of the files are .doc, .docx, .xls, etc.
This is what I was thinking in my head, but if there is a simpler way to do things, let me know! I do have multiple files with the same extension, so I don't want it to create a new folder for that extension every time and overwrite the previous file. I also have a much larger list, but for this example I don't believe all of them are needed. The OS is MacOS.
import os, shutil
extList = ['.doc', '.docx', '.xls']
for ext in extList:
os.mkdir(path + '/' + ext +'_folder')
for file in os.listdir(filepath):
if file.endswith(ext): #missing an indent
print(file)
shutil.copyfile(file + '/' + ext +'_folder' + file)
Also, if I run into a file that I do not have on my list, I would like it to go into a folder named 'noextlist'.
Here is what I was able to create quickly
import os, re, shutil
DocFolder = r'...'#Your doc folder path
DocxFolder = r'...'#Your docx folder path
XlsFolder = r'...'#Your xls folder path
MiscFolder = r'...'#Your misc folder path
for root, dirs, files in os.walk(r'...'): #Your folder path you want to sort
for file in files:
if file.endswith(".doc"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,DocFolder)
elif file.endswith(".docx"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,DocxFolder)
elif file.endswith(".xls"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,XlsFolder)
else:
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,MiscFolder)
Edit:The main function here is the for root,dirs,files in os.walk This allows the program to transverse through the provided path to search all files including the ones in the sub folder and sort it out accordingly.
import errno
import shutil
from os import listdir, mkdir
from os.path import splitext, join
# set for fast lookup
extList = set(['.doc', '.docx', '.xls'])
# source path
filepath = ...
# dest path
path = ...
for f in listdir(filepath):
# extract extension from file name
ext = splitext(f)[1]
if ext in extList:
dir_ = join(path, "{}_folder".format(ext))
try:
mkdir(dir_)
except OSError as e:
if ex.errno != errno.EEXIST:
raise # raise if any other error than "already exists"
dest = join(dir_, f)
else:
dest = join(path, "noextlist_folder", f)
shutil.copy2(join(filepath, f), dest)
If I understand correctly, you like your solution but you need a way to rename files with duplicate names so that the extras don't disappear. You can check if the destination file already exists and construct a variant name by adding _1, _2, etc. to the filename until you find something unused.
newpathname = path + '/' + ext +'_folder' + "/" + file
n = 0
while os.path.exists(newpathname):
n += 1
base, ext = os.path.splitext(newpathname)
newpathname = "%s_%d%s" % (base, n, ext)
shutil.copyfile(filepath+"/"+file, newpathname)
But your code has some other glitches, so here's a rewritten scanner. It uses os.walk() to descend into several levels of subdirectories (you don't say if that's needed or not), and it collects files of all extensions in one pass. And it constructs variant names as before.
import os, shutil
extList = ['.doc', '.docx', '.xls']
from os.path import join as joinpath
# Make sure the destination directories exist
for ext in extList:
extdir = joinpath(path, ext[1:]+"_folder")
if not os.path.exists(extdir):
os.mkdir(extdir)
for dirname, _dirs, files in os.walk(filepath):
for file in files:
base, ext = os.path.splitext(file)
if ext not in extList:
continue
destpath = joinpath(path, ext[1:]+"_folder")
n = 0
newpathname = joinpath(destpath, file)
# If the new name is in use, find an unused variant
while os.path.exists(newpathname):
n += 1
newfile = "%s_%d%s" % (base, n, ext)
newpathname = joinpath(path, newfile)
sh.copy(joinpath(dirname, file), newpathname) # or other copy method

Flatten complex directory structure in Python

I want to move files from a complex directory structure to just one place. For example i have this deep hierarchy:
foo/
foo2/
1.jpg
2.jpg
...
I want it to be:
1.jpg
2.jpg
...
My current solution:
def move(destination):
for_removal = os.path.join(destination, '\\')
is_in_parent = lambda x: x.find(for_removal) > -1
with directory(destination):
files_to_move = filter(is_in_parent,
glob_recursive(path='.'))
for file in files_to_move:
shutil.move(file, destination)
Definitions: directory and glob_recursive. Note, that my code only moves files to their common parent directory, not an arbitrary destination.
How can i move all files from a complex hierarchy to a single place succinctly and elegantly?
I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination
import os
import itertools
import shutil
def move(destination):
all_files = []
for root, _dirs, files in itertools.islice(os.walk(destination), 1, None):
for filename in files:
all_files.append(os.path.join(root, filename))
for filename in all_files:
shutil.move(filename, destination)
Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:
def move(destination):
all_files = []
first_loop_pass = True
for root, _dirs, files in os.walk(destination):
if first_loop_pass:
first_loop_pass = False
continue
for filename in files:
all_files.append(os.path.join(root, filename))
for filename in all_files:
shutil.move(filename, destination)
this would do, it also renames files if they collide (I commented out the actual move and replaced with a copy):
import os
import sys
import string
import shutil
#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
if os.path.isdir(path):
for root, dirs, files in os.walk(path):
for name in files:
yield os.path.join(root, name)
else:
yield path
destination = "./newdir/"
fromdir = "./test/"
for f in getfiles(fromdir):
filename = string.split(f, '/')[-1]
if os.path.isfile(destination+filename):
filename = f.replace(fromdir,"",1).replace("/","_")
#os.rename(f, destination+filename)
shutil.copy(f, destination+filename)
Run recursively through directory, move the files and launch move for directories:
import shutil
import os
def move(destination, depth=None):
if not depth:
depth = []
for file_or_dir in os.listdir(os.path.join([destination] + depth, os.sep)):
if os.path.isfile(file_or_dir):
shutil.move(file_or_dir, destination)
else:
move(destination, os.path.join(depth + [file_or_dir], os.sep))
import os.path, shutil
def move(src, dest):
not_in_dest = lambda x: os.path.samefile(x, dest)
files_to_move = filter(not_in_dest,
glob_recursive(path=src))
for f in files_to_move:
shutil.move(f, dest)
Source for glob_recursive. Does not change name of file, if they collide.
samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.
def splitPath(p):
a,b = os.path.split(p)
return (splitPath(a) if len(a) and len(b) else []) + [b]
def safeprint(s):
try:
print(s)
except UnicodeEncodeError:
if sys.version_info >= (3,):
print(s.encode('utf8').decode(sys.stdout.encoding))
else:
print(s.encode('utf8'))
def flatten(root, doit):
SEP = "¦"
REPL = "?"
folderCount = 0
fileCount = 0
if not doit:
print("Simulating:")
for path, dirs, files in os.walk(root, topdown=False):
if path != root:
for f in files:
sp = splitPath(path)
np = ""
for element in sp[1:]:
e2 = element.replace(SEP, REPL)
np += e2 + SEP
f2 = f.replace(SEP, REPL)
newName = np + f2
safeprint("Moved: "+ newName )
if doit:
shutil.move(os.path.join(path, f), os.path.join(root, f))
# Uncomment, if you want filenames to be based on folder hierarchy.
#shutil.move(os.path.join(path, f), os.path.join(root, newName))
fileCount += 1
safeprint("Removed: "+ path)
if doit:
os.rmdir(path)
folderCount += 1
if doit:
print("Done.")
else:
print("Simulation complete.")
print("Moved files:", fileCount)
print("Removed folders:", folderCount)
directory_path = r"C:\Users\jd\Documents\myFtpData"
flatten(directory_path, True)
Adding on to the answers, I believe my answer will satisfy all your needs, the other answers fail when there is a subdirectory and file with the same filename as the upper directory.
This was SOLVED here, Also look at my Github Repo for Structured File Copy and Flattened File Copy:
import os, fnmatch, shutil
PATTERN = '*.txt' # Regex Pattern to Match files
INPUT_FOLDER = "A" # os.getcwd()
INPUT_FOLDER = os.path.abspath(INPUT_FOLDER)
include_input_foldername = False
prepend = "_included" if include_input_foldername else ""
OUTPUT_FOLDER = f"Structured_Copy_{os.path.basename(INPUT_FOLDER)}{prepend}"
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def find(pattern, path):
"""Utility to find files wrt a regex search"""
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
all_files = find(PATTERN, INPUT_FOLDER)
for each_path in all_files:
relative_path = os.path.relpath(each_path, os.path.dirname(INPUT_FOLDER)) if include_input_foldername else os.path.relpath(each_path, INPUT_FOLDER)
flattened_relative_fullpath = os.path.join(OUTPUT_FOLDER, relative_path)
os.makedirs(os.path.dirname(flattened_relative_fullpath), exist_ok=True)
shutil.copy(each_path, flattened_relative_fullpath)
print(f"Copied {each_path} to {flattened_relative_fullpath}")
print(f"Finished Copying {len(all_files)} Files from : {INPUT_FOLDER} to : {OUTPUT_FOLDER}")

Pulling Files and Timestamps from a Directory and Subdirectories

I have a working script that will print all files in a given directory. I would like help making it do two additional things:
(1) Also be able to print the date_created or time stamp for each file.
(2) Do all of the above not only for files in the given directory, but in all subdirectories as well.
Here is the working script:
from os import listdir
from os.path import isfile, join
from sys import argv
script, filename = argv
mypath = os.getcwd()
allfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
output = open(filename, 'w')
for i in allfiles:
string = "%s" %i
output.write(string + "\n")
output.close()
print "Directory printed."
I would hope to be able to print something like (filename + ", " + timestamp + "\n"), or some substitute.
Thanks!
http://docs.python.org/2/library/os.html and http://docs.python.org/2/library/stat.html have you covered.
os.walk will give you the recursive directory walking
stat will give you file timestamps (atime,ctime,mtime)
This snippet walks through files in a directory + subdirectories and prints out created and modified timestamps.
import os
import time
def walk_files(directory_path):
# Walk through files in directory_path, including subdirectories
for root, _, filenames in os.walk(directory_path):
for filename in filenames:
file_path = root + '/' + filename
created = os.path.getctime(file_path)
modified = os.path.getmtime(file_path)
# Process stuff for the file here, for example...
print "File: %s" % file_path
print " Created: %s" % time.ctime(created)
print " Last modified: %s" % time.ctime(modified)
walk_files('/path/to/directory/')

Categories

Resources