recursive file copying into subdirectory - python

I need to copy all the files and folders from the current folder to a subdirectory. What would be the best way to do so? I tried the following snippet, but it fails if the destination directory already exists.
def copy(d=os.path.curdir, dest="t"):
    """Copy every entry of directory *d* into subdirectory *dest*.

    Fixes over the original snippet:
      * creates *dest* up front, and merges into it if it already exists
        (shutil.copytree alone fails when the target directory exists);
      * joins entry names with *d* before testing isdir (the bare name was
        tested against the CWD, not against *d*);
      * skips *dest* itself so the destination is never copied into itself.

    *dest* is resolved relative to the current working directory, matching
    the original hard-coded "t" subdirectory.
    """
    if not os.path.isdir(dest):
        os.makedirs(dest)
    for name in os.listdir(d):
        src = os.path.join(d, name)
        # Never descend into the destination we are filling.
        if os.path.abspath(src) == os.path.abspath(dest):
            continue
        target = os.path.join(dest, name)
        if os.path.isdir(src):
            if os.path.isdir(target):
                copy(src, target)  # merge into the existing directory
            else:
                shutil.copytree(src, target)
        else:
            shutil.copy(src, dest)
I have the feeling that the same task can be done in a better and easier manner. How do I do it?

I would never do it in Python, but the following solution came to mind. It doesn't look simple, but it should work and can be simplified (I haven't checked it — sorry, no access to a computer right now):
def copyDirectoryTree(directory, destination, preserveSymlinks=True):
    """Recursively copy *directory* into *destination*, merging with any
    directories that already exist there.

    Fixes over the original: ``entrypath`` NameError (the variable is
    ``entryPath``), files being copied back into the *source* directory
    instead of *destination*, and symlinks being silently dereferenced
    even when preserveSymlinks was requested.

    Raises IOError when a destination entry exists but is not a directory
    while the source entry is one.
    """
    if not os.path.isdir(destination):
        os.makedirs(destination)
    for entry in os.listdir(directory):
        entryPath = os.path.join(directory, entry)
        entryDest = os.path.join(destination, entry)
        if os.path.isdir(entryPath):
            if os.path.exists(entryDest):
                if not os.path.isdir(entryDest):
                    raise IOError("Failed to copy tree, the destination for the `"
                                  + entryPath + "' directory exists and is not a directory")
                # Merge into the existing directory instead of failing.
                copyDirectoryTree(entryPath, entryDest, preserveSymlinks)
            else:
                shutil.copytree(entryPath, entryDest, preserveSymlinks)
        else:  # symlinks and files
            if preserveSymlinks and os.path.islink(entryPath):
                # Recreate the link itself; os.symlink fails on an existing
                # target, so clear it first.
                if os.path.lexists(entryDest):
                    os.remove(entryDest)
                os.symlink(os.readlink(entryPath), entryDest)
            else:
                shutil.copy(os.path.realpath(entryPath), entryDest)

See the code in http://docs.python.org/library/shutil.html, then tweak it a little (e.g. try: around os.makedirs(dst)).

To extend mamnun's answer,
If you want to use the direct call to the os, I'd advise using cp -r since you seem to want a recursive copy for directories.

Do you really need to use python? Because shutil functions cannot copy all file metadata and group permissions. Why don't you try built-in OS commands like cp in linux and xcopy in windows?
You can even try to run these commands from python
import os
# Hands the whole string to the shell; only works where a `cp` binary exists.
# NOTE(review): prefer subprocess.run(["cp", src, dst]) — os.system goes
# through the shell, so untrusted filenames allow command injection, and the
# exit status is easy to ignore.
os.system("cp file1 file2")
Hope this helps.

Here is my version of a recursive copy method for python, seems to work :)
def copy_all(fr, to, overwrite=True):
    """Recursively copy the file, directory or symlink *fr* into *to*.

    Fixes over the original: a missing closing parenthesis on the
    overwrite test (SyntaxError), the *overwrite* flag not being passed
    down in the recursion, a missing path separator in the
    ``to + basename(fr)`` existence check, and symlinks-to-files being
    dereferenced instead of recreated as links.

    When *fr* is a directory whose name differs from *to*'s, its contents
    are placed in a same-named subdirectory of *to* (mirroring `cp -r`).
    """
    fr = os.path.normpath(fr)
    to = os.path.normpath(to)
    if os.path.isdir(fr):
        # Descend into a subdirectory named after the source, unless the
        # caller already pointed us at that subdirectory.
        if os.path.basename(fr) != os.path.basename(to):
            to = os.path.join(to, os.path.basename(fr))
        mkdirs(to)
        for name in os.listdir(fr):
            copy_all(os.path.join(fr, name), to, overwrite)
    else:  # symlink or file
        dest = to
        if os.path.isdir(to):
            dest = os.path.join(to, os.path.basename(fr))
        if overwrite and (os.path.exists(dest) or os.path.islink(dest)):
            rm(dest)
        if os.path.islink(fr):
            # Recreate the link itself rather than copying its target.
            os.symlink(os.readlink(fr), dest)
        else:
            shutil.copy2(fr, dest)  # copy2 preserves timestamps/metadata

def mkdirs(path):
    """Create *path* (and parents) if it does not already exist."""
    if not os.path.isdir(path):
        os.makedirs(path)

def rm(path):
    """Delete a file, symlink, or directory tree at *path*.

    Fixed: the original called os.rmdir on each *child* of a directory
    (which fails on non-empty children); now it recurses first, then
    removes the emptied directory itself.
    """
    if os.path.isfile(path) or os.path.islink(path):
        os.remove(path)
    elif os.path.isdir(path):
        for name in os.listdir(path):
            rm(os.path.join(path, name))
        os.rmdir(path)

Related

Need help checking for multiple underscores in a collection of files - see code below

I am working on a project where I need to sort .jpg files and folders that contain .jpg files. I have other scripts that are functional which I intend to incorporate into this python script later. First though, I've implemented in the first script below to count the number of underscores in a file and take action based on the result and this works successfully. I need help on creating logic that will go through .jpg image files and if the files have more than one underscore the program will move the files into an error folder. Also any feedback on how to optimize this script would be greatly appreciated!
from pathlib import Path
import shutil, os, time, glob

# Timestamp baked into the error-folder name so each run gets a unique one.
timestr = time.strftime("%Y%m%d-%H%M%S")
folder = 'D:\\test\\testing'
working_folder = 'DitaTest1'
full_path = Path(os.path.join(folder, working_folder))
test_path = folder + '\\' + working_folder

# Walk the working folder once; files trigger the jpg-grouping pass,
# non-files with an unexpected underscore count go to an error folder.
for file_path in full_path.iterdir():
    file_name = file_path.name
    result = file_name.count('_')
    if file_path.is_file():
        # NOTE(review): `result` is never consulted on this branch, so files
        # with multiple underscores are NOT routed to the error folder — this
        # is the gap the question asks about.
        os.chdir(test_path)
        for file in glob.glob("*.jpg"):
            # One destination folder per jpg, named after the cleaned filename.
            dst=test_path+"\\"+file.replace(" ","_").replace(".jpg","") # .replace("Angle","").replace("Front","").replace("Side","")
            # NOTE(review): os.mkdir raises if dst already exists — TODO confirm
            # duplicates cannot occur.
            os.mkdir(dst)
            # print(dst)
            shutil.move(file,dst)
    elif result != 1:
        # Only reached for non-files (the is_file() branch above wins otherwise).
        if not file_path.is_file():
            shutil.move(os.path.join(folder, working_folder, file_name), os.path.join(folder, working_folder + ' - dir-ERRORS_' + timestr, file_name))
    else:
        print('Ignored operation')
You need to explain more so that we can understand it better but from what I have read,
Your if logic seems to be wrong; if you want to check the number of underscores, you shouldn't put that logic in the elif. You should try something like this instead.
# Relies on `full_path`, `test_path`, `folder`, `working_folder` and
# `timestr` being defined earlier in the script.
for file_path in full_path.iterdir():
    file_name = file_path.name
    result = file_name.count('_')  # number of underscores decides routing
    if os.path.isdir(file_path):
        pass  # directories are left untouched
    else:
        if result == 1:
            # Exactly one underscore: group each jpg into its own folder.
            os.chdir(test_path)
            for file in glob.glob("*.jpg"):
                dst=test_path+"\\"+file.replace(" ","_").replace(".jpg","") # .replace("Angle","").replace("Front","").replace("Side","")
                # NOTE(review): os.mkdir raises if dst exists — TODO confirm.
                os.mkdir(dst)
                # print(dst)
                shutil.move(file,dst)
        else:
            # Any other underscore count: move the file to the error folder.
            # NOTE(review): shutil.move needs the error folder to exist —
            # presumably created elsewhere; verify.
            shutil.move(os.path.join(folder, working_folder, file_name), os.path.join(folder, working_folder + ' - dir-ERRORS_' + timestr, file_name))
What this code does is, iterate over the folder and if it finds a folder it will just pass and when it finds a file it will check if result == 1. If it is it will move it to your desired folder, otherwise it will move it to the error folder. If I made a mistake let me know.

Recursive download with pysftp

I'm trying to fetch from SFTP with the following structure:
main_dir/
dir1/
file1
dir2/
file2
I tried to achieve this with commands below:
sftp.get_r(main_path + dirpath, local_path)
or
sftp.get_d(main_path + dirpath, local_path)
The local path is like d:/grabbed_files/target_dir, and the remote is like /data/some_dir/target_dir.
With get_r I am getting FileNotFound exception. With get_d I am getting empty dir (when target dir have files not dirs, it works fine).
I'm totally sure that directory exists at this path. What am I doing wrong?
This one works for me, but when you download directory it create full path locally.
pysftp.Connection.get_r()
I also created simple download and upload methods:
def download_r(sftp, outbox):
    """Fetch the remote directory *outbox* recursively into a fresh temp dir.

    Returns the local directory that mirrors *outbox*.
    """
    tmp_dir = helpers.create_tmpdir()  # project helper — presumably returns a pathlib.Path; TODO confirm
    assert sftp.isdir(str(outbox))
    assert pathlib.Path(tmp_dir).is_dir()
    sftp.get_r(str(outbox), str(tmp_dir))
    # pysftp's get_r recreates the full remote path under tmp_dir, so the
    # mirrored tree lives at tmp_dir/outbox rather than tmp_dir itself.
    tmp_dir = tmp_dir / outbox
    return tmp_dir
def upload_r(sftp, inbox, files):
    """Upload the local directory *files* recursively into the remote *inbox*.

    Logs and does nothing when *files* is not a directory.
    """
    assert sftp.isdir(str(inbox))
    if pathlib.Path(files).is_dir():
        # NOTE(review): files.iterdir() implies *files* is already a
        # pathlib.Path — confirm callers never pass a plain string.
        logger.debug(list(files.iterdir()))
        sftp.put_r(str(files), str(inbox))
    else:
        logger.debug('No files here.')
I didn't understand why it doesn't work so I ended with my own recursive solution:
def grab_dir_rec(sftp, dirpath):
    """Recursively mirror the remote directory main_path+dirpath into
    target_path+dirpath.

    Relies on module-level globals `target_path` and `main_path` and on a
    sibling `grab_file` helper for the per-file transfer. Silently returns
    when the remote path does not exist.
    """
    local_path = target_path + dirpath
    full_path = main_path + dirpath
    if not sftp.exists(full_path):
        return
    if not os.path.exists(local_path):
        os.makedirs(local_path)  # create the local mirror directory
    dirlist = sftp.listdir(remotepath=full_path)
    for i in dirlist:
        if sftp.isdir(full_path + '/' + i):
            grab_dir_rec(sftp, dirpath + '/' + i)  # descend into subdirectory
        else:
            grab_file(sftp, dirpath + '/' + i)     # sibling helper fetches one file
In the event that you want a context manager wrapper around pysftp that does this for you, here is a solution that is even less code (after you copy/paste the github gist) that ends up looking like the following when used
path = "sftp://user:password@test.com/path/to/file.txt"
# Read a file
with open_sftp(path) as f:
s = f.read()
print s
# Write to a file
with open_sftp(path, mode='w') as f:
f.write("Some content.")
The (fuller) example: http://www.prschmid.com/2016/09/simple-opensftp-context-manager-for.html
This context manager happens to have auto-retry logic baked in in the event you can't connect the first time around (which surprisingly happens more often than you'd expect in a production environment...).
Oh, and yes, this assumes you are only getting one file per connection as it will auto-close the ftp connection.
The context manager gist for open_sftp: https://gist.github.com/prschmid/80a19c22012e42d4d6e791c1e4eb8515

Delete a directory that is not empty on python

So, I need to clean a directory that is not empty.
I have created the following function.For testing reasons I tried to remove a JDK installation
def clean_dir(location):
    """Recursively delete everything inside *location* (keeps *location* itself).

    NOTE(review): symlinks are not special-cased. os.path.isfile/isdir follow
    links, so a link to a directory reaches the shutil.rmtree call, which
    raises "Cannot call rmtree on a symbolic link" — the Linux failure the
    author reports. Windows trees without such links work.
    """
    fileList = os.listdir(location)
    for fileName in fileList:
        fullpath=os.path.join(location, fileName)
        if os.path.isfile(fullpath):
            # Force permissions so read-only files can be removed.
            os.chmod(fullpath, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            os.remove(location + "/" + fileName)
        elif os.path.isdir(fullpath):
            if len(os.listdir(fullpath)) > 0:
                clean_dir(fullpath)  # empty it first, then remove below
            #os.rmdir(location + "/" + fileName)
            shutil.rmtree(location + "/" + fileName)
    return
I tried to use rmtree and rmdir, but it fails.
The error I got using rmtree is:
OSError: Cannot call rmtree on a symbolic link
And this is the error I got when I used rmdir:
OSError: [Errno 66] Directory not empty:
'/tmp/jdk1.8.0_25/jre/lib/amd64/server'
The code works correctly on windows. But for some reason it fails on linux.
You're encountering one of the differences between the way Windows and Linux (UNIX really) handle filesystems. I believe adding an additional case to your code will at least help:
...
# Fragment of clean_dir: relies on `location` and `fileList` from the
# surrounding function.
for fileName in fileList:
    fullpath = os.path.join(location, fileName)
    ## |<-- Handle symlink -->|
    # Treat links like files so rmtree never sees a link-to-directory.
    if os.path.islink(fullpath) or os.path.isfile(fullpath):
        # NOTE(review): chmod follows the link, so for a symlink this alters
        # the *target's* permissions — harmless for plain files, surprising
        # for links; confirm that is acceptable.
        os.chmod(fullpath, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
        os.remove(os.path.join(location, fileName))
    elif os.path.isdir(fullpath):
        if len(os.listdir(fullpath)) > 0:
            clean_dir(fullpath)
        #os.rmdir(os.path.join(location, fileName))
        shutil.rmtree(os.path.join(location, fileName))
...
This should properly handle the case where the entry is a symlink and remove it just like a file. I'm not sure the chmod is necessary - it probably works on the target of the link, but it shouldn't hurt to handle it the same way as a file.
However, I just checked and os.path.isfile against a symbolic link returns the type of the "thing" that is pointed to, so the additional check is needed to differentiate between the link itself and the thing pointed to. Also, to be portable, instead of appending "/" use os.path.join as newly edited above.
kronenpj thanks, that was the idea. But when you have a symlink it tries to delete is as a normal file and fails. I had to add a new elif and add the unlink option for the symlink
def clean_dir(location):
    """Recursively delete every entry inside *location*.

    The directory *location* itself is preserved. Regular files are made
    writable before removal; symlinks are unlinked without touching their
    targets; subdirectories are emptied and then removed with rmtree.
    """
    for entry in os.listdir(location):
        full = os.path.join(location, entry)
        if os.path.isfile(full):
            # Lift read-only bits so the unlink cannot fail on permissions.
            os.chmod(full, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            os.remove(os.path.join(location, entry))
        elif os.path.islink(full):
            # Remove the link itself, never its target.
            os.unlink(full)
        elif os.path.isdir(full):
            if len(os.listdir(full)) > 0:
                clean_dir(full)
            #os.rmdir(os.path.join(location, entry))
            shutil.rmtree(os.path.join(location, entry))
    return

Copy a file with a too long path to another directory in Python

I am trying to copy files on Windows with Python 2.7, but sometimes this fails.
shutil.copyfile(copy_file, dest_file)
I get the following IOError:
[Errno 2] No such file or directory
But the file does exist! The problem is that the path of the file is too long. (> 255 characters)
How do I copy these files? It isn't a problem to open them in other applications.
To create a file with a too long path, create a file with an as long as possible file name and move the containing folder deeper down a tree structure.
I've been trying some of these methods without success:
http://timgolden.me.uk/python/win32_how_do_i/copy-a-file.html
I wasn't sure about the 255 char limit so I stumbled on this post. There I found a working answer: adding \\?\ before the path.
shutil.copyfile("\\\\?\\" + copy_file, dest_file)
edit:
I've found that working with long paths causes issues on Windows. Another trick I use is to just shorten the paths:
import win32api
path = win32api.GetShortPathName(path)
Thanks for the answer Gfy. I have a requirement to use relative paths. The \\?\ can't be added successfully to a relative path, so it's necessary to convert to an absolute path first (run from desktop):
import os
def clean_path(path):
    r"""Normalize *path* and, on Windows, prefix it with \\?\ to lift the
    260-character MAX_PATH limit.

    Relative paths (including leading '..' components) are resolved against
    os.getcwd() first, because the \\?\ prefix is only valid on absolute
    paths. On non-Windows platforms the path is returned with separators
    normalized and nothing else changed.
    """
    path = path.replace('/',os.sep).replace('\\',os.sep)
    if os.sep == '\\' and '\\\\?\\' not in path:
        # fix for Windows 260 char limit
        # Count leading '..' components so they can be collapsed against cwd.
        relative_levels = len([directory for directory in path.split(os.sep) if directory == '..'])
        # A drive letter (':' in path) marks it absolute -> no cwd prefix.
        cwd = [directory for directory in os.getcwd().split(os.sep)] if ':' not in path else []
        path = '\\\\?\\' + os.sep.join(cwd[:len(cwd)-relative_levels]\
            + [directory for directory in path.split(os.sep) if directory!=''][relative_levels:])
    return path
clean_path('samples')
\\?\C:\Users\Username\Desktop\samples
clean_path('\samples')
\\?\C:\Users\Username\Desktop\samples
clean_path('..\samples')
\\?\C:\Users\Username\samples
clean_path('..\..\samples')
\\?\C:\Users\samples
clean_path('C:\Users\Username\Dropbox')
\\?\C:\Users\Username\Dropbox
Maybe do something like this:
path = "some/really/really/long/path/more/than/255/chars.txt"
def copyFile(path, dest, relative=0):
    """Copy *path* to *dest*, working around over-long (>255 char) source
    paths by chdir-ing down the tree until the remainder is short enough.

    Fixed: the original raised an undefined ``SomeException`` (a NameError
    in practice) when a long path had no separator left to descend into;
    now raises ValueError with an explanatory message.

    NOTE: this deliberately changes the process working directory as it
    descends and does not restore it — *dest* is resolved via '..'
    components relative to the final directory, as in the original.
    *relative* is internal recursion state; callers pass 0.
    """
    if len(path) > 255:
        if not os.sep in path:
            raise ValueError("path longer than 255 chars but has no "
                             "separator left to descend into: %r" % path)
        # Step one directory down and retry with the shortened remainder.
        moveTo, path = path.split(os.sep, 1)
        os.chdir(moveTo)
        copyFile(path, dest, relative + 1)
    else:
        # Climb back up `relative` levels to resolve dest from the start dir.
        path_base = ['..'] * relative
        path_rel = path_base + [dest]
        shutil.copyfile(path, os.path.join(*path_rel))
This is tested, and does work... however, if the destination is more than 255 chars, you would be back in the same boat. In that case, you may have to move the file several times.

Find a file in python

I have a file that may be in a different place on each user's machine. Is there a way to implement a search for the file? A way that I can pass the file's name and the directory tree to search in?
os.walk is the answer, this will find the first match:
import os
def find(name, path):
    """Return the full path of the first file called *name* under *path*.

    Walks the tree top-down; returns None when no match exists.
    """
    for dirpath, _subdirs, filenames in os.walk(path):
        if name in filenames:
            return os.path.join(dirpath, name)
    return None
And this will find all matches:
def find_all(name, path):
    """Return a list of every path under *path* whose filename equals *name*."""
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, filenames in os.walk(path)
        if name in filenames
    ]
And this will match a pattern:
import os, fnmatch
def find(pattern, path):
    """Return all file paths under *path* whose basename matches the glob
    *pattern* (fnmatch syntax, e.g. '*.txt')."""
    matches = []
    for dirpath, _subdirs, filenames in os.walk(path):
        matches.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if fnmatch.fnmatch(filename, pattern)
        )
    return matches
find('*.txt', '/path/to/dir')
In Python 3.4 or newer you can use pathlib to do recursive globbing:
>>> import pathlib
>>> sorted(pathlib.Path('.').glob('**/*.py'))
[PosixPath('build/lib/pathlib.py'),
PosixPath('docs/conf.py'),
PosixPath('pathlib.py'),
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]
Reference: https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob
In Python 3.5 or newer you can also do recursive globbing like this:
>>> import glob
>>> glob.glob('**/*.txt', recursive=True)
['2.txt', 'sub/3.txt']
Reference: https://docs.python.org/3/library/glob.html#glob.glob
I used a version of os.walk and on a larger directory got times around 3.5 sec. I tried two random solutions with no great improvement, then just did:
paths = [line[2:] for line in subprocess.check_output("find . -iname '*.txt'", shell=True).splitlines()]
While it's POSIX-only, I got 0.25 sec.
From this, I believe it's entirely possible to optimise whole searching a lot in a platform-independent way, but this is where I stopped the research.
If you are using Python on Ubuntu and you only want it to work on Ubuntu a substantially faster way is the use the terminal's locate program like this.
import subprocess
def find_files(file_name):
    """Return absolute paths matching *file_name* from the system `locate`
    database (Linux/Ubuntu only; requires an up-to-date updatedb index).

    Fast because it queries a prebuilt index instead of walking the tree,
    but results can be stale. The returned list ends with an empty string
    from the trailing newline of locate's output.
    """
    command = ['locate', file_name]
    output = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
    output = output.decode()
    search_results = output.split('\n')
    return search_results
search_results is a list of the absolute file paths. This is 10,000's of times faster than the methods above and for one search I've done it was ~72,000 times faster.
If you are working with Python 2 you have a problem with infinite recursion on windows caused by self-referring symlinks.
This script will avoid following those. Note that this is windows-specific!
import os
from scandir import scandir
import ctypes
def is_sym_link(path):
    # http://stackoverflow.com/a/35915819
    # Windows-only, Python 2 (uses unicode()): true when *path* is a
    # directory carrying the NTFS reparse-point attribute (symlink or
    # junction), queried through kernel32 via ctypes.
    FILE_ATTRIBUTE_REPARSE_POINT = 0x0400
    return os.path.isdir(path) and (ctypes.windll.kernel32.GetFileAttributesW(unicode(path)) & FILE_ATTRIBUTE_REPARSE_POINT)
def find(base, filenames):
    """Return full paths under *base* whose entry name is in *filenames*.

    Python 2, Windows-specific: skips NTFS reparse points via is_sym_link()
    to avoid infinite recursion through self-referring links. Returns None
    (not []) when *base* does not exist.
    """
    hits = []
    def find_in_dir_subdir(direc):
        # Depth-first scan; matches accumulate in the enclosing hits list.
        content = scandir(direc)
        for entry in content:
            if entry.name in filenames:
                hits.append(os.path.join(direc, entry.name))
            elif entry.is_dir() and not is_sym_link(os.path.join(direc, entry.name)):
                try:
                    find_in_dir_subdir(os.path.join(direc, entry.name))
                except UnicodeDecodeError:
                    # Undecodable names: report and keep scanning siblings.
                    print "Could not resolve " + os.path.join(direc, entry.name)
                    continue
    if not os.path.exists(base):
        return
    else:
        find_in_dir_subdir(base)
    return hits
It returns a list with all paths that point to files in the filenames list.
Usage:
find("C:\\", ["file1.abc", "file2.abc", "file3.abc", "file4.abc", "file5.abc"])
Below we use a boolean "first" argument to switch between first match and all matches (a default which is equivalent to "find . -name file"):
import os
def find(root, file, first=False):
    """Print "<file> : <dir>" for every directory under *root* containing
    *file*; with first=True stop after the first hit (like
    `find . -name file -print -quit`). Returns nothing."""
    for dirpath, _subdirs, names in os.walk(root):
        if file not in names:
            continue
        print("{0} : {1}".format(file, dirpath))
        if first == True:  # kept as an equality test to mirror the original
            break
The answer is very similar to existing ones, but slightly optimized.
So you can find any files or folders by pattern:
def iter_all(pattern, path):
    """Lazily yield every directory and file path under *path* whose name
    matches *pattern* (a compiled regex, tested with .match)."""
    for base, subdirs, filenames in os.walk(path):
        for name in subdirs + filenames:
            if pattern.match(name):
                yield os.path.join(base, name)
either by substring:
def iter_all(substring, path):
    """Lazily yield every directory and file path under *path* whose name
    contains *substring*."""
    for base, subdirs, filenames in os.walk(path):
        for name in subdirs + filenames:
            if substring in name:
                yield os.path.join(base, name)
or using a predicate:
def iter_all(predicate, path):
    """Lazily yield every directory and file path under *path* whose name
    satisfies *predicate* (a callable taking the bare entry name)."""
    for base, subdirs, filenames in os.walk(path):
        for name in subdirs + filenames:
            if predicate(name):
                yield os.path.join(base, name)
to search only files or only folders - replace “dirs + files”, for example, with only “dirs” or only “files”, depending on what you need.
Regards.
SARose's answer worked for me until I updated from Ubuntu 20.04 LTS. The slight change I made to his code makes it work on the latest Ubuntu release.
import subprocess
def find_files(file_name):
    """Return paths matching *file_name* from the `locate` database, going
    through the shell (works on newer Ubuntu where the list-form invocation
    above reportedly failed).

    NOTE(review): shell=True with the query interpolated into the command
    string means a *file_name* containing shell metacharacters is executed
    by the shell — only pass trusted input. The returned list ends with an
    empty string from locate's trailing newline.
    """
    command = ['locate'+ ' ' + file_name]
    output = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True).communicate()[0]
    output = output.decode()
    search_results = output.split('\n')
    return search_results
#F.M.F's answers has a few problems in this version, so I made a few adjustments to make it work.
import os
from os import scandir
import ctypes
def is_sym_link(path):
    # http://stackoverflow.com/a/35915819
    # Windows-only: true when *path* is a directory carrying the NTFS
    # reparse-point attribute (symlink or junction), queried through
    # kernel32 via ctypes. Python 3 port of the version above (str, not unicode).
    FILE_ATTRIBUTE_REPARSE_POINT = 0x0400
    return os.path.isdir(path) and (ctypes.windll.kernel32.GetFileAttributesW(str(path)) & FILE_ATTRIBUTE_REPARSE_POINT)
def find(base, filenames):
    """Return full paths under *base* whose entry name appears in *filenames*.

    *filenames* must be a collection (list/set); passing a bare string
    matches its individual characters instead — see the warning in the
    surrounding text. Skips NTFS reparse points (via is_sym_link) to avoid
    infinite recursion, and skips directories it lacks permission to enter.
    Returns None (not []) when *base* does not exist.
    """
    hits = []
    def find_in_dir_subdir(direc):
        # Depth-first scan; matches accumulate in the enclosing hits list.
        content = scandir(direc)
        for entry in content:
            if entry.name in filenames:
                hits.append(os.path.join(direc, entry.name))
            elif entry.is_dir() and not is_sym_link(os.path.join(direc, entry.name)):
                try:
                    find_in_dir_subdir(os.path.join(direc, entry.name))
                except UnicodeDecodeError:
                    # Undecodable names: report and keep scanning siblings.
                    print("Could not resolve " + os.path.join(direc, entry.name))
                    continue
                except PermissionError:
                    # Inaccessible directory: report and keep scanning siblings.
                    print("Skipped " + os.path.join(direc, entry.name) + ". I lacked permission to navigate")
                    continue
    if not os.path.exists(base):
        return
    else:
        find_in_dir_subdir(base)
    return hits
unicode() was changed to str() in Python 3, so I made that adjustment (line 8)
I also added (in line 25) an exception handler for PermissionError. This way, the program won't stop if it finds a folder it can't access.
Finally, I would like to give a little warning. When running the program, even if you are looking for a single file/directory, make sure you pass it as a list. Otherwise, you will get a lot of answers that not necessarily match your search.
example of use:
find("C:\\", ["Python", "Homework"])
or
find("C:\\", ["Homework"])
but, for example: find("C:\\", "Homework") will give un-wanted answers.
I would be lying if I said I know why this happens. Again, this is not my code and I just made the adjustments I needed to make it work. All credit should go to #F.M.F.

Categories

Resources