Create missing directories in ftplib storbinary - python

I was using pycurl to transfer files over ftp in python. I could create the missing directories automatically on my remote server using:
c.setopt(pycurl.FTP_CREATE_MISSING_DIRS, 1)
for some reasons, I have to switch to ftplib. But I don't know how to to the same here. Is there any option to add to storbinary function to do that? or I have to create the directories manually?

FTP_CREATE_MISSING_DIRS is a curl operation (added here). I'd hazard a guess that you have to do it manually with ftplib, but I'd love to be proven wrong, anyone?
I'd do something like the following: (untested, and need to catch ftplib.all_errors)
ftp = ... # Create connection
# Change directories - create if it doesn't exist
def chdir(dir):
if directory_exists(dir) is False: # (or negate, whatever you prefer for readability)
ftp.mkd(dir)
ftp.cwd(dir)
# Check if directory exists (in current location)
def directory_exists(dir):
filelist = []
ftp.retrlines('LIST',filelist.append)
for f in filelist:
if f.split()[-1] == dir and f.upper().startswith('D'):
return True
return False
Or you could do directory_exists like this: (a bit harder to read?)
# Check if directory exists (in current location)
def directory_exists(dir):
filelist = []
ftp.retrlines('LIST',filelist.append)
return any(f.split()[-1] == dir and f.upper().startswith('D') for f in filelist)

I know it's kind of an old post but I just needed this and came up with a very simple function. I'm new to Python so I'd appreciate any feedback.
from ftplib import FTP
ftp = FTP('domain.com', 'username', 'password')
def cdTree(currentDir):
if currentDir != "":
try:
ftp.cwd(currentDir)
except IOError:
cdTree("/".join(currentDir.split("/")[:-1]))
ftp.mkd(currentDir)
ftp.cwd(currentDir)
Usage example:
cdTree("/this/is/an/example")

I tried adding this as a comment to the #Alex L 's answer, but it was too long. You need to descend recursively when changing directory if you want to create directories on the way. E.g.
def chdir(ftp, directory):
ch_dir_rec(ftp,directory.split('/'))
# Check if directory exists (in current location)
def directory_exists(ftp, directory):
filelist = []
ftp.retrlines('LIST',filelist.append)
for f in filelist:
if f.split()[-1] == directory and f.upper().startswith('D'):
return True
return False
def ch_dir_rec(ftp, descending_path_split):
if len(descending_path_split) == 0:
return
next_level_directory = descending_path_split.pop(0)
if not directory_exists(ftp,next_level_directory):
ftp.mkd(next_level_directory)
ftp.cwd(next_level_directory)
ch_dir_rec(ftp,descending_path_split)

I am using the following lines to resolve missing directory paths for FTP file copy
import os
ftps = FTP_TLS('ftps_server')
ftps.connect()
ftps.login()
destination_dir_path = 'some/dir/path' # directory path on FTP
dir_path = ''
for i in destination_dir_path.split('/'):
dir_path = os.path.join(dir_path,i)
if i not in ftps.nlst(os.path.dirname(dir_path)):
ftps.mkd(dir_path) # create directory on the FTP
ftps.storbinary(...) # store file using the binary mode

An alternative is to simply loop through each of the path elements, create the next and change into the newly-created directory. My use case was fairly straightforward though as I was copying items from one FTP server to another.
def create_ftp_path(session: ftplib.FTP, required_dir: str):
required_dir = required_dir.split('/')[:-1]
for path_item in required_dir:
if path_item.strip() == '':
continue
path_item = path_item.replace('/', '')
try:
session.cwd(path_item)
except:
session.mkd(path_item)
session.cwd(path_item)
Considerations:
This function assumes you have already changed directory for your FTP session to some base path and the required_dir is a path from that base path.
required_dir includes file name as the last element.
I'm removing any / characters because in my case they were causing 553 permission denied exception.
The exception handling is lacking, but in my case upload validation is happening further in the code so even if it fails it will be caught further down.

A more robust and reliable solution:
using ftplib
Hostname = yourhostname.com
Username = yourusername
Password = yourpassword
def mkdirs(path):
ftp = FTP(Hostname,Username,Password)
items = path.split('/')
cwd = "/"
for i in range(len(items)):
list = ftp.nlst()
if(not '.' in items[i] and not items[i] in list):
ftp.mkd(cwd + items[i] + "/")
cwd += items[i] + '/'
ftp.cwd(cwd)
ftp.quit()
mkdirs('path/to/directory/file.name')
This will create directories on your server if they do not exist.
Limitations: This will not work on folders with names that contain ..

This code will create all missing folders in path:
...
def chdir(ftp_path, ftp_conn):
dirs = [d for d in ftp_path.split('/') if d != '']
for p in dirs:
print(p)
check_dir(p, ftp_conn)
def check_dir(dir, ftp_conn):
filelist = []
ftp_conn.retrlines('LIST', filelist.append)
found = False
for f in filelist:
if f.split()[-1] == dir and f.lower().startswith('d'):
found = True
if not found:
ftp_conn.mkd(dir)
ftp_conn.cwd(dir)
if __name__ == '__main__':
ftp_conn = ... # ftp connection
t = 'FTP/for_Vadim/1/2/3/'
chdir(t, ftp_conn)
This code will check all dirs in path and create missing dirs
before "FTP/for_Vadim/"
after "FTP/for_Vadim/1/2/3/"

I'm using something like this (without cwd):
# -*- coding:utf-8 -*-
from ftplib import FTP, error_perm
def createDirs(ftp, dirpath):
"""
Create dir with subdirs.
:param ftp: connected FTP
:param dirpath: path (like 'test/test1/test2')
:type ftp: FTP
:type dirpath: str
:rtype: None
"""
dirpath = dirpath.replace('\\', '/')
tmp = dirpath.split('/')
dirs = []
for _ in tmp:
if len(dirs) == 0:
dirs.append(_)
continue
dirs.append(dirs[-1] + '/' + _)
for _ in dirs:
try:
ftp.mkd(_)
except error_perm as e:
e_str = str(e)
if '550' in e_str and 'File exists' in e_str:
continue
if __name__ == '__main__':
# init ftp
createDirs(ftp=ftp, dirpath='test/1/2/3')

Related

Extending Python's os.walk function on FTP server

How can I make os.walk traverse the directory tree of an FTP database (located on a remote server)? The way the code is structured now is (comments provided):
import fnmatch, os, ftplib
def find(pattern, startdir=os.curdir): #find function taking variables for both desired file and the starting directory
for (thisDir, subsHere, filesHere) in os.walk(startdir): #each of the variables change as the directory tree is walked
for name in subsHere + filesHere: #going through all of the files and subdirectories
if fnmatch.fnmatch(name, pattern): #if the name of one of the files or subs is the same as the inputted name
fullpath = os.path.join(thisDir, name) #fullpath equals the concatenation of the directory and the name
yield fullpath #return fullpath but anew each time
def findlist(pattern, startdir = os.curdir, dosort=False):
matches = list(find(pattern, startdir)) #find with arguments pattern and startdir put into a list data structure
if dosort: matches.sort() #isn't dosort automatically False? Is this statement any different from the same thing but with a line in between
return matches
#def ftp(
#specifying where to search.
if __name__ == '__main__':
import sys
namepattern, startdir = sys.argv[1], sys.argv[2]
for name in find(namepattern, startdir): print (name)
I am thinking that I need to define a new function (i.e., def ftp()) to add this functionality to the code above. However, I am afraid that the os.walk function will, by default, only walk the directory trees of the computer that the code is run from.
Is there a way that I can extend the functionality of os.walk to be able to traverse a remote directory tree (via FTP)?
All you need is utilizing the python's ftplib module. Since os.walk() is based on a Breadth-first search algorithm you need to find the directories and file names at each iteration, then continue the traversing recursively from the first directory. I implemented this algorithm about 2 years ago for using as the heart of FTPwalker, which is an optimum package for traversing extremely large directory trees Through FTP.
from os import path as ospath
class FTPWalk:
"""
This class is contain corresponding functions for traversing the FTP
servers using BFS algorithm.
"""
def __init__(self, connection):
self.connection = connection
def listdir(self, _path):
"""
return files and directory names within a path (directory)
"""
file_list, dirs, nondirs = [], [], []
try:
self.connection.cwd(_path)
except Exception as exp:
print ("the current path is : ", self.connection.pwd(), exp.__str__(),_path)
return [], []
else:
self.connection.retrlines('LIST', lambda x: file_list.append(x.split()))
for info in file_list:
ls_type, name = info[0], info[-1]
if ls_type.startswith('d'):
dirs.append(name)
else:
nondirs.append(name)
return dirs, nondirs
def walk(self, path='/'):
"""
Walk through FTP server's directory tree, based on a BFS algorithm.
"""
dirs, nondirs = self.listdir(path)
yield path, dirs, nondirs
for name in dirs:
path = ospath.join(path, name)
yield from self.walk(path)
# In python2 use:
# for path, dirs, nondirs in self.walk(path):
# yield path, dirs, nondirs
self.connection.cwd('..')
path = ospath.dirname(path)
Now for using this class, you can simply create a connection object using ftplib module and pass the the object to FTPWalk object and just loop over the walk() function:
In [2]: from test import FTPWalk
In [3]: import ftplib
In [4]: connection = ftplib.FTP("ftp.uniprot.org")
In [5]: connection.login()
Out[5]: '230 Login successful.'
In [6]: ftpwalk = FTPWalk(connection)
In [7]: for i in ftpwalk.walk():
print(i)
...:
('/', ['pub'], [])
('/pub', ['databases'], ['robots.txt'])
('/pub/databases', ['uniprot'], [])
('/pub/databases/uniprot', ['current_release', 'previous_releases'], ['LICENSE', 'current_release/README', 'current_release/knowledgebase/complete', 'previous_releases/', 'current_release/relnotes.txt', 'current_release/uniref'])
('/pub/databases/uniprot/current_release', ['decoy', 'knowledgebase', 'rdf', 'uniparc', 'uniref'], ['README', 'RELEASE.metalink', 'changes.html', 'news.html', 'relnotes.txt'])
...
...
...
I needed a function like os.walk on FTP and there where not any so i thought it would be useful to write it , for future references you can find last version here
by the way here is the code that would do that :
def FTP_Walker(FTPpath,localpath):
os.chdir(localpath)
current_loc = os.getcwd()
for item in ftp.nlst(FTPpath):
if not is_file(item):
yield from FTP_Walker(item,current_loc)
elif is_file(item):
yield(item)
current_loc = localpath
else:
print('this is a item that i could not process')
os.chdir(localpath)
return
def is_file(filename):
current = ftp.pwd()
try:
ftp.cwd(filename)
except Exception as e :
ftp.cwd(current)
return True
ftp.cwd(current)
return False
how to use:
first connect to your host :
host_address = "my host address"
user_name = "my username"
password = "my password"
ftp = FTP(host_address)
ftp.login(user=user_name,passwd=password)
now you can call the function like this:
ftpwalk = FTP_Walker("FTP root path","path to local") # I'm not using path to local yet but in future versions I will improve it. so you can just path an '/' to it
and then to print and download files you can do somthing like this :
for item in ftpwalk:
ftp.retrbinary("RETR "+item, open(os.path.join(current_loc,item.split('/')[-1]),"wb").write) #it is downloading the file
print(item) # it will print the file address
( i will write more features for it soon so if you need some specific things or have any idea that can be useful for users i'll be happy to hear that )
I wrote a library pip install walk-sftp. Event though it is named walk-sftp I included a WalkFTP class that lets you filter by start_date of files & end_date of files. You can even pass in a processing_function that returns True or False to see whether your process to clean & store data works. It also has a log parameter (pass filename) that uses pickle & keeps track of any progress so you don't overwrite or have to keep track of dates making backfilling easier.
https://pypi.org/project/walk-sftp/
Im going to assume this is what you want ... although really I have no idea
ssh = paramiko.SSHClient()
ssh.connect(server, username=username, password=password)
ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command("locate my_file.txt")
print ssh_stdout
this will require the remote server to have the mlocate package `sudo apt-get install mlocate;sudo updatedb();

Python - Watch a folder for new .zip file and upload via FTP

I am working on creating a script to watch a folder, grab any new .zip files, and then upload them via FTP to a predetermined area. Right now FTP testing is being performed Locally, since the environment isnt yet created.
The strategy I am taking is to first, unzip into a local folder. Then, perform ftplib.storbinary , on the file from the local folder, to the ftpdestination. However, the unzipping process appears to be working but I am getting a "file does not exist" error, all though I can see it in the folder itself.
Also, is there anyway to unzip directly into an FTP location? I havent been able to find a way hence the approach I am taking.
Thanks, local ftp info removed from code. All paths that are relevant in this code will be changed, most likely to dynamic fashion, but for now this is a local environment
extractZip2.py
import zipfile
import ftplib
import os
import logging
import time
from socket import error as socket_error
#Logging Setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('__name__')
FTPaddress = ''
FTPusername = ''
FTPpassword = ''
ftp_destination_location = ''
path_to_watch = "C:/Users/206420055/Desktop/test2/"
before = dict ([(f,None) for f in os.listdir(path_to_watch)])
temp_destination_location = "C:/Users/206420055/Desktop/temp/"
def unzip(fullPath,temporaryPath):
with zipfile.ZipFile(fullPath, "r") as z :
logger.info("Unzipping {0}".format(fullPath))
z.extractall(temporaryPath)
logger.info("Unzipped into local directory {0}".format(temp_destination_location))
def check_or_create_ftp(session, folder):
"""
Checks to see if necessary folder for currenttab is available.
Creates the folder if not found, and enters it.
"""
if folder not in session.nlst():
logger.info('Directory for {0} does not exist, creating directory\n'.format(folder))
session.mkd(folder)
session.cwd(folder)
def check_or_create(temp_destination):
"""
Checks to see if local savepath exists. Will create savepath if not exists.
"""
if not os.path.exists(temp_destination):
logger.info('Directory for %s does not exist, creating directory\n' % temp_destination)
os.makedirs(str(temp_destination))
def transfer(address,username,password,filename,destination):
logger.info("Creating Session")
try:
session = session_init(address,username,password,destination)
except (socket_error,ftplib.error_perm) as e:
logger.error(str(e))
logger.error("Error in Session Init")
else:
try:
logger.info("Sending File {0}".format(filename))
send_file(session,filename)
except (IOError, OSError, ftplib.error_perm) as e:
logger.error(e)
def session_init(address,username,password,path):
session = ftplib.FTP(address,username,password)
check_or_create_ftp(session,path)
logger.info("Session Established")
return session
def send_file(session,filename):
file = open(filename,'rb')
logger.info('Sending File : STOR '+filename)
session.storbinary('STOR '+ filename, file)
file.close()
def delete_local_files(savepath, file):
logger.info("Cleaning Up Folder {0}".format(savepath))
os.remove(file)
while 1:
time.sleep(5)
after = dict ([(f,None) for f in os.listdir(path_to_watch)])
added = [f for f in after if not f in before]
removed = [f for f in before if not f in after]
if added: print "Added: ",", ".join(added)
before = after
check_or_create(temp_destination_location)
if added :
for file in added:
print file
if file.endswith('.zip'):
unzip(path_to_watch+file, temp_destination_location)
temp_files = os.listdir(temp_destination_location)
print("Temp Files {0}".format(temp_files))
for tf in temp_files:
print("TF {0}".format(tf))
transfer(FTPaddress,FTPusername,FTPpassword,tf,ftp_destination_location)
#delete_local_files(temp_destination_location,tf)
else:
pass
edit: adding error image
Seen above, we see the file in the temp folder. But the console obviously shows the error.
just change it to
from glob import glob
zips_in_path = dict ([(f,None) for f in glob("{base_path}/*.zip".format(base_path = path_to_watch)])
os.listdir does not include the path_to_watch part of the path it is just the filenames, however glob does.
so you could also do
after = dict ([(os.path.join(path_to_watch,f),None) for f in os.listdir(path_to_watch)])
using either of these methods you should be able to get the full path to the files in the path

Upload files using SFTP in Python, but create directories if path doesn't exist

I want to upload a file on a remote server with Python. I'd like to check beforehand if the remote path is really existing, and if it isn't, to create it. In pseudocode:
if(remote_path not exist):
create_path(remote_path)
upload_file(local_file, remote_path)
I was thinking about executing a command in Paramiko to create the path (e.g. mkdir -p remote_path). I came up with this:
# I didn't test this code
import paramiko, sys
ssh = paramiko.SSHClient()
ssh.connect(myhost, 22, myusername, mypassword)
ssh.exec_command('mkdir -p ' + remote_path)
ssh.close
transport = paramiko.Transport((myhost, 22))
transport.connect(username = myusername, password = mypassword)
sftp = paramiko.SFTPClient.from_transport(transport)
sftp.put(local_path, remote_path)
sftp.close()
transport.close()
But this solution doesn't sound good to me, because I close the connection and then reopen it again. Is there a better way to do it?
SFTP supports the usual FTP commands (chdir, mkdir, etc...), so use those:
sftp = paramiko.SFTPClient.from_transport(transport)
try:
sftp.chdir(remote_path) # Test if remote_path exists
except IOError:
sftp.mkdir(remote_path) # Create remote_path
sftp.chdir(remote_path)
sftp.put(local_path, '.') # At this point, you are in remote_path in either case
sftp.close()
To fully emulate mkdir -p, you can work through remote_path recursively:
import os.path
def mkdir_p(sftp, remote_directory):
"""Change to this directory, recursively making new folders if needed.
Returns True if any folders were created."""
if remote_directory == '/':
# absolute path so change directory to root
sftp.chdir('/')
return
if remote_directory == '':
# top-level relative directory must exist
return
try:
sftp.chdir(remote_directory) # sub-directory exists
except IOError:
dirname, basename = os.path.split(remote_directory.rstrip('/'))
mkdir_p(sftp, dirname) # make parent directories
sftp.mkdir(basename) # sub-directory missing, so created it
sftp.chdir(basename)
return True
sftp = paramiko.SFTPClient.from_transport(transport)
mkdir_p(sftp, remote_path)
sftp.put(local_path, '.') # At this point, you are in remote_path
sftp.close()
Of course, if remote_path also contains a remote file name, then it needs to be split off, the directory being passed to mkdir_p and the filename used instead of '.' in sftp.put.
Something simpler and slightly more readable too
def mkdir_p(sftp, remote, is_dir=False):
"""
emulates mkdir_p if required.
sftp - is a valid sftp object
remote - remote path to create.
"""
dirs_ = []
if is_dir:
dir_ = remote
else:
dir_, basename = os.path.split(remote)
while len(dir_) > 1:
dirs_.append(dir_)
dir_, _ = os.path.split(dir_)
if len(dir_) == 1 and not dir_.startswith("/"):
dirs_.append(dir_) # For a remote path like y/x.txt
while len(dirs_):
dir_ = dirs_.pop()
try:
sftp.stat(dir_)
except:
print "making ... dir", dir_
sftp.mkdir(dir_)
Had to do this today. Here is how I did it.
def mkdir_p(sftp, remote_directory):
dir_path = str()
for dir_folder in remote_directory.split("/"):
if dir_folder == "":
continue
dir_path += r"/{0}".format(dir_folder)
try:
sftp.listdir(dir_path)
except IOError:
sftp.mkdir(dir_path)
you can use pysftp package:
import pysftp as sftp
#used to pypass key login
cnopts = sftp.CnOpts()
cnopts.hostkeys = None
srv = sftp.Connection(host="10.2.2.2",username="ritesh",password="ritesh",cnopts=cnopts)
srv.makedirs("a3/a2/a1", mode=777) # will happily make all non-existing directories
you can check this link for more details:
https://pysftp.readthedocs.io/en/release_0.2.9/cookbook.html#pysftp-connection-mkdir
My version:
def is_sftp_dir_exists(sftp, path):
try:
sftp.stat(path)
return True
except Exception:
return False
def create_sftp_dir(sftp, path):
try:
sftp.mkdir(path)
except IOError as exc:
if not is_sftp_dir_exists(sftp, path):
raise exc
def create_sftp_dir_recursive(sftp, path):
parts = deque(Path(path).parts)
to_create = Path()
while parts:
to_create /= parts.popleft()
create_sftp_dir(sftp, str(to_create))
We try mkdir without trying listdir/stat first due to EAFP principle (it's also more performant to make one network request than several).
Paramiko contains a mkdir function:
http://paramiko-docs.readthedocs.org/en/latest/api/sftp.html#paramiko.sftp_si.SFTPServerInterface.mkdir
Assuming sftp operations are expensive,
I would go with:
def sftp_mkdir_p(sftp, remote_directory):
dirs_exist = remote_directory.split('/')
dirs_make = []
# find level where dir doesn't exist
while len(dirs_exist) > 0:
try:
sftp.listdir('/'.join(dirs_exist))
break
except IOError:
value = dirs_exist.pop()
if value == '':
continue
dirs_make.append(value)
else:
return False
# ...and create dirs starting from that level
for mdir in dirs_make[::-1]:
dirs_exist.append(mdir)
sftp.mkdir('/'.join(dirs_exist))```

Delete all files and folders after connecting to FTP

I want to connect through FTP to an address and then delete all contents. Currently I am using this code:
from ftplib import FTP
import shutil
import os
ftp = FTP('xxx.xxx.xxx.xxx')
ftp.login("admin", "admin")
for ftpfile in ftp.nlst():
if os.path.isdir(ftpfile)== True:
shutil.rmtree(ftpfile)
else:
os.remove(ftpfile)
My problem is I always get this error when he is trying to delete the first file:
os.remove(ftpfile)
WindowsError: [Error 2] The system cannot find the file specified: somefile.sys
Anyone has an idea why?
for something in ftp.nlst():
try:
ftp.delete(something)
except Exception:
ftp.rmd(something)
Any other ways?
This function deletes any given path recursively:
# python 3.6
from ftplib import FTP
def remove_ftp_dir(ftp, path):
for (name, properties) in ftp.mlsd(path=path):
if name in ['.', '..']:
continue
elif properties['type'] == 'file':
ftp.delete(f"{path}/{name}")
elif properties['type'] == 'dir':
remove_ftp_dir(ftp, f"{path}/{name}")
ftp.rmd(path)
ftp = FTP(HOST, USER, PASS)
remove_ftp_dir(ftp, 'path_to_remove')
from ftplib import FTP
import shutil
import os
ftp = FTP("hostname")
ftp.login("username", "password")
def deletedir(dirname):
ftp.cwd(dirname)
print(dirname)
for file in ftp.nlst():
try:
ftp.delete(file)
except Exception:
deletedir(file)
ftp.cwd("..")
ftp.rmd(dirname)
for ftpfile in ftp.nlst():
try:
ftp.delete(ftpfile)
except Exception:
deletedir(ftpfile)
ftp.nlst()
The above statement returns a list of file names.
os.remove()
The above statement requires a file path.
from ftplib import FTP
#--------------------------------------------------
class FTPCommunicator():
def __init__(self):
self.ftp = FTP()
self.ftp.connect('server', port=21, timeout=30)
self.ftp.login(user="user", passwd="password")
def getDirListing(self, dirName):
listing = self.ftp.nlst(dirName)
# If listed a file, return.
if len(listing) == 1 and listing[0] == dirName:
return []
subListing = []
for entry in listing:
subListing += self.getDirListing(entry)
listing += subListing
return listing
def removeDir(self, dirName):
listing = self.getDirListing(dirName)
# Longest path first for deletion of sub directories first.
listing.sort(key=lambda k: len(k), reverse=True)
# Delete files first.
for entry in listing:
try:
self.ftp.delete(entry)
except:
pass
# Delete empty directories.
for entry in listing:
try:
self.ftp.rmd(entry)
except:
pass
self.ftp.rmd(dirName)
def quit(self):
self.ftp.quit()
#--------------------------------------------------
def main():
ftp = FTPCommunicator()
ftp.removeDir("/Untitled")
ftp.quit()
#--------------------------------------------------
if __name__ == "__main__":
main()
#--------------------------------------------------
#Dmytro Gierman has the best option in my opinion, however, I don't want to delete the main folder, only the subfolder and all files inside.
Here is a script in order to make it work
import ftplib
# python 3.9
def remove_ftp_dir(ftp, path, mainpath):
for (name, properties) in ftp.mlsd(path=path):
if name in ['.', '..']:
continue
elif properties['type'] == 'file':
ftp.delete(f"{path}/{name}")
elif properties['type'] == 'dir':
remove_ftp_dir(ftp, f"{path}/{name}", mainpath)
if path != mainpath: #only deletes subdirectories not he mainpath
ftp.rmd(path)

Downloading a directory tree with ftplib

This will not download the contents of sub-directories; how can I do so?
import ftplib
import configparser
import os
directories = []
def add_directory(line):
if line.startswith('d'):
bits = line.split()
dirname = bits[8]
directories.append(dirname)
def makeDir(archiveTo):
for dir in directories:
newDir = os.path.join(archiveTo, dir)
if os.path.isdir(newDir) == True:
print("Directory \"" + dir + "\" already exists!")
else:
os.mkdir(newDir)
def getFiles(archiveTo, ftp):
files = ftp.nlst()
for filename in files:
try:
directories.index(filename)
except:
ftp.retrbinary('RETR %s' % filename, open(os.path.join(archiveTo, filename), 'wb').write)
def runBackups():
#Load INI
filename = 'connections.ini'
config = configparser.SafeConfigParser()
config.read(filename)
connections = config.sections()
i = 0
while i < len(connections):
#Load Settings
uri = config.get(connections[i], "uri")
username = config.get(connections[i], "username")
password = config.get(connections[i], "password")
backupPath = config.get(connections[i], "backuppath")
archiveTo = config.get(connections[i], "archiveto")
#Start Back-ups
ftp = ftplib.FTP(uri)
ftp.login(username, password)
ftp.cwd(backupPath)
#Map Directory Tree
ftp.retrlines('LIST', add_directory)
#Make Directories Locally
makeDir(archiveTo)
#Gather Files
getFiles(archiveTo, ftp)
#End connection and increase counter.
ftp.quit()
i += 1
print()
print("Back-ups complete.")
print()
this should do the trick :)
import sys
import ftplib
import os
from ftplib import FTP
ftp=FTP("ftp address")
ftp.login("user","password")
def downloadFiles(path,destination):
#path & destination are str of the form "/dir/folder/something/"
#path should be the abs path to the root FOLDER of the file tree to download
try:
ftp.cwd(path)
#clone path to destination
os.chdir(destination)
os.mkdir(destination[0:len(destination)-1]+path)
print destination[0:len(destination)-1]+path+" built"
except OSError:
#folder already exists at destination
pass
except ftplib.error_perm:
#invalid entry (ensure input form: "/dir/folder/something/")
print "error: could not change to "+path
sys.exit("ending session")
#list children:
filelist=ftp.nlst()
for file in filelist:
try:
#this will check if file is folder:
ftp.cwd(path+file+"/")
#if so, explore it:
downloadFiles(path+file+"/",destination)
except ftplib.error_perm:
#not a folder with accessible content
#download & return
os.chdir(destination[0:len(destination)-1]+path)
#possibly need a permission exception catch:
with open(os.path.join(destination,file),"wb") as f:
ftp.retrbinary("RETR "+file, f.write)
print file + " downloaded"
return
source="/ftproot/folder_i_want/"
dest="/systemroot/where_i_want_it/"
downloadFiles(source,dest)
This is a very old question, but I had a similar need that i wanted to satisfy in a very general manner. I ended up writing my own solution that works very well for me. I've placed it on Gist here https://gist.github.com/Jwely/ad8eb800bacef9e34dd775f9b3aad987
and pasted it below in case i ever take the gist offline.
Example usage:
import ftplib
ftp = ftplib.FTP(mysite, username, password)
download_ftp_tree(ftp, remote_dir, local_dir)
The code above will look for a directory called "remote_dir" on the ftp host, and then duplicate the directory and its entire contents into the "local_dir".
It invokes the script below.
import ftplib
import os
def _is_ftp_dir(ftp_handle, name, guess_by_extension=True):
""" simply determines if an item listed on the ftp server is a valid directory or not """
# if the name has a "." in the fourth to last position, its probably a file extension
# this is MUCH faster than trying to set every file to a working directory, and will work 99% of time.
if guess_by_extension is True:
if name[-4] == '.':
return False
original_cwd = ftp_handle.pwd() # remember the current working directory
try:
ftp_handle.cwd(name) # try to set directory to new name
ftp_handle.cwd(original_cwd) # set it back to what it was
return True
except:
return False
def _make_parent_dir(fpath):
""" ensures the parent directory of a filepath exists """
dirname = os.path.dirname(fpath)
while not os.path.exists(dirname):
try:
os.mkdir(dirname)
print("created {0}".format(dirname))
except:
_make_parent_dir(dirname)
def _download_ftp_file(ftp_handle, name, dest, overwrite):
""" downloads a single file from an ftp server """
_make_parent_dir(dest)
if not os.path.exists(dest) or overwrite is True:
with open(dest, 'wb') as f:
ftp_handle.retrbinary("RETR {0}".format(name), f.write)
print("downloaded: {0}".format(dest))
else:
print("already exists: {0}".format(dest))
def _mirror_ftp_dir(ftp_handle, name, overwrite, guess_by_extension):
""" replicates a directory on an ftp server recursively """
for item in ftp_handle.nlst(name):
if _is_ftp_dir(ftp_handle, item):
_mirror_ftp_dir(ftp_handle, item, overwrite, guess_by_extension)
else:
_download_ftp_file(ftp_handle, item, item, overwrite)
def download_ftp_tree(ftp_handle, path, destination, overwrite=False, guess_by_extension=True):
"""
Downloads an entire directory tree from an ftp server to the local destination
:param ftp_handle: an authenticated ftplib.FTP instance
:param path: the folder on the ftp server to download
:param destination: the local directory to store the copied folder
:param overwrite: set to True to force re-download of all files, even if they appear to exist already
:param guess_by_extension: It takes a while to explicitly check if every item is a directory or a file.
if this flag is set to True, it will assume any file ending with a three character extension ".???" is
a file and not a directory. Set to False if some folders may have a "." in their names -4th position.
"""
os.chdir(destination)
_mirror_ftp_dir(ftp_handle, path, overwrite, guess_by_extension)
this is an alternative. you can try using ftputil package. You can then use it to walk the remote directories and get your files
Using ftp.mlsd() instead of ftp.nlst():
import sys
import ftplib
import os
from ftplib import FTP
def fetchFiles(ftp, path, destination, overwrite=True):
'''Fetch a whole folder from ftp. \n
Parameters
----------
ftp : ftplib.FTP object
path : string ('/dir/folder/')
destination : string ('D:/dir/folder/') folder where the files will be saved
overwrite : bool - Overwrite file if already exists.
'''
try:
ftp.cwd(path)
os.mkdir(destination[:-1] + path)
print('New folder made: ' + destination[:-1] + path)
except OSError:
# folder already exists at the destination
pass
except ftplib.error_perm:
# invalid entry (ensure input form: "/dir/folder/")
print("error: could not change to " + path)
sys.exit("ending session")
# list children:
filelist = [i for i in ftp.mlsd()]
print('Current folder: ' + filelist.pop(0)[0])
for file in filelist:
if file[1]['type'] == 'file':
fullpath = os.path.join(destination[:-1] + path, file[0])
if (not overwrite and os.path.isfile(fullpath)):
continue
else:
with open(fullpath, 'wb') as f:
ftp.retrbinary('RETR ' + file[0], f.write)
print(file[0] + ' downloaded')
elif file[1]['type'] == 'dir':
fetchFiles(ftp, path + file[0] + '/', destination, overwrite)
else:
print('Unknown type: ' + file[1]['type'])
if __name__ == "__main__":
ftp = FTP('ftp address')
ftp.login('user', 'password')
source = r'/Folder/'
dest = r'D:/Data/'
fetchFiles(ftp, source, dest, overwrite=True)
ftp.quit()
Using ftputil, a fast solution could be:
def download(folder):
for item in ftp.walk(folder):
print("Creating dir " + item[0])
os.mkdir(item[0])
for subdir in item[1]:
print("Subdirs " + subdir)
for file in item[2]:
print(r"Copying File {0} \ {1}".format(item[0], file))
ftp.download(ftp.path.join(item[0],file), os.path.join(item[0],file))
It is non-trivial at least. In the simplest case, you only assume you have files and directories. This isn't always the case, there are softlinks and hardlinks and Windows-style shortcut. Softlink and directory shortcut are particularly problematic since they make recursive directory possible, which would confuse naive-ly implemented ftp grabber.
How would you handle such recursive directory depends on your need; you might simply not follow softlinks or you might try to detect recursive links. Detecting recursive link is inherently tricky, you cannot do it reliably.

Categories

Resources