I'm trying to implement a file transfer automation with python 2.7 on Windows.
So I have a FTPS server, I need to move some files from it to a local directory and to upload some files from local to FTPS
The FTPS structure is like so:
- ROOT FOLDER
- AAA
- abc_id1
- in
- out
- abc_id2
- in
- out
- abc_id3
- in
- out
- BBB
- abc_id1
- in
- out
- abc_id2
- in
- out
- abc_id3
- in
- out
I must first MOVE all files that match a wildcard ABC_*.csv, they are located in all /in folders (so for ex. AAA\abc_id1\in) to a local directory
Then I must upload (COPY) some files that have a wildcard from the local directory to the corresponding abc_/in folder (for ex. a file named ABC_id3.csv must go to the abc_id3 folder)
I have begun the code:
from ftplib import FTP_TLS
# Connect over explicit FTPS and list the contents of the AAA folder.
ftps = FTP_TLS('ip_address')
try:
    # FTP_TLS.login() secures the control channel (AUTH TLS) before the
    # credentials are sent, unless it is called with secure=False.
    ftps.login("user", "pass")
    ftps.prot_p()  # switch to secure data connection
    ftps.cwd("AAA")
    ftps.retrlines('LIST')  # list directory content securely
finally:
    # Always end the session, even when one of the commands above failed.
    try:
        ftps.quit()
    except Exception:
        # The polite QUIT can fail on a broken connection; just drop it.
        ftps.close()
But I don't know how I can loop through the multiple folders to accomplish the task.
Please suggest some code
Regards
Two things that will help. Walking through directories with os.walk and generators.
You'll want to walk through the directories and check each file going through. Once you determine it's a file you want you can apply the appropriate FTP functionality.
Here's a sample I have from one of my apps I'm working on. I've added the ability to exclude as well.
# Generator which runs through directories and returns files
def scanDir (self, root, excludeDirs, excludeFiles, excludeExt, maxFileSize):
    """Walk ``root`` recursively and yield ``(data, filename)`` per file.

    Only regular, non-empty files whose size in whole KiB does not exceed
    ``maxFileSize`` are yielded.  ``data`` is the file content read in binary
    mode, or ``False`` when the file could not be read.  ``filename`` is the
    absolute path.  Every yielded path is also appended to ``self.fileList``
    as ``{"filename": path}``.

    ``excludeDirs``, ``excludeFiles`` and ``excludeExt`` are accepted but are
    not applied anywhere in this function.
    """
    print("Scanning directory " + root)
    for dirpath, dirnames, filenames in os.walk(root):
        # We want absolute paths to these
        absroot = os.path.abspath(dirpath)
        for name in filenames:
            filename = os.path.join(absroot, name)
            try:
                size = os.path.getsize(filename)
            except OSError:
                # e.g. a dangling symlink or a file removed during the walk
                continue
            if size <= 0 or not os.path.isfile(filename):
                continue
            # Floor division keeps the size in whole KiB on Python 2 and 3.
            if size // 1024 > maxFileSize:
                continue
            # TODO compressed files call here (Extension)
            try:
                # The absolute path stays valid even if the caller changes
                # the working directory between two yields.
                with open(filename, 'rb') as handle:
                    data = handle.read()
            except (IOError, OSError):
                data = False
                print("Could not read file :: %s/%s" % (dirpath, name))
            # TODO Create Exception here and filter file paths:
            # regex for /home/*/mail
            self.fileList.append({"filename": filename})
            yield data, filename
Here's an example of recursively walking an FTP server and fetching zip files, with an anonymous login.
#!/usr/bin/env python
from ftplib import FTP
from time import sleep
import os
ftp = FTP('ftp2.census.gov')
ftp.login()
my_dirs = []  # global: directory names found in the listing being processed
my_files = []  # global: full remote paths of all .zip files found so far
curdir = ''  # global: remote directory whose listing is being processed


def get_dirs(ln):
    """Classify one line of a Unix-style ``LIST`` reply.

    Lines starting with ``d`` are directories: their name is appended to
    ``my_dirs``.  Any other entry whose name ends in ``.zip`` is appended to
    ``my_files`` as ``curdir`` joined with the name.

    NOTE(review): assumes the common ``ls -l`` layout (eight metadata columns,
    then the name); other layouts fall back to the last token on the line.
    """
    import posixpath  # FTP paths always use '/', whatever the local OS is

    # Splitting at most eight times keeps names that contain spaces whole.
    cols = ln.split(None, 8)
    if not cols:
        return  # blank line
    objname = cols[8] if len(cols) == 9 else cols[-1]  # file or directory name
    if objname in ('.', '..'):
        return  # following these would make the crawl loop forever
    if ln.startswith('d'):
        my_dirs.append(objname)
    elif objname.endswith('.zip'):
        my_files.append(posixpath.join(curdir, objname))  # full path
def check_dir(adir):
    """Change into ``adir`` on the server and crawl it depth first.

    The directory is listed with ``LIST``; ``get_dirs`` receives each line and
    fills the globals ``my_dirs`` (sub-directories of this level) and
    ``my_files`` (which keeps accruing so everything can be fetched later).
    Each sub-directory is then visited recursively, and finally the
    connection moves back up one level.
    """
    global my_dirs, my_files, curdir

    my_dirs = []
    curdir = ftp.pwd()
    print("going to change to directory " + adir + " from " + curdir)
    ftp.cwd(adir)
    curdir = ftp.pwd()
    print("now in directory: " + curdir)

    ftp.retrlines('LIST', get_dirs)
    # Keep a reference to this level's list: the global name is rebound
    # below and inside every recursive call.
    children = my_dirs
    print("found in " + adir + " directories:")
    print(children)
    print("Total files found so far: " + str(len(my_files)) + ".")
    sleep(1)

    for child in children:
        my_dirs = []
        check_dir(child)  # recurse
    ftp.cwd('..')  # back up a directory when done here
try:
    check_dir('/geo/tiger/GENZ2012')  # root directory to start in
except Exception as exc:
    # The crawl failed: say why, end the session and stop here, because the
    # download loop below cannot work on a connection that has been quit.
    print('oh dear.')
    print(exc)
    ftp.quit()
    raise SystemExit(1)

ftp.cwd('/.')  # change to root directory for downloading
for f in my_files:
    print('getting ' + f)
    file_name = f.replace('/', '_')  # use path as filename prefix, with underscores
    # The with-block closes the local file even when the transfer fails.
    with open(file_name, 'wb') as local_file:
        ftp.retrbinary('RETR ' + f, local_file.write)
    sleep(1)
ftp.quit()
print('all done!')
Related
I've got 6 directories (A, B, C, D, E, F) containing .mov files.
The structure is:
A
-0001_01.mov
-0002_01.mov
-...
B
-0001_02.mov
-0002_02.mov
-...
And so on.
First, I want to create as many directories as there are files in one of the directories mentioned above.
Let's say A contains 35 .mov files (B, C .. contain the same amount of .mov files).
I now got 35 folders starting from "01" up to "35".
Now I want to copy each corresponding .mov file into the same directory, which means 0001_01.mov - 0001_06.mov go into "01", 0002_01.mov - 0002_06.mov go into "02" and so on.
I've got the creation of the directories working, but I just can't wrap my head around the copying part.
import os
# Source folders that hold the .mov files.
source_dirs = ["./A/", "./B/", "./C/", "./D/", "./E/", "./F/"]

# next(os.walk(d)) returns only the top level as (path, subdirs, files).
file_counts = [len(next(os.walk(source_dir))[2]) for source_dir in source_dirs]

path2 = "./"

# Report whether every source folder holds the same number of files.
if len(set(file_counts)) == 1:
    print("true")
else:
    print("false")

# One numbered folder ("01", "02", ...) per file in the first source folder.
for i in range(file_counts[0]):
    # zfill(2) pads single digits with a leading zero.
    path3 = path2 + str(i + 1).zfill(2)
    try:
        os.mkdir(path3)
    except OSError:
        print("Creation of the directory %s failed" % path3)
    else:
        print("Successfully created the directory %s " % path3)
This is my first time using python, I think the code reflects that.
I've now wasted countless hours on this, so any help is appreciated.
So I changed your code quite a bit and tested it quickly on my system and it seemed to do what you wanted. Can you try and let me know if this gave you idea of how it can be done?
Disclaimer: I'm not a Python expert by any means, but I find my way around it. This is most likely not the prettiest solution, but it does work on my machine exactly as you wanted. Just make sure you run it from inside your folder; if you are running it from outside your folder, change cwd = os.getcwd() to cwd = "path-to-your-folder".
import os
import shutil
import glob
folder_names = ["A", "B", "C", "D", "E", "F"]
cwd = os.getcwd()

# Create as many numbered folders as the fullest source folder has files.
num_folders = 0
for folder in folder_names:
    source = os.path.join(cwd, folder)
    num_files = len([f for f in os.listdir(source)
                     if os.path.isfile(os.path.join(source, f))])
    num_folders = max(num_folders, num_files)

for index in range(1, num_folders + 1):
    # "01", "02", ... and, past 99, "100", "101", ...
    target = os.path.join(cwd, "{:02d}".format(index))
    # An existing folder is fine; any other mkdir failure should be seen.
    if not os.path.isdir(target):
        os.mkdir(target)
    # Clips are named NNNN_xx.mov, where NNNN is the zero-padded clip number.
    pattern = "{:04d}_*.mov".format(index)
    for folder in folder_names:
        for movie in glob.glob(os.path.join(cwd, folder, pattern)):
            shutil.copy2(movie, target)
I'm no python expert either (look at my scores too, hi), but I've tried to keep your original coding order as much as possible. I would recommend to look at different codes for real expert-tier code but it seems to do what you're asking for :
import os
import shutil
mov_pathes = ["./a/", "./b/"]

# Pair every source directory with the names os.listdir() reports for it,
# ex : ('./patha/',['0001_01.mov','0002_01.mov'])
all_files = [(mov_path, os.listdir(mov_path)) for mov_path in mov_pathes]
# Entry count per directory, used for the "all equal length" check below.
lengths = [len(names) for _, names in all_files]

same_length = lengths.count(lengths[0]) == len(lengths)
print("true" if same_length else "false")

base_dir = "./"
for i in range(1, lengths[0] + 1):
    # zfill(n) left-pads the string with zeros (ex. "7".zfill(5) gives 00007)
    path_name = base_dir + str(i).zfill(2)
    try:
        os.mkdir(path_name)
    except OSError:
        print("Creation of the directory {path_name} failed".format(path_name=path_name))
    else:
        print("Successfully created the directory {path_name}".format(path_name=path_name))
This does exactly the same thing, but it will probably make maintaining your code easier later on.
for your real question, IF we're sure that your inputs are gonna look like 00XX_NN.mov, adding
# all_files holds (original dir, list of file names in that dir) pairs.
for source_dir, source_files in all_files:
    for file_name in source_files:
        # Names look like 0001_01.mov: the digits before the first '_' say
        # which numbered folder the file belongs to.
        number = file_name.split("_", 1)[0]
        if not number.isdigit():
            continue  # not a numbered clip (e.g. a hidden system file)
        # The original file is located at source_dir + file_name.
        source_file = source_dir + file_name
        # "0001" -> "01"; numbers of 100 or more keep all of their digits.
        target_dir = base_dir + str(int(number)).zfill(2)
        # shutil.copy(source file, target directory) copies the file.
        shutil.copy(source_file, target_dir)
seems to do what you're asking for, at least for me. Once again I'm no expert so let me know if it's not working!
tested with :
./a
- 0001_01
- 0002_01
- 0003_01
./b
- 0001_02
- 0002_02
- 0003_02
result :
./01 :
- 0001_01
- 0001_02
./02 :
- 0002_01
- 0002_02
./03 :
- 0003_01
- 0003_02
I'm coding a script that either zips the whole folder or only the files with a specific extension such as ".txt". I use sys.argv to pass the folder path and, if needed, the extension. However, when I pass only the path, the only thing I get is this error:
fileEx = sys.argv[2]
IndexError: list index out of range
How can I make the "file extension" command line optional?
My script:
import zipfile, os, sys
def zipf(folder_path, fileEx=None):
    """Zip ``folder_path`` into ``<basename>_<N>.zip`` in the current directory.

    ``N`` is the first number, counting from 1, for which no such archive
    exists yet.  Every folder below ``folder_path`` is added to the archive.
    When ``fileEx`` is given, only files whose name ends with it are added;
    otherwise all files are.  Archives previously produced for this folder
    (``<basename>_*.zip``) are never added.
    """
    folder_path = os.path.abspath(folder_path)
    newBase = os.path.basename(folder_path) + '_'

    # Pick the first numbered archive name that is not taken yet.
    number = 1
    while True:
        zipfilename = newBase + str(number) + '.zip'
        if not os.path.exists(zipfilename):
            break
        number = number + 1

    # create the zip file; the with-block closes it even if adding fails
    print(f'creating {zipfilename}')
    with zipfile.ZipFile(zipfilename, 'w') as backupZip:
        # walking through folders
        for foldername, subfolders, filenames in os.walk(folder_path):
            print(f'Adding files in {foldername}. . . ')
            # adding the folder
            backupZip.write(foldername)
            for filename in filenames:
                # never put earlier backups of this folder into the archive
                if filename.startswith(newBase) and filename.endswith('.zip'):
                    continue
                if fileEx and not filename.endswith(fileEx):
                    continue
                # foldername is absolute, so no chdir is needed to find it
                backupZip.write(os.path.join(foldername, filename))


if __name__ == "__main__":
    # sys.argv[0] is the script itself, so the folder is argv[1] and the
    # optional extension, when present, is argv[2].
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} FOLDER [EXTENSION]')
    folder_path = sys.argv[1]
    fileEx = sys.argv[2] if len(sys.argv) > 2 else None
    zipf(folder_path, fileEx)
I'm getting an error while trying to copy files from a single source directory which contains bug fixes: /home/saurabh/testbed/patch_dir/ to multiple destination directories: app_dir_1 and app_dir_2.
Both these directories are exact replicas of each other.
The script does the following:-
Read lines from a text file into a list. Each line contains the name of one component. In this case: ['file1.class', file2.html]
Search value at each index recursively, starting from a particular directory:
/home/saurabh/testbed/dest_dir/
Take a backup of these files wherever they are found by appending ddMonyyyy to their extension.
Copy files from directory which contains patched components: /home/saurabh/testbed/patch_dir/
to the directory where backup was taken earlier
Directory Overview:-
/home/saurabh/testbed/dest_dir/
|--app_dir_1
|--file1.class
|--file2.html
|--file3.jsp
|--file4.xml
|--sub_dir
|--app_dir_2
|--file1.class
|--file2.html
|--file3.jsp
|--file4.xml
|--sub_dir
|--other_directories
/home/saurabh/testbed/patch_dir/
|--file1.class
|--file2.html
Below is my code:
#!/usr/bin/env python
import os
import fnmatch
import datetime
import shutil
with open('filenames.txt') as f:
    # One component name (or fnmatch pattern) per line; blank lines ignored.
    content = [line.strip() for line in f if line.strip()]
print('File contents:')
print(content)

suffix = datetime.datetime.now().strftime("_%d%b%Y")
approot = '/home/saurabh/testbed/dest_dir/'
source_dir = '/home/saurabh/testbed/patch_dir/'
dir_list = []  # directory of every file that was backed up
patched_names = []  # file name backed up in the matching dir_list entry

print('\n' + 'Renaming files present at:')
for root, dirs, files in os.walk(approot):
    for file_list in content:
        for filename in fnmatch.filter(files, file_list):
            current_file = os.path.join(root, filename)
            print(current_file)
            dir_list.append(root)
            patched_names.append(filename)
            # Back up by appending _ddMonYYYY to the existing name.
            os.rename(current_file, current_file + suffix)

print("\n" + "Backup of all files complete!")
print('Backup of ' + str(len(dir_list)) + ' files taken recursively')
print('Number of files mentioned in text file: ' + str(len(content)) + '\n')

# 2 instances in UAT
# 12 instances in PROD
if (2*len(content)) == len(dir_list):
    print("Retrofitted components will be copied to their respective directories")
    # dir_list has one entry per backed-up file, so it is longer than
    # content; walk the recorded (directory, name) pairs rather than
    # indexing content with a dir_list index.
    for target_dir, name in zip(dir_list, patched_names):
        print(source_dir + name + "\t" + target_dir)
        # Dry run only; enable the next line to perform the copy.
        #shutil.copy2(source_dir + name, target_dir)
I'm getting the below error while copying the files (4.)
File contents:
['file1.class', 'file2.html']
Renaming files present at:
/home/saurabh/testbed/dest_dir/app_dir_1/file1.class
/home/saurabh/testbed/dest_dir/app_dir_1/file2.html
/home/saurabh/testbed/dest_dir/app_dir_2/file1.class
/home/saurabh/testbed/dest_dir/app_dir_2/file2.html
Backup of all files complete!
Backup of 4 files taken recursively
Number of files mentioned in text file: 2
Retrofitted components will be copied to their respective directories
/home/saurabh/testbed/patch_dir/file1.class /home/saurabh/testbed/dest_dir/app_dir_1
/home/saurabh/testbed/patch_dir/file2.html /home/saurabh/testbed/dest_dir/app_dir_1
Traceback (most recent call last):
File "./prod_movement.py", line 56, in <module>
print(source_dir + content[dst_ind] + "\t" + dir_list[dst_ind])
IndexError: list index out of range
Expected Output:
File contents:
['file1.class', 'file2.html']
Renaming files present at:
/home/saurabh/testbed/dest_dir/app_dir_1/file1.class
/home/saurabh/testbed/dest_dir/app_dir_1/file2.html
/home/saurabh/testbed/dest_dir/app_dir_2/file1.class
/home/saurabh/testbed/dest_dir/app_dir_2/file2.html
Backup of all files complete!
Backup of 4 files taken recursively
Number of files mentioned in text file: 2
Retrofitted components will be copied to their respective directories
/home/saurabh/testbed/patch_dir/file1.class /home/saurabh/testbed/dest_dir/app_dir_1
/home/saurabh/testbed/patch_dir/file2.html /home/saurabh/testbed/dest_dir/app_dir_1
/home/saurabh/testbed/patch_dir/file1.class /home/saurabh/testbed/dest_dir/app_dir_2
/home/saurabh/testbed/patch_dir/file2.html /home/saurabh/testbed/dest_dir/app_dir_2
Appreciate any help to fix the code.
Figured it out !!
#!/usr/bin/env python
import os
import fnmatch
import datetime
import shutil
# Read the component names, one per line, dropping surrounding whitespace.
with open('filenames.txt') as f:
    content = [entry.strip() for entry in f.readlines()]
print('File contents:')
print(content)

suffix = datetime.datetime.now().strftime("_%d%b%Y")
approot = '/Users/saurabhm/Desktop/Python/testbed/dest_dir/'
source_dir = '/Users/saurabhm/Desktop/Python/testbed/patch_dir/'
dir_list = []

print('\n' + 'Renaming files present at:')
for root, dirs, files in os.walk(approot):
    for file_list in content:
        matches = fnmatch.filter(files, file_list)
        for filename in matches:
            current_file = os.path.join(root, filename)
            print(current_file)
            dir_list.append(root)
            # Back up the existing file under its name plus the date suffix,
            os.rename(current_file, current_file + suffix)
            # then drop the patched copy in its place and set its mode.
            shutil.copy2(source_dir + filename, root)
            os.chmod(root + '/' + filename, 0o750)

print("\n" + "Backup of all files complete!")
print('Backup of ' + str(len(dir_list)) + ' files taken recursively')
print('Number of files mentioned in text file: ' + str(len(content)) + '\n')
Once patched components are received over email, they are copied to a directory in the respective server. All these files are consolidated in a single directory (patch_dir)
Backup of existing files (having the same name and are present in "dest_dir") are taken wherever they are found, following which each file is copied from "patch_dir" to the directories inside "dest_dir", where their backup was taken.
How do I throttle the FTP download with Python ftplib? For example put a cap on the speed to be 20Mb/s?
I'm using the following code to download files with Python ftplib:
from ftplib import FTP
import os
from ftplib import all_errors  # tuple of every exception ftplib can raise

download_list = 'testlist.txt'  # initial list of directories to be downloaded
path_list = []  # initialize a list of all the paths from download_list
local_folder = 'testStorage'  # where files are going to be downloaded to
downloaded_list = 'completedownload.txt'  # list of completed downloads
error_list = 'incomplete_downloads.txt'  # list of paths that are incomplete

ftp = FTP("ftp.address.com")
ftp.login("user_name", "password")  # login to FTP account
print("Successfully logged in")

# make a list of paths to download from a file
with open(download_list, 'r') as f:
    path_list = [line.strip() for line in f]

for path in path_list:
    path = path.replace("*", "")  # strips the * found in the source file
    print('\nChanging directory to ' + path + ':\n')
    try:
        ftp.cwd(path)
        filenames = ftp.nlst()
        # mimic the remote path locally, ie : testStorage/AAA/BBB/CCC/logic-1/
        local_directory = local_folder + path
        if not os.path.exists(local_directory):
            os.makedirs(local_directory)
        for filename in filenames:
            local_filename = os.path.join(local_directory, filename)
            if os.path.exists(local_filename):  # skip the file if it exists
                print('File ' + filename + ' already exists, skipping this file')
                continue
            try:
                with open(local_filename, 'wb') as local_file:
                    ftp.retrbinary('RETR ' + filename, local_file.write)
            except all_errors:
                # A partial file would be skipped as "already exists" on the
                # next run, so remove it before reporting the failure.
                if os.path.exists(local_filename):
                    os.remove(local_filename)
                raise
            print(filename)
    except all_errors as exc:
        # Missing path or failed transfer: record the path and carry on.
        print('Path ' + path + ' could not be downloaded (' + str(exc) +
              '), writing path to ' + error_list)
        with open(error_list, 'a') as f2:
            f2.write(path + '\n')
        continue

print("all done closing connection")
ftp.close()  # CLOSE THE FTP CONNECTION
To throttle the download, just implement a function that does file.write and time.sleep as needed. Pass that function to ftp.retrbinary as callback (instead of file.write directly).
This pseudo code (I do not do Python) should give you some idea:
MAX_BYTES_PER_SECOND = 100000000  # average download rate allowed

total_length = 0  # bytes written since start_time
start_time = time.time()


def write_and_sleep(buf):
    """Write ``buf`` to the global ``file`` and sleep while over the rate cap.

    Intended as the callback for ``ftp.retrbinary``.  The average rate is
    measured from ``start_time`` over all bytes written so far.

    NOTE(review): expects a module-level name ``file`` bound to the open
    output file, as in the download script this snippet plugs into.
    """
    global total_length
    file.write(buf)
    # len() is the payload size; sys.getsizeof() would add object overhead.
    total_length += len(buf)
    # Multiplying instead of dividing avoids a zero-elapsed-time division.
    while total_length > MAX_BYTES_PER_SECOND * (time.time() - start_time):
        time.sleep(0.1)
ftp.retrbinary('RETR '+ filename, write_and_sleep)
Reducing maxblocksize (the 3rd argument of ftp.retrbinary) may help achieving more smooth "download curve".
I have a piece of code i wrote for school:
import os
import shutil  # needed for shutil.move below

source = "/home/pi/lab"
dest = os.environ["HOME"]

# Extension -> folder under dest that receives files with that extension.
targets = ((".c", "/c"), (".cpp", "/cpp"), (".sh", "/sh"))

for filename in os.listdir(source):
    for extension, folder in targets:
        if filename.endswith(extension):
            # os.listdir returns bare names, so the source directory has to
            # be joined back on to find the file.
            shutil.move(os.path.join(source, filename), dest + folder)
            break
What this code does is look for files in a source directory and, if a file has one of the listed extensions, move it to the matching directory. This part works. If a file of the same name already exists in the destination folder, I want to add 1 at the end of the file name, before the extension, and if there are multiple copies keep incrementing that number ("1++").
Like this: test1.c,test2.c, test3.c
I tried using os.isfile(filename) but this only looks at the source directory. and I get a true or false.
To test if the file exists in the destination folder you should os.path.join the dest folder with the file name
import os
import shutil
source = "/home/pi/lab"
dest = os.environ["HOME"]
handled_extensions = (".c", ".cpp", ".sh")


def _free_destination(directory, name, extension):
    """Return a path in ``directory`` for ``name + extension`` that is unused.

    The plain name is preferred; if it is taken, ``name1``, ``name2``, ... are
    tried in turn until one does not exist.
    """
    candidate = os.path.join(directory, name + extension)
    i = 0
    while os.path.isfile(candidate):
        i += 1
        candidate = os.path.join(directory, "%s%d%s" % (name, i, extension))
    return candidate


# Avoid using the reserved word 'file' for a variable - 'filename' instead
for filename in os.listdir(source):
    # os.path.splitext splits the name and the extension (including the '.')
    name, extension = os.path.splitext(filename)
    if extension not in handled_extensions:
        continue
    # The same code path serves every handled extension.
    shutil.copy(os.path.join(source, filename),
                _free_destination(dest, name, extension))
If you want to have put the renaming logic in a separate function using glob and re this is one way:
import glob
import re
...
def rename_file(source_filename, source_ext):
    """Copy ``<source>/<name><ext>`` to ``dest`` under the next free number.

    Looks in the global ``dest`` directory for files named ``<name><N><ext>``
    (for example ``a1.c``, ``a2.c``) and copies the source file there as
    ``<name><max N + 1><ext>``.  When no numbered copy exists yet, the copy
    is numbered 1.

    ``source_ext`` includes the leading dot.  ``source`` and ``dest`` are
    read from module level.
    """
    filename_pattern = os.path.join(dest, "%s[0-9]*%s"
                                    % (source_filename, source_ext))
    # Contains files such as 'a1.c', 'a2.c', etc...
    existing_files = glob.glob(filename_pattern)
    # Escape the name and extension (the '.' in particular) and anchor the
    # pattern so that only '<name><digits><ext>' counts as a numbered copy.
    regex = re.compile(r"^%s([0-9]+)%s$"
                       % (re.escape(source_filename), re.escape(source_ext)))
    matches = (regex.match(os.path.basename(path)) for path in existing_files)
    indices = [int(match.group(1)) for match in matches if match]
    # max() of an empty list raises ValueError, so start from 0 instead.
    max_index = max(indices) if indices else 0
    source_full_path = os.path.join(source, "%s%s"
                                    % (source_filename, source_ext))
    # Rebuild the destination filename with the max index + 1
    dest_full_path = os.path.join(dest, "%s%d%s"
                                  % (source_filename,
                                     (max_index + 1),
                                     source_ext))
    shutil.copy(source_full_path, dest_full_path)
...
# If the file already exists i.e. replace the while loop in the else statement
rename_file(name, extension)
I didn't test the code, but something like this should do the job:
# Find the first name in the series a.txt, a1.txt, a2.txt, ... that is free.
filename = "a.txt"
# splitext splits at the last dot only, so names with several dots work;
# ext keeps its leading '.'.
fname, ext = os.path.splitext(filename)
i = 0
while os.path.isfile(filename):
    i += 1
    filename = fname + str(i) + ext