Zipping files in Python

My program runs smoothly, but I want the files I download from FTP to be zipped on my local drive.
The problem is that only one file ends up in the zip after calling my main() function.
Here's my code:
import os
import upload
import download
import zipfile
import ConfigParser
import ftputil

def main():
    # create a folder Temp on the D: drive for later use
    path = r'D:\Temp'
    os.mkdir(path)
    # parse all the values from the config.ini file
    config = ConfigParser.ConfigParser()
    config.readfp(open('config.ini'))
    server = config.get('main', 'Server')
    username = config.get('main', 'Username')
    password = config.get('main', 'Password')
    uploads = config.get('main', 'Upload folder')
    downloads = config.get('main', 'Download folder')
    # connect to FTP
    ftp = ftputil.FTPHost(server, username, password)
    dirlist = ftp.listdir(downloads)
    for list in dirlist:
        ftp.chdir(downloads)
        target = os.path.join(path, list)
        ftp.download(list, target)
    ########################################################
    # This section is where the algorithm fails, although  #
    # the program still runs                               #
    ########################################################
    # zipping files
    absolute_path = r'D:\Temp'
    dirlist = os.listdir(absolute_path)
    filepath = r'D:\Temp\project2.zip'
    for list in dirlist:
        get_file = os.path.join(absolute_path, list)
        zip_name = zipfile.ZipFile(filepath, 'w')
        zip_name.write(get_file, 'Project2b\\' + list)

if __name__ == '__main__':
    print "cannot be"

When you do this:
for list in dirlist:
    get_file = os.path.join(absolute_path, list)
    zip_name = zipfile.ZipFile(filepath, 'w')
    zip_name.write(get_file, 'Project2b\\' + list)
you recreate the ZipFile for each file you want to zip; the "w" mode means the archive is recreated from scratch each time, so only the last file written survives.
Try this (create the zip file once, before the loop):
zip_name = zipfile.ZipFile(filepath, 'w')
for list in dirlist:
    get_file = os.path.join(absolute_path, list)
    zip_name.write(get_file, 'Project2b\\' + list)
zip_name.close()  # don't forget to close the archive when done
Or this, which opens the zip file in append mode:
for list in dirlist:
    get_file = os.path.join(absolute_path, list)
    zip_name = zipfile.ZipFile(filepath, 'a')
    zip_name.write(get_file, 'Project2b\\' + list)
    zip_name.close()  # close after each write
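On Python 2.7 and later, ZipFile is also a context manager, so a minimal sketch of the same loop (paths reused from the question) where the with-block closes the archive for you:
import os
import zipfile

absolute_path = r'D:\Temp'
filepath = r'D:\Temp\project2.zip'

# the with-statement guarantees the archive is flushed and closed
with zipfile.ZipFile(filepath, 'w') as zf:
    for name in os.listdir(absolute_path):
        if name == 'project2.zip':
            continue  # don't add the archive to itself on a re-run
        zf.write(os.path.join(absolute_path, name), 'Project2b\\' + name)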

Have a look at the shutil module. There is an example using shutil.make_archive():
http://docs.python.org/library/shutil.html
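For instance, a minimal sketch under the question's layout (assuming the downloaded files sit in D:\Temp) that produces D:\Temp.zip in one call:
import shutil

# make_archive(base_name, format, root_dir) zips everything under root_dir
shutil.make_archive(r'D:\Temp', 'zip', root_dir=r'D:\Temp')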

If you have a lot of files, you can zip them in parallel:
import zipfile
from pathlib import Path, WindowsPath
from typing import List
import logging
from time import time
from concurrent.futures import ThreadPoolExecutor

logging.basicConfig(
    format="%(asctime)s - %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
)

PATH = r"\\some_directory\subdirectory\zipped"

def file_names() -> List[WindowsPath]:
    p = Path(PATH)
    file_names = list(p.glob("./*.csv"))
    logging.info("There are %d files", len(file_names))
    return file_names

def zip_file(file: WindowsPath) -> None:
    zip_file_name = Path(PATH, f"{file.stem}.zip")
    with zipfile.ZipFile(zip_file_name, "w") as zf:
        zf.write(file, arcname=file.name, compress_type=zipfile.ZIP_DEFLATED)

def main(files: List[WindowsPath]) -> None:
    t0 = time()
    number_of_files = len(files)
    with ThreadPoolExecutor() as executor:
        for counter, _ in enumerate(executor.map(zip_file, files), start=1):
            # update progress every 100 files
            if counter % 100 == 0:
                logging.info(
                    "Processed %d/%d. TT: %d:%d",
                    counter,
                    number_of_files,
                    *divmod(int(time() - t0), 60),
                )
    logging.info(
        "Finished zipping %d files. Total time: %d:%d",
        len(files),
        *divmod(int(time() - t0), 60),
    )

if __name__ == "__main__":
    files = file_names()
    main(files)

The best way to debug this is to put debug statements in your for loops; there are two possibilities:
one: the first for loop only downloads one file from the FTP folder;
two: the first loop downloads all the files, but the second loop zips only one of them.
Use print statements to see which files are downloaded/zipped in each loop (a sketch of such statements follows). Good luck!
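For example, a minimal sketch of such statements, reusing the variable names from the question (dirlist, path, filepath):
# after the FTP download loop:
print "files listed on the FTP server:", dirlist
print "files actually in D:\\Temp:", os.listdir(path)
# after the zip loop, compare with the archive's contents:
print "files in the zip:", zipfile.ZipFile(filepath).namelist()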

Related

Compare dictionary values

As part of a wider project (to learn), I am building a script to discover files recursively in a folder, then add each filename (including the path) and its size in bytes to a CSV file.
I've then loaded that CSV file as a Python dictionary.
What I would like to do now is have Python parse over each value in the dictionary (which is the size) and compare it to all the others in the dictionary. If it finds a match, I want it to show me which keys (file names) have the matching values. I'll then do an MD5 hash on those that appear to have the same size.
The code below is as far as I've got - can anyone assist, please?
#!/usr/bin/env python3
import argparse
import os
import sys
import csv
import fnmatch

def verify_args():
    parser = argparse.ArgumentParser(description='Compare files recursively.')
    parser.add_argument('path', help='Location to begin file comparison from.')
    check = parser.parse_args()
    if os.path.isdir(check.path):
        print(check.path, 'is a valid path - continuing' + '\n')
    else:
        print(check.path, 'is an invalid path - exiting' + '\n')
        sys.exit()
    return parser.parse_args()

def listfiles(file_path):
    print('Starting comparison')
    pattern = '*'
    with open('/tmp/foo', 'w') as fo:
        fo.write('file,size' + '\n')
        for root, dirs, files in os.walk(file_path):
            for filename in fnmatch.filter(files, pattern):
                fo.write(os.path.join(root, filename) + ',' + str(os.path.getsize(os.path.join(root, filename))) + '\n')
    files = {}
    with open('/tmp/foo') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            files[row['file']] = row['size']
    x = files.keys()
    print(x)
    # Not sure now what to do

def main():
    args = verify_args()
    file_path = args.path
    listfiles(file_path)

if __name__ == '__main__':
    main()
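One possible direction for the missing step (a sketch, not from the original thread): invert the dictionary so sizes become keys; any size that collects more than one file name is a candidate for the MD5 check.
from collections import defaultdict

by_size = defaultdict(list)
for name, size in files.items():
    by_size[size].append(name)   # group file names under their size

for size, names in by_size.items():
    if len(names) > 1:           # same size: worth hashing these
        print(size, names)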

How to recursively zip multiple folders as individual .zip files?

I have a set of folders (named *.pages), which I want to zip into their individual .zip files, e.g. "example1.pages" into "example1.pages.zip", "example2.pages" into "example2.pages.zip", etc. I also want to include the contents of the individual .pages folders.
Currently, the script zips all the .pages files into a single file with nested directories.
I'm not sure how to proceed, and I believe I'm missing something in how I'm using the zipfile functions.
Any help will be most appreciated!
import os
import zipfile

start_path = "MY/DIRECTORY/HERE"

def zipdir(ziph):
    dir_count = 0
    file_count = 0
    for (path, dirs, files) in os.walk(start_path):
        print('Directory: {:s}'.format(path))
        dir_count += 1
        for file in dirs:
            if file.endswith(".pages"):
                print('\nAttempting to zip: \'{}\''.format(file))
                ziph.write(os.path.join(path, file))
                print('Done')
                file_count += 1
    print('\nProcessed {} files in {} directories.'.format(file_count, dir_count))

if __name__ == '__main__':
    zipf = zipfile.ZipFile("NAME/OF/INDIVIDUAL/ZIP/FILE.zip", 'w', zipfile.ZIP_DEFLATED)
    zipdir(zipf)
    zipf.close()
You only ever open a single zip file and add everything to it. If you want one zip per file, you need to create the zip files inside the loop, as you scan the files.
import os
import zipfile

start_path = "MY/DIRECTORY/HERE"
# start_path = '.'  # handy override for local testing

def zipdir(start_path):
    dir_count = 0
    file_count = 0
    for (path, dirs, files) in os.walk(start_path):
        print('Directory: {:s}'.format(path))
        dir_count += 1
        for file in files:
            if file.endswith(".pages"):
                file_path = os.path.join(path, file)
                print('\nAttempting to zip: \'{}\''.format(file_path))
                # one zip per file, created right next to the source
                with zipfile.ZipFile(file_path + '.zip', 'w', zipfile.ZIP_DEFLATED) as ziph:
                    ziph.write(file_path, file)
                print('Done')
                file_count += 1
    print('\nProcessed {} files in {} directories.'.format(file_count, dir_count))

if __name__ == '__main__':
    zipdir(start_path)
You can also adapt the code by @tdelaney and use the shutil module as follows:
import os
import shutil

reports_path = os.getcwd()

def zipdir(reports_path):
    for (path, dirs, files) in os.walk(reports_path):
        for d in dirs:
            file_path = os.path.join(path, d)
            print 'Compressing ' + d
            shutil.make_archive(d, 'zip', file_path)
    print "Done"

if __name__ == '__main__':
    zipdir(reports_path)

Throttling FTP download with Python ftplib

How do I throttle an FTP download with Python's ftplib? For example, how can I cap the speed at 20 Mb/s?
I'm using the following code to download files with Python ftplib:
from ftplib import FTP
import os

download_list = 'testlist.txt'  # initial list of directories to be downloaded
path_list = []  # initialize a list of all the paths from download_list
local_folder = 'testStorage'  # where files are going to be downloaded to
downloaded_list = 'completedownload.txt'  # list of completed downloads
error_list = 'incomplete_downloads.txt'  # list of paths that are incomplete

ftp = FTP("ftp.address.com")
ftp.login("user_name", "password")  # log in to the FTP account
print "Successfully logged in"

# make a list of files to download from a file
with open(download_list, 'r') as f:
    content = f.readlines()
    path_list = [x.strip() for x in content]

for path in path_list:
    path = path.replace("*", "")  # strips the * found in the source file
    print '\nChanging directory to ' + path + ':\n'
    #ftp.cwd('/AAA/BBB/CCC/logic-1/')  # the format to change into a path; note the * is omitted
    #if ftp.cwd(path) == True:
    try:  # tries the path in the file
        ftp.cwd(path)
        #ftp.retrlines('LIST')
        filenames = ftp.nlst()
        for filename in filenames:
            local_directory = local_folder + path  # create the local path, e.g. testStorage/AAA/BBB/CCC/logic-1/
            local_filename = os.path.join(local_directory, filename)
            if not os.path.exists(local_filename):  # check if the file already exists
                if not os.path.exists(local_directory):  # mimic the remote path locally
                    os.makedirs(local_directory)
                file = open(local_filename, 'wb')
                ftp.retrbinary('RETR ' + filename, file.write)
                print filename
                file.close()
            else:  # skip the file if it exists
                print 'File ' + filename + ' already exists, skipping this file'
    except:  # if a path in the text file does not exist, write it to error_list.txt
        print 'Path ' + path + ' does not exist; writing path to error_list.txt'
        with open(error_list, 'a') as f2:
            f2.write(path + '\n')
        continue

print "all done, closing connection"
ftp.close()  # close the FTP connection
To throttle the download, implement a function that does file.write and time.sleep as needed, and pass that function to ftp.retrbinary as the callback (instead of file.write directly).
This pseudocode (I don't write Python) should give you some idea:
import time

total_length = 0
start_time = time.time()

def write_and_sleep(buf):
    global file
    global total_length
    global start_time
    file.write(buf)
    total_length += len(buf)  # count the bytes actually received
    # sleep while the average rate is above the cap (bytes per second)
    while (total_length / (time.time() - start_time)) > 100000000:
        time.sleep(0.1)

ftp.retrbinary('RETR ' + filename, write_and_sleep)
Reducing maxblocksize (the third argument of ftp.retrbinary) may help achieve a smoother download curve.
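For example (blocksize is a real parameter of ftplib's retrbinary, defaulting to 8192 bytes; the value below is just an illustration):
# smaller blocks invoke the callback more often, giving finer-grained pacing
ftp.retrbinary('RETR ' + filename, write_and_sleep, blocksize=1024)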

Python - move / upload specific files to FTPS

I'm trying to implement file transfer automation with Python 2.7 on Windows.
I have an FTPS server; I need to move some files from it to a local directory, and to upload some files from local to the FTPS server.
The FTPS structure is like so:
- ROOT FOLDER
  - AAA
    - abc_id1
      - in
      - out
    - abc_id2
      - in
      - out
    - abc_id3
      - in
      - out
  - BBB
    - abc_id1
      - in
      - out
    - abc_id2
      - in
      - out
    - abc_id3
      - in
      - out
I must first MOVE all files matching the wildcard ABC_*.csv, which are located in all the /in folders (so, for example, AAA\abc_id1\in), to a local directory.
Then I must upload (COPY) some files matching a wildcard from the local directory to the corresponding abc_*/in folder (for example, a file named ABC_id3.csv must go to the abc_id3 folder).
I have begun the code:
from ftplib import FTP_TLS

ftps = FTP_TLS('ip_address')
ftps.login("user", "pass")  # login before securing the control channel
ftps.prot_p()  # switch to a secure data connection
#ftps.retrlines('LIST')  # list directory content securely
ftps.cwd("AAA")
ftps.retrlines('LIST')
ftps.quit()
But I don't know how I can loop through the multiple folders to accomplish the task.
Please suggest some code.
Regards
Two things will help: walking through directories with os.walk, and generators.
You'll want to walk through the directories and check each file as you go. Once you determine it's a file you want, you can apply the appropriate FTP functionality.
Here's a sample from one of the apps I'm working on; I've added the ability to exclude files as well.
# Generator which runs through directories and yields files
def scanDir(self, root, excludeDirs, excludeFiles, excludeExt, maxFileSize):
    global fileList
    print "Scanning directory " + root
    x = 0
    for root, dirnames, filenames in os.walk(root):
        for name in filenames:
            # we want the absolute path to these
            absroot = os.path.abspath(root)
            filename = os.path.join(absroot, name)
            fileSize = os.path.getsize(filename) / 1024
            x = x + 1
            ## TODO: handle compressed files here (by extension)
            if os.path.isfile(filename) and os.path.getsize(filename) > 0:
                if fileSize > maxFileSize:
                    continue
                else:
                    try:
                        os.path.getsize(filename)  # raises if the file became unreadable
                        data = open(root + "/" + name, 'rb').read()
                    except:
                        data = False
                        print "Could not read file :: %s/%s" % (root, name)
                    # TODO: create an exception here and filter file paths,
                    # e.g. a regex for /home/*/mail
                    self.fileList.append({"filename": filename})
                    yield data, filename
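Applied to the FTPS layout from the question, here is a hedged sketch (the server address, credentials, local_dir, and the reliance on nlst returning the subfolder names are all assumptions; "move" is implemented as download plus delete):
import fnmatch
import os
from ftplib import FTP_TLS

ftps = FTP_TLS('ip_address')
ftps.login('user', 'pass')
ftps.prot_p()

local_dir = r'C:\local\inbox'  # hypothetical local target directory
for group in ('AAA', 'BBB'):
    for sub in ftps.nlst(group):                      # e.g. AAA/abc_id1
        in_path = group + '/' + os.path.basename(sub) + '/in'
        for entry in ftps.nlst(in_path):
            base = os.path.basename(entry)
            if fnmatch.fnmatch(base, 'ABC_*.csv'):
                with open(os.path.join(local_dir, base), 'wb') as f:
                    ftps.retrbinary('RETR ' + in_path + '/' + base, f.write)
                ftps.delete(in_path + '/' + base)     # MOVE = download + delete
ftps.quit()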
Here's an example of recursively walking an FTP server and fetching zip files, with an anonymous login.
#!/usr/bin/env python
from ftplib import FTP
from time import sleep
import os

ftp = FTP('ftp2.census.gov')
ftp.login()

my_dirs = []   # global
my_files = []  # global
curdir = ''    # global

def get_dirs(ln):
    global my_dirs
    global my_files
    cols = ln.split(' ')
    objname = cols[len(cols) - 1]  # file or directory name
    if ln.startswith('d'):
        my_dirs.append(objname)
    else:
        if objname.endswith('.zip'):
            my_files.append(os.path.join(curdir, objname))  # full path

def check_dir(adir):
    global my_dirs
    global my_files  # let it accrue, then fetch them all later
    global curdir
    my_dirs = []
    gotdirs = []  # local
    curdir = ftp.pwd()
    print("going to change to directory " + adir + " from " + curdir)
    ftp.cwd(adir)
    curdir = ftp.pwd()
    print("now in directory: " + curdir)
    ftp.retrlines('LIST', get_dirs)
    gotdirs = my_dirs
    print("found in " + adir + " directories:")
    print(gotdirs)
    print("Total files found so far: " + str(len(my_files)) + ".")
    sleep(1)
    for subdir in gotdirs:
        my_dirs = []
        check_dir(subdir)  # recurse
    ftp.cwd('..')  # back up a directory when done here

try:
    check_dir('/geo/tiger/GENZ2012')  # root directory to start in
except:
    print('oh dear.')
    ftp.quit()

ftp.cwd('/.')  # change to root directory for downloading
for f in my_files:
    print('getting ' + f)
    file_name = f.replace('/', '_')  # use path as filename prefix, with underscores
    ftp.retrbinary('RETR ' + f, open(file_name, 'wb').write)
    sleep(1)

ftp.quit()
print('all done!')

Run file inside a zipfile?

Is it possible to run a .html or .exe, for example, that is inside a zip file? I'm using the zipfile module.
Here's my sample code:
import zipfile

z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = ""
g = ""
for filename in z.namelist():
    #print filename
    y = len(filename)
    x = str(filename)[y - 5:]
    if x == ".html":
        g = filename
f = z.open(g)
After f = z.open(g), I don't know what to do next. I tried using .read(), but it only reads what's inside the HTML; what I need is to run or execute it.
Or is there any other similar way to do this?
The best approach will be to extract the required file to the Windows temp directory and execute it. I have modified your original code to create a temp file and execute it:
import zipfile
import shutil
import os

z = zipfile.ZipFile("c:\\test\\test.zip", "r")
x = ""
g = ""
basename = ""
for filename in z.namelist():
    print filename
    y = len(filename)
    x = str(filename)[y - 5:]
    if x == ".html":
        basename = os.path.basename(filename)  # get the file name and extension from the returned path
        g = filename
        print basename
        break  # found what was needed, no need to run the loop again

f = z.open(g)
temp = os.path.join(os.environ['temp'], basename)  # create the temp file name
tempfile = open(temp, "wb")
shutil.copyfileobj(f, tempfile)  # copy the unzipped file to the Windows 'temp' folder
tempfile.close()
f.close()
os.system(temp)  # run the file
Run the first .html file in a zip archive specified at the command line:
#!/usr/bin/env python
import os
import shutil
import sys
import tempfile
import webbrowser
import zipfile
from subprocess import check_call
from threading import Timer

with zipfile.ZipFile(sys.argv[1], 'r') as z:
    # find the first html file in the archive
    member = next(m for m in z.infolist() if m.filename.endswith('.html'))
    # create a temporary directory to extract the file to
    tmpdir = tempfile.mkdtemp()
    # remove tmpdir in 5 minutes
    t = Timer(300, shutil.rmtree, args=[tmpdir], kwargs=dict(ignore_errors=True))
    t.start()
    # extract the file
    z.extract(member, path=tmpdir)
    filename = os.path.join(tmpdir, member.filename)

# run the file
if filename.endswith('.exe'):
    check_call([filename])  # run as a program; wait for it to complete
else:  # open the document using the default browser
    webbrowser.open_new_tab(filename)  # NOTE: returns immediately
Example
T:\> open-from-zip.py file.zip
As an alternative to webbrowser you could use os.startfile(os.path.normpath(filename)) on Windows.
