Downloading files from FTP site with specific words in the name - python

Can you please shed some light on what I am doing wrong here? I'm a Python newbie. This connects, and I can get a list of files in a specific FTP directory, but it isn't downloading any files. I need to download files whose names start with a specific string:
from ftplib import FTP_TLS
import fnmatch
import os

ftps = FTP_TLS('myftp_site.com')
ftps.login('userxx', 'pwxx')
ftps.prot_p()
ftps.cwd('Inbox')
print("File list:")
list_of_files = ftps.retrlines("LIST")
dest_dir = "C:\DownloadingDirectory"
for name in list_of_files:
    if fnmatch.fnmatch(name, "StartingFileName*"):
        with open(os.path.join(dest_dir, name), "wb") as f:
            ftps.retrbinary("RETR {}".format(name), f.write)
ftps.quit()

You are having problems getting the file list. LIST returns a long-format listing with file attributes, for instance:
-rw------- 1 td dialout 543 Apr 3 20:18 .bash_history
Use NLST to get a short list of bare names instead. Also, the retrlines() function is a little unusual: it calls a callback for each line it receives (defaulting to print), and the call itself only returns a status string. You can pass your own callback to collect the lines into a list, or use the nlst() helper, which builds the list for you.
from ftplib import FTP_TLS
import fnmatch
import os

ftps = FTP_TLS('myftp_site.com')
ftps.login('userxx', 'pwxx')
ftps.prot_p()
ftps.cwd('Inbox')
print("File list:")
list_of_files = []
ftps.retrlines("NLST", list_of_files.append)
# ...or use the existing helper:
# list_of_files = ftps.nlst()
dest_dir = r"C:\DownloadingDirectory"  # raw string so the backslash isn't treated as an escape
for name in list_of_files:
    if fnmatch.fnmatch(name, "StartingFileName*"):
        with open(os.path.join(dest_dir, name), "wb") as f:
            ftps.retrbinary("RETR {}".format(name), f.write)
ftps.quit()
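As an aside, if the server supports the MLSD command (and you are on Python 3.3+), ftplib's mlsd() returns machine-readable (name, facts) pairs, so you don't have to parse listings at all. A minimal sketch under those assumptions, reusing the connection and variables above:

# Sketch: requires server-side MLSD support (ftplib.FTP.mlsd, Python 3.3+).
for name, facts in ftps.mlsd():
    # 'facts' is a dict; the 'type' fact distinguishes files from directories.
    if facts.get("type") == "file" and fnmatch.fnmatch(name, "StartingFileName*"):
        with open(os.path.join(dest_dir, name), "wb") as f:
            ftps.retrbinary("RETR {}".format(name), f.write)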

Related

Walk directories and remove file extensions

I'm trying to remove all the Outlook .ost and .nst files from the user's folder on a network PC, and to write the names of the removed files to a CSV file.
I can get the script to find all the files in the directory and write them to a CSV, but when I try to remove the files with os.remove it doesn't seem to run, so I've commented it out for the time being.
I added the try/except to skip files that are in use.
import os
import sys

sys.stdout = open("output_file.csv", "w")
try:
    for rootDir, subdir, files in os.walk("//network_pc_name/c$/Users"):
        for filenames in files:
            if filenames.endswith((".nst", ".ost")):
                foundfiles = os.path.join(rootDir, filenames)
                #os.remove(os.path.join(rootDir, filenames))
                print(foundfiles)
except:
    pass
sys.stdout.close()
I made some changes to the script as suggested and it appears to run a lot quicker; however, I can't figure out how to ignore files that are in use.
To test, I switched the file extensions to .xlsx and .txt, opened the .xlsx file to trigger the permission error, and checked whether the script would continue to run and remove the .txt file.
I got the following error:
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: '//DESKTOP-HRLS19N/c$/globtest\Book1.xlsx'
import glob
import os

files = [i for i in glob.glob("//DESKTOP-HRLS19N/c$/globtest/**", recursive=True) if i.endswith((".xlsx", ".txt"))]
[os.remove(f) for f in files]
with open("output_file.csv", "w") as f:
    f.writelines("\n".join(files))
In my experience glob is much easier:
print([i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))])
Assuming that prints out the files you're expecting:
files = [i for i in glob.glob("//network_pc_name/c$/Users/**", recursive=True) if i.endswith((".nst", ".ost"))]
removed_files = []
for file in files:
    try:
        size = os.path.getsize(file)
        os.remove(file)
        removed_files.append("{} Bytes: {}".format(file, size))  # size is an int, so format it rather than concatenating
    except Exception as e:
        print("Could not remove file: " + file)

with open("output_file.csv", "w") as f:
    f.writelines("\n".join(removed_files))
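Since the follow-up error was a PermissionError (WinError 32), you could also catch that exception specifically instead of a broad Exception, so files locked by another process are skipped and logged while unexpected errors still surface. A small variation of the loop above (a sketch, using the same files list):

removed_files = []
skipped_files = []
for file in files:
    try:
        size = os.path.getsize(file)
        os.remove(file)
        removed_files.append("{} Bytes: {}".format(file, size))
    except PermissionError:
        # The file is open in another process (e.g. Outlook); skip it.
        skipped_files.append(file)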

Python 'Path' module's 'mkdir()' creates a directory with a name from memory?

Below is the code for:
unzipping a folder containing text files,
finding and replacing a string within those files,
zipping it back up.
import shutil
import zipfile
import sys
from pathlib import Path

class ZipProcessor:
    def __init__(self, zipname):
        self.zipname = zipname
        self.temp_directory = Path("unzipped-{}".format(zipname[:-4]))

    def process_zip(self):
        self.unzip_files()
        self.process_files()
        self.zip_files()

    def unzip_files(self):
        self.temp_directory.mkdir()
        with zipfile.ZipFile(self.zipname) as zip:
            zip.extractall(str(self.temp_directory))

    def zip_files(self):
        with zipfile.ZipFile(self.zipname, 'w') as file:
            for filename in self.temp_directory.iterdir():
                print(filename)  # second
                file.write(str(filename), filename.name)
        shutil.rmtree(str(self.temp_directory))

class ZipReplace(ZipProcessor):
    def __init__(self, filename, search_string, replace_string):
        super().__init__(filename)
        self.search_string = search_string
        self.replace_string = replace_string

    def process_files(self):
        '''perform a search and replace on all files in the
        temporary directory'''
        for filename in self.temp_directory.iterdir():
            with filename.open() as file:
                contents = file.read()
            contents = contents.replace(self.search_string, self.replace_string)
            with filename.open("w") as file:
                file.write(contents)

if __name__ == "__main__":
    ZipReplace(*sys.argv[1:4]).process_zip()
I run the code with python3 EX5_zip.py replace3.zip 'code' 'program'.
The error I receive is IsADirectoryError: [Errno 21] Is a directory: 'unzipped-replace3/replace'
My question is about the replace directory under the temp_directory path. How did it get created with that name? I suspect it is left over from a previous run of the script (I had given the zip file as replace.zip). If that's the case, how do I remove it?
I used the following link to remove it, but the directory remains.
Either:
there was already a replace subfolder in unzipped-replace3,
or there is a replace directory inside your replace3.zip file, in which case that folder was created by extractall.
So when you try to open the Path("replace") returned by iterdir with filename.open(), it fails. You should first test whether filename is a directory using filename.is_dir().
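For example, a guarded version of process_files might look like this (a sketch based on the code above; it skips subdirectories such as the extracted replace folder rather than recursing into them):

def process_files(self):
    '''Search and replace in regular files only; skip directories.'''
    for filename in self.temp_directory.iterdir():
        if filename.is_dir():
            continue  # e.g. the 'replace' directory extracted from the zip
        with filename.open() as file:
            contents = file.read()
        contents = contents.replace(self.search_string, self.replace_string)
        with filename.open("w") as file:
            file.write(contents)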

Getting the latest files from FTP folder (filename having spaces) in Python

I have a requirement to pull the latest files from an FTP folder; the problem is that the filenames contain spaces and follow a specific pattern.
Below is the code I have implemented:
from __future__ import with_statement  # __future__ imports must come before any other statement
import sys
from ftplib import FTP
import os
import socket
import time
import pandas as pd
import numpy as np
from glob import glob
import datetime as dt

ftp = FTP('')
ftp.login('', '')
ftp.cwd('')
ftp.retrlines('LIST')

filematch = '*Elig.xlsx'
downloaded = []
for filename in ftp.nlst(filematch):
    fhandle = open(filename, 'wb')
    print 'Getting ' + filename
    ftp.retrbinary('RETR ' + filename, fhandle.write)
    fhandle.close()
    downloaded.append(filename)
ftp.quit()
I understand that I can collect the listing by passing a callback (such as a list's append) to the ftp.dir() command, but since the filenames contain spaces, I am unable to split the lines reliably and pick the latest file of the type mentioned above.
Any help would be great.
You can get a file's mtime by sending the MDTM command, provided the FTP server supports it, and sort the files on the FTP server accordingly:
def get_newest_files(ftp, limit=None):
    """Retrieves newest files from the FTP connection.

    :ftp: The FTP connection to use.
    :limit: Abort after yielding this amount of files.
    """
    files = []
    # Decorate files with mtime.
    for filename in ftp.nlst():
        response = ftp.sendcmd('MDTM {}'.format(filename))
        _, mtime = response.split()
        files.append((mtime, filename))
    # Sort files by mtime and break after limit is reached.
    for index, decorated_filename in enumerate(sorted(files, reverse=True)):
        if limit is not None and index >= limit:
            break
        _, filename = decorated_filename  # Undecorate
        yield filename

downloaded = []
# Retrieve the newest file from the FTP server.
for filename in get_newest_files(ftp, limit=1):
    print 'Getting ' + filename
    with open(filename, 'wb') as file:
        ftp.retrbinary('RETR ' + filename, file.write)
    downloaded.append(filename)
The issue is that the FTP LIST command returns text meant for humans, whose format depends on the FTP server implementation.
Using PyFilesystem (in place of the standard ftplib) gives you a Pythonic listing API (see "walk") that returns structured information about the files and directories hosted on the FTP server:
http://pyfilesystem2.readthedocs.io/en/latest/index.html
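A minimal sketch with PyFilesystem2 (the fs package); the host, credentials, and path in the FTP URL are placeholders, and the modified timestamp is only available if the server reports it:

import fs  # pip install fs

# Open the FTP server as a filesystem (placeholder credentials/path).
with fs.open_fs("ftp://user:password@host/folder") as ftp_fs:
    # walk.files yields matching paths; the 'details' namespace exposes mtime.
    candidates = ftp_fs.walk.files(filter=["*Elig.xlsx"])
    newest = max(candidates, key=lambda p: ftp_fs.getinfo(p, namespaces=["details"]).modified)
    print(newest)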

python FTP download file with certain name

I have an FTP folder containing these files:
pw201602042000.nc,
pw201602042010.nc,
pw201602042020.nc,
pw201602042030.nc,
pw201602042040.nc,
pw201602042050.nc,
pw201602042100.nc,
pw201602042110.nc,
pw201602042120.nc,
pw201602042130.nc,
pw201602042140.nc,
pw201602042150.nc,
pw201602042200.nc
How do I download only the files ending with 00?
from ftplib import FTP

server = FTP("ip/serveradress")
server.login("user", "password")
server.retrlines("LIST")
server.cwd("Folder")
server.sendcmd("TYPE i")  # switch to binary mode, ready for file transfer
server.retrbinary("RETR %s" % ("pw201602042300.nc"), open("pw", "wb").write)
Once you have obtained the list of file names as list_of_files, just use fnmatch to match them against the wildcard:
import fnmatch
import os

list_of_files = server.nlst()  # NLST returns bare names; LIST output would need parsing
dest_dir = "."
for name in list_of_files:
    if fnmatch.fnmatch(name, "*00.nc"):
        with open(os.path.join(dest_dir, name), "wb") as f:
            server.retrbinary("RETR {}".format(name), f.write)
(Note that your code was writing every download to the same "pw" output file. I changed that to reuse the original name, added a destination-directory variable, and wrapped the open in a with block to ensure each file is closed when the block exits.)

os.renames for ftp in python

I want to move a large number of files from a Windows system to a Unix FTP server using Python. I have a CSV that lists the current full path and filename and the new base path to send each file to (see here for an example dataset).
I have a script using os.renames that does the transfer and directory creation on Windows, but I can't figure out a way to do it easily via FTP.
import os, glob, arcpy, csv, sys, shutil, datetime, ftplib

top = os.getcwd()
RootOutput = top
startpath = top
FileList = csv.reader(open('FileList.csv'))
filecount = 0
successcount = 0
errorcount = 0

# Copy/Move to FTP when required
ftp = ftplib.FTP('xxxxxx')
ftp.login('xxxx', 'xxxx')
directory = '/TransferredData'
ftp.cwd(directory)

##f = open(RootOutput+'\\Success_LOG.txt', 'a')
##f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
##f.close()
##
for File in FileList:
    infile = File[0]
    # local network ver
    #outfile = RootOutput + File[4]
    #os.renames(infile, outfile)
    # ftp network ver
    #outfile = RootOutput + File[4]
    #ftp.mkd(directory)
    print infile, outfile
I tried the process in http://forums.arcgis.com/threads/17047-Upload-file-to-FTP-using-Python-ftplib, but that covers uploading all the files in a directory; I have the old and new full file names and just need it to create the intermediate directories.
Thanks,
The following might work (untested):
def mkpath(ftp, path):
    path = path.rsplit('/', 1)[0]  # parent directory
    if not path:
        return
    try:
        ftp.cwd(path)
    except ftplib.error_perm:
        mkpath(ftp, path)
        ftp.mkd(path)

ftp = FTP(...)
directory = '/TransferredData/'
for File in FileList:
    infile = File[0]
    outfile = File[4].split('\\')  # need forward slashes in FTP
    outfile = directory + '/'.join(outfile)
    mkpath(ftp, outfile)
    with open(infile, 'rb') as fh:
        ftp.storbinary('STOR ' + outfile, fh)
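One thing to watch with this sketch: the cwd() probes inside mkpath leave the server's working directory changed. Since outfile is an absolute path, the STOR still lands in the right place, but if later commands assume a fixed directory you can save and restore it around each upload, e.g.:

original = ftp.pwd()  # remember the current server directory
mkpath(ftp, outfile)
with open(infile, 'rb') as fh:
    ftp.storbinary('STOR ' + outfile, fh)
ftp.cwd(original)  # restore it afterwards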
