Python: download files from FTP, ignoring the missing ones

I have a list of numbers in a CSV file like this:
1
2
3
4
5
And an FTP server with files named after those numbers:
1.jpg
2.jpg
4.jpg
5.jpg
(3.jpg is missing)
I want to download all the files from the FTP whose filenames are on that CSV list.
My code downloads the files successfully, but when it tries to download a file that is missing from the FTP, the program crashes with:
urllib2.URLError: <urlopen error ftp error: [Errno ftp error] 550 Can't change directory to 3.jpg: No such file or directory>
Python code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2, shutil
import pandas as pd
import numpy as np
from ftplib import FTP

FTP_server = 'ftp://user:pass@server.com/'   # note the '@' between credentials and host
ftp = FTP_server + 'the/path/to/files/'

class Test:
    def Get(self):
        data = pd.read_csv('test.csv', encoding='utf-8', delimiter=';')
        #data['REF'].replace('', np.nan, inplace=True)
        #data.dropna(subset=['REF'], inplace=True)
        data['REF'] = data['REF'].astype(int)
        new_data = data['REF']
        for ref in new_data:
            file = str(ref) + '.jpg'
            ftpfile = urllib2.urlopen(ftp + file)
            localfile = open(file, 'wb')
            shutil.copyfileobj(ftpfile, localfile)

Try = Test()
Try.Get()
I'm trying to put an if inside the for loop, but I can't get it working. Can someone give me an idea or a tip, please?

Get acquainted with try-except blocks to handle this:
for ref in new_data:
    try:
        file = str(ref) + '.jpg'
        ftpfile = urllib2.urlopen(ftp + file)
        localfile = open(file, 'wb')
        shutil.copyfileobj(ftpfile, localfile)
    except urllib2.URLError:
        print("-I- Skipping %s - doesn't exist." % file)
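An alternative, if you would rather keep an if inside the loop as the question asks: list the remote directory once with ftplib and skip names that are not there. A sketch, assuming the same (hypothetical) server, path, and credentials as above:
from ftplib import FTP

# Hypothetical host/credentials -- substitute your own.
ftp_conn = FTP('server.com')
ftp_conn.login('user', 'pass')
ftp_conn.cwd('the/path/to/files/')

available = set(ftp_conn.nlst())   # one directory listing instead of one request per file
for ref in new_data:
    filename = str(ref) + '.jpg'
    if filename not in available:
        print("-I- Skipping %s - doesn't exist." % filename)
        continue
    with open(filename, 'wb') as localfile:
        ftp_conn.retrbinary('RETR ' + filename, localfile.write)
ftp_conn.quit()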

Related

How to read a CSV file on FTP that is compressed in a zip/folder

I'm trying to:
read a .csv file (compressed in a zip file that is stored on FTP) by using ftplib
store the .csv file in a virtual in-memory file by using io
transform the virtual file into a DataFrame by using pandas
For that I'm using the code below, and it works really well for the first scenario (path1):
CODE:
import ftplib
import zipfile
import io
import pandas as pd
ftp = ftplib.FTP("theserver_name")
ftp.login("my_username","my_password")
ftp.encoding = "utf-8"
ftp.cwd('folder1/folder2')
filename = 'zipFile1.zip'
download_file = io.BytesIO()
ftp.retrbinary("RETR " + filename, download_file.write)
download_file.seek(0)
zfile = zipfile.ZipFile(download_file)
df = pd.read_csv(zfile.namelist()[0], delimiter=';')
display(df)
But in the second scenario (path2), after changing my code, I get the error below:
CODE UPDATE:
ftp.cwd('folder1/folder2/')
filename = 'zipFile2.zip'
ERROR AFTER UPDATE:
FileNotFoundError: [Errno 2] No such file or directory:
'folder3/csvFile2.csv'
It seems like Python doesn't recognize folder3 (contained in zipFile2). Is there any explanation for that, please? How can we fix it? I tried ftp.cwd('folder3') right before pd.read_csv(), but it doesn't work.
Thanks to Serge Ballesta in his post here, I finally figured out how to transform csvFile2.csv into a DataFrame. The trick is that a name from namelist() is a path inside the archive, so the member has to be opened from the archive itself rather than passed to pd.read_csv as a local path:
import ftplib
import zipfile
import io
import pandas as pd
ftp = ftplib.FTP("theserver_name")
ftp.login("my_username","my_password")
ftp.encoding = "utf-8"
flo = io.BytesIO()
ftp.retrbinary('RETR /folder1/folder2/zipFile2.zip', flo.write)
flo.seek(0)
with zipfile.ZipFile(flo) as archive:
    with archive.open('folder3/csvFile2.csv') as fd:
        df = pd.read_csv(fd, delimiter=';')
display(df)
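If the member path inside the zip is not known in advance, a small sketch (reusing the flo buffer from above) can find the CSV from the archive listing instead of hard-coding 'folder3/csvFile2.csv':
with zipfile.ZipFile(flo) as archive:
    # namelist() returns the paths *inside* the archive, e.g. 'folder3/csvFile2.csv'
    csv_members = [n for n in archive.namelist() if n.endswith('.csv')]
    with archive.open(csv_members[0]) as fd:
        df = pd.read_csv(fd, delimiter=';')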

Removing files from a server using FTP in Python

I'm having a hard time with this simple script. It's giving me a "file or directory not found" error, but the file is there. In the script below I've masked the user and pass, plus the FTP site.
Here is my script
from ftplib import FTP
ftp = FTP('ftp.domain.ca')
pas = str('PASSWORD')
ftp.login(user = 'user', passwd=pas)
ftp.cwd('/public_html/')
filepaths = open('errorstest.csv', 'rb')
for j in filepaths:
    print(j)
    ftp.delete(str(j))
ftp.quit()
The funny thing though is that if I slightly change the script to call ftp.delete() with a hard-coded path, it finds the file and deletes it. So, modified to be like this:
from ftplib import FTP
ftp = FTP('ftp.domain.ca')
pas = str('PASSWORD')
ftp.login(user = 'user', passwd=pas)
ftp.cwd('/public_html/')
ftp.delete(<file path>)
ftp.quit()
I'm trying to read the paths from a CSV file. What am I doing wrong?
What you have shown seems fine, but could you try this?
from ftplib import FTP
ftp = FTP(host)
ftp.login(username, password)
ftp.cwd('/public_html/')
print(ftp.pwd())
print(ftp.nlst())
with open('errorstest.csv') as file:
    for line in file:
        if line.strip():
            ftp.delete(line.strip())
print(ftp.nlst())
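The likely culprit in the original script: iterating over a file opened in 'rb' mode yields each line with its line ending still attached, so the server is asked to delete a name like '1.jpg\r\n'; that is why the line.strip() above fixes it. A quick way to see this:
# Each raw line keeps its line ending, which is not part of the remote filename.
with open('errorstest.csv', 'rb') as f:
    for raw in f:
        print(repr(raw))   # e.g. b'1.jpg\r\n' in Python 3, '1.jpg\r\n' in Python 2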

Recursively get metadata of an FTP folder and all subfolders

I am trying to figure out how to retrieve metadata from an FTP folder and all subfolders. I want to get the file name, file size, and date/time (of when the file was modified). I found the sample code (below) online. I entered my credentials, ran the code, and received this error: No hostkey for host ftp.barra.com found.
Is there a quick fix for this?
from __future__ import print_function
import os
import time
import pysftp

ftp_username = 'xxx'
ftp_password = 'xxx'
ftp_host = 'xxx'

year = time.strftime("%Y")
month = time.strftime("%m")
day = time.strftime("%d")
ftp_dir = 'data/' + year + '/' + month

filename = time.strftime('ftp_file_lists.txt')
fout = open(filename, 'w')

wtcb = pysftp.WTCallbacks()
with pysftp.Connection(ftp_host, username=ftp_username, password=ftp_password) as sftp:
    sftp.walktree(ftp_dir, fcallback=wtcb.file_cb, dcallback=wtcb.dir_cb, ucallback=wtcb.unk_cb)

print(len(wtcb.flist))
for fpath in wtcb.flist:
    print(fpath, file=fout)

sftp.close()
Code from here: http://alvincjin.blogspot.com/2014/09/recursively-fetch-file-paths-from-ftp.html
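As for the error itself: pysftp checks the server's host key against ~/.ssh/known_hosts by default, and "No hostkey for host ... found" means there is no entry for that host. A quick (but insecure, since it skips host verification) workaround is to disable the check through CnOpts; a sketch reusing the same variables as above:
import pysftp

cnopts = pysftp.CnOpts()
cnopts.hostkeys = None   # WARNING: disables host-key verification entirely

with pysftp.Connection(ftp_host, username=ftp_username,
                       password=ftp_password, cnopts=cnopts) as sftp:
    sftp.walktree(ftp_dir, fcallback=wtcb.file_cb,
                  dcallback=wtcb.dir_cb, ucallback=wtcb.unk_cb)
The safer fix is to add the server's key to known_hosts first (for example with ssh-keyscan).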

urlretrieve hangs when downloading file

I have a very simple script that uses urllib to retrieve a zip file and place it on my desktop. The zip file is only a couple of MB in size and doesn't take long to download. However, the script never seems to finish; it just hangs. Is there a way to forcibly close the urlretrieve? ...or a better solution?
The URL points to a public FTP site. Is the FTP perhaps the cause?
I'm using Python 2.7.8.
url = r'ftp://ftp.ngs.noaa.gov/pub/DS_ARCHIVE/ShapeFiles/IA.ZIP'
zip_path = r'C:\Users\***\Desktop\ngs.zip'
urllib.urlretrieve(url, zip_path)
Thanks in advance!
---Edit---
Was able to use ftplib to accomplish the task...
import os
from ftplib import FTP
import zipfile
ftp_site = 'ftp.ngs.noaa.gov'
ftp_file = 'IA.ZIP'
download_folder = '//folder to place file'
download_file = 'name of file'
download_path = os.path.join(download_folder, download_file)
# Download file from ftp
ftp = FTP(ftp_site)
ftp.login()
ftp.cwd('pub/DS_ARCHIVE/ShapeFiles') #change directory
ftp.retrlines('LIST') #show me the files located in directory
download = open(download_path, 'wb')
ftp.retrbinary('RETR ' + ftp_file, download.write)
ftp.quit()
download.close()
# Unzip if .zip file is downloaded
with zipfile.ZipFile(download_path, "r") as z:
    z.extractall(download_folder)
urllib has very poor support for error catching and debugging; urllib2 is a much better choice. The urlretrieve equivalent in urllib2 is:
resp = urllib2.urlopen(im_url)
with open(sav_name, 'wb') as f:
    f.write(resp.read())
And the errors to catch are:
urllib2.URLError, urllib2.HTTPError, httplib.HTTPException
And you can also catch socket.error in case the network is down.
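Putting that together, a minimal sketch with a hypothetical URL and output name (the timeout argument guards against the hang described in the question):
import socket
import httplib
import urllib2

im_url = 'ftp://ftp.example.com/pub/file.zip'   # hypothetical URL
sav_name = 'file.zip'

try:
    resp = urllib2.urlopen(im_url, timeout=30)  # fail instead of hanging forever
    with open(sav_name, 'wb') as f:
        f.write(resp.read())
except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
    print('Download failed: %s' % e)
Note that urllib2.HTTPError is a subclass of urllib2.URLError, so catching URLError covers it.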
You can use the Python requests library with the requests-ftp module. It provides an easier API and handles exceptions better. See: https://pypi.python.org/pypi/requests-ftp and http://docs.python-requests.org/en/latest/

Download a zip file to a local drive and extract all files to a destination folder using Python 2.5

I am trying to download a zip file to a local drive and extract all files to a destination folder.
So I have come up with a solution, but it only "downloads" a file by copying it from one directory to another; it doesn't work for downloading from a URL. For the extraction, I am able to get it to work in 2.6 but not in 2.5. So I am definitely open to any suggestions for a workaround or another approach.
Thanks in advance.
######################################
'''this part works but it is not good for URL links'''
import shutil
sourceFile = r"C:\Users\blueman\master\test2.5.zip"
destDir = r"C:\Users\blueman\user"
shutil.copy(sourceFile, destDir)
print "file copied"
######################################################
'''extract works but not good for version 2.5'''
import zipfile
GLBzipFilePath =r'C:\Users\blueman\user\test2.5.zip'
GLBextractDir =r'C:\Users\blueman\user'
def extract(zipFilePath, extractDir):
    zfile = zipfile.ZipFile(zipFilePath)   # zipfile.ZipFile, not the module itself
    zfile.extractall(path=extractDir)
    print "it works"
extract(GLBzipFilePath, GLBextractDir)
######################################################
urllib.urlretrieve can get a file (zip or otherwise;-) from a URL to a given path.
extractall is indeed new in 2.6, but in 2.5 you can use an explicit loop (get all names, open each name, etc). Do you need example code?
So here's the general idea (needs more try/except if you want to give a nice error message in each and every case which could go wrong, of which, of course, there are a million variants -- I'm only using a couple of such cases as examples...):
import os
import urllib
import zipfile

def getunzipped(theurl, thedir):
    name = os.path.join(thedir, 'temp.zip')
    try:
        name, hdrs = urllib.urlretrieve(theurl, name)
    except IOError, e:
        print "Can't retrieve %r to %r: %s" % (theurl, thedir, e)
        return
    try:
        z = zipfile.ZipFile(name)
    except zipfile.error, e:
        print "Bad zipfile (from %r): %s" % (theurl, e)
        return
    for n in z.namelist():
        dest = os.path.join(thedir, n)
        destdir = os.path.dirname(dest)
        if not os.path.isdir(destdir):
            os.makedirs(destdir)
        data = z.read(n)
        f = open(dest, 'wb')   # binary mode, so member bytes are written unchanged
        f.write(data)
        f.close()
    z.close()
    os.unlink(name)
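Usage is then a single call (hypothetical URL and directory):
getunzipped('http://www.example.com/file.zip', '/tmp/dest')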
For downloading, look at urllib:
import urllib
webFile = urllib.urlopen(url)
For unzipping, use zipfile. See also this example.
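That only opens the connection; a hedged completion that actually writes the data to disk (file names assumed) could be:
import urllib

url = 'http://www.example.com/file.zip'   # hypothetical URL
webFile = urllib.urlopen(url)
localFile = open('file.zip', 'wb')       # binary mode for zip data
localFile.write(webFile.read())
localFile.close()
webFile.close()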
The shortest way I've found so far is to use @alex's answer, but with ZipFile.extractall() instead of the loop:
from zipfile import ZipFile
from urllib import urlretrieve
from tempfile import mktemp
filename = mktemp('.zip')
destDir = mktemp()
theurl = 'http://www.example.com/file.zip'
name, hdrs = urlretrieve(theurl, filename)
thefile=ZipFile(filename)
thefile.extractall(destDir)
thefile.close()
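One caveat: tempfile.mktemp only generates a name and is documented as race-prone. If that matters, mkdtemp creates the directory securely and keeps the same flow, as in this sketch:
from zipfile import ZipFile
from urllib import urlretrieve
from tempfile import mkdtemp
import os

destDir = mkdtemp()                            # directory is actually created
filename = os.path.join(destDir, 'file.zip')
theurl = 'http://www.example.com/file.zip'

name, hdrs = urlretrieve(theurl, filename)
thefile = ZipFile(filename)
thefile.extractall(destDir)
thefile.close()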
