How can I read a NetCDF file remotely with Python?
It is easy to read a file from a server over SSH with Python, but how can I replace the call sftp_client.open() with something like netCDF4.Dataset() so that I can store the result in a variable?
In the following example, I temporarily download the file I'd like to read remotely:
import os
import paramiko
import netCDF4
remotefile = 'pathtoremotefile'
localfile = 'pathtolocaltmpfile'
ssh_client = paramiko.SSHClient()
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh_client.connect('myserver', username="myname", password="mypswd")
sftp_client = ssh_client.open_sftp()
# Something similar to this, but for a NetCDF file?
# Or how to use remote_file in netCDF4 afterwards?
# remote_file = sftp_client.open(remotefile)
# Here, I just download it to manipulate it locally...
sftp_client.get(remotefile, localfile)
try:
    ncfile = netCDF4.Dataset(localfile)
    # Operations...
finally:
    sftp_client.close()
    ssh_client.close()
    os.remove(localfile)
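For context, I understand netCDF4 can also open a dataset from an in-memory bytes buffer via its memory keyword, so what I'm after is roughly the sketch below (untested; it assumes the netCDF4/netCDF-C build supports in-memory reading):

remote_file = sftp_client.open(remotefile, 'rb')
try:
    # Read the whole remote file into memory and hand the bytes to netCDF4
    data = remote_file.read()
    ncfile = netCDF4.Dataset('inmemory.nc', memory=data)
    # Operations...
    ncfile.close()
finally:
    remote_file.close()
    sftp_client.close()
    ssh_client.close()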
You can mount the remote filesystem locally with sshfs and then open the file as a local file:
import netCDF4

# Path to the file below the local sshfs mount point
localfile = 'pathtolocalmountpoint/relativepathtofile'
ncfile = netCDF4.Dataset(localfile)
# Operations...
ncfile.close()
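If the remote directory is not already mounted, the mount itself can be driven from the script; a minimal sketch, assuming sshfs is installed and the local mount point exists (host, user, and paths are placeholders):

import subprocess
import netCDF4

# Mount the remote directory over SSH (requires sshfs/FUSE on the local machine)
subprocess.run(['sshfs', 'myname@myserver:/path/to/remote/dir', '/path/to/local/mountpoint'], check=True)

ncfile = netCDF4.Dataset('/path/to/local/mountpoint/file.nc')
# Operations...
ncfile.close()

# Unmount when finished
subprocess.run(['fusermount', '-u', '/path/to/local/mountpoint'], check=True)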
I have a script that pulls a .csv file, applies some data corrections, and saves the result to my Django database. The problem is that I can't get the .csv file from the FTP server. I tried the following code but faced different errors each time.
import pandas as pd
import pysftp as sftp

with sftp.connect(your_host, your_user, your_pw) as conn:
    with conn.open("path_and_file.csv", "r") as f:
        df = pd.read_csv(f)
Error: "AttributeError: module 'pysftp' has no attribute 'connect'"
from ftplib import FTP

ftp = FTP('your_host')
ftp.login('your_user', 'your_pw')
ftp.set_pasv(False)
I couldn't get any further with that either.
How can I read a .csv file from an FTP server with pandas?
I solved my problem as below:
I downloaded the files first, then opened them with pandas.
from ftplib import FTP

with FTP(host) as ftp:
    ftp.login(user=user, passwd=password)
    print(ftp.getwelcome())
    with open("proj.csv", "wb") as f:
        ftp.retrbinary("RETR " + "proj.csv", f.write, 1024)
    with open("pers.csv", "wb") as f:
        ftp.retrbinary("RETR " + "pers.csv", f.write, 1024)
    ftp.quit()
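With the files downloaded, the "opened them with pandas" step is simply:

import pandas as pd

proj_df = pd.read_csv("proj.csv")
pers_df = pd.read_csv("pers.csv")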
import pysftp
import pandas as pd

cnopts = pysftp.CnOpts()
cnopts.hostkeys = None

with pysftp.Connection(hostname='hostname', username='username', password='password', cnopts=cnopts) as conn:
    conn.get('filename')

with open('filename') as f:
    df = pd.read_csv(f)

This should give you the DataFrame of the csv.
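If you'd rather not write the csv to disk at all, it can also be streamed straight into pandas; a rough sketch using ftplib and an in-memory buffer (host, credentials, and path are placeholders):

import io
from ftplib import FTP

import pandas as pd

buf = io.BytesIO()
with FTP('your_host') as ftp:
    ftp.login('your_user', 'your_pw')
    # Write the downloaded bytes into the in-memory buffer
    ftp.retrbinary('RETR path_and_file.csv', buf.write)

buf.seek(0)
df = pd.read_csv(buf)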
I'm trying to download a file from an FTPS server, using Python ftplib.
But the downloaded file always has 0 bytes (it is empty).
If I look at the file on the server with WinSCP, it has data (about 1 KB).
In WinSCP I'm using the options "Encryption: Explicit TLS" and "PassiveMode=False".
What is wrong with the code?
Thanks!!
This is the code I am using:
import ftplib
server='10.XX.XX.XX'
username='username'
password='password'
session = ftplib.FTP_TLS(server)
session.login(user=username,passwd=password)
session.prot_p()
session.set_pasv(False)
session.nlst()
session.cwd("home")
print(session.pwd())
filename = "test.txt"
# Open a local file to store the downloaded file
my_file = open(r'c:\temp\ftpTest.txt', 'wb')
session.retrbinary('RETR ' + filename, my_file.write, 1024)
session.quit()
You are not closing the local file after the download. You should use a context manager for that, and similarly for the FTP session:
with ftplib.FTP_TLS(server) as session:
    session.login(user=username, passwd=password)
    session.prot_p()
    session.set_pasv(False)
    session.nlst()
    session.cwd("home")
    print(session.pwd())

    filename = "test.txt"

    # Open a local file to store the downloaded file
    with open(r'c:\temp\ftpTest.txt', 'wb') as my_file:
        session.retrbinary('RETR ' + filename, my_file.write, 1024)
I'm trying to extract open-data zips from a public website into a PostGIS database I'm hosting locally. I'm using BytesIO to try to stream the individual shapefiles into the PostGIS command-line tool shp2pgsql, but I'm getting an error that the individual .dbf files are not available to the tool. Is there a more appropriate way to pull the zips, extract them into memory, and upload them to a PostgreSQL/PostGIS database? See code and error below:
Code Block
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import subprocess

# or: requests.get(url).content
count = 0
resp = urlopen("some/website/files.zip")
zipfile = ZipFile(BytesIO(resp.read()))

for file in zipfile.namelist():
    if file.endswith('.shp'):
        count += 1
        cmd = 'shp2pgsql -s 4326 {0} temp_table_{1} | PGPASSWORD=somepassword psql -h someip -p 5432 -d somedb -U someuser -q'.format(file, count)
        subprocess.call(cmd, shell=True)
Error message
Unable to open some_directory_shp/some_shp1.shp or some_directory_shp/some_shp1.SHP.
some_directory_shp/some_shp1.shp: dbf file (.dbf) can not be opened.
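For reference, one workaround that keeps the download in memory but still gives shp2pgsql real files is to extract each shapefile together with its companion files (.dbf, .shx, .prj) into a temporary directory first, since the tool reads them from disk; a rough sketch along the lines of the code above (URL, table naming, and connection details are the same placeholders):

import os
import subprocess
import tempfile
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

resp = urlopen("some/website/files.zip")
zf = ZipFile(BytesIO(resp.read()))

count = 0
with tempfile.TemporaryDirectory() as tmpdir:
    # shp2pgsql needs the .dbf/.shx/.prj next to the .shp, so extract everything
    zf.extractall(tmpdir)
    for name in zf.namelist():
        if name.endswith('.shp'):
            count += 1
            shp_path = os.path.join(tmpdir, name)
            cmd = ('shp2pgsql -s 4326 {0} temp_table_{1} | '
                   'PGPASSWORD=somepassword psql -h someip -p 5432 '
                   '-d somedb -U someuser -q').format(shp_path, count)
            subprocess.call(cmd, shell=True)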
I'm having a hard time with this simple script. It's giving me a "file or directory not found" error, but the file is there. The script is below; I've masked the user, password, and FTP site.
Here is my script
from ftplib import FTP

ftp = FTP('ftp.domain.ca')
pas = str('PASSWORD')
ftp.login(user='user', passwd=pas)
ftp.cwd('/public_html/')

filepaths = open('errorstest.csv', 'rb')
for j in filepaths:
    print(j)
    ftp.delete(str(j))
ftp.quit()
The funny thing, though, is that if I slightly change the script to call ftp.delete() with a hard-coded path, it finds the file and deletes it. So, modified to be like this:
from ftplib import FTP
ftp = FTP('ftp.domain.ca')
pas = str('PASSWORD')
ftp.login(user = 'user', passwd=pas)
ftp.cwd('/public_html/')
ftp.delete(<file path>)
ftp.quit()
I’m trying to read this from a csv file. What am I doing wrong?
What you have shown looks mostly fine, but note that you open the csv in binary mode, so each line j is a bytes object with a trailing newline, and str(j) produces something like "b'filename\r\n'" rather than the path itself. Could you try this instead?
from ftplib import FTP

ftp = FTP(host)
ftp.login(username, password)
ftp.cwd('/public_html/')
print(ftp.pwd())
print(ftp.nlst())

with open('errorstest.csv') as file:
    for line in file:
        if line.strip():
            ftp.delete(line.strip())

print(ftp.nlst())
I have a script that processes downloaded log files into csvs according to some parsing logic.
I want to write those csvs to a remote directory on a different server, due to space constraints on the server where I run the script.
I have tried a few variations of the script below, but I just can't seem to figure this out. I understand SFTP and SSH commands, but I am not sure if that is the right approach for this use case. (I have the keys and everything set up to allow remote connections between the servers.)
import os
import re
import string
import csv
import gzip
import extract5
import pickle
import shutil
import pygeoip
import paramiko
def conn():
    ssh = paramiko.SSHClient()
    ssh.connect('XXXXXXXXXX.XXXX.XXXXXXXX.COM', username='XXXXXXX')
    # Attempt to change to the output directory on the remote server
    ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command('cd /fs/fs01/crmdata/SYWR/AAM/AAM_Master/')
    return ssh

downloadroot = '/opt/admin/AAM_Scripts/'
outgoing = conn()
for sfile in os.listdir(downloadroot):
    if sfile.endswith('gz'):
        oname = sfile[sfile.find('AAM'):]
        extract5.process(downloadroot, sfile, oname, outgoing)

# delete download dictionary and pickle
for date in os.listdir(downloadroot):
    # match = re.match('(\d{4})[/.-](\d{2})[/,-](\d{2})$', date)
    if date.find('AAM_CDF_1181_') > 0:
        os.remove('%s%s' % (downloadroot, date))
os.remove('%sdownload.pkl' % (downloadroot))
Is what I am trying to do possible? Am I on the right path, or is my approach completely off? I would love some thoughts on how, or whether, I can accomplish this.
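For reference, what I have in mind is roughly the following: skip the local copy and write each csv straight to the other server over SFTP with paramiko (untested; output.csv is a placeholder name, and key-based auth is assumed to be set up):

import paramiko

ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.connect('XXXXXXXXXX.XXXX.XXXXXXXX.COM', username='XXXXXXX')
sftp = ssh.open_sftp()

# Either upload a finished local file...
sftp.put('/opt/admin/AAM_Scripts/output.csv',
         '/fs/fs01/crmdata/SYWR/AAM/AAM_Master/output.csv')

# ...or write rows to the remote file directly as they are generated
with sftp.open('/fs/fs01/crmdata/SYWR/AAM/AAM_Master/output.csv', 'w') as remote_f:
    remote_f.write('col1,col2\n')
    remote_f.write('a,b\n')

sftp.close()
ssh.close()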