Upload a file-like object with Paramiko?

I have a bunch of code that looks like this:
with tempfile.NamedTemporaryFile() as tmpfile:
    tmpfile.write(fileobj.read())  # fileobj is some file-like object
    tmpfile.flush()
    try:
        self.sftp.put(tmpfile.name, path)
    except IOError:
        # error handling removed for ease of reading
        pass
Is it possible to do an upload like this without having to write the file out somewhere?

Update: As of Paramiko 1.10, you can use putfo:
self.sftp.putfo(fileobj, path)
Instead of using paramiko.SFTPClient.put, you can use paramiko.SFTPClient.open, which returns a file-like object that you can write to. Something like this:
f = self.sftp.open(path, 'wb')
f.write(fileobj.read())
f.close()
Note that it may be worthwhile to feed paramiko data in 32 KiB chunks, since that's the largest chunk the underlying SSH protocol can handle without breaking it into multiple packets.
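For example, a minimal sketch of that chunked variant (assuming fileobj is an open file-like object and self.sftp is a connected SFTPClient, as in the question):

CHUNK = 32768  # 32 KiB: one write per SSH packet
remote = self.sftp.open(path, 'wb')
try:
    while True:
        data = fileobj.read(CHUNK)
        if not data:
            break
        remote.write(data)
finally:
    remote.close()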

Is StringIO what you're looking for? (See the StringIO docs.)
SFTPClient's get() and put() functions take paths and not file-handles, which makes things a bit awkward.
You could write a wrapper for paramiko.SFTPClient to give it the functionality that you want.
Here's my best untested attempt:
import os

from paramiko import SFTPClient, SFTPAttributes

class SFTPClient2(SFTPClient):
    def put(self, local_file, remotepath, callback=None, confirm=True):
        fl = local_file  # note: requires a real file (fileno() is used below)
        file_size = os.fstat(fl.fileno()).st_size
        try:
            fr = self.file(remotepath, 'wb')
            fr.set_pipelined(True)
            size = 0
            try:
                while True:
                    data = fl.read(32768)
                    if len(data) == 0:
                        break
                    fr.write(data)
                    size += len(data)
                    if callback is not None:
                        callback(size, file_size)
            finally:
                fr.close()
        finally:
            fl.close()
        if confirm:
            s = self.stat(remotepath)
            if s.st_size != size:
                raise IOError('size mismatch in put! %d != %d' % (s.st_size, size))
        else:
            s = SFTPAttributes()
        return s

    def get(self, remotepath, local_file, callback=None):
        fr = self.file(remotepath, 'rb')
        file_size = self.stat(remotepath).st_size
        fr.prefetch()
        try:
            fl = local_file
            try:
                size = 0
                while True:
                    data = fr.read(32768)
                    if len(data) == 0:
                        break
                    fl.write(data)
                    size += len(data)
                    if callback is not None:
                        callback(size, file_size)
            finally:
                # stat before closing; fstat on a closed file raises
                fl.flush()
                local_size = os.fstat(fl.fileno()).st_size
                fl.close()
        finally:
            fr.close()
        if local_size != size:
            raise IOError('size mismatch in get! %d != %d' % (local_size, size))
If it works, the get and put functions should now take local file-handles rather than paths.
All I had to do was get rid of the code that opens the file from the path, and change the code that gets the size of the file to use os.fstat instead of os.stat.
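A hypothetical usage sketch (transport here stands for an already-authenticated paramiko.Transport; the names are illustrative only):

sftp = SFTPClient2.from_transport(transport)
f = open('local.bin', 'rb')
sftp.put(f, '/remote/path/local.bin')  # put() closes f when it is done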

Related

I can't break a while loop in python

I'm trying to transfer files with Python's socket module. I can transfer and write the file, but I can't stop the receiving function when there are no more bytes to write.
FORMAT="utf-8"
HEADER=512
CLIENT CODE
FORMAT="utf-8"
HEADER=512
def File(path):
    name = os.path.basename(path)
    client.send("file".encode(FORMAT))
    client.send(name.encode(FORMAT))
    print(client.recv(HEADER).decode(FORMAT))
    f = open(path, "rb")
    l = f.read(HEADER)
    while l:
        client.send(l)
        l = f.read(HEADER)
    f.close()
    print("Finish")
SERVER CODE
def Save(conn):
    name = conn.recv(HEADER).decode(FORMAT)
    conn.send(f"Saving {name}".encode(FORMAT))
    print(name)
    Writing = True
    with open(PATH + name, "wb") as f:
        print("Writing file")
        while Writing:
            data = conn.recv(HEADER)
            if not data:
                Writing = False
            f.write(data)
    print("File written")
    conn.close()
To know when a socket has finished, take a look at How does the python socket.recv() method know that the end of the message has been reached?.
In any case, assuming each recv() returns exactly HEADER bytes is bad practice: a single recv() may return less than requested (due to network conditions).
You are better off tracking how much has actually been received against the desired amount, as in this example:
def receive(self, size):
    from_client = b''
    remaining_to_receive = size
    while 0 < remaining_to_receive:
        data = self.sock.recv(remaining_to_receive)
        if not data:
            break
        from_client += data
        remaining_to_receive -= len(data)
    if 0 < remaining_to_receive:
        raise RuntimeError(Connection.NOT_ALL_DATA_RECEIVED_ERROR)
    return from_client
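For completeness, a hedged sketch of a matching sender side (send_with_length is an illustrative name, not part of the code above): prefixing each payload with a fixed 4-byte length lets the receiver first read exactly 4 bytes with receive(), unpack the size with struct.unpack('!I', ...), and then call receive() again for exactly that many payload bytes.

import struct

def send_with_length(sock, payload):
    # 4-byte big-endian length header, then the payload itself
    sock.sendall(struct.pack('!I', len(payload)) + payload)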

Prune file size sent from server socket

I am connecting to a pre-configured server that serves four different file formats with different sizes. Each file is prefixed with its size...
Example: lighthouse.jpg
561276ÿØÿà JFIF ` ` ÿî Adobe
The "561276" is the file size and needs to be pruned before saving the file.
Example: randomText.txt
45711111111111111111111111111111111111111111111111111111111
222222222222222222222222222222222222222222222222222222222
33333333333333333333333333333333333333333333333333
44444444444444444444444444444444444444444444444444444444
66666666666666666666
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
cvccccccccccccccccccccccccccccccccccccccccccccccc
ddddddddddddddddddddddddddddddddddddddddddddddddddd
The "457" is the file size and needs to be pruned before saving the file.
Some files have a size that is only 3 digits long; some have a size that is 6 digits long (as seen here). I want to make my code size-agnostic, regardless of how many digits are in the size.
I've tried using:
while len(buf) < 4:
    buf += sock.recv(4 - len(buf))
size = struct.unpack('!i', buf)
but this only prunes the first four digits.
AND
I've tried using
len = sock.recv(4)
data = sock.recv(len)
but once again... only prunes the first four digits
Here is what I have so far:
def get_size():
    buf = ''
    while len(buf) < 4:
        buf += sock.recv(4 - len(buf))
    size = struct.unpack('!i', buf)
    print "[*] Receiving %s bytes" % size

def download_pic():
    size = get_size()
    fname = 'tst.jpg'
    with open(fname, 'wb') as img:
        while True:
            data = sock.recv(1024)
            if not data:
                break
            img.write(data)
    print '[*] {0} received!'.format(fname)

def main():
    doconnectionstuffandprinttoconsole()  # establishes connection
    answer = input("[>] Your Selection: ")
    sock.send(str(answer))
    if answer == 2:
        download_pic()
    sock.close()
Any help in pruning the size from the file(s) would be greatly appreciated!
Jason Harper's suggestion (#jasonharper) got me thinking. When I ran repr(data) on the chunks from randomText.txt, I saw that the data had a break in it that looked like...
'457''1111111111111111111111...
The server was attempting to send two different chunks (one at a time), but they kept getting merged into one chunk. So I increased my sock.recv(64) to sock.recv(256), and it then arrived as two chunks:
'457'
'111111111...' [truncated]
NEW AND IMPROVED CODE!
import socket
import sys
import struct
import os

user1 = {'user1': 91827364}
user2 = {'user2': 19283746}
user3 = {'user3': 46372819}

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('127.0.0.1', 2058))

def print_data():
    data_rcv = sock.recv(1024)
    print "[-] {0}".format(data_rcv)

def download_file(format):
    fname = 'download'
    fullname = fname + '.' + format
    try:
        with open(fullname, 'wb') as txt:
            size = sock.recv(256)  # renamed from "len" to avoid shadowing the builtin
            while True:
                data = sock.recv(int(size))
                if not data:
                    break
                txt.write(data)
        print("[*] {0} successfully downloaded with a length of {1} characters!".format(fullname, size))
    except Exception:
        print("[!] Error receiving file. Please try again.")

def connect():
    print("[*] Sending Length")
    sock.send("5")
    my_struct = struct.pack('5s i', 'user1', 91827364)
    print("[*] Sending User1 Struct")
    sock.send(my_struct)
    print_data()

def main():
    print_data()
    connect()
    print_data()
    answer = input("[>] Your Selection: ")
    sock.send(str(answer))
    if answer == 2:  # Option to download Lighthouse.jpg
        download_file("jpg")
    elif answer == 4:  # Option to download randomText.txt
        download_file("txt")
    sock.close()

if __name__ == "__main__":
    main()
MY OUTPUT
[-] Please enter credentials
[*] Sending Length
[*] Sending User1 Struct
[-] Authenticated
[-] Choose a file to retrieve from the following list (enter the number):
1. photo.png
2. Lighthouse.jpg
3. npp.6.8.5.Installer.exe
4. randomText.txt
[>] Your Selection: 2
[*] download.jpg successfully downloaded with a length of 561276 characters!
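A word of caution on this solution (not from the thread above): TCP does not guarantee that the server's two send() calls arrive as two separate recv() chunks, so the size and payload may still arrive merged under load. Once the size is known, a safer pattern is to loop until exactly that many bytes have arrived; recv_exact below is an illustrative helper, not part of the code above.

def recv_exact(sock, n):
    # Keep reading until exactly n bytes have arrived (recv may return less).
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise EOFError('connection closed before %d bytes arrived' % n)
        buf += chunk
    return buf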

Use of subprocess.call results in "too many open files"

I have the following code to create thumbnails and save images. However, after about 1000 items it raises an error saying too many open files. Where is this coming from? And how would I fix the code?
def download_file(url, extension='jpg'):
    """Download a large file. Return path to saved file."""
    req = requests.get(url)
    if not req.ok:
        return None
    guid = str(uuid.uuid4())
    tmp_filename = '/tmp/%s.%s' % (guid, extension)
    with open(tmp_filename, 'w') as f:
        for chunk in req.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                f.flush()
    return tmp_filename

def update_artwork_item(item):
    # Download the file
    tmp_filename = util.download_file(item.artwork_url)
    # Create thumbs
    THUMB_SIZES = [(1000, 120), (1000, 30)]
    guid = str(uuid.uuid4())
    S3_BASE_URL = 'https://s3-us-west-1.amazonaws.com/xxx/'
    try:
        for size in THUMB_SIZES:
            outfile = '%s_%s.jpg' % (guid, size[1])
            img = Image.open(tmp_filename).convert('RGB')
            img.thumbnail(size, Image.ANTIALIAS)
            img.save(outfile, "JPEG")
            s3_cmd = '%s %s premiere-avails --norr --public' % (S3_CMD, outfile)  # doesn't work half the time
            x = subprocess.check_call(shlex.split(s3_cmd))
            if x: raise
            subprocess.call(['rm', outfile], stdout=FNULL, stderr=subprocess.STDOUT)
    except Exception, e:
        print '&&&&&&&&&&', Exception, e
    else:
        # Save the artwork icons
        item.artwork_120 = S3_BASE_URL + guid + '_120.jpg'
        item.artwork_30 = S3_BASE_URL + guid + '_30.jpg'
        # hack to fix parallel saving
        while True:
            try:
                item.save()
            except Exception, e:
                print '******************', Exception, e
                time.sleep(random.random()*1e-1)
                continue
            else:
                subprocess.call(['rm', tmp_filename], stdout=FNULL, stderr=subprocess.STDOUT)
                break
It's almost certainly the way files are being handled here. Note that subprocess.call is synchronous: it runs the command, waits for it to finish, and returns the exit code (see the documentation), so it does not return a pipe object that you have to close. The more likely sources of leaked descriptors are Image.open, which keeps the underlying file open until the image data is loaded and the object is garbage-collected, and the many short-lived child processes spawned just to delete files, each of which temporarily consumes descriptors while it runs.
Either way, by far the easiest thing to do is to remove the file from Python by calling os.remove instead of piping to the Unix rm command. Your use of check_call for the upload step is fine.
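A minimal sketch of that change (using outfile from the loop in the question):

import os

# Delete the thumbnail in-process instead of spawning `rm`:
try:
    os.remove(outfile)
except OSError:
    pass  # already gone or never created

The same replacement works for the rm of tmp_filename at the end.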

Progress bar with Python Dropbox Api

I'm working making a progress bar for my python Dropbox app using the Dropbox API. The problem is I can't figure out how to get the number of bytes written so far so that I can build it. Is there any way to do this using the python Dropbox API? If not could I measure the bytes being sent out from my system instead, possibly using the os module?
I'm trying to get it to work with get_chunked_uploader, but it would be great if I could also get the bytes written for put_file(), file_copy(), and file_move(). My code so far is something like this:
if file_size >= 4194304:
    big_file = open(path_to_file, 'rb')
    uploader = client.get_chunked_uploader(big_file)
    print "uploading " + path_to_file
    while uploader.offset < file_size:
        percent_complete = bytes_written / file_size * 100
        clearscreen()
        print "%.2f" % percent_complete + "%"
Thanks!
I reimplemented the ChunkedUploader:
f = open(filetoupload, 'rb')
uploader = MMChunkedUploader(self.client, f, file_size, 1024*200)
uploader.upload_chunked()
uploader.finish(dropboxfilename)

class MMChunkedUploader(object):
    """Contains the logic around a chunked upload, which uploads a
    large file to Dropbox via the /chunked_upload endpoint.
    """
    def __init__(self, client, file_obj, length, chunk_size=4 * 1024 * 1024):
        self.client = client
        self.offset = 0
        self.upload_id = None
        self.last_block = None
        self.file_obj = file_obj
        self.target_length = length
        self.chunk_size = chunk_size
        self.clocknumber = 0
        dec = float(self.target_length) / chunk_size - self.target_length // chunk_size
        if dec > 0:
            self.totalblock = self.target_length // chunk_size + 1
        else:
            self.totalblock = self.target_length // chunk_size

    def upload_chunked(self, chunk_size=0):
        """Uploads data from this ChunkedUploader's file_obj in chunks, until
        an error occurs. Throws an exception when an error occurs, and can
        be called again to resume the upload.

        Parameters
            chunk_size
                The number of bytes to put in each chunk. (Default 4 MB.)
        """
        if chunk_size == 0:
            chunk_size = self.chunk_size
        self.clocknumber = 0
        while self.offset < self.target_length:
            self.clocknumber += 1
            print "Block n.", repr(self.clocknumber), "of", repr(self.totalblock), "%", round((float(self.clocknumber) * 100) / self.totalblock, 0)
            next_chunk_size = min(chunk_size, self.target_length - self.offset)  # the smaller of chunk_size and the bytes remaining
            if self.last_block is None:
                self.last_block = self.file_obj.read(next_chunk_size)
                print "Reading file block"
            try:
                (self.offset, self.upload_id) = self.client.upload_chunk(
                    StringIO(self.last_block), next_chunk_size, self.offset, self.upload_id)
                self.last_block = None
            except dropbox.rest.ErrorResponse as e:
                # Handle the case where the server tells us our offset is wrong.
                must_reraise = True
                if e.status == 400:
                    reply = e.body
                    if "offset" in reply and reply['offset'] != 0 and reply['offset'] > self.offset:
                        self.last_block = None
                        self.offset = reply['offset']
                        must_reraise = False
                if must_reraise:
                    raise

    def finish(self, path, overwrite=False, parent_rev=None):
        path = "/commit_chunked_upload/%s%s" % (self.client.session.root, dropbox.client.format_path(path))
        params = dict(
            overwrite=bool(overwrite),
            upload_id=self.upload_id,
        )
        if parent_rev is not None:
            params['parent_rev'] = parent_rev
        url, params, headers = self.client.request(path, params, content_server=True)
        return self.client.rest_client.POST(url, params, headers)
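For the progress-bar part of the question: upload_chunked() keeps uploader.offset up to date with the bytes the server has confirmed so far, so a progress display can be driven by polling it between chunks. A minimal sketch (uploader and file_size as above; purely illustrative):

percent_complete = uploader.offset / float(file_size) * 100
print "%.2f%% uploaded" % percent_complete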

Python urllib2 Progress Hook

I am trying to create a download progress bar in python using the urllib2 http client. I've looked through the API (and on google) and it seems that urllib2 does not allow you to register progress hooks. However the older deprecated urllib does have this functionality.
Does anyone know how to create a progress bar or reporting hook using urllib2? Or are there some other hacks to get similar functionality?
Here's a fully working example that builds on Anurag's approach of reading a response in chunks. My version allows you to set the chunk size and attach an arbitrary reporting function:
import urllib2, sys

def chunk_report(bytes_so_far, chunk_size, total_size):
    percent = float(bytes_so_far) / total_size
    percent = round(percent*100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        sys.stdout.write('\n')

def chunk_read(response, chunk_size=8192, report_hook=None):
    total_size = response.info().getheader('Content-Length').strip()
    total_size = int(total_size)
    bytes_so_far = 0
    while 1:
        chunk = response.read(chunk_size)
        bytes_so_far += len(chunk)
        if not chunk:
            break
        if report_hook:
            report_hook(bytes_so_far, chunk_size, total_size)
    return bytes_so_far

if __name__ == '__main__':
    response = urllib2.urlopen('http://www.ebay.com')
    chunk_read(response, report_hook=chunk_report)
Why not just read the data in chunks and do whatever you want in between, e.g. run it in a thread, hook it into a UI, etc.?
import urllib2

urlfile = urllib2.urlopen("http://www.google.com")
data_list = []
chunk = 4096
while 1:
    data = urlfile.read(chunk)
    if not data:
        print "done."
        break
    data_list.append(data)
    print "Read %s bytes" % len(data)
output:
Read 4096 bytes
Read 3113 bytes
done.
urlgrabber has built-in support for progress notification.
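A brief sketch from memory of urlgrabber's classic API (verify the names against its documentation before relying on them):

from urlgrabber import urlgrab
from urlgrabber.progress import TextMeter

# TextMeter prints a console progress bar as the file downloads.
urlgrab('http://example.com/file.bin', 'file.bin', progress_obj=TextMeter())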
Simplified version:
temp_filename = "/tmp/" + file_url.split('/')[-1]
f = open(temp_filename, 'wb')
remote_file = urllib2.urlopen(file_url)
try:
total_size = remote_file.info().getheader('Content-Length').strip()
header = True
except AttributeError:
header = False # a response doesn't always include the "Content-Length" header
if header:
total_size = int(total_size)
bytes_so_far = 0
while True:
buffer = remote_file.read(8192)
if not buffer:
sys.stdout.write('\n')
break
bytes_so_far += len(buffer)
f.write(buffer)
if not header:
total_size = bytes_so_far # unknown size
percent = float(bytes_so_far) / total_size
percent = round(percent*100, 2)
sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, total_size, percent))
Minor modification to Triptych's response to allow for actually writing out the file (Python 3):
import sys
from urllib.request import urlopen

def chunk_report(bytes_so_far, chunk_size, total_size):
    percent = float(bytes_so_far) / total_size
    percent = round(percent*100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        sys.stdout.write('\n')

def chunk_read(response, chunk_size=8192, report_hook=None):
    total_size = response.info().get("Content-Length").strip()
    total_size = int(total_size)
    bytes_so_far = 0
    data = b""
    while 1:
        chunk = response.read(chunk_size)
        bytes_so_far += len(chunk)
        if not chunk:
            break
        if report_hook:
            report_hook(bytes_so_far, chunk_size, total_size)
        data += chunk
    return data
Usage:
with open(out_path, "wb") as f:
response = urlopen(filepath)
data_read = chunk_read(response, report_hook=chunk_report)
f.write(data_read)
