How to download large file with binary mode in python?

How to download large file with binary mode in python? - python

I am code a download function in python. The file size >1GB. The server is linux, HTTP server is Karrigell. Client is browse, Firefox or IE. I meet a big trouble.
At first, I use sys.stdout() to send file content.
file = open(path, 'rb')
size = os.path.getsize(path)
RESPONSE['Pragma'] = 'public'
RESPONSE['Expires'] = '0'
RESPONSE['Cache-Control'] = 'must-revalidate, pre-check=0'
RESPONSE['Content-Disposition'] = 'attachment; filename="' + os.path.basename(path) + '"'
RESPONSE['Content-type'] = "application/octet-stream"
RESPONSE['Content-Transfer-Encoding'] = 'binary'
RESPONSE['Content-length'] = str(os.path.getsize(path))
sys.stdout.flush()
chunk_size = 10000
handle = open(path, "rb")
while True:
buffer = handle.read(chunk_size)
if buffer:
STDOUT(buffer)
else:
break
sys.stdout.flush()
The problem is the server out of memory! I know, stdout write content to memory first, then memory send to socket.
So, I modify the function. Send content to socket directly. I use the py-sendfile module. http://code.google.com/p/py-sendfile/
file = open(path, 'rb')
size = os.path.getsize(path)
sock = REQUEST_HANDLER.sock
sock.sendall("""HTTP/1.1 200 OK\r\nPragma: no-cache\r\nExpires: 0\r\nCache-Control: no-cache, no-store\r\nContent-Disposition: attachment; filename="%s"\r\nContent-Type: application/octet-stream\r\nContent-Length: %u\r\nContent-Range: bytes 0-4096/%u\r\nLocation: "%s"\r\n\r\n""" % (os.path.basename(path), size, size, os.path.basename(path)))
offset = 0
nbytes = 4096
while 1:
try:
sent = sendfile.sendfile(sock.fileno(), file.fileno(), offset, nbytes)
except OSError, err:
if err.errno in (errno.EAGAIN, errno.EBUSY): # retry
continue
raise
else:
if sent == 0:
break # done
offset += sent
This time, the server memory is OK, but browse die! The browse memory rise quickly! Not free
until the socket accept whole file content.
I don't know how to deal with these problems. I think the second idea is right, send content to socket directly. But why browse can't free memory while accept data?

You should try to download the file in chunks. This is an example that works for me using urllib2
import os
import urllib2
import math
def downloadChunks(url):
"""Helper to download large files
the only arg is a url
this file will go to a temp directory
the file will also be downloaded
in chunks and print out how much remains
"""
baseFile = os.path.basename(url)
#move the file to a more uniq path
os.umask(0002)
temp_path = "/tmp/"
try:
file = os.path.join(temp_path,baseFile)
req = urllib2.urlopen(url)
total_size = int(req.info().getheader('Content-Length').strip())
downloaded = 0
CHUNK = 256 * 10240
with open(file, 'wb') as fp:
while True:
chunk = req.read(CHUNK)
downloaded += len(chunk)
print math.floor( (downloaded / total_size) * 100 )
if not chunk: break
fp.write(chunk)
except urllib2.HTTPError, e:
print "HTTP Error:",e.code , url
return False
except urllib2.URLError, e:
print "URL Error:",e.reason , url
return False
return file

Related

python continue download from where I left off

I'm trying to download a very large file in collab to my gDrive. Sometimes the connection cuts out, and It requires I restart. Is there a way I can download from where I left off?
My code looks like so:
from requests import get
import sys
def download(url, file_name):
# open in binary mode
with open(file_name, "wb") as f:
print("Downloading %s" % file_name)
response = get(url, stream=True)
total_length = response.headers.get('content-length')
if total_length is None: # no content length header
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
done = int(50 * dl / total_length)
sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) )
sys.stdout.flush()

Having issues with python web server

# Parse HTTP headers
headers = request.split('\n')
filename = headers[0].split()[1]
# Get the content of the file
if filename == '/':
filename = '/index.html'
# Get the content of htdocs/index.html
try:
fin = open(filename)
content = fin.read()
fin.close()
#Error, not found
except FileNotFoundError:
response = 'HTTP/1.0 404 NOT FOUND\n\nFile Not Found'
unable to get this to work below any ideas?? Unsure what exactly could be wrong here

Python - Receive muliple files

I'm learning socket programming and Python and I need to create a server that accepts several files from a client.
when I receive the one of the files I get this error:
ValueError: invalid literal for int() with base 2: '<!DOCTYPE html PUBLIC "-//W3C//D'
I do not understand why!
Thanks for your help!
client.py
for files in directory:
try:
print files
filename = files
size = len(filename)
size = bin(size)[2:].zfill(16)
s.send(size)
s.send(filename)
filename = os.path.join(path,filename)
filesize = os.path.getsize(filename)
filesize = bin(filesize)[2:].zfill(32) #
s.send(filesize)
file_to_send = open(filename, 'rb')
l = file_to_send.read()
s.sendall(l)
file_to_send.close()
print 'File Sent'
except socket.error, e:
print "Error sending data: %s" % e
server.py
while True:
size = clientsocket.recv(16)
if not size:
break
size = int(size)
filename = clientsocket.recv(size)
filesize = clientsocket.recv(32)
filesize = int(filesize,2)
file_to_write = open("/home/giorgio/Scrivania/SERVER/Download/"+'new_'+filename, 'wb')
num_files += 1
chunksize = 1024
while filesize > 0:
if filesize < chunksize:
chunksize = filesize
data = clientsocket.recv(chunksize)
file_to_write.write(data)
filesize -= len(data)
file_to_write.close()
print 'File received successfully'
serversock.close()

while True:
size = clientsocket.recv(16)
if not size:
break
size = int(size)
filename = clientsocket.recv(size)
filesize = clientsocket.recv(32)
#filesize = int(filesize,2) #This is not required and this is causing the error so remove it.
file_to_write = open("/home/giorgio/Scrivania/SERVER/Download/"+'new_'+filename, 'wb')
num_files += 1
chunksize = 1024
while filesize !="": #update to empty string comparison
if filesize < chunksize:
chunksize = filesize
data = clientsocket.recv(chunksize)
file_to_write.write(data)
filesize = len(data)
file_to_write.close()
print 'File received successfully'
serversock.close()
i have added comments for you to see the possible fixes.

Progress bar while uploading a file to dropbox

import dropbox
client = dropbox.client.DropboxClient('<token>')
f = open('/ssd-scratch/abhishekb/try/1.mat', 'rb')
response = client.put_file('/data/1.mat', f)
I want to upload a big file to dropbox. How can I check the progress? [Docs]
EDIT:
The uploader offeset is same below somehow. What am I doing wrong
import os,pdb,dropbox
size=1194304
client = dropbox.client.DropboxClient(token)
path='D:/bci_code/datasets/1.mat'
tot_size = os.path.getsize(path)
bigFile = open(path, 'rb')
uploader = client.get_chunked_uploader(bigFile, size)
print "uploading: ", tot_size
while uploader.offset < tot_size:
try:
upload = uploader.upload_chunked()
print uploader.offset
except rest.ErrorResponse, e:
print("something went wrong")
EDIT 2:
size=1194304
tot_size = os.path.getsize(path)
bigFile = open(path, 'rb')
uploader = client.get_chunked_uploader(bigFile, tot_size)
print "uploading: ", tot_size
while uploader.offset < tot_size:
try:
upload = uploader.upload_chunked(chunk_size=size)
print uploader.offset
except rest.ErrorResponse, e:
print("something went wrong")

upload_chunked, as the documentation notes:
Uploads data from this ChunkedUploader's file_obj in chunks, until an
error occurs. Throws an exception when an error occurs, and can be
called again to resume the upload.
So yes, it uploads the entire file (unless an error occurs) before returning.
If you want to upload a chunk at a time on your own, you should use upload_chunk and commit_chunked_upload.
Here's some working code that shows you how to upload a single chunk at a time and print progress in between chunks:
from io import BytesIO
import os
from dropbox.client import DropboxClient
client = DropboxClient(ACCESS_TOKEN)
path = 'test.data'
chunk_size = 1024*1024 # 1MB
total_size = os.path.getsize(path)
upload_id = None
offset = 0
with open(path, 'rb') as f:
while offset < total_size:
offset, upload_id = client.upload_chunk(
BytesIO(f.read(chunk_size)),
offset=offset, upload_id=upload_id)
print('Uploaded so far: {} bytes'.format(offset))
# Note the "auto/" on the next line, which is needed because
# this method doesn't attach the root by itself.
client.commit_chunked_upload('auto/test.data', upload_id)
print('Upload complete.')

Extract content from a file with mime multipart

I have a file that contain a tiff image and a document xml in a multipart mime document.
I would extract the image from this file.
How I can get it?
I have this code, but it requires an infinite time to extract it, if I have a big file (for example 30Mb), so this is unuseful.
f=open("content_file.txt","rb")
msg = email.message_from_file(f)
j=0
image=False
for i in msg.walk():
if i.is_multipart():
#print "MULTIPART: "
continue
if i.get_content_maintype() == 'text':
j=j+1
continue
if i.get_content_maintype() == 'image':
image=True
j=j+1
pl = i.get_payload(decode=True)
localFile = open("map.out.tiff", 'wb')
localFile.write(pl)
continue
f.close()
if (image==False):
sys.exit(0);
Thank you so much.

Solved:
def extract_mime_part_matching(stream, mimetype):
"""Return the first element in a multipart MIME message on stream
matching mimetype."""
msg = mimetools.Message(stream)
msgtype = msg.gettype()
params = msg.getplist()
data = StringIO.StringIO()
if msgtype[:10] == "multipart/":
file = multifile.MultiFile(stream)
file.push(msg.getparam("boundary"))
while file.next():
submsg = mimetools.Message(file)
try:
data = StringIO.StringIO()
mimetools.decode(file, data, submsg.getencoding())
except ValueError:
continue
if submsg.gettype() == mimetype:
break
file.pop()
return data.getvalue()
From:
http://docs.python.org/release/2.6.6/library/multifile.html
Thank you for the support.

It is not quite clear to me, why your code hangs. The indentation looks a bit wrong and opened files are not properly closed. You may also be low on memory.
This version works fine for me:
import email
import mimetypes
with open('email.txt') as fp:
message = email.message_from_file(fp)
for i, part in enumerate(message.walk()):
if part.get_content_maintype() == 'image':
filename = part.get_filename()
if not filename:
ext = mimetypes.guess_extension(part.get_content_type())
filename = 'image-%02d%s' % (i, ext or '.tiff')
with open(filename, 'wb') as fp:
fp.write(part.get_payload(decode=True))
(Partly taken from http://docs.python.org/library/email-examples.html#email-examples)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to download large file with binary mode in python? - python

Related

python continue download from where I left off

Having issues with python web server

Python - Receive muliple files

Progress bar while uploading a file to dropbox

Extract content from a file with mime multipart

Categories

Resources