Stream-downloading a file in Python from a server with a download limit - python

I am trying to download a file from a server using Python. Sometimes the file is very large, so I would like to have a progress bar. One way to do this is to download in a stream, so that I can print the progress. So far I have tried the standard urlopen, urlretrieve, and the requests module (with stream on).
Obviously, urlopen cannot download a file as a stream; requests supports this, but the server limits how many files I can download at one time (its limit is 1). So every time I try to use requests, I only get a webpage telling me to wait. Is there any other way to do this?
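One way to cope, sketched here under the assumption that the server's limit shows up as an HTML "please wait" page while the real file is served with a non-HTML content type: stream over a single connection, detect the wait page by its Content-Type, and retry after a pause. stream_with_retry is a hypothetical helper, not a library function:
import time
import requests

def stream_with_retry(url, out_path, retries=10, wait=5):
    # Sketch: single-connection streaming download with a retry loop.
    # Assumption: the "please wait" page comes back as text/html,
    # the actual file does not.
    for _ in range(retries):
        r = requests.get(url, stream=True)
        try:
            if 'text/html' in r.headers.get('content-type', ''):
                time.sleep(wait)  # got the wait page, try again later
                continue
            with open(out_path, 'wb') as fh:
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    fh.write(chunk)  # hook a progress print in here
            return True
        finally:
            r.close()
    return False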

I have very recently downloaded many types of media with this function:
import sys
import requests
import time

def download_resource(domain, url, file_name=None, download=True):
    cookies = {}
    s = requests.Session()
    # sessions keep connections alive by default (the old s.config API is gone)
    # add your own cookies here, I have a specific function I call
    # for my application but yours is different
    r = s.get(url, cookies=cookies, stream=True)
    if not r.ok:
        print "error in downloading"
        return -1
    file_size = int(r.headers['content-length'])
    if not file_name:
        # fall back to the name the server suggests
        temp = r.headers.get('content-disposition')
        if not temp:
            # failing download: no usable file name
            return -1
        file_name = temp.split("filename=")[-1]
    #print "File size:", file_size
    print "Downloading:", file_name
    if download:
        with open(file_name, "wb") as fh:
            count = 1
            chunk_size = 1048576  # 1 MiB per chunk
            start_time = time.time()
            try:
                for block in r.iter_content(chunk_size):
                    total_time = time.time() - start_time
                    percent = count * chunk_size / float(file_size) * 100.0
                    fraction = int(percent / 5)
                    # each chunk is 1 MiB, so 1/elapsed-seconds is MB/s
                    download_speed = 1.0 / total_time
                    sys.stdout.write('\r')
                    sys.stdout.write("[%-20s] %d%% %3.2f MB/s " % ('=' * fraction, percent, download_speed))
                    sys.stdout.flush()
                    if not block:
                        break
                    fh.write(block)
                    count += 1
                    start_time = time.time()
            except Exception as e:
                print e
            finally:
                # close up the stream
                r.close()
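A hypothetical invocation might look like this (the domain argument is unused in the snippet as shown, and the URL is a placeholder):
download_resource("example.com", "http://example.com/files/big_video.mp4")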

Related

Python download videos from HTTP URL with bad internet

I have a problem with downloading videos from my server, e.g. http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro%20USLUGE.mp4
Everything works perfectly when the internet is OK, and the download recovers if I disconnect the LAN cable from the Raspberry Pi for less than 10-15 seconds. But when the internet is off for more than 10-15 seconds, my download does not continue, and the videos are not properly downloaded (I merge them later with MP4Box, so they need to be complete). If someone has a suggestion on how to solve this problem, I would appreciate it very much.
Here is my code:
import os
import urllib
import urllib2
import time
import commands
import requests
import shutil
from urllib2 import URLError

urls = ['http://screensfiles.dbtouch.com/screens2/Companies/89/HD/00 APPS OVERVIEW.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/ILIRIJA BIOGRAD 2016.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Restoran marina.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/HT Screens.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Croatia Full of life.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/04 PROJECTS.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/05 ATTEND.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro Hotel.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
        'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4']

directory = "/home/pi/pythonSignage/current_playlist/videos_to_merge/"

i = 1
for url in urls:
    i += 1
    print("current iter: ")
    print(i)
    if len(urls) > 1:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "") + " "
    else:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "")
    url_formatted_name = url.split('/')[-1].replace(" ", "").replace("%20", "").rstrip()

    while True:
        print("inside while true")
        try:
            """ method 0 doesn't work """
            print("try")
            response = urllib2.urlopen(url, timeout=5)
            content = response.read()
            print("content")
            f = open(directory + url_formatted_name, 'wb')
            f.write(content)
            f.close()

            """ method 1 doesn't work """
            #video_fancy_downloader = urllib.FancyURLopener()
            #video_fancy_downloader.retrieve(url, directory + url_formatted_name)

            """ method 2 - doesn't work """
            #my_file = urllib.URLopener()
            #my_file = retrieve(url, directory + url_formatted_name)

            """ method 3 - doesn't work """
            #response = requests.get(url, stream=True)
            #response.raise_for_status()
            #with open(directory + url_formatted_name, 'wb') as handle:
            #    for block in response.iter_content(1024):
            #        handle.write(block)

            break  # assumed: leave the retry loop once the file is written
        except:
            print("error download, sleep 5 sec")
            time.sleep(5)

print("end")
I have managed to solve my problem. Maybe this is not the best approach, but it works.
Here is the function that downloads the video and returns a response:
import subprocess

def do_download(destination, url):
    # wget flags: -c resume partial downloads, -O output file,
    # -t 15000 retry attempts, -T 5 five-second timeout
    comm = ["wget", "-c", "-O", destination, "-t", "15000", "-T", "5", url]
    proc = subprocess.Popen(comm, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    tmp = proc.stdout.read()
    if "wget: unable to resolve host address" in tmp:
        return False
    else:
        return True
The core part of the downloading function is almost the same, but now it calls do_download inside the while loop and checks the response:
downl_success = False
if os.path.isfile(directory + url_formatted_name) is False:
    print("must download file!")
    while downl_success is False:
        print("inside while true")
        try:
            print("try")
            while do_download(directory + url_formatted_name, url) is False:
                print(" ------- inside while for download ----------- ")
                time.sleep(5)
            downl_success = True
            print("file downloaded fully!")
            break
        except HTTPError, e:
            print "HTTPError", e.code, url
            time.sleep(5)
        except URLError, e:
            print "URL Error", e.reason, url
            time.sleep(5)
else:
    print("file already downloaded no need to download it again!")

apache/flask - when uploading a file, requests are being queued, server unresponsive & memory gets high

I'm running Flask on Apache with mod_wsgi. Every time a file gets uploaded, the response time of the server worsens, and my memory usage goes over 100% into swap.
Below is the whole code that handles my upload:
@app.route(sApiRoute + '/f/upload', methods=['POST'])
@hasToken
def pbFileUpload(token=None, **kwargs):
    def log_time(start_dt=None, start_time=None, doc=None):
        end_time = None
        if start_time is not None:
            end_time = str(time.time() - start_time)
        if end_time is not None and end_time > str(0.3):
            pblog.warning("Upload with long response time. Start: " + str(start_dt) + " Duration: " + str(end_time) + " \nUploader: " + token["id_user"] + " \nDoc info: " + str(doc))

    import time
    start_time = time.time()
    start_dt = current_datetime()

    # parse path
    path = getUploadPath(request.json, None, token)
    if not path:
        return r404()

    # Get the name of the uploaded file
    filename = request.json["name"]
    filename = filename.replace("$c$", "č").replace("$c2$", "ć").replace("$z$", "ž").replace("$s$", "š").replace("$d$", "đ").replace("$C$", "Č").replace("$C2$", "Ć").replace("$Z$", "Ž").replace("$S$", "Š").replace("$D$", "Đ")
    rawfile = request.json["data"]
    header, data = rawfile.split(',')

    # convert data
    import base64
    recoveredfile = base64.decodebytes(bytes(data, 'UTF-8'))

    # write data
    import uuid
    # leave file name as is
    if request.json.get("leaveName"):
        generatedfilename = filename
        leaveName = True
    else:
        generatedfilename = str(uuid.uuid1())
        leaveName = False

    # use of ftp to save files
    if 'ftp.' in path.lower():
        putFileToFtp(path, filename, generatedfilename, recoveredfile)
    # classic directory structure
    else:
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except Exception as e:
                pblog.warning(str(e), exc_info=1)
                return jsonify(data={"status": False, "msg": "Path not found"})
        while os.path.exists(os.path.join(path, generatedfilename)) and not leaveName:
            generatedfilename = str(uuid.uuid1())
        with open(os.path.join(path, generatedfilename), "wb") as out_file:
            out_file.write(recoveredfile)

    # send to document system
    if request.json.get("dataRec"):
        db = DBStore.getDB(token["current_company_id"])
        dataRec = request.json.get("dataRec")
        dataRec["idext"] = generatedfilename
        rtd = DocumentCls.getIdByIdent(dataRec["IdDocumentType"], db)
        if rtd and rtd != []:
            if rtd.get("Id"):
                dataRec["IdDocumentType"] = rtd["Id"]
        r = DocumentCls.saveDocument(dataRec, request.json.get("username"), db)
        log_time(start_dt, start_time, dataRec)
        return jsonify(data={"version": r, "generatedfilename": generatedfilename}, **kwargs)
    else:
        log_time(start_dt, start_time, generatedfilename)
        return jsonify(data={"filename": filename, "generatedfilename": generatedfilename}, **kwargs)

Progress bar with Python Dropbox Api

I'm working on making a progress bar for my Python Dropbox app using the Dropbox API. The problem is I can't figure out how to get the number of bytes written so far, so that I can build it. Is there any way to do this using the Python Dropbox API? If not, could I measure the bytes being sent out from my system instead, possibly using the os module?
I'm trying to get it to work with get_chunked_loader, but it would be great if I could get the bytes written for put_file() as well as file_copy() and file_move(). My code so far is something like this:
if file_size >= 4194304:
    big_file = open(path_to_file, 'rb')
    uploader = client.get_chunked_uploader(big_file)
    print "uploading " + path_to_file
    while uploader.offset < file_size:
        percent_complete = bytes_written / file_size * 100
        clearscreen()
        print "%.2f" % percent_complete + "%"
Thanks!
I recreated the ChunkedUploader:
from StringIO import StringIO

import dropbox

f = open(filetoupload, 'rb')
uploader = MMChunkedUploader(self.client, f, file_size, 1024 * 200)
uploader.upload_chunked()
uploader.finish(dropboxfilename)

class MMChunkedUploader(object):
    """Contains the logic around a chunked upload, which uploads a
    large file to Dropbox via the /chunked_upload endpoint.
    """

    def __init__(self, client, file_obj, length, chunk_size=4 * 1024 * 1024):
        self.client = client
        self.offset = 0
        self.upload_id = None
        self.last_block = None
        self.file_obj = file_obj
        self.target_length = length
        self.chunk_size = chunk_size
        self.clocknumber = 0
        dec = float(self.target_length) / chunk_size - self.target_length // chunk_size
        if dec > 0:
            self.totalblock = self.target_length / chunk_size + 1
        else:
            self.totalblock = self.target_length / chunk_size

    def upload_chunked(self, chunk_size=0):
        """Uploads data from this ChunkedUploader's file_obj in chunks, until
        an error occurs. Throws an exception when an error occurs, and can
        be called again to resume the upload.

        Parameters
            chunk_size
                The number of bytes to put in each chunk. (Default 4 MB.)
        """
        if chunk_size == 0:
            chunk_size = self.chunk_size
        self.clocknumber = 0
        while self.offset < self.target_length:
            self.clocknumber += 1
            print "Block n.", repr(self.clocknumber), " of ", repr(self.totalblock), " %", round((float(self.clocknumber) * 100) / self.totalblock, 0)
            next_chunk_size = min(chunk_size, self.target_length - self.offset)  # pick the smaller of chunk size and bytes left
            if self.last_block == None:
                self.last_block = self.file_obj.read(next_chunk_size)
                print "Reading file block"
            try:
                (self.offset, self.upload_id) = self.client.upload_chunk(
                    StringIO(self.last_block), next_chunk_size, self.offset, self.upload_id)
                self.last_block = None
            except dropbox.rest.ErrorResponse as e:
                # Handle the case where the server tells us our offset is wrong.
                must_reraise = True
                if e.status == 400:
                    reply = e.body
                    if "offset" in reply and reply['offset'] != 0 and reply['offset'] > self.offset:
                        self.last_block = None
                        self.offset = reply['offset']
                        must_reraise = False
                if must_reraise:
                    raise

    def finish(self, path, overwrite=False, parent_rev=None):
        path = "/commit_chunked_upload/%s%s" % (self.client.session.root, dropbox.client.format_path(path))
        params = dict(
            overwrite=bool(overwrite),
            upload_id=self.upload_id
        )
        if parent_rev is not None:
            params['parent_rev'] = parent_rev
        url, params, headers = self.client.request(path, params, content_server=True)
        return self.client.rest_client.POST(url, params, headers)
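Since self.offset is the number of bytes the server has acknowledged so far, a progress percentage can be printed with one extra line inside the upload_chunked loop (a sketch using the class's own attributes):
percent = self.offset / float(self.target_length) * 100
print "%.2f%% uploaded" % percent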

How to measure download speed and progress using requests?

I am using requests to download files, but for large files I need to check the size of the file on disk every time, because I can't display the progress as a percentage, and I would also like to know the download speed. How can I go about doing this? Here's my code:
import requests
import sys
import time
import os

def downloadFile(url, directory):
    localFilename = url.split('/')[-1]
    r = requests.get(url, stream=True)
    start = time.clock()
    f = open(directory + '/' + localFilename, 'wb')
    for chunk in r.iter_content(chunk_size=512 * 1024):
        if chunk:  # filter out keep-alive chunks
            f.write(chunk)
            f.flush()
            os.fsync(f.fileno())
    f.close()
    return (time.clock() - start)

def main():
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = raw_input("Enter the URL : ")
    directory = raw_input("Where would you want to save the file ? ")
    time_elapsed = downloadFile(url, directory)
    print "Download complete..."
    print "Time Elapsed: " + str(time_elapsed)

if __name__ == "__main__":
    main()
I think one way to do it would be to read the file every time in the for loop and calculate the percentage of progress based on the Content-Length header. But that would again be an issue for large files (around 500 MB). Is there any other way to do it?
See here: Python progress bar and downloads
I think the code would be something like this; it should show the average speed since start, in bytes per second:
import requests
import sys
import time

def downloadFile(url, directory):
    localFilename = url.split('/')[-1]
    with open(directory + '/' + localFilename, 'wb') as f:
        start = time.clock()
        r = requests.get(url, stream=True)
        total_length = r.headers.get('content-length')
        dl = 0
        if total_length is None:  # no content length header
            f.write(r.content)
        else:
            total_length = int(total_length)  # the header value is a string
            for chunk in r.iter_content(1024):
                dl += len(chunk)
                f.write(chunk)
                done = int(50 * dl / total_length)
                sys.stdout.write("\r[%s%s] %s bps" % ('=' * done, ' ' * (50 - done), dl // (time.clock() - start)))
    print ''
    return (time.clock() - start)

def main():
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = raw_input("Enter the URL : ")
    directory = raw_input("Where would you want to save the file ? ")
    time_elapsed = downloadFile(url, directory)
    print "Download complete..."
    print "Time Elapsed: " + str(time_elapsed)

if __name__ == "__main__":
    main()
An improved version of the accepted answer for Python 3 using io.BytesIO (write to memory), with the result in Mbps, support for IPv4/IPv6, and size and port arguments.
import sys, time, io, requests

def speed_test(size=5, ipv="ipv4", port=80):
    if size == 1024:
        size = "1GB"
    else:
        size = f"{size}MB"
    url = f"http://{ipv}.download.thinkbroadband.com:{port}/{size}.zip"
    with io.BytesIO() as f:
        start = time.perf_counter()
        r = requests.get(url, stream=True)
        total_length = r.headers.get('content-length')
        dl = 0
        if total_length is None:  # no content length header
            f.write(r.content)
        else:
            for chunk in r.iter_content(1024):
                dl += len(chunk)
                f.write(chunk)
                done = int(30 * dl / int(total_length))
                sys.stdout.write("\r[%s%s] %s Mbps" % ('=' * done, ' ' * (30 - done), dl // (time.perf_counter() - start) / 100000))
    print(f"\n{size} = {(time.perf_counter() - start):.2f} seconds")
Usage Examples:
speed_test()
speed_test(10)
speed_test(50, "ipv6")
speed_test(1024, port=8080)
Output Sample:
[==============================] 61.34037 Mbps
100MB = 17.10 seconds
Available Options:
size: 5, 10, 20, 50, 100, 200, 512, 1024
ipv: ipv4, ipv6
port: 80, 81, 8080
Updated on 2022-10-11:
time.perf_counter() replaced time.clock(), which was deprecated in Python 3.3 and removed in Python 3.8 (kudos to shiro).
I had a problem downloading a big file from a specific slow server:
no Content-Length header,
big file (42 GB),
no compression,
slow server (< 1 MB/s).
Being this big, I also had a problem with memory usage during the request. requests doesn't write its output to a file the way urllib does; it looks like it keeps the whole thing in memory.
The missing Content-Length header also means the accepted answer cannot monitor progress here.
So I wrote this basic method to monitor speed during a CSV download, following just the requests documentation.
It needs fname (a complete output path) and link (http or https), and you can specify custom headers.
import time
import requests

BLOCK = 5 * 1024 * 1024  # 5 MB

try:
    with open(fname, 'wb') as f:
        r = requests.get(link, headers=headers, stream=True)
        ## iter_lines is used because the official documentation suggests it,
        ## saying it's more reliable than cycling directly on the raw stream,
        ## so you don't lose data
        lines = r.iter_lines()
        ## Init the base vars, for monitor and block management
        ## obj is a byte object, because iter_lines returns byte objects
        tsize = 0; obj = bytearray(); t0 = time.time(); i = 0
        for line in lines:
            ## calculate the line size, in bytes, and add to the byte object
            tsize += len(line)
            obj.extend(line)
            ## When the block threshold is reached,
            if tsize > BLOCK:
                ## Increment the block number
                i += 1
                ## Calculate the speed.. this is in MB/s,
                ## but you can easily change to KB/s, or Blocks/s
                t1 = time.time()
                t = t1 - t0
                speed = round(5 / t, 2)
                ## Write the block to the file.
                f.write(obj)
                ## Write stats
                print('got', i * 5, 'MB ', 'block', i, ' #', speed, 'MB/s')
                ## Reinit all the base vars, for a new block
                obj = bytearray(); tsize = 0; t0 = time.time()
        ## Write the last block part to the file.
        f.write(obj)
except Exception as e:
    print("Error: ", e, 0)

python script to record online live streaming videos

I am developing a script to download online live-streaming videos.
My script:
print "Recording video..."
response = urllib2.urlopen("streaming online video url")
filename = time.strftime("%Y%m%d%H%M%S",time.localtime())+".avi"
f = open(filename, 'wb')
video_file_size_start = 0
video_file_size_end = 1048576 * 7 # end in 7 mb
block_size = 1024
while True:
try:
buffer = response.read(block_size)
if not buffer:
break
video_file_size_start += len(buffer)
if video_file_size_start > video_file_size_end:
break
f.write(buffer)
except Exception, e:
logger.exception(e)
f.close()
The above script works fine for downloading 7 MB of video from the live-streaming content and storing it in *.avi files.
However, I would like to download just 10 seconds of video regardless of the file size and store it in an avi file.
I tried different possibilities, but with no success.
Could anyone please share their knowledge here to fix my issue?
Thanks in advance.
I don't think there is any way of doing that without constantly analysing the video, which would be way too costly. So you could take a guess at how many MB you need and, once done, check that it's long enough. If it's too long, just cut it. Instead of guessing, you could also build up some statistics on how much you need to retrieve. You could also replace the while True with:
start_time_in_seconds = time.time()
time_limit = 10
while time.time() - start_time_in_seconds < time_limit:
    ...
This should give you at least 10 seconds of video, unless connecting takes too much time (leaving less than 10 seconds of video) or the server sends more for buffering (but that's unlikely for live streams).
You can use the 'Content-Length' header to retrieve the video filesize if it exists.
video_file_size_end = response.info().getheader('Content-Length')
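Alternatively, if you know or can estimate the stream's bitrate, a rough byte cap for a 10-second clip follows directly (the bitrate below is an assumed figure, for illustration):
bitrate_kbps = 5600                  # assumed stream bitrate in kilobits/s
seconds_wanted = 10
# kilobits/s -> bytes/s, times the clip length (roughly 7 MB here)
video_file_size_end = bitrate_kbps * 1000 // 8 * seconds_wanted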
response.read() does not work; response.iter_content() seems to do the trick.
import time
import requests

print("Recording video...")
filename = time.strftime("/tmp/" + "%Y%m%d%H%M%S", time.localtime()) + ".avi"
file_handle = open(filename, 'wb')
chunk_size = 1024
start_time_in_seconds = time.time()
time_limit = 10  # time in seconds, for recording
time_elapsed = 0
url = "http://demo.codesamplez.com/html5/video/sample"
with requests.Session() as session:
    response = session.get(url, stream=True)
    for chunk in response.iter_content(chunk_size=chunk_size):
        if time_elapsed > time_limit:
            break
        # to print time elapsed
        if int(time.time() - start_time_in_seconds) - time_elapsed > 0:
            time_elapsed = int(time.time() - start_time_in_seconds)
            print(time_elapsed, end='\r', flush=True)
        if chunk:
            file_handle.write(chunk)
file_handle.close()
