Python: download videos from HTTP URL with bad internet

I have a problem downloading videos from my server, e.g. http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro%20USLUGE.mp4
Everything works perfectly when the internet is OK, and the download also recovers if I disconnect the LAN cable from the Raspberry Pi for less than 10-15 seconds. But when the internet is off for more than 10-15 seconds, my download does not continue, or the videos are not downloaded properly (I merge them later with MP4Box and they need to be complete). If someone has a suggestion for how to solve this problem, I would appreciate it very much.
Here is my code:
import os
import urllib
import urllib2
import time
import commands
import requests
import shutil
from urllib2 import URLError
urls = ['http://screensfiles.dbtouch.com/screens2/Companies/89/HD/00 APPS OVERVIEW.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/ILIRIJA BIOGRAD 2016.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Restoran marina.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/HT Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Croatia Full of life.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/04 PROJECTS.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/05 ATTEND.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro Hotel.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4']
directory = "/home/pi/pythonSignage/current_playlist/videos_to_merge/"
i = 1
for url in urls:
    i += 1
    print("current iter: ")
    print(i)
    if len(urls) > 1:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "") + " "
    else:
        url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "")
    url_formatted_name = url.split('/')[-1].replace(" ", "").replace("%20", "").rstrip()
    while True:
        print("inside while true")
        try:
            """ method 0 doesn't work """
            print("try")
            response = urllib2.urlopen(url, timeout=5)
            content = response.read()
            print("content")
            f = open(directory + url_formatted_name, 'wb')
            f.write(content)
            f.close()
            """ method 1 doesn't work """
            #video_fancy_downloader = urllib.FancyURLopener()
            #video_fancy_downloader.retrieve(url, directory + url_formatted_name)
            """ method 2 - doesn't work """
            #my_file = urllib.URLopener()
            #my_file = retrieve(url, directory + url_formatted_name)
            """ method 3 - doesn't work """
            #response = requests.get(url, stream=True)
            #response.raise_for_status()
            #with open(directory + url_formatted_name, 'wb') as handle:
            #    for block in response.iter_content(1024):
            #        handle.write(block)
            break  # success; without this the loop re-downloads the same file forever
        except:
            print("error download, sleep 5 sec")
            time.sleep(5)
print("end")

I have managed to solve my problem. Maybe this is not the best approach, but it works.
Here is the function that downloads a video and returns whether it succeeded:
import subprocess

def do_download(destination, url):
    # -c resumes a partial download, -t 15000 retries up to 15000 times,
    # -T 5 uses a 5 second timeout per attempt
    comm = ["wget", "-c", "-O", destination, "-t", "15000", "-T", "5", url]
    proc = subprocess.Popen(comm, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    tmp = proc.stdout.read()
    if "wget: unable to resolve host address" in tmp:
        return False
    else:
        return True
The core part of the downloading function is almost the same, but now it calls do_download inside the while loop and checks the result:
from urllib2 import HTTPError, URLError

if os.path.isfile(directory + url_formatted_name) is False:
    print("must download file!")
    downl_success = False
    while downl_success is False:
        print("inside while true")
        try:
            print("try")
            while do_download(directory + url_formatted_name, url) is False:
                print(" ------- inside while for download ----------- ")
                time.sleep(5)
            downl_success = True
            print("file downloaded fully!")
            break
        except HTTPError, e:
            print "HTTPError", e.code, url
            time.sleep(5)
        except URLError, e:
            print "URL Error", e.reason, url
            time.sleep(5)
else:
    print("file already downloaded no need to download it again!")

Related

Office365-REST-Python-Client: how to avoid System.Runtime.InteropServices.COMException

I am using the Office365-REST-Python API to upload several large files (~4 GB each) to OneDrive, but I am running into the following error:
'-2130575252, System.Runtime.InteropServices.COMException', 'The security validation for this page has timed out. Click Back in your Web browser, refresh the page, and try your operation again.', "403 Client Error: FORBIDDEN for url: https://XXXXX/_api/Web/GetFileById('XXXXX')/finishUpload(uploadID='XXXXX',fileOffset=XXXXX)")
Is there anybody who can help me solve this issue?
Here is what my code looks like:
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.user_credential import UserCredential
import os

MYURL = '<my_url>'
USERNAME = '<my_username>'
PASSWORD = '<my_pwd>'

def connect_to_cloud():
    baseurl = MYURL
    return ClientContext(baseurl).with_credentials(
        UserCredential(USERNAME, PASSWORD))

def upload_file(ctx, localPath, remoteDirPath):
    Max_size = 262144000
    try:
        target_folder = ctx.web.get_folder_by_server_relative_url(
            remoteDirPath)
        with open(localPath, 'rb') as f:
            uploaded_file = target_folder.files.create_upload_session(
                f, Max_size).execute_query()
        return 'OK'
    except Exception as e:
        return str(e)

def upload_folder(localDirPath, remoteDir, return_dict):
    return_dict['upload_folder'] = 'NOK'
    ctx = connect_to_cloud()
    try:
        for rootDir, subFolders, files in os.walk(localDirPath):
            rpath = os.path.relpath(rootDir, os.path.split(localDirPath)[0])
            remotePath = os.path.join(remoteDir, rpath)
            for f in files:
                filepath = os.path.join(rootDir, f)
                res = upload_file(ctx, filepath, remotePath)
                if res != 'OK':
                    return_dict['upload_folder'] = res
                    return
        return_dict['upload_folder'] = 'OK'
    except Exception as e:
        return_dict['upload_folder'] = e
Basically I am calling the function upload_folder which, in turn, calls upload_file. Everything works for 30 minutes or more, until I get the error message above.
Thanks a lot to anybody who can help!
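No answer was posted here, but the error text itself says the security validation timed out, which points at the long-running upload outliving the context's security token. A hedged sketch of one idea (an assumption, not a verified fix): recreate the ClientContext every few files, reusing connect_to_cloud and upload_file from above, so each batch starts with a fresh token. FILES_PER_CONTEXT = 20 is an arbitrary guess:

FILES_PER_CONTEXT = 20  # arbitrary; tune to stay under the token lifetime

def upload_folder(localDirPath, remoteDir, return_dict):
    return_dict['upload_folder'] = 'NOK'
    ctx = connect_to_cloud()
    uploaded = 0
    for rootDir, subFolders, files in os.walk(localDirPath):
        rpath = os.path.relpath(rootDir, os.path.split(localDirPath)[0])
        remotePath = os.path.join(remoteDir, rpath)
        for f in files:
            if uploaded and uploaded % FILES_PER_CONTEXT == 0:
                ctx = connect_to_cloud()  # fresh context, fresh security token
            res = upload_file(ctx, os.path.join(rootDir, f), remotePath)
            if res != 'OK':
                return_dict['upload_folder'] = res
                return
            uploaded += 1
    return_dict['upload_folder'] = 'OK'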

How to check if cPanel can log in successfully with Python 2.7?

I need a script that works like a cPanel checker, with more than one URL, where the URLs are stored in a txt file.
Usage: python script.py list.txt
Format in file list.txt: https://demo.cpanel.net:2083|democom|DemoCoA5620
This is my code, but it doesn't work. Can someone help me?
Thanks.
import requests, sys
from multiprocessing.dummy import Pool as ThreadPool

try:
    with open(sys.argv[1], 'r') as f:
        list_data = [line.strip() for line in f if line.strip()]
except IOError:
    pass

def cpanel(url):
    try:
        data = {'user': 'democom', 'pass': 'DemoCoA5620'}
        r = requests.post(url, data=data)
        if r.status_code == 200:
            print "login success"
        else:
            print "login failed"
    except:
        pass

def chekers(url):
    try:
        cpanel(url)
    except:
        pass

def Main():
    try:
        start = timer()
        pp = ThreadPool(25)
        pr = pp.map(chekers, list_data)
        print('Time: ' + str(timer() - start) + ' seconds')
    except:
        pass

if __name__ == '__main__':
    Main()
I fixed your code so that it returns an array of booleans indicating whether the cpanel login succeeded for each URL.
from __future__ import print_function
import requests
from multiprocessing.pool import ThreadPool

try:
    list_data = ["https://demo.cpanel.net:2083|democom|DemoCoA5620",
                 "https://demo.cpanel.net:2083|UserDoesNotExist|WRONGPASSWORD",
                 ]
except IOError:
    pass

def cpanel(url):
    try:
        # try to split that url to get username / password
        try:
            url, username, password = url.split('|')
        except Exception as e:
            print("Url {} seems to have wrong format. Concrete error: {}".format(url, e))
            return False
        # build the correct url
        url += '/login/?login_only=1'
        # build post parameters
        params = {'user': username,
                  'pass': password}
        # make request
        r = requests.post(url, params)
        if r.status_code == 200:
            print("login for user {} success".format(username))
            return True
        else:
            print("login for user {} failed due to Status Code {} and message \"{}\"".format(username, r.status_code, r.reason))
            return False
    except Exception as e:
        print("Error occurred for url {}: {}".format(url, e))
        return False

def chekers(url):
    return cpanel(url)

def Main():
    try:
        # start = timer()
        pp = ThreadPool(1)
        pr = pp.map(chekers, list_data)
        print(pr)
        # print('Time: ' + str(timer() - start) + ' seconds')
    except:
        pass

if __name__ == '__main__':
    Main()
Output:
login for user democom success
login for user UserDoesNotExist failed due to Status Code 401 and message "Access Denied"
[True, False]
Be aware that I replaced your file read operation with some fixed URLs.
Since you use requests.post I guess you actually want to POST something to those URLs. Your code does not do that. If you just want to send a request, use the requests.get method.
See the official documentation for the requests package: https://2.python-requests.org/en/master/user/quickstart/#make-a-request for more details.
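For illustration, here is a minimal sketch of that GET vs POST difference, reusing the demo credentials from above:

import requests

# POST sends the credentials in the request body (what a login form does)
r = requests.post("https://demo.cpanel.net:2083/login/?login_only=1",
                  data={'user': 'democom', 'pass': 'DemoCoA5620'})
print(r.status_code)

# GET puts parameters in the query string (fine for plain lookups)
r = requests.get("https://demo.cpanel.net:2083", params={'key': 'value'})
print(r.status_code)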
Also note that
"but it doesn't work"
is NOT a question.

Extract the background-image URL from an HTML file's inline style in Python

I'm coding a website cloner in Python. It is doing fine for most files, but I have hit a challenge in getting the URL of background images, e.g.
<div style="background-image: url(images/banner.jpg)" >
The script detects background-image as a folder and assumes the URL is 'background-image: url(images/banner.jpg'. How do I make it get the actual URL?
Python 2.7
import urllib2
import sys
import socket
import os
import re

socket.setdefaulttimeout(15)
dataTypesToDownload = [".jpg", ".jpeg", ".png", ".gif", ".ico", ".css", ".js", ".html"]
url = 'http://example.com/'
pathbase = 'theme'

if "http://" not in url and "https://" not in url:
    url = "http://" + url

try:
    os.mkdir(pathbase)
except OSError:
    pass

file = open(pathbase + "/index.html", "w")

try:
    content = urllib2.urlopen(url).read()
except urllib2.URLError as e:
    print "An error occurred: " + str(e.reason)
    exit()

resources = re.split("=\"|='", content)
first = False
for resource in resources:
    # the first chunk is everything before the first attribute; skip it
    if first == False:
        first = True
        continue
    resource = re.split("\"|'", resource)[0]
    if any(s in resource for s in dataTypesToDownload):
        print "Downloading " + resource
        try:
            path = resource.split("/")
            if len(path) != 1:
                path.pop(len(path) - 1)
            trail = "./" + pathbase + "/"
            for folder in path:
                trail += folder + "/"
                try:
                    os.mkdir(trail)
                except OSError:
                    pass
        except IOError:
            pass
        try:
            if "?" in resource:
                download = open(pathbase + "/" + resource.split("?")[len(resource.split("?")) - 2], "w")
            else:
                download = open(pathbase + "/" + resource, "w")
            print url + "/" + resource
            dContent = urllib2.urlopen(url + "/" + resource).read()
        except urllib2.URLError as e:
            print "An error occurred: " + str(e.reason)
            download.close()
            continue
        except IOError:
            pass
            continue
        download.write(dContent)
        download.close()
        print "Downloaded!"

file.write(content)
file.close()
I expect that when it encounters style="background-image: url(images/banner.jpg)", it should set resource to images/banner.jpg, but instead it sets resource to background-image: url(images/banner.jpg.
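No answer was posted for this one. A minimal sketch of one way to handle it (my assumption, not the asker's eventual solution): pull url(...) values out of the HTML with a dedicated regex before the generic attribute split, e.g.:

import re

content = '<div style="background-image: url(images/banner.jpg)" >'

# match url(...), tolerating optional quotes around the path
style_urls = re.findall(r'url\(\s*[\'"]?([^\'")]+)[\'"]?\s*\)', content)
print(style_urls)  # ['images/banner.jpg']

Each match could then be pushed through the same download path as the other resources.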

Python: manga parsing returns an empty file

I want to parse images from a "certain" manga and chapter. Here's my code:
import requests, bs4, os, urllib.request

try:
    url = "http://manganelo.com/chapter/read_one_punch_man_manga_online_free3/chapter_136"
    res = requests.get(url)
    print("[+] Asking a request to " + url)

    # slice the url so it only contains the name and chapter
    name = url[34:].replace("/", "_")
    os.mkdir(name)
    print("[+] Making '{}' directory".format(name))
    os.chdir(os.path.join(os.getcwd(), name))

    soup = bs4.BeautifulSoup(res.text, "html.parser")
    for img in soup.findAll("img"):
        manga_url = img.get("src")
        manga_name = img.get("alt") + ".jpg"
        urllib.request.urlretrieve(manga_url, manga_name)
        print("[+] Downloading: " + manga_name)
except Exception as e:
    print("[-] Error: " + str(e))
It works fine, BUT only for specific chapters: if I put chapter 130, running the code produces blank files, yet chapter 136 and others work fine. How can this happen?
You can replace urllib.request.urlretrieve(manga_url, manga_name) with the following (note this needs import shutil):

r = requests.get(manga_url, stream=True)
if r.status_code == 200:
    with open(manga_name, 'wb') as f:
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)
Actually, the remote server is apparently checking the User-Agent header and rejecting requests from Python's urllib.
On the other hand, you can use:
opener = urllib.request.URLopener()
opener.addheader('User-Agent', 'whatever')
opener.retrieve(manga_url, manga_name)
This works for me
Hope this helps
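For what it's worth, URLopener is deprecated in Python 3; a minimal sketch of the same User-Agent trick with the non-deprecated API (the header value here is an arbitrary placeholder) is:

import urllib.request

req = urllib.request.Request(manga_url, headers={'User-Agent': 'Mozilla/5.0'})
with urllib.request.urlopen(req) as resp, open(manga_name, 'wb') as f:
    f.write(resp.read())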

Using Python to stream-download a file with a server limit

I'm trying to download files from a server using Python. Sometimes the files are very large, and I would like to have a progress bar. One way to do this that I can come up with is to download in a stream, so that I can print the progress. Currently I have tried the standard urlopen, urlretrieve, and the requests module (with stream on).
Obviously, urlopen cannot download a file as a stream; the requests module supports this, however, the server has a limit on how many files I can download at one time (its limit is 1). So every time I try to use requests, it only gets the webpage telling me to wait. Is there any other way to do this?
I have very recently downloaded many types of media with this function:
import sys
import requests
import time

def download_resource(domain, url, file_name=None, download=True):
    cookies = {}
    s = requests.Session()
    # s.config was removed in requests 1.0; keep-alive is the default now
    #s.config['keep_alive'] = True
    #add your own cookies here, I have a specific function I call
    #for my application but yours is different
    r = s.get(url, cookies=cookies, stream=True)
    if not r.ok:
        print "error in downloading"
        return -1
    file_size = int(r.headers['content-length'])
    if not file_name:
        try:
            temp = r.headers['content-disposition']
        except Exception as e:
            #failing download
            return -1
        else:
            if not temp:
                return -1
            else:
                file_name = temp.split("filename=")[-1]
                #return_obj["filename"] = file_name  # return_obj belongs to the answerer's larger app
    #print "File size:", file_size
    #print "\n", str(self.entire_size / float(1024*1024*1024)), "\n"
    print "Downloading:", file_name
    if download:
        with open(file_name, "wb") as fh:
            count = 1
            chunk_size = 1048576  # 1 MiB per chunk
            start_time = time.time()
            try:
                for block in r.iter_content(chunk_size):
                    total_time = time.time() - start_time
                    percent = count * chunk_size / float(file_size) * 100.0
                    fraction = int(percent / 5)
                    # with 1 MiB chunks, chunks-per-second is roughly MB/s
                    download_speed = 1.0 / total_time
                    sys.stdout.write('\r')
                    sys.stdout.write("[%-20s] %d%% %3.2f MB/s " % ('=' * fraction, percent, download_speed))
                    sys.stdout.flush()
                    if not block:
                        break
                    fh.write(block)
                    count += 1
                    start_time = time.time()
            except Exception as e:
                print e
            finally:
                #close up the stream
                r.close()
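For comparison, a shorter variant of the progress display using the tqdm package (an extra dependency, not part of the answer above) might look like this:

import requests
from tqdm import tqdm  # pip install tqdm

def download_with_bar(url, file_name):
    r = requests.get(url, stream=True)
    total = int(r.headers.get('content-length', 0))
    # tqdm draws and updates the bar; we only feed it byte counts
    with open(file_name, 'wb') as fh:
        with tqdm(total=total, unit='B', unit_scale=True) as bar:
            for block in r.iter_content(1048576):
                fh.write(block)
                bar.update(len(block))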
