I am using the Office365-REST-Python-Client API to upload several large files (~4 GB each) to OneDrive, but I am running into the following error:
'-2130575252, System.Runtime.InteropServices.COMException', 'The security validation for this page has timed out. Click Back in your Web browser, refresh the page, and try your operation again.', "403 Client Error: FORBIDDEN for url: https://XXXXX/_api/Web/GetFileById('XXXXX')/finishUpload(uploadID='XXXXX',fileOffset=XXXXX)")
Can anybody help me solve this issue?
Here is what my code looks like:
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.user_credential import UserCredential
import os
MYURL = '<my_url>'
USERNAME = '<my_username>'
PASSWORD = '<my_pwd>'
def connect_to_cloud():
baseurl = MYURL
return ClientContext(baseurl).with_credentials(
UserCredential(USERNAME, PASSWORD))
def upload_file(ctx, localPath, remoteDirPath):
    Max_size = 262144000  # chunk size passed to the upload session (250 MB)
try:
target_folder = ctx.web.get_folder_by_server_relative_url(
remoteDirPath)
with open(localPath, 'rb') as f:
uploaded_file = target_folder.files.create_upload_session(
f, Max_size).execute_query()
return 'OK'
except Exception as e:
return str(e)
def upload_folder(localDirPath, remoteDir, return_dict):
return_dict['upload_folder'] = 'NOK'
ctx = connect_to_cloud()
try:
for rootDir, subFolders, files in os.walk(localDirPath):
rpath = os.path.relpath(rootDir, os.path.split(localDirPath)[0])
remotePath = os.path.join(remoteDir, rpath)
for f in files:
filepath = os.path.join(rootDir, f)
res = upload_file(ctx, filepath, remotePath)
if res != 'OK':
return_dict['upload_folder'] = res
return
return_dict['upload_folder'] = 'OK'
except Exception as e:
return_dict['upload_folder'] = e
Basically I am calling the function upload_folder, which in turn calls upload_file. Everything works for 30 minutes or more, until I get the error message above.
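For what it's worth, the 30-minute mark coincides with the default lifetime of SharePoint's security validation (form digest) token, so one workaround worth trying (an assumption on my part, not a confirmed fix) is to take a fresh ClientContext per file instead of reusing one connection for the whole walk. A minimal sketch reusing only the functions defined above:
def upload_folder_fresh_ctx(localDirPath, remoteDir, return_dict):
    # Hypothetical variant of upload_folder: reconnect before every file so
    # each upload session starts with a fresh security token. Whether this
    # also helps within a single ~4 GB upload is untested.
    return_dict['upload_folder'] = 'NOK'
    try:
        for rootDir, subFolders, files in os.walk(localDirPath):
            rpath = os.path.relpath(rootDir, os.path.split(localDirPath)[0])
            remotePath = os.path.join(remoteDir, rpath)
            for f in files:
                ctx = connect_to_cloud()  # fresh context (and token) per file
                res = upload_file(ctx, os.path.join(rootDir, f), remotePath)
                if res != 'OK':
                    return_dict['upload_folder'] = res
                    return
        return_dict['upload_folder'] = 'OK'
    except Exception as e:
        return_dict['upload_folder'] = str(e)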
Thanks a lot to anybody who can help!
I'm creating a backend function so that users of App A can import their details to App B. The flow is like this:
User uploads a zip file on the website. This zip contains csv files.
This zip file flows into S3.
Once in S3, it triggers a Lambda function.
The Lambda function then picks the zip file and starts processing the data inside the csv files.
I've completed steps 1, 2, and 3, but in step 4 the Lambda function is not able to read/process the file.
The Python file works fine on my local device, so I think the issue is that it is not able to "get" the object from S3 correctly, and so read_zip doesn't work.
Relevant code below:
import <relevant libs>
s3Client = boto3.client('s3')
def lambda_handler(event,context):
bucket = event['Records'][0]['s3']['bucket']['name']
filename = event['Records'][0]['s3']['object']['key']
#tried printing these, displaying correctly
#response = s3Client.get_object(Bucket=bucket, Key=filename)
usefile = 'https://' + bucket + '.s3.ap-south-1.amazonaws.com/' + filename
print(usefile)
#printing correct filename
def read_csv(file):
to_return = []
reader = csv.DictReader(TextIOWrapper(file, 'utf-8'))
for row in reader:
to_return.append(row)
return to_return
def read_zip(usefile):
with ZipFile(usefile, 'r') as APPA_file:
with APPA_file.open("file1.csv", mode='r') as f:
file1 = read_csv(f)
with APPA_file.open("file2.csv", mode='r') as f:
file2 = read_csv(f)
return file1, file2
def get_APPB_url(APPA_uri):
resp = requests.get(APPA_uri)
if resp.status_code != 200:
return None
# extract the APPB url
re_match = re.findall('href="(https://www.X.org/.+/)"', resp.text)
if not re_match:
return None
print(resp.text)
return re_match[0]
def rate_on_APPB(APPB_url, rating):
re_match = re.findall('X.org/(.+)/', APPB_url)
if not re_match:
return None
APPB_id = re_match[0]
req_body = {
"query": <query used>,
"operationName": "<xyz>",
"variables": <variables>
}
headers = {
"content-type": "application/json",
"x-hasura-admin-secret": "XXX"
}
resp = requests.post("XXX", json=req_body, headers=headers)
if resp.status_code != 200:
raise ValueError(f"Hasura query failed. Code: {resp.status_code}")
else: print(APPB_id)
json_resp = resp.json()
if 'errors' in json_resp and len(json_resp['errors']) > 0:
first_error_msg = json_resp['errors'][0]['message']
if 'Authentication' in first_error_msg:
print(f"Failed to authenticate with cookie")
exit(1)
else:
raise ValueError(first_error_msg)
def APPA_to_APPB(APPA_dict):
APPB_url = get_APPB_url(APPA_dict['APPA URI'])
if APPB_url is None:
raise ValueError("Cannot find APPB title")
rate_on_APPB(APPB_url, int(float(APPA_dict['Rating']) * 2))
def main():
file1, file2 = read_zip(usefile)
success = []
errors = []
with tqdm(total=len(file1)) as pbar:
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_url = {
executor.submit(APPA_to_APPB, APPA_dict): APPA_dict for APPA_dict in file1
}
try:
for future in concurrent.futures.as_completed(future_to_url):
APPA_dict = future_to_url[future]
pbar.update(1)
try:
success.append(future.result())
except Exception as e:
errors.append({"APPA_dict": APPA_dict, "error": e})
except KeyboardInterrupt:
executor._threads.clear()
concurrent.futures.thread._threads_queues.clear()
print(f"Successfully rated: {len(success)} ")
print(f"{len(errors)} Errors")
for error in errors:
print(f"\t{error['APPA_dict']['Name']} ({error['APPA_dict']['Year']}): {error['error']}")
if __name__ == '__main__':
main()
The CloudWatch log basically says the event started and ended successfully, nothing else. I was able to verify that the trigger works by printing the file name, which appears in the log, but that's about it.
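One detail that may matter here: ZipFile cannot open an HTTPS URL; it needs a local path or a file-like object, so building usefile as a URL will never feed read_zip. A minimal sketch of pulling the object through boto3 instead (the function name is illustrative, not from the original):
import io
import boto3
from zipfile import ZipFile

s3Client = boto3.client('s3')

def read_zip_from_s3(bucket, key):
    # Download the object bytes and wrap them in a seekable buffer;
    # ZipFile accepts a file path or file-like object, not a URL.
    obj = s3Client.get_object(Bucket=bucket, Key=key)
    buffer = io.BytesIO(obj['Body'].read())
    with ZipFile(buffer, 'r') as z:
        return z.namelist()  # or z.open("file1.csv") and feed it to read_csv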
I am trying to troubleshoot a script for Mimecast's API. The script runs fine for the most part, but a few times, I have noticed that it stops pulling logs and generally appears to be a hung process. After restarting the script and manually pushing logs to the syslog server, it starts working again without issue. I am not able to reproduce this issue at will.
The script is supposed to do the following:
Authenticate against Mimecast's API
Sign responses
Download, extract, and save log files to the log directory
Utilize a tokenized header to determine which file was downloaded in the last request, saving the token ID to a file in the checkpoint directory
Push files to remote syslog server
Output any errors and info to console
Below is the sample code from Mimecast.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging.handlers
import json
import os
import requests
import base64
import uuid
import datetime
import hashlib
import shutil
import hmac
import time
from zipfile import ZipFile
import io
# Set up variables
APP_ID = "YOUR DEVELOPER APPLICATION ID"
APP_KEY = "YOUR DEVELOPER APPLICATION KEY"
URI = "/api/audit/get-siem-logs"
EMAIL_ADDRESS = 'EMAIL ADDRESS OF YOUR ADMINISTRATOR'
ACCESS_KEY = 'ACCESS KEY FOR YOUR ADMINISTRATOR'
SECRET_KEY = 'SECRET KEY FOR YOUR ADMINISTRATOR'
LOG_FILE_PATH = "FULLY QUALIFIED PATH TO FOLDER TO WRITE LOGS"
CHK_POINT_DIR = 'FULLY QUALIFIED PATH TO FOLDER TO WRITE PAGE TOKEN'
# Set True to output to syslog, false to only save to file
syslog_output = False
# Enter the IP address or hostname of your syslog server
syslog_server = 'localhost'
# Change this to override default port
syslog_port = 514
# delete files after fetching
delete_files = True
# Set threshold in number of files in log file directory
log_file_threshold = 10000
# Set up logging (in this case to terminal)
log = logging.getLogger(__name__)
log.root.setLevel(logging.DEBUG)
log_formatter = logging.Formatter('%(levelname)s %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)
log.addHandler(log_handler)
# Set up syslog output
syslog_handler = logging.handlers.SysLogHandler(address=(syslog_server, syslog_port))
syslog_formatter = logging.Formatter('%(message)s')
syslog_handler.setFormatter(syslog_formatter)
syslogger = logging.getLogger('SysLogger')
syslogger.addHandler(syslog_handler)
# Supporting methods
def get_hdr_date():
return datetime.datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UTC")
def read_file(file_name):
try:
with open(file_name, 'r') as f:
data = f.read()
return data
except Exception as e:
log.error('Error reading file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
def write_file(file_name, data_to_write):
if '.zip' in file_name:
try:
byte_content = io.BytesIO(data_to_write)
zip_file = ZipFile(byte_content)
zip_file.extractall(LOG_FILE_PATH)
except Exception as e:
log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
else:
try:
with open(file_name, 'w') as f:
f.write(data_to_write)
except Exception as e:
log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
def get_base_url(email_address):
# Create post body for request
post_body = dict()
post_body['data'] = [{}]
post_body['data'][0]['emailAddress'] = email_address
# Create variables required for request headers
request_id = str(uuid.uuid4())
request_date = get_hdr_date()
headers = {'x-mc-app-id': APP_ID, 'x-mc-req-id': request_id, 'x-mc-date': request_date}
# Send request to API
    log.debug('Sending request to https://api.mimecast.com/api/login/discover-authentication with request Id: ' +
              request_id)
try:
r = requests.post(url='https://api.mimecast.com/api/login/discover-authentication',
data=json.dumps(post_body), headers=headers)
# Handle Rate Limiting
        if r.status_code == 429:
            # The header value is a string (assumed to be milliseconds until
            # reset); cast it before sleeping instead of multiplying the raw
            # string, which raises a TypeError in time.sleep().
            wait_seconds = int(r.headers['X-RateLimit-Reset']) / 1000.0
            log.warning('Rate limit hit. Sleeping for ' + str(wait_seconds) + ' seconds')
            time.sleep(wait_seconds)
except Exception as e:
log.error('Unexpected error getting base url. Cannot continue.' + str(e))
quit()
# Handle error from API
if r.status_code != 200:
log.error('Request returned with status code: ' + str(r.status_code) + ', response body: ' +
r.text + '. Cannot continue.')
quit()
# Load response body as JSON
resp_data = json.loads(r.text)
    # Look for the region object in the response to get the base url
if 'region' in resp_data["data"][0]:
base_url = resp_data["data"][0]["region"]["api"].split('//')
base_url = base_url[1]
else:
# Handle no region found, likely the email address was entered incorrectly
log.error(
'No region information returned from API, please check the email address.'
'Cannot continue')
quit()
return base_url
def post_request(base_url, uri, post_body, access_key, secret_key):
# Create variables required for request headers
request_id = str(uuid.uuid4())
request_date = get_hdr_date()
unsigned_auth_header = '{date}:{req_id}:{uri}:{app_key}'.format(
date=request_date,
req_id=request_id,
uri=uri,
app_key=APP_KEY
)
hmac_sha1 = hmac.new(
base64.b64decode(secret_key),
unsigned_auth_header.encode(),
digestmod=hashlib.sha1).digest()
sig = base64.encodebytes(hmac_sha1).rstrip()
headers = {
'Authorization': 'MC ' + access_key + ':' + sig.decode(),
'x-mc-app-id': APP_ID,
'x-mc-date': request_date,
'x-mc-req-id': request_id,
'Content-Type': 'application/json'
}
try:
# Send request to API
log.debug('Sending request to https://' + base_url + uri + ' with request Id: ' + request_id)
r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
# Handle Rate Limiting
        if r.status_code == 429:
            # Same string-arithmetic bug as above: cast the header value
            # (assumed milliseconds) before sleeping, then retry once.
            wait_seconds = int(r.headers['X-RateLimit-Reset']) / 1000.0
            log.warning('Rate limit hit. Sleeping for ' + str(wait_seconds) + ' seconds')
            time.sleep(wait_seconds)
            r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
# Handle errors
except Exception as e:
log.error('Unexpected error connecting to API. Exception: ' + str(e))
return 'error'
# Handle errors from API
if r.status_code != 200:
        log.error('Request to ' + uri + ' with request id: ' + request_id + ' returned with status code: ' +
                  str(r.status_code) + ', response body: ' + r.text)
return 'error'
# Return response body and response headers
return r.content, r.headers
def get_mta_siem_logs(checkpoint_dir, base_url, access_key, secret_key):
uri = "/api/audit/get-siem-logs"
# Set checkpoint file name to store page token
checkpoint_filename = os.path.join(checkpoint_dir, 'get_mta_siem_logs_checkpoint')
# Build post body for request
post_body = dict()
post_body['data'] = [{}]
post_body['data'][0]['type'] = 'MTA'
post_body['data'][0]['compress'] = True
if os.path.exists(checkpoint_filename):
post_body['data'][0]['token'] = read_file(checkpoint_filename)
# Send request to API
resp = post_request(base_url, uri, post_body, access_key, secret_key)
    now = time.time()  # epoch seconds; used below to compare file creation times numerically
# Process response
if resp != 'error':
resp_body = resp[0]
resp_headers = resp[1]
content_type = resp_headers['Content-Type']
# End if response is JSON as there is no log file to download
if content_type == 'application/json':
log.info('No more logs available')
return False
# Process log file
elif content_type == 'application/octet-stream':
file_name = resp_headers['Content-Disposition'].split('=\"')
file_name = file_name[1][:-1]
# Save files to LOG_FILE_PATH
write_file(os.path.join(LOG_FILE_PATH, file_name), resp_body)
# Save mc-siem-token page token to check point directory
write_file(checkpoint_filename, resp_headers['mc-siem-token'])
try:
if syslog_output is True:
for filename in os.listdir(LOG_FILE_PATH):
                        file_creation_time = os.path.getctime(os.path.join(LOG_FILE_PATH, filename))
                        # Numeric comparison; the original compared formatted date
                        # strings, which sort alphabetically, not chronologically.
                        if file_creation_time >= now:
log.info('Loading file: ' + filename + ' to output to ' + syslog_server + ':' + str(syslog_port))
with open(file=os.path.join(LOG_FILE_PATH, filename), mode='r', encoding='utf-8') as log_file:
lines = log_file.read().splitlines()
for line in lines:
syslogger.info(line)
log.info('Syslog output completed for file ' + filename)
except Exception as e:
log.error('Unexpected error writing to syslog. Exception: ' + str(e))
# return true to continue loop
return True
else:
# Handle errors
log.error('Unexpected response')
for header in resp_headers:
log.error(header)
return False
def run_script():
# discover base URL
try:
base_url = get_base_url(email_address=EMAIL_ADDRESS)
except Exception as e:
log.error('Error discovering base url for ' + EMAIL_ADDRESS + ' . Exception: ' + str(e))
quit()
# Request log data in a loop until there are no more logs to collect
try:
log.info('Getting MTA log data')
while get_mta_siem_logs(checkpoint_dir=CHK_POINT_DIR, base_url=base_url, access_key=ACCESS_KEY,
secret_key=SECRET_KEY) is True:
log.info('Getting more MTA log files')
except Exception as e:
log.error('Unexpected error getting MTA logs ' + (str(e)))
    # os.path.isfile(name) alone would check the current working directory,
    # not LOG_FILE_PATH, so join the path first.
    file_number = len([name for name in os.listdir(LOG_FILE_PATH)
                       if os.path.isfile(os.path.join(LOG_FILE_PATH, name))])
if delete_files or file_number >= log_file_threshold:
for filename in os.listdir(LOG_FILE_PATH):
file_path = os.path.join(LOG_FILE_PATH, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print('Failed to delete %s. Reason: %s' % (file_path, e))
quit()
# Run script
run_script()
It seems like it may be a race condition, but I am not sure how to confirm that since I can't reproduce it. I notice that SumoLogic has a modified version of this script with a different methodology for managing the files/paths. If that script works better than the main sample script above, can anybody explain why? I haven't had any issues with it yet.
https://github.com/SumoLogic/sumologic-content/blob/master/MimeCast/SumoLogic-Mimecast-Data-Collection/siem_collection.py
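One possibility worth ruling out: requests.post has no default timeout, so a connection that dies mid-request will block forever, which would look exactly like a hung process. A hedged sketch of adding timeouts to the calls made in post_request (the values are illustrative assumptions, not tuned recommendations):
import json
import logging
import requests

log = logging.getLogger(__name__)

def post_with_timeout(url, post_body, headers):
    # A dead connection now raises Timeout instead of hanging the whole script.
    try:
        return requests.post(url, data=json.dumps(post_body), headers=headers,
                             timeout=(10, 120))  # (connect, read) seconds
    except requests.exceptions.Timeout:
        log.error('Request to ' + url + ' timed out; will retry next cycle')
        return None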
I need a script that works like a cPanel checker, for more than one URL, where the URLs are stored in a txt file.
Usage: python script.py list.txt
Format of each line in list.txt: https://demo.cpanel.net:2083|democom|DemoCoA5620
This is my code, but it doesn't work. Can someone help me?
Thanks.
import requests, sys
from multiprocessing.dummy import Pool as ThreadPool
try:
with open(sys.argv[1], 'r') as f:
list_data = [line.strip() for line in f if line.strip()]
except IOError:
pass
def cpanel(url):
try:
data = {'user':'democom', 'pass':'DemoCoA5620'}
r = requests.post(url, data=data)
if r.status_code==200:
print "login success"
else:
print "login failed"
except:
pass
def chekers(url):
try:
cpanel(url)
except:
pass
def Main():
try:
start = timer()
pp = ThreadPool(25)
pr = pp.map(chekers, list_data)
print('Time: ' + str(timer() - start) + ' seconds')
except:
pass
if __name__ == '__main__':
Main()
I fixed your code so that it returns an array of booleans indicating the success of the cpanel function for each URL.
from __future__ import print_function
import requests
from multiprocessing.pool import ThreadPool
try:
list_data = ["https://demo.cpanel.net:2083|democom|DemoCoA5620",
"https://demo.cpanel.net:2083|UserDoesNotExist|WRONGPASSWORD",
]
except IOError:
pass
def cpanel(url):
try:
# try to split that url to get username / password
try:
url, username, password = url.split('|')
except Exception as e:
print("Url {} seems to have wrong format. Concrete error: {}".format(url, e))
return False
# build the correct url
url += '/login/?login_only=1'
# build post parameters
params = {'user': username,
'pass': password}
# make request
r = requests.post(url, params)
if r.status_code==200:
print("login for user {} success".format(username))
return True
else:
print("login for user {} failed due to Status Code {} and message \"{}\"".format(username, r.status_code, r.reason))
return False
except Exception as e:
print("Error occured for url {} ".format(e))
return False
def chekers(url):
return cpanel(url)
def Main():
try:
# start = timer()
pp = ThreadPool(1)
pr = pp.map(chekers, list_data)
print(pr)
# print('Time: ' + str(timer() - start) + ' seconds')
except:
pass
if __name__ == '__main__':
Main()
Output:
login for user democom success
login for user UserDoesNotExist failed due to Status Code 401 and message "Access Denied"
[True, False]
Be aware that I replaced your file read operation with some fixed URLs.
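If you want the file-driven usage back (python script.py list.txt), your original read block can be dropped in essentially unchanged:
import sys

# One "url|user|pass" entry per line, skipping blanks.
try:
    with open(sys.argv[1], 'r') as f:
        list_data = [line.strip() for line in f if line.strip()]
except IOError as e:
    print("Could not read {}: {}".format(sys.argv[1], e))
    sys.exit(1)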
Since you use requests.post, I guess you actually want to POST something to those URLs. Your code does not do that. If you just want to send a request, use the requests.get method.
See the official documentation for the requests package: https://2.python-requests.org/en/master/user/quickstart/#make-a-request for more details.
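For example, a plain GET against the same demo host (no form data attached) would look like this:
import requests

r = requests.get('https://demo.cpanel.net:2083/login/?login_only=1')
print(r.status_code)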
Also note that
"but it doesn't work"
is NOT a question.
I'm coding a website cloner in Python. It is doing fine for most files, but I have hit a challenge in getting the URL of background images, e.g.
<div style="background-image: url(images/banner.jpg)" >
The script detects background-image as a folder and assumes the URL is 'background-image: url(images/banner.jpg'. How do I get it to extract the actual URL?
Python 2.7
import urllib2
import sys
import socket
import os
import re
socket.setdefaulttimeout(15)
dataTypesToDownload = [".jpg", ".jpeg", ".png", ".gif", ".ico", ".css", ".js", ".html"]
url = 'http://example.com/'
pathbase = 'theme'
if "http://" not in url and "https://" not in url:
url = "http://"+url
try:
os.mkdir(pathbase)
except OSError:
pass
file = open(pathbase + "/index.html", "w")
try:
content = urllib2.urlopen(url).read()
except urllib2.URLError as e:
print "An error occured: " + str(e.reason)
exit()
resources = re.split("=\"|='", content)
first = False
for resource in resources:
if first == False:
first = True
continue
resource = re.split("\"|'", resource)[0]
if any(s in resource for s in dataTypesToDownload):
print "Downloading " + resource
try:
path = resource.split("/")
if len(path) != 1:
path.pop(len(path) - 1)
trail = "./" + pathbase + "/"
for folder in path:
trail += folder+"/"
try:
os.mkdir(trail)
except OSError:
pass
except IOError:
pass
try:
if "?" in resource:
download = open(pathbase + "/"+resource.split("?")[len(resource.split("?")) - 2], "w")
else:
download = open(pathbase + "/"+resource, "w")
print url+"/"+resource
dContent = urllib2.urlopen(url+"/"+resource).read()
except urllib2.URLError as e:
print "An error occured: " + str(e.reason)
download.close()
continue
except IOError:
pass
continue
download.write(dContent)
download.close()
print "Downloaded!"
file.write(content)
file.close()
I expect that when it encounters style="background-image: url(images/banner.jpg)", it should set resource to images/banner.jpg, but it is setting resource to background-image: url(images/banner.jpg instead.
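Since the script splits on =" and ' and keeps everything up to the next quote, the whole style attribute value comes through as one resource. One way to handle this case (a sketch, not a drop-in patch for the script above) is to extract url(...) values separately with a regex:
import re

content = '<div style="background-image: url(images/banner.jpg)" >'

# Non-greedy match inside url(...), tolerating optional quotes around the path.
css_urls = re.findall(r"url\(\s*['\"]?(.*?)['\"]?\s*\)", content)
print(css_urls)  # ['images/banner.jpg']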
I have a problem with downloading videos from my server, e.g. http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro%20USLUGE.mp4
Everything works perfectly when the internet is OK, and also when I disconnect the LAN cable from the Raspberry Pi for less than 10-15 seconds. But when the internet is off for more than 10-15 seconds, my download does not continue, or the videos are not properly downloaded (I merge them later with MP4Box, so they need to be complete). If someone has a suggestion for how to solve this problem, I would appreciate it very much.
Here is my code:
import os
import urllib
import urllib2
import time
import commands
import requests
import shutil
from urllib2 import URLError
urls = ['http://screensfiles.dbtouch.com/screens2/Companies/89/HD/00 APPS OVERVIEW.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/ILIRIJA BIOGRAD 2016.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Restoran marina.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/HT Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Croatia Full of life.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/04 PROJECTS.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/05 ATTEND.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro Hotel.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Plurato dron snimka 2.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Cornaro USLUGE.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Tasks.mp4',
'http://screensfiles.dbtouch.com/screens2/Companies/89/HD/Hotels Touch - Screens.mp4']
directory = "/home/pi/pythonSignage/current_playlist/videos_to_merge/"
i=1
for url in urls:
i += 1
print("current iter: ")
print(i)
if (len(urls) > 1):
url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "") + " "
else:
url_formatted = url.split('/')[-1].replace(" ", "").replace("%20", "")
url_formatted_name = url.split('/')[-1].replace(" ", "").replace("%20", "").rstrip()
while True:
print("inside while true")
try:
""" method 0 doesn't work """
print("try")
response = urllib2.urlopen(url, timeout=5)
content = response.read()
print("content")
f = open(directory + url_formatted_name, 'wb')
f.write(content)
f.close()
""" method 1 doesn't work """
#video_fancy_downloader = urllib.FancyURLopener()
#video_fancy_downloader.retrieve(url, directory + url_formatted_name)
""" method 2 - doesn't work """
#my_file = urllib.URLopener()
#my_file = retrieve(url, directory + url_formatted_name)
""" method 3 - doesn't work """
#response = requests.get(url, stream=True)
#response.raise_for_status()
#with open(directory + url_formatted_name, 'wb') as handle:
# for block in response.iter_content(1024):
# handle.write(block)
except:
print("error download, sleep 5 sec")
time.sleep(5)
print("end")
I have managed to solve my problem. This may not be the best approach, but it works.
Here is the function that downloads a video and returns whether it succeeded:
def do_download(destination, url):
    # wget flags: -c resume a partial file, -O write to destination,
    # -t 15000 retry attempts, -T 5 five-second timeout per attempt.
    # Requires "import subprocess" alongside the other imports above.
    comm = ["wget", "-c", "-O", destination, "-t", "15000", "-T", "5", url]
    proc = subprocess.Popen(comm, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    tmp = proc.stdout.read()
    if "wget: unable to resolve host address" in tmp:
        return False
    else:
        return True
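A possibly more robust variant (my assumption, not part of the original fix) is to rely on wget's exit status instead of scanning its output, since the error text can differ between failures; the loop below works the same with either version:
import subprocess

def do_download_rc(destination, url):
    # Exit status 0 means wget finished successfully; any other value
    # (e.g. 4 for a network failure) means the download should be retried.
    comm = ["wget", "-c", "-O", destination, "-t", "15000", "-T", "5", url]
    proc = subprocess.Popen(comm, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    proc.communicate()  # drain output so wget cannot block on a full pipe
    return proc.returncode == 0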
The core part of the downloading function is almost the same, but now it calls do_download inside the while loop and checks the response:
if os.path.isfile(directory+url_formatted_name) is False:
    print("must download file!")
    downl_success = False  # initialize before the loop below
    while downl_success is False:
print("inside while true")
try:
print("try")
while(do_download(directory + url_formatted_name, url) is False):
print(" ------- inside while for download ----------- ")
time.sleep(5)
downl_success = True
print("file downloaded fully!")
break
except HTTPError, e:
print "HTTPError", e.code, url
time.sleep(5)
except URLError, e:
print "URL Error", e.reason, url
time.sleep(5)
else:
print("file already downloaded no need to download it again!")