How to download a file via https using QNetworkAccessManager

How to download a file via https using QNetworkAccessManager - python

I'm trying to write a class using QtNetwork to download a file without freezing my GUI.
This seems to work with http URLs (tested with "http://webcode.me"), but not with the https URL from my example.
import os
from typing import Optional
import urllib.parse
from PyQt5.QtCore import pyqtSignal, QByteArray, QFile, QObject, QUrl
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkReply, QNetworkRequest
class AsyncDownloader(QObject):
def __init__(self, url: str, filename: str, parent=None):
super().__init__(parent)
self.net_mgr = QNetworkAccessManager()
self.req = QNetworkRequest(QUrl(url))
self.fetch_task: Optional[QNetworkReply] = None
self.data: Optional[QByteArray] = None
self.file = QFile(filename)
self.net_mgr.sslErrors.connect(self._ignore_ssl_errors)
def start_fetch(self):
self.fetch_task = self.net_mgr.get(self.req)
self.fetch_task.downloadProgress.connect(self.on_progress)
self.fetch_task.finished.connect(self.on_finished)
def _ignore_ssl_errors(self, reply: QNetworkReply, errors: List[QSslError]):
print(f"errors {errors}")
reply.ignoreSslErrors(errors)
def on_progress(self, bytes_received: int, bytes_total: int):
print(f"bytes received {bytes_received} (total {bytes_total})")
def on_finished(self):
print("finished")
self.data = self.fetch_task.readAll()
if not self.file.open(QFile.WriteOnly):
raise IOError(f"Unable to write to {self.file.fileName}")
self.file.write(self.data)
self.file.close()
print(f"file written to {self.file.fileName()}")
if __name__ == '__main__':
from pathlib import Path
from PyQt5.QtWidgets import QApplication
dl_path = os.path.join(str(Path.home()), "test_dl")
os.makedirs(dl_path, exist_ok=True)
app = QApplication([])
downloader = AsyncDownloader(
"https://github.com/PiRK/Electrum-ABC-Build-Tools/releases/download/v1.0/tor-linux",
os.path.join(dl_path, "tor")
)
downloader.start_fetch()
app.exec_()
The errors (or warnings?) I'm getting are:
qt.network.ssl: QSslSocket: cannot resolve EVP_PKEY_base_id
qt.network.ssl: QSslSocket: cannot resolve SSL_get_peer_certificate
qt.network.ssl: QSslSocket: cannot call unresolved function SSL_get_peer_certificate
errors [<PyQt5.QtNetwork.QSslError object at 0x7fad867112a0>]
qt.network.ssl: QSslSocket: cannot call unresolved function EVP_PKEY_base_id
bytes received 0 (total 0)
finished
file written to /home/myname/test_dl/tor
The file that is written is empty.
I tried adding the following lines just after self.net_mgr = ....:
parsed_url = urllib.parse.urlparse(url)
if parsed_url.scheme == "https":
self.net_mgr.connectToHostEncrypted(parsed_url.hostname)
This does not help.
The download work fine with wget:
$ wget "https://github.com/PiRK/Electrum-ABC-Build-Tools/releases/download/v1.0/tor-linux"
...
tor-linux 100%[=============================================================================================>] 15,34M 985KB/s in 16s
2023-02-16 16:36:51 (969 KB/s) - ‘tor-linux’ saved [16090880/16090880]

After failing to get my QNetworkAccessManager to work for HTTPS, I used an alternative solution based on Python's multiprocessing standard library and the requests library (not stdlib, but recommended by the official python documentation for urllib.request).
The only drawback is that I'm not getting any download progress information.
import multiprocessing
import requests
class Downloader:
"""URL downloader designed to be run as a separate process and to communicate
with the main process via a Queue.
The queue can be monitored for the following messages (as str objects):
- "#started#"
- "#HTTP status# {status code} {reason}"
(e.g "#HTTP status# 200 OK")
- "#content size# {size in bytes}"
- "#finished#"
"""
def __init__(self, url: str, filename: str):
self.url = url
self.filename = filename
self.queue = multiprocessing.Queue()
def run_download(self):
self.queue.put("#started#")
r = requests.get(url)
self.queue.put(f"#HTTP status# {r.status_code} {r.reason}")
self.queue.put(f"#content size# {len(r.content)}")
with open(self.filename, "wb") as f:
f.write(r.content)
self.queue.put("#finished#")
if __name__ == '__main__':
from pathlib import Path
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QTimer
import os
import sys
url = sys.argv[1]
fname = sys.argv[2]
dl_path = os.path.join(str(Path.home()), "test_dl")
os.makedirs(dl_path, exist_ok=True)
app = QApplication([])
downloader = Downloader(
url,
os.path.join(dl_path, fname)
)
process = multiprocessing.Process(target=downloader.run_download)
def read_queue():
while not downloader.queue.empty():
msg = downloader.queue.get()
print(msg)
timer = QTimer()
timer.timeout.connect(read_queue)
timer.timeout.connect(lambda: print("."))
process.start()
timer.start(500)
app.exec_()
Here is an example of output for a 156 MB file:
$ python downloader.py "https://stsci-opo.org/STScI-01GGF8H15VZ09MET9HFBRQX4S3.png" large_img_https.png
#started#
.
.
.
.
.
.
.
.
.
#HTTP status# 200 OK
#content size# 159725397
#finished#
.
.
.
^Z
[3]+ Stopped python downloader.py "https://stsci-opo.org/STScI-01GGF8H15VZ09MET9HFBRQX4S3.png" large_img_https.png
$ kill %3

To download a file via HTTPS using QNetworkAccessManager in Qt, you can use the following steps:
Create a QNetworkAccessManager object.
Create a QNetworkRequest object with the URL of the file you want to download.
Set the SSL configuration of the QNetworkRequest object. If you are using a self-signed certificate, you may need to ignore SSL errors.
Call the QNetworkAccessManager::get() function with the QNetworkRequest object to initiate the download.
When the download is complete, the QNetworkAccessManager::finished() signal is emitted with a QNetworkReply object. You can then read the downloaded data from the QNetworkReply object and save it to a file.
Here's some sample code that demonstrates how to download a file via HTTPS using QNetworkAccessManager:
QNetworkAccessManager *manager = new QNetworkAccessManager(this);
QUrl url("https://example.com/file-to-download");
QNetworkRequest request(url);
// Set SSL configuration
QSslConfiguration config = request.sslConfiguration();
config.setProtocol(QSsl::TlsV1_2);
request.setSslConfiguration(config);
// Ignore SSL errors
connect(manager, &QNetworkAccessManager::sslErrors, this, [](QNetworkReply *reply, const QList<QSslError> &errors){
reply->ignoreSslErrors();
});
QNetworkReply *reply = manager->get(request);
// Connect to the finished() signal
connect(reply, &QNetworkReply::finished, this, [reply]() {
if (reply->error() == QNetworkReply::NoError) {
// Read downloaded data and save to file
QByteArray data = reply->readAll();
QFile file("downloaded-file");
file.open(QIODevice::WriteOnly);
file.write(data);
file.close();
} else {
// Handle error
}
reply->deleteLater();
});

Related

Argo - submit workflow from python with input parameter file

I basically want to run this command: argo submit -n argo workflows/workflow.yaml -f params.json through the official python SDK.
This example covers how to submit a workflow manifest, but I don't know where to add the input parameter file.
import os
from pprint import pprint
import yaml
from pathlib import Path
import argo_workflows
from argo_workflows.api import workflow_service_api
from argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow_create_request import \
IoArgoprojWorkflowV1alpha1WorkflowCreateRequest
configuration = argo_workflows.Configuration(host="https://localhost:2746")
configuration.verify_ssl = False
with open("workflows/workflow.yaml", "r") as f:
manifest = yaml.safe_load(f)
api_client = argo_workflows.ApiClient(configuration)
api_instance = workflow_service_api.WorkflowServiceApi(api_client)
api_response = api_instance.create_workflow(
namespace="argo",
body=IoArgoprojWorkflowV1alpha1WorkflowCreateRequest(workflow=manifest, _check_type=False),
_check_return_type=False)
pprint(api_response)
Where to pass in the params.json file?

I found this snippet in the docs of WorkflowServiceApi.md (which was apparently too big to render as markdown):
import time
import argo_workflows
from argo_workflows.api import workflow_service_api
from argo_workflows.model.grpc_gateway_runtime_error import GrpcGatewayRuntimeError
from argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow_submit_request import IoArgoprojWorkflowV1alpha1WorkflowSubmitRequest
from argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow import IoArgoprojWorkflowV1alpha1Workflow
from pprint import pprint
# Defining the host is optional and defaults to http://localhost:2746
# See configuration.py for a list of all supported configuration parameters.
configuration = argo_workflows.Configuration(
host = "http://localhost:2746"
)
# Enter a context with an instance of the API client
with argo_workflows.ApiClient() as api_client:
# Create an instance of the API class
api_instance = workflow_service_api.WorkflowServiceApi(api_client)
namespace = "namespace_example" # str |
body = IoArgoprojWorkflowV1alpha1WorkflowSubmitRequest(
namespace="namespace_example",
resource_kind="resource_kind_example",
resource_name="resource_name_example",
submit_options=IoArgoprojWorkflowV1alpha1SubmitOpts(
annotations="annotations_example",
dry_run=True,
entry_point="entry_point_example",
generate_name="generate_name_example",
labels="labels_example",
name="name_example",
owner_reference=OwnerReference(
api_version="api_version_example",
block_owner_deletion=True,
controller=True,
kind="kind_example",
name="name_example",
uid="uid_example",
),
parameter_file="parameter_file_example",
parameters=[
"parameters_example",
],
pod_priority_class_name="pod_priority_class_name_example",
priority=1,
server_dry_run=True,
service_account="service_account_example",
),
) # IoArgoprojWorkflowV1alpha1WorkflowSubmitRequest |
# example passing only required values which don't have defaults set
try:
api_response = api_instance.submit_workflow(namespace, body)
pprint(api_response)
except argo_workflows.ApiException as e:
print("Exception when calling WorkflowServiceApi->submit_workflow: %s\n" % e)
Have you tried using a IoArgoprojWorkflowV1alpha1WorkflowSubmitRequest? Looks like it has submit_options of type IoArgoprojWorkflowV1alpha1SubmitOpts which has a parameter_file param.

ansible.inventory.manager check inventory for error in python

I have an Ansible hosts.ini with an error
[linux]
server01 pr_ip_address = 10.0.0.1
I wrote following function in python
from ansible.inventory.manager import InventoryManager
from ansible.parsing.dataloader import DataLoader
def check_inventory():
loader = DataLoader()
InventoryManager(loader=loader, sources='hosts.ini')
check_inventory()
I'm trying to use the following messages as stderr :
[WARNING]: * Failed to parse
/inventories/hosts.ini
with script plugin: problem running
/hosts.ini
--list ([Errno 8] Exec format error: '/hosts.ini')
[WARNING]: * Failed to parse
/hosts.ini
with ini plugin: /hosts.ini:913: Expected key=value host variable assignment, got:
pr_ip_address
[WARNING]: Unable to parse
/hosts.ini
as an inventory source
[WARNING]: No inventory was parsed, only implicit localhost is available
My problem, I don't know to grab it and can't write it to stderr or stdout.
When my ini-file is right like:
[linux]
server01 pr_ip_address=10.0.0.1
I get nothing back.... I thought I can use it for a try except or if else condition, but I don't know how.

I found a solution for my problem. I Use ansible_runner and capture the Output of it. this works well for me :). Maybe it helps someone else too!
import os
import sys
import ansible_runner
from io import StringIO
from termcolor import colored, cprint
def verify(inipath):
class Capturing(list):
def __enter__(self):
self._stdout = sys.stdout
sys.stdout = self._stringio = StringIO()
return self
def __exit__(self, *args):
self.extend(self._stringio.getvalue().splitlines())
del self._stringio # free up some memory
sys.stdout = self._stdout
with Capturing() as output:
r = ansible_runner.run(private_data_dir=inipath,
inventory='hosts.ini',
host_pattern='localhost',
module='shell',
module_args='whoami')
print(r.status, str)
def words_in_string(word_list, a_string):
return set(word_list).intersection(a_string.split())
error_handling = ['error', 'Failed', 'Errno']
if words_in_string(error_handling, str(output)):
print(output)
cprint('something went wrong with your hosts.ini :(', 'green')
sys.exit(0)
else:
cprint('hosts.ini verify was successfully!!!', 'green')
pass
if __name__ == "__main__":
verify(inipath='./')

An Alternate Approach to the same:-
from ansible.inventory.manager import InventoryManager
from ansible.parsing.dataloader import DataLoader
def check_inventory():
loader = DataLoader()
inventory_obj = InventoryManager(loader=loader, sources=None, parse=True)
valid_inventory = inventory_obj.parse_source(source='hosts.ini')
if valid_inventory:
return True
return False
validate_inventory = check_inventory()
if validate_inventory:
print('hosts.ini verification was successful!')
else:
print('something went wrong with your hosts.ini inventory')

Can i get the generated ip-address or domain name of flask_ngrok or py-ngrok and return it to 127.0.0.1/

I'm trying to get the generated domain name or IP-address of flask_ngrok or py-ngrok after been deploy. I want to deploy flask_app to localhost and get the new IP-address or domain name on the main page.
I.E: If I access 127.0.0.1/ I want it to return something like
You can now log in through https://aaf8447ee878.ngrok.io/
I have tried checking through the directories and read some help but I can't still get it. Thanks in advance ❤

add
import atexit
import json
import os
import platform
import shutil
import subprocess
import tempfile
import time
import zipfile
from pathlib import Path
from threading import Timer
import requests
def _run_ngrok():
ngrok_path = str(Path(tempfile.gettempdir(), "ngrok"))
_download_ngrok(ngrok_path)
system = platform.system()
if system == "Darwin":
command = "ngrok"
elif system == "Windows":
command = "ngrok.exe"
elif system == "Linux":
command = "ngrok"
else:
raise Exception(f"{system} is not supported")
executable = str(Path(ngrok_path, command))
os.chmod(executable, 777)
ngrok = subprocess.Popen([executable, 'http', '5000'])
atexit.register(ngrok.terminate)
localhost_url = "http://localhost:4040/api/tunnels" # Url with tunnel details
time.sleep(1)
tunnel_url = requests.get(localhost_url).text # Get the tunnel information
j = json.loads(tunnel_url)
tunnel_url = j['tunnels'][0]['public_url'] # Do the parsing of the get
tunnel_url = tunnel_url.replace("https", "http")
return tunnel_url
def _download_ngrok(ngrok_path):
if Path(ngrok_path).exists():
return
system = platform.system()
if system == "Darwin":
url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-darwin-amd64.zip"
elif system == "Windows":
url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-windows-amd64.zip"
elif system == "Linux":
url = "https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip"
else:
raise Exception(f"{system} is not supported")
download_path = _download_file(url)
with zipfile.ZipFile(download_path, "r") as zip_ref:
zip_ref.extractall(ngrok_path)
def _download_file(url):
local_filename = url.split('/')[-1]
r = requests.get(url, stream=True)
download_path = str(Path(tempfile.gettempdir(), local_filename))
with open(download_path, 'wb') as f:
shutil.copyfileobj(r.raw, f)
return download_path
def start_ngrok():
global ngrok_address
ngrok_address = _run_ngrok()
print(f" * Running on {ngrok_address}")
print(f" * Traffic stats available on http://127.0.0.1:4040")
def run_with_ngrok(app):
"""
The provided Flask app will be securely exposed to the public internet via ngrok when run,
and the its ngrok address will be printed to stdout
:param app: a Flask application object
:return: None
"""
old_run = app.run
def new_run():
thread = Timer(1, start_ngrok)
thread.setDaemon(True)
thread.start()
old_run()
app.run = new_run
####################
dont import flask_ngrok
at the end at before name == 'main' add this function
def ngrok_url():
global tunnel_url
while True:
try:
print(ngrok_address)
except Exception as e:
print(e)
and after before app.run() put
thread = Timer(1, ngrok_url)
thread.setDaemon(True)
thread.start()
and run Warning: this will crash your code editor/ or terminal if u dont want that in the ngrok url function replace print with whatever you want to do with the url

and you dont need that
global tunnel_url
def ngrok_url():
while True:
try:
print(ngrok_address)
except Exception as e:
print(e)
you can delete the threading part before the name == 'main' too after the imports set
ngrok_address = ''
then you can accses the ngrok_address anywhere in your code

I found out the easiest way to do this is the just copy the url when the user is visiting the site. You can do this by...
#app.before_request
def before_request():
global url
url = request.url
# url = url.replace('http://', 'https://', 1)
url = url.split('.ngrok.io')[0]
url += '.ngrok.io'

executescript in python not passing flow file to next processor

I am trying to run executescript process in Apache Nifi using python but having problem with passing flow file to next processor in my data flow.
If I run the standalone flow file creation and writing snippet it works and I can read flow file in the next processor but when I try to enrich it, it simply does not pass the flow file. In fact no error is generated and somehow I have no clue how to proceed. I am bit new with python and nifi and appreciate your help with this particular issue.
Below is the code I am using and you can see its very simple. I just want to create and write some string to flow file using some logic. But no luck so far
import urllib2
import json
import datetime
import csv
import time
import sys
import traceback
from org.apache.nifi.processor.io import OutputStreamCallback
from org.python.core.util import StringUtil
class WriteContentCallback(OutputStreamCallback):
def __init__(self, content):
self.content_text = content
def process(self, outputStream):
try:
outputStream.write(StringUtil.toBytes(self.content_text))
except:
traceback.print_exc(file=sys.stdout)
raise
page_id = "dsssssss"
access_token = "sdfsdfsf%sdfsdf"
def scrapeFacebookPageFeedStatus(page_id, access_token):
flowFile = session.create()
flowFile = session.write(flowFile, WriteContentCallback("Hello there this is my data"))
flowFile = session.write()
session.transfer(flowFile, REL_SUCCESS)
print "\nDone!\n%s Statuses Processed in %s" % \
(num_processed, datetime.datetime.now() - scrape_starttime)
if __name__ == '__main__':
scrapeFacebookPageFeedStatus(page_id, access_token)

I believe the problem is the check for __main__:
if __name__ == '__main__':
scrapeFacebookPageFeedStatus(page_id, access_token)
__builtin__ was the actual module name in my experiment. You could either remove that check, or add a different one if you want to preserve your separate testing path.

Python Script works when run from command line but not when run from windows service

I have created a windwos service utilising the following code:
import win32service
import win32serviceutil
import win32api
import win32con
import win32event
import win32evtlogutil
import os, sys, string, time
class aservice(win32serviceutil.ServiceFramework):
_svc_name_ = "PAStoDistillerIFC"
_svc_display_name_ = "PAS DW to Distiller Interface"
_svc_description_ = "Service that checks the Clinical Research folder for any new files from PAS to process in Distiller"
def __init__(self, args):
win32serviceutil.ServiceFramework.__init__(self, args)
self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
def SvcStop(self):
self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
win32event.SetEvent(self.hWaitStop)
def SvcDoRun(self):
import servicemanager
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,servicemanager.PYS_SERVICE_STARTED,(self._svc_name_, ''))
#self.timeout = 640000 #640 seconds / 10 minutes (value is in milliseconds)
self.timeout = 120000 #120 seconds / 2 minutes
# This is how long the service will wait to run / refresh itself (see script below)
while 1:
# Wait for service stop signal, if timeout, loop again
rc = win32event.WaitForSingleObject(self.hWaitStop, self.timeout)
# Check to see if self.hWaitStop happened
if rc == win32event.WAIT_OBJECT_0:
# Stop signal encountered
servicemanager.LogInfoMsg("PAStoDistillerIFC - STOPPED!") #For Event Log
break
else:
#[actual service code between rests]
try:
file_path = "D:\\SCRIPTS\\script.py"
execfile(file_path) #Execute the script
except:
servicemanager.LogInfoMsg("File CRASHED")
pass
#[actual service code between rests]
def ctrlHandler(ctrlType):
return True
if __name__ == '__main__':
win32api.SetConsoleCtrlHandler(ctrlHandler, True)
win32serviceutil.HandleCommandLine(aservice)
To run this script:
import os, re, urllib, urllib2, time, datetime
def postXML( path, fname):
fileresultop = open("D:\\CLinicalResearch\\SCRIPTS\\LOG.txt", 'a') # open result file
fileresultop.write('CheckXXX ')
fileresultop.close()
now = datetime.datetime.now() #####ALWAYS CRASHES HERE######
fileresult = open("D:\\SCRIPTS\\IFCPYTHONLOG.txt", 'a') # open result file
fileresultop = open("D:\\SCRIPTS\\LOG.txt", 'a')
fileresultop.write('Check2 ')
fileresultop.close()
path="D:\\Test2" # Put location of XML files here.
procpath="D:\\Test2Processed" # Location of processed files
now = datetime.datetime.now()
dirList=os.listdir(path)
for fname in dirList: # For each file in directory
if re.search("PatientIndexInsert", fname): # Brand new patient records
fileresultop = open("D:\\SCRIPTS\\LOG.txt", 'a') # open result file
fileresultop.write('Check1 ')
fileresultop.close()
postXML(path, fname)
I have pared down the script to the bare code where I believe this is crashing.
This works perfectly from the command line, I run the windows service under my own login.
Once I take the datetime function out of the function it seems to work.
Edit 1: I saw that the service runs in a blank environment. I don't have any environmental variables set myself.
Edit 2: Added traceback:
File "D:\ClinicalResearch\SCRIPTS\PAS2DIST.py", line 23, in <module>
postXML(path, fname)
File "D:\ClinicalResearch\SCRIPTS\PAS2DIST.py", line 6, in postXML
now = datetime.datetime.now()
NameError: global name 'datetime' is not defined

I didn't find the cause but I did find a workaround.
I needed to import all the same libraries into the function too. Once I did that, worked like a charm.
Hope this can help someone else.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to download a file via https using QNetworkAccessManager - python

Related

Argo - submit workflow from python with input parameter file

ansible.inventory.manager check inventory for error in python

Can i get the generated ip-address or domain name of flask_ngrok or py-ngrok and return it to 127.0.0.1/

executescript in python not passing flow file to next processor

Python Script works when run from command line but not when run from windows service

Categories

Resources