Getting the link to PDF files from QWebEngineView - python

I am working with QWebEngineView and have found that when trying to click on a pdf file link it won't open the file. I have found that QWebEngineView has not way to display pdf files on it's own. With some research I can now download pdf files and display them on their own, however I need to be able to get the link of the pdf file from QWebEngineView to know which one to download. The problem is that the .url() function only returns the url of the current webpage and doesn't seem to be affected by me clicking the link of the pdf file and I can't find any other way to get the link of the pdf file. Any ideas on how to get the link to the pdf file? Any help is appreciated.

You can use javascript to get all the links and then filter by the extension:
import sys
from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView
def main():
app = QApplication(sys.argv)
url = QUrl("https://www.princexml.com/samples/")
view = QWebEngineView()
def callback(links):
for link in links:
if link.endswith(".pdf"):
print(link)
QCoreApplication.quit()
def handle_load_finished(ok):
if ok:
view.page().runJavaScript(
"""
(function() {
// https://stackoverflow.com/a/3824292/6622587
var urls = [];
for(var i = document.links.length; i --> 0;)
if(document.links[i].hostname === location.hostname)
urls.push(document.links[i].href);
return urls;
})();""",
callback,
)
view.loadFinished.connect(handle_load_finished)
view.load(url)
view.resize(640, 480)
view.show()
sys.exit(app.exec_())
if __name__ == "__main__":
main()
Output:
http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf
Update:
If you want to download the PDF then it is not necessary to implement the above since QWebEngineView does allow downloads.
import sys
from PyQt5.QtCore import QCoreApplication, QFileInfo, QUrl
from PyQt5.QtWidgets import QApplication, QFileDialog
from PyQt5.QtWebEngineWidgets import QWebEngineView
def handle_download_erequested(download):
download.downloadProgress.connect(print)
download.stateChanged.connect(print)
download.finished.connect(lambda: print("download finished"))
old_path = download.url().path() # download.path()
suffix = QFileInfo(old_path).suffix()
path, _ = QFileDialog.getSaveFileName(None, "Save File", old_path, "*." + suffix)
if path:
download.setPath(path)
download.accept()
def main():
app = QApplication(sys.argv)
url = QUrl("https://www.princexml.com/samples/")
view = QWebEngineView()
view.page().profile().downloadRequested.connect(handle_download_erequested)
view.load(url)
view.resize(640, 480)
view.show()
sys.exit(app.exec_())
if __name__ == "__main__":
main()
Also QWebEngineView has a PDF viewer
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
def main():
print(
f"PyQt5 version: {QtCore.PYQT_VERSION_STR}, Qt version: {QtCore.QT_VERSION_STR}"
)
app = QtWidgets.QApplication(sys.argv)
view = QtWebEngineWidgets.QWebEngineView()
settings = view.settings()
settings.setAttribute(QtWebEngineWidgets.QWebEngineSettings.PluginsEnabled, True)
url = QtCore.QUrl("https://www.princexml.com/samples/invoice/invoicesample.pdf")
view.load(url)
view.resize(640, 480)
view.show()
sys.exit(app.exec_())
if __name__ == "__main__":
main()

Related

QWebEngineView crashes when accessing specific websites

I have a very weird bug with QWebEngineView. Below is some code which creates a QWebEngineView.
import sys
from PyQt5.QtCore import QCoreApplication, QFileInfo, QUrl
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEngineSettings, QWebEngineProfile, QWebEnginePage
class Widget(QWidget):
def __init__(self):
super().__init__()
webview = QWebEngineView(self)
webview.settings().setAttribute(QWebEngineSettings.PluginsEnabled, True)
profile = QWebEngineProfile("my_profile", webview)
webpage = QWebEnginePage(profile, webview)
webview.setUrl(QUrl("https://www.youtube.com"))
#self.webview.setPage(self.webpage)
webview.setGeometry(0, 0, 800, 480)
webview.show()
self.show()
if __name__ == "__main__":
app = QApplication(sys.argv)
w = Widget()
app.exec_()
My issue is that when I run the code as it is (with the self.webview.setPage(self.webpage) line commented out) I am able to open PDF files, however opening YouTube crashes the program. However if I uncomment the line and run it, I can't open PDF files (although it doesn't crash the program it just doesn't open the PDF file), but I can then open YouTube with no issue.
You have to set the QWebEngineProfile and QWebEnginePage first and then enable the plugins:
import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication, QVBoxLayout, QWidget
from PyQt5.QtWebEngineWidgets import (
QWebEnginePage,
QWebEngineProfile,
QWebEngineSettings,
QWebEngineView,
)
class Widget(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
profile = QWebEngineProfile("my_profile", self)
webpage = QWebEnginePage(profile, self)
webview = QWebEngineView(self)
webview.setPage(webpage)
webview.settings().setAttribute(QWebEngineSettings.PluginsEnabled, True)
webview.load(QUrl("https://www.youtube.com"))
lay = QVBoxLayout(self)
lay.addWidget(webview)
self.resize(640, 480)
if __name__ == "__main__":
app = QApplication(sys.argv)
w = Widget()
w.show()
app.exec_()

pyqt5 qwebenginview doesn't autoplay youtube videos

I am trying to use Qwebengineview to view a list of youtube videos but the browser doesn't autoplay the videos, I am using PyQt5 5.13.1 Python 3.6
here is a sample code
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEngineProfile, QWebEnginePage
from PyQt5.QtWidgets import QApplication
if __name__ == '__main__':
import sys
app = QApplication(sys.argv)
webview = QWebEngineView()
profile = QWebEngineProfile("my_profile", webview)
profile.defaultProfile().setPersistentCookiesPolicy(QWebEngineProfile.ForcePersistentCookies)
webpage = QWebEnginePage(profile, webview)
webview.setPage(webpage)
webview.load(QUrl("https://www.youtube.com/watch?v=VzIVI2fsRb0"))
webview.show()
sys.exit(app.exec_())
I have find a solution for this using QWebEngineSettings and here is a full working example in case someone needs it
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEngineProfile, QWebEnginePage, QWebEngineSettings
from PyQt5.QtWidgets import QApplication
import time
if __name__ == '__main__':
import sys
app = QApplication(sys.argv)
webview = QWebEngineView()
profile = QWebEngineProfile("my_profile", webview)
profile.defaultProfile().setPersistentCookiesPolicy(QWebEngineProfile.ForcePersistentCookies)
webpage = QWebEnginePage(profile, webview)
webpage.settings().setAttribute(QWebEngineSettings.PlaybackRequiresUserGesture, False)
webview.setPage(webpage)
webview.load(QUrl("https://www.youtube.com/watch?v=aKCNrkERJ3E"))
webview.show()
sys.exit(app.exec_())

Pdfjs print button does not work with PyQt5

Straight to issue, when pdf loads with pdfjs into pyqt5, seems print button does not work correctly, also the same for download button.
How could this bug be fixed?
The code:
import sys
from PyQt5 import QtCore, QtWidgets, QtGui, QtWebEngineWidgets
PDFJS = 'file:///pdfjs/web/viewer.html'
PDF = 'file:///file0.pdf'
class PdfReport(QtWebEngineWidgets.QWebEngineView):
def __init__(self, parent=None):
super(PdfReport, self).__init__(parent)
self.load(QtCore.QUrl.fromUserInput('%s?file=%s' % (PDFJS, PDF)))
def sizeHint(self):
return QtCore.QSize(640, 480)
if __name__ == '__main__':
app = QtWidgets.QApplication(sys.argv)
im = PdfReport()
im.show()
sys.exit(app.exec_())
Display:
Any idea how to fix that?
The print task is not enabled in Qt WebEngine so the fault is displayed (I'm still trying to get the data). But in the case of the download button of the PDF it is possible and for this you must use the downloadRequested signal of the QWebEngineProfile:
import os
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
PDFJS = QtCore.QUrl.fromLocalFile(
os.path.join(CURRENT_DIR, "pdfjs/web/viewer.html")
).toString()
class PdfReport(QtWebEngineWidgets.QWebEngineView):
def __init__(self, parent=None):
super(PdfReport, self).__init__(parent)
QtWebEngineWidgets.QWebEngineProfile.defaultProfile().downloadRequested.connect(
self.on_downloadRequested
)
def load_pdf(self, filename):
url = QtCore.QUrl.fromLocalFile(filename).toString()
self.load(QtCore.QUrl.fromUserInput("%s?file=%s" % (PDFJS, url)))
def sizeHint(self):
return QtCore.QSize(640, 480)
#QtCore.pyqtSlot(QtWebEngineWidgets.QWebEngineDownloadItem)
def on_downloadRequested(self, download):
path, _ = QtWidgets.QFileDialog.getSaveFileName(
self, "Save File", "sample.pdf", "*.pdf"
)
if path:
download.setPath(path)
download.accept()
if __name__ == "__main__":
app = QtWidgets.QApplication(sys.argv)
w = PdfReport()
path = os.path.join(CURRENT_DIR, "file0.pdf")
w.load_pdf(path)
w.show()
sys.exit(app.exec_())
That's not a PyQt5 button, that is a button from your web view. It may not work because of your webView object or because the web part of your code lacks functionality for the button.

pyQt5 pass a file object to QWebEngineView

how to send an opened file inside the main application to a page in QWebEngineView and handle it by javascript FileReader() function , as it was opened by an html5 file input inside the QWebEngineView
here is a part of my code
# -*- coding: utf-8 -*-
import sys, os
from PyQt5.QtWidgets import ( QApplication, QMainWindow)
from PyQt5.QtCore import QUrl
import PyQt5.QtWebEngineWidgets as QtWebEngineWidgets
import interface
class MyWindow(QMainWindow):
def __init__(self, parent=None):
QMainWindow.__init__(self, parent)
self.ui = interface.Ui_MainWindow()
self.ui.setupUi(self)
web_view = QtWebEngineWidgets.QWebEngineView()
self.web_view = web_view
self.ui.verticalLayout_navigateur.addWidget(web_view)
url = self.local_url("src/index.html")
self.web_view.load(url)
f = open('myfile.json', 'r')
#send f to self.web_view and handle it by javascript FileReader() function
def local_url(self, relativePath):
absolutePath = os.path.abspath(relativePath)
url = QUrl.fromLocalFile(absolutePath)
return url
if __name__ == '__main__':
app = QApplication(sys.argv)
window = MyWindow()
window.show()
sys.exit(app.exec_())
thanks
i found an alternative solution, it's quite simple, instead of sending object file to read with javascript, i have just to read it in the main application and inject the content via a QWebChannel

Filling out a form using PyQt and QWebview

I would like to use PyQt/QWebview to 1) load a specific url, 2) enter information into a form, 3) click buttons/links. Mechanize does not work because I need an actual browser.
Here's my code:
import sys
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from PyQt4 import QtCore
app = QApplication(sys.argv)
web = QWebView()
web.load(QUrl("https://www.lendingclub.com/account/gotoLogin.action"))
def fillForm():
doc = web.page().mainFrame().documentElement()
user = doc.findFirst("input[id=master_username]")
passwd = doc.findFirst("input[id=master_password]")
user.setAttribute("value", "email#email.com")
passwd.setAttribute("value", "password")
button = doc.findFirst("input[id=master_sign-in-submit]")
button.evaluateJavaScript("click()")
QtCore.QObject.connect(web, QtCore.SIGNAL("loadFinished"), fillForm)
web.show()
sys.exit(app.exec_())
The page loads correctly, but no input is entered and the form is not submitted. Any ideas?
This helped me to make it work:
user.setAttribute("value", "email#email.com")
-->
user.evaluateJavaScript("this.value = 'email#email.com'")
Attribute and property are different things.
One more fix:
click() --> this.click()
For anyone looking to do this with PyQt5, this example may help as several things have changed. Obviously the javascript needs to be adjusted based on the contents of the website.
import os
import sys
from PyQt5.QtWidgets import QApplication, QVBoxLayout, QWidget
from PyQt5.QtCore import QUrl, QEventLoop
from PyQt5.QtWebEngineWidgets import QWebEngineView
class WebPage(QWebEngineView):
def __init__(self):
QWebEngineView.__init__(self)
self.load(QUrl("https://www.url.com"))
self.loadFinished.connect(self._on_load_finished)
def _on_load_finished(self):
print("Finished Loading")
self.page().toHtml(self.Callable)
def Callable(self, html_str):
self.html = html_str
self.page().runJavaScript("document.getElementsByName('loginid')[0].value = 'email#email.com'")
self.page().runJavaScript("document.getElementsByName('password')[0].value = 'test'")
self.page().runJavaScript ("document.getElementById('signin').click()")
if __name__ == "__main__":
app = QApplication(sys.argv)
web = WebPage()
web.show()
sys.exit(app.exec_()) # only need one app, one running event loop
You might be able to do it with Webkit/QWebView but what about using selenium: http://code.google.com/p/selenium/ ? It is designed for exactly this kind of browser automation and has nice python bindings.

Categories

Resources