Converting multiple HTML files to PDF using PyQt5 - python

I tried following this answer: How to use PyQT5 to convert multiple HTML docs to PDF in one loop
I modified it to convert all html files found in a local folder. For example htmls is a list of html files to be converted: [Q:\Ray\test1.html, Q:\Ray\prac2.html]
This is the code. However, when I try to run it, Python just freezes and I have to stop the run.
import os
import glob
from PyQt5 import QtWidgets, QtWebEngineWidgets
class PdfPage(QtWebEngineWidgets.QWebEnginePage):
def __init__(self):
super().__init__()
self._htmls = []
self._current_path = ""
self.setZoomFactor(1)
self.loadFinished.connect(self._handleLoadFinished)
self.pdfPrintingFinished.connect(self._handlePrintingFinished)
def convert(self, htmls):
self._htmls = iter(zip(htmls))
self._fetchNext()
def _fetchNext(self):
try:
self._current_path = next(self._htmls)
except StopIteration:
return False
def _handleLoadFinished(self, ok):
if ok:
self.printToPdf(self._current_path)
def _handlePrintingFinished(self, filePath, success):
print("finished:", filePath, success)
if not self._fetchNext():
QtWidgets.QApplication.quit()
if __name__ == "__main__":
current_dir = os.path.dirname(os.path.realpath(__file__))
folder= current_dir+ '\\*.HTML'
htmls= glob.glob(folder)
app = QtWidgets.QApplication([])
page = PdfPage()
page.convert(htmls)
app.exec_()
print("finished")

It seems that the OP has not understood the logic of my previous solution which is:
Get the resource, in this case files,
Load it on the page,
When the load is finished then print the content of the page,
When the printing is finished then execute step 1 with the next resource.
In this it does not perform step 2, on the other hand it is recommended that the path of the pdf has a name other than the html
import os
import glob
from PyQt5.QtCore import QUrl
from PyQt5 import QtWidgets, QtWebEngineWidgets
class PdfPage(QtWebEngineWidgets.QWebEnginePage):
def __init__(self):
super().__init__()
self._htmls = []
self._current_path = ""
self.setZoomFactor(1)
self.loadFinished.connect(self._handleLoadFinished)
self.pdfPrintingFinished.connect(self._handlePrintingFinished)
def convert(self, htmls):
self._htmls = iter(htmls)
self._fetchNext()
def _fetchNext(self):
try:
self._current_path = next(self._htmls)
except StopIteration:
return False
else:
self.load(QUrl.fromLocalFile(self._current_path))
return True
def _handleLoadFinished(self, ok):
if ok:
self.printToPdf(self._current_path + ".pdf")
def _handlePrintingFinished(self, filePath, success):
print("finished:", filePath, success)
if not self._fetchNext():
QtWidgets.QApplication.quit()
if __name__ == "__main__":
current_dir = os.path.dirname(os.path.realpath(__file__))
folder= current_dir+ '\\*.HTML'
htmls = glob.glob(folder)
print(htmls)
if htmls:
app = QtWidgets.QApplication([])
page = PdfPage()
page.convert(htmls)
app.exec_()
print("finished")

Related

QPixmapCache in PyQt6

Hello I am trying to create a PyQt6 widget that animates over a set of images, but at a precise frame rate that varies as a function of realtime parameters. I insert QPixmaps into the cache within a for loop, and am able to find the QPixmap objects through QPixmap.find() within the same for loop, but outside the loop .find() returns None.
Below is the script. I can include the image series if needed. Appreciate any help.
import sys
from PyQt6.QtWidgets import (
QApplication,
QMainWindow,
QLabel
)
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QPixmap, QPixmapCache
from pathlib import Path
class MainWindow(QMainWindow):
def __init__(self):
super().__init__()
self.animating_label = QLabel('animation here')
self.animating_label.setAlignment(Qt.AlignmentFlag.AlignHCenter | Qt.AlignmentFlag.AlignVCenter)
self.animating_label.pixmap().size().setWidth(200)
self.animating_label.pixmap().size().setHeight(200)
self.setCentralWidget(self.animating_label)
parent_path = Path(__file__).parent
self.images_path = parent_path / 'images/animation_set/'
self.animating_label.setPixmap(QPixmap(self.images_path.__str__() + "/clock01.png"))
self.pixmapcache_keys: [str] = []
self.load_images()
test = QPixmapCache.find("clock02")
try:
self.animating_label.setPixmap(test)
except:
if test == None:
print("Failure: QPixMapCache.find() returned", test)
def load_images(self):
image_suffixes = ['.jpg', '.png', '.gif', '.bmp']
imgs_found_count = 0
for filepath in self.images_path.iterdir():
if filepath.suffix in image_suffixes:
imgs_found_count += 1
cache_key = filepath.stem
self.pixmapcache_keys.append(cache_key)
if not QPixmapCache.find(cache_key):
pixmap = QPixmap(filepath.__str__())
QPixmapCache.insert(cache_key, pixmap)
print("pixmap %s" % cache_key, QPixmapCache.find(cache_key))
print(imgs_found_count, "image(s) found in animation_set directory.", len(self.pixmapcache_keys),
"keys loaded into QPixmapCache")
app = QApplication(sys.argv)
window = MainWindow()
window.show()
# start Qt event loop
app.exec()
print("Script complete.")
sys.exit(1)
Here is the output:
pixmap clock04 <PyQt6.QtGui.QPixmap object at 0x101c84b30>
pixmap clock10 <PyQt6.QtGui.QPixmap object at 0x101c84970>
pixmap clock11 <PyQt6.QtGui.QPixmap object at 0x101c84b30>
pixmap clock05 <PyQt6.QtGui.QPixmap object at 0x101c84970>
...
24 image(s) found in animation_set directory. 24 keys loaded into QPixmapCache
Failure: QPixMapCache.find() returned None
Script complete.
I was able to get the animation working using a list of QPixmaps instead of QPixmapCache, but I am not sure if this is the optimal approach.
Generating Qpixmap list from files:
for filepath in sorted(self.images_path.iterdir()):
if filepath.suffix in image_suffixes:
qpixmap = QPixmap(filepath.__str__())
self.pixmaps.append(qpixmap)
called elsewhere on a timer
def advance(self):
if self.pixmap_current_index >= len(self.pixmaps) - 1:
self.pixmap_current_index = 0
else:
self.pixmap_current_index += 1
self.animating_label.setPixmap(self.pixmaps[self.pixmap_current_index])
self.timer.start(self.refresh_interval)

Using Pyqt4 to download thousands of PDF's from URL

I am trying to ideally download thousands of PDF from a given website. However, for some reason it won't even download 100 PDF's. I am not sure why. Here is the code:
#!/usr/bin/env python
import time
from pyPdf import PdfFileWriter, PdfFileReader
import StringIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from xhtml2pdf import pisa
import sys
from PyQt4.QtCore import *
from PyQt4.QtGui import*
from PyQt4.QtWebKit import *
class Foo(QWidget):
def __init__(self, parent=None):
super(Foo, self).__init__(parent)
self.count = -1
text_file = open("input.txt", "r")
self.params = text_file.read().split('\n')
self.url = 'http://www.asdfasdfasdf.com/Property.aspx?mode=details&pin={0}'
self.gridLayout = QGridLayout(self)
#self.tabWidget = QTabWidget(self)
#self.gridLayout.addWidget(self.tabWidget, 0, 0, 1, 1)
self.mapper = QSignalMapper(self)
self.mapper.mapped.connect(self.on_mapper_mapped)
for i in range(100):
grabber = QWebView()
grabber.loadFinished.connect(self.mapper.map)
self.mapper.setMapping(grabber, i)
#self.tabWidget.addTab(grabber, "opener {0}".format(str(i)))
grabber.loadFinished.emit(True)
#pyqtSlot(int)
def on_mapper_mapped(self, gNumber):
self.count += 1
if self.count < len(self.params):
#gParam = self.params[self.count]
gParam = self.params[self.count]
opener = self.mapper.mapping(gNumber)
opener.load(QUrl(self.url.format(gParam)))
printer = QPrinter()
#setting format
printer.setPageSize(QPrinter.A4)
printer.setOutputFormat(QPrinter.PdfFormat)
#export file as c:\tem_pdf.pdf
PIDString = gParam[:2] + '-' + gParam[2:4] + '-' + gParam[4:7] + '-' + gParam[7:10] + '-' + gParam[10:14]
printer.setOutputFileName(PIDString + '.pdf')
def convertIt():
opener.print_(printer)
QObject.connect(opener, SIGNAL("loadFinished(bool)"), convertIt)
print str(self.count) + ' of ' + str(len(self.params))
if __name__ == "__main__":
import sys
app = QApplication(sys.argv)
main = Foo()
#main.show()
app.exec_()
sys.exit
Ideally I would like to add a footer as well but it bugs out if I try to do that. Input.txt has 100 numbers ( just for test, I need it to work on 85000). It works on real small numbers like 5 or 10 but won't do 100. Is there a limit to the QwebView instance? Should I manage that in order to make it work?
When I run this code for 100 PDF, it will print out 20 PDFS. If i Change the range to something smaller than 100, it will have duplicate PDF's but certainly more than 20.
Also, I get this error:
qpainter::begin: returned false
I think since you are trying to make a "PyQt Application" you should firstly make use of what it offers your.
Try have some look into it QNetworkAccessMAnager to have control of all your requests and all else, QNetworkRequest, QNetworkReply.
It will even take care of your threads and have your parallelism as desired. It won't freeze your app while it's being downloaded.
Have a small look into this code of mine. It doesn't do exactly what you want but is kinda a really good filtered example for what you need to have all set up.
# Subclass QNetworkAccessManager Here
from PyQt5.QtCore import QByteArray
from PyQt5.QtCore import QFile, pyqtSlot
from PyQt5.QtCore import QIODevice
from PyQt5.QtCore import QUrl
from PyQt5.QtCore import pyqtSignal
from PyQt5.QtNetwork import QNetworkAccessManager
from PyQt5.QtNetwork import QNetworkRequest
class NetworkAccessManager(QNetworkAccessManager):
signal_add_image = pyqtSignal()
dialog = None
download_finished = False
message_buffer = None
reply = None
def __init__(self):
QNetworkAccessManager.__init__(self)
self.reply = self.get(QNetworkRequest(QUrl("")))
# Save image data in QByteArray buffer to the disk (google_image_logo.png
# in the same directory)
#pyqtSlot()
def slot_finished(self):
image_file = QFile("resources/browser_images/image_required_browser")
if image_file.open(QIODevice.ReadWrite):
image_file.write(self.message_buffer)
image_file.close()
self.signal_add_image.emit()
# QMessageBox.information(None, "Hello!", "File has been saved!")
else:
pass
# QMessageBox.critical(None, "Hello!", "Error saving file!")
self.download_finished = True
self.dialog.close()
# Append current data to the buffer every time readyRead() signal is
# emitted
#pyqtSlot()
def slot_read_data(self):
self.message_buffer += self.reply.readAll()
def request_image(self, url, progress_bar, dialog):
self.reply.deleteLater()
self.download_finished = False
self.dialog = dialog
self.message_buffer = QByteArray()
url = QUrl(url)
req = QNetworkRequest(url)
req.setRawHeader(b'User-Agent',
b'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36')
self.reply = self.get(req)
self.reply.readyRead.connect(self.slot_read_data)
self.reply.finished.connect(self.slot_finished)
self.reply.downloadProgress.connect(progress_bar)
def get_reply(self):
return self.reply
def done(self):
return self.download_finished
def set_reply(self, reply):
self.reply = reply
def del_reply(self):
self.reply.deleteLater()
NETWORK_ACCESS_MANAGER = NetworkAccessManager()
Hope it brings you some light ")

Using QApplication::exec() in a function or class

I want the following code to get the request url starting with http://down.51en.com:88 during the web loading process , and then do other processing with the response object of the url .
In my program, once targetUrl is assigned a value , I want the function targetUrlGetter(url) to return it to the caller, however , the problem is that QApplication::exec() enters the main event loop so cannot execute code at the end of thetargetUrlGetter() function after the exec() call , thus the function cannot return , I have tried with qApp.quit() in interceptRequest(self, info) in order to tell the application to exit so that targetUrlGetter(url) can return , but the function still cannot return and the program even crashes on exit(tested on Win7 32bit), so how can I return the targetUrl to the caller program ?
The difficulties here are how to exit the Qt event loop without crash and return the request url to the caller.
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtWebEngineCore import *
from PyQt5.QtCore import *
class WebEngineUrlRequestInterceptor(QWebEngineUrlRequestInterceptor):
def __init__(self, parent=None):
super().__init__(parent)
self.page = parent
def interceptRequest(self, info):
if info.requestUrl().toString().startswith('http://down.51en.com:88'):
self.targetUrl = info.requestUrl().toString()
print('----------------------------------------------', self.targetUrl)
qApp.quit()
# self.page.load(QUrl(''))
def targetUrlGetter(url=None):
app = QApplication(sys.argv)
page = QWebEnginePage()
globalSettings = page.settings().globalSettings()
globalSettings.setAttribute(
QWebEngineSettings.PluginsEnabled, True)
globalSettings.setAttribute(
QWebEngineSettings.AutoLoadImages, False)
profile = page.profile()
webEngineUrlRequestInterceptor = WebEngineUrlRequestInterceptor(page)
profile.setRequestInterceptor(webEngineUrlRequestInterceptor)
page.load(QUrl(url))
# view = QWebEngineView()
# view.setPage(page)
# view.show()
app.exec_()
return webEngineUrlRequestInterceptor.targetUrl
url = "http://www.51en.com/news/sci/everything-there-is-20160513.html"
# url = "http://www.51en.com/news/sci/obese-dad-s-sperm-may-influence-offsprin.html"
# url = "http://www.51en.com/news/sci/mars-surface-glass-could-hold-ancient-fo.html"
targetUrl = targetUrlGetter(url)
print(targetUrl)
You should always initialize QApplication at the beginning of the program, and always call the QApplication::exec function at the end of the program.
Another thing is that QWebEngineUrlRequestInterceptor.interceptRequest is a callback function which is called asynchronously. Since info.requestUrl().toString() is called inside the callback function, there is no way to return the result it synchronously.
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtWebEngineCore import *
from PyQt5.QtCore import *
class WebEngineUrlRequestInterceptor(QWebEngineUrlRequestInterceptor):
def __init__(self, parent=None):
super().__init__(parent)
self.page = parent
def interceptRequest(self, info):
if info.requestUrl().toString().startswith('http://down.51en.com:88'):
self.targetUrl = info.requestUrl().toString()
print('----------------------------------------------', self.targetUrl)
# Don't do thatDo you really want to exit the whole program?
# qApp.quit()
# Do something here, rather than return it.
# It must be run asynchronously
# self.page.load(QUrl(''))
def targetUrlGetter(url=None):
page = QWebEnginePage()
globalSettings = page.settings().globalSettings()
globalSettings.setAttribute(
QWebEngineSettings.PluginsEnabled, True)
globalSettings.setAttribute(
QWebEngineSettings.AutoLoadImages, False)
profile = page.profile()
webEngineUrlRequestInterceptor = WebEngineUrlRequestInterceptor(page)
profile.setRequestInterceptor(webEngineUrlRequestInterceptor)
page.load(QUrl(url))
# view = QWebEngineView()
# view.setPage(page)
# view.show()
# Don't return this. It cannot be called synchronously. It must be called asynchronously.
# return webEngineUrlRequestInterceptor.targetUrl
app = QApplication(sys.argv) # always initialize QApplication at the beginning of the program
url = "http://www.51en.com/news/sci/everything-there-is-20160513.html"
# url = "http://www.51en.com/news/sci/obese-dad-s-sperm-may-influence-offsprin.html"
# url = "http://www.51en.com/news/sci/mars-surface-glass-could-hold-ancient-fo.html"
targetUrl = targetUrlGetter(url)
print(targetUrl)
app.exec_() # always call the QApplication::exec at the end of the program

PyQt application closes immediately after start

I'm trying make an exe application from my scripts using py2exe. The problem is that if I run it by doubleclicking on exe file, it closes immediately after open. If I run it from cmd, it works perfect.
I've tried to make a table global but it did not helped. I've already tried to keep the reference - no effect.
Here is the code of GUI in case it helps:
from PyQt4.QtGui import QTableWidget, QTableWidgetItem,QApplication
import sys
from PyQt4.QtGui import *
import os
from meteo import mesiac,den
class MyTable(QTableWidget):
def __init__(self, data, *args):
QTableWidget.__init__(self, *args)
self.data = data
self.setmydata()
self.resizeColumnsToContents()
self.resizeRowsToContents()
def setmydata(self):
horHeaders = []
for n, key in enumerate(['max teplota','min teplota','max vlhkost','min vlhkost','max tlak',
'min tlak','avg teplota','avg vlhkost','avg tlak']):
horHeaders.append(key)
for m, item in enumerate(self.data[key]):
newitem = QTableWidgetItem(str(item))
self.setItem(m, n, newitem)
self.setHorizontalHeaderLabels(horHeaders)
vertical_headers = ['{}.'.format(x+1) for x in xrange(len(self.data.values()[0])-1)]+['Mesiac:']
self.setVerticalHeaderLabels(vertical_headers)
self.setWindowTitle('Meteo')
def show_table():
global table
table = MyTable(data, len(data.values()[0]), len(data.keys()))
table.setFixedHeight(len(data.values()[0])*20)
table.setFixedWidth(len(data.keys())*95)
table.show()
return table
if __name__ == '__main__':
data = {}
os.system("mode con: cols=100 lines=40")
filenames = next(os.walk('mesiac'))[2]
list_of_den = []
for filename in filenames:
list_of_den.append(den(filename,filename[:-4]))
m = mesiac(list_of_den)
for den in list_of_den:
for n,attr in enumerate(['max teplota','min teplota','max vlhkost','min vlhkost','max tlak',
'min tlak','avg teplota','avg vlhkost','avg tlak']):
try:
data[attr].append(den.to_list()[n])
except:
data[attr]=[den.to_list()[n]]
m_list = m.to_list()
for n,attr in enumerate(['max teplota','min teplota','max vlhkost','min vlhkost','max tlak',
'min tlak','avg teplota','avg vlhkost','avg tlak']):
try:
data[attr].append(m_list[n])
except:
data[attr]=[m_list[n]]
app = QApplication(sys.argv)
t = show_table()
sys.exit(app.exec_())
And for sure - here is a setup.py file:
from distutils.core import setup
import py2exe
options ={"py2exe":{"includes":["sip","PyQt4.QtGui","PyQt4.QtCore","meteo"],"dll_excludes": ["MSVCP90.dll", "HID.DLL", "w9xpopen.exe"]}}
setup(window=['table.py'],options=options)
How to make it visible?

Why does QFileSystemWatcher work for directories but not files in Python?

I borrowed this code from another StackOverflow answer:
from PyQt4 import QtCore
#QtCore.pyqtSlot(str)
def directory_changed(path):
print('Directory Changed!!!')
#QtCore.pyqtSlot(str)
def file_changed(path):
print('File Changed!!!')
fs_watcher = QtCore.QFileSystemWatcher(['/path/to/files_1', '/path/to/files_2', '/path/to/files_3'])
fs_watcher.connect(fs_watcher, QtCore.SIGNAL('directoryChanged(QString)'), directory_changed)
fs_watcher.connect(fs_watcher, QtCore.SIGNAL('fileChanged(QString)'), file_changed)
The problem is, file_changed never gets called, no matter what. directory_changed is reliably called when a file is added, for example, but changing the files content does not result in file_changed being called.
I called a few variations of QtCore.SIGNAL('fileChanged(QString)'), eg, QtCore.SIGNAL('fileChanged(const QString &)'), to no avail. There are no warnings, or errors -- it simply does not trigger the function.
Recommendations?
It's hard to be certain what's going wrong, because the example code is incomplete, and so cannot work at all as it stands.
However, assuming the real code you are running is more or less sane/complete, your problem is probably caused by not adding the directory itself to the list of paths.
A basic script should look something like this:
import sys
from PyQt4 import QtCore
def directory_changed(path):
print('Directory Changed: %s' % path)
def file_changed(path):
print('File Changed: %s' % path)
app = QtCore.QCoreApplication(sys.argv)
paths = [
'/path/to',
'/path/to/files_1',
'/path/to/files_2',
'/path/to/files_3',
]
fs_watcher = QtCore.QFileSystemWatcher(paths)
fs_watcher.directoryChanged.connect(directory_changed)
fs_watcher.fileChanged.connect(file_changed)
sys.exit(app.exec_())
import argparse
import configparser
import os
import sys
from PyQt5 import QtCore
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QApplication, QDesktopWidget, QMessageBox
from PyQt5.QtWidgets import QMainWindow
from ProgressBar_ui import Ui_Form
class ProgressBarWindow(QMainWindow, Ui_Form):
def __init__(self):
super().__init__()
self.ui = Ui_Form()
self.ui.setupUi(self)
self.ui.progressBar.setMinimum(0)
self.ui.progressBar.setMaximum(0)
self.ui.progressBar.setValue(0)
self.setWindowTitle("Progress Bar")
self.MSversion = ""
self.LOADING_LOG_PATH = ""
mainIco = ("Icons\myIcon.ico")
self.setWindowIcon(QIcon(mainIco))
self.ui.label.setText("")
self.ui.label.setWordWrap(True)
def location_on_the_screen(self):
ag = QDesktopWidget().availableGeometry()
sg = QDesktopWidget().screenGeometry()
widget = self.geometry()
x = ag.width() - widget.width()
y = 2 * ag.height() - sg.height() - widget.height()
self.move(x, y)
def file_changed(self, pathPassed):
if os.path.exists(pathPassed):
f = open(pathPassed, "r")
for x in f:
#print(x)
label =x
self.ui.label.setText(label)
if x == "CloseLoading":
self.close()
def main():
app = QApplication(sys.argv)
w = ProgressBarWindow()
w.setWindowFlags(w.windowFlags() & ~QtCore.Qt.WindowMaximizeButtonHint)
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(description='ProgressBar Arguments')
parser.add_argument('version', action="store", type=str)
args = vars(parser.parse_args())
if len(sys.argv) < 1:
msg = QMessageBox()
msg.setIcon(QMessageBox.Critical)
errorMsg = "There are less than 2 command line arguments provided.\nLauncher can not be run."
msg.setText(errorMsg)
msg.setWindowTitle("Save paths..")
msg.exec_()
sys.exit()
p= '/path/toFile/'
paths = [ p ]
fs_watcher = QtCore.QFileSystemWatcher(paths)
#fs_watcher.directoryChanged.connect(w.directory_changed)
fs_watcher.fileChanged.connect(w.file_changed)
w.location_on_the_screen()
w.show()
app.exec_()
if __name__ == "__main__":
sys.exit(main())

Categories

Resources