How to do Web Scraping with BeautifulSoup without affecting my PyQt5 application? - python

I am making an application that involves taking websites and getting the links they have (and also images), but these operations take some time and cause the application to freeze. I've tried using QThread and QRunnable to try and separate the execution of the application from the execution of the functions I use.
I programmed this small example of how it works in my app:
from PyQt5 import QtWidgets,QtCore
from PyQt5.QtWidgets import QApplication, QMainWindow,QLineEdit,QVBoxLayout
from PyQt5.QtCore import QThread
from bs4 import BeautifulSoup
import requests
class mainwindow(QtWidgets.QMainWindow):
## Fixed-size main window holding one QLineEdit and one QPushButton; clicking the button triggers the link search
def __init__(self):
super().__init__()
self.setFixedSize(600,64)
## Set a QLineEdit
self.Line=QtWidgets.QLineEdit()
self.Line.setPlaceholderText('You should be able to type here while the links are searched')
self.Line.setFixedSize(600,32)
self.layout().addWidget(self.Line)
## Set a QPushButton
self.Button=QtWidgets.QPushButton()
self.Button.setText('Seach links')
self.Button.move(0,32)
self.Button.setFixedSize(600,32)
self.layout().addWidget(self.Button)
## Connect button with function
self.Button.clicked.connect(lambda:self.search_start())
## Slot for the button: creates a search_class (a QThread subclass, not a QProcess) and calls func_seach() on it
## NOTE(review): func_seach() is invoked as a plain method call and start() is never called on the QThread,
## so the download presumably runs on the calling (GUI) thread -- confirm; this would match the freeze being asked about
def search_start(self):
self.sclass=search_class()
self.sclass.func_seach()
## Class search for links
class search_class(QThread):
## Downloads the hard-coded URL with requests, parses it with BeautifulSoup's 'html.parser',
## collects the href attribute of every <a> tag into a list and prints the collected hrefs
## NOTE(review): this class does not override run(); QThread only executes run() in the new thread after start(),
## so an ordinary method such as this one runs in whatever thread calls it -- confirm against the QThread docs
def func_seach(self):
url='https://youtu.be/dQw4w9WgXcQ'
links_list=[]
for link in BeautifulSoup(requests.get(url).text, 'html.parser').find_all('a'):
links_list.append(link.get('href'))
print(links_list)
if __name__=='__main__':
Aplication=QtWidgets.QApplication([])
MainWindow=mainwindow()
MainWindow.show()
Aplication.exec_()
How can I prevent the app from freezing while executing that function?
I think you may have already noticed that I'm pretty new to this. I need to know what's wrong, what I'm doing wrong and how to fix it.
Thank you very much in advance.

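The solution was to move the scraping code out of the separate class into a method of the main window class, and to have the button's slot start that method in a separate thread (threading.Thread) instead of calling it directly, so the GUI is not blocked while the request runs: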
from PyQt5 import QtWidgets,QtCore
from PyQt5.QtWidgets import QApplication, QMainWindow,QLineEdit,QVBoxLayout
from PyQt5.QtCore import QThread
from bs4 import BeautifulSoup
import requests
class mainwindow(QtWidgets.QMainWindow):
    """Fixed-size window with a line edit and a button that searches for links.

    Clicking the button starts ``func_seach`` on a separate thread so the
    window stays usable while the page is downloaded and parsed.
    """

    def __init__(self):
        """Build the line edit and the button and wire the button's click."""
        super().__init__()
        self.setFixedSize(600, 64)
        ## Set a QLineEdit
        self.Line = QtWidgets.QLineEdit()
        self.Line.setPlaceholderText('You should be able to type here while the links are searched')
        self.Line.setFixedSize(600, 32)
        self.layout().addWidget(self.Line)
        ## Set a QPushButton
        self.Button = QtWidgets.QPushButton()
        self.Button.setText('Seach links')
        self.Button.move(0, 32)
        self.Button.setFixedSize(600, 32)
        self.layout().addWidget(self.Button)
        ## Connect button with function
        self.Button.clicked.connect(lambda: self.search_start())

    ## Function calls func_seach function as a thread
    def search_start(self):
        """Start ``func_seach`` on a worker thread and return immediately."""
        # Imported locally because the snippet's import list never brings
        # Thread into scope; without this the click raised NameError.
        from threading import Thread

        thread = Thread(target=self.func_seach)
        thread.start()

    def func_seach(self):
        """Download the hard-coded URL and print the href of every <a> tag.

        Runs on the worker thread started by ``search_start``. It only
        prints its result and does not touch any widget.
        """
        url = 'https://youtu.be/dQw4w9WgXcQ'
        page = BeautifulSoup(requests.get(url).text, 'html.parser')
        links_list = [link.get('href') for link in page.find_all('a')]
        print(links_list)
if __name__=='__main__':
Aplication=QtWidgets.QApplication([])
MainWindow=mainwindow()
MainWindow.show()
Aplication.exec_()

Related

QWebEngineView not loading with default Windows elevation

I'm having a bizarre issue where this simple QWebEngine code runs perfectly fine as a regular user on Windows 10 (loads the page fully), but when I elevate my own default user account it stops loading the page. The progress output will go from 0->100 and not call loadFinished on the browser or show any output on the page. I've tried running it as an elevated standard Administrator built-in account and it seemed to work oddly enough.
Here's the unloaded page:
and here's it working properly:
Running with os.environ["QT_DEBUG_PLUGINS"] = "1" doesn't show any discrepancies between the elevated and non-elevated processes.
import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication, QMainWindow
class MainWindow(QMainWindow):
    """Main window whose central widget is a QWebEngineView pointed at Google."""

    def __init__(self, *args, **kwargs):
        """Create the web view, hook up progress reporting and show the window."""
        super().__init__(*args, **kwargs)
        view = QWebEngineView()
        self.browser = view
        view.setUrl(QUrl('https://www.google.com'))
        view.loadProgress.connect(self.on_load_progress)
        self.setCentralWidget(view)
        self.show()

    def on_load_progress(self, progress: int):
        """Print the value delivered by the view's loadProgress signal."""
        message = f'loading progress:[{progress}]...'
        print(message)
app = QApplication(sys.argv)
main_win = MainWindow()
app.exec()
After trying many things and going through many logs, the only partial solution I've found to this problem is adding --no-sandbox to the QApplication argument list to make the Chromium webdriver work at all. For my app this is enough but running without sandboxing is less than ideal for a broader solution.
For those interested I found the answer thanks to this thread (has a lot more relevant info than my descriptions).

QwebEngineView doesn't show anything

I have written a piece of code that shows a simple web page (a Bokeh graph saved in an HTML file). The code works on Windows 10 at work, but on macOS Mojave, using PyQt 5.9 with Python 3.6, the window that opens doesn't show anything and Python crashes.
Could anyone help?
Many thanks
import sys
from PyQt5.QtWidgets import (QApplication)
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5 import QtCore
class Principale():
# Creates a QWebEngineView, asks it to load an HTML file from the user's Desktop and shows it.
def __init__(self):
self.view = QWebEngineView()
# NOTE(review): the QUrl is built from a bare filesystem path with no scheme; QUrl.fromLocalFile(...)
# is the documented way to get a file:// URL -- confirm whether this is why nothing is displayed
self.view.load(QtCore.QUrl("/Users/moncompte/Desktop/essai.html"))
self.view.show()
# Create a custom font
# ---------------------
app = QApplication(sys.argv)
fenetre=Principale()
sys.exit(app.exec_())

QWebEngineView - how to open links in system browser

I have the following code snippet working in PySide and need to translate it to work in PySide2.
The purpose is to force all links to open in the system browser when clicked (rather than the widget trying to load them):
from PySide.QtWebKit import QWebView, QWebPage
class HtmlView(QWebView):
def __init__(self, parent=None):
super(HtmlView, self).__init__(parent)
self.page().setLinkDelegationPolicy(QWebPage.DelegateAllLinks) # not working in PySide2
self.linkClicked.connect(self.openWebsite) # not working in PySide2
This was my attempt of a translation:
from PySide2.QtWebEngineWidgets import QWebEngineView, QWebEnginePage
class HtmlView(QWebEngineView):
def __init__(self, parent=None):
super(HtmlView, self).__init__(parent)
self.page().setLinkDelegationPolicy(QWebEnginePage.DelegateAllLinks) # not working in PySide2
self.linkClicked.connect(self.openWebsite) # not working in PySide2
However, QWebEngineView.linkClicked does not exist and neither does QWebEngineView.setLinkDelegationPolicy or
QWebEnginePage.DelegateAllLinks.
What is the best way to achieve this in PySide2 without the above?
Edit: I checked the QEvents that are triggered but no event seems to be fired off when a link is clicked, so without the linkClicked event from PySide/Qt4.8 I have no idea how to hook into this.
Thanks,
frank
You have to use acceptNavigationRequest:
This function is called upon receiving a request to navigate to the
specified url by means of the specified navigation type type.
isMainFrame indicates whether the request corresponds to the main
frame or a child frame. If the function returns true, the navigation
request is accepted and url is loaded. The default implementation
accepts all navigation requests.
In your case you must reject and open the url when the type is QWebEnginePage::NavigationTypeLinkClicked.
from PySide2.QtCore import QUrl
from PySide2.QtGui import QDesktopServices
from PySide2.QtWidgets import QApplication
from PySide2.QtWebEngineWidgets import QWebEngineView, QWebEnginePage
class WebEnginePage(QWebEnginePage):
    """Page that sends clicked links to the system browser instead of loading them."""

    def acceptNavigationRequest(self, url, _type, isMainFrame):
        """Decide whether the page itself should navigate to *url*.

        Returns False for link clicks, after handing the URL to
        QDesktopServices.openUrl, and True for every other navigation type.
        """
        clicked_link = _type == QWebEnginePage.NavigationTypeLinkClicked
        if not clicked_link:
            return True
        QDesktopServices.openUrl(url)
        return False
class HtmlView(QWebEngineView):
    """Web view that installs a WebEnginePage as its page."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to QWebEngineView, then replace the page."""
        super().__init__(*args, **kwargs)
        custom_page = WebEnginePage(self)
        self.setPage(custom_page)
if __name__ == '__main__':
import sys
app = QApplication(sys.argv)
w = HtmlView()
w.load(QUrl("https://stackoverflow.com/questions/47736408/pyside2-qwebview-how-to-open-links-in-system-browser"));
w.show()
sys.exit(app.exec_())

How to connect PyQt signal to external function

How does one connect a pyqt button signal in one file, to a function in another python file? I've tried various things, but nothing seems to work.
This is the first file:
from PyQt4 import QtGui
from PyQt4.QtGui import QMainWindow
from MainUIFile import Ui_Main
from pythonfile import myOutsideFunction
class MainWindow(QMainWindow,Ui_file):
def __init__(self):
QMainWindow.__init__(self)
self.setupUi(self)
self.btn.clicked.connect(myOutsideFunction())
The second file that is called by the first:
def myOutsideFunction(self):
# Do some stuff here
How would I go about doing this?
You are currently making a call to myOutsideFunction and passing the result to the connect function, which expects a callable as an argument.
Remove the parentheses after myOutsideFunction in the connect call, so that the function object itself is passed:
self.btn.clicked.connect(myOutsideFunction)
What is the importance of connecting to a function outside of your code? Could you not just do something like this:
def myOutsideFunction(a,b,c,d):
#process variables/objects a,b,c,d as you wish
return answer
from PyQt4 import QtGui
from PyQt4.QtGui import QMainWindow
from MainUIFile import Ui_Main
from pythonfile import myOutsideFunction
class MainWindow(QMainWindow, Ui_Main):
    """Main window that forwards a button click to myOutsideFunction.

    Ui_Main is the class this snippet imports from MainUIFile; the
    previously named base, Ui_file, is not defined anywhere in the snippet.
    """

    def __init__(self):
        """Set up the generated UI and connect the button to its handler."""
        QMainWindow.__init__(self)
        self.setupUi(self)
        # Pass the bound method itself. Writing pressed_the_button() here
        # would call it once during construction and hand its return value
        # (None) to connect() instead of a callable.
        self.btn.clicked.connect(self.pressed_the_button)
        #initialize your variables/objects for your outside function if need be

    def pressed_the_button(self):
        """Call myOutsideFunction with the window's a, b, c and d attributes."""
        answer_from_outside_function = myOutsideFunction(self.a, self.b, self.c, self.d)
        # run whatever you need to here...

Problems with connect in pyqt5

I have a problem. I am writing a simple app in Pyqt5. I am trying to do this block of code in PyQt:
QNetworkAccessManager manager;
QNetworkReply *response = manager.get(QNetworkRequest(QUrl(url)));
QEventLoop event;
connect(response,SIGNAL(finished()),&event,SLOT(quit()));
event.exec();
QString html = response->readAll();
But when I try to use "connect", the IDE tells me that "MainWindow" doesn't have such a method. How can I do this? Please help.
This is my code:
class MainWindow(QtWidgets.QWidget):
def __init__(self, parent = None):
super(MainWindow, self).__init__()
# window settings
self.setWindowTitle("Hello world app")
# main layout
self.lay = QtWidgets.QVBoxLayout()
# main widgets
self.label = QtWidgets.QLabel("Enter URL:")
self.line = QtWidgets.QLineEdit()
self.label_conn = QtWidgets.QLabel("")
self.btn = QtWidgets.QPushButton("Connect")
self.btn.clicked.connect(self.btn_click)
# adding widgets to layout
self.lay.addWidget(self.label, alignment=QtCore.Qt.AlignBottom)
self.lay.addWidget(self.line)
self.lay.addWidget(self.btn)
self.lay.addWidget(self.label_conn, alignment=QtCore.Qt.AlignTop | QtCore.Qt.AlignCenter)
self.setLayout(self.lay)
self.connect()
The connect method belongs to the signal that you wish to connect to a specific slot, not to the MainWindow widget itself. (BTW, you should consider inheriting from QMainWindow instead.)
In your code, the MainWindow widget is not a signal, so does not have a connect method. Also, even if it did, you need to specify the slot to which you're trying to connect the signal, which is also missing.
In other words, you must declare a pyqtSignal, if you're not using a pre-existing one, and then connect it to the pyqtSlot of your choice. Whether this slot is pre-defined or a custom one is up to you.
Consider the following code snippet, which I tested in Python3:
#!/usr/bin/python3 -B
import sys
from PyQt5.QtWidgets import QApplication, QDialog, QPushButton
if __name__ == '__main__':
app = QApplication(sys.argv)
diag = QDialog()
diag.setWindowTitle('Signal Demo')
diag.resize(200,50)
btn = QPushButton(diag)
btn.setText('Close Dialog')
# connect button's clicked signal to dialog's close slot
btn.clicked.connect(diag.close)
diag.show()
diag.exec_()
Notice that the button's clicked signal, not the button, is what gets connected to the dialog's close slot, not the dialog itself.
EDIT 1:
Just noticed that the very code you've posted has an example of how to properly perform a connection.
If your code has not simply been copy-pasted from some other place, you should've noticed that you seem to know how to properly connect signals and slots already. This line plainly gives it away:
self.btn.clicked.connect(self.btn_click)
If your MainWindow does have a btn_click method, then it should get invoked after the QPushButton named btn gets clicked.
EDIT 2:
Based on your recent comment, you seem to simply be trying to translate a snippet for a larger application, so consider the following code:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply
from PyQt5.QtCore import QEventLoop, QUrl
app = QApplication(sys.argv)
url = 'https://stackoverflow.com'
manager = QNetworkAccessManager()
response = manager.get(QNetworkRequest(QUrl(url)))
# Spin a local event loop until the reply's finished signal fires, so the
# code below only runs once the response has arrived.
event = QEventLoop()
response.finished.connect(event.quit)
event.exec()
# readAll() returns a QByteArray. In Python 3, str() on it produces the
# "b'...'" representation rather than the page text, so go through bytes
# and decode explicitly (there is no QString in PyQt5 on Python 3).
html = bytes(response.readAll()).decode('utf-8', errors='replace')
print(html)
The code above was tested to work as expected.
PS: I did notice that some seemingly valid URLs were returning an empty response (e.g. http://sourceforge.net/), but others, such as the one above, worked fine. It seems to be unrelated to the code snippet itself.

Categories

Resources