Is it possible to perform in asynchrone(like with asyncio) web requests under Pyqt4 (QwebPage)?
For example, how can I call multiple urls in parallel with this code:
#!/usr/bin/env python3.4
import sys
import signal
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import QWebPage
class Crawler( QWebPage ):
def __init__(self, url):
QWebPage.__init__( self )
self._url = url
self.content = ''
def crawl( self ):
signal.signal( signal.SIGINT, signal.SIG_DFL )
self.connect( self, SIGNAL( 'loadFinished(bool)' ), self._finished_loading )
self.mainFrame().load( QUrl( self._url ) )
def _finished_loading( self, result ):
self.content = self.mainFrame().toHtml()
print(self.content)
sys.exit( 0 )
def main():
app = QApplication( sys.argv )
crawler = Crawler( self._url, self._file )
crawler.crawl()
sys.exit( app.exec_() )
if __name__ == '__main__':
crawl = Crawler( 'http://www.example.com')
crawl.main()
Thanks
You cannot make self.mainFrame().load(QUrl(self._url)) working through asyncio, sorry -- the method implemented in Qt itself.
But you can install quamash event loop and asynchronously call aiohttp.request coroutine to get web pages.
The way doesn't work with QWebPage though.
Requests are already done asynchronously, so you all you need to do is create multiple instances of QWebPage.
Here's a simple demo based on your example script:
import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit
urls = [
'http://qt-project.org/doc/qt-4.8/qwebelement.html',
'http://qt-project.org/doc/qt-4.8/qwebframe.html',
'http://qt-project.org/doc/qt-4.8/qwebinspector.html',
'http://qt-project.org/doc/qt-4.8/qwebpage.html',
'http://qt-project.org/doc/qt-4.8/qwebsettings.html',
'http://qt-project.org/doc/qt-4.8/qwebview.html',
]
class Crawler(QtWebKit.QWebPage):
def __init__(self, url, identifier):
super(Crawler, self).__init__()
self.loadFinished.connect(self._finished_loading)
self._id = identifier
self._url = url
self.content = ''
def crawl(self):
self.mainFrame().load(QtCore.QUrl(self._url))
def _finished_loading(self, result):
self.content = self.mainFrame().toHtml()
print('[%d] %s' % (self._id, self._url))
print(self.content[:250].rstrip(), '...')
print()
self.deleteLater()
if __name__ == '__main__':
app = QtGui.QApplication( sys.argv )
signal.signal( signal.SIGINT, signal.SIG_DFL)
crawlers = []
for index, url in enumerate(urls):
crawlers.append(Crawler(url, index))
crawlers[-1].crawl()
sys.exit( app.exec_() )
Related
I have made a Desktop Application using Python and used PyQt5 and Pytube which could download video from youtube. When download is in Progress, I want to show user an animation. In Fact I did it, but when the file is getting downloaded the PyQt window seems like freezing and everything just gets paused until the download is complete. So, Does anyone know why is this happening? How do I fix it?
Here's the code snippet:
def download_created(self, qual): # Used in 'selection' method
selected_stream = yt.streams.get_by_resolution(qual)
self.progress_func()
try:
self.download_btn.setCurrentIndex(-1)
selected_stream.download(self.askLocation() + "/")
except:
pass
# This gets the quality that the user chooses
def selection(self):
global quality
quality = self.download_btn.currentText()
try:
self.download_created(quality) # Calls a method called 'download'
except:
self.start_anime()
# Fetching the details about the Link from Youtube
def download_youtube(self):
global check
if check != self.get_input():
check = self.get_input()
self.download_btn.clear()
enter_url = self.get_input()
try:
global yt
yt = pytube.YouTube(
enter_url,
on_progress_callback = on_progress,
on_complete_callback = self.complete_func)
self.start_anime()
except:
self.input_error()
VIDEO_TITLE = (yt.title)
global VIDEO_ID
VIDEO_ID = (yt.video_id)
videos = yt.streams.filter(mime_type="video/mp4", progressive="True")
# Display all the available qualities
for i in videos:
self.download_btn.addItem(i.resolution)
self.download_btn.currentIndexChanged.connect(self.selection)
You have to execute the time consuming tasks in another thread, for example in your case the task of getting the streams and downloading.
import sys
import threading
from functools import cached_property
from PyQt5 import QtCore, QtWidgets
import pytube
class QPyTube(QtCore.QObject):
initialized = QtCore.pyqtSignal(bool, str)
download_started = QtCore.pyqtSignal()
download_progress_changed = QtCore.pyqtSignal(int)
download_finished = QtCore.pyqtSignal()
def __init__(self, url):
super().__init__()
self._url = url
self._yt = None
self._mutex = threading.Lock()
threading.Thread(target=self._init, daemon=True).start()
#property
def url(self):
return self._url
#cached_property
def resolutions(self):
return list()
def _init(self):
with self._mutex:
self.resolutions.clear()
try:
self._yt = pytube.YouTube(
self.url,
on_progress_callback=self._on_progress,
on_complete_callback=self._on_complete,
)
streams = self._yt.streams.filter(mime_type="video/mp4", progressive="True")
except Exception as e:
self.initialized.emit(False, str(e))
return
with self._mutex:
self.resolutions = [stream.resolution for stream in streams]
self.initialized.emit(True, "")
def download(self, resolution, directory):
threading.Thread(
target=self._download, args=(resolution, directory), daemon=True
).start()
def _download(self, resolution, directory):
stream = self._yt.streams.get_by_resolution(resolution)
self.download_started.emit()
stream.download(directory)
def _on_progress(self, stream, chunk, bytes_remaining):
self.download_progress_changed.emit(
100 * (stream.filesize - bytes_remaining) // stream.filesize
)
def _on_complete(self, stream, filepath):
self.download_finished.emit()
class MainWindow(QtWidgets.QMainWindow):
def __init__(self, parent=None):
super().__init__(parent)
self.le_url = QtWidgets.QLineEdit("http://youtube.com/watch?v=2lAe1cqCOXo")
self.lbl_error = QtWidgets.QLabel()
self.btn_search = QtWidgets.QPushButton("Search")
self.cmb_resolutions = QtWidgets.QComboBox()
self.le_directory = QtWidgets.QLineEdit("")
self.btn_download = QtWidgets.QPushButton("Download")
self.pgb_download = QtWidgets.QProgressBar()
central_widget = QtWidgets.QWidget()
self.setCentralWidget(central_widget)
lay = QtWidgets.QGridLayout(central_widget)
lay.addWidget(self.le_url, 0, 0)
lay.addWidget(self.btn_search, 0, 1)
lay.addWidget(self.cmb_resolutions, 1, 0)
lay.addWidget(self.le_directory, 1, 1)
lay.addWidget(self.btn_download, 1, 2)
lay.addWidget(self.pgb_download, 2, 0, 1, 3)
self.btn_download.setEnabled(False)
self._qpytube = None
self.btn_search.clicked.connect(self.handle_search_clicked)
self.btn_download.clicked.connect(self.handle_download_clicked)
def handle_search_clicked(self):
self.cmb_resolutions.clear()
self.btn_search.setEnabled(False)
self.btn_download.setEnabled(False)
self.lbl_error.clear()
self._qpytube = QPyTube(self.le_url.text())
self._qpytube.initialized.connect(self.handle_initialized)
self._qpytube.download_progress_changed.connect(self.pgb_download.setValue)
self._qpytube.download_started.connect(self.handle_download_started)
self._qpytube.download_finished.connect(self.handle_download_finished)
#QtCore.pyqtSlot(bool, str)
def handle_initialized(self, status, error=""):
if status:
self.cmb_resolutions.addItems(self._qpytube.resolutions)
self.btn_download.setEnabled(True)
else:
self.lbl_error.setText(error)
self.btn_search.setEnabled(True)
def handle_download_clicked(self):
self._qpytube.download(
self.cmb_resolutions.currentText(), self.le_directory.text()
)
self.btn_search.setEnabled(False)
self.btn_download.setEnabled(False)
self.le_directory.setEnabled(False)
def handle_download_started(self):
self.lbl_error.clear()
print("started")
def handle_download_finished(self):
self.pgb_download.setValue(100)
self.btn_search.setEnabled(True)
self.btn_download.setEnabled(True)
self.le_directory.setEnabled(True)
print("finished")
def main(args):
app = QtWidgets.QApplication(args)
w = MainWindow()
w.show()
app.exec_()
if __name__ == "__main__":
main(sys.argv)
I have made a Desktop Application using Python and used PyQt5 and Pytube which could download video from youtube. When download is in Progress, I want to show user an animation. In Fact I did it, but when the file is getting downloaded the PyQt window seems like freezing and everything just gets paused until the download is complete. So, Does anyone know why is this happening? How do I fix it?
Here's the code snippet:
def download_created(self, qual): # Used in 'selection' method
selected_stream = yt.streams.get_by_resolution(qual)
self.progress_func()
try:
self.download_btn.setCurrentIndex(-1)
selected_stream.download(self.askLocation() + "/")
except:
pass
# This gets the quality that the user chooses
def selection(self):
global quality
quality = self.download_btn.currentText()
try:
self.download_created(quality) # Calls a method called 'download'
except:
self.start_anime()
# Fetching the details about the Link from Youtube
def download_youtube(self):
global check
if check != self.get_input():
check = self.get_input()
self.download_btn.clear()
enter_url = self.get_input()
try:
global yt
yt = pytube.YouTube(
enter_url,
on_progress_callback = on_progress,
on_complete_callback = self.complete_func)
self.start_anime()
except:
self.input_error()
VIDEO_TITLE = (yt.title)
global VIDEO_ID
VIDEO_ID = (yt.video_id)
videos = yt.streams.filter(mime_type="video/mp4", progressive="True")
# Display all the available qualities
for i in videos:
self.download_btn.addItem(i.resolution)
self.download_btn.currentIndexChanged.connect(self.selection)
You have to execute the time consuming tasks in another thread, for example in your case the task of getting the streams and downloading.
import sys
import threading
from functools import cached_property
from PyQt5 import QtCore, QtWidgets
import pytube
class QPyTube(QtCore.QObject):
initialized = QtCore.pyqtSignal(bool, str)
download_started = QtCore.pyqtSignal()
download_progress_changed = QtCore.pyqtSignal(int)
download_finished = QtCore.pyqtSignal()
def __init__(self, url):
super().__init__()
self._url = url
self._yt = None
self._mutex = threading.Lock()
threading.Thread(target=self._init, daemon=True).start()
#property
def url(self):
return self._url
#cached_property
def resolutions(self):
return list()
def _init(self):
with self._mutex:
self.resolutions.clear()
try:
self._yt = pytube.YouTube(
self.url,
on_progress_callback=self._on_progress,
on_complete_callback=self._on_complete,
)
streams = self._yt.streams.filter(mime_type="video/mp4", progressive="True")
except Exception as e:
self.initialized.emit(False, str(e))
return
with self._mutex:
self.resolutions = [stream.resolution for stream in streams]
self.initialized.emit(True, "")
def download(self, resolution, directory):
threading.Thread(
target=self._download, args=(resolution, directory), daemon=True
).start()
def _download(self, resolution, directory):
stream = self._yt.streams.get_by_resolution(resolution)
self.download_started.emit()
stream.download(directory)
def _on_progress(self, stream, chunk, bytes_remaining):
self.download_progress_changed.emit(
100 * (stream.filesize - bytes_remaining) // stream.filesize
)
def _on_complete(self, stream, filepath):
self.download_finished.emit()
class MainWindow(QtWidgets.QMainWindow):
def __init__(self, parent=None):
super().__init__(parent)
self.le_url = QtWidgets.QLineEdit("http://youtube.com/watch?v=2lAe1cqCOXo")
self.lbl_error = QtWidgets.QLabel()
self.btn_search = QtWidgets.QPushButton("Search")
self.cmb_resolutions = QtWidgets.QComboBox()
self.le_directory = QtWidgets.QLineEdit("")
self.btn_download = QtWidgets.QPushButton("Download")
self.pgb_download = QtWidgets.QProgressBar()
central_widget = QtWidgets.QWidget()
self.setCentralWidget(central_widget)
lay = QtWidgets.QGridLayout(central_widget)
lay.addWidget(self.le_url, 0, 0)
lay.addWidget(self.btn_search, 0, 1)
lay.addWidget(self.cmb_resolutions, 1, 0)
lay.addWidget(self.le_directory, 1, 1)
lay.addWidget(self.btn_download, 1, 2)
lay.addWidget(self.pgb_download, 2, 0, 1, 3)
self.btn_download.setEnabled(False)
self._qpytube = None
self.btn_search.clicked.connect(self.handle_search_clicked)
self.btn_download.clicked.connect(self.handle_download_clicked)
def handle_search_clicked(self):
self.cmb_resolutions.clear()
self.btn_search.setEnabled(False)
self.btn_download.setEnabled(False)
self.lbl_error.clear()
self._qpytube = QPyTube(self.le_url.text())
self._qpytube.initialized.connect(self.handle_initialized)
self._qpytube.download_progress_changed.connect(self.pgb_download.setValue)
self._qpytube.download_started.connect(self.handle_download_started)
self._qpytube.download_finished.connect(self.handle_download_finished)
#QtCore.pyqtSlot(bool, str)
def handle_initialized(self, status, error=""):
if status:
self.cmb_resolutions.addItems(self._qpytube.resolutions)
self.btn_download.setEnabled(True)
else:
self.lbl_error.setText(error)
self.btn_search.setEnabled(True)
def handle_download_clicked(self):
self._qpytube.download(
self.cmb_resolutions.currentText(), self.le_directory.text()
)
self.btn_search.setEnabled(False)
self.btn_download.setEnabled(False)
self.le_directory.setEnabled(False)
def handle_download_started(self):
self.lbl_error.clear()
print("started")
def handle_download_finished(self):
self.pgb_download.setValue(100)
self.btn_search.setEnabled(True)
self.btn_download.setEnabled(True)
self.le_directory.setEnabled(True)
print("finished")
def main(args):
app = QtWidgets.QApplication(args)
w = MainWindow()
w.show()
app.exec_()
if __name__ == "__main__":
main(sys.argv)
This question already has an answer here:
Scrape multiple urls using QWebPage
(1 answer)
Closed 4 years ago.
So, guys, i am still confuse why the iteration always stop at the third turn
Here is my code :
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import sys
import numpy as np
from bs4 import BeautifulSoup as soup
class Client(QWebEnginePage):
def __init__(self,url):
global app
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.html = ""
self.loadFinished.connect(self.on_load_finished)
self.load(QUrl(url))
self.app.exec_()
def on_load_finished(self):
self.html = self.toHtml(self.Callable)
print("Load Finished")
def Callable(self,data):
self.html = data
self.app.quit()
linkgroup = []
linkgroup.append("https://docs.python.org/3/whatsnew/3.7.html")
linkgroup.append("https://docs.python.org/3/tutorial/index.html")
linkgroup.append("https://docs.python.org/3/installing/index.html")
linkgroup.append("https://docs.python.org/3/reference/index.html")
linkgroup.append("https://docs.python.org/3/using/index.html")
for h in range(0,len(linkgroup)):
#Setting Url
url = linkgroup[h]
print(url)
print("Loop Index : " + str(h))
client_response = Client(url)
The output is this
https://docs.python.org/3/whatsnew/3.7.html
Loop Index : 0
Load Finished
https://docs.python.org/3/tutorial/index.html
Loop Index : 1
Load Finished
https://docs.python.org/3/installing/index.html
Loop Index : 2
As you can see, the rest iterations of the loop doesn't get executed as it doesn't show the respond from the Client class
As commented by pschill above, you should have only one QApplication. How about passing it as a parameter to the constructor? Something like:
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import sys
import numpy as np
from bs4 import BeautifulSoup as soup
class Client(QWebEnginePage):
def __init__(self,url,app):
self.app = app
QWebEnginePage.__init__(self)
self.html = ""
self.loadFinished.connect(self.on_load_finished)
self.load(QUrl(url))
self.app.exec_()
def on_load_finished(self):
self.html = self.toHtml(self.Callable)
print("Load Finished")
def Callable(self,data):
self.html = data
self.app.quit()
linkgroup = []
linkgroup.append("https://docs.python.org/3/whatsnew/3.7.html")
linkgroup.append("https://docs.python.org/3/tutorial/index.html")
linkgroup.append("https://docs.python.org/3/installing/index.html")
linkgroup.append("https://docs.python.org/3/reference/index.html")
linkgroup.append("https://docs.python.org/3/using/index.html")
app = QApplication(sys.argv)
for h in range(0,len(linkgroup)):
#Setting Url
url = linkgroup[h]
print(url)
print("Loop Index : " + str(h))
client_response = Client(url, app)
I have this code that receives all of the networking resources of a web page.
I took this code from this site so I don't know how it works but I know that it receives all of the networking resources of a web page, which is what I need.
This is my code:
import sys, time
from PySide.QtCore import QUrl, SIGNAL
from PySide.QtGui import QApplication
from PySide.QtWebKit import QWebPage, QWebView, QWebSettings
from PySide.QtNetwork import QNetworkAccessManager, QNetworkRequest
#reload(sys)
#sys.setdefaultencoding('utf-8')
fn_log = 'url_dd.txt'
fp_log = open(fn_log, 'ab+')
class WebPage(QWebPage):
def __init__(self, logger=None, parent=None):
super(WebPage, self).__init__(parent)
def javaScriptConsoleMessage(self, message, lineNumber, sourceID):
sys.stderr.write('Javascritp error at line number %d\n' % (lineNumber))
sys.stderr.write('%s\n' % (message, ))
sys.stderr.write('Source ID: %s\n' % (sourceID, ))
class Crawler(QApplication):
def __init__(self, url):
super(Crawler, self).__init__(sys.argv)
self.url = url
self.web_view = QWebView()
self.web_page = WebPage()
self.web_view.setPage(self.web_page)
self.web_frame = self.web_page.mainFrame()
self.network = NetworkAccessManager()
self.web_page.setNetworkAccessManager(self.network)
self.settings = self.web_page.settings().globalSettings()
self.settings.setAttribute(QWebSettings.PluginsEnabled, False)
QWebSettings.clearMemoryCaches()
self.web_view.resize(1024, 9000)
self.connect(self.web_page, SIGNAL('loadFinished(bool)'), self.loadFinished)
print('Before loading')
self.web_view.load(QUrl(self.url))
print('After loading')
def loadFinished(self, ok):
print('Start loadFinished()')
print('Start writing')
#with open('content_dd.txt', 'ab+') as fp:
#fp.write(self.web_frame.toHtml().toUtf8())
print('End writing')
print('End loadFinished()')
try:
self.quit()
except Exception as e:
print('FATAL ERROR: %s' % (str(e)))
class NetworkAccessManager(QNetworkAccessManager):
def __init__(self):
super(NetworkAccessManager, self).__init__()
# QNetworkAccessManager.__init__(self)
self.connect(self, SIGNAL('finished (QNetworkReply *)'), self.finishd)
def createRequest(self, operation, request, data):
# url = request.url().toString()
self.setNetworkAccessible(self.Accessible)
return QNetworkAccessManager.createRequest(self, operation, request, data)
def finishd(self, reply):
print('In NetworkAccessManager finishd')
url = str(reply.url().toString())
log = '%s: %s\n' % (time.ctime(), url)
#fp_log.write(log)
print(reply)
print(reply.request())
print(log)
print(url)
if __name__ == '__main__':
url = 'http://need4bit.com'
crawler = Crawler(url)
sys.exit(crawler.exec_())
How should I modify this code so it could save all the resources into a directory.
New to Tornado, and Redis
I find someone had this same problem here , tornado-redis: RPOP works but BRPOP doesn't?
but I still do not understand why, and how to resove my problem
code blow work fine
#coding:utf-8
import random
import time
import tornado.web
import tornado.httpserver
import tornado.ioloop
import tornado.options
from uuid import uuid4
# import redis
from tornado.escape import json_encode
import tornado.gen
import tornadoredis
class noticePush(tornado.web.RequestHandler):
def initialize(self):
print 'initialize'
#tornado.web.asynchronous
#tornado.gen.engine
def get(self):
print 'go here'
try:
**uid = self.get_argument('uid')
# key = u'test_comet%s'%uid
key = 'test_comet1'
c = tornadoredis.Client(host='127.0.0.1', port=6379,password='psw')
print key
res = yield tornado.gen.Task(c.blpop, key, 0)**
print res
if res :
self.finish(json_encode(res))
else :
self.finish('None')
except Exception, e :
print e
pass
class Application(tornado.web.Application):
def __init__(self):
handlers = [
(r'/', noticePush)
]
settings = {
'template_path': 'templates',
'static_path': 'static',
'debug': True
}
tornado.web.Application.__init__(self, handlers, **settings)
if __name__ == '__main__':
tornado.options.parse_command_line()
app = Application()
server = tornado.httpserver.HTTPServer(app)
server.listen(8000)
tornado.ioloop.IOLoop.instance().start()
But , I try to use get_argument for the key, blpop never return any data
**uid = self.get_argument('uid')
key = 'test_comet' + uid
c = tornadoredis.Client(host='127.0.0.1', port=6379, password='psw')
print key
res = yield tornado.gen.Task(c.blpop, key, 0)**
print res
if res :
self.finish(json_encode(res))
else :
self.finish('None')
I try to read the tornadoredis code, find the blpop def and I find the reason
def blpop(self, keys, timeout=0, callback=None):
tokens = to_list(keys)
tokens.append(timeout)
self.execute_command('BLPOP', *tokens, callback=callback)
def to_list(source):
if isinstance(source, str):
return [source]
else:
return list(source)
important is
str_key = 'test_comet', type (key) -> str
unicode_key = 'test_comet' + uid , type (key) -> unicode
when I encode the unicode_key.encode('utf-8'), code worked!