This question already has an answer here:
PyQt5 - QThread: Destroyed while thread is still running
(1 answer)
Closed 1 year ago.
I working on software, that includes parts working with the WHO ICD11 API.
When I run the code:
import json
import re
import threading
import time
from typing import Dict, List
import qdarkgraystyle as qdarkgraystyle
import requests
from PyQt5 import QtCore, QtWidgets
from PyQt5.QtCore import pyqtSignal, QThread
from PyQt5.QtWidgets import QTreeView
from threading import Lock
from gui import Ui_MainWindow
import auth
printlock = Lock()
p = print
def print(*a, **b):
with printlock:
p(*a, **b)
class g:
max = 1
progress = 0
end_workers = False
loaded_dict = None
class ApplicationWindow(QtWidgets.QMainWindow):
def __init__(self):
super(ApplicationWindow, self).__init__()
self.ui = Ui_MainWindow()
self.ui.setupUi(self)
def linearization_url():
return f"https://id.who.int/icd/release/{ui.dropRevision.currentText().split('/')[0]}/{ui.dropRelease.currentText()}{('/' + ui.dropRevision.currentText().split('/')[1]) if len(ui.dropRevision.currentText().split('/')) > 1 else ''}"
def download():
loader = Loader(linearization_url())
loader.statusUpdateHook.connect(updatehook)
loader.statusFinished.connect(finishedLoader)
loader.start()
def updatehook():
ui.progress.setTextVisible(True)
ui.progress.setMaximum(gl.max)
ui.progress.setValue(gl.progress)
def finishedLoader():
json.dump(gl.loaded_dict, open("dict.json"), indent=4)
def split_link(url: str) -> Dict[str, str]:
return re.search(
"https?://id.who.int/icd/release/(?P<revision>[0-9]{2})/(?P<release>[^/]*)(/(?P<linearization>.*))?",
url).groupdict()
def worker(loader):
print("Worker booting...")
_token = gl.token
over = True
while not gl.end_workers:
url = ""
with loader.index_lock:
try:
url = loader.working_list[loader.index]
loader.index += 1
except IndexError:
over = False
if over:
json = request_json(url, _token)
with loader.finished_count_lock:
loader.working_dict[url] = json
if "child" in json:
for child in json["child"]:
loader.working_list.append(child)
loader.finished_count += 1
else:
over = True
def loadReleases():
token = getToken(auth.id, auth.secret)
ui.dropRelease.clear()
ui.dropRelease.repaint()
for release in request_json("https://id.who.int/icd/release/" + ui.dropRevision.currentText(), token)[
"release"]:
ui.dropRelease.addItem(split_link(release)["release"])
def getToken(clientID, clientSecret) -> str:
return requests.post('https://icdaccessmanagement.who.int/connect/token',
data={'client_id': clientID, 'client_secret': clientSecret, 'scope': 'icdapi_access',
'grant_type': 'client_credentials'}).json()['access_token']
def request_json(link_: str, token_: str):
headers_ = {
'Authorization': 'Bearer ' + token_,
'Accept': 'application/json',
'Accept-Language': 'en',
'API-Version': 'v2'
}
return requests.get(link_, headers=headers_).json()
class Loader(QtCore.QThread):
statusFinished = QtCore.pyqtSignal()
statusUpdateHook = QtCore.pyqtSignal()
index = 0
finished_count = 0
working_list = []
working_dict = {}
index_lock = Lock()
finished_count_lock = Lock()
workers = []
def __init__(self, lurl: str):
super().__init__()
self.working_list.append(lurl)
def progressUpdate(self):
gl.max = len(self.working_list)
gl.progress = self.finished_count
self.statusUpdateHook.emit()
def run(self):
for i in range(0, 20):
self.workers.append(threading.Thread(target=worker, args=(self,)))
self.workers[i].start()
while self.finished_count < len(self.working_list):
with self.index_lock:
with self.finished_count_lock:
self.progressUpdate()
time.sleep(5)
for work in self.workers:
if work.isAlive():
gl.end_workers = True
gl.loaded_dict = self.working_dict
self.statusFinished.emit()
if __name__ == "__main__":
import sys
gl = g()
gl.token = getToken(auth.id, auth.secret)
tabs: List[QTreeView] = []
app = QtWidgets.QApplication(sys.argv)
application = ApplicationWindow()
application.setStyleSheet(qdarkgraystyle.load_stylesheet())
ui = application.ui
ui.buttonDownload.clicked.connect(download)
ui.dropRevision.addItems(["10", "11/mms"])
ui.dropRevision.currentIndexChanged.connect(loadReleases)
loadReleases()
application.show()
sys.exit(app.exec_())
in Pycharms debug mode, it does, what I want it to. It works fine as long as it is in debug mode, while when in normal mode, when the buttonDownload.clicked event is triggered, the whole program crashes with the only output being:
QThread: Destroyed while thread is still running
Has anyone any idea on how to fix that?
(For reproducing purposes: You need API keys to access the API. They are imported from auth as auth.id and auth.secret. ID and secret can be obtained from an account over the WHO ICD11 API site)
Loader inherits QThread, in download function QThread object is bound to local variable loader, on exiting function this variable got garbage collected and bound object gets destroyed. You need to make sure loader variable outlive function, for example make it global variable or return it from function and store somewhere.
Related
I'm using Qt's QWebPage to render a page that uses javascript to update its content dynamically - so a library that just downloads a static version of the page (such as urllib2) won't work.
My problem is, when I render a second page, about 99% of the time the program just crashes. At other times, it will work three times before crashing. I've also gotten a few segfaults, but it is all very random.
My guess is the object I'm using to render isn't getting deleted properly, so trying to reuse it is possibly causing some problems for myself. I've looked all over and no one really seems to be having this same issue.
Here's the code I'm using. The program downloads web pages from steam's community market so I can create a database of all the items. I need to call the getItemsFromPage function multiple times to get all of the items, as they are broken up into pages (showing results 1-10 out of X amount).
import csv
import re
import sys
from string import replace
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Item:
__slots__ = ("name", "count", "price", "game")
def __repr__(self):
return self.name + "(" + str(self.count) + ")"
def __str__(self):
return self.name + ", " + str(self.count) + ", $" + str(self.price)
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
self.deleteLater()
def getItemsFromPage(appid, page=1):
r = Render("http://steamcommunity.com/market/search?q=appid:" + str(appid) + "#p" + str(page))
soup = BeautifulSoup(str(r.frame.toHtml().toUtf8()))
itemLst = soup.find_all("div", "market_listing_row market_recent_listing_row")
items = []
for k in itemLst:
i = Item()
i.name = k.find("span", "market_listing_item_name").string
i.count = int(replace(k.find("span", "market_listing_num_listings_qty").string, ",", ""))
i.price = float(re.search(r'\$([0-9]+\.[0-9]+)', str(k)).group(1))
i.game = appid
items.append(i)
return items
if __name__ == "__main__":
print "Updating market items to dota2.csv ..."
i = 1
with open("dota2.csv", "w") as f:
writer = csv.writer(f)
r = None
while True:
print "Page " + str(i)
items = getItemsFromPage(570)
if len(items) == 0:
print "No items found, stopping..."
break
for k in items:
writer.writerow((k.name, k.count, k.price, k.game))
i += 1
print "Done."
Calling getItemsFromPage once works fine. Subsequent calls give me my problem. The output of the program is typically
Updating market items to dota2.csv ...
Page 1
Page 2
and then it crashes. It should go on for over 700 pages.
The problem with your program is that you are attempting to create a new QApplication with every url you fetch.
Instead, only one QApplication and one WebPage should be created. The WebPage can use its loadFinished signal to create an internal loop by fetching a new url after each one has been processed. Custom html processing can be added by connecting a user-defined slot to a signal which emits the html text and the url when they become available. The scripts below (for PyQt5 and PyQt4) show how to implement this.
Here are some examples which show how to use the WebPage class:
Usage:
def my_html_processor(html, url):
print('loaded: [%d chars] %s' % (len(html), url))
import sys
app = QApplication(sys.argv)
webpage = WebPage(verbose=False)
webpage.htmlReady.connect(my_html_processor)
# example 1: process list of urls
urls = ['https://en.wikipedia.org/wiki/Special:Random'] * 3
print('Processing list of urls...')
webpage.process(urls)
# example 2: process one url continuously
#
# import signal, itertools
# signal.signal(signal.SIGINT, signal.SIG_DFL)
#
# print('Processing url continuously...')
# print('Press Ctrl+C to quit')
#
# url = 'https://en.wikipedia.org/wiki/Special:Random'
# webpage.process(itertools.repeat(url))
sys.exit(app.exec_())
PyQt5 WebPage:
from PyQt5.QtCore import pyqtSignal, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class WebPage(QWebEnginePage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super().__init__()
self._verbose = verbose
self.loadFinished.connect(self.handleLoadFinished)
def process(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QUrl(url))
return True
def processCurrentPage(self, html):
self.htmlReady.emit(html, self.url().toString())
if not self.fetchNext():
QApplication.instance().quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super().javaScriptConsoleMessage(*args, **kwargs)
PyQt4 WebPage:
from PyQt4.QtCore import pyqtSignal, QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
class WebPage(QWebPage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super(WebPage, self).__init__()
self._verbose = verbose
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.mainFrame().load(QUrl(url))
return True
def processCurrentPage(self):
self.htmlReady.emit(
self.mainFrame().toHtml(), self.mainFrame().url().toString())
print('loaded: [%d bytes] %s' % (self.bytesReceived(), url))
def handleLoadFinished(self):
self.processCurrentPage()
if not self.fetchNext():
QApplication.instance().quit()
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super(WebPage, self).javaScriptConsoleMessage(*args, **kwargs)
I'm using Qt's QWebPage to render a page that uses javascript to update its content dynamically - so a library that just downloads a static version of the page (such as urllib2) won't work.
My problem is, when I render a second page, about 99% of the time the program just crashes. At other times, it will work three times before crashing. I've also gotten a few segfaults, but it is all very random.
My guess is the object I'm using to render isn't getting deleted properly, so trying to reuse it is possibly causing some problems for myself. I've looked all over and no one really seems to be having this same issue.
Here's the code I'm using. The program downloads web pages from steam's community market so I can create a database of all the items. I need to call the getItemsFromPage function multiple times to get all of the items, as they are broken up into pages (showing results 1-10 out of X amount).
import csv
import re
import sys
from string import replace
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Item:
__slots__ = ("name", "count", "price", "game")
def __repr__(self):
return self.name + "(" + str(self.count) + ")"
def __str__(self):
return self.name + ", " + str(self.count) + ", $" + str(self.price)
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
self.deleteLater()
def getItemsFromPage(appid, page=1):
r = Render("http://steamcommunity.com/market/search?q=appid:" + str(appid) + "#p" + str(page))
soup = BeautifulSoup(str(r.frame.toHtml().toUtf8()))
itemLst = soup.find_all("div", "market_listing_row market_recent_listing_row")
items = []
for k in itemLst:
i = Item()
i.name = k.find("span", "market_listing_item_name").string
i.count = int(replace(k.find("span", "market_listing_num_listings_qty").string, ",", ""))
i.price = float(re.search(r'\$([0-9]+\.[0-9]+)', str(k)).group(1))
i.game = appid
items.append(i)
return items
if __name__ == "__main__":
print "Updating market items to dota2.csv ..."
i = 1
with open("dota2.csv", "w") as f:
writer = csv.writer(f)
r = None
while True:
print "Page " + str(i)
items = getItemsFromPage(570)
if len(items) == 0:
print "No items found, stopping..."
break
for k in items:
writer.writerow((k.name, k.count, k.price, k.game))
i += 1
print "Done."
Calling getItemsFromPage once works fine. Subsequent calls give me my problem. The output of the program is typically
Updating market items to dota2.csv ...
Page 1
Page 2
and then it crashes. It should go on for over 700 pages.
The problem with your program is that you are attempting to create a new QApplication with every url you fetch.
Instead, only one QApplication and one WebPage should be created. The WebPage can use its loadFinished signal to create an internal loop by fetching a new url after each one has been processed. Custom html processing can be added by connecting a user-defined slot to a signal which emits the html text and the url when they become available. The scripts below (for PyQt5 and PyQt4) show how to implement this.
Here are some examples which show how to use the WebPage class:
Usage:
def my_html_processor(html, url):
print('loaded: [%d chars] %s' % (len(html), url))
import sys
app = QApplication(sys.argv)
webpage = WebPage(verbose=False)
webpage.htmlReady.connect(my_html_processor)
# example 1: process list of urls
urls = ['https://en.wikipedia.org/wiki/Special:Random'] * 3
print('Processing list of urls...')
webpage.process(urls)
# example 2: process one url continuously
#
# import signal, itertools
# signal.signal(signal.SIGINT, signal.SIG_DFL)
#
# print('Processing url continuously...')
# print('Press Ctrl+C to quit')
#
# url = 'https://en.wikipedia.org/wiki/Special:Random'
# webpage.process(itertools.repeat(url))
sys.exit(app.exec_())
PyQt5 WebPage:
from PyQt5.QtCore import pyqtSignal, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class WebPage(QWebEnginePage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super().__init__()
self._verbose = verbose
self.loadFinished.connect(self.handleLoadFinished)
def process(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QUrl(url))
return True
def processCurrentPage(self, html):
self.htmlReady.emit(html, self.url().toString())
if not self.fetchNext():
QApplication.instance().quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super().javaScriptConsoleMessage(*args, **kwargs)
PyQt4 WebPage:
from PyQt4.QtCore import pyqtSignal, QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
class WebPage(QWebPage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super(WebPage, self).__init__()
self._verbose = verbose
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.mainFrame().load(QUrl(url))
return True
def processCurrentPage(self):
self.htmlReady.emit(
self.mainFrame().toHtml(), self.mainFrame().url().toString())
print('loaded: [%d bytes] %s' % (self.bytesReceived(), url))
def handleLoadFinished(self):
self.processCurrentPage()
if not self.fetchNext():
QApplication.instance().quit()
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super(WebPage, self).javaScriptConsoleMessage(*args, **kwargs)
I'm using Qt's QWebPage to render a page that uses javascript to update its content dynamically - so a library that just downloads a static version of the page (such as urllib2) won't work.
My problem is, when I render a second page, about 99% of the time the program just crashes. At other times, it will work three times before crashing. I've also gotten a few segfaults, but it is all very random.
My guess is the object I'm using to render isn't getting deleted properly, so trying to reuse it is possibly causing some problems for myself. I've looked all over and no one really seems to be having this same issue.
Here's the code I'm using. The program downloads web pages from steam's community market so I can create a database of all the items. I need to call the getItemsFromPage function multiple times to get all of the items, as they are broken up into pages (showing results 1-10 out of X amount).
import csv
import re
import sys
from string import replace
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Item:
__slots__ = ("name", "count", "price", "game")
def __repr__(self):
return self.name + "(" + str(self.count) + ")"
def __str__(self):
return self.name + ", " + str(self.count) + ", $" + str(self.price)
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
self.deleteLater()
def getItemsFromPage(appid, page=1):
r = Render("http://steamcommunity.com/market/search?q=appid:" + str(appid) + "#p" + str(page))
soup = BeautifulSoup(str(r.frame.toHtml().toUtf8()))
itemLst = soup.find_all("div", "market_listing_row market_recent_listing_row")
items = []
for k in itemLst:
i = Item()
i.name = k.find("span", "market_listing_item_name").string
i.count = int(replace(k.find("span", "market_listing_num_listings_qty").string, ",", ""))
i.price = float(re.search(r'\$([0-9]+\.[0-9]+)', str(k)).group(1))
i.game = appid
items.append(i)
return items
if __name__ == "__main__":
print "Updating market items to dota2.csv ..."
i = 1
with open("dota2.csv", "w") as f:
writer = csv.writer(f)
r = None
while True:
print "Page " + str(i)
items = getItemsFromPage(570)
if len(items) == 0:
print "No items found, stopping..."
break
for k in items:
writer.writerow((k.name, k.count, k.price, k.game))
i += 1
print "Done."
Calling getItemsFromPage once works fine. Subsequent calls give me my problem. The output of the program is typically
Updating market items to dota2.csv ...
Page 1
Page 2
and then it crashes. It should go on for over 700 pages.
The problem with your program is that you are attempting to create a new QApplication with every url you fetch.
Instead, only one QApplication and one WebPage should be created. The WebPage can use its loadFinished signal to create an internal loop by fetching a new url after each one has been processed. Custom html processing can be added by connecting a user-defined slot to a signal which emits the html text and the url when they become available. The scripts below (for PyQt5 and PyQt4) show how to implement this.
Here are some examples which show how to use the WebPage class:
Usage:
def my_html_processor(html, url):
print('loaded: [%d chars] %s' % (len(html), url))
import sys
app = QApplication(sys.argv)
webpage = WebPage(verbose=False)
webpage.htmlReady.connect(my_html_processor)
# example 1: process list of urls
urls = ['https://en.wikipedia.org/wiki/Special:Random'] * 3
print('Processing list of urls...')
webpage.process(urls)
# example 2: process one url continuously
#
# import signal, itertools
# signal.signal(signal.SIGINT, signal.SIG_DFL)
#
# print('Processing url continuously...')
# print('Press Ctrl+C to quit')
#
# url = 'https://en.wikipedia.org/wiki/Special:Random'
# webpage.process(itertools.repeat(url))
sys.exit(app.exec_())
PyQt5 WebPage:
from PyQt5.QtCore import pyqtSignal, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class WebPage(QWebEnginePage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super().__init__()
self._verbose = verbose
self.loadFinished.connect(self.handleLoadFinished)
def process(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QUrl(url))
return True
def processCurrentPage(self, html):
self.htmlReady.emit(html, self.url().toString())
if not self.fetchNext():
QApplication.instance().quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super().javaScriptConsoleMessage(*args, **kwargs)
PyQt4 WebPage:
from PyQt4.QtCore import pyqtSignal, QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
class WebPage(QWebPage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super(WebPage, self).__init__()
self._verbose = verbose
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.mainFrame().load(QUrl(url))
return True
def processCurrentPage(self):
self.htmlReady.emit(
self.mainFrame().toHtml(), self.mainFrame().url().toString())
print('loaded: [%d bytes] %s' % (self.bytesReceived(), url))
def handleLoadFinished(self):
self.processCurrentPage()
if not self.fetchNext():
QApplication.instance().quit()
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super(WebPage, self).javaScriptConsoleMessage(*args, **kwargs)
I'm using Qt's QWebPage to render a page that uses javascript to update its content dynamically - so a library that just downloads a static version of the page (such as urllib2) won't work.
My problem is, when I render a second page, about 99% of the time the program just crashes. At other times, it will work three times before crashing. I've also gotten a few segfaults, but it is all very random.
My guess is the object I'm using to render isn't getting deleted properly, so trying to reuse it is possibly causing some problems for myself. I've looked all over and no one really seems to be having this same issue.
Here's the code I'm using. The program downloads web pages from steam's community market so I can create a database of all the items. I need to call the getItemsFromPage function multiple times to get all of the items, as they are broken up into pages (showing results 1-10 out of X amount).
import csv
import re
import sys
from string import replace
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Item:
__slots__ = ("name", "count", "price", "game")
def __repr__(self):
return self.name + "(" + str(self.count) + ")"
def __str__(self):
return self.name + ", " + str(self.count) + ", $" + str(self.price)
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
self.deleteLater()
def getItemsFromPage(appid, page=1):
r = Render("http://steamcommunity.com/market/search?q=appid:" + str(appid) + "#p" + str(page))
soup = BeautifulSoup(str(r.frame.toHtml().toUtf8()))
itemLst = soup.find_all("div", "market_listing_row market_recent_listing_row")
items = []
for k in itemLst:
i = Item()
i.name = k.find("span", "market_listing_item_name").string
i.count = int(replace(k.find("span", "market_listing_num_listings_qty").string, ",", ""))
i.price = float(re.search(r'\$([0-9]+\.[0-9]+)', str(k)).group(1))
i.game = appid
items.append(i)
return items
if __name__ == "__main__":
print "Updating market items to dota2.csv ..."
i = 1
with open("dota2.csv", "w") as f:
writer = csv.writer(f)
r = None
while True:
print "Page " + str(i)
items = getItemsFromPage(570)
if len(items) == 0:
print "No items found, stopping..."
break
for k in items:
writer.writerow((k.name, k.count, k.price, k.game))
i += 1
print "Done."
Calling getItemsFromPage once works fine. Subsequent calls give me my problem. The output of the program is typically
Updating market items to dota2.csv ...
Page 1
Page 2
and then it crashes. It should go on for over 700 pages.
The problem with your program is that you are attempting to create a new QApplication with every url you fetch.
Instead, only one QApplication and one WebPage should be created. The WebPage can use its loadFinished signal to create an internal loop by fetching a new url after each one has been processed. Custom html processing can be added by connecting a user-defined slot to a signal which emits the html text and the url when they become available. The scripts below (for PyQt5 and PyQt4) show how to implement this.
Here are some examples which show how to use the WebPage class:
Usage:
def my_html_processor(html, url):
print('loaded: [%d chars] %s' % (len(html), url))
import sys
app = QApplication(sys.argv)
webpage = WebPage(verbose=False)
webpage.htmlReady.connect(my_html_processor)
# example 1: process list of urls
urls = ['https://en.wikipedia.org/wiki/Special:Random'] * 3
print('Processing list of urls...')
webpage.process(urls)
# example 2: process one url continuously
#
# import signal, itertools
# signal.signal(signal.SIGINT, signal.SIG_DFL)
#
# print('Processing url continuously...')
# print('Press Ctrl+C to quit')
#
# url = 'https://en.wikipedia.org/wiki/Special:Random'
# webpage.process(itertools.repeat(url))
sys.exit(app.exec_())
PyQt5 WebPage:
from PyQt5.QtCore import pyqtSignal, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class WebPage(QWebEnginePage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super().__init__()
self._verbose = verbose
self.loadFinished.connect(self.handleLoadFinished)
def process(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QUrl(url))
return True
def processCurrentPage(self, html):
self.htmlReady.emit(html, self.url().toString())
if not self.fetchNext():
QApplication.instance().quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super().javaScriptConsoleMessage(*args, **kwargs)
PyQt4 WebPage:
from PyQt4.QtCore import pyqtSignal, QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
class WebPage(QWebPage):
htmlReady = pyqtSignal(str, str)
def __init__(self, verbose=False):
super(WebPage, self).__init__()
self._verbose = verbose
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext()
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.mainFrame().load(QUrl(url))
return True
def processCurrentPage(self):
self.htmlReady.emit(
self.mainFrame().toHtml(), self.mainFrame().url().toString())
print('loaded: [%d bytes] %s' % (self.bytesReceived(), url))
def handleLoadFinished(self):
self.processCurrentPage()
if not self.fetchNext():
QApplication.instance().quit()
def javaScriptConsoleMessage(self, *args, **kwargs):
if self._verbose:
super(WebPage, self).javaScriptConsoleMessage(*args, **kwargs)
I'm trying to build a PyQt app which (among other things) has the ability via a QTextEdit Box to function like a serial terminal program (HyperTerminal, TeraTerm, etc.) I've read through a few examples from the PySerial page and I think I've managed to get the receive data thread working properly but maybe not as efficiently as possible.
My problem is how do I take the last typed character in the QTextEdit box and send that out the serial connection? I've tried using the textChanged signal that QTextEdit emits, but that then sends everything that I type AND that it receives. I've tried setting up an eventFilter in my main GUI class, but I can't figure out how to get that over to the serial function in another file. Do I want to have a separate thread that listens for a signal emitted from the eventFilter? How do I do that? Is there a more elegant way to do this?
I'm sure I've just managed to overthink this and the solution is simple, but I'm somewhat struggling with it. I'll attach the relevant code snippets (not a full code set) and perhaps somebody can point me in the right direction. If anybody also thinks that the threading that I'm doing could be done in a more efficient manner, then please relay that to me as well!
Thanks for any help that anybody can provide!
Main File:
import sys
from PyQt4 import QtGui
from MainGUI import TestGUI
from SerialClasses import *
from SerialMiniterm import *
class StartMainWindow(QtGui.QMainWindow):
def __init__(self, parent=None):
super(StartMainWindow, self).__init__(parent)
self.ui = TestGUI()
self.ui.setupUi(self)
self.ui.serialTextEditBox.installEventFilter(self)
def eventFilter(self, source, event):
if (event.type() == QtCore.QEvent.KeyPress and source is self.ui.serialTextEditBox):
# print some debug statements to console
if (event.key() == QtCore.Qt.Key_Tab):
print ('Tab pressed')
print ('key pressed: %s' % event.text())
print ('code pressed: %d' % event.key())
# do i emit a signal here? how do i catch it in thread?
self.emit(QtCore.SIGNAL('transmitSerialData(QString)'), event.key())
return True
return QtGui.QTextEdit.eventFilter(self, source, event)
def serialConnectCallback(self):
self.miniterm = SerialMiniterm(self.ui, self.SerialSettings)
self.miniterm.start()
temp = self.SerialSettings.Port + 1
self.ui.serialLabel.setText("<font color = green>Serial Terminal Connected on COM%d" % temp)
if __name__ == "__main__":
app = QtGui.QApplication(sys.argv)
app.setStyle("Cleanlooks")
myapp = StartMainWindow()
myapp.show()
sys.exit(app.exec_())
SerialMiniterm.py:
import serial
from PyQt4 import QtGui, QtCore
def character(b):
return b
class SerialMiniterm(object):
def __init__(self, ui, SerialSettings):
self.SerialSettings = SerialSettings
self.ui = ui
self.serial = serial.Serial(self.SerialSettings.Port, self.SerialSettings.BaudRate, parity=self.SerialSettings.Parity, rtscts=self.SerialSettings.RTS_CTS, xonxoff=self.SerialSettings.Xon_Xoff, timeout=1)
self.repr_mode = self.SerialSettings.RxMode
self.convert_outgoing = self.SerialSettings.NewlineMode
self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
self.dtr_state = True
self.rts_state = True
self.break_state = False
def _start_reader(self):
"""Start reader thread"""
self._reader_alive = True
self.receiver_thread = ReaderThread(self.alive, self._reader_alive, self.repr_mode, self.convert_outgoing, self.serial)
self.receiver_thread.connect(self.receiver_thread, QtCore.SIGNAL("updateSerialTextBox(QString)"), self.updateTextBox)
self.receiver_thread.start()
def _stop_reader(self):
"""Stop reader thread only, wait for clean exit of thread"""
self._reader_alive = False
self.receiver_thread.join()
def updateTextBox(self, q):
self.ui.serialTextEditBox.insertPlainText(q)
self.ui.serialTextEditBox.moveCursor(QtGui.QTextCursor.End)
#print "got here with value %s..." % q
def start(self):
self.alive = True
self._start_reader()
# how do i handle transmitter thread?
def stop(self):
self.alive = False
def join(self, transmit_only=False):
self.transmitter_thread.join()
if not transmit_only:
self.receiver_thread.join()
class ReaderThread(QtCore.QThread):
def __init__(self, alive, _reader_alive, repr_mode, convert_outgoing, serial, parent=None):
QtCore.QThread.__init__(self, parent)
self.alive = alive
self._reader_alive = _reader_alive
self.repr_mode = repr_mode
self.convert_outgoing = convert_outgoing
self.serial = serial
def __del__(self):
self.wait()
def run(self):
"""loop and copy serial->console"""
while self.alive and self._reader_alive:
data = self.serial.read(self.serial.inWaiting())
if data: #check if not timeout
q = data
self.emit(QtCore.SIGNAL('updateSerialTextBox(QString)'), q)
Something like this?
from PyQt4 import QtCore, QtGui
app = QtGui.QApplication([])
class Terminal(QtGui.QPlainTextEdit):
def keyPressEvent(self, event):
print event.text()
return QtGui.QPlainTextEdit.keyPressEvent(self, event)
term = Terminal()
term.show()