I am creating a GUI application in Python 3.4 using the PyQt4 GUI toolkit and NLTK. Below I explain what the application has to do and which parts I have completed:
Goal:
1) The user creates a category and provides a very large text to save into the database.
2) Based on the uploaded text, the user searches for phrases (groups of words). A phrase can occur on multiple lines.
3) The list of matched phrases is maintained according to the lines of the document where they were found.
4) When the user selects a matched phrase line, the cursor should move to that line and highlight the phrase.
Completed:
1) I can upload documents and search for a phrase, listing the lines where it matches.
2) I can highlight text in the QTextEdit text box.
Issue:
1) I am not able to highlight the whole phrase. Only a single word of the phrase is highlighted.
Below is the code I use to highlight the phrase:
import sys
from PyQt4 import QtGui
from PyQt4 import QtCore
import string
# list view class
from view_matched_phrases_ui import Ui_ViewList
from PyDB import DatabaseHandle
class ViewList(QtGui.QMainWindow):
    """Window that lists the lines matching a phrase next to the document."""

    def __init__(self, parent=None):
        super(ViewList, self).__init__(parent)
        self.list = Ui_ViewList()
        self.list.setupUi(self)

    def show_list(self, phrases):
        """List the lines containing the phrase and highlight it in the text.

        ``phrases[0]`` is the phrase with underscores in place of spaces and
        ``phrases[1]['pid']`` holds the ids of the contracts to load.
        """
        contract_ids = phrases[1]['pid']
        phrase = phrases[0].replace('_', ' ')
        # Translation table that drops every punctuation character.
        strip_punctuation = str.maketrans(
            {key: None for key in string.punctuation})
        line_view = self.list.phrase_line_view
        text_view = self.list.phrase_text_view

        with DatabaseHandle() as db:
            for contract_id in contract_ids:
                sql = "SELECT document FROM contracts WHERE id="+str(contract_id)
                data = db.get_single_data(sql)
                new_s = data[0].translate(strip_punctuation).lower()

                # Collect every line of the cleaned document that contains
                # the phrase.
                for line in new_s.splitlines():
                    if phrase in line:
                        line_view.addItem(line)

                # Show the cleaned contract text in the QTextEdit.
                text_view.setPlainText(new_s)
                cursor = text_view.textCursor()

                # Format applied to each match.
                highlight = QtGui.QTextCharFormat()
                highlight.setBackground(QtGui.QBrush(QtGui.QColor("yellow")))

                # Walk through every match of the phrase in the document.
                regex = QtCore.QRegExp(phrase)
                index = regex.indexIn(text_view.toPlainText(), 0)
                while index != -1:
                    cursor.setPosition(index)
                    # NOTE: the selection is extended only to the end of the
                    # word at the match start, so just that word is formatted.
                    cursor.movePosition(QtGui.QTextCursor.EndOfWord,
                                        QtGui.QTextCursor.KeepAnchor)
                    cursor.mergeCharFormat(highlight)
                    # Continue searching right after the current match.
                    index = regex.indexIn(text_view.toPlainText(),
                                          index + regex.matchedLength())
Any contribution will be a great help to me.
I solved this issue by counting the number of words in the phrase and using the NextWord move operation of the Qt text cursor.
The rest of the code stays the same; only the changed part is shown here:
while index != -1:
    # Anchor the cursor at the start of the match.
    cursor.setPosition(index)
    # Extend the selection by one NextWord step per word of the phrase.
    word_count = len(self.matched_phrase.split(" "))
    for _ in range(word_count):
        cursor.movePosition(QtGui.QTextCursor.NextWord,
                            QtGui.QTextCursor.KeepAnchor)
    cursor.mergeCharFormat(format)
    # Resume the search right after the current match.
    pos = index + regex.matchedLength()
    index = regex.indexIn(self.list.phrase_text_view.toPlainText(), pos)
Related
Why is a word being detected as being under the cursor here? The red arrow in the image starts where the cursor actually is. No matter where I place it, as long as it is inside the window, the program thinks a word is being selected. If the cursor is below the text, it defaults to the very last one. If the cursor is above, it defaults to the first one.
IMAGE:
All of my code:
from PyQt5.QtWidgets import QTextEdit, QMainWindow, QApplication
from PyQt5.QtGui import QMouseEvent, QTextCursor
class Editor(QTextEdit):
    """Text edit that highlights the word selected at the mouse position."""

    def __init__(self):
        super(Editor, self).__init__()
        # Track the mouse position at all times.
        self.setMouseTracking(True)

    def mouseMoveEvent(self, mouse_event: QMouseEvent) -> None:
        """Highlight the word selected at the position of the mouse event."""
        if not self.underMouse():
            return

        # Select the word at the event position.
        hover_cursor = self.cursorForPosition(mouse_event.pos())
        hover_cursor.select(QTextCursor.WordUnderCursor)
        hovered_word = hover_cursor.selectedText()
        print(hovered_word)

        # Swap the word for a placeholder so that other occurrences of the
        # same word are not highlighted as well.
        placeholder = self.replace_selected_text_with_placeholder(hover_cursor)
        highlighted_word = (
            '<span style="background-color: #FFFF00;font-weight:bold;">'
            + hovered_word
            + '</span>'
        )

        # Rebuild the contents with the placeholder replaced by the markup.
        self.setHtml(self.toPlainText().replace(placeholder, highlighted_word))

    def replace_in_html(self, old_string, new_string):
        """Replace ``old_string`` with ``new_string`` in the editor's HTML."""
        self.setHtml(self.toHtml().replace(old_string, new_string))

    def replace_selected_text_with_placeholder(self, text_cursor):
        """Replace the cursor's selection with a placeholder and return it.

        The placeholder is always ten ``#`` characters long.
        """
        word_placeholder = '#' * 10
        text_cursor.removeSelectedText()
        text_cursor.insertText(word_placeholder)
        return word_placeholder
def set_up(main_window):
    """Put an ``Editor`` with sample text into ``main_window`` and show it."""
    editor = Editor()
    editor.setText('Venda quente original xiaomi redmi airdots 2 tws fones de ouvido sem fio bluetooth fones controle ai gaming headset come')
    main_window.setCentralWidget(editor)
    main_window.show()
# Create the application object before any widget is constructed.
application = QApplication([])
window = QMainWindow()
# Install the editor as the window's central widget and show the window.
set_up(window)
# Enter the Qt event loop.
application.exec()
The problem is caused by the fact that select() always tries to select something, and even if the mouse is not actually over a text block, it will get the closest word.
The solution is to check if the mouse is actually inside the rectangle of the text block:
if self.underMouse():
    pos = mouse_event.pos()
    # Select the word closest to the mouse position.
    text_cursor = self.cursorForPosition(pos)
    text_cursor.select(QTextCursor.WordUnderCursor)
    word_start = text_cursor.selectionStart()
    word_length = text_cursor.selectionEnd() - word_start

    # Rectangle of the text block holding the selection, shifted by the
    # offset caused by the vertical scroll bar.
    text_block = text_cursor.block()
    block_rect = self.document().documentLayout().blockBoundingRect(text_block)
    block_rect.translate(0, -self.verticalScrollBar().value())

    if pos not in block_rect:
        # The mouse is not over the block: drop the selection.
        text_cursor.setPosition(text_cursor.position())
    elif word_length:
        # Make sure the mouse is not to the right of the selected word.
        offset_in_block = word_start - text_block.position()
        text_line = text_block.layout().lineForTextPosition(offset_in_block)
        word_right, _ = text_line.cursorToX(offset_in_block + word_length)
        if pos.x() > block_rect.x() + word_right:
            # The mouse is past the end of the word: drop the selection.
            text_cursor.setPosition(text_cursor.position())
Please consider that, as suggested for your previous question, highlighting text using setHtml is not a good choice, as it always resets the contents of the editor; this is not only a problem for performance, but also for usability (even ignoring the scroll bar issue): setHtml always resets the undo stack, so the user cannot use undo operations anymore.
I'm working on a simple markdown parser in tkinter. The concept is that headings are surrounded by asterisk symbols, for example *Heading 1*, **Heading 2**.
I'm using regex to find strings in this format, tag them, and change the style of the tags.
The part I am struggling with is removing the asterisk symbols from the text after they've been found. I tried some code (included but commented out), but it just removes the tagged text.
My code correctly finds *Heading 1* and applies the heading style to it, but doesn't remove the markdown symbols to get Heading 1.
Can anyone help me with an algorithm to remove the asterisk symbols from the headings while retaining the formatting?
import tkinter as tk
from tkinter.scrolledtext import ScrolledText
from tkinter import font
class HelpDialog(tk.Toplevel):
    """Help window whose text box styles asterisk-delimited headings."""

    def __init__(self, parent,text):
        super().__init__(parent)
        self.title("Help")
        self.defaultfont = font.Font(family="Sans Serif",size=12)
        self.textbox = ScrolledText(
            self, height=40, width=80, font=self.defaultfont, wrap=tk.WORD)
        self.textbox.grid()
        self.textbox.insert(0.0,text)
        self.style()

    def style(self):
        """Configure the heading tags and apply them to matching lines."""
        self.h1font = font.Font(family="Sans Serif", size=18, weight="bold")
        self.h2font = font.Font(family="Sans Serif", size=14, weight="bold")
        self.h3font = font.Font(family="Sans Serif", size=12, weight="bold", slant="italic")

        heading_fonts = (
            ("h1", self.h1font),
            ("h2", self.h2font),
            ("h3", self.h3font),
        )
        for tag, heading_font in heading_fonts:
            self.textbox.tag_configure(tag, font=heading_font)

        # One pattern per heading level: a line wrapped in 1, 2 or 3 asterisks.
        heading_patterns = (
            (r"^[\*]{1}[\w\d -]+[\*]{1}$", "h1"),
            (r"^[\*]{2}[\w\d -]+[\*]{2}$", "h2"),
            (r"^[\*]{3}[\w\d -]+[\*]{3}$", "h3"),
        )
        for regex, tag in heading_patterns:
            self.tag_match(regex, tag)

    def tag_match(self,regex,tag):
        """Add ``tag`` to every range of the text box that matches ``regex``."""
        box = self.textbox
        match_length = tk.IntVar()
        for mark in ("matchStart", "matchEnd"):
            box.mark_set(mark, "1.0")

        def next_match():
            # Search forward from the end of the previous match.
            return box.search(regex, "matchEnd", "end",
                              count=match_length, regexp=True)

        # An empty string from search() means there are no more matches.
        for index in iter(next_match, ""):
            box.mark_set("matchStart", index)
            box.mark_set("matchEnd", "%s+%sc" % (index, match_length.get()))
            box.tag_add(tag, "matchStart", "matchEnd")
            #Futile attempt to remove the ** from the headings
            #text = self.textbox.get("matchStart", "matchEnd")
            #orig_length = len(text)
            #text = text.replace("*","").ljust(orig_length, " ")
            #self.textbox.delete("matchStart", "matchEnd")
            #self.textbox.insert("matchStart", text)
if __name__ == '__main__':
    # Sample document: one heading of each level with body text in between.
    text = """*Heading 1*
A paragraph
**Heading 2**
Some more text
***Heading 3***
Conclusion
"""
    root = tk.Tk()
    # Hide the root window; only the help dialog is shown.
    root.withdraw()
    dialog = HelpDialog(root,text)
    # The root is hidden, so closing the dialog has to end the application.
    dialog.protocol("WM_DELETE_WINDOW", root.destroy)
    # Without the event loop the script would exit before the dialog appears.
    root.mainloop()
The short answer is that you can use the delete method of the text widget to delete the characters at the start and end of the range. You can do simplified math on the indexes to adjust them. So, for example, to delete the character at "matchEnd" (which actually represents the spot just after the last character in the matched range) you can do delete("matchEnd-1c") where -1c is short hand for "minus one character".
At the very end of your loop inside of tag_match, add the following two lines:
# Delete the single character at the start of the matched range.
self.textbox.delete("matchStart")
# "matchEnd" is the spot just after the match, so "-1c" addresses the last
# character of the matched range.
self.textbox.delete("matchEnd-1c")
However, this code assumes that the markup is just a single character. You will need to pass information in to tell the function how many characters on each side of the text to delete, since that information doesn't otherwise exist.
For example, you could pass it in like this:
# The third argument is the number of markup characters on each side.
self.tag_match(r"^[\*]{1}[\w\d -]+[\*]{1}$", "h1", 1)
You will then need to adjust the code that deletes the characters to take this information into account. For example, assuming you pass that number in as the variable n, it would look something like this:
def tag_match(self, regex, tag, n):
    """Tag every match of ``regex`` and strip ``n`` markup characters
    from each side of it (sketch; elided parts are marked with ``...``)."""
    ...
    while True:
        ...
        # Remove the n markup characters that end the match ...
        trailing_markup_start = "matchEnd-{}c".format(n)
        self.textbox.delete(trailing_markup_start, "matchEnd")
        # ... then the n markup characters that begin it.
        leading_markup_end = "matchStart+{}c".format(n)
        self.textbox.delete("matchStart", leading_markup_end)
I'm using mergeCharFormat on several words within my QTextEdit, in an effort to highlight them. Something like this:
import sys
from PyQt4 import QtGui, uic
from PyQt4.QtCore import *
def drawGUI():
    """Show a text edit with one misspelled word highlighted in pink."""
    app = QtGui.QApplication(sys.argv)

    w = QtGui.QWidget()
    w.setGeometry(200, 200, 200, 50)

    editBox = QtGui.QTextEdit(w)
    editBox.setText('Hello stack overflow, this is a test and tish is a misspelled word')

    # Now there'd be a function that finds misspelled words

    # Format used to mark a misspelled word.
    format_ = QtGui.QTextCharFormat()
    format_.setBackground(QtGui.QBrush(QtGui.QColor("pink")))

    # Locate the misspelled word as a whole word.
    misspelledWord = 'tish'
    word_regex = QRegExp("\\b" + misspelledWord + "\\b")
    word_start = word_regex.indexIn(editBox.toPlainText(), 0)

    # Select from the start of the match to the end of that word and
    # merge the highlight format into the selection.
    cursor = editBox.textCursor()
    cursor.setPosition(word_start)
    cursor.movePosition(QtGui.QTextCursor.EndOfWord,
                        QtGui.QTextCursor.KeepAnchor)
    cursor.mergeCharFormat(format_)

    w.showFullScreen()
    sys.exit(app.exec_())


if __name__ == '__main__':
    drawGUI()
So, this highlighting feature works exactly as intended. However, I can't find a good way to clear the highlights from the textarea. What is a good method of doing such a thing- essentially just setting the char format of the entire QTextEdit back to its defaults?
What I've tried so far is getting the cursor again, and setting its format to a new format with a clear background, then putting the cursor over the entire selection and using QTextCursor.setCharFormat(), but this appears to do nothing.
Applying a new QTextCharFormat to the whole document works for me:
def drawGUI():
    ...
    cursor.mergeCharFormat(format_)

    def clear():
        """Reset the character format of the whole document."""
        whole_document = editBox.textCursor()
        whole_document.select(QtGui.QTextCursor.Document)
        # A default-constructed format replaces the formatting of the
        # selected text.
        whole_document.setCharFormat(QtGui.QTextCharFormat())
        whole_document.clearSelection()
        editBox.setTextCursor(whole_document)

    # Button that triggers the reset.
    button = QtGui.QPushButton('Clear')
    button.clicked.connect(clear)

    # Stack the editor above the button.
    layout = QtGui.QVBoxLayout(w)
    for child in (editBox, button):
        layout.addWidget(child)
I have a QTextEdit window that shows the content of a file.
I would like to be able to find all matches inside the text using a regex and highlight them either by making the match background different or by changing the match text color or making it bold. How can I do this?
I think the simplest solution to your problem is to use the cursor associated to your editor in order to do the formatting. This way you can set the foreground, the background, the font style... The following example marks the matches with a different background.
from PyQt4 import QtGui
from PyQt4 import QtCore
class MyHighlighter(QtGui.QTextEdit):
    """Text edit that marks every match of a regex with a red background."""

    def __init__(self, parent=None):
        super(MyHighlighter, self).__init__(parent)
        # Setup the text editor
        text = """In this text I want to highlight this word and only this word.\n""" +\
            """Any other word shouldn't be highlighted"""
        self.setText(text)
        cursor = self.textCursor()

        # Setup the desired format for matches
        match_format = QtGui.QTextCharFormat()
        match_format.setBackground(QtGui.QBrush(QtGui.QColor("red")))

        # Setup the regex engine
        pattern = "word"
        regex = QtCore.QRegExp(pattern)

        # Formatting does not change the plain text, so read it only once.
        document_text = self.toPlainText()
        index = regex.indexIn(document_text, 0)
        while index != -1:
            length = regex.matchedLength()
            if length > 0:
                # Select exactly the matched span. Moving to the end of the
                # word would cut multi-word matches short and over-extend
                # matches that end in the middle of a word.
                cursor.setPosition(index)
                cursor.setPosition(index + length,
                                   QtGui.QTextCursor.KeepAnchor)
                cursor.mergeCharFormat(match_format)
            # Always advance by at least one character so that an empty
            # match cannot make the loop run forever.
            index = regex.indexIn(document_text, index + max(length, 1))
if __name__ == "__main__":
    import sys

    # Create the application, show the highlighter widget and run the
    # event loop until the window is closed.
    application = QtGui.QApplication(sys.argv)
    highlighter_window = MyHighlighter()
    highlighter_window.show()
    sys.exit(application.exec_())
The code is self-explanatory but if you have any questions just ask them.
Here is a sample of how can you highlight text in a QTextEdit:
#!/usr/bin/env python
#-*- coding:utf-8 -*-
from PyQt4.QtGui import *
from PyQt4.QtCore import *
class highlightSyntax(QSyntaxHighlighter):
    """Syntax highlighter that renders whole-word keywords bold dark blue."""

    def __init__(self, listKeywords, parent=None):
        """Build one highlighting rule per keyword in ``listKeywords``."""
        super(highlightSyntax, self).__init__(parent)
        brush = QBrush(Qt.darkBlue, Qt.SolidPattern)
        keyword = QTextCharFormat()
        keyword.setForeground(brush)
        keyword.setFontWeight(QFont.Bold)
        # Every keyword is matched as a whole word and shares one format.
        self.highlightingRules = [
            highlightRule(QRegExp("\\b" + key + "\\b"), keyword)
            for key in listKeywords
        ]

    def highlightBlock(self, text):
        """Apply the format of every rule to its matches inside ``text``."""
        for rule in self.highlightingRules:
            expression = QRegExp(rule.pattern)
            index = expression.indexIn(text)
            while index >= 0:
                length = expression.matchedLength()
                if length <= 0:
                    # An empty match (e.g. from an empty keyword) would never
                    # advance the search position.
                    break
                self.setFormat(index, length, rule.format)
                # indexIn() accepts both QString and Python str blocks,
                # whereas indexOf() only exists on QString.
                index = expression.indexIn(text, index + length)
        self.setCurrentBlockState(0)
class highlightRule(object):
    """Pair of a search pattern and the format applied to its matches."""

    def __init__(self, pattern, format):
        self.pattern, self.format = pattern, format
class highlightTextEdit(QTextEdit):
    """Text edit that loads a file and highlights the given keywords."""

    def __init__(self, fileInput, listKeywords, parent=None):
        super(highlightTextEdit, self).__init__(parent)
        # The highlighter is created with this editor as its parent.
        highlightSyntax(QStringList(listKeywords), self)
        with open(fileInput, "r") as source_file:
            contents = source_file.read()
        self.setPlainText(contents)
if __name__ == "__main__":
    import sys

    # Show the given file with three sample keywords highlighted and run
    # the event loop until the window is closed.
    application = QApplication(sys.argv)
    editor_window = highlightTextEdit("/path/to/file", ["foo", "bar", "baz"])
    editor_window.show()
    sys.exit(application.exec_())
QT5 has updated the RegEx, see QRegularExpression https://dangelog.wordpress.com/2012/04/07/qregularexpression/
I have updated the first example using cursors.
Note the following changes:
This doesn't wrap an edit, but uses the edit box inside, it could easily be changed to allow you to pass in the edit widget.
This does a proper regex find, not just a single word.
def do_find_highlight(self, pattern):
    """Give every match of ``pattern`` in the editor a red background."""
    # Format merged into each match.
    highlight = QTextCharFormat()
    highlight.setBackground(QBrush(QColor("red")))

    # Iterate over all matches of the pattern in the editor's plain text.
    expression = QRegularExpression(pattern)
    matches = expression.globalMatch(self.editor.toPlainText())

    cursor = self.editor.textCursor()
    while matches.hasNext():
        found = matches.next()
        # Select from the start to the end of the match, then format it.
        cursor.setPosition(found.capturedStart(), QTextCursor.MoveAnchor)
        cursor.setPosition(found.capturedEnd(), QTextCursor.KeepAnchor)
        cursor.mergeCharFormat(highlight)
I am writing converter code for our Data Department to convert fixed-width files into delimited files. Normally we import the file into Excel, use the text import wizard to set the field lengths, and then just save as a CSV. However, we have run into a limitation: we have started getting files that are millions of records long and thus can't be imported into Excel. The files do not always have spaces between the fields, especially between value fields like phone numbers or zip codes. The headers are also often filled in completely, with no spaces.
A sample of a typical fixed width file we are dealing with:
SequenSack and PaFull Name****************************]JOB TITLE****************]HOSP NAME******************************]Delivery Address***********************]Alternate 1 Address********************]Calculated Text**********************************]POSTNET Bar
000001T1 P1 Sample A Sample 123 Any Street Anytown 12345-6789 12345678900
000002T1 P1 Sample A Sample Director of Medicine 123 Any Street Po Box 1234 Anytown 12345-6789 12345678900
The program needs to break file into the following delimited fields:
Sequen
Sack and Pa
Full name
Job Title
Hosp Name
Delivery Address
Alternate Address 1
Calculated Text
POSTNET Bar
Each file has slightly different field widths depending on the rest of the job. What I am looking for is a GUI-oriented delimiter tool, much like the Excel import wizard for fixed-width files. I am writing this tool in Python as part of a larger tool that does many other file operations, such as breaking up files into multiple up, reversing a file, converting from delimited to fixed width, and check-digit checking. I am using Tkinter for the rest of the tools, and it would be ideal if the solution used it as well.
Any help appreciated
If I understand the problem correctly (and there's a good chance I don't...), the simplest solution might be to use a text widget.
Make the first line be a series of spaces the same length as the row. Use a couple of alternating tags (eg: "even" and "odd") to give each character an alternate color so they stand out from one another. The second line would be the header, and any remaining lines would be a couple lines of sample data.
Then, set up bindings on the first row to convert a space into an "x" when the user clicks on a character. If they click on an "x", convert it back to a space. They can then go and click on the character that is the start of each column. When the user is done, you can get the first line of the text widget and it will have an "x" for each column. You then just need a little function that translates that into whatever format you need.
It would look roughly like this (though obviously the colors would be different than what appears on this website)
x x x ...
SequenSack and PaFull Name****************************]JOB...
000001T1 P1 Sample A Sample ...
Here's a quick hack to illustrate the general idea. It's a little sloppy but I think it illustrates the technique. When you run it, click on an area in the first row to set or clear a marker. This will cause the header to be highlighted in alternate colors for each marker.
import sys
import Tkinter as tk
import tkFont
class SampleApp(tk.Tk):
    """Demo window: a delimiter widget above a horizontal scrollbar."""

    def __init__(self, *args, **kwargs):
        tk.Tk.__init__(self, *args, **kwargs)
        header = "SequenSack and PaFull Name****************************]JOB TITLE****************]HOSP NAME******************************]Delivery Address***********************]Alternate 1 Address********************]Calculated Text**********************************]POSTNET Bar"
        sample = "000001T1 P1 Sample A Sample 123 Any Street Anytown 12345-6789 12345678900"

        delimiter = DelimiterWidget(self, header, sample)
        scrollbar = tk.Scrollbar(orient="horizontal", command=delimiter.xview)
        # Keep the scrollbar in sync with the widget's horizontal view.
        delimiter.configure(xscrollcommand=scrollbar.set)

        scrollbar.pack(side="bottom", fill="x")
        delimiter.pack(side="top", fill="x")
class DelimiterWidget(tk.Text):
    """Three-row text widget for marking column starts of a fixed-width file.

    Row 1 holds one marker cell per header character (a space, or "x" when
    set), row 2 the header and row 3 a sample data row.
    """

    def __init__(self, parent, header, samplerow):
        """Create the widget showing ``header`` and ``samplerow``."""
        fixedFont = tkFont.nametofont("TkFixedFont")
        tk.Text.__init__(self, parent, wrap="none", height=3, font=fixedFont)
        self.configure(cursor="left_ptr")
        self.tag_configure("header", background="gray")
        self.tag_configure("even", background="#ffffff")
        self.tag_configure("header_even", background="bisque")
        self.tag_configure("header_odd", background="lightblue")
        self.tag_configure("odd", background="#eeeeee")

        # Marker row: one blank cell per header character, with alternating
        # tags so that neighbouring cells stand out from one another.
        for i in range(len(header)):
            tag = "even" if i % 2 == 0 else "odd"
            self.insert("end", " ", (tag,))
        self.insert("end", "\n")
        self.insert("end", header + "\n", "header")
        self.insert("end", samplerow, "sample")
        self.configure(state="disabled")

        self.bind("<1>", self.on_click)
        self.bind("<Double-1>", self.on_click)
        self.bind("<Triple-1>", self.on_click)

    def on_click(self, event):
        '''Handle a click on a marker'''
        # "@x,y" is the Tk index form for pixel coordinates.
        index = self.index("@%s,%s" % (event.x, event.y))
        (line, column) = index.split(".")
        current = self.get(index)
        # Only marker cells on the first row may be toggled; this also keeps
        # the newline that ends the marker row from being replaced.
        if line != "1" or current not in (" ", "x"):
            return "break"

        self.configure(state="normal")
        self.delete(index)
        tag = "even" if int(column) % 2 == 0 else "odd"
        char = " " if current == "x" else "x"
        self.insert(index, char, tag)
        self.configure(state="disabled")
        self.highlight_header()
        return "break"

    def highlight_header(self):
        '''Highlight the header based on marker positions'''
        self.tag_remove("header_even", "1.0", "end")
        self.tag_remove("header_odd", "1.0", "end")
        markers = self.get("1.0", "1.0 lineend")

        start = "2.0"
        tag = "header_even"
        # The search starts at column 1, so a marker in column 0 is ignored.
        i = markers.find("x", 1)
        while i != -1:
            end = "2.%s" % i
            self.tag_add(tag, start, end)
            start = self.index(end)
            # Alternate the colour for the next header segment.
            tag = "header_even" if tag == "header_odd" else "header_odd"
            i = markers.find("x", i + 1)
if __name__ == "__main__":
    # Build the demo window and run its event loop.
    SampleApp().mainloop()
edit: I now see that you are looking for a gui. I'll leave this incorrect answer for posterity.
import csv
def fixedwidth2csv(fw_name, csv_name, field_info, headings=None):
    """Convert a fixed-width text file into a CSV file.

    Args:
        fw_name: Path of the fixed-width input file.
        csv_name: Path of the CSV file to write.
        field_info: Iterable of ``(pos, width)`` pairs, one per output
            column; each field is ``line[pos:pos + width]`` with the
            surrounding whitespace stripped.
        headings: Optional row written before the data rows.
    """
    import sys

    # The field layout is reused for every line, so materialise it once in
    # case the caller passed a one-shot iterator.
    fields = list(field_info)

    # The output has to be opened for writing: binary mode on Python 2,
    # text mode with newline='' on Python 3, as the csv module expects.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': ''}
    else:
        open_kwargs = {'mode': 'wb'}

    with open(fw_name, 'r') as fw_in:
        with open(csv_name, **open_kwargs) as csv_out:
            wtr = csv.writer(csv_out)
            if headings:
                wtr.writerow(headings)
            for line in fw_in:
                # A list (not a generator) also works with Python 2's writer.
                wtr.writerow(
                    [line[pos:pos + width].strip() for pos, width in fields])