Adding Syntax Highlighting to a Text Editor with Python - python

I am creating my own web-based text editor and I want to add syntax highlighting. To start off I will only do highlighting for one language (Python, most likely), but later on I want to add highlighting for every language I can think of.
I would like to find a tutorial on this if possible. Does anyone know of a place where I can find one?
Also, if there are any other tips you can give me, that would be great.

Take a look at Pygments.
highlight.js

# syntax.py
import sys
from PyQt4.QtCore import QRegExp
from PyQt4.QtGui import QColor, QTextCharFormat, QFont, QSyntaxHighlighter
def format(color, style=''):
    """Build a QTextCharFormat for a named colour and optional style flags.

    ``color`` is handed to ``QColor.setNamedColor``.  ``style`` is searched
    for the substrings 'bold' and 'italic'; each one found switches on the
    corresponding font attribute of the returned format.
    """
    foreground = QColor()
    foreground.setNamedColor(color)

    char_format = QTextCharFormat()
    char_format.setForeground(foreground)

    wants_bold = 'bold' in style
    wants_italic = 'italic' in style
    if wants_bold:
        char_format.setFontWeight(QFont.Bold)
    if wants_italic:
        char_format.setFontItalic(True)

    return char_format
# Syntax styles that can be shared by all languages.
# Maps a style name to the QTextCharFormat produced by format(colour, style);
# the second argument, when given, adds 'bold' or 'italic' to the format.
STYLES = {
'keyword': format('blue'),
'operator': format('red'),
'brace': format('darkGray'),
'defclass': format('black', 'bold'),
'string': format('magenta'),
'string2': format('darkMagenta'),
'comment': format('darkGreen', 'italic'),
'self': format('black', 'italic'),
'numbers': format('brown'),
}
class PythonHighlighter (QSyntaxHighlighter):
    """Syntax highlighter for the Python language.

    Single-line constructs (keywords, operators, braces, strings, comments,
    numbers, ``def``/``class`` names) are matched with one QRegExp per rule.
    Triple-quoted strings are tracked across lines through the block state:
    state 1 means "inside triple-single quotes", state 2 means "inside
    triple-double quotes", and state 0 means neither.
    """
    # Python keywords
    keywords = [
        'and', 'assert', 'break', 'class', 'continue', 'def',
        'del', 'elif', 'else', 'except', 'exec', 'finally',
        'for', 'from', 'global', 'if', 'import', 'in',
        'is', 'lambda', 'not', 'or', 'pass', 'print',
        'raise', 'return', 'try', 'while', 'yield',
        'None', 'True', 'False',
    ]

    # Python operators, written as regex fragments.  Raw strings keep the
    # backslashes intact without relying on Python passing unknown escape
    # sequences such as '\+' through unchanged.
    operators = [
        '=',
        # Comparison
        '==', '!=', '<', '<=', '>', '>=',
        # Arithmetic
        r'\+', '-', r'\*', '/', '//', r'\%', r'\*\*',
        # In-place
        r'\+=', '-=', r'\*=', '/=', r'\%=',
        # Bitwise
        r'\^', r'\|', r'\&', r'\~', '>>', '<<',
    ]

    # Python braces (regex fragments)
    braces = [
        r'\{', r'\}', r'\(', r'\)', r'\[', r'\]',
    ]

    def __init__(self, document):
        """Attach the highlighter to ``document`` and compile all rules."""
        QSyntaxHighlighter.__init__(self, document)

        # Multi-line strings (expression, flag, style)
        # FIXME: The triple-quotes in these two lines will mess up the
        # syntax highlighting from this point onward
        self.tri_single = (QRegExp("'''"), 1, STYLES['string2'])
        self.tri_double = (QRegExp('"""'), 2, STYLES['string2'])

        # Each rule is (pattern, capture group to format, format).
        rules = []

        # Keyword, operator, and brace rules
        rules += [(r'\b%s\b' % w, 0, STYLES['keyword'])
                  for w in PythonHighlighter.keywords]
        rules += [(r'%s' % o, 0, STYLES['operator'])
                  for o in PythonHighlighter.operators]
        rules += [(r'%s' % b, 0, STYLES['brace'])
                  for b in PythonHighlighter.braces]

        # All other rules
        rules += [
            # 'self'
            (r'\bself\b', 0, STYLES['self']),

            # Double-quoted string, possibly containing escape sequences
            (r'"[^"\\]*(\\.[^"\\]*)*"', 0, STYLES['string']),
            # Single-quoted string, possibly containing escape sequences
            (r"'[^'\\]*(\\.[^'\\]*)*'", 0, STYLES['string']),

            # 'def' followed by an identifier
            (r'\bdef\b\s*(\w+)', 1, STYLES['defclass']),
            # 'class' followed by an identifier
            (r'\bclass\b\s*(\w+)', 1, STYLES['defclass']),

            # From '#' until a newline
            (r'#[^\n]*', 0, STYLES['comment']),

            # Numeric literals
            (r'\b[+-]?[0-9]+[lL]?\b', 0, STYLES['numbers']),
            (r'\b[+-]?0[xX][0-9A-Fa-f]+[lL]?\b', 0, STYLES['numbers']),
            (r'\b[+-]?[0-9]+(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?\b', 0, STYLES['numbers']),
        ]

        # Build a QRegExp for each pattern
        self.rules = [(QRegExp(pat), index, fmt)
                      for (pat, index, fmt) in rules]

    def highlightBlock(self, text):
        """Apply syntax highlighting to the given block of text.
        """
        # Do other syntax formatting.  The loop variable is named ``fmt`` so
        # it does not shadow the module-level ``format`` helper.
        for expression, nth, fmt in self.rules:
            index = expression.indexIn(text, 0)

            while index >= 0:
                # We actually want the index of the nth match
                index = expression.pos(nth)
                # len() works whether Qt hands over a QString or a str;
                # .length() exists only on QString.
                length = len(expression.cap(nth))
                self.setFormat(index, length, fmt)
                index = expression.indexIn(text, index + length)

        self.setCurrentBlockState(0)

        # Do multi-line strings
        in_multiline = self.match_multiline(text, *self.tri_single)
        if not in_multiline:
            in_multiline = self.match_multiline(text, *self.tri_double)

    def match_multiline(self, text, delimiter, in_state, style):
        """Do highlighting of multi-line strings. ``delimiter`` should be a
        ``QRegExp`` for triple-single-quotes or triple-double-quotes, and
        ``in_state`` should be a unique integer to represent the corresponding
        state changes when inside those strings. Returns True if we're still
        inside a multi-line string when this function is finished.
        """
        # If the previous block ended inside this kind of string, start at 0
        if self.previousBlockState() == in_state:
            start = 0
            add = 0
        # Otherwise, look for the delimiter on this line
        else:
            start = delimiter.indexIn(text)
            # Move past this match
            add = delimiter.matchedLength()

        # As long as there's a delimiter match on this line...
        while start >= 0:
            # Look for the ending delimiter
            end = delimiter.indexIn(text, start + add)
            # Ending delimiter on this line?
            if end >= add:
                length = end - start + add + delimiter.matchedLength()
                self.setCurrentBlockState(0)
            # No; multi-line string
            else:
                self.setCurrentBlockState(in_state)
                length = len(text) - start + add
            # Apply formatting
            self.setFormat(start, length, style)
            # Look for the next match
            start = delimiter.indexIn(text, start + length)

        # True if still inside a multi-line string, False otherwise
        return self.currentBlockState() == in_state

from PyQt4 import QtGui
import syntax

# Demo: show syntax.py in a plain-text editor with PythonHighlighter attached.
# Requires the syntax.py module posted above to be importable.
app = QtGui.QApplication([])
texter = QtGui.QPlainTextEdit()
highlight = syntax.PythonHighlighter(texter.document())
texter.show()
# Read the file inside a ``with`` block so the handle is closed again
# instead of staying open for the lifetime of the application.
with open('syntax.py', 'r') as infile:
    texter.setPlainText(infile.read())
app.exec_()

Related

Escape reserved characters in a list by adding backslash in front of it

# Space-separated reserved characters.  The backslash and the double quote
# are escaped so that the literal is valid Python.
reserved_chars = "? & | ! { } [ ] ( ) ^ ~ * : \\ \" ' + -"
# The last entry contains a literal backslash (written as \\); an unescaped
# '\n' would be a newline rather than the backslash the desired output shows.
list_vals = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', 'annoying\\name']
What is the fastest way to loop through every element in a list and add a \ in front of each reserved character that an element contains?
desired output:
fixed_list = ['gold\-bear#gmail.com', 'P\&G#dom.com', 'JACKSON\! BOT', 'annoying\\name']
You could make a translation table with str.maketrans() and pass that into translate. This takes a little setup, but you can reuse the translation table and it's quite fast:
# Every character that has to be prefixed with a backslash.
reserved_chars = '''?&|!{}[]()^~*:\\"'+-'''
list_vals = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', 'annoying\\name']

# Build the translation table once: each reserved character maps to the
# same character preceded by a backslash.
replace = ['\\' + ch for ch in reserved_chars]
trans = str.maketrans({ch: escaped for ch, escaped in zip(reserved_chars, replace)})

# Run every value through the table and print one result per line.
fixed_list = [value.translate(trans) for value in list_vals]
print(*fixed_list, sep="\n")
Prints:
gold\-bear#gmail.com
P\&G#dom.com
JACKSON\! BOT
annoying\\name
There is no fast way - you have strings, strings are immutable, so you need to create new ones.
Probably best way is to build your own translation dictionary and do the grunt work yourself:
# Reserved characters.  The separating spaces are part of the string, so the
# space itself ends up in the translation dictionary and gets escaped too.
# The backslash is written as \\ to avoid the invalid escape sequence "\ ".
reserved = """? & | ! { } [ ] ( ) ^ ~ * : \\ " ' + -"""
# Map every reserved character to itself preceded by a backslash.
tr = {c: f"\\{c}" for c in reserved}
print(tr)
# NOTE: 'annoying\name' is not a raw string, so "\n" is a newline here and
# the word contains no backslash to escape (see the printed output).
data = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', 'annoying\name']
# Rebuild each word character by character, substituting reserved ones.
transformed = [''.join(tr.get(letter, letter) for letter in word) for word in data]
for word in transformed:
    print(word)
Output:
# translation dictionary
{'?': '\\?', ' ': '\\ ', '&': '\\&', '|': '\\|', '!': '\\!', '{': '\\{',
'}': '\\}', '[': '\\[', ']': '\\]', '(': '\\(', ')': '\\)', '^': '\\^',
'~': '\\~', '*': '\\*', ':': '\\:', '\\': '\\\\', '"': '\\"', "'": "\\'",
'+': '\\+', '-': '\\-'}
# transformed strings
gold\-bear#gmail.com
P\&G#dom.com
JACKSON\!\ BOT
annoying
ame
Sidenotes:
Your example does not escape the space inside 'JACKSON\! BOT'.
The repr() of the transformed list looks "wrongly" escaped because repr() escapes each '\' again - to see what is actually printed, look at the word-by-word output above.
Definitely not the fastest, but could be the easiest to code. Make a regex that does it for you, and run re.sub, like this:
import re

reserved_chars = "?&|!{}[]()^~*:\\\"'+-"
# Spell every reserved character as a \xNN escape and wrap them all in a
# single capturing character class, so none of them needs special care
# inside the brackets.
hex_escapes = ['\\x%x' % ord(ch) for ch in reserved_chars]
replace_regex = "([" + ''.join(hex_escapes) + "])"
list_vals = ['gold-bear#gmail.com', 'P&G#dom.com', 'JACKSON! BOT', r'annoying\name']
# Put a backslash in front of every captured reserved character.
escaped_vals = []
for value in list_vals:
    escaped_vals.append(re.sub(replace_regex, r"\\\1", value))
Again, just to clarify, regexes are SLOW.

QtGui.QTextEdit set line color based on what text the line contains

It's my first time using Stack Overflow to find an answer to my problems.
I'm using a QtGui.QTextEdit to display text similar to below and would like to change the color of the text on some lines based on if they contain certain text.
lines that start with --[ will be blue and lines that contain [ERROR] would be red.
I currently have something like the following,
from PyQt4 import QtCore, QtGui, uic
import sys
class Log(QtGui.QWidget):
    """Widget that shows the contents of a log file in a QTextEdit."""

    def __init__(self, path=None, parent=None):
        """Create the widget and load the log file into it.

        ``path`` is the log file to display; when it is None the original
        hard-coded location "/net/test.log" is used.  ``parent`` is passed
        on to QWidget.
        """
        # Initialise the class this widget actually derives from (QWidget,
        # not QMainWindow).
        QtGui.QWidget.__init__(self, parent)
        self.taskLog = QtGui.QTextEdit()
        # NoWrap is the enum value the original spelled as False.
        self.taskLog.setLineWrapMode(QtGui.QTextEdit.NoWrap)
        vbox = QtGui.QVBoxLayout()
        vbox.addWidget(self.taskLog)
        self.setLayout(vbox)
        if path is None:
            path = "/net/test.log"
        # ``with`` closes the file even if reading it raises.
        with open(path, 'r') as log:
            self.taskLog.setText(log.read())
# Create the application, show the log widget and run the event loop;
# the value returned by app.exec_() is passed to sys.exit().
app = QtGui.QApplication(sys.argv)
wnd = Log()
wnd.show()
sys.exit(app.exec_())
The text looks something like this at the moment
--[ Begin
this is a test
[ERROR] this test failed.
--[ Command returned exit code 1
Hopefully you will all be able to help me work this out a lot faster than I could work it out myself.
Thanks,
Mark
This can be done quite easily with QSyntaxHighlighter. Here's a simple demo:
from PyQt4 import QtCore, QtGui
# Demo text loaded into the editor: two lines starting with '--[', one
# ordinary line and one line starting with '[ERROR]'.
sample = """
--[ Begin
this is a test
[ERROR] this test failed.
--[ Command returned exit code 1
"""
class Highlighter(QtGui.QSyntaxHighlighter):
    """Colour whole lines: '--[' section lines blue, '[ERROR]' lines red."""

    def __init__(self, parent):
        """Attach to ``parent`` and prepare the two line formats."""
        super(Highlighter, self).__init__(parent)
        self.sectionFormat = QtGui.QTextCharFormat()
        self.sectionFormat.setForeground(QtCore.Qt.blue)
        self.errorFormat = QtGui.QTextCharFormat()
        self.errorFormat.setForeground(QtCore.Qt.red)

    def highlightBlock(self, text):
        """Format the whole line according to its content.

        A line starting with '--[' gets the section format.  Otherwise a
        line containing '[ERROR]' anywhere gets the error format, which is
        what the question asks for ("lines that contain [ERROR]").
        """
        # uncomment this line for Python2
        # text = unicode(text)
        if text.startswith('--['):
            self.setFormat(0, len(text), self.sectionFormat)
        elif '[ERROR]' in text:
            self.setFormat(0, len(text), self.errorFormat)
class Window(QtGui.QWidget):
    """Demo window holding one text editor with the Highlighter attached."""

    def __init__(self):
        super(Window, self).__init__()

        # Editor first, then the highlighter on its document, then the
        # text, so the sample is highlighted as soon as it is set.
        editor = QtGui.QTextEdit(self)
        self.editor = editor
        self.highlighter = Highlighter(editor.document())
        editor.setText(sample)

        box = QtGui.QVBoxLayout(self)
        box.addWidget(editor)
if __name__ == '__main__':
    # Script entry point: build the application, place and show the demo
    # window, then exit with the event loop's return code.
    import sys

    app = QtGui.QApplication(sys.argv)
    window = Window()
    window.setGeometry(500, 150, 300, 300)
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
You can achieve this using HTML format
textEdit.setHtml(text);
But even better, the QSyntaxHighlighter class:
Doc : http://doc.qt.io/qt-5/qsyntaxhighlighter.html
Python example: https://wiki.python.org/moin/PyQt/Python%20syntax%20highlighting
Here is an example with a code editor.
import sys
from PyQt4.QtCore import QRegExp
from PyQt4.QtGui import QColor, QTextCharFormat, QFont, QSyntaxHighlighter
def format(color, style=''):
    """Build a QTextCharFormat for a named colour and optional style flags.

    ``color`` is handed to ``QColor.setNamedColor``.  ``style`` is searched
    for the substrings 'bold' and 'italic'; each one found switches on the
    corresponding font attribute of the returned format.
    """
    foreground = QColor()
    foreground.setNamedColor(color)

    char_format = QTextCharFormat()
    char_format.setForeground(foreground)

    wants_bold = 'bold' in style
    wants_italic = 'italic' in style
    if wants_bold:
        char_format.setFontWeight(QFont.Bold)
    if wants_italic:
        char_format.setFontItalic(True)

    return char_format
# Syntax styles that can be shared by all languages.
# Maps a style name to the QTextCharFormat produced by format(colour, style);
# the second argument, when given, adds 'bold' or 'italic' to the format.
STYLES = {
'keyword': format('blue'),
'operator': format('red'),
'brace': format('darkGray'),
'defclass': format('black', 'bold'),
'string': format('magenta'),
'string2': format('darkMagenta'),
'comment': format('darkGreen', 'italic'),
'self': format('black', 'italic'),
'numbers': format('brown'),
}
class PythonHighlighter (QSyntaxHighlighter):
    """Syntax highlighter for the Python language.

    Single-line constructs (keywords, operators, braces, strings, comments,
    numbers, ``def``/``class`` names) are matched with one QRegExp per rule.
    Triple-quoted strings are tracked across lines through the block state:
    state 1 means "inside triple-single quotes", state 2 means "inside
    triple-double quotes", and state 0 means neither.
    """
    # Python keywords
    keywords = [
        'and', 'assert', 'break', 'class', 'continue', 'def',
        'del', 'elif', 'else', 'except', 'exec', 'finally',
        'for', 'from', 'global', 'if', 'import', 'in',
        'is', 'lambda', 'not', 'or', 'pass', 'print',
        'raise', 'return', 'try', 'while', 'yield',
        'None', 'True', 'False',
    ]

    # Python operators, written as regex fragments.  Raw strings keep the
    # backslashes intact without relying on Python passing unknown escape
    # sequences such as '\+' through unchanged.
    operators = [
        '=',
        # Comparison
        '==', '!=', '<', '<=', '>', '>=',
        # Arithmetic
        r'\+', '-', r'\*', '/', '//', r'\%', r'\*\*',
        # In-place
        r'\+=', '-=', r'\*=', '/=', r'\%=',
        # Bitwise
        r'\^', r'\|', r'\&', r'\~', '>>', '<<',
    ]

    # Python braces (regex fragments)
    braces = [
        r'\{', r'\}', r'\(', r'\)', r'\[', r'\]',
    ]

    def __init__(self, document):
        """Attach the highlighter to ``document`` and compile all rules."""
        QSyntaxHighlighter.__init__(self, document)

        # Multi-line strings (expression, flag, style)
        # FIXME: The triple-quotes in these two lines will mess up the
        # syntax highlighting from this point onward
        self.tri_single = (QRegExp("'''"), 1, STYLES['string2'])
        self.tri_double = (QRegExp('"""'), 2, STYLES['string2'])

        # Each rule is (pattern, capture group to format, format).
        rules = []

        # Keyword, operator, and brace rules
        rules += [(r'\b%s\b' % w, 0, STYLES['keyword'])
                  for w in PythonHighlighter.keywords]
        rules += [(r'%s' % o, 0, STYLES['operator'])
                  for o in PythonHighlighter.operators]
        rules += [(r'%s' % b, 0, STYLES['brace'])
                  for b in PythonHighlighter.braces]

        # All other rules
        rules += [
            # 'self'
            (r'\bself\b', 0, STYLES['self']),

            # Double-quoted string, possibly containing escape sequences
            (r'"[^"\\]*(\\.[^"\\]*)*"', 0, STYLES['string']),
            # Single-quoted string, possibly containing escape sequences
            (r"'[^'\\]*(\\.[^'\\]*)*'", 0, STYLES['string']),

            # 'def' followed by an identifier
            (r'\bdef\b\s*(\w+)', 1, STYLES['defclass']),
            # 'class' followed by an identifier
            (r'\bclass\b\s*(\w+)', 1, STYLES['defclass']),

            # From '#' until a newline
            (r'#[^\n]*', 0, STYLES['comment']),

            # Numeric literals
            (r'\b[+-]?[0-9]+[lL]?\b', 0, STYLES['numbers']),
            (r'\b[+-]?0[xX][0-9A-Fa-f]+[lL]?\b', 0, STYLES['numbers']),
            (r'\b[+-]?[0-9]+(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?\b', 0, STYLES['numbers']),
        ]

        # Build a QRegExp for each pattern
        self.rules = [(QRegExp(pat), index, fmt)
                      for (pat, index, fmt) in rules]

    def highlightBlock(self, text):
        """Apply syntax highlighting to the given block of text.
        """
        # Do other syntax formatting.  The loop variable is named ``fmt`` so
        # it does not shadow the module-level ``format`` helper.
        for expression, nth, fmt in self.rules:
            index = expression.indexIn(text, 0)

            while index >= 0:
                # We actually want the index of the nth match
                index = expression.pos(nth)
                # len() works whether Qt hands over a QString or a str;
                # .length() exists only on QString.
                length = len(expression.cap(nth))
                self.setFormat(index, length, fmt)
                index = expression.indexIn(text, index + length)

        self.setCurrentBlockState(0)

        # Do multi-line strings
        in_multiline = self.match_multiline(text, *self.tri_single)
        if not in_multiline:
            in_multiline = self.match_multiline(text, *self.tri_double)

    def match_multiline(self, text, delimiter, in_state, style):
        """Do highlighting of multi-line strings. ``delimiter`` should be a
        ``QRegExp`` for triple-single-quotes or triple-double-quotes, and
        ``in_state`` should be a unique integer to represent the corresponding
        state changes when inside those strings. Returns True if we're still
        inside a multi-line string when this function is finished.
        """
        # If the previous block ended inside this kind of string, start at 0
        if self.previousBlockState() == in_state:
            start = 0
            add = 0
        # Otherwise, look for the delimiter on this line
        else:
            start = delimiter.indexIn(text)
            # Move past this match
            add = delimiter.matchedLength()

        # As long as there's a delimiter match on this line...
        while start >= 0:
            # Look for the ending delimiter
            end = delimiter.indexIn(text, start + add)
            # Ending delimiter on this line?
            if end >= add:
                length = end - start + add + delimiter.matchedLength()
                self.setCurrentBlockState(0)
            # No; multi-line string
            else:
                self.setCurrentBlockState(in_state)
                length = len(text) - start + add
            # Apply formatting
            self.setFormat(start, length, style)
            # Look for the next match
            start = delimiter.indexIn(text, start + length)

        # True if still inside a multi-line string, False otherwise
        return self.currentBlockState() == in_state

How to replace text in curly brackets with another text based on comparisons using Python Regex

I am quite new to regular expressions. I have a string that looks like this:
str = "abc/def/([default], [testing])"
and a dictionary
dict = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
and using Python RE, I want str in this form, after comparisons of each element in dict to str:
str = "abc/def/(2.7, 2.1)"
Any help how to do it using Python RE?
P.S. It's not part of any assignment; it is part of my project at work, and I have spent many hours trying to figure out a solution, but in vain.
import re

st = "abc/def/([default], [testing], [something])"
dic = {'abc/def/[default]' : '2.7',
       'abc/def/[testing]' : '2.1',
       'bcd/xed/[something]' : '3.1'}

# Raw strings so that \w and \[ reach the regex engine unchanged.
prefix_regex = r"^[\w*/]*"   # leading run of word characters, '*' and '/'
tag_regex = r"\[\w*\]"       # a bracketed tag such as [default]

prefix = re.findall(prefix_regex, st)[0]
tags = re.findall(tag_regex, st)

# Replace a tag only when a dictionary key has both the same prefix as the
# string and the same tag; keys with a different prefix are left alone.
for key in dic:
    key_prefix = re.findall(prefix_regex, key)[0]
    key_tag = re.findall(tag_regex, key)[0]
    if prefix == key_prefix:
        for tag in tags:
            if tag == key_tag:
                st = st.replace(tag, dic[key])

# print() with one argument behaves the same on Python 2 and Python 3.
print(st)
OUTPUT:
abc/def/(2.7, 2.1, [something])
Here is a solution using re module.
Hypotheses :
there is a dictionary whose keys are composed of a prefix and a variable part, the variable part is enclosed in brackets ([])
the values are strings by which the variable parts are to be replaced in the string
the string is composed by a prefix, a (, a list of variable parts and a )
the variable parts in the string are enclosed in []
the variable parts in the string are separated by a comma followed by optional spaces
Python code :
import re
class splitter:
    """Replace bracketed identifiers in a string with values from a dict.

    The dictionary keys are expected to be ``prefix + "[name]"``; the
    strings to convert look like ``prefix + "([a], [b], ...)"``.
    """
    # Prefix: everything up to (not including) the first '('.
    pref = re.compile(r"[^(]+")
    # Identifier: a '[' ... ']' pair with no ']' inside.  Raw string so the
    # backslashes reach the regex engine unchanged.
    iden = re.compile(r"\[[^]]*\]")

    def __init__(self, d):
        """Store the lookup dictionary ``d``."""
        self.d = d

    def split(self, s):
        """Return ``(prefix, identifiers)`` for ``s``.

        Returns None when ``s`` has no prefix, i.e. it is empty or starts
        with '('.  Identifiers are searched only after the prefix.
        """
        m = self.pref.match(s)
        if m is not None:
            p = m.group(0)
            elts = self.iden.findall(s, m.span()[1])
            return p, elts
        return None

    def convert(self, s):
        """Return ``s`` with every identifier replaced by its value.

        Raises TypeError when ``split`` returns None for ``s`` (the result
        cannot be unpacked) and KeyError when ``prefix + identifier`` is not
        a key of the dictionary.
        """
        p, elts = self.split(s)
        return p + "(" + ", ".join((self.d[p + elt] for elt in elts)) + ")"
Usage :
# Example input: a prefix followed by a parenthesised list of identifiers.
s = "abc/def/([default], [testing])"
# Lookup table keyed by prefix + identifier.
d = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
sp = splitter(d)
print(sp.convert(s))
output :
abc/def/(2.7, 2.1)
Regex is probably not required here. Hope this helps
# NOTE: ``str`` and ``dict`` are the variables defined in the question (they
# shadow the builtins of the same name), not the builtin types.
# Split "abc/def/([default], [testing])" into the part before "/(" and the rest.
lhs,rhs = str.split("/(")
# Drop the closing parenthesis and separate the two bracketed names.
rhs1,rhs2 = rhs.strip(")").split(", ")
# Restore the slash consumed by the split so lhs matches the key prefix.
lhs+="/"
# print() with one argument behaves the same on Python 2 and Python 3.
print("{0}({1},{2})".format(lhs,dict[lhs+rhs1],dict[lhs+rhs2]))
output
abc/def/(2.7,2.1)

regular expression for a string format

I have a string as
(device
(vfb
(xxxxxxxx)
(xxxxxxxx)
(location 0.0.0.0:5900)
)
)
(device
(console
(xxxxxxxx)
(xxxxxxxx)
(location 80)
)
)
I need to read the location line from "vfb" portion of the string. I have tried to use regular expression like
import re
re.findall(r'device.*?\vfb.*?\(.*?(.*?).*(.*?\))
But it doesn't give me the required output.
It's better to use a parser for problems like this. Fortunately, a parser would be rather trivial in your case:
def parse(source):
    """Parse parenthesised expressions in ``source`` into nested dicts.

    Tokens are '(', ')' and runs of characters that are neither whitespace
    nor parentheses.  ``(key child ...)`` becomes ``{key: {...}}`` with the
    children merged into the inner dict; a bare token becomes
    ``{'value': token}``.  Returns a list with one entry per top-level
    expression.

    Raises IndexError when the input ends before an opened expression is
    closed.
    """
    import re
    from collections import deque

    def expr(tokens):
        # Consume one expression from the front of the token queue.
        t = tokens.popleft()
        if t != '(':
            return {'value': t}
        key, val = tokens.popleft(), {}
        while tokens[0] != ')':
            val.update(expr(tokens))
        tokens.popleft()  # discard the closing ')'
        return {key: val}

    # A deque makes taking a token from the front O(1); list.pop(0) shifts
    # the whole remaining list on every call.
    tokens = deque(re.findall(r'\(|\)|[^\s()]+', source))
    lst = []
    while tokens:
        lst.append(expr(tokens))
    return lst
Given the above snippet, this creates a structure like:
[{'device': {'vfb': {'location': {'value': '0.0.0.0:5900'}, 'xxxxxxxx': {}}}},
{'device': {'console': {'location': {'value': '80'}, 'xxxxxxxx': {}}}}]
Now you can iterate it and fetch whatever you need:
# Walk the parsed expressions and pick out the vfb location; entries that
# lack any of the nested keys are skipped.
for entry in parse(source):
    try:
        location = entry['device']['vfb']['location']['value']
    except KeyError:
        continue
With that intro from Martijn Pieters, here is a pyparsing approach:
# Sample input: two top-level (device ...) expressions.
inputdata = """(device
(vfb
(xxxxxxxx)
(xxxxxxxx)
(location 0.0.0.0:5900)
)
)
(device
(console
(xxxxxxxx)
(xxxxxxxx)
(location 80)
)
)"""
from pyparsing import OneOrMore, nestedExpr
# a nestedExpr defaults to reading space-separated words within nested parentheses
# OneOrMore lets the parser accept several top-level expressions in a row.
data = OneOrMore(nestedExpr()).parseString(inputdata)
print (data.asList())
# recursive search to walk parsed data to find desired entry
def findPath(seq, path):
    """Return the value found by following ``path`` through nested lists.

    ``seq`` is a sequence of nodes, each node being a list whose first item
    is its name and whose remaining items are its value or child nodes.
    ``path`` is a list of names.  The search is depth-first: when a branch
    matches the first names but not the rest, later siblings are still
    tried.  Returns the item following the last matched name, or None when
    the path is empty, nothing matches, or the matching node has no value.
    """
    if not path:
        return None
    head, rest = path[0], path[1:]
    for s in seq:
        # Only non-empty lists are nodes; a bare string is a value, and
        # indexing it would compare single characters against the name.
        if isinstance(s, str) or not s or s[0] != head:
            continue
        if not rest:
            if len(s) > 1:
                return s[1]
            # Name matched but the node carries no value; keep looking.
            continue
        ret = findPath(s[1:], rest)
        if ret is not None:
            return ret
    return None
# print() call form, matching the print on the parsed list above; it behaves
# the same on Python 2 and Python 3 for a single argument.
print(findPath(data, "device/vfb/location".split('/')))
prints:
[['device', ['vfb', ['xxxxxxxx'], ['xxxxxxxx'], ['location', '0.0.0.0:5900']]],
['device', ['console', ['xxxxxxxx'], ['xxxxxxxx'], ['location', '80']]]]
0.0.0.0:5900
Maybe this gets you started:
In [84]: data = '(device(vfb(xxxxxxxx)(xxxxxxxx)(location 0.0.0.0:5900)))'
In [85]: m = re.search(r"""
.....: vfb
.....: .*
.....: \(
.....: location
.....: \s+
.....: (
.....: [^\)]+
.....: )
.....: \)""", data, flags=re.X)
In [86]: m.group(1)
Out[86]: '0.0.0.0:5900'

pretty print assertEqual() for HTML strings

I want to compare two strings in a python unittest which contain html.
Is there a method which outputs the result in a human friendly (diff like) version?
A simple method is to strip whitespace from the HTML and split it into a list. Python 2.7's unittest (or the backported unittest2) then gives a human-readable diff between the lists.
import re
def split_html(html):
    """Split ``html`` into a list of lines without surrounding whitespace.

    Leading and trailing whitespace of the whole string is removed first;
    the rest is split at every newline together with the whitespace around
    it.
    """
    trimmed = html.strip()
    line_break = re.compile(r'\s*\n\s*')
    return line_break.split(trimmed)
def test_render_html(self):
    """Compare the rendered HTML line by line with the expected lines.

    Written as a TestCase method: the body calls ``self.assertEqual``, so
    the function has to receive ``self``.  Comparing two lists makes
    unittest print a readable diff when they differ.
    """
    expected = ['<div>', '...', '</div>']
    got = split_html(render_html())
    self.assertEqual(expected, got)
If I'm writing a test for working code, I usually first set expected = [], insert a self.maxDiff = None before the assert and let the test fail once. The expected list can then be copy-pasted from the test output.
You might need to tweak how whitespace is stripped depending on what your HTML looks like.
I submitted a patch to do this some years back. The patch was rejected but you can still view it on the python bug list.
I doubt you would want to hack your unittest.py to apply the patch (if it even still works after all this time), but here's the function for reducing two strings to a manageable size while still keeping at least part of what differs. As long as you don't need the complete differences, this might be what you want:
def shortdiff(x,y):
    '''shortdiff(x,y)

    Compare strings x and y and display differences.
    If the strings are too long, shorten them to fit
    in one line, while still keeping at least some difference.

    Returns four lines joined by newlines: x, a marker line for x, y and a
    marker line for y.  Markers are '^' for replaced, '-' for deleted,
    '+' for inserted and ' ' for equal characters.  Equal strings produce
    marker lines consisting of spaces only.
    '''
    import difflib

    LINELEN = 79

    def limit(s):
        # Cut s down to LINELEN characters, ending in '...' when shortened.
        if len(s) > LINELEN:
            return s[:LINELEN-3] + '...'
        return s

    def firstdiff(s, t):
        # Index of the first position where s and t differ, or None when
        # they are equal.  Compares in chunks before going char by char.
        span = 1000
        for pos in range(0, max(len(s), len(t)), span):
            if s[pos:pos+span] != t[pos:pos+span]:
                for index in range(pos, pos+span):
                    if s[index:index+1] != t[index:index+1]:
                        return index
        return None

    # Floor division: ``left`` is used as a slice bound, and ``/`` would
    # produce a float on Python 3.
    left = LINELEN // 4
    index = firstdiff(x, y)
    # ``index`` is None for equal strings; treat that like an early
    # difference instead of comparing None with an int.
    if index is not None and index > left + 7:
        # Keep the start of each string, then jump to just before the
        # first difference.
        x = x[:left] + '...' + x[index-4:index+LINELEN]
        y = y[:left] + '...' + y[index-4:index+LINELEN]
    else:
        x, y = x[:LINELEN+1], y[:LINELEN+1]
        left = 0

    cruncher = difflib.SequenceMatcher(None)
    xtags = ytags = ""
    cruncher.set_seqs(x, y)
    editchars = { 'replace': ('^', '^'),
                  'delete': ('-', ''),
                  'insert': ('', '+'),
                  'equal': (' ',' ') }
    for tag, xi1, xi2, yj1, yj2 in cruncher.get_opcodes():
        lx, ly = xi2 - xi1, yj2 - yj1
        edits = editchars[tag]
        xtags += edits[0] * lx
        ytags += edits[1] * ly

    # Include ellipsis in edits line.
    if left:
        xtags = xtags[:left] + '...' + xtags[left+3:]
        ytags = ytags[:left] + '...' + ytags[left+3:]

    diffs = [ x, xtags, y, ytags ]
    if max([len(s) for s in diffs]) < LINELEN:
        return '\n'.join(diffs)

    diffs = [ limit(s) for s in diffs ]
    return '\n'.join(diffs)
Maybe this is a quite 'verbose' solution. You could add a new 'equality function' for your user defined type (e.g: HTMLString) which you have to define first:
class HTMLString(str):
    """Marker subclass of str for values that hold HTML.

    Adds no behaviour; it exists so that a type-specific equality function
    can be registered for it.
    """
Now you have to define a type equality function:
def assertHTMLStringEqual(first, second, msg=None):
    """Raise AssertionError with a unified diff when the strings differ.

    ``msg`` is the optional extra message; unittest passes it as a keyword
    argument to functions registered with ``addTypeEqualityFunc``, so the
    parameter has to exist.  When given, it is appended to the diff.
    Returns None when the two strings are equal.
    """
    if first != second:
        import difflib
        diff = '\n'.join(difflib.unified_diff(
            first.splitlines(), second.splitlines(),
            fromfile='first', tofile='second', lineterm=''))
        message = 'HTML strings differ:\n' + diff
        if msg is not None:
            message = '%s : %s' % (message, msg)
        raise AssertionError(message)
All you have to do is format your message as you like. You can also use a class method in your specific TestCase as a type equality function. This gives you more functionality to format your message, since unittest.TestCase does this a lot.
Now you have to register this equality function in your unittest.TestCase:
...
def __init__(self, *args, **kwargs):
    # Forward all arguments (unittest passes the test method name) to the
    # TestCase constructor, which creates the registry that
    # addTypeEqualityFunc writes to.
    # NOTE(review): assumes the enclosing class derives directly from
    # unittest.TestCase - confirm, or use super() with the real class name.
    import unittest
    unittest.TestCase.__init__(self, *args, **kwargs)
    self.addTypeEqualityFunc(HTMLString, assertHTMLStringEqual)
The same for a class method:
...
def __init__(self, *args, **kwargs):
    # Forward all arguments (unittest passes the test method name) to the
    # TestCase constructor, which creates the registry that
    # addTypeEqualityFunc writes to.
    # NOTE(review): assumes the enclosing class derives directly from
    # unittest.TestCase - confirm, or use super() with the real class name.
    import unittest
    unittest.TestCase.__init__(self, *args, **kwargs)
    # The equality function is given by name here; it is expected to be a
    # method of this TestCase class.
    self.addTypeEqualityFunc(HTMLString, 'assertHTMLStringEqual')
And now you can use it in your tests:
def test_something(self):
    """Compare two HTMLString values through assertEqual.

    The ``...`` arguments are placeholders for the HTML under test.
    """
    htmlstring1 = HTMLString(...)
    htmlstring2 = HTMLString(...)
    self.assertEqual(htmlstring1, htmlstring2)
This should work well with python 2.7.
I (the one asking this question) use BeautifulSoup now:
def assertEqualHTML(string1, string2, file1='', file2=''):
    u'''
    Compare two unicode strings containing HTML.

    Both strings are parsed and pretty-printed with BeautifulSoup before
    they are compared.  If they are not equal, a human friendly unified
    diff goes to logging.error() and an exception gets raised.

    ``file1`` and ``file2`` label the two inputs in the diff and in the
    error messages.  Raises Exception when an input is not a unicode
    string or when the pretty-printed documents differ.
    '''
    from BeautifulSoup import BeautifulSoup as bs
    import difflib
    # Imported here because the function logs the diff and must not depend
    # on the surrounding module having imported logging.
    import logging

    def short(mystr):
        # At most the first 20 characters, for use in error messages.
        limit = 20
        if len(mystr) > limit:
            return mystr[:limit]
        return mystr

    p = []
    for mystr, label in [(string1, file1), (string2, file2)]:
        if not isinstance(mystr, unicode):
            raise Exception(u'string ist not unicode: %r %s' % (short(mystr), label))
        soup = bs(mystr)
        pretty = soup.prettify()
        p.append(pretty)

    if p[0] != p[1]:
        for line in difflib.unified_diff(p[0].splitlines(), p[1].splitlines(), fromfile=file1, tofile=file2):
            logging.error(line)
        raise Exception('Not equal %s %s' % (file1, file2))

Categories

Resources