I have a django project that takes in some data from another app of ours. The data looks like this:
{u' updated': u'2017-04-03T22:30:53.760278 Z',
u'added': u'2017-04-03T22:30:53.760197 Z',
u'name':u'Jean Lamb,
1942- ', u' authority':{
u'id':2,
u'added_by':2,
u'name':u'VIAF'
},
u'local_identifier':u'85363862',
u'concept_type':{
u'id':5,
u'identifier': u'viaf:personal',
u'name':u'',
u'description':None
},
u'identifier': u'http://viaf.org/viaf/85363862',
u'identities':[
{
u'part_of':{
u'id':1,
u'added_by':2,
u'name': u'builtin:Conceptpower'
},
u'added': u'2017-04-03T22:33:20.476637 Z',
u'name':u'Jean Lamb',
u'confidence':1.0,
u'updated': u'2017-04-03T22:33:20.476699 Z',
u'concepts':[
u'http://viaf.org/viaf/85363862',
u'http://www.digitalhps.org/concepts/CONpeSHC70qxNC0'
],
u'id':208,
u'added_by':{
u'username':u'erickjones',
u'email':u'erick.jones#example.com'
}
},
{
u'part_of':{
u'id':1,
u'added_by':2,
u'name': u'builtin:Conceptpower'
},
u'added': u'2017-04-03T22:35:02.546054 Z',
u'name':u'Jean Lamb',
u'confidence':1.0,
u'updated': u'2017-04-03T22:35:02.546116 Z',
u'concepts':[
u'http://viaf.org/viaf/85363862',
u'http://www.digitalhps.org/concepts/CONpeSHC70qxNC0'
],
u'id':209,
u'added_by':{
u'username':u'erickjones',
u'email':u'erick.jones#example.com'
}
},
Right now I have a function that goes through and compares the concepts in in the identities. What I want to do is delete the duplicate concepts. The nesting of the dictionaries and lists are throwing me off. What I have been trying is:
del results[i]["identities"][z]["concepts"]
Any ideas as to why this does not work?
Here is my loop incase anyone is interested:
while (i != di):
test = results[i]["identities"]
if results[i]["identities"]:
z = 0
while (z != len(results[i]["identities"])):
con1 = results[i]["identities"][z]["concepts"]
print "this is con1: %s", con1
if z != len(results[i]["identities"]):
z = z + 1
else:
break
if z != len(results[i]["identities"]):
con2 = results[i]["identities"][z]["concepts"]
print "this is con2: %s", con2
if set(con1) == set(con2):
del results[i]["identities"][z]["concepts"]
else:
break
i = i + 1
In this line,
if set(con1) and set(con2):
Do you intend to check if con1 and con2 are the same set? Shouldn't you use == operator?
Related
Working on some code that uses pysimplegui as the UI and SQlite for the data sorting. I'm using SQLite's execute function to select data based on input from the user in the UI through variables. For example user wants to search for part name they input all or part of the name into the box, hit the search button which then runs my "parts_search" method, which will then only filter the result based on part name. OR the user enters information in multiple boxes which then filters based on the boxes that have information.
This here is runnable code provided you add a file base1.db in the same folder location as the script itself
import PySimpleGUI as sg
import os.path
import sqlite3
# sql var
c = None
conn = None
setup = None
# list var
parts = []
def sql():
global setup
conn_sql()
c.execute("""CREATE TABLE IF NOT EXISTS parts (part_name TEXT, part_number TEXT, part_series TEXT,
part_size INTEGER, job_type TEXT)""")
conn.commit()
if conn:
conn.close()
def conn_sql():
global c
global conn
# SQL connection var
if os.path.isfile('./base1.db'):
conn = sqlite3.connect('base1.db')
c = conn.cursor()
def main_gui_parts():
global parts
layout = [[sg.Text('Part Name: '), sg.Input(size=(20, 1), key='-PName-'), sg.Text('Part Series:'),
sg.Input(size=(10, 1), key='-PSeries-')],
[sg.Text('Part Number:'), sg.Input(size=(20, 1), key='-PNumber-'), sg.Text('Part Size:'),
sg.Input(size=(10, 1), key='-PSize-')],
[sg.Checkbox('Fit', key='-PFit-'), sg.Checkbox('Weld', key='-PWeld-'),
sg.Checkbox('Assemble', key='-PAssemble-'),
sg.Button('Search', key='-PSearch-')],
[sg.Listbox(parts, size=(58, 10), key='-PParts-')], [sg.Button('Back', key='-PBack-')]]
window = sg.Window('parts list', layout, grab_anywhere=True)
sql()
while True:
event, values = window.read()
if event == 'Close' or event == sg.WIN_CLOSED:
break
# PART WINDOW
part_name = values['-PName-']
part_series = values['-PSeries-']
part_number = values['-PNumber-']
part_size = values['-PSize-']
fit = values['-PFit-']
weld = values['-PWeld-']
assemble = values['-PAssemble-']
if event == '-PSearch-':
print('search parts')
part_search(part_name, part_series, part_number, part_size, fit, weld, assemble)
if event == '-PBack-':
break
window.close()
def part_search(part_name, part_series, part_number, part_size, fit, weld, assemble):
global parts
conn_sql()
filter_original = """SELECT * FROM parts WHERE """
filter = filter_original
if part_name:
print('part name: ' + part_name)
if filter == filter_original:
filter += """part_name LIKE ? """
else:
filter += """AND part_name LIKE ? """
if part_series:
print('part series: ' + part_series)
if filter == filter_original:
filter += """part_series=(?) """
else:
filter += """AND part_series=(?) """
if part_number:
print('part number: ' + part_number)
if filter == filter_original:
filter += """part_number LIKE ? """ ### DONT USE LIKE???
else:
filter += """AND part_number LIKE ? """ ### DONT USE LIKE???
if part_size:
print('part size: ' + part_size)
if filter == filter_original:
filter += """part_size=(?) """
else:
filter += """AND part_size=(?) """
if fit:
print('job type: ' + str(fit))
if filter == filter_original:
filter += """job_type = fit """
else:
filter += """AND job_type = fit """
if weld:
print('job type: ' + str(weld))
if filter == filter_original:
filter += """job_type = weld """
else:
filter += """AND job_type = weld """
if assemble:
print('job type: ' + str(assemble))
if filter == filter_original:
filter += """job_type = assemble"""
else:
filter += """AND job_type = assemble"""
print(filter)
#if filter != filter_original:
#c.execute(filter, ())
#else:
#c.execute("""SELECT * FROM parts""")
main_gui_parts()
THE PROBLEM: The commented code at the bottom is where I'm having trouble figuring out (in the "part_search" method). I don't use all of the variables all the time. Only filter with the variables provided by the user. which means the tuple should only have the variables which was input by the user.
If all the variables were used this is what it would look like. c.execute(filter, (part_name, part_series, part_number, part_size, fit, weld, assemble)) but more often than not only some of those variable will have been used and may need to look like this instead. c.execute(filter, (part_name, part_series, weld)) Somehow I need the variables here to be removeable(for lack of better word)
I've been learning a lot about SQLite but I could be seeing tunnel vision and can't think of a different way to go about this.
Probably the easiest way to deal with this is to put all the filter conditions and values into lists, and then only add a WHERE clause if the length of the filters list is non-zero. For example:
query = """SELECT * FROM parts"""
filters = []
values = []
if part_name:
filters.append("""part_name LIKE ?""")
values.append(part_name)
...
if len(filters):
query += ' WHERE ' + ' AND '.join(filters)
c.execute(query, tuple(values))
Note: should your filters ever include OR conditions, you need to parenthesise them when building the query to ensure correct operation i.e.
query += ' WHERE (' + ') AND ('.join(filters) + ')'
Using a Korean Input Method Editor (IME), it's possible to type 버리 + 어 and it will automatically become 버려.
Is there a way to programmatically do that in Python?
>>> x, y = '버리', '어'
>>> z = '버려'
>>> ord(z[-1])
47140
>>> ord(x[-1]), ord(y)
(47532, 50612)
Is there a way to compute that 47532 + 50612 -> 47140?
Here's some more examples:
가보 + 아 -> 가봐
끝나 + ㄹ -> 끝날
I'm a Korean. First, if you type 버리 + 어, it becomes 버리어 not 버려. 버려 is an abbreviation of 버리어 and it's not automatically generated. Also 가보아 cannot becomes 가봐 automatically during typing by the same reason.
Second, by contrast, 끝나 + ㄹ becomes 끝날 because 나 has no jongseong(종성). Note that one character of Hangul is made of choseong(초성), jungseong(중성), and jongseong. choseong and jongseong are a consonant, jungseong is a vowel. See more at Wikipedia. So only when there's no jongseong during typing (like 끝나), there's a chance that it can have jongseong(ㄹ).
If you want to make 버리 + 어 to 버려, you should implement some Korean language grammar like, especially for this case, abbreviation of jungseong. For example ㅣ + ㅓ = ㅕ, ㅗ + ㅏ = ㅘ as you provided. 한글 맞춤법 chapter 4. section 5 (I can't find English pages right now) defines abbreviation like this. It's possible, but not so easy job especially for non-Koreans.
Next, if what you want is just to make 끝나 + ㄹ to 끝날, it can be a relatively easy job since there're libraries which can handle composition and decomposition of choseong, jungseong, jongseong. In case of Python, I found hgtk. You can try like this (nonpractical code):
# hgtk methods take one character at a time
cjj1 = hgtk.letter.decompose('나') # ('ㄴ', 'ㅏ', '')
cjj2 = hgtk.letter.decompose('ㄹ') # ('ㄹ', '', '')
if cjj1[2]) == '' and cjj2[1]) == '':
cjj = (cjj1[0], cjj1[1], cjj2[0])
cjj2 = None
Still, without proper knowledge of Hangul, it will be very hard to get it done.
You could use your own Translation table.
The drawback is you have to input all pairs manual or you have a file to get it from.
For instance:
# Sample Korean chars to map
k = [[('버리', '어'), ('버려')], [('가보', '아'), ('가봐')], [('끝나', 'ㄹ'), ('끝날')]]
class Korean(object):
def __init__(self):
self.map = {}
for m in k:
key = m[0][0] + m[0][1]
self.map[hash(key)] = m[1]
def __getitem__(self, item):
return self.map[hash(item)]
def translate(self, s):
return [ self.map[hash(token)] for token in s]
if __name__ == '__main__':
k_map = Korean()
k_chars = [ m[0][0] + m[0][1] for m in k]
print('Input: %s' % k_chars)
print('Output: %s' % k_map.translate(k_chars))
one_char_3 = k[0][0][0] + k[0][0][1]
print('%s = %s' % (one_char_3, k_map[ one_char_3 ]) )
Input: ['버리어', '가보아', '끝나ㄹ']
Output: ['버려', '가봐', '끝날']
버리어 = 버려
Tested with Python:3.4.2
I'm trying to figure out a way to create unified diffs with line numbers only showing N lines of context. I have been unable to do this with difflib.unified_diff. I need to show changes in both files.
The closest I can come is using diff on the command line like so:
/usr/bin/diff
--unchanged-line-format=' %.2dn %L'
--old-line-format="-%.2dn %L"
--new-line-format="+%.2dn %L"
file1.py
file2.py
BUT I only want to show N lines of context, and /usr/bin/diff doesn't seem to support context with a custom line format (eg. -U2 is not compatible with --line-format "conflicting output style options").
Below is an example of what I'd like to accomplish (the same output as the above diff, but only showing 1 line of context surrounding changes):
+01: def renamed_function()
-01: def original_function():
02:
+03: """ Neat stuff here """
04:
21:
+22: # Here's a new comment
23:
85: # Output the value of foo()
+86: print "Foo is %s"%(foo())
-86: print foo()
87:
I was able to figure out something very close to what I wanted to do. It's slower than regular diff, though. Here's the entire code, from my project GitGate.
def unified_diff(to_file_path, from_file_path, context=1):
""" Returns a list of differences between two files based
on some context. This is probably over-complicated. """
pat_diff = re.compile(r'## (.[0-9]+\,[0-9]+) (.[0-9]+,[0-9]+) ##')
from_lines = []
if os.path.exists(from_file_path):
from_fh = open(from_file_path,'r')
from_lines = from_fh.readlines()
from_fh.close()
to_lines = []
if os.path.exists(to_file_path):
to_fh = open(to_file_path,'r')
to_lines = to_fh.readlines()
to_fh.close()
diff_lines = []
lines = difflib.unified_diff(to_lines, from_lines, n=context)
for line in lines:
if line.startswith('--') or line.startswith('++'):
continue
m = pat_diff.match(line)
if m:
left = m.group(1)
right = m.group(2)
lstart = left.split(',')[0][1:]
rstart = right.split(',')[0][1:]
diff_lines.append("## %s %s ##\n"%(left, right))
to_lnum = int(lstart)
from_lnum = int(rstart)
continue
code = line[0]
lnum = from_lnum
if code == '-':
lnum = to_lnum
diff_lines.append("%s%.4d: %s"%(code, lnum, line[1:]))
if code == '-':
to_lnum += 1
elif code == '+':
from_lnum += 1
else:
to_lnum += 1
from_lnum += 1
return diff_lines
I have been trying to use a list in Python. I call it once to store a value
inside of one loop. I then call it again inside of another loop, but by the
time I add something to the list, it has overwritten my old entries with the
new ones, before I ever actually add the new one. It may be that I do not fully
understand Python, but I will post a simple version of the code below, then the
whole version:
ret=[]
plan=argument
for i in range(x):
plan.changeY
ret.append(plan)
plan=argument
for i in range(Z):
plan.changeD
ret.append(plan)
plan=argument
The problem arises before I get to the second append: all the values of the
first append are altered. The code for it is below.
global PLANCOUNTER
global VARNUM
ret=[]
ret=[]
plan = node.state
keep = plan
if printPlans:
print "fringe[0].state:",plan.id, "---"
printstate(plan)
if node.parent:
print "parent: plan",node.parent.state.id
if len(plan.openconds)>0:
print plan.openconds[0],"is the condition being resolved\n"
openStep = plan.openconds[0][1]#access the step
openStep1=openStep
openCond = plan.openconds[0][0]
plan.openconds = plan.openconds[1:]
keep=plan
if(len(plan.steps)>1):#has init and goal!
########################
#NOT GETTING RID OF THE OPENCOND, SO ASTAR NEVER TAKING IT
#######################
if openStep!="init" and openStep!="goal":
val = openStep.index("*")
openStep=openStep[:val]
numPreConds=len(preconds[openStep])
numStep=len(plan.steps)
for i in plan.steps:
i2=i
plan = keep
if i!="init" and i!="goal":
i=i[:i.index("*")]
if i !="goal" and i!=openStep:
for j in adds[i]:
bool=0
if j==openCond:
plan.causallinks.append((i2,openCond,openStep1))#problem
plan.ordercons.append((i2,openStep1))
PLANCOUNTER+=1
plan.id=PLANCOUNTER
#threats
bol=0
for t in plan.steps:#all steps
t2=t
if t!="init" and t!="goal":
val = t.index("*")
t=t[:val]
for k in deletes[t]:
if k == openCond:
for b in plan.ordercons:
if b ==(t,i):
bol=1
if bol==0 and t!=i:
for v in plan.threats:
if v[0]==(i2,openCond,openStep1) and v[1]==t2:
bol=1
if bol==0 and t!=i and i2!="init":
plan.threats.append(((i2,openCond,openStep1),t2))
else:
bol=0
ret.append(plan)
print len(plan.openconds)+len(plan.threats)," upper\n"
plan=keep
meh=ret
counter=0
arr={}
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"middle"
key = counter
arr[counter]=k
print arr[counter].id
counter+=1
for i in adds:#too many conditions
stepCons = i
plan2 = keep
if i!="goal" and i!="init" and i!=openStep:
for j in adds[i]:
if j==openCond:
nextStep=i
st = str(i)+"*"+str(VARNUM)
VARNUM+=1
plan2.steps.append(st)
plan2.ordercons.append(("init",st))
plan2.ordercons.append((st, "goal"))
plan2.ordercons.append((st,openStep1))
plan2.causallinks.append((st,openCond,openStep1))
##################################################
for k in preconds[i]:#issue is htereeeeeeeee
plan2.openconds.append((k,st))
for k in meh:
print len(k.openconds)+len(k.threats)," ", k.id,"middle2s"
PLANCOUNTER+=1
plan2.id=PLANCOUNTER
#threats
cnt=0
for tr in range(len(arr)):
print len(arr[cnt].openconds)+len(arr[cnt].threats)," ", arr[cnt].id,"middlearr"
cnt+=1
bol=0
for t in plan2.steps:#all steps
t2=t
if t!="init" and t!="goal":
val = t.index("*")
t=t[:val]
for k in deletes[t]:#check their delete list
if k == openCond:#if our condition is on our delete lise
for b in plan2.ordercons:
if b ==(t,i):# and it is not ordered before it
bol=1
if bol==0 and t!=i:
for v in plan2.threats:
if v[0]==(i2,openCond,openStep1) and v[1]==t2:
bol=1
if bol==0 and t!=i and st!="init":
plan2.threats.append(((st,openCond,openStep1),t2))
else:
bol=0
#and y is not before C
#causal link, threatening step
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"middle3"
ret.append(plan2)
print len(plan2.openconds)+len(plan2.threats)," ",plan2.id," lower\n"
elif len(plan.threats)>0:
#keep=plan
openThreatProducer = plan.threats[0][0][0]#access the step
openThreat=plan.threats[0][1]
plan.threats=plan.threats[1:]
print openThreatProducer, " ", openThreat
i=0
while i<2:
plan = keep
if i==0:
bool=0
for k in plan.ordercons:
if (k[0]==openThreat and k[1]==openThreatProducer) or (k[1]==openThreat and k[0]==openThreatProducer):
bool=1
if bool==0:
plan.ordercons.append((openThreatProducer,openThreat))
elif i==1:
bool=0
for k in plan.ordercons:
if (k[0]==openThreat and k[1]==openThreatProducer) or (k[1]==openThreat and k[0]==openThreatProducer):
bool=1
if bool==0:
plan.ordercons.append((openThreat,openThreatProducer))
ret.append(plan)
i+=1
t=len(ret)
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"lowest"
print t
return ret
It seems like after doing plan=argument, plan and argument are pointing to same location. you should do something like this
import copy
plan = copy.deepcopy(argument)
This would create a exact copy of argument.
I want to compare two strings in a python unittest which contain html.
Is there a method which outputs the result in a human friendly (diff like) version?
A simple method is to strip whitespace from the HTML and split it into a list. Python 2.7's unittest (or the backported unittest2) then gives a human-readable diff between the lists.
import re
def split_html(html):
return re.split(r'\s*\n\s*', html.strip())
def test_render_html():
expected = ['<div>', '...', '</div>']
got = split_html(render_html())
self.assertEqual(expected, got)
If I'm writing a test for working code, I usually first set expected = [], insert a self.maxDiff = None before the assert and let the test fail once. The expected list can then be copy-pasted from the test output.
You might need to tweak how whitespace is stripped depending on what your HTML looks like.
I submitted a patch to do this some years back. The patch was rejected but you can still view it on the python bug list.
I doubt you would want to hack your unittest.py to apply the patch (if it even still works after all this time), but here's the function for reducing two strings a manageable size while still keeping at least part of what differs. So long as all you didn't want the complete differences this might be what you want:
def shortdiff(x,y):
'''shortdiff(x,y)
Compare strings x and y and display differences.
If the strings are too long, shorten them to fit
in one line, while still keeping at least some difference.
'''
import difflib
LINELEN = 79
def limit(s):
if len(s) > LINELEN:
return s[:LINELEN-3] + '...'
return s
def firstdiff(s, t):
span = 1000
for pos in range(0, max(len(s), len(t)), span):
if s[pos:pos+span] != t[pos:pos+span]:
for index in range(pos, pos+span):
if s[index:index+1] != t[index:index+1]:
return index
left = LINELEN/4
index = firstdiff(x, y)
if index > left + 7:
x = x[:left] + '...' + x[index-4:index+LINELEN]
y = y[:left] + '...' + y[index-4:index+LINELEN]
else:
x, y = x[:LINELEN+1], y[:LINELEN+1]
left = 0
cruncher = difflib.SequenceMatcher(None)
xtags = ytags = ""
cruncher.set_seqs(x, y)
editchars = { 'replace': ('^', '^'),
'delete': ('-', ''),
'insert': ('', '+'),
'equal': (' ',' ') }
for tag, xi1, xi2, yj1, yj2 in cruncher.get_opcodes():
lx, ly = xi2 - xi1, yj2 - yj1
edits = editchars[tag]
xtags += edits[0] * lx
ytags += edits[1] * ly
# Include ellipsis in edits line.
if left:
xtags = xtags[:left] + '...' + xtags[left+3:]
ytags = ytags[:left] + '...' + ytags[left+3:]
diffs = [ x, xtags, y, ytags ]
if max([len(s) for s in diffs]) < LINELEN:
return '\n'.join(diffs)
diffs = [ limit(s) for s in diffs ]
return '\n'.join(diffs)
Maybe this is a quite 'verbose' solution. You could add a new 'equality function' for your user defined type (e.g: HTMLString) which you have to define first:
class HTMLString(str):
pass
Now you have to define a type equality function:
def assertHTMLStringEqual(first, second):
if first != second:
message = ... # TODO here: format your message, e.g a diff
raise AssertionError(message)
All you have to do is format your message as you like. You can also use a class method in your specific TestCase as a type equality function. This gives you more functionality to format your message, since unittest.TestCase does this a lot.
Now you have to register this equality function in your unittest.TestCase:
...
def __init__(self):
self.addTypeEqualityFunc(HTMLString, assertHTMLStringEqual)
The same for a class method:
...
def __init__(self):
self.addTypeEqualityFunc(HTMLString, 'assertHTMLStringEqual')
And now you can use it in your tests:
def test_something(self):
htmlstring1 = HTMLString(...)
htmlstring2 = HTMLString(...)
self.assertEqual(htmlstring1, htmlstring2)
This should work well with python 2.7.
I (the one asking this question) use BeautfulSoup now:
def assertEqualHTML(string1, string2, file1='', file2=''):
u'''
Compare two unicode strings containing HTML.
A human friendly diff goes to logging.error() if there
are not equal, and an exception gets raised.
'''
from BeautifulSoup import BeautifulSoup as bs
import difflib
def short(mystr):
max=20
if len(mystr)>max:
return mystr[:max]
return mystr
p=[]
for mystr, file in [(string1, file1), (string2, file2)]:
if not isinstance(mystr, unicode):
raise Exception(u'string ist not unicode: %r %s' % (short(mystr), file))
soup=bs(mystr)
pretty=soup.prettify()
p.append(pretty)
if p[0]!=p[1]:
for line in difflib.unified_diff(p[0].splitlines(), p[1].splitlines(), fromfile=file1, tofile=file2):
logging.error(line)
raise Exception('Not equal %s %s' % (file1, file2))