How do I split the 2nd tuple
data = [('152', 'Farko', 'Kier'), ('153', 'Park - Pub')]
to get this output:
[('152', 'Farko', 'Kier'), ('153', 'Park', 'Pub')]
I tried this way:
lst = []
for i in data:
if len(i) == 2:
i[1] = tuple(i[1].split(' - '))
lst.append(i)
It'd work, except it raised an exception TypeError: 'tuple' object does not support item assignment. But I can't assign i = tuple(i[1].split(' - ')) because I need to keep the number which is in position i[0] in tuple. List comprehansion solution would be greatly welcome. Suggestions?
You could use a list comprehension:
lst = [t[:1] + tuple(t[1].split(' - ')) if len(t) == 2 else t for t in data]
or you could adjust your loop to create a new tuple:
for i in data:
if len(i) == 2:
i = i[:1] + tuple(i[1].split(' - '))
lst.append(i)
newdata = [tuple(s for t in tu for s in t.split(' - ')) for tu in data]
# [('152', 'Farko', 'Kier'), ('153', 'Park', 'Pub')]
As you have seen you can't modify the tuple, however you can just create a new one with the data you want:
data = [('152', 'Farko', 'Kier'), ('153', 'Park - Pub')]
lst = []
for i in data:
if len(i) == 2:
i = i[:1] + tuple(i[1].split(' - '))
lst.append(i)
Or if you want to modify the original list:
for i, tup in enumerate(data):
if len(tup) == 2:
data[i] = tup[:1] + tuple(tup[1].split(' - '))
Related
Here is my data -
inp = [{'father_husband_mother_name': [['Father s Name', 0.8603670001029968],
['Shripati', 0.8603670001029968],
['Father s Name', 0.8903670001029969],
['Shpppati', 0.8903670001029969]],
'doc_id': [['GGX2176', 0.8435981869697571],
['GGC2176', 0.8835981869697571]],
'name': [['Elector s Name', 0.8301510810852051],
['Shibshankar Ghosh', 0.8301510810852051],
['Elector s Name', 0.8501510810852051],
['Shibshankar Ghosh', 0.8501510810852051]],
'date_of_birth': [['Age as on 1.1.2000', 0.8067844915390014],
['15', 0.8067844915390014],
['Age as on 1.1.2000', 0.8267844915390015],
['15', 0.8267844915390015]],
'gender_sex': [['Sex', 0.7784658074378967],
['M', 0.7784658074378967],
['Sex', 0.8784658074378967],
['M', 0.8784658074378967]]}]
STOPWORDS = ['Sex', 'Father s Name', 'Elector s Name', 'Address', 'Name', 'Gender', 'Mother s Name',
'Husband s Name']
The output that I expect:
{'father_husband_mother_name': 'Shpppati',
'doc_id': 'GGC2176',
'name': 'Shibshankar Ghosh',
'date_of_birth': 'Age as on 1.1.2000,15',
'gender_sex': 'M'}
Here is the logic -
Retrieve the value that has the highest confidence score [the float inside the list of lists] that is not present in STOPWORDS for each key.
What I have tried -
def process_kie_dict(voter_raw_labels, threshold=0.7):
cleaned_dict = {}
intermediate_dict = {}
for entity_dict in voter_raw_labels:
for entity, val in entity_dict.items():
conf_val = [item[1] for item in val]
unique_val = list(set(conf_val))
max_conf = max(unique_val)
if max_conf > threshold:
if len(unique_val)==1:
add_val = [item[0] for item in val]
else:
max_conf_index = conf_val.index(max_conf)
add_val = [item[0] for item in val[max_conf_index:]]
if entity not in intermediate_dict.keys():
intermediate_dict[entity] = [add_val,max_conf]
else:
if intermediate_dict[entity][1] < max_conf:
intermediate_dict[entity] = [add_val,max_conf]
# print(intermediate_dict)
for key, val in intermediate_dict.items():
final_value = ''
for value in val[0]:
m = len(str.strip(value))
edit_dist_list = []
for word in STOPWORDS:
n = len(word)
edit_dist = editDistDP(value, word, m, n)
edit_dist_list.append(edit_dist)
if min(edit_dist_list) < 2:
value=''
final_value = final_value + value + ','
clean_value = final_value.strip(",")
cleaned_dict[key]=clean_value
return cleaned_dict
def editDistDP(str1, str2, m, n):
# Create a table to store results of subproblems
dp = [[0 for x in range(n + 1)] for x in range(m + 1)]
# Fill d[][] in bottom up manner
for i in range(m + 1):
for j in range(n + 1):
# If first string is empty, only option is to
# insert all characters of second string
if i == 0:
dp[i][j] = j # Min. operations = j
# If second string is empty, only option is to
# remove all characters of second string
elif j == 0:
dp[i][j] = i # Min. operations = i
# If last characters are same, ignore last char
# and recur for remaining string
elif str1[i-1] == str2[j-1]:
dp[i][j] = dp[i-1][j-1]
# If last character are different, consider all
# possibilities and find minimum
else:
dp[i][j] = 1 + min(dp[i][j-1], # Insert
dp[i-1][j], # Remove
dp[i-1][j-1]) # Replace
return dp[m][n]
You can forget about the edit distance implementation, not important. What I want to know is given nested for loops, this won't work at scale. Looking for a more efficient implementation.
Here is a parser for your data
result = {k: sorted(v, key=lambda x: x[1] if x[0] not in STOPWORDS else 0)[-1][0] for k, v in inp[0].items()}
In short, it takes a key and sorts the rest of the dictionary based on the confidence value, unless the first element of the list is included in STOPWORDS. Then adds the first element of that sorted list to the result dictionary as a value.
Here is the given assignment + code:
Write a function even_value_set(list1, list2, list3), which receives
three lists containing integer items as arguments. The function
creates and returns a set that contains all items with even value from
all three lists.
def test():
l = [[],[],[]]
for i in range(3):
for j in range(random.randint(7,14)):
l[i].append(random.randint(1,35))
print ("List" + str(i + 1) +":",l[i])
print ("")
s = even_value_set(l[0],l[1],l[2])
print ("Return type:", str(type(s)).replace("<","").replace(">",""))
print ("Set with even values only:", end = "")
print (set(sorted(list(s))))
test()
import random
I tried using .union() and making adding lists into another before turning them into sets, but didn't have any luck.
You can do this in a simply pythonic way. This function can be written as a simple oneliner using comprehension
def even_value_set(list1, list2, list3):
return set([num for num in list1+list2+list3 if num % 2 == 0])
Basically, what we did here is concatenated all lists, fitered even numbers and then made it into a set.
You can union lists (with duplicates) with the + operation.
union_list = list1 + list2 + list3
If you only want the unique values you can call set on this list.
unique_set = set(union_list)
Now you can create a new empty set and iterate over the unqiue_set and add all the even values:
solution = set()
for v in unique_set:
if v%2==0:
solution.add(v)
def even_value_set(*lsts):
s = set()
for lst in lsts:
s |= {x for x in lst if x % 2 == 0}
return s
Also you can use starring
def even_value_set(*lsts):
return set().union(*[{x for x in lst if x % 2 == 0} for lst in lsts])
And solution with concatenation (more memory).
set comprehension is more effective than set()
def even_value_set(l1, l2, l3):
return {x for x in l1+l2+l3 if x % 2 == 0}
You can make the union of lists using +, like list1 + list2. Then just walk through the result list checking if the number is even and adding in another list.
def even_value_set(list):
result = set()
for val in list:
if val%2 == 0:
result.add(val)
return result
def test():
l = [[],[],[]]
for i in range(3):
for j in range(random.randint(7,14)):
l[i].append(random.randint(1,35))
print ("List" + str(i + 1) +":",l[i])
print ("")
uni = l[0] + l[1] + l[2]
s = even_value_set(uni)
print ("Return type:", str(type(s)).replace("<","").replace(">",""))
print ("Set with even values only:", end = "")
print (set(sorted(list(s))))
test()
import random
Is it what you want? I think your question is confusing.
I have a list of strings that could vary in pattern.
lst = ['ban-eur.kd', 'ban-eur.sd', 'ban-eur.td' ]
When converted this should become ban-eur<kd,sd,td>.
It should combine elements that are next only if they can be combined. (only if it matches the pattern ban-eur)
lst = ['ban-eur.kd', 'kd', 'ban-eur.sd', 'ban-eur.td' ]
This should result in 'ban-eur.kd_kd_ban-eur<sd,td>'.
If it doesn't have any element that could be combined then it should all be just joined with a _
How can I do this, without missing the first element in the array/duplicate with in the string.
Thanks for your time.
You can use itertools.groupby:
import itertools, re
lst = ['ban-eur.kd', 'ban-eur.sd', 'ban-eur.td' ]
def group_result(d:list) -> list:
if len(d) == 1:
return d[0]
new_result = [[a, list(b)] for a, b in itertools.groupby(sorted(d, key=lambda x:x.split('.')[0]), key=lambda x:x.split('.')[0])]
return '_'.join('{}<{}>'.format(a, ','.join(i.split('.')[-1] for i in b)) for a, b in new_result)
new_data = '_'.join(group_result(list(b)) if a else '_'.join(list(b)) for a, b in itertools.groupby(lst, key=lambda x:'.' in x))
Output:
'ban-eur<kd,sd,td>'
When running on ['ban-eur.kd', 'kd', 'ban-eur.sd', 'ban-eur.td']:
'ban-eur.kd_kd_ban-eur<sd,td>'
matches = []
resulting_string
for item in lst:
if item.startsWith('ban-eur'):
matches.append(item)
elif not item.startsWith('ban-eur') and len(matches) >= 1:
if len(matches) == 1:
resulting_string += item
else:
resulting_string += 'ban-eur.<'
for s in matches:
resulting_string += s + ', '
resulting_string += '>'
matches = []
resulting_string += '_' + item + '_'
This should work
pre = ''
result = []
endings = []
for item in lst:
if item.split('.')[0] == pre:
endings.append(item.split('.')[1])
else:
if endings:
result.append(pre+'<'+','.join(endings)+'>')
else:
result.append(item)
pre = item.split('.')[0]
endings = []
print('_'.join(result))
How do I use .setdefault with the default being a list that i filled in a function? For example,
import random
foundwords = []
with open("text", "r") as file:
contents = file.read().replace('\n',' ')
words = contents.split(' ')
def findwords(word):
for i in range(len(words) - 1):
if words[i] == word:
if not words[i + 1] == '':
foundwords.append(words[i + 1])
wordsDict = {}
for i in range(len(words) - 1):
findwords(words[i])
wordsDict.setdefault(words[i], foundwords)
del foundwords[:]
def assemble():
start = words[random.randint(0, len(words))]
print(start.capitalize())
assemble()
When I check wordsDict, all lists are empty. However, I know that the lists are filled.
You're not making a copy of the foundwords list when you use .setdefault(), so all the dictionary elements refer to the same list. Then you remove all the elements of that list with del foundwords[:], so they all refer to that empty list.
Make a copy of the list when you add it to the dictionary.
for word in words:
findwords(word)
wordsDict.setdefault(word, foundwords[:])
del foundwords[:]
Better would be to change findwords() so it returns a new list instead of writing into a global variable.
def findwords(word):
foundwords = []
for i in range(len(words) - 1):
if words[i] == word:
if not words[i + 1] == '':
foundwords.append(words[i + 1])
return foundwords
for words in words:
wordsDict.setdefault(word, findwords(word))
I have a max length of a list item that I need to enforce. How would I accomplish the following:
MAX_LENGTH = 13
>>> str(["hello","david"])[:MAX_LENGTH]
"['hello', 'da"
==> ["hello", "da"]
I was thinking using ast.literal_eval, but was wondering what might be recommended here.
I would caution against this. There has to be safer things to do than this. At the very least you should never be splitting elements in half. For instance:
import sys
overrun = []
data = ["Hello,"] + ( ["buffer"] * 80 )
maxsize = 800
# sys.getsizeof(data) is now 840 in my implementation.
while True:
while sys.getsizeof(data) > maxsize:
overrun.append(data.pop())
do_something_with(data)
if overrun:
data, overrun = overrun, []
else:
break
Here is a simplified version of #AdamSmith's answer which I ended up using:
import sys
from copy import copy
def limit_list_size(ls, maxsize=800):
data = copy(ls)
while (sys.getsizeof(str(data)) > maxsize):
if not data: break
data.pop()
return data
Note that this will not split mid-word. And because this is returning a copy of the data, the user can see which items were excluded in the output. For example:
old_ls = [...]
new_ls = limit_list_size(old_ls)
overflow_ls = list(set(old_ls) - set(new_ls))
If you want MAX_LENGTH of your strings concatenated, you could do it with a loop pretty simply, using something like this:
def truncateStringList(myArray)
currentLength = 0
result = []
for string in myArray:
if (currentLength + len(string)) > MAX_LENGTH:
result.append(string[:len(string) + currentLength - MAX_LENGTH])
return result
else:
result.append(string)
return result
If you want it of the string representation, you are effectively adding 2 chars at the beginning of each element, [' or ', and two at the end, ', or '], so add 2 to current length before and after each element in the loop:
for string in myArray:
currentLength += 2
if (currentLength + len(string)) > MAX_LENGTH:
result.append(string[:len(string) + currentLength - MAX_LENGTH])
return result
else:
result.append(string)
currentLength += 2
return result
with loops:
max = 11
mylist = ['awdawdwad', 'uppps']
newlist = ','.join(mylist)
print mylist
c= [x for x in newlist if not x==',']
if len(c)>max:
newlist= list(newlist)
newlist.reverse()
for x in range(len(c)-max):
if newlist[0]==',':
del newlist[0]
del newlist[0] # delete two times
else:
del newlist[0]
while newlist[0]==',':
del newlist[0]
newlist.reverse()
cappedlist = ''.join(newlist).split(',')
print cappedlist