I have the below input for a list of dictionaries:
links = [ {'uid': 1, 'lid': 6, 'path': 'a1.txt', 'shareid': 1},
{'uid': 1, 'lid': 7, 'path': 'a2.txt', 'shareid': 2},
{'uid': 1, 'lid': 8, 'path': 'a1.txt', 'shareid': 1}]
and I need to generate this output:
op = {'a1.txt': {'shareid': 1, 'lid': [6, 8]},
'a2.txt': {'shareid': 2, 'lid': [7]}
}
Below is the code that I have written:
def list_all_links():
    """Group the module-level ``links`` entries by path and print the result.

    Every distinct ``path`` maps to a dict holding the ``shareid`` of the most
    recently seen entry for that path and the list of every ``lid`` seen for
    it, in input order.  The grouped mapping is printed as ``result = {...}``.
    """
    result = {}
    for entry in links:
        # setdefault installs the per-path record only the first time a path
        # is seen (dict.has_key() no longer exists in Python 3).
        grouped = result.setdefault(entry['path'], {'shareid': None, 'lid': []})
        grouped['shareid'] = entry['shareid']
        grouped['lid'].append(entry['lid'])
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("result = %s" % (result,))


if __name__ == '__main__':
    list_all_links()
I am able to generate the desired output. But can somebody please point out whether there is a better way to solve this problem?
You can use the setdefault method of dict to make it shorter:
links = [
    {'uid': 1, 'lid': 6, 'path': 'a1.txt', 'shareid': 1},
    {'uid': 1, 'lid': 7, 'path': 'a2.txt', 'shareid': 2},
    {'uid': 1, 'lid': 8, 'path': 'a1.txt', 'shareid': 1},
]

# Group the links by path: keep the latest shareid and collect every lid.
op = {}
for link in links:
    # setdefault returns the existing per-path record or installs a new one.
    record = op.setdefault(link['path'], {})
    record['shareid'] = link['shareid']
    record.setdefault('lid', []).append(link['lid'])
# print() as a function call runs on Python 3 (and on Python 2 for one argument).
print(op)
Output:
{'a2.txt': {'lid': [7], 'shareid': 2}, 'a1.txt': {'lid': [6, 8], 'shareid': 1}}
It isn't all that pretty, but the following solution works:
links = [
    {'uid': 1, 'lid': 6, 'path': 'a1.txt', 'shareid': 1},
    {'uid': 1, 'lid': 7, 'path': 'a2.txt', 'shareid': 2},
    {'uid': 1, 'lid': 8, 'path': 'a1.txt', 'shareid': 1},
]

# Reshape every link into a (path, record) pair whose 'lid' is a one-item list.
links_restructured = [
    (d['path'], {'shareid': d['shareid'], 'lid': [d['lid']]})
    for d in links
]

# Merge the pairs: the first record seen for a path is kept, and the lids of
# later records for the same path are appended to it.
answer = {}
for path, record in links_restructured:
    if path not in answer:
        answer[path] = record
    else:
        answer[path]['lid'].extend(record['lid'])
print(answer)
Output
{'a2.txt': {'lid': [7], 'shareid': 2}, 'a1.txt': {'lid': [6, 8], 'shareid': 1}}
links = [ {'uid': 1, 'lid': 6, 'path': 'a1.txt', 'shareid': 1},
{'uid': 1, 'lid': 7, 'path': 'a2.txt', 'shareid': 2},
{'uid': 1, 'lid': 8, 'path': 'a1.txt', 'shareid': 1}]
def get_links(links):
    """Group link records by their ``path``.

    Args:
        links: iterable of dicts, each holding at least ``path`` and ``lid``.

    Returns:
        dict mapping each path to a copy of the first record seen for that
        path, without its ``path`` and ``uid`` keys, and with ``lid`` replaced
        by the list of every ``lid`` seen for the path (in input order).

    Raises:
        KeyError: if a record has no ``path`` or no ``lid`` key.
    """
    new_links = {}
    for link in links:
        path = link['path']
        if path in new_links:
            new_links[path]['lid'].append(link['lid'])
        else:
            # Build a fresh record instead of deleting keys from the caller's
            # dict, so the input list is left untouched.
            record = {k: v for k, v in link.items() if k not in ('path', 'uid')}
            record['lid'] = [link['lid']]
            new_links[path] = record
    return new_links
print(get_links(links))
Output:
{'a2.txt': {'lid': [7], 'shareid': 2}, 'a1.txt': {'lid': [6, 8], 'shareid': 1}}
Here's how I'd do it:
def process_links(links):
    '''
    Create one empty output record per distinct path in 'links'.

    Returns a dictionary mapping each entry's 'path' to
    {'shareid': None, 'lid': []}; the records are meant to be filled in later.
    '''
    op = {}
    for link in links:
        # setdefault leaves a record that already exists for this path alone.
        op.setdefault(link['path'], {'shareid': None, 'lid': []})
    return op
def fill_op(op_dict, link_list):
    '''
    Fill the per-path records of 'op_dict' from the entries in 'link_list'.

    For every entry, the record stored under the entry's 'path' gets its
    'shareid' overwritten and the entry's 'lid' appended to its 'lid' list.
    A path that has no record yet gets a new empty one first.

    'op_dict' is modified in place and also returned.
    '''
    for link in link_list:
        record = op_dict.setdefault(link['path'], {'shareid': None, 'lid': []})
        # fill shareid
        record['shareid'] = link['shareid']
        # fill lid (the list is mutated in place, no re-assignment needed)
        record['lid'].append(link['lid'])
    return op_dict
if __name__ == "__main__":
    links = [
        {'uid': 1, 'lid': 6, 'path': 'a1.txt', 'shareid': 1},
        {'uid': 1, 'lid': 7, 'path': 'a2.txt', 'shareid': 2},
        {'uid': 1, 'lid': 8, 'path': 'a1.txt', 'shareid': 1},
    ]
    # Two passes: create the empty per-path records, then fill them in.
    result1 = process_links(links)
    result2 = fill_op(result1, links)
    print(result2)
output varies slightly: {'a1.txt': {'lid': [6, 8], 'shareid': 1}, 'a2.txt': {'lid': [7], 'shareid': 2}}
Related
newlist= [{'row': 4, 'col': 4}, {'row': 4, 'col': 5}, {'row': 4, 'col': 6}, {'row': 4, 'col': 7}, {'row': 4, 'col': 8}, {'row': 4, 'col': 9}, {'row': 4, 'col': 10}, {'row': 5, 'col': 4}, {'row': 5, 'col': 5}, {'row': 5, 'col': 6}, {'row': 5, 'col': 7}, {'row': 5, 'col': 8}, {'row': 5, 'col': 9}, {'row': 5, 'col': 10}, {'row': 6, 'col': 4}, {'row': 6, 'col': 5}, {'row': 6, 'col': 6}, {'row': 6, 'col': 7}, {'row': 6, 'col': 8}, {'row': 6, 'col': 9}, {'row': 6, 'col': 10}, {'row': 7, 'col': 4}, {'row': 7, 'col': 5}, {'row': 7, 'col': 6}, {'row': 7, 'col': 7}, {'row': 7, 'col': 8}, {'row': 7, 'col': 9}, {'row': 7, 'col': 10}, {'row': 8, 'col': 11}, {'row': 9, 'col': 11}]
I want to delete the entry (dictionary) that has row 4 and col 5.
How can I do that?
# Attempt to drop the entry with row 4 / col 5 while looping over newlist.
for lines in newlist :
if lines['row'] == 4and lines['col'] == 5:
print("delete")
# NOTE(review): this indexes 'added' with string keys rather than removing
# 'lines' from newlist; the reported TypeError suggests 'added' is a list.
del added['row']['col']
I tried the code above, but it did not work; it raises: TypeError: list indices must be integers or slices, not str
The requirement is for below code
# Walks rows 4..l and columns 4..m, appending {'row', 'col'} pairs to 'added'.
# NOTE(review): the original indentation is not visible here, so the nesting
# described in the comments is the presumed one -- confirm.
row=4
cols=4
l=9
m=11
added=[]
while row <= l:
print("row", row)
m = 11
cols= 4
while cols <= m:
print("column",cols)
for li in newlist:
print("li", li)
# NOTE(review): this matches every newlist entry of the current row whose col
# differs from cols, so the same (row, cols) pair is presumably appended
# several times -- likely not what was intended.
if li['row'] == row and li['col']!= cols:
added.append({
'row':row,
'col':cols,
# 'id':li['id']
})
print(row, "", cols)
print("new")
print("lop",added)
# new_added is assigned here but never read in this snippet.
new_added = [x for x in added if x['row'] != row or x['col'] != cols]
cols = cols + 1
row = row + 1
so I require only
newlist= [{'row': 4, 'col': 11}, {'row': 5, 'col': 11},
{'row': 6, 'col': 11}, {'row': 7, 'col': 11}, {'row': 8, 'col': 4},
{'row': 8, 'col': 5}, {'row': 8, 'col': 6}, {'row': 8, 'col': 7},
{'row': 8, 'col': 8}, {'row': 8, 'col': 9}, {'row': 8, 'col': 10},
{'row': 9, 'col': 4}, {'row': 9, 'col': 5}, {'row': 9, 'col': 6},
{'row': 9, 'col': 7}, {'row': 9, 'col': 8}, {'row': 9, 'col': 9},
{'row': 9, 'col': 10}]
so I want output as above
newlist 3=out [{'row': 4, 'col': 4}, {'row': 4, 'col': 5}, {'row': 4, 'col': 6}, {'row': 4, 'col': 7}, {'row': 4, 'col': 8}, {'row': 4, 'col': 9}, {'row': 4, 'col': 10}, {'row': 4, 'col': 11}, {'row': 5, 'col': 4}, {'row': 5, 'col': 5}, {'row': 5, 'col': 6}, {'row': 5, 'col': 7}, {'row': 5, 'col': 8}, {'row': 5, 'col': 9}, {'row': 5, 'col': 10}, {'row': 5, 'col': 11}, {'row': 6, 'col': 4}, {'row': 6, 'col': 5}, {'row': 6, 'col': 6}, {'row': 6, 'col': 7}, {'row': 6, 'col': 8}, {'row': 6, 'col': 9}, {'row': 6, 'col': 10}, {'row': 6, 'col': 11}, {'row': 7, 'col': 4}, {'row': 7, 'col': 5}, {'row': 7, 'col': 6}, {'row': 7, 'col': 7}, {'row': 7, 'col': 8}, {'row': 7, 'col': 9}, {'row': 7, 'col': 10}, {'row': 7, 'col': 11}, {'row': 8, 'col': 4}, {'row': 8, 'col': 5}, {'row': 8, 'col': 6}, {'row': 8, 'col': 7}, {'row': 8, 'col': 8}, {'row': 8, 'col': 9}, {'row': 8, 'col': 10}, {'row': 8, 'col': 11}, {'row': 9, 'col': 4}, {'row': 9, 'col': 5}, {'row': 9, 'col': 6}, {'row': 9, 'col': 7}, {'row': 9, 'col': 8}, {'row': 9, 'col': 9}, {'row': 9, 'col': 10}, {'row': 9, 'col': 11}]
try via list comprehension:
# Build the full grid: every row that occurs in newlist crossed with the
# columns 4..11.  Rows are sorted because set iteration order is not guaranteed.
rows = sorted({entry['row'] for entry in newlist})
checker = [{'row': row, 'col': col} for row in rows for col in range(4, 12)]
# Keep only the grid cells that are absent from newlist; the set of
# (row, col) pairs makes each membership test a hash lookup instead of a
# scan over the whole list.
present = {(entry['row'], entry['col']) for entry in newlist}
out = [cell for cell in checker if (cell['row'], cell['col']) not in present]
output of out:
[{'row': 4, 'col': 11},
{'row': 5, 'col': 11},
{'row': 6, 'col': 11},
{'row': 7, 'col': 11},
{'row': 8, 'col': 4},
{'row': 8, 'col': 5},
{'row': 8, 'col': 6},
{'row': 8, 'col': 7},
{'row': 8, 'col': 8},
{'row': 8, 'col': 9},
{'row': 8, 'col': 10},
{'row': 9, 'col': 4},
{'row': 9, 'col': 5},
{'row': 9, 'col': 6},
{'row': 9, 'col': 7},
{'row': 9, 'col': 8},
{'row': 9, 'col': 9},
{'row': 9, 'col': 10}]
Use the remove method for lists:
# Iterate over a shallow copy: removing from the list that is being iterated
# would make the loop skip the element following each removal.
for dic in newlist[:]:
    if dic['row'] == 4 and dic['col'] == 5:
        newlist.remove(dic)
Every loop in this function:
def sum_total(files, local_dir):
    """Read every result file and return the summed per-test counters.

    Args:
        files: file names; each one is appended to ``local_dir`` as-is.
        local_dir: directory prefix (string concatenation, so it must already
            end with a path separator).

    Returns:
        Whatever ``sum_values_from_several_dict_to_one`` produces for the list
        of per-file results.
    """
    results = []
    for name in files:
        with open(local_dir + name, 'r') as f:
            data = f.readlines()
        # find_by_tag is defined elsewhere; presumably it returns one dict of
        # per-test counters for the file -- confirm.
        res = find_by_tag(data)
        print('res: ', res)
        results.append(res)
    # Sum once over all collected results instead of discarding each call's
    # return value inside the loop.
    return sum_values_from_several_dict_to_one(results)
Generates example output:
{'Critical Tests': {'failed': 1, 'passed': 2, 'total': 5}, 'All Tests': {'failed': 5, 'passed': 0, 'total': 5}}
{'Critical Tests': {'failed': 2, 'passed': 3, 'total': 5}, 'All Tests': {'failed': 10, 'passed': 12, 'total': 12}}
{'Critical Tests': {'failed': 3, 'passed': 4, 'total': 5}, 'All Tests': {'failed': 10, 'passed': 0, 'total': 10}}
EXPECTED OUTPUT:
I would like to sum those values into one dictionary to get output like:
{'Critical Tests': {'failed': 6, 'passed': 9, 'total': 15}, 'All Tests': {'failed': 25, 'passed': 12, 'total': 27}}
The problem is: what should the 'sum_values_from_several_dict_to_one' function look like? This is my code, but it does not work. What should be improved?
def sum_values_from_several_dict_to_one(d1):
    """Sum the counters of several ``{test: {counter: value}}`` dicts.

    Args:
        d1: an iterable of such dicts; a single dict is also accepted and is
            treated as a one-element iterable.

    Returns:
        A new dict ``{test: {counter: total}}`` with the values of equal
        ``test``/``counter`` pairs added together.
    """
    if isinstance(d1, dict):
        d1 = [d1]
    final_dict = {}
    for d in d1:
        for test, results in d.items():
            totals = final_dict.setdefault(test, {})
            for key, value in results.items():
                if key in totals:
                    # Accumulate under the counter name (indexing with the
                    # whole 'results' dict raises TypeError: unhashable type).
                    totals[key] += value
                else:
                    totals[key] = value
    return final_dict
Here you have:
a = {'Critical Tests': {'failed': 1, 'passed': 2, 'total': 5}, 'All Tests': {'failed': 5, 'passed': 0, 'total': 5}}
b = {'Critical Tests': {'failed': 2, 'passed': 3, 'total': 5}, 'All Tests': {'failed': 10, 'passed': 12, 'total': 12}}
def sum_dicts(dict1, dict2):
    """Return a new dict holding the key-wise sum of two (nested) dicts.

    Values that are dicts are summed recursively; all other values are added
    with ``+``.  A key present in only one operand is carried over unchanged,
    so an empty dict acts as the neutral element (``sum_dicts({}, d) == d``).
    Neither input is modified, and nested dicts in the result are new objects.
    """
    res = {}
    for key, val in dict1.items():
        if key not in dict2:
            # One-sided key: copy it (recursively for nested dicts) rather
            # than dropping it.
            res[key] = sum_dicts(val, {}) if isinstance(val, dict) else val
        elif isinstance(val, dict):
            res[key] = sum_dicts(val, dict2[key])
        else:
            res[key] = val + dict2[key]
    for key, val in dict2.items():
        if key not in dict1:
            res[key] = sum_dicts({}, val) if isinstance(val, dict) else val
    return res
if __name__ == '__main__':
    # Demonstrate the key-wise sum on the two sample dicts.
    sol = sum_dicts(a, b)
    print(sol)
Output:
{'All Tests': {'failed': 15, 'total': 17, 'passed': 12}, 'Critical Tests': {'failed': 3, 'total': 10, 'passed': 5}}
EDIT:
Assuming res is a dict you can use it like this:
def sum_total(files, local_dir):
    """Read every result file and return the summed result dict.

    Args:
        files: file names; each one is appended to ``local_dir`` as-is.
        local_dir: directory prefix (string concatenation, so it must already
            end with a path separator).

    Returns:
        The running total of all per-file results combined with
        ``sum_dicts``; an empty dict when ``files`` is empty.
    """
    final_dict = {}
    for name in files:
        with open(local_dir + name, 'r') as f:
            data = f.readlines()
        # find_by_tag is defined elsewhere; assumed to return a dict -- confirm.
        res = find_by_tag(data)
        print('res: ', res)
        # Seed the totals with the first result instead of summing it with an
        # empty dict.
        final_dict = sum_dicts(final_dict, res) if final_dict else res
    return final_dict
Python 3.6
Task:
Given a sorted list of linear features (like in a linear referencing system),
combine adjacent linear features belonging to the same key (linear_feature[0]['key'] == linear_feature[1]['key'] and linear_feature[0]['end'] == linear_feature[1]['start'])
until the combined linear feature has (end - start) ≥ THRESHOLD.
If feature cannot be combined with subsequent adjacent features such that (end - start) ≥ THRESHOLD, combine with previous adjacent feature of the same key, or return self.
EDIT: Added a solution below in an answer post.
THRESHOLD = 3
linear_features = sorted([
{'key': 1, 'start': 0, 'end': 2, 'count': 1},
{'key': 1, 'start': 2, 'end': 4, 'count': 1},
{'key': 1, 'start': 4, 'end': 5, 'count': 1},
{'key': 2, 'start': 0, 'end': 3, 'count': 1},
{'key': 2, 'start': 3, 'end': 4, 'count': 1},
{'key': 2, 'start': 4, 'end': 5, 'count': 1},
{'key': 3, 'start': 0, 'end': 1, 'count': 1},
], key=lambda x: (x['key'], x['start']))
# This isn't necessarily an intermediate step, just here for visualization
intermediate = [
{'key': 1, 'start': 0, 'end': 4, 'count': 2}, # Adjacent features combined
{'key': 1, 'start': 4, 'end': 5, 'count': 1}, # This can't be made into a feature with (end - start) gte THRESHOLD; combine with previous
{'key': 2, 'start': 0, 'end': 3, 'count': 1},
{'key': 2, 'start': 3, 'end': 5, 'count': 2}, # This can't be made into a feature with (end - start) gte THRESHOLD; combine with previous
{'key': 3, 'start': 0, 'end': 1, 'count': 1}, # This can't be made into a new feature, and there is no previous, so self
]
desired_output = [
{'key': 1, 'start': 0, 'end': 5, 'count': 3},
{'key': 2, 'start': 0, 'end': 5, 'count': 3},
{'key': 3, 'start': 0, 'end': 1, 'count': 1},
]
I figured out a solution:
def reducer(x, THRESHOLD):
    """Merge one key's features so that each result spans at least THRESHOLD.

    Short features are first merged forward by ``add_until``.  If the last
    remaining feature is still shorter than THRESHOLD and has a predecessor,
    it is merged backward into that predecessor.

    Args:
        x: list of feature dicts (``start``, ``end``, ``count``) for a single
            key, ordered by ``start``.  The dicts are modified in place.
        THRESHOLD: minimum ``end - start`` a feature should reach.

    Returns:
        List of the merged feature dicts; ``[]`` for empty input.
    """
    if not x:
        return []
    merged = add_until(x, THRESHOLD)
    # After add_until only the last feature can still be too short, so one
    # backward merge replaces re-running add_until on every suffix.
    if len(merged) > 1 and length(merged[-1]) < THRESHOLD:
        head, last = merged[:-1], merged[-1]
        head[-1]['end'] = last['end']
        head[-1]['count'] += last['count']
        return head
    return merged
def add_until(x, THRESHOLD):
    """Merge every too-short feature forward into the feature that follows it.

    Walking left to right, a feature shorter than THRESHOLD is folded into
    its successor (the successor takes over its ``start`` and adds its
    ``count``).  The last feature has no successor and is always kept, even
    when it is shorter than THRESHOLD.

    Args:
        x: list of feature dicts ordered by ``start``; modified in place.
        THRESHOLD: minimum ``end - start`` a feature should reach.

    Returns:
        List of the surviving feature dicts (``x`` itself when it has fewer
        than two elements).
    """
    if len(x) < 2:
        return x
    kept = []
    # Iterative form of the head/rest recursion: no slicing per step and no
    # recursion-depth limit on long inputs.
    for current, following in zip(x, x[1:]):
        if length(current) >= THRESHOLD:
            kept.append(current)
        else:
            following['start'] = current['start']
            following['count'] += current['count']
    kept.append(x[-1])
    return kept
from itertools import groupby
THRESHOLD = 3
linear_features = sorted([
{'key': 1, 'start': 0, 'end': 2, 'count': 1},
{'key': 1, 'start': 2, 'end': 4, 'count': 1},
{'key': 1, 'start': 4, 'end': 5, 'count': 1},
{'key': 2, 'start': 0, 'end': 3, 'count': 1},
{'key': 2, 'start': 3, 'end': 4, 'count': 1},
{'key': 2, 'start': 4, 'end': 5, 'count': 1},
{'key': 3, 'start': 0, 'end': 1, 'count': 1},
{'key': 4, 'start': 0, 'end': 3, 'count': 1},
{'key': 4, 'start': 3, 'end': 4, 'count': 1},
{'key': 4, 'start': 4, 'end': 5, 'count': 1},
{'key': 4, 'start': 5, 'end': 6, 'count': 1},
{'key': 4, 'start': 6, 'end': 9, 'count': 1},
], key=lambda x: (x['key'], x['start']))
def length(x):
    """Return the extent of feature ``x``: its ``end`` minus its ``start``."""
    return x['end'] - x['start']
# Reduce each key's run of features separately; groupby relies on
# linear_features already being sorted by key.
results = []
for key, sites in groupby(linear_features, lambda x: x['key']):
    sites = list(sites)
    # Use the shared THRESHOLD constant rather than repeating its value.
    results += reducer(sites, THRESHOLD)
print(results)
[
{'key': 1, 'start': 0, 'end': 5, 'count': 3},
{'key': 2, 'start': 0, 'end': 5, 'count': 3},
{'key': 3, 'start': 0, 'end': 1, 'count': 1},
{'key': 4, 'start': 0, 'end': 3, 'count': 1},
{'key': 4, 'start': 3, 'end': 6, 'count': 3},
{'key': 4, 'start': 6, 'end': 9, 'count': 1}
]
You want something like this:
PSEUDOCODE
while f=1 < max = count of features:
if features[f-1]['key'] == features[f]['key'] and
features[f-1]['end'] == features[f]['start']:
#combine
features[f-1]['end'] = features[f]['end']
features[f-1]['count'] += 1
del features[f]; max -= 1
else:
f += 1
I have a list of ids sorted in the proper order:
ids = [1, 2, 4, 6, 5, 0, 3]
I also have a list of dictionaries, sorted in some random way:
rez = [{'val': 7, 'id': 1}, {'val': 8, 'id': 2}, {'val': 2, 'id': 3}, {'val': 0, 'id': 4}, {'val': -1, 'id': 5}, {'val': -4, 'id': 6}, {'val': 9, 'id': 0}]
My intention is to sort rez list in a way that corresponds to ids:
rez = [{'val': 7, 'id': 1}, {'val': 8, 'id': 2}, {'val': 0, 'id': 4}, {'val': -4, 'id': 6}, {'val': -1, 'id': 5}, {'val': 9, 'id': 0}, {'val': 2, 'id': 3}]
I tried:
rez.sort(key = lambda x: ids.index(x['id']))
However that way is too slow for me, as len(ids) > 150K, and each dict actually had a lot of keys (some values there are strings). Any suggestion how to do it in the most pythonic, but still fastest way?
You don't need to sort because ids specifies the entire ordering of the result. You just need to pick the correct elements by their ids:
# Index the records by id once, then emit them in the order given by ids.
rez_dict = {d['id']: d for d in rez}
# 'id_' avoids shadowing the builtin id().
rez_ordered = [rez_dict[id_] for id_ in ids]
Which gives:
>>> rez_ordered
[{'id': 1, 'val': 7}, {'id': 2, 'val': 8}, {'id': 4, 'val': 0}, {'id': 6, 'val': -4}, {'id': 5, 'val': -1}, {'id': 0, 'val': 9}, {'id': 3, 'val': 2}]
This should be faster than sorting because it can be done in linear time on average, while sort is O(nlogn).
Note that this assumes that there will be one entry per id, as in your example.
I think you are on the right track. If you need to speed it up, because your list is too long and you are having quadratic complexity, you can turn the list into a dictionary first, mapping the ids to their respective indices.
# Map every id to its position in ids, then sort rez in place by that position.
indices = dict((id_, pos) for pos, id_ in enumerate(ids))
rez.sort(key=lambda record: indices[record['id']])
This way, indices is {0: 5, 1: 0, 2: 1, 3: 6, 4: 2, 5: 4, 6: 3}, and rez is
[{'id': 1, 'val': 7},
{'id': 2, 'val': 8},
{'id': 4, 'val': 0},
{'id': 6, 'val': -4},
{'id': 5, 'val': -1},
{'id': 0, 'val': 9},
{'id': 3, 'val': 2}]
I am trying to replace each list element with the value looked up for it in a dictionary. How do I do that?
list = [1, 3, 2, 10]
d = {'id': 1, 'val': 30},{'id': 2, 'val': 53}, {'id': 3, 'val': 1}, {'id': 4, 'val': 9}, {'id': 5, 'val': 2}, {'id': 6, 'val': 6}, {'id': 7, 'val': 11}, {'id': 8, 'val': 89}, {'id': 9, 'val': 2}, {'id': 10, 'val': 4}
for i in list:
for key, v in d.iteritems():
???
???
so at the end I am expecting:
list = [30, 1, 53, 4]
thank you
# Build an id -> val lookup table from the records in D ...
D2 = {rec['id']: rec['val'] for rec in D}
# ... and translate every id in L through it.
L2 = [D2[key] for key in L]
td = (
    {'val': 30, 'id': 1},
    {'val': 53, 'id': 2},
    {'val': 1, 'id': 3},
    {'val': 9, 'id': 4},
    {'val': 2, 'id': 5},
    {'val': 6, 'id': 6},
    {'val': 11, 'id': 7},
    {'val': 89, 'id': 8},
    {'val': 2, 'id': 9},
    {'val': 4, 'id': 10},
)
source_list = [1, 3, 2, 10]

# Index the records by id once so each source item costs a single dict lookup
# instead of a scan over the whole tuple.
val_by_id = {d['id']: d['val'] for d in td}
# Ids without a record are skipped, as in a plain nested-loop search.
final_list = [val_by_id[item] for item in source_list if item in val_by_id]

print('Source : ', source_list)
print('Final : ', final_list)
Result
Source : [1, 3, 2, 10]
Final : [30, 1, 53, 4]