Is there a way to speed this up? I am currently using two nested for loops (d2 can contain duplicate keys). Any suggestions on speeding this up?
d1 = [{'key': 't1', 'val': 1}, {'key': 't2', 'val': 2}, {'key': 't3', 'val': 3}, {'key': 't4', 'val': 4}, {'key': 't5', 'val': 5}, {'key': 't6', 'val': 6}, {'key': 't7', 'val': 7}, {'key': 't8', 'val': 8}, {'key': 't9', 'val': 9}, {'key': 't10', 'val': 10}, {'key': 't11', 'val': 11}, {'key': 't12', 'val': 12}, {'key': 't13', 'val': 13}, {'key': 't14', 'val': 14}, {'key': 't15', 'val': 15}, {'key': 't16', 'val': 16}, {'key': 't17', 'val': 17}, {'key': 't18', 'val': 18}, {'key': 't19', 'val': 19}]
d2 = [{'key': 't1', 'val': 'newval1'}, {'key': 't1', 'val': 'newval11'}, {'key': 't2', 'val': 'newval2'}, {'key': 't3', 'val': 'newval3'}, {'key': 't6', 'val': 'newval6'}, {'key': 't7', 'val': 'newval7'}, {'key': 't8', 'val': 'newval8'}, {'key': 't9', 'val': 'newval9'}, {'key': 't10', 'val': 'newval10'}, {'key': 't11', 'val': 'newval11'}, {'key': 't12', 'val': 'newval12'}, {'key': 't13', 'val': 'newval13'}, {'key': 't14', 'val': 'newval14'}, {'key': 't15', 'val': 'newval15'}, {'key': 't16', 'val': 'newval16'}, {'key': 't17', 'val': 'newval17'}, {'key': 't18', 'val': 'newval18'}, {'key': 't19', 'val': 'newval19'}]
>>> for x in d1:
...     for y in d2:
...         if x['key'] == y['key']:
...             print(x['key'], x['val'], y['val'])
...
('t1', 1, 'newval1')
('t1', 1, 'newval11')
('t2', 2, 'newval2')
('t3', 3, 'newval3')
('t6', 6, 'newval6')
('t7', 7, 'newval7')
('t8', 8, 'newval8')
('t9', 9, 'newval9')
('t10', 10, 'newval10')
('t11', 11, 'newval11')
('t12', 12, 'newval12')
('t13', 13, 'newval13')
('t14', 14, 'newval14')
('t15', 15, 'newval15')
('t16', 16, 'newval16')
('t17', 17, 'newval17')
('t18', 18, 'newval18')
('t19', 19, 'newval19')
With this structure, no: they are two lists, so you have to iterate over both of them to find what you are looking for.
But you can store this data as a dictionary of dictionaries indexed by key. (Since you confirmed that d1 always has unique keys, just turn d1 into a dictionary of dictionaries.)
d1 = {d['key']: {'val': d['val']} for d in d1}
This way you can iterate over d2(a single for loop) and pick the relevant value from d1.
# Single pass over d2: d1 is keyed by 'key' at this point, so every lookup
# is one dict access instead of a scan over a list.
for d in d2:
    key, value = d['key'], d['val']
    # Skip keys that d1 does not contain (the nested-loop version skipped
    # them silently) rather than raising KeyError.
    if key in d1:
        print(key, d1[key]['val'], value)
here is the full code:
d1 = [{'key': 't1', 'val': 1}, {'key': 't2', 'val': 2}, {'key': 't3', 'val': 3},
      {'key': 't4', 'val': 4}, {'key': 't5', 'val': 5}, {'key': 't6', 'val': 6},
      {'key': 't7', 'val': 7}, {'key': 't8', 'val': 8}, {'key': 't9', 'val': 9},
      {'key': 't10', 'val': 10}, {'key': 't11', 'val': 11},
      {'key': 't12', 'val': 12}, {'key': 't13', 'val': 13},
      {'key': 't14', 'val': 14}, {'key': 't15', 'val': 15},
      {'key': 't16', 'val': 16}, {'key': 't17', 'val': 17},
      {'key': 't18', 'val': 18}, {'key': 't19', 'val': 19}]
d2 = [{'key': 't1', 'val': 'newval1'}, {'key': 't1', 'val': 'newval11'},
      {'key': 't2', 'val': 'newval2'}, {'key': 't3', 'val': 'newval3'},
      {'key': 't6', 'val': 'newval6'}, {'key': 't7', 'val': 'newval7'},
      {'key': 't8', 'val': 'newval8'}, {'key': 't9', 'val': 'newval9'},
      {'key': 't10', 'val': 'newval10'}, {'key': 't11', 'val': 'newval11'},
      {'key': 't12', 'val': 'newval12'}, {'key': 't13', 'val': 'newval13'},
      {'key': 't14', 'val': 'newval14'}, {'key': 't15', 'val': 'newval15'},
      {'key': 't16', 'val': 'newval16'}, {'key': 't17', 'val': 'newval17'},
      {'key': 't18', 'val': 'newval18'}, {'key': 't19', 'val': 'newval19'}]

# Re-key d1 by its (unique) 'key' field so each lookup below is a single
# dict access rather than a scan of the whole list.
d1 = {d['key']: {'val': d['val']} for d in d1}

# One pass over d2; duplicate keys in d2 each produce their own output line.
for d in d2:
    key, value = d['key'], d['val']
    # d2 may hold keys that d1 lacks; skip them, as the nested-loop version
    # did, instead of raising KeyError.
    if key in d1:
        print(key, d1[key]['val'], value)
output:
t1 1 newval1
t1 1 newval11
t2 2 newval2
t3 3 newval3
t6 6 newval6
t7 7 newval7
t8 8 newval8
t9 9 newval9
t10 10 newval10
t11 11 newval11
t12 12 newval12
t13 13 newval13
t14 14 newval14
t15 15 newval15
t16 16 newval16
t17 17 newval17
t18 18 newval18
t19 19 newval19
Related
The following data is what I am getting from the database; I want to build an HTML table from it:
[{'id': 1}, {'e': 2}, {'i': 3}, {'id': 12}, {'e': 12}, {'i': 23},
{'id': 13}, {'e': 3}, {'i': None}, {'id': 14}, {'e': 23},
{'i': None}, {'id': 123}, {'e': 123}, {'i': 123}]
data = [{'id': 1}, {'e': 2}, {'i': 3}, {'id': 12}, {'e': 12}, {'i': 23},
{'id': 13}, {'e': 3}, {'i': None}, {'id': 14}, {'e': 23},
{'i': None}, {'id': 123}, {'e': 123}, {'i': 123}]
def chunker(seq, size):
    """Split *seq* into consecutive slices of at most *size* items.

    Adapted from https://stackoverflow.com/a/434328/11384184

    Args:
        seq: A sliceable sequence supporting ``len()`` (list, tuple, str, ...).
        size: Maximum length of each chunk; must be a positive integer.

    Returns:
        A generator yielding ``seq[pos:pos + size]`` for every multiple of
        *size* below ``len(seq)``. The last chunk is shorter when
        ``len(seq)`` is not a multiple of *size*.

    Raises:
        ValueError: If *size* is not positive.
    """
    # A zero step makes range() raise and a negative step silently yields
    # nothing, so reject both up front with a clear message.
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
# Header row, then one row per group of three consecutive entries of `data`.
# The three-way unpacking requires len(data) to be a multiple of 3.
print("|id|e|i|")
for id_obj, e_obj, i_obj in chunker(data, 3):
    print(f"|{id_obj['id']}|{e_obj['e']}|{i_obj['i']}|")
# |id|e|i|
# |1|2|3|
# |12|12|23|
# |13|3|None|
# |14|23|None|
# |123|123|123|
I just printed the result, but it should be simple to generate the corresponding HTML from there.
I'm trying to code a faster way to solve the following problem but I don't know how to do it:
I have the following list of dicts and list of identifiers:
list_of_dicts = [{'id': 1, 'name': 'A'}, {'id': 2, 'name': 'B'}, {'id': 3, 'name': 'C'}, {'id': 4, 'name': 'D'}]
list_of_ids = [1, 3, 2, 4, 1, 3, 4]
I'd like to have the following output:
[{'id': 1, 'name': 'A'}, {'id': 3, 'name': 'C'}, {'id': 2, 'name': 'B'}, {'id': 4, 'name': 'D'}, {'id': 1, 'name': 'A'}, {'id': 3, 'name': 'C'}, {'id': 4, 'name': 'D'}]
The way I'm doing it is:
# Ids in the same order as list_of_dicts, so positions in both lists line up.
list_of_dict_ids = [d['id'] for d in list_of_dicts]
# list.index() scans from the start for every requested id, so the total
# cost grows with len(list_of_ids) * len(list_of_dicts).
ordered_list_by_ids = [list_of_dicts[list_of_dict_ids.index(i)] for i in list_of_ids]
Is there any faster way to do it?
You can do like this :
dic = {d["id"]: d for d in list_of_dicts}
dic
>>>{1: {'id': 1, 'name': 'A'}, 2: {'id': 2, 'name': 'B'}, 3: {'id': 3, 'name': 'C'}, 4: {'id': 4, 'name': 'D'}}
lst =[dic[i] for i in list_of_ids]
lst
>>>[{'id': 1, 'name': 'A'}, {'id': 3, 'name': 'C'}, {'id': 2, 'name': 'B'}, {'id': 4, 'name': 'D'}, {'id': 1, 'name': 'A'}, {'id': 3, 'name': 'C'}, {'id': 4, 'name': 'D'}]
my data is a mix of lists and dictionaries.
I need to create a data frame for my_stats.
I can get to the data at my_stats[0]['stats']['data'] through:
df = pd.DataFrame (my_stats[0]["stats"]["data"])
area key ... Scrum Errors Confirm Try
0 C1 Kick Off ... NaN NaN
1 NaN Passive Tackle ... NaN NaN
2 D1 Rucks ... NaN NaN
3 D1 Lineouts ... NaN NaN
4 NaN Neutral Tackle ... NaN NaN
but I need my data frame to show the game _id as the 1st column of the data frame.
here is some of the data of two of the matches.
Please assist.
my_stats = [{'_id': 'GLEvHIL2020031419A', 'stats': {'data': [{'area': 'C1', 'key': 'Kick Off', 'possession': 'against', 'second': 0, 'endSecond': 6, 'time': '2020-03-14T12:00:06', 'Kick Off Fielded': 'Unsuccessful'}, {'key': 'Passive Tackle', 'possession': 'against', 'second': 9, 'time': '2020-03-14T12:00:09', 'value': 9, 'subMetric': 3, 'rtp': []}, {'area': 'D1', 'key': 'Rucks', 'possession': 'against', 'second': 9, 'endSecond': 19, 'time': '2020-03-14T12:00:19'}, d': 1175, 'time': '2020-03-14T12:19:35', 'value': 8, 'subMetric': None, 'rtp': []}, {'area': 'C2', 'key': 'Rucks', 'possession': 'against', 'second': 1176, 'endSecond': 1178, 'time': '2020-03-14T12:19:38'}, {'key': 'Defender in Position', 'possession': 'against', 'second': 1176, 'time': '2020-03-14T12:19:36', 'value': 6, 'subMetric': None, 'rtp': []}, {'key': 'Defender in Position', 'possession': 'against', 'second': 1177, 'time': '2020-03-14T12:19:37', 'value': 12, 'subMetric': None, 'rtp': []}, {'key': 'Tackle Assist', 'possession': 'against', 'second': 1184, 'time': '2020-03-14T12:19:44', 'value': 7, 'subMetric': None, 'rtp': []}]}}, {'_id': 'HJSvMON2020031419A', 'stats': {'data': [{'area': 'C2', 'key': 'Kick Off', 'possession': 'against', 'second': 1, 'endSecond': 5, 'time': '2020-03-14T12:00:05', 'Kick Off Fielded': 'Successful'}, {'area': 'C2', 'key': 'Kick Off', 'possession': 'against', 'second': 2, 'endSecond': 5, 'time': '2020-03-14T12:00:05', 'Kick Off Fielded': 'Successful'}, {'key': 'Kick Fielded Successfully', 'possession': 'for', 'second': 4, 'time': '2020-03-14T12:00:04', 'value': 7, 'subMetric': None, 'rtp': []}, {'key': 'Effective Ruck', 'possession': 'for', 'second': 6, 'time': '2020-03-14T12:00:06', 'value': 3, 'subMetric': None, 'rtp': []}, {'key': 'Effective Ruck', 'possession': 'for', 'second': 6, 'time': '2020-03-14T12:00:06', 'value': 2, 'subMetric': None, 'rtp': []}, {'area': 'C2', 'key': 'Rucks', 'possession': 'for', 'second': 6, 'endSecond': 10, 'time': '2020-03-14T12:00:10'}, 
{'area': 'C2', 'key': 'Rucks', 'possession': 'against', 'second': 6, 'endSecond': 9, 'time': '2020-03-14T12:00:09'}, {'key': 'Good Pass', 'possession': 'for', 'second': 10, 'time': '2020-03-14T12:00:10', 'value': 9, 'subMetric': None, 'rtp': [{'key': 'Good Pass', 'possession': 'for', 'second': 16, 'time': '2020-03-14T12:00:16', 'value': 9, 'subMetric': None}]}, ]}}]
Here's a solution:
df = pd.DataFrame(my_stats)
def pd_unnest_dict(df, col):
    """Expand a column of dicts into one column per dict key.

    Args:
        df: DataFrame containing *col*.
        col: Label of the column whose cells are dict-like.

    Returns:
        A new DataFrame with *col* dropped and the expanded columns appended
        on the right. *df* itself is not modified.
    """
    # Series.apply(pd.Series) turns each dict into a row whose columns are
    # the dict's keys; keys absent from a given cell come out as NaN.
    exploded_col = df[col].apply(pd.Series)
    return pd.concat([df.drop(columns=col), exploded_col], axis=1)
df = pd_unnest_dict(df, 'stats')
df = df.explode('data')
df = pd_unnest_dict(df, 'data')
I'm new to Python and I have this structure retrieved from a DB:
data=[
{'Value': '0.2', 'id': 1},
{'Value': '1.2', 'id': 1},
{'Value': '33.34', 'id': 2},
{'Value': '44.3', 'id': 3},
{'Value': '33.23', 'id': 3},
{'Value': '21.1', 'id': 4},
{'Value': '5.33', 'id': 4},
{'Value': '33.3', 'id': 5},
{'Value': '12.2', 'id': 5},
{'Value': '1.22', 'id': 5},
{'Value': '1.23', 'id': 6}
]
I know that I can get the id of a record with:
data[i]['id']
but I need to collect by ID in a proper data structure, in order to get the average values for every ID.
What is the better choice for this?
I'm thinking of building a new dict for every ID set, but the IDs can grow in number, and I can't figure out how to tackle this problem. If someone can give me some ideas I would be very grateful.
Assuming your data is sorted by ID as it appears in your data variable, you can try using itertools.groupby, which can be instructed to group by id. You can then create a new dictionary that has keys equal to the id numbers and values equal to the means:
In [1]: from itertools import groupby
In [2]: data=[
...: {'Value': '0.2', 'id': 1},
...: {'Value': '1.2', 'id': 1},
...: {'Value': '33.34', 'id': 2},
...: {'Value': '44.3', 'id': 3},
...: {'Value': '33.23', 'id': 3},
...: {'Value': '21.1', 'id': 4},
...: {'Value': '5.33', 'id': 4},
...: {'Value': '33.3', 'id': 5},
...: {'Value': '12.2', 'id': 5},
...: {'Value': '1.22', 'id': 5},
...: {'Value': '1.23', 'id': 6}
...: ]
In [3]: means = {}
In [4]: for k, g in groupby(data, key=lambda x: x['id']):
...:     g = list(g)
...:     means[k] = sum(float(x['Value']) for x in g) / len(g)
...:
...:
In [5]: means
Out[5]:
{1: 0.69999999999999996,
2: 33.340000000000003,
3: 38.765000000000001,
4: 13.215,
5: 15.573333333333332,
6: 1.23}
(Updated: after DSM's comment.)
You could reshape the data like this:
from collections import defaultdict

data = [
    {'Value': '0.2', 'id': 1},
    {'Value': '1.2', 'id': 1},
    {'Value': '33.34', 'id': 2},
    {'Value': '44.3', 'id': 3},
    {'Value': '33.23', 'id': 3},
    {'Value': '21.1', 'id': 4},
    {'Value': '5.33', 'id': 4},
    {'Value': '33.3', 'id': 5},
    {'Value': '12.2', 'id': 5},
    {'Value': '1.22', 'id': 5},
    {'Value': '1.23', 'id': 6},
]

# Group the values by id. defaultdict(list) creates the empty list the first
# time an id is seen, so the data does not need to be sorted and new ids
# need no special handling.
newdata = defaultdict(list)
for r in data:
    # 'Value' arrives as a string; convert once here so later arithmetic
    # works on floats.
    newdata[r['id']].append(float(r['Value']))
This would yield:
In [2]: newdata
Out[2]: defaultdict(<type 'list'>, {1: [0.2, 1.2], 2: [33.34], 3: [44.3, 33.23], 4: [21.1, 5.33], 5: [33.3, 12.2, 1.22], 6: [1.23]})
(Update 2)
Calculating the means is now simple with a dictionary comprehension:
# One mean per id. dict.items() works on both Python 2 and Python 3
# (viewitems() exists only on Python 2), and the loop variable is not called
# `id` so the builtin is not shadowed.
mean = {key: sum(values) / len(values) for key, values in newdata.items()}
Which gives:
In [4]: mean
Out[4]: {1: 0.7, 2: 33.34, 3: 38.765, 4: 13.215, 5: 15.573333333333332, 6: 1.23}
If you have numpy, you could use it for this easily:
import numpy
numpy.mean([x['id'] for x in data])
Otherwise, it would be as simple as:
from __future__ import division # if python2.7
# NOTE(review): this averages the 'id' fields themselves, not the 'Value'
# entries grouped per id — confirm that this is what is wanted.
ids = [x['id'] for x in data]
print sum(ids)/len(ids)
You can simply create a list of IDs after all have been collected:
id_list = [element['id'] for element in data]
From there you can calculate whatever you want.
I am trying to replace each list element with the value looked up in a dictionary. How do I do that?
list = [1, 3, 2, 10]
d = {'id': 1, 'val': 30},{'id': 2, 'val': 53}, {'id': 3, 'val': 1}, {'id': 4, 'val': 9}, {'id': 5, 'val': 2}, {'id': 6, 'val': 6}, {'id': 7, 'val': 11}, {'id': 8, 'val': 89}, {'id': 9, 'val': 2}, {'id': 10, 'val': 4}
for i in list:
for key, v in d.iteritems():
???
???
so at the end I am expecting:
list = [30, 1, 53, 4]
thank you
D2 = dict((x['id'], x['val']) for x in D)
L2 = [D2[x] for x in L]
td = (
    {'val': 30, 'id': 1},
    {'val': 53, 'id': 2},
    {'val': 1, 'id': 3},
    {'val': 9, 'id': 4},
    {'val': 2, 'id': 5},
    {'val': 6, 'id': 6},
    {'val': 11, 'id': 7},
    {'val': 89, 'id': 8},
    {'val': 2, 'id': 9},
    {'val': 4, 'id': 10},
)

source_list = [1, 3, 2, 10]
final_list = []

# For every requested id, collect the 'val' of each td entry with that id.
# Ids with no matching entry contribute nothing to final_list.
for item in source_list:
    for d in td:
        if d['id'] == item:
            final_list.append(d['val'])

print('Source : ', source_list)
print('Final : ', final_list)
Result
Source : [1, 3, 2, 10]
Final : [30, 1, 53, 4]