Python deduplicate list of Dictionaries by Value of a Key - python

I have a pretty basic (but not quite working) function to dedupe a list of dictionaries from key values by adding the key value to a list for keeping track.
def dedupe(rs):
delist = []
for r in rs:
if r['key'] not in delist:
delist.append(r['key'])
else:
rs.remove(r)
return rs
Which gets used in the script just below on two lists of dictionaries:
from pprint import pprint
records = [
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:05:55', '00:07:54'],
['00:00:00', '00:05:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
records2 = [
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:05:55', '00:07:54'],
['00:00:00', '00:05:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
def dedupe(rs):
delist = []
for r in rs:
if r['key'] not in delist:
delist.append(r['key'])
else:
rs.remove(r)
return rs
if __name__ == '__main__':
res = dedupe(records)
res2 = dedupe(records2)
pprint(res)
pprint(res2)
For either records or records2, I would expect to get:
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 3',
'name': 'Item 3',
'positions': [['00:20:05', '00:25:56']]}
]
But instead I get (for each of the two inputs):
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
I keep staring at and tweaking this, but it's not clear to me why it does not delete the third instance when the duplicates are consecutive (records), or why, when the three 'Item 1' instances are split up (records2), it works for 'Item 1' but fails for 'Item 2', which has only two instances.

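I wouldn't remove elements from a list while iterating over it: each removal shifts the remaining elements one position to the left, so the loop skips the element that immediately follows the removed one.
Instead, build a new list: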
def dedupe(rs):
    """Return a new list with only the first record seen for each 'key' value.

    The input list is not modified, which avoids the skipped-element problem
    caused by removing items from a list while iterating over it.

    Args:
        rs: Iterable of dictionaries, each containing a 'key' entry whose
            value is hashable (e.g. a string).

    Returns:
        A new list holding, in original order, the first dictionary
        encountered for every distinct r['key'].

    Raises:
        KeyError: If a record has no 'key' entry.
    """
    seen = set()  # key values already emitted; set gives O(1) membership tests
    new_rs = []
    for r in rs:
        key = r['key']
        if key not in seen:
            seen.add(key)
            new_rs.append(r)
    return new_rs

Related

Problems replacing a key in a nested dictionary

I'm trying to replace the value of a nested dictionary with a for loop and if statement, it kinda works but it replaces other keys as well. I tried solutions from other similar problems but no luck. Any help is appreciated, thanks!
d = {'Item 1': {'Name': 'Wireless Power Bank',
'Price': '$50',
'Status': 'sold-out'},
'Item 2': {'Name': 'Case - iPhone 12',
'Price': '$50',
'Status': 'not'},
'Item 3': {'Name': 'The Case - iPhone 12',
'Price': '$50',
'Status': 'not'},
'Item 4': {'Name': 'Case - iPhone 11',
'Price': '$50',
'Status': 'sold-out'},
'Item 5': {'Name': 'The Card Case - iPhone X/XS',
'Price': '$50',
'Status': 'not'}}
for k, v in d.items():
for inK, inV in v.items():
if "sold-out" in inV:
d[k][inK] = 'Out of Stock'
else:
d[k][inK] = 'In Stock'
Output:
{'Item 1': {'Name': 'In Stock', 'Price': 'In Stock', 'Status': 'Out of Stock'},
'Item 2': {'Name': 'In Stock', 'Price': 'In Stock', 'Status': 'In Stock'},
'Item 3': {'Name': 'In Stock', 'Price': 'In Stock', 'Status': 'In Stock'},
'Item 4': {'Name': 'In Stock', 'Price': 'In Stock', 'Status': 'Out of Stock'},
'Item 5': {'Name': 'In Stock', 'Price': 'In Stock', 'Status': 'In Stock'}}
Expected Output:
{'Item 1': {'Name': 'Wireless Power Bank',
'Price': '$50',
'Status': 'Out of Stock'},
'Item 2': {'Name': 'Case - iPhone 12',
'Price': '$50',
'Status': 'In Stock'},
'Item 3': {'Name': 'The Case - iPhone 12',
'Price': '$50',
'Status': 'In Stock'},
'Item 4': {'Name': 'Case - iPhone 11',
'Price': '$50',
'Status': 'Out of Stock'},
'Item 5': {'Name': 'The Card Case - iPhone X/XS',
'Price': '$50',
'Status': 'In Stock'}}
Solved thanks to @luk2302!
# Rewrite only the 'Status' field of each item; 'Name' and 'Price' are left
# untouched. Iterating over the values avoids repeated d[k] lookups.
for item in d.values():
    item["Status"] = 'Out of Stock' if "sold-out" in item["Status"] else 'In Stock'
Output:
{'Item 1': {'Name': 'Wireless Power Bank', 'Price': '$50', 'Status': 'Out of Stock'},
'Item 2': {'Name': 'Case - iPhone 12', 'Price': '$50', 'Status': 'In Stock'},
'Item 3': {'Name': 'The Case - iPhone 12', 'Price': '$50', 'Status': 'In Stock'},
'Item 4': {'Name': 'Case - iPhone 11', 'Price': '$50', 'Status': 'Out of Stock'},
'Item 5': {'Name': 'The Card Case - iPhone X/XS', 'Price': '$50', 'Status': 'In Stock'}}

How to insert key value after a specific Key Value in nested dictionary lists?

mydict = {'sequence': [{'name': 'Start 1', 'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}}, {'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}}]}, {'name': 'Start 5', 'action': [{'subscribe': {'user': ['user 6', 'user 7'], 'delay': 100}}, {'subscribe': {'user': ['user 2', 'user5'], 'delay': 80}}]}]}
I am trying to insert/append additional 'subscribe' and 'delay' dictionaries to the 'action' list under each 'name' dictionary.
I am having issues being able to reference/identify the correct list 'name' in order to append the 'subscribe' and 'delay'. The subscribe and delay data is coming from different excel sheets with the only linking data being the 'Name'.
Currently using pandas to import the excel data.
newdata = {'subscribe': {'user': ['user X', 'user Y'], 'delay': Z}}
newdata2 = {'subscribe': {'user': ['user A', 'user B'], 'delay': C}}
Final dictionary
mydict = {'sequence': [{'name': 'Start 1', 'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}}, {'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}}, {'subscribe': {'user': ['user X', 'user Y'], 'delay': Z}}]}, {'name': 'Start 5', 'action': [{'subscribe': {'user': ['user 6', 'user 7'], 'delay': 100}}, {'subscribe': {'user': ['user 2', 'user5'], 'delay': 80}}, {'subscribe': {'user': ['user A', 'user B'], 'delay': C}}]}]}
Is what i am trying to do possible?
Given the original data:
# Two named sequences, each carrying a list of 'subscribe' actions.
mydict = {'sequence': [
    {'name': 'Start 1',
     'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}},
                {'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}}]},
    {'name': 'Start 5',
     'action': [{'subscribe': {'user': ['user 6', 'user 7'], 'delay': 100}},
                {'subscribe': {'user': ['user 2', 'user5'], 'delay': 80}}]},
]}
With mydict['sequence'][0], you can get:
{'name': 'Start 1',
'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}},
{'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}}]}
And given that you want to add newdata to the actions in mydict['sequence'][0], you can do:
mydict['sequence'][0]['action'].append(newdata)
Similarly, if you want to add newdata2 to the actions in mydict['sequence'][1], you can do:
mydict['sequence'][1]['action'].append(newdata2)
TL;DR
mydict = {'sequence': [{'name': 'Start 1',
'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}},
{'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}}]},
{'name': 'Start 5',
'action': [{'subscribe': {'user': ['user 6', 'user 7'], 'delay': 100}},
{'subscribe': {'user': ['user 2', 'user5'], 'delay': 80}}]}]}
Z = 9999
C = 8888
newdata = {'subscribe': {'user': ['user X', 'user Y'], 'delay': Z}}
newdata2 = {'subscribe': {'user': ['user A', 'user B'], 'delay': C}}
mydict['sequence'][0]['action'].append(newdata)
mydict['sequence'][1]['action'].append(newdata2)
[out]:
[{'name': 'Start 1',
'action': [{'subscribe': {'user': ['user 1', 'user 3'], 'delay': 120}},
{'subscribe': {'user': ['user 2', 'user5'], 'delay': 20}},
{'subscribe': {'user': ['user X', 'user Y'], 'delay': 9999}}]},
{'name': 'Start 5',
'action': [{'subscribe': {'user': ['user 6', 'user 7'], 'delay': 100}},
{'subscribe': {'user': ['user 2', 'user5'], 'delay': 80}},
{'subscribe': {'user': ['user A', 'user B'], 'delay': 8888}}]}]

How to convert selected column with index to a list of tuples in pandas

Given the dataframe below:
df = pd.DataFrame([{'Name': 'Chris', 'Item Purchased': 'Sponge', 'Cost': 22.50},
{'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
{'Name': 'Filip', 'Item Purchased': 'Spoon', 'Cost': 5.00}],
index=['Store 1', 'Store 1', 'Store 2'])
How do I write a script to produce the following output:
[('Store 1', 22.5), ('Store 1', 2.5), ('Store 2', 5.0)]
We can do zip
list(zip(df.index,df.Cost))
[('Store 1', 22.5), ('Store 1', 2.5), ('Store 2', 5.0)]
You can also use items:
list(df['Cost'].items())
#df['Cost'].to_dict().items()
[('Store 1', 22.5), ('Store 1', 2.5), ('Store 2', 5.0)]
You can also use itertuples since you have an index set.
list(df[['Cost']].itertuples(name=None))
[('Store 1', 22.5), ('Store 1', 2.5), ('Store 2', 5.0)]

How can I create a nested list from an existing one where the first x items are the outer items?

I have a table scraped into a list of the form:
['Column A', 'Column B', 'Column C', 'Item A1', 'Item B1', 'Item C1', ...]
And I'd like to turn that into a nested list so that I can use it like a table in Python, like so:
[[Column A, Item A1, Item A2, Item A3, Item A4], [Column B, ... ]
Also, would this be the best/simplest way of working with this data?
Thank you
x = 3
data = ['Column A', 'Column B', 'Column C', 'Item A1', ... , 'Item C4']
newList = []
for i in range (x):
newList.append(data[i::x])
print (newList)
will give this:
[['Column A', 'Item A1', 'Item A2', 'Item A3', 'Item A4'], ['Column B', 'Item B1', 'Item B2', 'Item B3', 'Item B4'], ['Column C', 'Item C1', 'Item C2', 'Item C3', 'Item C4']]
As pppery suggested, a dictionary may be better. Rather than adding to a list called newList create a newDictionary like this:
newDict = {}
and add to it like so:
for i in range (x):
newDict[data[i]] = data[i+x::x]
print (newDict)
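This may come out in an odd order (on Python versions before 3.7, dictionaries do not preserve insertion order):
{'Column B': ['Item B1', 'Item B2', 'Item B3', 'Item B4'], 'Column C': ['Item C1', 'Item C2', 'Item C3', 'Item C4'], 'Column A': ['Item A1', 'Item A2', 'Item A3', 'Item A4']}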
but the order doesn't matter. To access the "A" items, you would use newDict['Column A'].

Python sort list of lists over multiple levels and with a custom order

I have a list of lists my_list e.g.
[
['item 3.1', 'item 3.2', 'item 3.3', 'item 3.4', 'item 3.5'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.2', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.6', 'item 3.2'],
['item 3.1', 'item 2.2', 'item 2.4', 'item 2.7', 'item 2.5'],
['item 2.1', 'item 2.2', 'item 2.3', 'item 2.4', 'item 2.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.4', 'item 1.5'],
['item 4.1', 'item 4.2', 'item 4.3', 'item 4.4', 'item 4.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.5', 'item 1.5']
]
I want to order this list on multiple levels, with one of the levels using a custom order, but I don't know how to do this.
e.g. I want to order by the first item first then within that order, order them by the third item then within that order, order by the fourth item. But I want the first item to be ordered/sorted in this particular order rather than alphabetical:
item 4.1
item 2.1
item 3.1
item 1.1
so the final list (after sorting) would be:
[
['item 4.1', 'item 4.2', 'item 4.3', 'item 4.4', 'item 4.5'],
['item 2.1', 'item 2.2', 'item 2.3', 'item 2.4', 'item 2.5'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.2', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.6', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.3', 'item 3.4', 'item 3.5'],
['item 3.1', 'item 2.2', 'item 2.4', 'item 2.7', 'item 2.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.4', 'item 1.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.5', 'item 1.5'],
]
I know I can sort over multiple levels by doing:
s = sorted(my_list, key=itemgetter(0,2,3))
But this would sort them all alphabetically and what I don't know how to do is set the first column to be sorted by the custom order.
Ugly and inefficient, but probably working:
order = ['item 4.1', 'item 2.1', 'item 3.1', 'item 1.1']
my_list.sort(key=lambda x: order.index(x[0]))
reorders my_list to:
[['item 4.1', 'item 4.2', 'item 4.3', 'item 4.4', 'item 4.5'],
['item 2.1', 'item 2.2', 'item 2.3', 'item 2.4', 'item 2.5'],
['item 3.1', 'item 3.2', 'item 3.3', 'item 3.4', 'item 3.5'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.2', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.6', 'item 3.2'],
['item 3.1', 'item 2.2', 'item 2.4', 'item 2.7', 'item 2.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.4', 'item 1.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.5', 'item 1.5']]
If you want to sort by first column in a customized way, then by the third and fourth alphabetically, just expand the key:
my_list.sort(key=lambda x: (order.index(x[0]), x[2], x[3]))
which reorders my_list in place to:
[['item 4.1', 'item 4.2', 'item 4.3', 'item 4.4', 'item 4.5'],
['item 2.1', 'item 2.2', 'item 2.3', 'item 2.4', 'item 2.5'],
['item 3.1', 'item 2.2', 'item 2.4', 'item 2.7', 'item 2.5'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.2', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.2', 'item 3.6', 'item 3.2'],
['item 3.1', 'item 3.2', 'item 3.3', 'item 3.4', 'item 3.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.4', 'item 1.5'],
['item 1.1', 'item 1.2', 'item 1.3', 'item 1.5', 'item 1.5']]

Categories

Resources