How to convert nested dict of dict to nested OrderedDict - python

I have a requirement to convert a nested dict of dicts to a nested OrderedDict:
user_dict = {"a": {"b": {"c":
{'d': 'e',
'f': 'g',
'h': 'i'
}}}}
Expected output:
cfg_opts = OrderedDict([('a', OrderedDict([('b', OrderedDict([('c', OrderedDict([('d', 'e'), ('f','g'), ('h', 'i')]))]))]))])

I would use recursive function for this task as follows
import collections
user_dict = {'a': {'b': {'c': {'d': 'e', 'f': 'g', 'h': 'i'}}}}
def orderify(d):
    """Recursively convert a dict, and every dict nested in its values, to OrderedDict.

    Args:
        d: Any object. Instances of ``dict`` (including subclasses) are
            converted; anything else is returned unchanged.

    Returns:
        A ``collections.OrderedDict`` whose keys follow the iteration order of
        ``d`` and whose dict values have been converted the same way, or ``d``
        itself when it is not a dict.
    """
    if not isinstance(d, dict):
        return d
    # Hand the (key, value) pairs straight to OrderedDict instead of building
    # an intermediate plain dict first: on interpreters where plain dicts are
    # unordered, that intermediate dict could reorder the keys.
    return collections.OrderedDict((k, orderify(v)) for k, v in d.items())
ordered_user_dict = orderify(user_dict)
print(ordered_user_dict)
output
OrderedDict([('a', OrderedDict([('b', OrderedDict([('c', OrderedDict([('d', 'e'), ('f', 'g'), ('h', 'i')]))]))]))])

Related

Pandas dataframe to dict of list of tuples

Suppose I have the following dataframe:
df = pd.DataFrame({'id': [1,2,3,3,3], 'v1': ['a', 'a', 'c', 'c', 'd'], 'v2': ['z', 'y', 'w', 'y', 'z']})
df
id v1 v2
1 a z
2 a y
3 c w
3 c y
3 d z
And I want to transform it to this format:
{1: [('a', 'z')], 2: [('a', 'y')], 3: [('c', 'w'), ('c', 'y'), ('d', 'z')]}
I basically want to create a dict where the keys are the ids and each value is a list of the (v1, v2) tuples for that id.
I tried using groupby in id:
df.groupby('id')[['v1', 'v2']].apply(list)
But this didn't work
Create tuples first and then pass to groupby with aggregate list:
d = df[['v1', 'v2']].agg(tuple, 1).groupby(df['id']).apply(list).to_dict()
print (d)
{1: [('a', 'z')], 2: [('a', 'y')], 3: [('c', 'w'), ('c', 'y'), ('d', 'z')]}
Another idea is using MultiIndex:
d = df.set_index(['v1', 'v2']).groupby('id').apply(lambda x: x.index.tolist()).to_dict()
You can use defaultdict from the collections library :
from collections import defaultdict
d = defaultdict(list)
for k, v, s in df.to_numpy():
d[k].append((v, s))
defaultdict(list,
{1: [('a', 'z')],
2: [('a', 'y')],
3: [('c', 'w'), ('c', 'y'), ('d', 'z')]})
# Build one (v1, v2) tuple per row, then collect the tuples of each id into a
# list. Using 'id' directly as a dict key (set_index + to_dict) keeps only one
# tuple per id, because ids repeat in the frame.
df['New'] = [tuple(x) for x in df[['v1','v2']].to_records(index=False)]
df = df[['id', 'New']]
d = df.groupby('id')['New'].apply(list).to_dict()
print(d)
# Output:
# {1: [('a', 'z')], 2: [('a', 'y')], 3: [('c', 'w'), ('c', 'y'), ('d', 'z')]}

How can I replace all the values of a Python dictionary with a range of values?

I have the following dictionary:
mydict = {('a', 'b'): 28.379,
('c', 'd'): 32.292,
('e', 'f'): 61.295,
('g', 'h'): 112.593,
('i', 'j'): 117.975}
And I would like to replace all the values with a range from 1 to 5, but keep the order of the keys. As a result, I would get this:
mydict = {('a', 'b'): 1,
('c', 'd'): 2,
('e', 'f'): 3,
('g', 'h'): 4,
('i', 'j'): 5}
The length of the dictionary is actually 22000, so I need a range from 1 to 22000.
How can I do it?
Thanks in advance.
Using enumerate to iterate on the keys, you can do:
mydict = {('a', 'b'): 28.379,
('c', 'd'): 32.292,
('e', 'f'): 61.295,
('g', 'h'): 112.593,
('i', 'j'): 117.975}
for i, key in enumerate(mydict): # iterates on the keys
mydict[key] = i
print(mydict)
# {('a', 'b'): 0, ('c', 'd'): 1, ('e', 'f'): 2, ('g', 'h'): 3, ('i', 'j'): 4}
Important note: dicts are only officially ordered since Python 3.7 (and in the CPython implementation since 3.6), so this wouldn't make much sense with older versions of Python.
To answer your comment: enumerate takes an optional second parameter, start (which defaults to 0).
So, if you want to start at 1, just do:
for i, key in enumerate(mydict, start=1): # iterates on the keys
mydict[key] = i
The most simple is to create another dictionary from the keys of the previous one.
mydict2=dict()
for i,key in enumerate(mydict):
mydict2[key]=i+1
You can do this with a one-liner which is more compact:
mydict = {('a', 'b'): 28.379,
          ('c', 'd'): 32.292,
          ('e', 'f'): 61.295,
          ('g', 'h'): 112.593,
          ('i', 'j'): 117.975}
# Number the keys 1..len(mydict) in insertion order. Only the keys are needed,
# so iterate the dict itself; start=1 gives the 1-based range that was asked for.
{k: i for i, k in enumerate(mydict, start=1)}
Pandas solution for this:
import pandas as pd
# One row per key (the keys become the index after the transpose).
a = pd.DataFrame(mydict, index=[0]).T
# Overwrite the single value column with 1..len(a), the range that was asked for.
a[0] = list(range(1, len(a) + 1))
a.to_dict()[0]
# {('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3, ('g', 'h'): 4, ('i', 'j'): 5}
This can be done gracefully with dict.update and itertools.count, and explicit loops can be avoided:
>>> mydict = {('a', 'b'): 28.379,
... ('c', 'd'): 32.292,
... ('e', 'f'): 61.295,
... ('g', 'h'): 112.593,
... ('i', 'j'): 117.975}
>>> from itertools import count
>>> mydict.update(zip(mydict, count(1)))
>>> mydict
{('a', 'b'): 1, ('c', 'd'): 2, ('e', 'f'): 3, ('g', 'h'): 4, ('i', 'j'): 5}

Brute force stable marriage: how to implement all possible pairs between 2 lists?

I'm trying to implement an algorithm to find all stable marriage solutions with a brute force approach without the Gale-Shapley algorithm (because it gives us only 2 of them).
I'm using the checking mechanism found on Rosetta Code, but I'm having a hard time trying to find a way to create all possible matches with no repetitions (the kind you get with 2 nested for loops)
e.g
from these 2 lists [a,b,c] and [d,e,f] create
[(a,d),(b,e),(c,f)]
[(a,d),(b,f),(c,e)]
[(a,e),(b,f),(c,d)]
[(a,e),(b,d),(c,f)]
[(a,f),(b,d),(c,e)]
[(a,f),(b,e),(c,d)]
UPDATE1:
With all the solutions so far I'm not able to run it when it gets big.
I should probably do it recursively without storing long data structures, testing each single result as I get it and discarding the others. I came up with this solution, but it still has problems: it gives me some repetitions and misses some matchings. I don't know how to fix it, and sorry, my brain is melting!
boys=['a','b','c']
girls=['d','e','f']
# Recursive attempt at enumerating boy -> girl assignments; it prints each
# dict that reaches three entries. The question text around this snippet
# reports that it produces repeated and missing matchings.
# NOTE(review): `dic={}` is a mutable default argument, so every call that does
# not pass `dic` shares one dict object; the recursive calls below also pass
# that same object down rather than a copy.
def matches(boys, girls, dic={}):
# A complete matching is detected by a hard-coded size of 3 (the length of the
# example lists), not by the size of the inputs.
if len(dic)==3: #len(girls)==0 gives me more problems
print dic #just for testing with few elements
#run the stability check
else:
# Try each boy with each girl still available at this level.
for b in boys:
for g in girls:
dic[b]=g
# Recurse on copies of both lists with the chosen boy and girl removed.
bb=boys[:]
bb.remove(b)
gg=girls[:]
gg.remove(g)
matches(bb,gg, dic)
# NOTE(review): clear() empties the one shared dict, which also discards pairs
# assigned by outer recursion levels -- presumably a cause of the repeated and
# missing results; the original nesting of this line is not recoverable here.
dic.clear()
matches(boys,girls)
gives me this output
{'a': 'd', 'c': 'f', 'b': 'e'} <-
{'a': 'e', 'c': 'f', 'b': 'd'} <-
{'a': 'f', 'c': 'e', 'b': 'd'}
{'a': 'e', 'c': 'f', 'b': 'd'} <-
{'a': 'd', 'c': 'f', 'b': 'e'} <-
{'a': 'd', 'c': 'e', 'b': 'f'} <-
{'a': 'e', 'c': 'd', 'b': 'f'}
{'a': 'd', 'c': 'e', 'b': 'f'} <-
{'a': 'd', 'c': 'f', 'b': 'e'} <-
UPDATE 2
My complete working exercise inspired by #Zags (inspired by #Jonas):
guyprefers = {
'A': ['P','S','L','M','R','T','O','N'],
'B': ['M','N','S','P','O','L','T','R'],
'D': ['T','P','L','O','R','M','N','S'],
'E': ['N','M','S','O','L','R','T','P'],
'F': ['S','M','P','L','N','R','T','O'],
'G': ['L','R','S','P','T','O','M','N'],
'J': ['M','P','S','R','N','O','T','L'],
'K': ['N','T','O','P','S','M','R','L']
}
galprefers = {
'L': ['F','D','J','G','A','B','K','E'],
'M': ['K','G','D','F','J','B','A','E'],
'N': ['A','F','G','B','E','K','J','D'],
'O': ['K','J','D','B','E','A','F','G'],
'P': ['G','E','J','D','K','A','B','F'],
'R': ['B','K','F','D','E','G','J','A'],
'S': ['J','F','B','A','K','G','E','D'],
'T': ['J','E','A','F','B','D','G','K']
}
guys = sorted(guyprefers.keys())
gals = sorted(galprefers.keys())
def permutations(iterable): #from itertools a bit simplified
    """Yield every full-length permutation of *iterable* as a tuple.

    The permutations are produced in the same order as
    ``itertools.permutations(iterable)``: lexicographic with respect to the
    positions of the input items.

    Args:
        iterable: Any finite iterable; it is materialised once into a tuple.

    Yields:
        Tuples of length ``len(pool)``; an empty input yields one empty tuple.
    """
    pool = tuple(iterable) #just to understand what it is doing
    n = len(pool)
    # Real lists are required: both sequences are modified in place below,
    # which a Python 3 ``range`` object does not support.
    indices = list(range(n))
    cycles = list(range(n, 0, -1))
    # The starting arrangement is itself a permutation and must be emitted
    # before the loop starts advancing; without this it would be skipped.
    yield tuple(pool[i] for i in indices)
    while n:
        for i in reversed(range(n)):
            cycles[i] -= 1
            if cycles[i] == 0:
                # Position i has cycled through all its options: rotate the
                # tail back into order and reset its counter.
                indices[i:] = indices[i+1:] + indices[i:i+1]
                cycles[i] = n - i
            else:
                j = cycles[i]
                indices[i], indices[-j] = indices[-j], indices[i]
                yield tuple(pool[i] for i in indices)
                break
        else:
            # Every position was exhausted: all permutations have been produced.
            return
def check(engaged): #thanks to rosettacode
    """Return True when the matching *engaged* contains no blocking pair.

    ``engaged`` maps each gal to her guy. Preferences are read from the
    module-level ``guyprefers`` and ``galprefers`` dicts, each mapping a
    person to a list ordered from most to least preferred. The matching is
    rejected as soon as someone preferred by one partner also prefers that
    partner over their own current match.
    """
    gal_of = {guy: gal for gal, guy in engaged.items()}
    for she, he in engaged.items():
        her_ranking = galprefers[she]
        guys_she_prefers = her_ranking[:her_ranking.index(he)]
        his_ranking = guyprefers[he]
        gals_he_prefers = his_ranking[:his_ranking.index(she)]

        # Would any guy she ranks above her partner rather be with her?
        for rival in guys_she_prefers:
            rivals_gal = gal_of[rival]
            rival_ranking = guyprefers[rival]
            rank_current = rival_ranking.index(rivals_gal)
            rank_her = rival_ranking.index(she)
            if rank_current > rank_her:
                return False

        # Would any gal he ranks above his partner rather be with him?
        for rival in gals_he_prefers:
            rivals_guy = engaged[rival]
            rival_ranking = galprefers[rival]
            rank_current = rival_ranking.index(rivals_guy)
            rank_him = rival_ranking.index(he)
            if rank_current > rank_him:
                return False
    return True
# Pair every ordering of the guys with the (sorted) gals and print each
# gal -> guy matching that passes the stability check.
match_to_check = {}
for i in permutations(guys):
    couples = sorted(zip(i, gals))
    for couple in couples:
        match_to_check[couple[1]] = couple[0]
    if check(match_to_check):
        # print() call form: valid on Python 3 and prints the same on Python 2.
        print(match_to_check)
    match_to_check.clear()
with the correct output:
{'M': 'F', 'L': 'D', 'O': 'K', 'N': 'A', 'P': 'G', 'S': 'J', 'R': 'B', 'T': 'E'}
{'M': 'F', 'L': 'D', 'O': 'K', 'N': 'B', 'P': 'G', 'S': 'J', 'R': 'E', 'T': 'A'}
{'M': 'J', 'L': 'D', 'O': 'K', 'N': 'A', 'P': 'G', 'S': 'F', 'R': 'B', 'T': 'E'}
{'M': 'J', 'L': 'D', 'O': 'K', 'N': 'B', 'P': 'G', 'S': 'F', 'R': 'E', 'T': 'A'}
{'M': 'D', 'L': 'F', 'O': 'K', 'N': 'A', 'P': 'G', 'S': 'J', 'R': 'B', 'T': 'E'}
{'M': 'J', 'L': 'G', 'O': 'K', 'N': 'A', 'P': 'D', 'S': 'F', 'R': 'B', 'T': 'E'}
{'M': 'J', 'L': 'G', 'O': 'K', 'N': 'B', 'P': 'A', 'S': 'F', 'R': 'E', 'T': 'D'}
{'M': 'J', 'L': 'G', 'O': 'K', 'N': 'B', 'P': 'D', 'S': 'F', 'R': 'E', 'T': 'A'}
Optimized answer
(Inspired by #Jonas but doesn't require Numpy):
from itertools import permutations
l1 = ["a", "b", "c"]
l2 = ["d", "e", "f"]
valid_pairings = [sorted(zip(i, l2)) for i in permutations(l1)]
valid_pairings is:
[
[('a', 'd'), ('b', 'e'), ('c', 'f')],
[('a', 'd'), ('b', 'f'), ('c', 'e')],
[('a', 'e'), ('b', 'd'), ('c', 'f')],
[('a', 'f'), ('b', 'd'), ('c', 'e')],
[('a', 'e'), ('b', 'f'), ('c', 'd')],
[('a', 'f'), ('b', 'e'), ('c', 'd')]
]
Warning: the size of the output is factorial(n), where n is the size of the smaller input. At n = 14, this requires 100's of GBs of memory to store, more than most modern systems have.
Old Answer
from itertools import product, combinations
def flatten(lst):
    """Return one flat list holding the items of every sub-iterable of *lst*.

    Only one level of nesting is removed; the items keep their order.
    """
    flat = []
    for sublist in lst:
        flat.extend(sublist)
    return flat
l1 = ["a", "b", "c"]
l2 = ["d", "e", "f"]
all_pairings = combinations(product(l1, l2), min(len(l1), len(l2)))
# remove those pairings where an item appears more than once
valid_pairings = [i for i in all_pairings if len(set(flatten(i))) == len(flatten(i))]
Valid pairings is:
[
(('a', 'd'), ('b', 'e'), ('c', 'f')),
(('a', 'd'), ('b', 'f'), ('c', 'e')),
(('a', 'e'), ('b', 'd'), ('c', 'f')),
(('a', 'e'), ('b', 'f'), ('c', 'd')),
(('a', 'f'), ('b', 'd'), ('c', 'e')),
(('a', 'f'), ('b', 'e'), ('c', 'd'))
]
This is a bit of a brute force approach (don't use it for long lists), just sample the population enough times to be sure you have all possible combinations, make a set of it and sort the result.
from random import sample
x = ["a","b","c"]
y = ['d','e','f']
z = {tuple(sample(y,3)) for i in range(25)}
result = sorted([list(zip(x,z_)) for z_ in z])
>>>result
[[('a', 'd'), ('b', 'e'), ('c', 'f')],
[('a', 'd'), ('b', 'f'), ('c', 'e')],
[('a', 'e'), ('b', 'd'), ('c', 'f')],
[('a', 'e'), ('b', 'f'), ('c', 'd')],
[('a', 'f'), ('b', 'd'), ('c', 'e')],
[('a', 'f'), ('b', 'e'), ('c', 'd')]]
This is not the way to go, it's just a different approach.
Combine the "wives" with all permutations of the "husbands" and you get all combinations.
import itertools
import numpy as np
husbands = ['d', 'e', 'f']
wifes = ['a', 'b', 'c']
permutations = list(itertools.permutations(husbands))
repetition = [wifes for _ in permutations]
res = np.dstack((repetition,permutations))
print(res)
Result is:
[[['a' 'd']
['b' 'e']
['c' 'f']]
[['a' 'd']
['b' 'f']
['c' 'e']]
[['a' 'e']
['b' 'd']
['c' 'f']]
[['a' 'e']
['b' 'f']
['c' 'd']]
[['a' 'f']
['b' 'd']
['c' 'e']]
[['a' 'f']
['b' 'e']
['c' 'd']]]
If you prefer tuples:
res = res.view(dtype=np.dtype([('x', np.dtype('U1')), ('y', np.dtype('U1'))]))
res = res.reshape(res.shape[:-1])
print(res)
Result:
[[('a', 'd') ('b', 'e') ('c', 'f')]
[('a', 'd') ('b', 'f') ('c', 'e')]
[('a', 'e') ('b', 'd') ('c', 'f')]
[('a', 'e') ('b', 'f') ('c', 'd')]
[('a', 'f') ('b', 'd') ('c', 'e')]
[('a', 'f') ('b', 'e') ('c', 'd')]]

Create a new dictionary using an existing one and list

If I had:
adict = {'a':3, 'b':6, 'c':9, 'd':12}
alist = ['a', 'z', 't', 's']
How would I create a new dict with the keys of the first dict and the items of the list, resulting in this?
bdict = {'a': 'a', 'b': 'z', 'c': 't', 'd': 's'}
To bring the keys of adict together with the values from alist, use the zip() function.
>>> from collections import OrderedDict
>>> adict = OrderedDict([('a', 3), ('b', 6), ('c', 9), ('d', 12)])
>>> alist = ['a', 'z', 't', 's']
>>> bdict = OrderedDict(zip(adict, alist))
>>> bdict
OrderedDict([('a', 'a'), ('b', 'z'), ('c', 't'), ('d', 's')])
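I've used ordered dictionaries here because the question only makes sense if the dictionaries are OrderedDicts; otherwise, you can't guarantee the pairwise one-to-one correspondence between adict and alist. (Since Python 3.7 plain dicts also preserve insertion order, so dict(zip(adict, alist)) gives the same pairing there.)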

Most Pythonic way for creating a defaultdictionary counter

I am trying to count occurrences of various items based on condition. What I have until now is this function that given two items will increase the counter like this:
given [('a', 'a'), ('a', 'b'), ('b', 'a')] will output defaultdict(<class 'collections.Counter'>, {'a': Counter({'a': 1, 'b': 1}), 'b': Counter({'a': 1})})
The function can be seen below:
def freq(samples=None):
    """Count, for each first item, how often each second item follows it.

    Args:
        samples: Iterable of 2-item sequences, or a falsy value for no data.

    Returns:
        A ``defaultdict(Counter)`` mapping first item -> Counter of second items.
    """
    counts = defaultdict(Counter)
    if not samples:
        return counts
    for first, second in samples:
        counts[first][second] += 1
    return counts
It is limited, though, to only work with 2-tuples, while I would like it to be more generic and work with any number of variables, e.g., [('a', 'a', 'b'), ('a', 'b', 'c'), ('b', 'a', 'a')] would still work and I would be able to query the result for, let's say, res['a']['b'] and get the count for 'c', which is one.
What would be the best way to do this in Python?
Assuming all tuples in the list have the same length:
from collections import Counter
from itertools import groupby
from operator import itemgetter
def freq(samples=()):
    """Build a nested frequency table from equal-length tuples.

    The samples are grouped by their first item; for tuples longer than two
    items the remainder of each tuple is grouped recursively, and the last
    level is a ``Counter`` of the final item.

    Args:
        samples: Iterable of sortable, equal-length sequences with at least
            two items each. Defaults to no samples.

    Returns:
        A dict nested one level per leading item, ending in ``Counter``
        objects; ``{}`` for empty input.
    """
    # groupby only merges adjacent equal keys, so the data must be sorted first.
    sorted_samples = sorted(samples)
    has_deeper_levels = bool(sorted_samples) and len(sorted_samples[0]) > 2
    table = {}
    for key, values in groupby(sorted_samples, itemgetter(0)):
        if has_deeper_levels:
            table[key] = freq(value[1:] for value in values)
        else:
            table[key] = Counter(value[1] for value in values)
    return table
That gives:
freq([('a', 'a'), ('a', 'b'), ('b', 'a'), ('a', 'c')])
>>> {'a': Counter({'a': 1, 'b': 1, 'c': 1}), 'b': Counter({'a': 1})}
freq([('a', 'a', 'a'), ('a', 'b', 'c'), ('b', 'a', 'a'), ('a', 'c', 'c')])
>>> {'a': {'a': Counter({'a': 1}), 'b': Counter({'c': 1}), 'c': Counter({'c': 1})}, 'b': {'a': Counter({'a': 1})}}
One option is to use the full tuples as keys
def freq(samples=()):
    """Count how many times each whole sample occurs.

    Args:
        samples: Iterable of hashable items (e.g. tuples). Defaults to no samples.

    Returns:
        A ``Counter`` mapping each distinct sample to its number of occurrences.
    """
    # iter() makes Counter count the elements one by one even if a mapping is
    # passed, matching a plain "for sample in samples" loop.
    return Counter(iter(samples))
which would then return things as
Counter({('a', 'a', 'b'): 1, ('a', 'b', 'c'): 1, ('b', 'a', 'a'): 1})
You could convert the tuples to strings to select certain slices, e.g. "('a', 'b',". For example in a new dictionary {k: v for k,v in out.items() if str(k)[:10] == "('a', 'b',"}.
If the groups are indeed either 2 or 3 long, but never both, you can change to:
def freq(samples):
    """Build a nested counter from samples that are all 2 or all 3 items long.

    Args:
        samples: Non-empty, indexable collection of equal-length sequences.
            The length of the first sample selects the nesting depth.

    Returns:
        For pairs, a defaultdict such that ``out[a][b]`` is the count of
        ``(a, b)``; for triples, ``out[a][b][c]`` is the count of ``(a, b, c)``.

    Raises:
        IndexError: If ``samples`` is empty.
        ValueError: If the samples are neither 2 nor 3 items long.
    """
    width = len(samples[0])
    if width == 2:
        # Two subscripts need two mapping levels; the innermost default is 0.
        out = defaultdict(lambda: defaultdict(int))
        for a, b in samples:
            out[a][b] += 1
    elif width == 3:
        out = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        for a, b, c in samples:
            out[a][b][c] += 1
    else:
        raise ValueError(
            "samples must contain sequences of length 2 or 3, got length %d" % width)
    return out

Categories

Resources