Pandas dataframe to dict of list of tuples - python

Suppose I have the following dataframe:
df = pd.DataFrame({'id': [1,2,3,3,3], 'v1': ['a', 'a', 'c', 'c', 'd'], 'v2': ['z', 'y', 'w', 'y', 'z']})
df
id v1 v2
1 a z
2 a y
3 c w
3 c y
3 d z
And I want to transform it to this format:
{1: [('a', 'z')], 2: [('a', 'y')], 3: [('c', 'w'), ('c', 'y'), ('d', 'z')]}
I basically want to create a dict where the keys are the ids and each value is a list of the (v1, v2) tuples for that id.
I tried using groupby in id:
df.groupby('id')[['v1', 'v2']].apply(list)
But this didn't work

Create tuples first and then pass to groupby with aggregate list:
d = df[['v1', 'v2']].agg(tuple, 1).groupby(df['id']).apply(list).to_dict()
print (d)
{1: [('a', 'z')], 2: [('a', 'y')], 3: [('c', 'w'), ('c', 'y'), ('d', 'z')]}
Another idea is using MultiIndex:
d = df.set_index(['v1', 'v2']).groupby('id').apply(lambda x: x.index.tolist()).to_dict()

You can use defaultdict from the collections library :
from collections import defaultdict
d = defaultdict(list)
for k, v, s in df.to_numpy():
d[k].append((v, s))
defaultdict(list,
{1: [('a', 'z')],
2: [('a', 'y')],
3: [('c', 'w'), ('c', 'y'), ('d', 'z')]})

df['New'] = [tuple(x) for x in df[['v1','v2']].to_records(index=False)]
df=df[['id','New']]
df=df.set_index('id')
df.to_dict()
Output:
{'New': {1: ('a', 'z'), 2: ('a', 'y'), 3: ('d', 'z')}}

Related

How to map i-th elements of tuples in a list to keys in another list to form a dictionary

I would like to map i-th elements of tuples within given list to keys that are available in another list to form a dictionary.
Here below is the code I have (it is not working as it is updating the values so I end up with the latest value rather than what I am after):
k = [1,2,3]
v = [('a', 'b', 'c'), ('d', 'e', 'f')]
d = {}
for i, j in enumerate(k):
if len(v) > 1:
for k in range(len(v)):
d[j] = v[k][i]
else:
d[j] = v[0][i]
print(d)
#{1: 'd', 2: 'e', 3: 'f'}
I would like to have this output instead:
{1: ('a', 'd'), 2: ('b', 'e'), 3: ('c', 'f')}
Any tips are appreciated.
Using zip with a dict comprehension
Ex:
k = [1,2,3]
v = [('a', 'b', 'c'), ('d', 'e', 'f')]
print({i: v for i, v in zip(k, zip(*v))})
#or
print(dict(zip(k, zip(*v))))
Output:
{1: ('a', 'd'), 2: ('b', 'e'), 3: ('c', 'f')}

Matrix file to dictionary in python

I have a file matrix.txt that contains :
A B C
A 1 2 3
B 4 5 6
C 7 8 9
I want to read the content of the file and store it in a dictionary as following :
{('A', 'A') : 1, ('A', 'B') : 2, ('A', 'C') : 3,
('B', 'A') : 4, ('B', 'B') : 5, ('B', 'C') : 6,
('C', 'A') : 7, ('C', 'B') : 8, ('C', 'C') : 9}
The following Python 3 function yields every matrix item together with its indices, in a form compatible with the dict constructor:
def read_mx_cells(file, parse_cell=lambda x: x):
    """Yield ``((row_id, col_id), value)`` for each cell of a labelled matrix.

    The first line of *file* is the header holding the column labels. Every
    following line starts with a row label followed by one cell per column,
    all separated by whitespace. The yielded pairs can be passed straight to
    ``dict()``.

    Args:
        file: Any iterable of text lines (an open text file, a list of
            strings, ...).
        parse_cell: Callable applied to each raw cell string; defaults to
            returning the string unchanged.

    Yields:
        ``((row_id, col_id), parse_cell(cell))`` tuples in file order.
        Nothing is yielded for empty input.
    """
    # str.split() with no argument already ignores leading/trailing
    # whitespace, including the newline.
    rows = (line.split() for line in file)

    # A bare next() on an exhausted iterator would raise StopIteration inside
    # this generator, which Python 3.7+ turns into RuntimeError; supply a
    # default so empty input simply yields nothing.
    header = next(rows, None)
    if header is None:
        return

    for row in rows:
        if not row:
            # Blank line (e.g. a trailing newline at end of file): there is
            # no row label to read, so skip it instead of raising IndexError.
            continue
        row_id, *cells = row
        for col_id, cell in zip(header, cells):
            yield (row_id, col_id), parse_cell(cell)
with open('matrix.txt') as f:
for x in read_mx_cells(f, int):
print(x)
# ('A','A'),1
# ('A','B'),2
# ('A','C'),3 ...
with open('matrix.txt') as f:
print(dict(read_mx_cells(f, int)))
# { ('A','A'): 1, ('A','B'): 2, ('A','C'): 3 ... }
# Note: plain dicts only preserve insertion order on Python 3.7+; on older versions item order is not retained
You can use itertools.product to create your keys, using the file header and the first column after transposing to create the keys, then just zip transforming the remaining rows back to their original state and creating a single iterable of the split substrings. To maintain order we also need to use an OrderedDict:
from collections import OrderedDict
from itertools import izip, product, imap, chain
with open("matrix.txt") as f:
head, zipped = next(f).split(), izip(*imap(str.split, f))
cols = next(zipped)
od = OrderedDict(zip(product(head, cols), chain.from_iterable(izip(*zipped))))
Output:
OrderedDict([(('A', 'A'), '1'), (('A', 'B'), '2'), (('A', 'C'), '3'),
(('B', 'A'), '4'), (('B', 'B'), '5'), (('B', 'C'), '6'), (('C', 'A'), '7'),
(('C', 'B'), '8'), (('C', 'C'), '9')])
For python3 just use map and zip.
Or without transposing and using the csv lib:
from collections import OrderedDict
from itertools import izip,repeat
import csv
with open("matrix.txt") as f:
r = csv.reader(f, delimiter=" ", skipinitialspace=1)
head = repeat(next(r))
od = OrderedDict((((row[0], k), v) for row in r
for k, v in izip(next(head), row[1:])))
output will be the same.
pandas makes it pretty neat.
import pandas as pd
Approach 1
df = pd.read_table('matrix.txt', sep=' ')
>>> df
A B C
A 1 2 3
B 4 5 6
C 7 8 9
d = df.to_dict()
>>> d
{'A': {'A': 1, 'B': 4, 'C': 7},
'B': {'A': 2, 'B': 5, 'C': 8},
'C': {'A': 3, 'B': 6, 'C': 9}}
new_d = {}
{new_d.update(g) for g in [{(r,c):v for r,v in v1.iteritems()} for c,v1 in d.iteritems()]}
>>> new_d
{('A', 'A'): 1,
('A', 'B'): 2,
('A', 'C'): 3,
('B', 'A'): 4,
('B', 'B'): 5,
('B', 'C'): 6,
('C', 'A'): 7,
('C', 'B'): 8,
('C', 'C'): 9}
Approach 2
df = pd.read_table('matrix.txt', sep=' ')
>>> df
A B C
A 1 2 3
B 4 5 6
C 7 8 9
new_d = {}
for r, v in df.iterrows():
for c, v1 in v.iteritems():
new_d.update({(r,c): v1})
>>> new_d
{('A', 'A'): 1,
('A', 'B'): 2,
('A', 'C'): 3,
('B', 'A'): 4,
('B', 'B'): 5,
('B', 'C'): 6,
('C', 'A'): 7,
('C', 'B'): 8,
('C', 'C'): 9}

How to create a new list of tuples based on a values from original list of tuples?

My function is currently returning:
[('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
However, I need the final output to be:
[('a', 'h'), ('d', 'g')]
As you can see, if i[1] and i[2] match I need i[0] to be paired together.
I was trying to use a for loop but I can't think of how to write it, at this moment.
This seems to work:
from itertools import combinations
l = [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
print([(a[0], b[0]) for a, b in combinations(l, 2) if a[1:] == b[1:]])
You can do this by sorting the list on the second and third elements and then using itertools.groupby. Then, for each group, take the first element of every tuple in it. Example:
>>> a = [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
>>> lst = []
>>> new_a = sorted(a, key=lambda i: (i[1], i[2]))
>>> for _, x in itertools.groupby(new_a, lambda i: (i[1], i[2])):
... lst.append(tuple(y[0] for y in x))
...
>>> lst
[('a', 'h'), ('d', 'g')]
This can also be done in one line as (though unreadable) -
>>> l = [tuple(y[0] for y in x) for _, x in itertools.groupby(sorted(a, key=lambda i: (i[1], i[2])), lambda i: (i[1], i[2]))]
>>> l
[('a', 'h'), ('d', 'g')]
Group based on the second and third elements of each tuple, appending the first element to a list, then filter out the lists that have fewer than two elements:
from collections import defaultdict
d = defaultdict(list)
for a,b,c in l:
d[b,c].append(a)
print([tuple(val) for val in d.values() if len(val)>1])
[('a', 'h'), ('d', 'g')]
To guarantee first match order use an OrderedDict:
from collections import OrderedDict
d = OrderedDict()
for a,b,c in l:
d.setdefault((b,c),[]).append(a)
print([tuple(val) for val in d.values() if len(val)>1])
I think this solution will preserve order (based on initial match location):
from itertools import groupby
from operator import itemgetter
from collections import defaultdict
x = [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
groupings, seen_list=defaultdict(list), []
for key, value in groupby(x, itemgetter(1, 2)):
if key not in seen_list:
seen_list.append(key)
groupings[key].extend(list(map(itemgetter(0),value)))
print([groupings[key] for key in seen_list])
if order is not important you can disregard the seen_list and just print the groupings.values()
x = [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
groupings=defaultdict(list)
for key, value in groupby(x, itemgetter(1, 2)):
groupings[key].extend(list(map(itemgetter(0),value)))
print(groupings.values())
Maybe not so Pythonic, but a bit easier:
>>> a = [('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'e', 'f'), ('h', 'b', 'c')]
>>> c = {}
>>> [c[j+k].append(i) if j+k in c else c.update({j+k:[i]}) for i,j,k in a]
>>> c = c.values()
>>> print c
[['d', 'g'], ['a', 'h']]

All combinations of a mapped list of lists in python

Hi how can i get a mapped list to print all possible combinations
say the dict mapping is = {1:[a,b],2:[c,d]......
So with the list [1,2] and the sample mapping above, I would like to collect all possible combinations of the pairs a,b against c,d into a list.
Have a look at the combinatoric functions in the itertools module.
If you're looking for all the pairings of ab against cd, the product function should help:
>>> d = {1: ['a','b'], 2: ['c', 'd']}
>>> for t in product(*d.values()):
print t
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
If you're looking for all combinations of abcd taken r at a time for the various sizes of r, then the combinations function should get the job done:
>>> for r in range(5):
for t in combinations('abcd', r):
print t
()
('a',)
('b',)
('c',)
('d',)
('a', 'b')
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
('c', 'd')
('a', 'b', 'c')
('a', 'b', 'd')
('a', 'c', 'd')
('b', 'c', 'd')
('a', 'b', 'c', 'd')
from itertools import product
mapping = {1:['a','b'], 2:['c','d']}
data = [1, 2]
for combo in product(*(mapping[d] for d in data)):
print combo
results in
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
Edit it sounds like what you actually want is
strings = [''.join(combo) for combo in product(*(mapping[d] for d in data))]
which gives strings == ['ac', 'ad', 'bc', 'bd'].

Finding permutations and combinations using Python

I have 2 variables - a and b. I need to fill up k places using these variables. So if k = 3 output should be
[a,a,a], [a,a,b] , [a,b,a], [b,a,a], [a,b,b], [b,a,b], [b,b,a] and [b,b,b]
Input - k
Output - All the combinations
How do I code this in Python? Can itertools be of any help here?
>>> import itertools
>>> list(itertools.product('ab', repeat=3))
[('a', 'a', 'a'), ('a', 'a', 'b'), ('a', 'b', 'a'), ('a', 'b', 'b'), ('b', 'a', 'a'), ('b', 'a', 'b'), ('b', 'b', 'a'), ('b', 'b', 'b')]
def genPerm(varslist, pos, resultLen, result, resultsList):
    """Collect every way of filling ``resultLen`` places with items of *varslist*.

    Recursively extends *result* one place at a time and appends each
    completed sequence to *resultsList* (which is mutated in place). Items
    may repeat, so a top-level call with ``pos=0`` and ``result=[]`` appends
    ``len(varslist) ** resultLen`` lists, in the same order as
    ``itertools.product(varslist, repeat=resultLen)``.

    Args:
        varslist: The values that may be placed in each position.
        pos: Number of places already filled; pass 0 for a top-level call.
        resultLen: Total number of places to fill.
        result: The partial sequence built so far; pass ``[]`` initially.
            It is never mutated.
        resultsList: Output list that receives each completed sequence.

    Returns:
        None. Results are delivered through *resultsList*.
    """
    if pos > resultLen:
        # Already past the requested length: nothing to produce.
        return
    if pos == resultLen:
        # All places are filled. Store a copy so the caller's list object is
        # never aliased inside resultsList (matters when resultLen is 0).
        resultsList.append(list(result))
        return
    for e in varslist:
        # result + [e] builds a new list, so sibling branches do not share
        # state.
        genPerm(varslist, pos + 1, resultLen, result + [e], resultsList)
Call with:
genPerm([a,b], 0, resLength, [], resultsList);

Categories

Resources