Here I have a code that initiates by a list, it takes two random letter and put them back into the main list. Then I count each letter from each generated list:
import random
import collections
def randMerge(l:list, count:int) -> list:
return l + [random.sample(l,k=count)]
def flatten(d):
return [i for b in [[c] if not isinstance(c, list) else flatten(c)
for c in d] for i in b]
num = 2
aList = ['A','B','C','D']
newList = aList[:]
for _ in range(3):
newList = randMerge(newList,num)
print(newList)
new_counts = collections.Counter(flatten(newList))
print(new_counts)
which gives:
['A', 'B', 'C', 'D', ['A', 'C']]
Counter({'A': 2, 'C': 2, 'B': 1, 'D': 1})
['A', 'B', 'C', 'D', ['A', 'C'], ['D', 'A']]
Counter({'A': 3, 'C': 2, 'D': 2, 'B': 1})
['A', 'B', 'C', 'D', ['A', 'C'], ['D', 'A'], ['A', 'B']]
Counter({'A': 4, 'B': 2, 'C': 2, 'D': 2})
Now I wonder how can I make a dataframe such that each column the numbers in counters and the row will be representing the letters. I did this:
df = pandas.DataFrame.from_dict(new_counts, orient='index')
yet this gives me only the last Counter. Also how can I make a histogram of each Counter and show them together?
If you need each column to represent the contents of a Counter object, you can create your dataframe from the list of objects, then transpose.
import collections, random, pandas as pd
def randMerge(l:list, count:int) -> list:
return l + [random.sample(l,k=count)]
def flatten(d):
return [i for b in [[c] if not isinstance(c, list) else flatten(c)
for c in d] for i in b]
num = 2
aList = ['A','B','C','D']
res = []
newList = aList[:]
for _ in range(3):
newList = randMerge(newList,num)
new_counts = collections.Counter(flatten(newList))
res.append(new_counts)
print(res)
# [Counter({'A': 2, 'C': 2, 'B': 1, 'D': 1}),
# Counter({'C': 3, 'A': 2, 'D': 2, 'B': 1}),
# Counter({'C': 4, 'A': 2, 'B': 2, 'D': 2})]
df = pd.DataFrame(res).T
print(df)
# 0 1 2
# A 2 2 2
# B 1 1 1
# C 2 3 5
# D 1 2 3
Related
I have created two list.
list1= [a,b,c,a,d]
list2=[1,2,3,4,5]
I want to find relationship between this two list based on index position i.e
In list1 a is repeated 2 times index 0,3 .in list2 index 0,3 values are 1 ,4 the relation is a one to many is a:{1,4}
next b not repeated in list 1 and it index is 1 and list2 index 1 value is 2 ,the relation is one to one b:{2}
my expected output will be {a:{1,4},b:{2},c:{3},d:{5}}
I'd use a defaultdict:
from collections import defaultdict
list1 = ['a', 'b', 'c', 'a', 'd']
list2 = [1, 2, 3, 4, 5]
result = defaultdict(set)
for value1, value2, in zip(list1, list2):
result[value1].add(value2)
print(dict(result))
outputs
{'a': {1, 4}, 'b': {2}, 'c': {3}, 'd': {5}}
You can use a combination of dictionary and list comprehension to do this:
{x: [list2[i] for i, j in enumerate(list1) if j == x] for x in list1}
output:
{'a': [1, 4], 'b': [2], 'c': [3], 'd': [5]}
a = ['a', 'b', 'c', 'a', 'd']
b = [1, 2, 3, 4, 5]
ret = {}
for idx, _a in enumerate(a):
value = ret.get(_a, ret.setdefault(_a, []))
value.append(b[idx])
And ret will be the output
Option is to zip the two lists:
L = list(zip(list1, list2))
Result:
[('a', 1), ('b', 2), ('c', 3), ('a', 4), ('d', 5)]
Use it to create a dictionary with sets as values:
D ={}
for key in L:
if key[0] not in D:
D[key[0]] = {key[1]}
else:
D[key[0]].add(key[1])
I would not do it this way in real code, but this approach is mildly entertaining and perhaps educational.
from collections import defaultdict
from itertools import groupby
from operator import itemgetter
xs = ['a', 'b', 'c', 'a', 'd']
ys = [1, 2, 3, 4, 5]
d = {
x : set(y for _, y in group)
for x, group in groupby(sorted(zip(xs, ys)), key = itemgetter(0))
}
print(d) # {'a': {1, 4}, 'b': {2}, 'c': {3}, 'd': {5}}
It's not from pure python, as this question tagged with pandas I tried this way.
Option-1
df=pd.DataFrame({'l1':list1,'l2':list2})
res1=df.groupby('l1').apply(lambda x:x.l2.values.tolist()).to_dict()
Option-2
print df.groupby('l1')['l2'].unique().to_dict()
Output:
{'a': [1, 4], 'c': [3], 'b': [2], 'd': [5]}
I have this list:
myList = [a, a, a, b, b, a, c, c, b, a]
I want to count the inflow and the outflow for every unique item.
Inflow for 'a' = number of appearances = 5
(when a transition ENTERS in 'a')
Outflow for 'a' = number of different characters after 'a' = 2
(when a transition EXITS from 'a' to another character)
For inflow I have this and it works:
myListDict = {}
for item in myList:
myListDict.setdefault(item, 0)
myListDict[item] += 1
But I don't really know how to do this in a fast and elegant way for the outflow, on a single overall iteration, if possible.
If you want to calculate both inflow and outflow in one single pass, you could use this structure:
from collections import Counter
last_char = None
my_list = "aaabbaccba"
inflow = Counter()
outflow = Counter()
for char in my_list:
inflow[char] += 1
if last_char and char != last_char:
outflow[last_char] += 1
last_char = char
print(inflow)
print(outflow)
It outputs:
Counter({'a': 5, 'b': 3, 'c': 2})
Counter({'a': 2, 'b': 2, 'c': 1})
Note that with Counter, you don't need setdefault.
Using collections.Counter and itertools.groupby:
from collections import Counter
from itertools import groupby
def in_out_flow(lst):
in_flow = Counter(lst)
out_flow = Counter(k for k, _ in groupby(lst))
out_flow[lst[-1]] -= 1
for k,v in in_flow.items():
print('key: {}, in flow: {}, out flow: {}'.format(k, v, out_flow[k]))
Examples:
in_out_flow(['a', 'a', 'a', 'b', 'b', 'a', 'c', 'c', 'b', 'a'])
print('##')
in_out_flow(['a', 'a', 'a', 'b', 'a', 'c', 'a', 'b'])
Outputs:
key: a, in flow: 5, out flow: 2
key: b, in flow: 3, out flow: 2
key: c, in flow: 2, out flow: 1
##
key: a, in flow: 5, out flow: 3
key: b, in flow: 2, out flow: 1
key: c, in flow: 1, out flow: 1
I use itertools.groupby to eliminate identical successive items, then count the inflow transitions. For the outflow, we just have to substract 1 to the inflow count for the last item of the list.
from itertools import groupby
from collections import Counter
myList = ['a', 'a', 'a', 'b', 'b', 'a', 'c', 'c', 'b', 'a']
uniques = [key for key, g in groupby(myList)] # ['a', 'b', 'a', 'c', 'b', 'a']
c = Counter(uniques)
inflow = dict(c)
c.update({myList[-1]: -1}) # No outflow for the last element
outflow = dict(c)
print(inflow)
# {'a': 3, 'b': 2, 'c': 1}
print(outflow)
# {'a': 2, 'b': 2, 'c': 1}
I am trying to learn Python dictionary comprehension, and I think it is possible to do in one line what the following functions do. I wasn't able to make the n+1 as in the first or avoid using range() as in the second.
Is it possible to use a counter that automatically increments during the comprehension, as in test1()?
def test1():
l = ['a', 'b', 'c', 'd']
d = {}
n = 1
for i in l:
d[i] = n
n = n + 1
return d
def test2():
l = ['a', 'b', 'c', 'd']
d = {}
for n in range(len(l)):
d[l[n]] = n + 1
return d
It's quite simple using the enumerate function:
>>> L = ['a', 'b', 'c', 'd']
>>> {letter: i for i,letter in enumerate(L, start=1)}
{'a': 1, 'c': 3, 'b': 2, 'd': 4}
Note that, if you wanted the inverse mapping, i.e. mapping 1 to a, 2 to b etc, you could simply do:
>>> dict(enumerate(L, start=1))
{1: 'a', 2: 'b', 3: 'c', 4: 'd'}
This works
>>> l = ['a', 'b', 'c', 'd']
>>> { x:(y+1) for (x,y) in zip(l, range(len(l))) }
{'a': 1, 'c': 3, 'b': 2, 'd': 4}
x = [['a', 'b', 'c'], ['a', 'c', 'd'], ['e', 'f', 'f']]
Let's say we have a list with random str letters.
How can i create a function so it tells me how many times the letter 'a' comes out, which in this case 2. Or any other letter, like 'b' comes out once, 'f' comes out twice. etc.
Thank you!
You could flatten the list and use collections.Counter:
>>> import collections
>>> x = [['a', 'b', 'c'], ['a', 'c', 'd'], ['e', 'f', 'f']]
>>> d = collections.Counter(e for sublist in x for e in sublist)
>>> d
Counter({'a': 2, 'c': 2, 'f': 2, 'b': 1, 'e': 1, 'd': 1})
>>> d['a']
2
import itertools, collections
result = collections.defaultdict(int)
for i in itertools.chain(*x):
result[i] += 1
This will create result as a dictionary with the characters as keys and their counts as values.
Just FYI, you can use sum() to flatten a single nested list.
>>> from collections import Counter
>>>
>>> x = [['a', 'b', 'c'], ['a', 'c', 'd'], ['e', 'f', 'f']]
>>> c = Counter(sum(x, []))
>>> c
Counter({'a': 2, 'c': 2, 'f': 2, 'b': 1, 'e': 1, 'd': 1})
But, as Blender and John Clements have addressed, itertools.chain.from_iterable() may be more clear.
>>> from itertools import chain
>>> c = Counter(chain.from_iterable(x)))
>>> c
Counter({'a': 2, 'c': 2, 'f': 2, 'b': 1, 'e': 1, 'd': 1})
I need to take a list and use a dictionary to catalogue where a particular item occurs in a list, as an example:
L = ['a', 'b', 'c', 'b', 'c', 'a', 'e']
the dictionary needs to contain the following:
D = {'a': 0, 5 , 'b': 1, 3 , 'c': 2, 4 , 'e': 6}
However if I use what I wrote:
for i in range(len(word_list)):
if D.has_key('word_list[i]') == False:
D['word_list[i]'] = i
else:
D[word_list[i]] += i
Then I get a KeyError for a certain word and I don't understand why I should be getting an error.
if D.has_key('word_list[i]') == False:
Uh, what?
At the very least, you should drop the quotes:
if D.has_key(word_list[i]) == False:
But you're also misusing a number of Python structures:
Why are summing up the indices?
Why are you comparing to False?
Shouldn't you be using setdefault
Like this:
for i in range(len(word_list)):
D.setdefault(word_list[i], []).append(i)
I modified you solution a bit to work
word_list = ['a', 'b', 'c', 'b', 'c', 'a', 'e']
dict = {'a': [], 'b': [], 'c': [], 'e': []}
for i in range(len(word_list)):
if word_list[i] not in dict:
dict[word_list[i]] = [i]
else:
dict[word_list[i]].append(i)
Result
{'a': [0, 5], 'c': [2, 4], 'b': [1, 3], 'e': [6]}
I think this would be the shortest solution for your problem:
>>> from collections import defaultdict
>>> D = defaultdict(list)
>>> for i,el in enumerate(L):
D[el].append(i)
>>> D
defaultdict(<type 'list'>, {'a': [0, 5], 'c': [2, 4], 'b': [1, 3], 'e': [6]})
If you want to stick with dict, correcting your code I would came up with:
>>> D = {}
>>> for i,el in enumerate(L):
if el not in D:
D[el] = [i] #crate a new list
else:
D[el].append(i) #appending to the existing list
>>> D
{'a': [0, 5], 'c': [2, 4], 'b': [1, 3], 'e': [6]}
Also, there is a setdefault method in dict which can be used:
>>> D = {}
>>> for i,el in enumerate(L):
D.setdefault(el,[]).append(i)
>>> D
{'a': [0, 5], 'c': [2, 4], 'b': [1, 3], 'e': [6]}
But I prefer to use defaultdict from collections.