I have a dictionary associating a probability to a char
d = {'a': 0.2, 'b': 0.3, 'c': 0.4, 'd':0.1}
And I am searching for a way to associate each char with the lowest value of its interval in the cumulative frequency distribution, i.e. every char must be associated with the sum of the probabilities of the previous ones.
I know dictionaries are not ordered, but it should return something like
ddist = {'a': 0, 'b': 0.2, 'c': 0.5, 'd': 0.9}
I tried with a loop but I did not find a way to get the previous values...
Any ideas ?
You can simply iterate over a sorted version of the keys:
d = {'a': 0.2, 'b': 0.3, 'c': 0.4, 'd': 0.1}

# Map every key to the sum of the probabilities of all keys that sort before
# it, i.e. the lower bound of its interval in the cumulative distribution.
ddist = {}
t = 0  # running total of the probabilities seen so far
for key in sorted(d):
    ddist[key] = t  # store the total *before* adding this key's own share
    t += d[key]
As dicts are unordered, you need to define the key order yourself, or use collections.OrderedDict from the start.
def accumulate(seq):
    """Yield, for each item of *seq*, the sum of all items that precede it.

    For ``[x0, x1, x2]`` this yields ``0, x0, x0 + x1``: the first value is
    always ``0`` and the last item of *seq* never contributes to a yielded
    value. Note that this differs from ``itertools.accumulate``, which
    yields the totals *including* the current item.

    Args:
        seq: Any iterable of values supporting ``+`` with ``0``.

    Yields:
        The running total before each item is added.
    """
    total = 0
    for item in seq:
        yield total  # emit the total of the previous items only
        total += item
>>> keys = ['a', 'b', 'c', 'd'] #For your dict, this is sorted(d)
>>> dict(zip(keys, accumulate(d[k] for k in keys)))
{'a': 0, 'c': 0.5, 'b': 0.2, 'd': 0.9}
#or
>>> from collections import OrderedDict
>>> OrderedDict(zip(keys, accumulate(d[k] for k in keys)))
OrderedDict([('a', 0), ('b', 0.2), ('c', 0.5), ('d', 0.9)])
Related
I have a list of 5 lists:
X = [['a','b','c'],['a','d','e'],['a','x','f'],['g','h','j'],['y','u','i']]
I'm trying to find out how to get a list showing the elements that exist in at least 60% of the lists in X.
So I'd want it to return one element ['a'] because the element 'a' exists within 3 of the 5 lists i.e 'a' exists in 60% of the lists in X.
One approach using collections.Counter:
from collections import Counter
from itertools import chain
X = [['a', 'b', 'c'], ['a', 'd', 'e'], ['a', 'x', 'f'], ['g', 'h', 'j'], ['y', 'u', 'i']]

# Number of sub-lists each element occurs in; set(li) makes an element count
# at most once per sub-list.
counts = Counter(chain.from_iterable(set(li) for li in X))

# Minimum number of sub-lists an element must occur in. Deliberately left as
# a float: truncating with int() would lower the bar (e.g. 2.4 -> 2 for four
# lists, letting elements present in only 50% of the lists through).
threshold = 0.6 * len(X)

res = []
for key, count in counts.most_common():
    if count >= threshold:
        res.append(key)
    else:
        # most_common() yields counts in descending order, so no later key
        # can reach the threshold either.
        break

print(res)
Output
['a']
Note that this solution only counts each item one time per list (set(li)).
The easiest approach is to loop over the unique elements of X in a list comprehension and check whether each element is present at least once in at least 60% of the sub-arrays.
import numpy as np
X = np.array([["a", "b", "c"], ["a", "d", "e"], ["a", "x", "f"], ["g", "h", "j"], ["y", "u", "i"]])

# For every distinct element: (X == element) marks its positions,
# .any(axis=1) tells per row whether it occurs there at all, and .mean() of
# those booleans is the fraction of rows containing it.
# .tolist() converts the numpy strings to plain str so the result prints
# as ['a'].
res = [
    element
    for element in np.unique(X).tolist()
    if (X == element).any(axis=1).mean() >= .6
]
print(res)
#['a']
One approach without any imports:
counts = {}  # Number of sub-lists each element appears in
for x in X:  # Iterate through the sub-lists of X
    # dict.fromkeys(x) drops duplicates (keeping first-seen order) so an
    # element is counted at most once per sub-list
    for y in dict.fromkeys(x):
        counts[y] = counts.get(y, 0) + 1  # Add 1 to counts[y]
res = []  # Output list
for k, v in counts.items():  # Iterate through items of counts
    # Divide by the number of sub-lists, len(X) -- not by len(x), which is
    # merely the length of the last sub-list visited above
    if v / len(X) >= 0.6:  # Check if it appears in at least 60% of lists
        res.append(k)  # If it does, append to res
print(res)  # Output res
Output: ['a']
Here is the solution to get the list:
# Collect every distinct element that occurs anywhere in X.
all_members = set()
for sublist in X:  # named `sublist` so the builtin `list` is not shadowed
    all_members = all_members.union(set(sublist))

stats = {}        # member -> number of sub-lists that contain it
frequencies = {}  # member -> fraction of sub-lists that contain it
for member in all_members:
    stats[member] = 0
    for sublist in X:
        if member in sublist:
            stats[member] = stats[member] + 1
    frequencies[member] = stats[member] / len(X)

# Keep the members found in at least 60% of the sub-lists.
l = []
for member in frequencies.keys():
    if frequencies[member] >= 0.6:
        l.append(member)

print("number of occurrences in X:\n", stats)
print("frequencies:\n", frequencies)
print("list of members occurring more than 60% of times:\n", l)
number of occurrences in X:
{'i': 1, 'j': 1, 'b': 1, 'd': 1, 'x': 1, 'a': 3, 'y': 1, 'c': 1, 'f': 1, 'e': 1, 'u': 1, 'g': 1, 'h': 1}
frequencies:
{'i': 0.2, 'j': 0.2, 'b': 0.2, 'd': 0.2, 'x': 0.2, 'a': 0.6, 'y': 0.2, 'c': 0.2, 'f': 0.2, 'e': 0.2, 'u': 0.2, 'g': 0.2, 'h': 0.2}
list of members occurring more than 60% of times:
['a']
Wizards of stackoverflow,
I wish to combine two lists to create a dictionary. I have used dict & zip, however it does not do what I require.
If I had these lists
keys = ['a', 'a', 'b', 'c']
values = [6, 2, 3, 4]
I would like for the dictionary to reflect the average value such that the output would be:
a_dict = {'a' : 4, 'b' : 3, 'c' : 4}
As a bonus (not required): if this is possible, is there any way to get a count of each duplicate?
i.e. output would be followed by 'a' was counted twice, other than just doing the count in the keys.
A straightforward solution (thanks #DeepSpace for dict-comprehension suggestion):
keys = ['a', 'a', 'b', 'c']
values = [6, 2, 3, 4]

# Step 1: collect all values that share a key -> {'a': [6, 2], 'b': [3], 'c': [4]}
out = {}
for k, v in zip(keys, values):
    out.setdefault(k, []).append(v)

# Step 2: replace every list of values by its arithmetic mean.
out = {key: sum(value) / len(value) for key, value in out.items()}
print(out)
Prints:
{'a': 4.0, 'b': 3.0, 'c': 4.0}
If you want count of keys, you can do for example:
# Collect all values that share a key (keys/values as defined above).
out = {}
for k, v in zip(keys, values):
    out.setdefault(k, []).append(v)

# Map every key to a (mean, count) tuple; len(value) is the number of times
# the key occurred in `keys`.
out = {key: (sum(value) / len(value), len(value)) for key, value in out.items()}
print(out)
Prints:
{'a': (4.0, 2), 'b': (3.0, 1), 'c': (4.0, 1)}
Where the second element of values is a count of key.
Solution with itertools (if keys are sorted):
from itertools import groupby
from statistics import mean

keys = ['a', 'a', 'b', 'c']
values = [6, 2, 3, 4]

# groupby() only merges *adjacent* items with equal keys, so order the
# (key, value) pairs by key first; otherwise a key that re-appears later
# would silently overwrite its earlier average.
pairs = sorted(zip(keys, values), key=lambda pair: pair[0])

out = {}
for k, g in groupby(pairs, key=lambda pair: pair[0]):
    out[k] = mean(v for _, v in g)  # average of the values in this group
print(out)
Prints:
{'a': 4, 'b': 3, 'c': 4}
calculating avg and frequency of each key dic = {key: [avg, frequency]}
keys = ['a', 'a', 'b', 'c']
values = [6, 2, 3, 4]

# dic maps key -> [values collected so far, number of occurrences]
dic = {i: [[], 0] for i in keys}
for k, v in zip(keys, values):
    dic[k][0].append(v)
    dic[k][1] += 1

# Collapse each collected list into its average -> key: [avg, frequency]
for entry in dic.values():
    entry[0] = sum(entry[0]) / len(entry[0])
print(dic)
output
{'a': [4.0, 2], 'b': [3.0, 1], 'c': [4.0, 1]}
keys = ['a', 'a', 'b', 'c']
values = [6, 2, 3, 4]

# d accumulates the sum per key, count_dict the number of occurrences.
d, count_dict = dict(), dict()
for key, value in zip(keys, values):
    try:
        d[key] += value
        count_dict[key] += 1
    except KeyError:
        # First time this key is seen: start both tallies.
        d[key] = value
        count_dict[key] = 1

# Turn the sums into averages. The loop variable is called `key` so that the
# input lists `keys` and `values` are not overwritten.
for key in d:
    d[key] = d[key] / count_dict[key]
    print(f'{key} comes {count_dict[key]} times')
print(d)
Let's say I have a dictionary:
dict1 = {'a': 3, 'b': 1.2, 'c': 1.6, 'd': 3.88, 'e': 0.72}
I need to be able to sort this by min and max value and call on them using this function I am still writing (note: 'occurences,' 'avg_scores' and 'std_dev' are all dictionaries and 'words' are the dictionary's keys.):
def sort(words, occurrences, avg_scores, std_dev):
'''sorts and prints the output'''
menu = menu_validate("You must choose one of the valid choices of 1, 2, 3, 4 \n Sort Options\n 1. Sort by Avg Ascending\n 2. Sort by Avg Descending\n 3. Sort by Std Deviation Ascending\n 4. Sort by Std Deviation Descending", 1, 4)
print ("{}{}{}{}\n{}".format("Word", "Occurence", "Avg. Score", "Std. Dev.", "="*51))
if menu == 1:
for i in range (len(word_list)):
print ("{}{}{}{}".format(cnt_list.sorted[i],)
I'm sure I am making this way more difficult on myself than necessary and any help would be appreciated. Thanks!
You can sort the keys based on the associated value. For instance:
dict1 = {'a': 3, 'b': 1.2, 'c': 1.6, 'd': 3.88, 'e': 0.72}

# sorted() iterates over the keys; key=dict1.get orders them by their value
# (ascending). Pass reverse=True for descending order.
for k in sorted(dict1, key=dict1.get):
    print(k, dict1[k])
# Output:
e 0.72
b 1.2
c 1.6
a 3
d 3.88
Use min and max with key:
dict1 = {'a': 3, 'b': 1.2, 'c': 1.6, 'd': 3.88, 'e': 0.72}

# items() yields (key, value) pairs; the key function compares them by value,
# so min_v / max_v are the whole pair with the smallest / largest value.
min_v = min(dict1.items(), key=lambda x: x[1])
max_v = max(dict1.items(), key=lambda x: x[1])
print(min_v, max_v)
You can't sort a dict, only its representation.
But, you can use an ordereddict instead.
from collections import OrderedDict
# Build an OrderedDict from the (key, value) pairs ordered by value, largest
# first, so that iteration follows that order.
dictionnary = OrderedDict(
    sorted(
        {'a': 3, 'b': 1.2, 'c': 1.6, 'd': 3.88, 'e': 0.72}.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
)
I have three dictionaries (or more):
A = {'a':1,'b':2,'c':3,'d':4,'e':5}
B = {'b':1,'c':2,'d':3,'e':4,'f':5}
C = {'c':1,'d':2,'e':3,'f':4,'g':5}
How can I get a dictionary of the average values of every key in the three dictionaries?
For example, given the above dictionaries, the output would be:
{'a':1/1, 'b':(2+1)/2, 'c':(3+2+1)/3, 'd':(4+3+2)/3, 'e':(5+4+3)/3, 'f':(5+4)/2, 'g':5/1}
You can use Pandas, like this:
import pandas as pd
# One row per dictionary, one column per key; keys missing from a dictionary
# become NaN in that row.
df = pd.DataFrame([A, B, C])

# mean() averages each column and skips NaN by default, so every key is
# averaged over the dictionaries that actually contain it. to_dict() returns
# plain Python floats instead of numpy scalars.
answer = df.mean().to_dict()
print(answer)
I use Counter to solve this problem. Please try the following code :)
from collections import Counter
A = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
B = {'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5}
C = {'c': 1, 'd': 2, 'e': 3, 'f': 4, 'g': 5}

sums = Counter()      # key -> sum of its values over all dictionaries
counters = Counter()  # key -> number of dictionaries that contain it
for itemset in [A, B, C]:
    sums.update(itemset)             # a mapping adds its values to the totals
    counters.update(itemset.keys())  # an iterable of keys adds 1 per key

ret = {x: float(sums[x]) / counters[x] for x in sums.keys()}
print(ret)
The easiest way would be to use collections.Counter as explained here, like this:
from collections import Counter
# Sum the values per key. Counter.update() is used instead of
# Counter(A) + Counter(B) + Counter(C) because Counter addition silently
# drops every key whose total is zero or negative.
sums = Counter()
for mapping in (A, B, C):
    sums.update(mapping)
sums = dict(sums)
# Which is {'a': 1, 'c': 6, 'b': 3, 'e': 12, 'd': 9, 'g': 5, 'f': 9}

# (k in A) + (k in B) + (k in C) adds up booleans, i.e. counts how many of the
# dictionaries contain k.
means = {k: sums[k] / float((k in A) + (k in B) + (k in C)) for k in sums}
The result would be:
>>> means
{'a': 1.0, 'b': 1.5, 'c': 2.0, 'd': 3.0, 'e': 4.0, 'f': 4.5, 'g': 5.0}
If you are working in python 2.7 or 3.5 you can use the following:
# Union of all keys. set(A) | set(B) | set(C) works on Python 2 and 3 alike,
# whereas A.keys() + B.keys() raises TypeError on Python 3 (dict views cannot
# be concatenated with +).
keys = set(A) | set(B) | set(C)

# Sum the values that are present (a missing key contributes 0) and divide by
# the number of dictionaries that contain the key.
D = {
    key: (A.get(key, 0) + B.get(key, 0) + C.get(key, 0))
    / float((key in A) + (key in B) + (key in C))
    for key in keys
}
which outputs
D
{'a': 1.0, 'c': 2.0, 'b': 1.5, 'e': 4.0, 'd': 3.0, 'g': 5.0, 'f': 4.5}
if you don't want to use any packages. This doesn't work in python 2.6 and below though.
Here's a very general way to do so (i.e. you can easily change to any aggregation function).:
def _mean_ignoring_missing(values):
    """Return the mean of *values*, skipping the ``None`` placeholders."""
    present = [v for v in values if v is not None]
    return sum(present) / len(present)


def aggregate_dicts(dicts, operation=_mean_ignoring_missing):
    """
    Aggregate a sequence of dictionaries to a single dictionary using `operation`.

    For every key that occurs in at least one dictionary, `operation` receives
    a list with one entry per dictionary (in the order of `dicts`) and must
    reduce it to a single value. Keys that are not found in a dictionary are
    mapped to `None` in that list; `operation` can then choose how to deal
    with those. The default computes the mean of the values that are present.

    `dicts` is traversed more than once, so it must be a re-iterable sequence
    (e.g. a list), not a one-shot iterator. An empty sequence yields `{}`.
    """
    all_keys = set().union(*[el.keys() for el in dicts])
    return {k: operation([dic.get(k, None) for dic in dicts]) for k in all_keys}
example:
dicts_diff_keys = [{'x': 0, 'y': 1}, {'x': 1, 'y': 2}, {'x': 2, 'y': 3, 'c': 4}]
def mean_no_none(l):
    """Return the arithmetic mean of the entries of *l* that are not ``None``.

    Raises:
        ZeroDivisionError: if *l* is empty or contains only ``None``.
    """
    l_no_none = [el for el in l if el is not None]
    return sum(l_no_none) / len(l_no_none)
aggregate_dicts(dicts_diff_keys, operation= mean_no_none)
#{'x': 1.0, 'c': 4.0, 'y': 2.0}
Is there a way to group several keys which points to the same value in a dictionary?
from collections import defaultdict
# Invert the mapping: every value of originaldict becomes a key of newdict
# that lists all the original keys pointing to it.
newdict = defaultdict(list)
for k, v in originaldict.items():
    newdict[v].append(k)
Not exactly sure how you want the result structured, but here's one guess:
from collections import defaultdict
mydict = {'a': 1, 'b': 2, 'c': 3, 'd': 2, 'e': 4, 'f': 2, 'g': 4}

# Step 1: invert the mapping -> value: [all keys that point to that value]
tempdict = defaultdict(list)
for k, v in mydict.items():
    tempdict[v].append(k)

# Step 2: flip it back, using a tuple of keys when several keys share a
# value and the bare key when there is only one.
groupedkeysdict = {}
for k, v in tempdict.items():
    groupedkeysdict[tuple(v) if len(v) > 1 else v[0]] = k
print(groupedkeysdict)
# {'a': 1, 'c': 3, ('e', 'g'): 4, ('b', 'd', 'f'): 2}