Match list's index based off its value - python

I am new to Python and working on a problem where I have to match a list of indices to a list of value with 2 conditions:
If there is a repeated index, then the values should be summed
If there is no index in the list, then value should be 0
For example, below are my 2 lists: 'List of Inds' and 'List of Vals'. So at index 0, my value is 5; at index 1, my value is 4; at index 2, my value is 3 (2+1), at index 3, may value 0 (since no value associated with the index) and so on.
Input:
'List of Inds' = [0,1,4,2,2]
'List Vals' = [5,4,3,2,1]
Output = [5,4,3,0,3]
I have been struggling with it for few days and can't find anything online that can point me in the right direction. Thank you.

List_of_Inds = [0,1,4,2,2]
List_Vals = [5,4,3,2,1]
dic ={}
i = 0
for key in List_of_Inds:
if key not in dic:
dic[key] = 0
dic[key] = List_Vals[i]+dic[key]
i = i+1
output = []
for key in range(0, len(dic)+1):
if key in dic:
output.append(dic[key])
else:
output.append(0)
print(dic)
print(output)
output:
{0: 5, 1: 4, 4: 3, 2: 3}
[5, 4, 3, 0, 3]

The following code works as desired. In computer science it is called "Sparse Matrix" where the data is kept only for said indices, but the "virtual size" of the data structure seems large from the outside.
import logging
class SparseVector:
def __init__(self, indices, values):
self.d = {}
for c, indx in enumerate(indices):
logging.info(c)
logging.info(indx)
if indx not in self.d:
self.d[indx] = 0
self.d[indx] += values[c]
def getItem(self, key):
if key in self.d:
return self.d[key]
else:
return 0
p1 = SparseVector([0,1,4,2,2], [5,4,3,2,1])
print p1.getItem(0);
print p1.getItem(1);
print p1.getItem(2);
print p1.getItem(3);
print p1.getItem(4);
print p1.getItem(5);
print p1.getItem(6);

Answer code is
def ans(list1,list2):
dic={}
ans=[]
if not(len(list1)==len(list2)):
return "Not Possible"
for i in range(0,len(list1)):
ind=list1[i]
val=list2[i]
if not(ind in dic.keys()):
dic[ind]=val
else:
dic[ind]+=val
val=len(list1)
for i in range(0,val):
if not(i in dic.keys()):
ans.append(0)
else:
ans.append(dic[i])
return ans
To test:
print(ans([0,1,4,2,2], [5,4,3,2,1]))
output:
[5, 4, 3, 0, 3]
Hope it helps
Comment if you dont understand any step

what you can do is sort the indexes and values in an ascending order, and then sum it up. Here is an example code:
import numpy as np
ind = [0,1,4,2,2]
vals = [5,4,3,2,1]
points = zip(ind,vals)
sorted_points = sorted(points)
new_ind = [point[0] for point in sorted_points]
new_val = [point[1] for point in sorted_points]
output = np.zeros((len(new_ind)))
for i in range(len(new_ind)):
output[new_ind[i]] += new_val[i]
In this code, the index values are sorted to be in ascending order and then the value array is rearranged according to the sorted index array. Then, using a simple for loop, you can sum the values of each existing index and calculate the output.

This is a grouping problem. You can use collections.defaultdict to build a dictionary mapping, incrementing values in each iteration. Then use a list comprehension:
indices = [0,1,4,2,2]
values = [5,4,3,2,1]
from collections import defaultdict
dd = defaultdict(int)
for idx, val in zip(indices, values):
dd[idx] += val
res = [dd[idx] for idx in range(max(dd) + 1)]
## functional alternative:
# res = list(map(dd.get, range(max(dd) + 1)))
print(res)
# [5, 4, 3, 0, 3]

Related

How to loop through a dictionary of dictionaries and make a 2d array?

So, I have a dictionary like this:
dic_parsed_sentences = {'religion': {'david': 1, 'joslin': 1, 'apolog': 5, 'jim': 1, 'meritt': 2},
'sport': {'sari': 1, 'basebal': 1, 'kolang': 5, 'footbal': 1, 'baba': 2},
'education': {'madrese': 1, 'kelas': 1, 'yahyah': 5, 'dars': 1},
'computer': {'net': 1, 'internet': 1},
'windows': {'copy': 1, 'right': 1}}
I want to loop through it based on the length of the dictionaries within that dictionary.
For example,
it has two items with length 5, one item with length 4, and two items with length 2. I want to process the same length items together (something like a group by in pandas).
So the output of the first iteration will look like this (as you see only items with length 5 are available here):
[[david, joslin, apolog, jim, meritt],
[sari, baseball, kolang, footbal, baba]]
and next iteration it will make the next same length items:
[[madrese, kelas, yahyah, dars]]
And the last iteration:
[[net, internet],
[copy, right]]
Why do we only have three iterations here? Because we only have three different lengths of items within the dictionary dic_parsed_sentences.
I have done something like this, but I dont know how to iterate through the same length items:
for i in dic_parsed_sentences.groupby(dic_parsed_sentences.same_length_items): # this line is sodoku line I dont know how to code it(I mean iterate through same length items in the dicts)
for index_file in dic_parsed_sentences:
temp_sentence = dic_parsed_sentences[index_file]
keys_words = list(temp_sentence.keys())
for index_word in range(len(keys_words)):
arr_sent_wids[index_sentence, index_word] =
keys_words[index_word]
index = index + 1
index_sentence = index_sentence + 1
Update:
for length, dics in itertools.groupby(dic_parsed_sentences, len):
for index_file in dics:
temp_sentence = dics[index_file]
keys_words = list(temp_sentence.keys())
for index_word in range(len(keys_words)):
test_sent_wids[index_sentence, index_word] = lookup_word2id(keys_words[index_word])
index = index + 1
index_sentence = index_sentence + 1
You can use itertools.groupby after sorting the dictionary elements by length.
import itertools
items = sorted(dic_parsed_sentences.values(), key = len, reverse = True)
for length, dics in itertools.groupby(items, len):
# dics is all the nested dictionaries with this length
for temp_sentence in dics:
keys_words = list(temp_sentence.keys())
for index_word in range(len(keys_words)):
test_sent_wids[index_sentence, index_word] = lookup_word2id(keys_words[index_word])
index = index + 1
index_sentence = index_sentence + 1
bylen = {}
for v in dic_parsed_sentences.values():
l = len(v)
if not l in bylen:
bylen[l] = []
bylen[l].append(list(v.keys()))
for k in reversed(sorted(bylen.keys())):
# use bylen[k]
You can do it using the following method:
finds = [[key, len(dic_parsed_sentences[key])] for key in dic_parsed_sentences]
finds.sort(reverse=True, key=lambda x: x[1])
previous = finds[0][1]
res = []
for elem in finds:
current = elem[1]
if current != previous:
previous = current
print(res)
res = []
res.append(list(dic_parsed_sentences[elem[0]]))
print(res)

How to iterate while for values in a dictionary?

I'm a new programmer. I need help with my code. In code, I have a dictionary that its values are a list. I want to do something in 'while' until the dictionary has a value. I used while(len(migration))!= 0, but it's not correct. How can I iterate while for all values in a dictionary?
At the end of while, I want to delete values in the dictionary, but I don't know how can I do this and update my dictionary's values.
migration_p = {2: [3, 4], 3: [3]}
def q_sequence():
global p, sequence_q, r_s, d_s, d, partition, migration_p
while len(migration_p) != 0:
sequence_qi = []
migration_pi = copy.deepcopy(migration_p)
while len(migration_pi) != 0:
tqi_min = math.inf
for p in migration_pi.keys():
# if len(migration_pi[p]) < M - A:
for d in migration_pi[p]:
for r_src in migration_pi.keys():
if r_src not in sequence_qi:
xx = time_qi(d, p)[0]
if xx < tqi_min:
tqi_min = xx
r_s = r_src
d_s = d
sequence_qi.append([(time_qi(d, p)[1], d_s), r_s])
migration_pi = {p: d for p, d in migration_pi.items() if p != r_s}
migration_pi = {p: d for p, d in migration_pi.items() if (p != r_s or d != d_s)}
sequence_q.append(sequence_qi)
# print(sequence_q)
break
return sequence_q
I expect the output to be [[[(1, 3), 2], [(1, 3), 3]]] but the actual output is [[[(1, 3), 2], [(1, 3), 3], [(2, 4), 2]]].
To iterate on values
for value in dictionary.values():
...
do this to retrieve pair of key and value
for key, value in yourDictionary:
print "this is key : "+key
print "this is value : "+value
for key in migration.keys():
print "Key: " + key
print "Value " + migration.pop(key) # This will delete the key from your dict and return a value
migration[key] = new_value # This will create the key again and update it's value
Would it help to know there's a built in way to iterate over all entries in a dictionary?
I can't remember exactly, but it is something to the effect of
for key in nameOfDictionary:
print(nameOfDictionary[key])
Then if after you did all of your iterative stuff, you could make it delete the dictionary by doing something like this.
for key in nameOfDictionary:
print(nameOfDictionary[key])
del nameOfDictionary
Optionally, if you want the dictionary to still exist, you can use nameOfDictionary.clear to simply remove all entries.

Creating a dictionary that keeps track of the count of the number of items in a list

I am a python newbie and im trying to use a dictionary to count the number of items in x.
x = [1,2,1,4,3,2,5,9]
def counts(my_list):
counts = dict()
for item in my_list:
counts[x] = counts.get(item,0)+1
return counts
counts(x)
Any help that makes this work would be greatly appreciated!!
You should set a default value 0 using counts.setdefault(item, 0) and then increment with counts[item] += 1 for each item.
You can also used the collections.Counter class, that does just that.
import collections
c = collections.Counter(your_array)
If you are trying to find each element count in the list. Below solution will be helpful.
x = [1,2,1,4,3,2,5,9]
def counts(my_list):
item_count = {}
for item in my_list:
item_count[item] = item_count.get(item,0)+1
return counts
counts(x)
You can also:
def counts(my_list):
counts= {}
for item in x:
counts[item] = x.count(item)
return counts
print(counts(x))
Or have fun with it and :
def counts(my_list):
counts = {item: x.count(item) for item in x}
return counts
print(counts(x))
Output
(xenial)vash#localhost:~/python/stack_overflow$ python3.7 list_dict.py
{1: 2, 2: 2, 4: 1, 3: 1, 5: 1, 9: 1}

Problems with Python Dictionarys and nested Lists

I am trying to create a dictionary that has a nested list inside of it.
The goal would be to have it be:
key : [x,y,z]
I am pulling the information from a csv file and counting the number of times a certain key shows up in each column. However I am getting the below error
> d[key][i] = 1
KeyError: 'owner'
Where owner is the title of my column.
if __name__ == '__main__':
d = {}
with open ('sample.csv','r') as f:
reader = csv.reader(f)
for i in range(0,3):
for row in reader:
key = row[0]
if key in d:
d[key][i] +=1
else:
d[key][i] = 1
for key,value in d.iteritems():
print key,value
What do I tweak in this loop to have it create a key if it doesn't exist and then add to it if it does?
The problem is, that you try to use a list ([i]) where no list is.
So you have to replace
d[key][i] = 1
with
d[key] = [0,0,0]
d[key][i] = 1
This would first create the list with three entries (so you can use [0], [1] and [2] afterward without error) and then assigns one to the correct entry in the list.
You can use defaultdict:
from collections import defaultdict
ncols = 3
d = defaultdict(lambda: [0 for i in range(ncols)])
Use a try, catch block to append a list to the new key, then increment as needed
if __name__ == '__main__':
d = {}
with open ('sample.csv','r') as f:
reader = csv.reader(f)
for i in xrange(0,3):
for row in reader:
key = row[i]
try: d[key][i] += 1
except KeyError:
d[key] = [0, 0, 0]
d[key][i] = 1
for key,value in d.iteritems():
print key,value
Using defaultdict and Counter you can come up with a dict that allows you to easily measure how many times a key appeared in a position (in this case 1st, 2nd or 3rd, by the slice)
csv = [
['a','b','c','d'],
['e','f','g', 4 ],
['a','b','c','d']
]
from collections import Counter, defaultdict
d = defaultdict(Counter)
for row in csv:
for idx, value in enumerate(row[0:3]):
d[value][idx] += 1
example usage:
print d
print d['a'][0] #number of times 'a' has been found in the 1st position
print d['b'][2] #number of times 'b' found in the 3rd position
print d['f'][1] #number of times 'f' found in 2nd position
print [d['a'][n] for n in xrange(3)] # to match the format requested in your post
defaultdict(<class 'collections.Counter'>, {'a': Counter({0: 2}), 'c': Counter({2: 2}), 'b': Counter({1: 2}), 'e': Counter({0: 1}), 'g': Counter({2: 1}), 'f': Counter({1: 1})})
2
0
1
[2, 0, 0]
Or put into a function:
def occurrences(key):
return [d[key][n] for n in xrange(3)]
print occurrences('a') # [2, 0, 0]

Random Python dictionary key, weighted by values

I have a dictionary where each key has a list of variable length, eg:
d = {
'a': [1, 3, 2],
'b': [6],
'c': [0, 0]
}
Is there a clean way to get a random dictionary key, weighted by the length of its value?
random.choice(d.keys()) will weight the keys equally, but in the case above I want 'a' to be returned roughly half the time.
This would work:
random.choice([k for k in d for x in d[k]])
Do you always know the total number of values in the dictionary? If so, this might be easy to do with the following algorithm, which can be used whenever you want to make a probabilistic selection of some items from an ordered list:
Iterate over your list of keys.
Generate a uniformly distributed random value between 0 and 1 (aka "roll the dice").
Assuming that this key has N_VALS values associated with it and there are TOTAL_VALS total values in the entire dictionary, accept this key with a probability N_VALS / N_REMAINING, where N_REMAINING is the number of items left in the list.
This algorithm has the advantage of not having to generate any new lists, which is important if your dictionary is large. Your program is only paying for the loop over K keys to calculate the total, a another loop over the keys which will on average end halfway through, and whatever it costs to generate a random number between 0 and 1. Generating such a random number is a very common application in programming, so most languages have a fast implementation of such a function. In Python the random number generator a C implementation of the Mersenne Twister algorithm, which should be very fast. Additionally, the documentation claims that this implementation is thread-safe.
Here's the code. I'm sure that you can clean it up if you'd like to use more Pythonic features:
#!/usr/bin/python
import random
def select_weighted( d ):
# calculate total
total = 0
for key in d:
total = total + len(d[key])
accept_prob = float( 1.0 / total )
# pick a weighted value from d
n_seen = 0
for key in d:
current_key = key
for val in d[key]:
dice_roll = random.random()
accept_prob = float( 1.0 / ( total - n_seen ) )
n_seen = n_seen + 1
if dice_roll <= accept_prob:
return current_key
dict = {
'a': [1, 3, 2],
'b': [6],
'c': [0, 0]
}
counts = {}
for key in dict:
counts[key] = 0
for s in range(1,100000):
k = select_weighted(dict)
counts[k] = counts[k] + 1
print counts
After running this 100 times, I get select keys this number of times:
{'a': 49801, 'c': 33548, 'b': 16650}
Those are fairly close to your expected values of:
{'a': 0.5, 'c': 0.33333333333333331, 'b': 0.16666666666666666}
Edit: Miles pointed out a serious error in my original implementation, which has since been corrected. Sorry about that!
Without constructing a new, possibly big list with repeated values:
def select_weighted(d):
offset = random.randint(0, sum(d.itervalues())-1)
for k, v in d.iteritems():
if offset < v:
return k
offset -= v
Given that your dict fits in memory, the random.choice method should be reasonable. But assuming otherwise, the next technique is to use a list of increasing weights, and use bisect to find a randomly chosen weight.
>>> import random, bisect
>>> items, total = [], 0
>>> for key, value in d.items():
total += len(value)
items.append((total, key))
>>> items[bisect.bisect_left(items, (random.randint(1, total),))][1]
'a'
>>> items[bisect.bisect_left(items, (random.randint(1, total),))][1]
'c'
Make a list in which each key is repeated a number of times equal to the length of its value. In your example: ['a', 'a', 'a', 'b', 'c', 'c']. Then use random.choice().
Edit: or, less elegantly but more efficiently, try this: take the sum of the lengths of all values in the dictionary, S (you can cache and invalidate this value, or keep it up to date as you edit the dictionary, depending on the exact usage pattern you anticipate). Generate a random number from 0 to S, and do a linear search through the dictionary keys to find the range into which your random number falls.
I think that's the best you can do without changing or adding to your data representation.
Here is some code that is based on a previous answer I gave for probability distribution in python but is using the length to set the weight. It uses an iterative markov chain so that it does not need to know what the total of all of the weights are. Currently it calculates the max length but if that is too slow just change
self._maxw = 1
to
self._maxw = max lenght
and remove
for k in self._odata:
if len(self._odata[k])> self._maxw:
self._maxw=len(self._odata[k])
Here is the code.
import random
class RandomDict:
"""
The weight is the length of each object in the dict.
"""
def __init__(self,odict,n=0):
self._odata = odict
self._keys = list(odict.keys())
self._maxw = 1 # to increase speed set me to max length
self._len=len(odict)
if n==0:
self._n=self._len
else:
self._n=n
# to increase speed set above max value and comment out next 3 lines
for k in self._odata:
if len(self._odata[k])> self._maxw:
self._maxw=len(self._odata[k])
def __iter__(self):
return self.next()
def next(self):
while (self._len > 0) and (self._n>0):
self._n -= 1
for i in range(100):
k=random.choice(self._keys)
rx=random.uniform(0,self._maxw)
if rx <= len(self._odata[k]): # test to see if that is the value we want
break
# if you do not find one after 100 tries then just get a random one
yield k
def GetRdnKey(self):
for i in range(100):
k=random.choice(self._keys)
rx=random.uniform(0,self._maxw)
if rx <= len(self._odata[k]): # test to see if that is the value we want
break
# if you do not find one after 100 tries then just get a random one
return k
#test code
d = {
'a': [1, 3, 2],
'b': [6],
'c': [0, 0]
}
rd=RandomDict(d)
dc = {
'a': 0,
'b': 0,
'c': 0
}
for i in range(100000):
k=rd.GetRdnKey()
dc[k]+=1
print("Key count=",dc)
#iterate over the objects
dc = {
'a': 0,
'b': 0,
'c': 0
}
for k in RandomDict(d,100000):
dc[k]+=1
print("Key count=",dc)
Test results
Key count= {'a': 50181, 'c': 33363, 'b': 16456}
Key count= {'a': 50080, 'c': 33411, 'b': 16509}
I'd say this:
random.choice("".join([k * len(d[k]) for k in d]))
This makes it clear that each k in d gets as many chances as the length of its value. Of course, it is relying on dictionary keys of length 1 that are characters....
Much later:
table = "".join([key * len(value) for key, value in d.iteritems()])
random.choice(table)
I modified some of the other answers to come up with this. It's a bit more configurable. It takes 2 arguments, a list and a lambda function to tell it how to generate a key.
def select_weighted(lst, weight):
""" Usage: select_weighted([0,1,10], weight=lambda x: x) """
thesum = sum([weight(x) for x in lst])
if thesum == 0:
return random.choice(lst)
offset = random.randint(0, thesum - 1)
for k in lst:
v = weight(k)
if offset < v:
return k
offset -= v
Thanks to sth for the base code for this.
import numpy as np
my_dict = {
"one": 5,
"two": 1,
"three": 25,
"four": 14
}
probs = []
elements = [my_dict[x] for x in my_dict.keys()]
total = sum(elements)
probs[:] = [x / total for x in elements]
r = np.random.choice(len(my_dict), p=probs)
print(list(my_dict.values())[r])
# 25
Need to mention random.choices for Python 3.6+:
import random
raffle_dict = {"Person 1": [1,2], "Person 2": [1]}
random.choices(list(raffle_dict.keys()), [len(w[1]) for w in raffle_dict.items()], k=1)[0]
random.choices returns a list of samples, so k=1 if you only need one and we'll take the first item in the list. If your dictionary already has the weights, just get rid of the len or better yet:
raffle_dict = {"Person 1": 1, "Person 2": 10}
random.choices(list(raffle_dict.keys()), raffle_dict.values(), k=1)[0]
See also this question and this tutorial,

Categories

Resources