List of list increment if exists otherwise extend - python

I have a list of lists as follows
[
['a', 1],
['b',2],
['c',1],
['a', 2],
['c', 5]
]
I want to normalize this list in such a way that for every, 'a', 'b', 'c' etc, I have only one unique entry in the list, and for every duplicate list, the second value which is the quantity is added so that I get something like:
[
['a', 3], # so since 'a' already existed the quantity is added 1 + 2 becomes 3
['b',2],
['c',6] # similarly for 'c' it becomes 1 + 5 = 6
]
How to do this with Python ?

I'd suggest use collections.defaultdict() like below:
from collections import defaultdict

# Input rows are [key, quantity] pairs; the same key may appear several times.
l = [
    ['a', 1],
    ['b', 2],
    ['c', 1],
    ['a', 2],
    ['c', 5],
]

# defaultdict(int) starts every unseen key at 0, so "+=" works on first sight.
d = defaultdict(int)
for key, value in l:
    d[key] += value
print(d.items())
Output:
dict_items([('b', 2), ('a', 3), ('c', 6)])
Also, you can use a try...except instead of collections.defaultdict() if you'd like:
# Try the increment first and fall back to a plain assignment the first
# time a key is seen.
d = {}
for key, value in l:
    try:
        d[key] += value
    except KeyError:
        # First sighting of this key: start its running total.
        d[key] = value
Also, you can try if...else:
# Explicit membership test: add to the running total if the key is already
# known, otherwise start it.
d = {}
for key, value in l:
    if key in d:
        d[key] += value
    else:
        d[key] = value

Here is a longer way to do it, but it returns a list of lists rather than a dict-like object:
def normalizelist(inp):
    """Merge ``[key, count]`` pairs that share a key, summing their counts.

    Returns a new list of ``[key, total]`` lists, ordered by each key's first
    appearance. The rows of ``inp`` are not modified. Keys are compared with
    ``==``.
    """
    out = []
    for key, count in inp:
        for entry in out:
            if entry[0] == key:
                entry[1] += count
                break  # found it, no need to go further
        else:
            # The scan ended without a match, so the key needs to be added.
            out.append([key, count])
    return out
hopefully that helps!

Related

How to call each value of a dictionary by its key?

I want to separate each (key, value) pair from a dictionary and want to call each value by its key name.
I have two lists,
1. ListA = [1, 2, 3, 4, 5]
2. ListB = ['A', 'B', 'C', 'D', 'E']
Now I have created a dictionary like this,
Dict = {1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E'}
So, now if I want to see value for each key, I have to type:
Dict[key].
Now I expect my result to be like:
If I ask for the value of each key, I have to just type key not Dict[key]
and it should give me the answer.
Typing 1 should give A.
Typing 2 should give B.
You can get something close to this by setting each key as an attribute of an object, although honestly it is not a very good idea when you can simply access the dictionary directly.
It won't work for the dictionary you have, though, so my input dictionary here is {"key1":"Apple"}:
class MyDict:
    """Expose the items of a dictionary as attributes of the instance."""

    def __init__(self, input_dict):
        for key, value in input_dict.items():
            setattr(self, key, value)  # setting key as attribute


obj = MyDict({"key1": "Apple"})  # new instance
print(obj.key1)  # this will print Apple
But it still won't work with something like obj.1, so it is better not to mess with it.
If you want to loop over dictionary values, you can use Dict.values():
for v in Dict.values():
    print(v)
# > A
# > B
# ...
Inside a while true loop, keep popping items from your lists until you run into an IndexError like this:
ListA = [1, 2, 3]
ListB = ['A', 'B', 'C']
DictC = {}
# Pair the lists up front-to-front. Both lists are consumed in the process;
# the IndexError raised by pop() on an empty list ends the loop.
while True:
    try:
        DictC[ListA.pop(0)] = ListB.pop(0)
    except IndexError:
        break
print(DictC)

Get sequences of same values within list and count elements within sequences

I'd like to find the amount of values within sequences of the same value from a list:
list = ['A','A','A','B','B','C','A','A']
The result should look like:
result_dic = {A: [3,2], B: [2], C: [1]}
I do not just want the counts of different values in a list as you can see in the result for A.
collections.defaultdict and itertools.groupby
from collections import defaultdict
from itertools import groupby

listy = ['A', 'A', 'A', 'B', 'B', 'C', 'A', 'A']
d = defaultdict(list)
# groupby yields one (value, run) pair per stretch of equal neighbours,
# so the length of each run is exactly the count we want to record.
for k, v in groupby(listy):
    d[k].append(len(list(v)))
d
defaultdict(list, {'A': [3, 2], 'B': [2], 'C': [1]})
groupby will loop through an iterable and lump contiguous things together.
[(k, [*v]) for k, v in groupby(listy)]
[('A', ['A', 'A', 'A']), ('B', ['B', 'B']), ('C', ['C']), ('A', ['A', 'A'])]
So I loop through those results and append the length of each grouped thing to the values of a defaultdict
I'd suggest using a defaultdict and looping through the list.
from collections import defaultdict

sample = ['A', 'A', 'A', 'B', 'B', 'C', 'A', 'A']
result_dic = defaultdict(list)
last_letter = None
num = 0
for letter in sample:
    if last_letter is None or letter == last_letter:
        num += 1
    else:
        # The run of last_letter just ended: record it and start a new run.
        result_dic[last_letter].append(num)
        num = 1
    last_letter = letter
# The final run is never followed by a different letter, so flush it here.
if last_letter is not None:
    result_dic[last_letter].append(num)
Edit
This is my approach, although I'd have a look at @piRSquared's answer because they were keen enough to include groupby as well. Nice work!
I'd suggest looping through the list.
# Sample input from the question (named so it does not shadow the builtin list).
words = ['A', 'A', 'A', 'B', 'B', 'C', 'A', 'A']
result_dic = {}
old_word = ''
for word in words:
    if word not in result_dic:
        # First time this value is seen at all: open its first run.
        result_dic[word] = [1]
    elif word == old_word:
        # Still inside the current run: extend it.
        result_dic[word][-1] += 1
    else:
        # Value seen before but a new run starts here.
        result_dic[word].append(1)
    old_word = word

Matching list elements with element of tuple in list of tuples

I have a list containing strings:
lst = ['a', 'a', 'b']
where each string is, in fact, a category of a corpus, and I need a list of integers that corresponds to the index of that category.
For this purpose, I built a list of tuples where I have each (unique) category and its index, f.ex:
catlist = [(0, 'a'), (1, 'b')]
I now need to iterate over the first list of strings, and if the element matches any of the second elements of the tuple, return the tuple's first element to an array, like this:
[0, 0, 1]
for now I have
# Asker's attempt, kept exactly as posted (the question states it does not work).
catindexes = []
for item in lst:
for i in catlist:
# NOTE: `i` is bound to each tuple of catlist here, yet it is used as an index
# below, and `cat` is not defined anywhere in this snippet.
if cat == catlist[i][i]:
catindexes.append(i)
but this clearly doesn't work and I'm failing to get to the solution.
Any tips would be appreciated.
You were close, after iterating the inner loop, you should check whether the item from the outer loop is actually equal to the tup[1] (each tup represent (0, 'a') or (1, 'b') for example).
if they equal, just append the first element in tup (tup[0]) to the result list.
lst = ['a', 'a', 'b']
catlist = [(0, 'a'), (1, 'b')]
catindexes = []
for item in lst:
    for tup in catlist:
        # tup is an (index, category) pair; keep the index on a category match.
        if item == tup[1]:
            catindexes.append(tup[0])
print(catindexes)
You also can use list comprehension:
# Same nested scan written as a comprehension: for every item, emit the index
# of each (index, category) pair whose category matches it.
catindexes = [
    pair[0]
    for item in lst
    for pair in catlist
    if item == pair[1]
]
>>> lst = ['a', 'a', 'b']
>>> catlist = [(0, 'a'), (1, 'b')]
>>> catindexes = []
>>> for item in lst:
... for i in catlist:
... if i[1] == item:
... catindexes.append(i[0])
...
>>> catindexes
[0, 0, 1]
During the iteration, i is a direct reference to an element of catlist, not its index. I'm not using i to extract an element from lst, the for ... in ... already takes care of that. As i is a direct reference to a tuple, I can simply extract the relevant fields for matching and appending without the need to mess with the indexing of lst.
I would recommend using a dictionary for your catlist instead. I think it more naturally fits what you are trying to do:
# Map each category straight to its index, then translate the list in one pass.
catdict = {'a': 0, 'b': 1}
lst = ['a', 'a', 'b']
res = list(map(catdict.__getitem__, lst))  # res = [0, 0, 1]
The condition defined in the if block is not correct.
Try this..
lst = ['a', 'a', 'b']
catlist = [(0, 'a'), (1, 'b')]
catindexes = []
for item in lst:
    for i in catlist:
        # i is an (index, category) tuple, so compare its second field.
        if i[1] == item:
            catindexes.append(i[0])
print(catindexes)
You can create a dictionary (we call it d) from catlist and reverse it. Now, for each element i of lst, what you're looking for is d[i]:
# Invert the (index, category) pairs so each category looks up its index.
d = dict((category, index) for index, category in catlist)
res = list(map(d.__getitem__, lst))
Output:
>>> lst = ['a', 'a', 'b']
>>> d = {v: k for k, v in catlist}
>>> d
{'a': 0, 'b': 1}
>>>
>>> res = [d[i] for i in lst]
>>> res
[0, 0, 1]
An efficient way for big lists :
step 1 : build the good dictionary.
d=dict((v,k) for (k,v) in catlist)
step 2 : use it.
[d[k] for k in lst]
This way the execution time will grow like len(lst) + len(catlist) instead of
len(lst) x len(catlist).

how to use recursion to nest dictionaries while integrating with existing records

I am parsing some XML data from Open Street Map into JSON (later to be uploaded into a database). It is large, so I am using iterparse. I have some tags that look like this
<tag 'k'=stringA:stringB:stringC 'v'=value>
which I want to parse into
{stringA: {stringB: {stringC: value} } }
I have done so with an ugly hardcode. However, I would like to create a function that uses recursion to address the same issue in case the 'k' attribute value contains arbitrarily as many ':'.
I created this
def nestify(l):
    """Turn ``[k1, k2, ..., kn, value]`` into nested single-key dictionaries.

    Each element of the list is both a key to the inner dictionary and a value
    of the outer one, except for the last element, which is only a value.
    For best understanding, feed ``['a', 'b', 'c', 'd', 'e', 'f', 'v']`` to the
    function and look at the output.

    ``l`` must hold at least two elements (one key and the value); shorter
    input raises IndexError.
    """
    key = l[0]
    if len(l) == 2:
        # Innermost level: the remaining element is the stored value.
        value = l[1]
    else:
        value = nestify(l[1:])
    return {key: value}
which works. (You have to make the keys and value into a list first.) Except not really, because it always makes a new dictionary. I need it to respect the previous data and integrate. For example, if my parser encounters
<tag 'k'=a:b:c 'v'=1 />
and then in the same element
<tag 'k'=a:d:e 'v'=2 />
I need it to make
{a: {'b': {'c' : 1}, 'd' : {'e' : 2}}}
and not
{a: {'b' : {'c' : 1}}}
{a: {'d' : {'e' : 2}}}
I have tried this code:
# Asker's attempt, kept as posted; the question reports that it overwrites
# earlier data and returns only the latest record.
def smart_nestify(l, record):
n = len(l)
if n==2:
key = l[0]
value = l[1]
else:
key = l[0]
# NOTE: the recursive call receives `key` (a list element) as its `record`
# argument, not a nested dictionary taken out of `record`.
value = smart_nestify(l[1:], key)
if key not in record:
# NOTE: this branch returns a brand-new dict and leaves `record` untouched.
return {key:value}
else:
# NOTE: plain assignment replaces whatever was stored under `key` before.
record[key] = value
return record
but it still writes over and returns only the latest record. Why is that? How can I fix this code?
Here is a "smarter" nestify that merges a list l into the dictionary record:
def smarter_nestify(l, record):
    """Merge the key path held in ``l`` into the nested dictionary ``record``.

    ``l`` has the form ``[key1, key2, ..., value]`` with at least two items.
    ``record`` is updated in place and returned. Note that ``l`` is consumed:
    leading keys are popped off it as the recursion descends.
    """
    if len(l) == 2:
        # Write into the existing level instead of returning a fresh dict,
        # so sibling keys stored earlier at this depth survive.
        record[l[0]] = l[1]
        return record
    key = l.pop(0)
    child = record.get(key)
    if not isinstance(child, dict):
        # Missing, or a plain value left by a shorter path: start a new level.
        child = {}
    record[key] = smarter_nestify(l, child)
    return record
Each time through the recursion, it pops the first element from the list l.pop(0) and merges the value from the next level of the dictionary record.get(key, {}).
You can call it with two lists like this:
l = ['a', 'b', 'c', 1]
record = smarter_nestify(l, {})
l = ['a', 'd', 'e', 2]
record = smarter_nestify(l, record)
print(record)
Sometimes it is better to just use iteration rather than recursion. Here’s the iterative answer.
g_values = dict()


def make_nested(l):
    """Store the key path in ``l`` inside the module-level ``g_values``.

    l is the array like you have for nestify,
    e.g., ['a', 'b', 'c', 1 ] or ['a', 'd', 'e', 2]: every element but the
    last is a key, and the last element is the value stored at that path.
    Levels that already exist are reused; missing ones are created.
    Needs at least one key plus the value, otherwise the unpacking below
    raises ValueError.
    """
    *parents, leaf_key, value = l
    current = g_values
    for key in parents:
        # Descend into the existing level, creating it on first use.
        current = current.setdefault(key, dict())
    current[leaf_key] = value
While I didn’t test it, you could also try something like this recursively:
def make_nested(values, l):
    """Merge the key path ``l`` (``[key1, ..., value]``) into ``values`` in place.

    ``values`` is the dictionary to update; nothing is returned.
    """
    if len(l) == 2:
        # Last key of the path: store the value at this level.
        values[l[0]] = l[1]
        return
    child = values.get(l[0])
    if child is None:
        # No level for this key yet: create it and keep descending into it.
        child = values[l[0]] = dict()
    make_nested(child, l[1:])

Build dictionary from list of lists and maintain position of elements

I am trying to build a dictionary from list of lists and want to maintain position.
I want to maintain in the dictionary as key the occurrence of the text
and for value it will be list of lists, which will contain at first
position the list where text came from and rest are the indices
of it in that list. If it appears in 2nd list then second list
and indices of it in 2nd list and so on etc.
This is my code:
# Asker's attempt, kept as posted; it does not yet produce the sample output
# shown in the question.
def make_dict(lists):
my_dict = defaultdict(lambda:None)
for sublist in lists:
for i in range(len(sublist)):
# NOTE: only the position `i` inside the sublist is stored below; the index
# of the sublist itself is never recorded.
if sublist[i] in my_dict:
my_dict[sublist[i]].append(i)
else:
my_dict[sublist[i]] = [i]
return my_dict
Sample Input:
some_dict = make_dict([['d', 'e', 'f','d'], ['d', 'g','f']])
print some_dict['d']
print some_dict['e']
print some_dict['g']
Sample Output:
[[0,0,3],[1,0]]
d appears in first list (0) with indices 0 and 3 and then in 2nd list(1) with index 0.
[[0,1]]
e appears in first list (0) with index 1
[[1,1]]
g appears in 2nd list (1) with index 1
Any suggestion?
You can do something like this:
def make_dict(seq):
    """Map each item to ``[list_index, pos, pos, ...]`` entries, one per sublist.

    For every sublist of ``seq`` that contains an item, the result holds one
    list that starts with the sublist's index, followed by the positions of
    the item inside that sublist.
    """
    dic = {}
    for i, lis in enumerate(seq):
        # Positions collected for this sublist only; each entry is seeded
        # with the sublist index i.
        indices = {}
        for j, item in enumerate(lis):
            indices.setdefault(item, [i]).append(j)
        for k, v in indices.items():
            dic.setdefault(k, []).append(v)
    return dic
lis = [['d', 'e', 'f','d'], ['d', 'g','f']]
Output:
>>> my_dict = make_dict([['d', 'e', 'f','d'], ['d', 'g','f']])
>>> my_dict['d']
[[0, 0, 3], [1, 0]]
>>> my_dict['e']
[[0, 1]]
Instead of this output
some_dict['d']
[[0,0,3],[1,0]]
Why not this output?
some_dict['d']
{0:[0,3],1:[0]}
This is doable with a defaultdict that gives you a defaultdict(list):
def make_dict(nested_list):
    """Map each item to ``{list_index: [positions]}`` over all sublists.

    Returns a defaultdict of defaultdict(list); looking up an unseen item or
    list index creates an empty entry rather than raising KeyError.
    """
    # Imported here so the snippet works on its own.
    from collections import defaultdict

    d = defaultdict(lambda: defaultdict(list))
    for i, li in enumerate(nested_list):
        for j, c in enumerate(li):
            d[c][i].append(j)
    return d
demo
some_dict['d']
Out[12]: defaultdict(<class 'list'>, {0: [0, 3], 1: [0]})
some_dict['e']
Out[13]: defaultdict(<class 'list'>, {0: [1]})

Categories

Resources