Create dictionary with lambda (incremental counter inside) - python

I'm a newbie to Python. I have this program:
wordlist = ['pea', 'rpai', 'rpai', 'schiai', 'pea', 'rpe', 'zoi', 'zoi', 'briai', 'rpe']
dictionary = {}
counter = 0
result = list(map(lambda x: dictionary[wordlist[x]] = dictionary.get(wordlist[x], counter +=1), wordlist))
print(result)
Result has to be:
result = [0, 1, 1, 2, 0, 3, 4, 4, 5, 3]
What I have to do is add every element of the list (as a key) to the dictionary, with an incremental counter as the value of that key. With this code I get "lambda cannot contain assignment". How can I do this? Thank you!
EDIT FOR EXPLANATION:
From the list of strings I have to create a dictionary in which each string of the list is a key and the counter is its value.
The value is calculated like this:
The first element of the list is 0.
Each following element, if it is a new string that has not appeared before (unique), gets the last value used (in this case 0) plus 1.
If instead the new element is a duplicate string (it is already in the dictionary), it takes the same value that was originally given to its first occurrence.
The dictionary will be:
{'pea': 0, 'rpai': 1, 'rpai': 1, 'schiai': 2, 'pea': 0, 'rpe': 3,
'zoi': 4, 'zoi': 4, 'briai': 5,'rpe': 3}
And result instead with list will be:
[0, 1, 1, 2, 0, 3, 4, 4, 5, 3]

I guess the easiest solution with vanilla Python is to use defaultdict:
from collections import defaultdict
wordlist = [
    "pea", "rpai", "rpai", "schiai", "pea",
    "rpe", "zoi", "zoi", "briai", "rpe",
]
# The default factory runs only for a word that is not stored yet; at that
# moment len(vocab) is exactly the next unused index.
vocab = defaultdict(lambda: len(vocab))
# Looking a word up either creates its index or returns the existing one,
# giving [0, 1, 1, 2, 0, 3, 4, 4, 5, 3] for the list above.
result = [vocab[token] for token in wordlist]
A more verbose equivalent, leading to the same result:
# Give each distinct word the index of its first appearance among the unique
# words, and collect the index of every word in input order.
vocab = {}
result = []
for word in wordlist:
    if word not in vocab:
        # A new word gets the next free index, i.e. the current dict size.
        vocab[word] = len(vocab)
    # Duplicates fall through to here and reuse the stored index.
    result.append(vocab[word])

Update:
Use dictionary's setdefault then.
wordlist = [
    "pea", "rpai", "rpai", "schiai", "pea",
    "rpe", "zoi", "zoi", "briai", "rpe",
]
dic = {}
# setdefault stores len(dic) for a word that is not a key yet and returns the
# stored value either way, so the lambda needs no assignment statement.
first_index = lambda word: dic.setdefault(word, len(dic))
res = list(map(first_index, wordlist))
print(res)
Dictionaries can't have duplicate keys. You just need a for loop:
wordlist = ["pea", "rpai", "rpai", "schiai", "pea", "rpe", "zoi", "zoi", "briai", "rpe"]
c = 0     # next unused index
dic = {}  # word -> index assigned at its first appearance
res = []  # index of every word, in input order
for word in wordlist:
    if word in dic:
        # Repeated word: reuse the index it received the first time.
        res.append(dic[word])
    else:
        # New word: assign the next index and advance the counter.
        dic[word] = c
        res.append(c)
        c += 1
print(res)

Once you have the dictionary built the code for the lambda will be as follows.
list(map(lambda x: dictionary[x], wordlist))
This assumes you already have the keys and values of the dictionary populated. Is this the case, like so?
{'pea': 0, 'rpai': 1, 'schiai': 2, 'rpe': 3, 'zoi': 4, 'briai': 5, 'rpei': 6}

All you need to do is make a dict with the first occurrence of each word, then look up each word in the dict. Don't use map and lambda, they'll only make it harder, or at least less readable.
# First pass: number the distinct words in the order they first appear
# (0 for the first new word, 1 for the second, and so on).
first_occ = {}
counter = 0
for word in wordlist:
    if word not in first_occ:
        first_occ[word] = counter
        counter += 1
# Second pass: look every word up, so duplicates reuse the stored number.
result = [first_occ[w] for w in wordlist]
print(result)  # -> [0, 1, 1, 2, 0, 3, 4, 4, 5, 3]

Related

Delete occurrences of an element if it occurs more than n times in Python

How can I fix my code to pass the test case for Delete occurrences of an element if it occurs more than n times?
My current code passes one test case, and I'm sure that the problem is caused by order.remove(check_list[i]).
However, there is no way to delete the specific element with pop() because it is required to put an index number rather than the element in pop().
Test case
Test.assert_equals(delete_nth([20,37,20,21], 1), [20,37,21])
Test.assert_equals(delete_nth([1,1,3,3,7,2,2,2,2], 3), [1, 1, 3, 3, 7, 2, 2, 2])
Program
def delete_nth(order, max_e):
# code here
check_list = [x for x in dict.fromkeys(order) if order.count(x) > 1]
print(check_list)
print(order)
for i in range(len(check_list)):
while(order.count(check_list[i]) > max_e):
order.remove(check_list[i])
#order.pop(index)
return order
Your assertions fail because the order is not preserved. Here is a simple example of how this could be done without redundant inner loops that count the occurrences of each number:
def delete_nth(order, max_e):
    """Return a new list with at most ``max_e`` occurrences of each value.

    The first ``max_e`` occurrences of every value are kept in their
    original order; later occurrences are dropped. ``order`` itself is
    not modified.
    """
    # The list that will be returned
    result = []
    # How many times each number has been kept so far
    occurrences = {}
    for n in order:
        # Get the count of the current number, starting it at 0 if unseen
        count = occurrences.setdefault(n, 0)
        # If we reached the max occurrence for that number, skip it
        if count >= max_e:
            continue
        # Keep the current number
        result.append(n)
        # Increase the occurrence count of the number just kept
        occurrences[n] += 1
    return result
assert delete_nth([20,37,20,21], 1) == [20, 37, 21]
assert delete_nth([1, 1, 1, 1], 2) == [1, 1]
assert delete_nth([1, 1, 3, 3, 7, 2, 2, 2, 2], 3) == [1, 1, 3, 3, 7, 2, 2, 2]
assert delete_nth([1, 1, 2, 2], 1) == [1, 2]
A version which maintains the order:
from collections import defaultdict
def delete_nth(order, max_e):
    """Trim ``order`` in place to at most ``max_e`` occurrences per value.

    The first ``max_e`` occurrences of each value keep their relative
    order; later ones are removed. The same list object that was passed
    in is modified and returned.
    """
    count = defaultdict(int)
    kept = []
    for v in order:
        count[v] += 1
        if count[v] <= max_e:
            kept.append(v)
    # One slice assignment replaces the contents in place, avoiding a
    # separate O(n) pop for every surplus element.
    order[:] = kept
    return order
print(delete_nth([1,1,2,2], 1))
print(delete_nth([20,37,20,21], 1))
print(delete_nth([1,1,3,3,7,2,2,2,2], 3))
This should do the trick:
from itertools import groupby
import numpy as np
def delete_nth(order, max_e):
    """Return ``order`` limited to at most ``max_e`` occurrences per value.

    Values are paired with their positions, grouped by value, cut to the
    first ``max_e`` members of each group, and put back in positional
    order. The elements must be mutually orderable because grouping
    relies on sorting.

    If ``order`` has no more than ``max_e`` elements it is returned
    unchanged (the same object); if ``max_e`` is not positive an empty
    list is returned.
    """
    if len(order) <= max_e:
        return order
    elif max_e <= 0:
        return []
    # Pair every value with its index. sorted() is stable, so within one
    # value the pairs stay in index order.
    indexed = sorted(zip(order, range(len(order))), key=lambda pair: pair[0])
    kept = []
    for _, group in groupby(indexed, key=lambda pair: pair[0]):
        kept.extend(list(group)[:max_e])
    # Plain tuples keep the index an int, so this sort is numeric whatever
    # the element type is.
    kept.sort(key=lambda pair: pair[1])
    return [value for value, _ in kept]
Outputs:
print(delete_nth([2,3,4,5,3,2,3,2,1], 2))
[2, 3, 4, 5, 3, 2, 1]
print(delete_nth([2,3,4,5,5,3,2,3,2,1], 1))
[2, 3, 4, 5, 1]
print(delete_nth([2,3,4,5,3,2,3,2,1], 3))
[2, 3, 4, 5, 3, 2, 3, 2, 1]
print(delete_nth([2,2,1,1], 1))
[2, 1]
Originally my answer only worked for one test case, this is quick (not the prettiest) but works for both:
def delete_nth(x, e):
    """Return a new list keeping only the first ``e`` occurrences of each item.

    ``x`` is left untouched. Items are compared with ``==`` (via
    ``list.count``), so unhashable items such as lists work too.
    """
    kept = []
    for item in x:
        # Build a fresh list rather than removing from one being iterated;
        # removing during iteration skips elements and leaves surplus
        # items behind.
        if kept.count(item) < e:
            kept.append(item)
    return kept

function for suffix array python

I want to write a function that outputs a suffix array. This is what I have so far:
def suffixArray(s):
sa = []
for i in range(len(s)):
suffix= sorted([s[i:]])
sa = [len(s)-len(suffix[i:])
return list(sa)
This outputs an error because I think I'm missing an additional if statement but I'm not really sure how to go about it. And yes, I know that there are probably easier ways to obtain a suffix array but I'm a beginner in python and there are few functions that I can use. Any help is appreciated. Thanks
Also here's an example of what I want my input and output to be:
input --> suffixArray('banana')
output--> [5, 3, 1, 0, 4, 2]
Apparently you want the index of each suffix after sorting the suffixes lexicographically:
s = 'banana'
>>> [t[1] for t in sorted((s[i:],i) for i in range(len(s)))]
[5, 3, 1, 0, 4, 2]
or another way:
>>> sorted(range(len(s)), key=lambda i: s[i:])
[5, 3, 1, 0, 4, 2]
For a simple suffix array:
s = 'banana'
sa = sorted([s[i:] for i in range(len(s))])
For an array of suffix indices:
s = 'banana'
# Map each start index to the suffix that begins there.
d = {i: s[i:] for i in range(len(s))}
# Sort the (index, suffix) pairs by suffix and keep only the indices.
sai = [x for x, _ in sorted(d.items(), key=lambda x: x[1])]
First, generate an array with suffix pairs: the suffix string, and its number:
suffixes = [(s[i:], i) for i in range(len(s))]
Next, sort this list by the suffix string:
suffixes.sort(key=lambda x: x[0])
Now you can return just the numbers:
return [s[1] for s in suffixes]
Putting it together:
def suffixArray(s):
    """Return the start indices of the suffixes of ``s`` in sorted suffix order.

    Example: ``suffixArray('banana')`` returns ``[5, 3, 1, 0, 4, 2]``.
    An empty string gives an empty list.
    """
    # Pair every suffix with the index where it starts.
    suffixes = [(s[i:], i) for i in range(len(s))]
    # Order the pairs by suffix text only.
    suffixes.sort(key=lambda x: x[0])
    # Return just the start indices; the loop names do not shadow ``s``.
    return [start for _, start in suffixes]
def get_suffix_array(str_sample):
    """Return the suffix array of ``str_sample``.

    The result lists the start index of every suffix, ordered by the
    lexicographic order of the suffixes, e.g. ``[5, 3, 1, 0, 4, 2]`` for
    ``'banana'``.
    """
    # Suffixes of one string all differ in length, so each is a unique key.
    suffix_array = {str_sample[idx:]: idx for idx in range(len(str_sample))}
    return [suffix_array[k] for k in sorted(suffix_array)]
print(get_suffix_array('banana'))
Result: [5, 3, 1, 0, 4, 2]

Count occurrences of each key in python dictionary

I have a python dictionary object that looks somewhat like this:
[{"house": 4, "sign": "Aquarius"},
{"house": 2, "sign": "Sagittarius"},
{"house": 8, "sign": "Gemini"},
{"house": 3, "sign": "Capricorn"},
{"house": 2, "sign": "Sagittarius"},
{"house": 3, "sign": "Capricorn"},
{"house": 10, "sign": "Leo"},
{"house": 4, "sign": "Aquarius"},
{"house": 10, "sign": "Leo"},
{"house": 1, "sign": "Scorpio"}]
Now for each 'sign' key, I'd like to count how many times each value occurs.
def predominant_sign(data):
signs = [k['sign'] for k in data if k.get('sign')]
print len(signs)
This however, prints number of times 'sign' appears in the dictionary, instead of getting the value of the sign and counting the number of times a particular value appears.
For example, the output I'd like to see is:
Aquarius: 2
Sagittarius: 2
Gemini: 1
...
And so on. What should I change to get the desired output?
Use collections.Counter and its most_common method:
from collections import Counter
def predominant_sign(data):
    """Print each sign found in ``data`` with its count, most frequent first.

    ``data`` is an iterable of dicts. Entries without a truthy ``'sign'``
    value are ignored. One ``sign count`` line is printed per distinct
    sign; nothing is returned.
    """
    signs = Counter(k['sign'] for k in data if k.get('sign'))
    for sign, count in signs.most_common():
        print(sign, count)
You can use collections.Counter module, with a simple generator expression, like this
>>> from collections import Counter
>>> Counter(k['sign'] for k in data if k.get('sign'))
Counter({'Sagittarius': 2, 'Capricorn': 2, 'Aquarius': 2, 'Leo': 2, 'Scorpio': 1, 'Gemini': 1})
This will give you a dictionary which has the signs as keys and their number of occurrences as the values.
You can do the same with a normal dictionary, like this
>>> result = {}
>>> for k in data:
... if 'sign' in k:
... result[k['sign']] = result.get(k['sign'], 0) + 1
>>> result
{'Sagittarius': 2, 'Capricorn': 2, 'Aquarius': 2, 'Leo': 2, 'Scorpio': 1, 'Gemini': 1}
The dictionary.get method accepts a second parameter, which is the default value returned if the key is not found in the dictionary. So, if the current sign is not in result, it will give 0 instead.
def counter(my_list):
    """Count how often each item occurs in ``my_list``.

    Returns a plain dict mapping each distinct item to its number of
    occurrences, with keys inserted in sorted order. An empty input
    gives an empty dict. The items must be sortable and hashable.
    """
    my_counter = {}
    # Iterating the sorted copy keeps the keys in ascending order. A
    # running tally replaces the index arithmetic, which needed at least
    # two items to work.
    for item in sorted(my_list):
        my_counter[item] = my_counter.get(item, 0) + 1
    return my_counter
>>> counter([list(i.values())[1] for i in my_list])
{'Aquarius': 2,
'Capricorn': 2,
'Gemini': 1,
'Leo': 2,
'Sagittarius': 2,
'Scorpio': 1}

Counting occurrences in a loop

gzip_files=["complete-credit-ctrl-txn-SE06_2013-07-17-00.log.gz","complete-credit-ctrl-txn-SE06_2013-07-17-01.log.gz"]
def input_func():
num = input("Enter the number of MIN series digits: ")
return num
for i in gzip_files:
import gzip
f=gzip.open(i,'rb')
file_content=f.read()
digit = input_func()
file_content = file_content.split('[')
series = [] #list of MIN
for line in file_content:
MIN = line.split('|')[13:15]
for x in MIN:
n = digit
x = x[:n]
series.append(x)
break
#count the number of occurences in the list named series
for i in series:
print i
#end count
Result:
63928
63928
63929
63929
63928
63928
That is only a part of the result. the actual result shows a really long list. Now i want to just list unique numbers and specify how many times it showed on the list.
So
63928 = 4,
63929 = 2
I would use a collections.Counter class here.
>>> a = [1, 1, 1, 2, 3, 4, 4, 5]
>>> from collections import Counter
>>> Counter(a)
Counter({1: 3, 4: 2, 2: 1, 3: 1, 5: 1})
Just pass your series variable to Counter and you'll get a dictionary where the keys are the unique elements and the values are their occurrences in the list.
collections.Counter was introduced in Python 2.7. Use the following list comprehension for versions below 2.7
>>> [(elem, a.count(elem)) for elem in set(a)]
[(1, 3), (2, 1), (3, 1), (4, 2), (5, 1)]
You can then just convert this into a dictionary for easy access.
>>> dict((elem, a.count(elem)) for elem in set(a))
{1: 3, 2: 1, 3: 1, 4: 2, 5: 1}
You can use a Counter() for this.
So this will print what you need:
from collections import Counter
c = Counter(series)
for item,count in c.items():
print "%s = %s" % (item,count)
Compile a dictionary using unique numbers as keys, and their total occurrences as values:
d = {}  # unique number -> how many times it occurs in series
for s in series:
    # Start the tally at 0 the first time s is seen; a key that already
    # exists is left untouched.
    d.setdefault(s, 0)
    # Increment by 1 for every occurrence of s.
    d[s] += 1
If this problem were any more complex, an implementation of MapReduce (a.k.a. map/fold) might be appropriate.
Map Reduce:
https://en.wikipedia.org/wiki/MapReduce
Python map function:
http://docs.python.org/2/library/functions.html#map
Python reduce function:
http://docs.python.org/2/library/functions.html#reduce

Cycle through list starting at a certain element

Say I have a list:
l = [1, 2, 3, 4]
And I want to cycle through it. Normally, it would do something like this,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2...
I want to be able to start at a certain point in the cycle, not necessarily an index, but perhaps matching an element. Say I wanted to start at whatever element in the list ==4, then the output would be,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1...
How can I accomplish this?
Look at itertools module. It provides all the necessary functionality.
from itertools import cycle, islice, dropwhile
L = [1, 2, 3, 4]
start_value = 4
# Endless repetition of L: 1, 2, 3, 4, 1, 2, ...
endless = cycle(L)
# Discard leading items until the first one equal to start_value appears.
from_start = dropwhile(lambda item: item != start_value, endless)
# The iterator is infinite, so materialise only its first 10 items.
result = list(islice(from_start, 10))
print(result)
Output:
[4, 1, 2, 3, 4, 1, 2, 3, 4, 1]
Use the arithmetic mod operator. Suppose you're starting from position k, then k should be updated like this:
k = (k + 1) % len(l)
If you want to start from a certain element, not index, you can always look it up like k = l.index(x) where x is the desired item.
I'm not such a big fan of importing modules when you can do things by your own in a couple of lines. Here's my solution without imports:
def cycle(my_list, start_at=None):
    """Yield the items of ``my_list`` forever, wrapping around at the end.

    ``start_at`` is an element (not an index): iteration begins at its
    first occurrence, or at index 0 when it is ``None``. A value that is
    not in the list raises ``ValueError`` (from ``list.index``) on the
    first ``next()`` call. An empty list yields nothing.
    """
    position = 0 if start_at is None else my_list.index(start_at)
    if not my_list:
        # Nothing to cycle through; finish instead of raising IndexError.
        return
    while True:
        yield my_list[position]
        # The modulo wraps the position back to 0 after the last item.
        position = (position + 1) % len(my_list)
This will return an (infinite) iterator looping over your list. To get the next element in the cycle, use the built-in next() function:
>>> it1 = cycle([101,102,103,104])
>>> next(it1), next(it1), next(it1), next(it1), next(it1)
(101, 102, 103, 104, 101) # and so on ...
>>> it1 = cycle([101,102,103,104], start_at=103)
>>> next(it1), next(it1), next(it1), next(it1), next(it1)
(103, 104, 101, 102, 103) # and so on ...
import itertools as it
l = [1, 2, 3, 4]
list(it.islice(it.dropwhile(lambda x: x != 4, it.cycle(l)), 10))
# returns: [4, 1, 2, 3, 4, 1, 2, 3, 4, 1]
so the iterator you want is:
it.dropwhile(lambda x: x != 4, it.cycle(l))
Hm, http://docs.python.org/library/itertools.html#itertools.cycle doesn't have such a start element.
Maybe you just start the cycle anyway and drop the first elements that you don't like.
Another weird option is that cycling through lists can be accomplished backwards. For instance:
# Run this once
myList = ['foo', 'bar', 'baz', 'boom']
myItem = 'baz'
# Run this repeatedly to cycle through the list
if myItem in myList:
myItem = myList[myList.index(myItem)-1]
print myItem
Can use something like this:
def my_cycle(data, start=None):
    """Yield the items of ``data`` endlessly, beginning at index ``start``.

    ``start`` is a position in ``data``; ``None`` (or 0) begins at the
    first item. After the last item the generator wraps around to
    index 0.
    """
    k = start or 0
    while True:
        yield data[k]
        # The modulo wraps the index back to 0 after the last item.
        k = (k + 1) % len(data)
Then run:
for val in my_cycle([0,1,2,3], 2):
print(val)
Essentially the same as one of the previous answers. My bad.

Categories

Resources