function for suffix array python - python

I want to write a function that outputs a suffix array. This is what I have so far:
def suffixArray(s):
sa = []
for i in range(len(s)):
suffix= sorted([s[i:]])
sa = [len(s)-len(suffix[i:])
return list(sa)
This outputs an error because I think I'm missing an additional if statement but I'm not really sure how to go about it. And yes, I know that there are probably easier ways to obtain a suffix array but I'm a beginner in python and there are few functions that I can use. Any help is appreciated. Thanks
Also here's an example of what I want my input and output to be:
input --> suffixArray('banana')
output--> [5, 3, 1, 0, 4, 2]

Apparently you want the index of each suffix after lexicographicly sorting them
s = 'banana'
>>> [t[1] for t in sorted((s[i:],i) for i in range(len(s)))]
[5, 3, 1, 0, 4, 2]
or another way:
>>> sorted(range(len(s)), key=lambda i: s[i:])
[5, 3, 1, 0, 4, 2]

For a simple suffix array:
s = 'banana'
sa = sorted([s[i:] for i in range(len(s))])
For an array of suffix indices:
s = 'banana'
usd = {i: s[i:] for i in range(len(s))
sai = [x for x, _ in sorted(d.items(), key=lambda x: x[1])]

First, generate an array with suffix pairs: the suffix string, and its number:
suffixes = [(s[i:], i) for i in range(len(s))]
Next, sort this list by the suffix string:
suffixes.sort(key=lambda x: x[0])
Now you can return just the numbers:
return [s[1] for s in suffixes]
Putting it together:
def suffixArray(s):
suffixes = [(s[i:], i) for i in range(len(s))]
suffixes.sort(key=lambda x: x[0])
return [s[1] for s in suffixes]

def get_suffix_array(str_sample):
lis = list(str_sample)
suffix_array = {v:k for k,v in enumerate(["".join(trim_elem) for trim_elem in [lis[-len(str_sample)+idx:] for idx in range(len(str_sample))]])}
return [suffix_array.get(k) for k in sorted(list(suffix_array.keys()))]
print(get_suffix_array('banana'))
Result: [5, 3, 1, 0, 4, 2]

Related

Create dictionary with lambda (incremental counter inside)

I'm newbie to python. I have this program:
wordlist = ['pea', 'rpai', 'rpai', 'schiai', 'pea', 'rpe', 'zoi', 'zoi', 'briai', 'rpe']
dictionary = {}
counter = 0
result = list(map(lambda x: dictionary[wordlist[x]] = dictionary.get(wordlist[x], counter +=1), wordlist))
print(result)
Result has to be:
result = [0, 1, 1, 2, 0, 3, 4, 4, 5, 3]
What I have to do is append all of the element in the list (as key) inside the dictionary with an incremental counter as value of the key. With this code I get "lambda cannot contain assignment. How can I do this? Thank you!
EDIT FOR EXPLANATION:
With the list of strings I have to create a dictionary with element of list of str as "argument" and value as "key"
The value is calculated like this:
The first element of the list is 0.
The following element, if it is a new string never appeared (unique) has last value (in this case 0) =+1.
Instead if the new element is a duplicate string (there is already one in the dictionary) it take the same originale value as the first.
The dictionary will be:
{'pea': 0, 'rpai': 1, 'rpai': 1, 'schiai': 2, 'pea': 0, 'rpe': 3,
'zoi': 4, 'zoi': 4, 'briai': 5,'rpe': 3}
And result instead with list will be:
[0, 1, 1, 2, 0, 3, 4, 4, 5, 3]
I guess the easiest solution with vanilla Python is to use defaultdict:
from collections import defaultdict
wordlist = ["pea","rpai","rpai","schiai","pea","rpe", "zoi","zoi","briai","rpe"]
vocab = defaultdict(lambda: len(vocab))
# result will be [0, 1, 1, 2, 0, 3, 4, 4, 5, 3]
result = [vocab[word] for word in wordlist]
A more verbose equivalent, leading to the same result:
vocab = {}
result = []
for word in wordlist:
if word not in vocab:
vocab[word] = len(vocab)
result.append(vocab[word])
Update:
Use dictionary's setdefault then.
wordlist = ["pea","rpai","rpai","schiai","pea","rpe", "zoi","zoi","briai","rpe"]
dic = {}
res = list(map(lambda x: dic.setdefault(x, len(dic)), wordlist))
print(res)
Dictonary can't have same keys. You just need a for loop:
wordlist = ["pea","rpai","rpai","schiai","pea","rpe", "zoi","zoi","briai","rpe"]
c = 0
dic = {}
res = []
for i in range(len(wordlist)):
word = wordlist[i]
if word in dic:
res.append(dic[word])
else:
dic[word] = c
res.append(c)
c += 1
print(res)
Once you have the dictionary built the code for the lambda will be as follows.
list(map(lambda x: dictionary[x], wordlist))
This assumes you already have the keys and values of the dictionary populated. Is this the case, like so?
{'pea': 0, 'rpai': 1, 'schiai': 2, 'rpe': 3, 'zoi': 4, 'briai': 5, 'rpei': 6}
All you need to do is make a dict with the first occurrence of each word, then look up each word in the dict. Don't use map and lambda, they'll only make it harder, or at least less readable.
first_occ = {}
counter = 0
for word in wordlist:
if word not in first_occ:
first_occ[word] = counter
counter += 1
result = [first_occ[w] for w in wordlist]
print(result) # -> [0, 1, 1, 2, 0, 3, 4, 4, 5, 3]

Delete occurrences of an element if it occurs more than n times in Python

How can I fix my code to pass the test case for Delete occurrences of an element if it occurs more than n times?
My current code pass one test case and I'm sure that the problem is caused by order.remove(check_list[i]).
However, there is no way to delete the specific element with pop() because it is required to put an index number rather than the element in pop().
Test case
Test.assert_equals(delete_nth([20,37,20,21], 1), [20,37,21])
Test.assert_equals(delete_nth([1,1,3,3,7,2,2,2,2], 3), [1, 1, 3, 3, 7, 2, 2, 2])
Program
def delete_nth(order, max_e):
# code here
check_list = [x for x in dict.fromkeys(order) if order.count(x) > 1]
print(check_list)
print(order)
for i in range(len(check_list)):
while(order.count(check_list[i]) > max_e):
order.remove(check_list[i])
#order.pop(index)
return order
Your assertions fails, because the order is not preserved. Here is a simple example of how this could be done without doing redundant internal loops to count the occurrences for each number:
def delete_nth(order, max_e):
# Get a new list that we will return
result = []
# Get a dictionary to count the occurences
occurrences = {}
# Loop through all provided numbers
for n in order:
# Get the count of the current number, or assign it to 0
count = occurrences.setdefault(n, 0)
# If we reached the max occurence for that number, skip it
if count >= max_e:
continue
# Add the current number to the list
result.append(n)
# Increase the
occurrences[n] += 1
# We are done, return the list
return result
assert delete_nth([20,37,20,21], 1) == [20, 37, 21]
assert delete_nth([1, 1, 1, 1], 2) == [1, 1]
assert delete_nth([1, 1, 3, 3, 7, 2, 2, 2, 2], 3) == [1, 1, 3, 3, 7, 2, 2, 2]
assert delete_nth([1, 1, 2, 2], 1) == [1, 2]
A version which maintains the order:
from collections import defaultdict
def delete_nth(order, max_e):
count = defaultdict(int)
delet = []
for i, v in enumerate(order):
count[v] += 1
if count[v] > max_e:
delet.append(i)
for i in reversed(delet): # start deleting from the end
order.pop(i)
return order
print(delete_nth([1,1,2,2], 1))
print(delete_nth([20,37,20,21], 1))
print(delete_nth([1,1,3,3,7,2,2,2,2], 3))
This should do the trick:
from itertools import groupby
import numpy as np
def delete_nth(order, max_e):
if(len(order)<=max_e):
return order
elif(max_e<=0):
return []
return np.array(
sorted(
np.concatenate(
[list(v)[:max_e]
for k,v in groupby(
sorted(
zip(order, list(range(len(order)))),
key=lambda k: k[0]),
key=lambda k: k[0])
]
),
key=lambda k: k[1])
)[:,0].tolist()
Outputs:
print(delete_nth([2,3,4,5,3,2,3,2,1], 2))
[2, 3, 4, 5, 3, 2, 1]
print(delete_nth([2,3,4,5,5,3,2,3,2,1], 1))
[2, 3, 4, 5, 1]
print(delete_nth([2,3,4,5,3,2,3,2,1], 3))
[2, 3, 4, 5, 3, 2, 3, 2, 1]
print(delete_nth([2,2,1,1], 1))
[2, 1]
Originally my answer only worked for one test case, this is quick (not the prettiest) but works for both:
def delete_nth(x, e):
x = x[::-1]
for i in x:
while x.count(i) > e:
x.remove(i)
return x[::-1]

How to efficiently make a numbered list in python Class-list

How to make the following code more compact and efficient.
Here, the code was to find the position where certain numerical value resides in the list.
For example, given set of number
ListNo = [[100,2,5], [50,10], 4, 1, [6,6,500]]
The value of 100, 50 and 500 was in the position of 0,3 and 9, respectively.
The testing code was as follows
ListNo = [[100,2,5], [50,10], 4, 1, [6,6,500]]
NumberedList = ListNo
Const = 0
items = 0
for i, item in enumerate(ListNo):
MaxRange = len(item) if isinstance(item, list) else 1
for x in range(0, MaxRange):
if MaxRange > 1:
NumberedList[i][x] = Const
else:
NumberedList[i] = Const
Const = Const + 1
print(NumberedList)
[[0, 1, 2], [3, 4], 5, 6, [7, 8, 9]]
My question is, whether there is another option to make this code more compact and efficient.
You can use itertools.count:
from itertools import count
i = count()
print([[next(i) for _ in range(len(l))] if isinstance(l, list) else next(i) for l in ListNo])
This outputs:
[[0, 1, 2], [3, 4], 5, 6, [7, 8, 9]]
A recursive solution would be more elegant, and handle more cases:
def nested_list_ordinal_recurse(l, it):
if isinstance(l, list):
return [nested_list_ordinal_recurse(item, it) for item in l]
else:
return next(it)
def nested_list_ordinal(l, _it=None):
return nested_list_ordinal_recurse(l, itertools.count())
ListNo = [[100,2,5], [50,10], 4, 1, [6,6,500]];
count=-1
def counter(l=[]):
global count
if l:
return [counter() for i in l]
else:
count+=1
return count
print [counter(item) if isinstance(item, list) else counter() for item in ListNo ]
Without iter tools

How to merge an array with its array elements in Python?

I have an array like below;
constants = ['(1,2)', '(1,5,1)', '1']
I would like to transform the array into like below;
constants = [(1,2), 1, 2, 3, 4, 5, 1]
For doing this, i tried some operations;
from ast import literal_eval
import numpy as np
constants = literal_eval(str(constants).replace("'",""))
constants = [(np.arange(*i) if len(i)==3 else i) if isinstance(i, tuple) else i for i in constants]
And the output was;
constants = [(1, 2), array([1, 2, 3, 4]), 1]
So, this is not expected result and I'm stuck in this step. The question is, how can i merge the array with its parent array?
This is one approach.
Demo:
from ast import literal_eval
constants = ['(1,2)', '(1,5,1)', '1']
res = []
for i in constants:
val = literal_eval(i) #Convert to python object
if isinstance(val, tuple): #Check if element is tuple
if len(val) == 3: #Check if no of elements in tuple == 3
val = list(val)
val[1]+=1
res.extend(range(*val))
continue
res.append(val)
print(res)
Output:
[(1, 2), 1, 2, 3, 4, 5, 1]
I'm going to assume that this question is very literal, and that you always want to transform this:
constants = ['(a, b)', '(x, y, z)', 'i']
into this:
transformed = [(a,b), x, x+z, x+2*z, ..., y, i]
such that the second tuple is a range from x to y with step z. So your final transformed array is the first element, then the range defined by your second element, and then your last element. The easiest way to do this is simply step-by-step:
constants = ['(a, b)', '(x, y, z)', 'i']
literals = [eval(k) for k in constants] # get rid of the strings
part1 = [literals[0]] # individually make each of the three parts of your list
part2 = [k for k in range(literals[1][0], literals[1][1] + 1, literals[1][2])] # or if you don't need to include y then you could just do range(literals[1])
part3 = [literals[2]]
transformed = part1 + part2 + part3
I propose the following:
res = []
for cst in constants:
if isinstance(cst,tuple) and (len(cst) == 3):
#add the range to the list
res.extend(range(cst[0],cst[1], cst[2]))
else:
res.append(cst)
res has the result you want.
There may be a more elegant way to solve it.
Please use code below to resolve parsing described above:
from ast import literal_eval
constants = ['(1,2)', '(1,5,1)', '1']
processed = []
for index, c in enumerate(constants):
parsed = literal_eval(c)
if isinstance(parsed, (tuple, list)) and index != 0:
processed.extend(range(1, max(parsed) + 1))
else:
processed.append(parsed)
print processed # [(1, 2), 1, 2, 3, 4, 5, 1]

Identify duplicate values in a list in Python

Is it possible to get which values are duplicates in a list using python?
I have a list of items:
mylist = [20, 30, 25, 20]
I know the best way of removing the duplicates is set(mylist), but is it possible to know what values are being duplicated? As you can see, in this list the duplicates are the first and last values. [0, 3].
Is it possible to get this result or something similar in python? I'm trying to avoid making a ridiculously big if elif conditional statement.
These answers are O(n), so a little more code than using mylist.count() but much more efficient as mylist gets longer
If you just want to know the duplicates, use collections.Counter
from collections import Counter
mylist = [20, 30, 25, 20]
[k for k,v in Counter(mylist).items() if v>1]
If you need to know the indices,
from collections import defaultdict
D = defaultdict(list)
for i,item in enumerate(mylist):
D[item].append(i)
D = {k:v for k,v in D.items() if len(v)>1}
Here's a list comprehension that does what you want. As #Codemonkey says, the list starts at index 0, so the indices of the duplicates are 0 and 3.
>>> [i for i, x in enumerate(mylist) if mylist.count(x) > 1]
[0, 3]
You can use list compression and set to reduce the complexity.
my_list = [3, 5, 2, 1, 4, 4, 1]
opt = [item for item in set(my_list) if my_list.count(item) > 1]
The following list comprehension will yield the duplicate values:
[x for x in mylist if mylist.count(x) >= 2]
simplest way without any intermediate list using list.index():
z = ['a', 'b', 'a', 'c', 'b', 'a', ]
[z[i] for i in range(len(z)) if i == z.index(z[i])]
>>>['a', 'b', 'c']
and you can also list the duplicates itself (may contain duplicates again as in the example):
[z[i] for i in range(len(z)) if not i == z.index(z[i])]
>>>['a', 'b', 'a']
or their index:
[i for i in range(len(z)) if not i == z.index(z[i])]
>>>[2, 4, 5]
or the duplicates as a list of 2-tuples of their index (referenced to their first occurrence only), what is the answer to the original question!!!:
[(i,z.index(z[i])) for i in range(len(z)) if not i == z.index(z[i])]
>>>[(2, 0), (4, 1), (5, 0)]
or this together with the item itself:
[(i,z.index(z[i]),z[i]) for i in range(len(z)) if not i == z.index(z[i])]
>>>[(2, 0, 'a'), (4, 1, 'b'), (5, 0, 'a')]
or any other combination of elements and indices....
I tried below code to find duplicate values from list
1) create a set of duplicate list
2) Iterated through set by looking in duplicate list.
glist=[1, 2, 3, "one", 5, 6, 1, "one"]
x=set(glist)
dup=[]
for c in x:
if(glist.count(c)>1):
dup.append(c)
print(dup)
OUTPUT
[1, 'one']
Now get the all index for duplicate element
glist=[1, 2, 3, "one", 5, 6, 1, "one"]
x=set(glist)
dup=[]
for c in x:
if(glist.count(c)>1):
indices = [i for i, x in enumerate(glist) if x == c]
dup.append((c,indices))
print(dup)
OUTPUT
[(1, [0, 6]), ('one', [3, 7])]
Hope this helps someone
That's the simplest way I can think for finding duplicates in a list:
my_list = [3, 5, 2, 1, 4, 4, 1]
my_list.sort()
for i in range(0,len(my_list)-1):
if my_list[i] == my_list[i+1]:
print str(my_list[i]) + ' is a duplicate'
The following code will fetch you desired results with duplicate items and their index values.
for i in set(mylist):
if mylist.count(i) > 1:
print(i, mylist.index(i))
You should sort the list:
mylist.sort()
After this, iterate through it like this:
doubles = []
for i, elem in enumerate(mylist):
if i != 0:
if elem == old:
doubles.append(elem)
old = None
continue
old = elem
You can print duplicate and Unqiue using below logic using list.
def dup(x):
duplicate = []
unique = []
for i in x:
if i in unique:
duplicate.append(i)
else:
unique.append(i)
print("Duplicate values: ",duplicate)
print("Unique Values: ",unique)
list1 = [1, 2, 1, 3, 2, 5]
dup(list1)
mylist = [20, 30, 25, 20]
kl = {i: mylist.count(i) for i in mylist if mylist.count(i) > 1 }
print(kl)
It looks like you want the indices of the duplicates. Here is some short code that will find those in O(n) time, without using any packages:
dups = {}
[dups.setdefault(v, []).append(i) for i, v in enumerate(mylist)]
dups = {k: v for k, v in dups.items() if len(v) > 1}
# dups now has keys for all the duplicate values
# and a list of matching indices for each
# The second line produces an unused list.
# It could be replaced with this:
for i, v in enumerate(mylist):
dups.setdefault(v, []).append(i)
m = len(mylist)
for index,value in enumerate(mylist):
for i in xrange(1,m):
if(index != i):
if (L[i] == L[index]):
print "Location %d and location %d has same list-entry: %r" % (index,i,value)
This has some redundancy that can be improved however.
def checkduplicate(lists):
a = []
for i in lists:
if i in a:
pass
else:
a.append(i)
return i
print(checkduplicate([1,9,78,989,2,2,3,6,8]))

Categories

Resources