Ordered ranking for a list of permutations - python

I am trying to develop a method for finding the orderd rank of a particular sequence in the following lists.
a = list(sorted(itertools.combinations(range(0,5),3)))
b = list(sorted(itertools.permutations(range(0,5),3)))
a represents a list of elemnts of a combinatorial number system so the formula for rank is pretty straight forward.
What I need are 2 function magic_rank_1 and magic_rank_2 which have the following definitions
def magic_rank_1(perm_item,permutation_list_object):
## permutation_list_object is actually b
return list_object.index(perm_item)
def magic_rank_2(perm_item,permutation_list_object,combination_list_object):
## permutation_list_object is actually b and combination_list_object is actually a
return combination_list_object.index(tuple(sorted(perm_item)))
So basically magic_rank_2((0,1,2),b,a) = magic_rank_2((2,0,1),b,a)
Sounds easy.. but i have a few restrictions.
I cant use the indexof function because I cannot afford to search lists >100000000 items for every item
I need magic_rank_1 and magic_rank_2 to be purely mathematical without using any sort function or comparison functions or search function. All the information I will have is the tuple whose rank needs to be identified and the last letter of my alphabet (in this case that will be 5)
magic rank 2 need not be a number between 0 and k-1 when k = len(a) as long as it is a unique number between 0 and 2^(ceiling((max_alphabet/2)+1))
I know magic_rank_1 can be calculated by something similar to this but there is a difference ,there every letter of the input alphabet is used, in my case it is a subset
Lastly yes this is supposed to be a substitute for a hashing function, currently using a hashing function but I am not taking advantage of the fact that magic_rank_2((0,1,2),b,a) = magic_rank_2((2,0,1),b,a) . If i can it will reduce my storage space requirements significantly as the length of my sequences is actually 5 , so if I can calculate a method for magic_rank_2 I reduce my storage requirement to 1% of current requirement
UPDATE
- For magic_rank_2 there should be no comparison operation between elements of the tuple, i.e no sorting, min,max etc
that only makes the algorithm less efficient than regular hashing

The following two functions will rank a combination and permutation, given a word and an alphabet (or in your case, a tuple and a list).
import itertools
import math
def rank_comb(word, alph, depth=0):
if not word: return 0
if depth == 0:
word = list(word)
alph = sorted(alph)
pos = 0
for (i,c) in enumerate(alph):
if c == word[0]:
# Recurse
new_word = [x for x in word if x != c]
new_alph = [x for x in alph if x > c]
return pos + rank_comb(new_word, new_alph, depth+1)
else:
num = math.factorial(len(alph)-i-1)
den = math.factorial(len(alph)-i-len(word)) * math.factorial(len(word)-1)
pos += num // den
def rank_perm(word, alph, depth=0):
if not word: return 0
if depth == 0:
word = list(word)
alph = sorted(alph)
pos = 0
for c in alph:
if c == word[0]:
# Recurse
new_word = [x for x in word if x != c]
new_alph = [x for x in alph if x != c]
return pos + rank_perm(new_word, new_alph, depth+1)
else:
num = math.factorial(len(alph)-1)
den = math.factorial(len(alph)-len(word))
pos += num // den
#== Validation =====================================================================
# Params
def get_alph(): return range(8)
r = 6
a = list(sorted(itertools.combinations(get_alph(), r)))
b = list(sorted(itertools.permutations(get_alph(), r)))
# Tests
PASS_COMB = True
PASS_PERM = True
for (i,x) in enumerate(a):
j = rank_comb(x, get_alph())
if i != j:
PASS_COMB = False
print("rank_comb() FAIL:", i, j)
for (i,x) in enumerate(b):
j = rank_perm(x, get_alph())
if i != j:
PASS_PERM = False
print("rank_perm() FAIL:", i, j)
print("rank_comb():", "PASS" if PASS_COMB else "FAIL")
print("rank_perm():", "PASS" if PASS_PERM else "FAIL")
The functions are similar, but there are few differences:
new_alph is filtered differently.
The calculation of both num and den are different.
Update:
rank_comb2 doesn't require sorting the input word (a 3-tuple):
import itertools
import math
def rank_comb2(word, alph, depth=0):
if not word: return 0
if depth == 0:
word = list(word)
alph = sorted(alph)
pos = 0
for (i,c) in enumerate(alph):
if c == min(word):
# Recurse
new_word = [x for x in word if x != c]
new_alph = [x for x in alph if x > c]
return pos + rank_comb2(new_word, new_alph, depth+1)
else:
num = math.factorial(len(alph)-i-1)
den = math.factorial(len(alph)-i-len(word)) * math.factorial(len(word)-1)
pos += num // den
r1 = rank_comb2([2,4,1], range(5))
r2 = rank_comb2([1,4,2], range(5))
r3 = rank_comb2([4,1,2], range(5))
print(r1, r2, r3) # 7 7 7

Related

Is there a more efficient way to compare two lists in python than O(m*n)?

I am trying to find a method for comparing two lists in python in a more efficient way than what I think is the current O(m*n) runtime. Right now I have a brute force approach of iterating each item in m and comparing it to n but is anything else possible? I have tried maybe sorting the lists first for maybe something faster but I am kind of stuck on whether anything else could work here.
In my function i take each item in m and compare it to n and count the number of times the item in m is greater than the item in n.
n = [1,3,7]
m = [2,9]
def comparison(n,m):
counter = 0
for i in m:
for j in n:
if i >= j:
counter += 1
return counter
Here's how you could use a binary search approach after sorting the target list:
from bisect import bisect_right
n = [1,3,7,2]
m = [2,9]
n.sort()
counter = sum(bisect_right(n,value) for value in m)
print(counter) # 6
This should correspond to O((n+m) x log(n)) if n is not known to be sorted. If n is always provided in sorted order, then you don't need your function to sort it and you will get O(m x log(n)) time complexity.
I wrote a code for you to test which one runs faster using the built-in "timeit" library. You can test others' advice using the same structure. There is the code:
import timeit
import numpy as np
n = [1,3,7]
m = [9,2]
my_code = '''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
return counter
'''
mysetup = "import numpy as np"
my_code2 = '''
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
'''
my_code3 = '''
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
'''
def comparison(n,m):
counter = 0
for i in n:
for j in m:
if i >= j:
counter += 1
print(counter)
return counter
def comparison_with_numpy(n,m):
x = np.array(n)
y = np.array(m)
smaller = np.array([x[i] > y[:] for i in range(x.shape[0])]).astype('int')
return sum(smaller)[0]
def sort_first(n,m):
sorted(n)
sorted(m)
count = 0
if len(n) > len(m):
iteration = len(n)
else:
iteration = len(m)
for _ in range(iteration):
if n != []:
y = n.pop(0)
if m != []:
x = m.pop(0)
if y > x:
count += 1
return count
def main():
print('comparison /w sort\t\t',timeit.timeit(stmt = my_code3,number=10000))
print('comparison\t\t',timeit.timeit(stmt = my_code,number=10000))
print('comparison with numpy\t\t',timeit.timeit(setup = mysetup
,stmt = my_code2
,number=10000))
if __name__ == "__main__":
main()

How to count common letters in order between two words in Python?

I have a string pizzas and when comparing it to pizza - it is not the same. How can you make a program that counts common letters (in order) between two words, and if it's a 60% match then a variable match is True?
For e.g. pizz and pizzas have 4 out of 6 letters in common, which is a 66% match, which means match must be True, but zzip and pizzasdo not have any letters in order in common, thus match is False
You can write a function to implement this logic.
zip is used to loop through the 2 strings simultaneously.
def checker(x, y):
c = 0
for i, j in zip(x, y):
if i==j:
c += 1
else:
break
return c/len(x)
res = checker('pizzas', 'pizz') # 0.6666666666666666
def longestSubstringFinder(string1, string2):
answer = ""
len1, len2 = len(string1), len(string2)
for i in range(len1):
match = ""
for j in range(len2):
if (i + j < len1 and string1[i + j] == string2[j]):
match += string2[j]
else:
if (len(match) > len(answer)): answer = match
match = ""
return answer
ss_len = len(longestSubstringFinder("pizz", "pizzas"))
max_len = max(len("pizza"),len("pizzas"))
percent = ss_len/max_len*100
print(percent)
if(percent>=60):
print("True");
else:
print("False")
Optimised algorithm using dynamic programming:
def LCSubStr(X, Y, m, n):
LCSuff = [[0 for k in range(n+1)] for l in range(m+1)]
result = 0
for i in range(m + 1):
for j in range(n + 1):
if (i == 0 or j == 0):
LCSuff[i][j] = 0
elif (X[i-1] == Y[j-1]):
LCSuff[i][j] = LCSuff[i-1][j-1] + 1
result = max(result, LCSuff[i][j])
else:
LCSuff[i][j] = 0
return result
This will directly return the length of LCS.

Evenly intermix two lists of elements (load balance)

Let's say I have two strings made with only 1 character:
'aaaaaaa'
'bbb'
I'd like to find an algorithm to produce a combined string of:
'aabaabaaba'
The two are merged so that there is the fewest # of consecutive characters from either list (in this case that # is 2). The length of each string is arbitrary, and I'd like for it to be symmetrical. Bonus points for extending it to more than just 2 strings.
I am doing this in python, but the language doesn't matter. This is for a load balancing problem I'm working on.
You can use the elements alternatively and use a letter of the longer string if necessary. You can determine whether an additional letter is possible with integer arithmetics: A fraction tells you how many letters come between each letter pair. You accumulate this fraction and use letters from the longer array as long as that accumulated fraction is larger than ½:
def intertwine(a, b):
""" Return a combination of string with fewest number of
consecutive elements from one string
"""
if len(b) > len(a):
return intertwine(b, a)
if not b:
return a
a = list(a)
b = list(b)
num = len(a) - len(b)
denom = len(b)
acc = 0
res = []
while a or b:
acc += num
while acc >= denom / 2:
if a: res += a.pop(0)
acc -= num
if a: res += a.pop(0)
if b: res += b.pop(0)
return "".join(res)
print intertwine("aaabaaa", "bbb") # "aababbaaba"
print intertwine("aaaaaaa", "b") # "aaabaaaa"
print intertwine("aaaaaa", "b") # "aaabaaa"
print intertwine("aa", "bbbbbb") # "bbabbabb"
print intertwine("", "bbbbbb") # "bbbbbb"
print intertwine("", "") # ""
import itertools
def intermix(*containers):
mix = []
for c in sorted(containers, key=lambda c: len(c)):
if len(c) >= len(mix):
bigger, smaller = c, mix
else:
bigger, smaller = mix, c
ratio, remainder = divmod(len(bigger), len(smaller) + 1)
chunk_sizes = (ratio + (1 if i < remainder else 0) for i in range(len(smaller) + 1))
chunk_offsets = itertools.accumulate(chunk_sizes)
off_start = 0
new_mix = []
for i, off in enumerate(chunk_offsets):
new_mix.extend(bigger[off_start:off])
if i == len(smaller):
break
new_mix.append(smaller[i])
off_start = off
mix = new_mix
return mix

Check if a list is a rotation of another list that works with duplicates

I have this function for determining if a list is a rotation of another list:
def isRotation(a,b):
if len(a) != len(b):
return False
c=b*2
i=0
while a[0] != c[i]:
i+=1
for x in a:
if x!= c[i]:
return False
i+=1
return True
e.g.
>>> a = [1,2,3]
>>> b = [2,3,1]
>>> isRotation(a, b)
True
How do I make this work with duplicates? e.g.
a = [3,1,2,3,4]
b = [3,4,3,1,2]
And can it be done in O(n)time?
The following meta-algorithm will solve it.
Build a concatenation of a, e.g., a = [3,1,2,3,4] => aa = [3,1,2,3,4,3,1,2,3,4].
Run any string adaptation of a string-matching algorithm, e.g., Boyer Moore to find b in aa.
One particularly easy implementation, which I would first try, is to use Rabin Karp as the underlying algorithm. In this, you would
calculate the Rabin Fingerprint for b
calculate the Rabin fingerprint for aa[: len(b)], aa[1: len(b) + 1], ..., and compare the lists only when the fingerprints match
Note that
The Rabin fingerprint for a sliding window can be calculated iteratively very efficiently (read about it in the Rabin-Karp link)
If your list is of integers, you actually have a slightly easier time than for strings, as you don't need to think what is the numerical hash value of a letter
-
You can do it in 0(n) time and 0(1) space using a modified version of a maximal suffixes algorithm:
From Jewels of Stringology:
Cyclic equality of words
A rotation of a word u of length n is any word of the form u[k + 1...n][l...k]. Let u, w be two words of the same length n. They are said to be cyclic-equivalent if u(i) == w(j) for some i, j.
If words u and w are written as circles, they are cyclic-equivalent if the circles coincide after appropriate rotations.
There are several linear-time algorithms for testing the cyclic-equivalence
of two words. The simplest one is to apply any string matching algorithm to pattern pat = u and text = ww because words u and w are cyclic=equivalent if pat occurs in text.
Another algorithm is to find maximal suffixes of uu and ww and check if
they are identical on prefixes of size n. We have chosen this problem because there is simpler interesting algorithm, working in linear time and constant space simultaneously, which deserves presentation.
Algorithm Cyclic-Equivalence(u, w)
{ checks cyclic equality of u and w of common length n }
x := uu; y := ww;
i := 0; j := 0;
while (i < n) and (j < n) do begin
k := 1;
while x[i + k] = y[j + k] do k := k + 1;
if k > n then return true;
if x[i + k]> y[i + k] then i := i + k else j := j + k;
{ invariant }
end;
return false;
Which translated to python becomes:
def cyclic_equiv(u, v):
n, i, j = len(u), 0, 0
if n != len(v):
return False
while i < n and j < n:
k = 1
while k <= n and u[(i + k) % n] == v[(j + k) % n]:
k += 1
if k > n:
return True
if u[(i + k) % n] > v[(j + k) % n]:
i += k
else:
j += k
return False
Running a few examples:
In [4]: a = [3,1,2,3,4]
In [5]: b =[3,4,3,1,2]
In [6]: cyclic_equiv(a,b)
Out[6]: True
In [7]: b =[3,4,3,2,1]
In [8]: cyclic_equiv(a,b)
Out[8]: False
In [9]: b =[3,4,3,2]
In [10]: cyclic_equiv(a,b)
Out[10]: False
In [11]: cyclic_equiv([1,2,3],[1,2,3])
Out[11]: True
In [12]: cyclic_equiv([3,1,2],[1,2,3])
Out[12]: True
A more naive approach would be to use a collections.deque to rotate the elements:
def rot(l1,l2):
from collections import deque
if l1 == l2:
return True
# if length is different we cannot get a match
if len(l2) != len(l1):
return False
# if any elements are different we cannot get a match
if set(l1).difference(l2):
return False
l2,l1 = deque(l2),deque(l1)
for i in range(len(l1)):
l2.rotate() # l2.appendleft(d.pop())
if l1 == l2:
return True
return False
I think you could use something like this:
a1 = [3,4,5,1,2,4,2]
a2 = [4,5,1,2,4,2,3]
# Array a2 is rotation of array a1 if it's sublist of a1+a1
def is_rotation(a1, a2):
if len(a1) != len(a2):
return False
double_array = a1 + a1
return check_sublist(double_array, a2)
def check_sublist(a1, a2):
if len(a1) < len(a2):
return False
j = 0
for i in range(len(a1)):
if a1[i] == a2[j]:
j += 1
else:
j = 0
if j == len(a2):
return True
return j == len(a2)
Just common sense if we are talking about interview questions:
we should remember that solution should be easy to code and to describe.
do not try to remember solution on interview. It's better to remember core principle and re-implement it.
Alternatively (I couldn't get the b in aa solution to work), you can 'rotate' your list and check if the rotated list is equal to b:
def is_rotation(a, b):
for n in range(len(a)):
c = c = a[-n:] + a[:-n]
if b == c:
return True
return False
I believe this would be O(n) as it only has one for loop. Hope it helps
This seems to work.
def func(a,b):
if len(a) != len(b):
return False
elif a == b:
return True
indices = [i for i, x in enumerate(b) if x == a[0] and i > 0]
for i in indices:
if a == b[i:] + b[:i]:
return True
return False
And this also:
def func(a, b):
length = len(a)
if length != len(b):
return False
i = 0
while i < length:
if a[0] == b[i]:
j = i
for x in a:
if x != b[j]:
break
j = (j + 1) % length
return True
i += 1
return False
You could try testing the performance of just using the rotate() function in the deque collection:
from collections import deque
def is_rotation(a, b):
if len(a) == len(b):
da = deque(a)
db = deque(b)
for offset in range(len(a)):
if da == db:
return True
da.rotate(1)
return False
In terms of performance, do you need to make this calculation many times for small arrays, or for few times on very large arrays? This would determine whether or not special case testing would speed it up.
If you can represent these as strings instead, just do:
def cyclically_equivalent(a, b):
return len(a) == len(b) and a in 2 * b
Otherwise, one should get a sublist searching algorithm, such as Knuth-Morris-Pratt (Google gives some implementations) and do
def cyclically_equivalent(a, b):
return len(a) == len(b) and sublist_check(a, 2 * b)
Knuth-Morris-Pratt algorithm is a string search algorithm that runs in O(n) where n is the length of a text S (assuming the existence of preconstructed table T, which runs in O(m) where m is the length of the search string). All in all it is O(n+m).
You could do a similar pattern matching algorithm inspired by KMP.
Concatenate a list to itself, like a+a or b+b - this is the searched text/list with 2*n elements
Build the table T based on the other list (be it b or a) - this is done in O(n)
Run the KMP inspired algorithm - this is done in O(2*n) (because you concatenate a list to itself)
Overall time complexity is O(2*n+n) = O(3*n) which is in O(n)

Longest palindrome in Python

I have a Python assignment which wants me write a program that finds the longest palindrome in a given text. I know there are examples of this function in other languages on this website, but I am a total beginner in Python and am having trouble writing the code.
This is how I am currently identifying palindromes:
def is_palindrome(word):
x = 0
for i in range (len(word)/2):
if (word[x]) == (word[len(word)-x-1]):
x+=1
if x == (len(word)/2):
return True
return False
Alternate way
def Is_palindrome(word):
return word==word[::-1]
# Assuming text is defined
print max((word for word in set(text.split()) if Is_Palindrome(word)), key=len)
I used:
def Is_palindrome(word):
x = 0
for i in range (len(word)/2):
if (word[x]) == (word[len(word)-x-1]):
x+=1
if x == (len(word)/2):
return True
return False
def longest_palindrome(text):
lst = text.split() #Split it into words (cannot have punctuation)
palindromes = [] #List that contains the palindromes
long_len = 0 #Length of the longest palindrome
longest = "" #The actual longest palindrome
for i in lst: #Loop through all the words
if Is_palindrome(i): #If the word is a palindrome
palindromes.append(i) #Add it to the palindrome list
for i in palindromes: #Loop through the palindrome list
if len(i) > long_len: #If the palindrome is longer than the longest one
longest = i #Set it as the longest one
longest_len = len(i) # Set the length of the longest one to the length of this one
return longest
def fastLongestPalindromes(seq):
seqLen = len(seq)
l = []
i = 0
palLen = 0
while i < seqLen:
if i > palLen and seq[i - palLen - 1] == seq[i]:
palLen += 2
i += 1
continue
l.append(palLen)
s = len(l) - 2
e = s - palLen
for j in range(s, e, -1):
d = j - e - 1
if l[j] == d:
palLen = d
break
l.append(min(d, l[j]))
else:
palLen = 1
i += 1
l.append(palLen)
lLen = len(l)
s = lLen - 2
e = s - (2 * seqLen + 1 - lLen)
for i in range(s, e, -1):
d = i - e - 1
l.append(min(d, l[i]))
return l
def getPalindrome(text):
lengths = fastLongestPalindromes(text)
start = 0
end = 0
length = 0
for i in range(len(lengths)):
if(lengths[i] > length):
length = lengths[i]
end = i//2+(lengths[i]//2)
start = i//2-(lengths[i]//2)
if(i%2 == 1):
start +=1
return text[start:end]
In linear time. (longer code, but faster than the other answers, atleast for long strings).
Source: http://www.akalin.cx/longest-palindrome-linear-time (first function is copy pasted)

Categories

Resources