I am trying to find a method for comparing two lists in Python that is more efficient than my current O(m*n) approach. Right now I have a brute-force solution that iterates over each item in m and compares it to every item in n, but is anything faster possible? I have thought about sorting the lists first to enable something quicker, but I am stuck on whether anything else could work here.
In my function I take each item in m, compare it to every item in n, and count the number of times the item in m is greater than or equal to the item in n.
n = [1,3,7]
m = [2,9]
def comparison(n, m):
    counter = 0
    for i in m:
        for j in n:
            if i >= j:
                counter += 1
    return counter
Here's how you could use a binary search approach after sorting the target list:
from bisect import bisect_right
n = [1,3,7,2]
m = [2,9]
n.sort()
counter = sum(bisect_right(n,value) for value in m)
print(counter) # 6
This corresponds to O((n + m) log n) if n is not known to be sorted. If n is always provided in sorted order, you don't need the sort inside your function and you get O(m log n) time complexity.
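If you want it wrapped as a drop-in replacement for your original comparison function, a sketch (sorting a copy so the caller's list is left untouched) could look like this:

from bisect import bisect_right

def comparison(n, m):
    n_sorted = sorted(n)  # O(n log n); skip if n is already sorted
    # bisect_right tells us how many items of n each value is >=
    return sum(bisect_right(n_sorted, value) for value in m)

print(comparison([1, 3, 7], [2, 9]))  # 4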
I wrote some code so you can test which one runs faster using the built-in timeit library. You can benchmark other suggestions using the same structure. Here is the code:
import timeit
import numpy as np
n = [1,3,7]
m = [9,2]
# The statements to time just call the functions defined below; the setup
# string imports them (and the test lists) into timeit's namespace.
my_code = "comparison(n, m)"
my_code2 = "comparison_with_numpy(n, m)"
my_code3 = "sort_first(n, m)"
mysetup = "from __main__ import comparison, comparison_with_numpy, sort_first, n, m"
def comparison(n, m):
    counter = 0
    for i in n:
        for j in m:
            if i >= j:
                counter += 1
    return counter

def comparison_with_numpy(n, m):
    x = np.array(n)
    y = np.array(m)
    # pairwise comparison matrix of shape (len(n), len(m)); count the hits
    greater_equal = np.array([x[i] >= y[:] for i in range(x.shape[0])]).astype('int')
    return greater_equal.sum()

def sort_first(n, m):
    # sort both lists, then for each element of n count (via a moving
    # pointer) how many elements of m it is >=
    n = sorted(n)
    m = sorted(m)
    count = 0
    i = 0
    for x in n:
        while i < len(m) and m[i] <= x:
            i += 1
        count += i
    return count
def main():
    print('comparison with sort\t', timeit.timeit(stmt=my_code3, setup=mysetup, number=10000))
    print('comparison\t\t', timeit.timeit(stmt=my_code, setup=mysetup, number=10000))
    print('comparison with numpy\t', timeit.timeit(stmt=my_code2, setup=mysetup, number=10000))

if __name__ == "__main__":
    main()
I have a string pizzas and when comparing it to pizza it is not the same. How can you make a program that counts the letters two words have in common (in order), and sets a variable match to True if they are at least a 60% match?
For example, pizz and pizzas have 4 out of 6 letters in common, which is a 66% match, so match must be True; but zzip and pizzas do not have any letters in order in common, so match is False.
You can write a function to implement this logic.
zip is used to loop through the 2 strings simultaneously.
def checker(x, y):
    c = 0
    for i, j in zip(x, y):
        if i == j:
            c += 1
        else:
            break
    return c / len(x)

res = checker('pizzas', 'pizz')  # 0.6666666666666666
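To get the match variable from the question, you could then compare that ratio against the threshold (assuming a 60% match means a ratio of at least 0.6):

match = checker('pizzas', 'pizz') >= 0.6
print(match)  # True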
def longestSubstringFinder(string1, string2):
    answer = ""
    len1, len2 = len(string1), len(string2)
    for i in range(len1):
        match = ""
        for j in range(len2):
            if i + j < len1 and string1[i + j] == string2[j]:
                match += string2[j]
            else:
                if len(match) > len(answer):
                    answer = match
                match = ""
        # commit a match that runs all the way to the end of string2
        if len(match) > len(answer):
            answer = match
    return answer

ss_len = len(longestSubstringFinder("pizz", "pizzas"))
max_len = max(len("pizz"), len("pizzas"))
percent = ss_len / max_len * 100
print(percent)
if percent >= 60:
    print("True")
else:
    print("False")
Optimised algorithm using dynamic programming:
def LCSubStr(X, Y, m, n):
    # LCSuff[i][j] = length of the longest common suffix of X[:i] and Y[:j]
    LCSuff = [[0 for k in range(n + 1)] for l in range(m + 1)]
    result = 0
    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0 or j == 0:
                LCSuff[i][j] = 0
            elif X[i - 1] == Y[j - 1]:
                LCSuff[i][j] = LCSuff[i - 1][j - 1] + 1
                result = max(result, LCSuff[i][j])
            else:
                LCSuff[i][j] = 0
    return result
This will directly return the length of the longest common substring.
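As a usage sketch for the original question (assuming the 60% threshold is measured against the longer word):

a, b = 'pizz', 'pizzas'
common = LCSubStr(a, b, len(a), len(b))       # 4
match = common / max(len(a), len(b)) >= 0.6   # 4/6 ≈ 0.67
print(match)  # True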
Let's say I have two strings made with only 1 character:
'aaaaaaa'
'bbb'
I'd like to find an algorithm to produce a combined string of:
'aabaabaaba'
The two are merged so that the longest run of consecutive characters from either string is as short as possible (in this case that run length is 2). The length of each string is arbitrary, and I'd like the result to be symmetrical. Bonus points for extending it to more than just two strings.
I am doing this in python, but the language doesn't matter. This is for a load balancing problem I'm working on.
You can use elements from the two strings alternately and insert an extra letter from the longer string when necessary. You can determine whether an additional letter is needed with integer arithmetic: a fraction tells you how many letters come between each letter pair. You accumulate this fraction and take letters from the longer string as long as the accumulated fraction is larger than ½:
def intertwine(a, b):
    """Return a combination of two strings with the fewest number of
    consecutive elements from one string.
    """
    if len(b) > len(a):
        return intertwine(b, a)
    if not b:
        return a
    a = list(a)
    b = list(b)
    num = len(a) - len(b)
    denom = len(b)
    acc = 0
    res = []
    while a or b:
        acc += num
        while acc >= denom // 2:
            if a:
                res += a.pop(0)
            acc -= num
        if a:
            res += a.pop(0)
        if b:
            res += b.pop(0)
    return "".join(res)

print(intertwine("aaabaaa", "bbb"))   # "aababbaaba"
print(intertwine("aaaaaaa", "b"))     # "aaabaaaa"
print(intertwine("aaaaaa", "b"))      # "aaabaaa"
print(intertwine("aa", "bbbbbb"))     # "bbabbabb"
print(intertwine("", "bbbbbb"))       # "bbbbbb"
print(intertwine("", ""))             # ""
import itertools
def intermix(*containers):
    mix = []
    for c in sorted(containers, key=lambda c: len(c)):
        if len(c) >= len(mix):
            bigger, smaller = c, mix
        else:
            bigger, smaller = mix, c
        ratio, remainder = divmod(len(bigger), len(smaller) + 1)
        chunk_sizes = (ratio + (1 if i < remainder else 0) for i in range(len(smaller) + 1))
        chunk_offsets = itertools.accumulate(chunk_sizes)
        off_start = 0
        new_mix = []
        for i, off in enumerate(chunk_offsets):
            new_mix.extend(bigger[off_start:off])
            if i == len(smaller):
                break
            new_mix.append(smaller[i])
            off_start = off
        mix = new_mix
    return mix
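intermix returns a list of characters rather than a string, so for the example in the question you could join the result back together (a usage sketch based on the strings above):

print("".join(intermix("aaaaaaa", "bbb")))  # "aabaabaaba"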
I have this function for determining if a list is a rotation of another list:
def isRotation(a, b):
    if len(a) != len(b):
        return False
    c = b * 2
    i = 0
    while a[0] != c[i]:
        i += 1
    for x in a:
        if x != c[i]:
            return False
        i += 1
    return True
e.g.
>>> a = [1,2,3]
>>> b = [2,3,1]
>>> isRotation(a, b)
True
How do I make this work with duplicates? e.g.
a = [3,1,2,3,4]
b = [3,4,3,1,2]
And can it be done in O(n) time?
The following meta-algorithm will solve it.
Build a concatenation of a with itself, e.g., a = [3,1,2,3,4] => aa = [3,1,2,3,4,3,1,2,3,4].
Run a list adaptation of any string-matching algorithm, e.g., Boyer-Moore, to find b in aa.
One particularly easy implementation, which I would first try, is to use Rabin-Karp as the underlying algorithm. In this, you would
calculate the Rabin fingerprint for b
calculate the Rabin fingerprint for aa[: len(b)], aa[1: len(b) + 1], ..., and compare the lists only when the fingerprints match
Note that
the Rabin fingerprint for a sliding window can be calculated iteratively very efficiently (read about it in the Rabin-Karp link);
if your list is of integers, you actually have a slightly easier time than for strings, as you don't need to decide what the numerical hash value of a letter is (see the sketch below).
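A minimal sketch of that Rabin-Karp idea, assuming integer lists; the base and modulus below are illustrative choices, and every fingerprint hit is verified with a direct slice comparison:

def is_rotation_rk(a, b, base=257, mod=(1 << 61) - 1):
    if len(a) != len(b):
        return False
    if not a:
        return True
    n = len(b)
    aa = a + a
    hb = 0  # fingerprint of b
    hw = 0  # fingerprint of the current window of aa
    for x, y in zip(b, aa[:n]):
        hb = (hb * base + x) % mod
        hw = (hw * base + y) % mod
    high = pow(base, n - 1, mod)  # weight of the element leaving the window
    for i in range(len(aa) - n + 1):
        if hw == hb and aa[i:i + n] == b:  # compare lists only on a fingerprint match
            return True
        if i + n < len(aa):
            # slide the window one position: drop aa[i], take in aa[i + n]
            hw = ((hw - aa[i] * high) * base + aa[i + n]) % mod
    return False

print(is_rotation_rk([3, 1, 2, 3, 4], [3, 4, 3, 1, 2]))  # True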
You can do it in O(n) time and O(1) space using a modified version of a maximal suffixes algorithm:
From Jewels of Stringology:
Cyclic equality of words
A rotation of a word u of length n is any word of the form u[k+1...n]u[1...k]. Let u, w be two words of the same length n. They are said to be cyclic-equivalent if u(i) == w(j) for some rotations i, j, where u(i) denotes u rotated by i positions.
If words u and w are written as circles, they are cyclic-equivalent if the circles coincide after appropriate rotations.
There are several linear-time algorithms for testing the cyclic-equivalence of two words. The simplest one is to apply any string-matching algorithm to pattern pat = u and text = ww, because words u and w are cyclic-equivalent iff pat occurs in text.
Another algorithm is to find maximal suffixes of uu and ww and check if they are identical on prefixes of size n. We have chosen this problem because there is a simpler interesting algorithm, working in linear time and constant space simultaneously, which deserves presentation.
Algorithm Cyclic-Equivalence(u, w)
{ checks cyclic equality of u and w of common length n }
    x := uu; y := ww;
    i := 0; j := 0;
    while (i < n) and (j < n) do begin
        k := 1;
        while x[i + k] = y[j + k] do k := k + 1;
        if k > n then return true;
        if x[i + k] > y[j + k] then i := i + k else j := j + k;
        { invariant }
    end;
    return false;
Which translated to python becomes:
def cyclic_equiv(u, v):
    n, i, j = len(u), 0, 0
    if n != len(v):
        return False
    while i < n and j < n:
        k = 1
        while k <= n and u[(i + k) % n] == v[(j + k) % n]:
            k += 1
        if k > n:
            return True
        if u[(i + k) % n] > v[(j + k) % n]:
            i += k
        else:
            j += k
    return False
Running a few examples:
In [4]: a = [3,1,2,3,4]
In [5]: b =[3,4,3,1,2]
In [6]: cyclic_equiv(a,b)
Out[6]: True
In [7]: b =[3,4,3,2,1]
In [8]: cyclic_equiv(a,b)
Out[8]: False
In [9]: b =[3,4,3,2]
In [10]: cyclic_equiv(a,b)
Out[10]: False
In [11]: cyclic_equiv([1,2,3],[1,2,3])
Out[11]: True
In [12]: cyclic_equiv([3,1,2],[1,2,3])
Out[12]: True
A more naive approach would be to use a collections.deque to rotate the elements:
def rot(l1, l2):
    from collections import deque
    if l1 == l2:
        return True
    # if length is different we cannot get a match
    if len(l2) != len(l1):
        return False
    # if any elements are different we cannot get a match
    if set(l1).difference(l2):
        return False
    l2, l1 = deque(l2), deque(l1)
    for i in range(len(l1)):
        l2.rotate()  # equivalent to l2.appendleft(l2.pop())
        if l1 == l2:
            return True
    return False
I think you could use something like this:
a1 = [3,4,5,1,2,4,2]
a2 = [4,5,1,2,4,2,3]
# Array a2 is a rotation of array a1 if it's a sublist of a1 + a1
def is_rotation(a1, a2):
    if len(a1) != len(a2):
        return False
    double_array = a1 + a1
    return check_sublist(double_array, a2)

def check_sublist(a1, a2):
    # straightforward scan: compare a2 against every window of a1
    if len(a1) < len(a2):
        return False
    for i in range(len(a1) - len(a2) + 1):
        if a1[i:i + len(a2)] == a2:
            return True
    return False
Just common sense if we are talking about interview questions:
we should remember that the solution should be easy to code and to describe.
do not try to memorise a solution for the interview. It's better to remember the core principle and re-implement it.
Alternatively (I couldn't get the b in aa solution to work), you can 'rotate' your list and check if the rotated list is equal to b:
def is_rotation(a, b):
    for n in range(len(a)):
        c = a[-n:] + a[:-n]
        if b == c:
            return True
    return False
There is only one explicit for loop here, but the list comparison inside it is itself O(n), so overall this is O(n^2) rather than O(n). Hope it helps.
This seems to work.
def func(a, b):
    if len(a) != len(b):
        return False
    elif a == b:
        return True
    indices = [i for i, x in enumerate(b) if x == a[0] and i > 0]
    for i in indices:
        if a == b[i:] + b[:i]:
            return True
    return False
And this also:
def func(a, b):
    length = len(a)
    if length != len(b):
        return False
    i = 0
    while i < length:
        if a[0] == b[i]:
            j = i
            for x in a:
                if x != b[j]:
                    break
                j = (j + 1) % length
            else:
                # every element matched against the rotation starting at i
                return True
        i += 1
    return False
You could try testing the performance of just using the rotate() function in the deque collection:
from collections import deque

def is_rotation(a, b):
    if len(a) == len(b):
        da = deque(a)
        db = deque(b)
        for offset in range(len(a)):
            if da == db:
                return True
            da.rotate(1)
    return False
In terms of performance, do you need to make this calculation many times on small arrays, or a few times on very large arrays? This would determine whether or not special-case testing would speed it up.
If you can represent these as strings instead, just do:
def cyclically_equivalent(a, b):
    return len(a) == len(b) and a in 2 * b
Otherwise, one should get a sublist searching algorithm, such as Knuth-Morris-Pratt (Google gives some implementations) and do
def cyclically_equivalent(a, b):
    return len(a) == len(b) and sublist_check(a, 2 * b)
The Knuth-Morris-Pratt algorithm is a string-search algorithm that runs in O(n), where n is the length of the text S (assuming the existence of a preconstructed table T, which is built in O(m), where m is the length of the search string). All in all it is O(n + m).
You could use a similar pattern-matching algorithm inspired by KMP, as sketched below.
Concatenate a list to itself, like a+a or b+b - this is the searched text/list with 2*n elements.
Build the table T based on the other list (be it b or a) - this is done in O(n).
Run the KMP-inspired algorithm - this is done in O(2*n) (because you concatenate a list to itself).
Overall time complexity is O(2*n + n) = O(3*n), which is in O(n).
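A sketch of those steps for lists (my own illustration of the idea, not a reference implementation):

def build_table(pattern):
    # table[i] = length of the longest proper prefix of pattern[:i + 1]
    # that is also a suffix of it (the usual KMP failure function)
    table = [0] * len(pattern)
    k = 0
    for i in range(1, len(pattern)):
        while k > 0 and pattern[i] != pattern[k]:
            k = table[k - 1]
        if pattern[i] == pattern[k]:
            k += 1
        table[i] = k
    return table

def is_rotation_kmp(a, b):
    if len(a) != len(b):
        return False
    if not a:
        return True
    text = a + a          # searched text with 2*n elements
    table = build_table(b)
    k = 0
    for x in text:
        while k > 0 and x != b[k]:
            k = table[k - 1]
        if x == b[k]:
            k += 1
        if k == len(b):
            return True
    return False

print(is_rotation_kmp([3, 1, 2, 3, 4], [3, 4, 3, 1, 2]))  # True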
I have a Python assignment which wants me to write a program that finds the longest palindrome in a given text. I know there are examples of this function in other languages on this website, but I am a total beginner in Python and am having trouble writing the code.
This is how I am currently identifying palindromes:
def is_palindrome(word):
    x = 0
    for i in range(len(word) // 2):
        if word[x] == word[len(word) - x - 1]:
            x += 1
    if x == len(word) // 2:
        return True
    return False
Alternate way
def Is_palindrome(word):
    return word == word[::-1]

# Assuming text is defined
print(max((word for word in set(text.split()) if Is_palindrome(word)), key=len))
I used:
def Is_palindrome(word):
    x = 0
    for i in range(len(word) // 2):
        if word[x] == word[len(word) - x - 1]:
            x += 1
    if x == len(word) // 2:
        return True
    return False

def longest_palindrome(text):
    lst = text.split()  # Split it into words (cannot have punctuation)
    palindromes = []    # List that contains the palindromes
    long_len = 0        # Length of the longest palindrome
    longest = ""        # The actual longest palindrome
    for i in lst:                  # Loop through all the words
        if Is_palindrome(i):       # If the word is a palindrome
            palindromes.append(i)  # Add it to the palindrome list
    for i in palindromes:          # Loop through the palindrome list
        if len(i) > long_len:      # If the palindrome is longer than the longest one
            longest = i            # Set it as the longest one
            long_len = len(i)      # Update the length of the longest one
    return longest
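For instance, with a hypothetical input sentence:

text = "madam is racing a racecar"
print(longest_palindrome(text))  # racecar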
def fastLongestPalindromes(seq):
    # Returns a list of 2 * len(seq) + 1 entries: the length of the longest
    # palindrome centred at each character and at each gap between characters.
    seqLen = len(seq)
    l = []
    i = 0
    palLen = 0
    while i < seqLen:
        if i > palLen and seq[i - palLen - 1] == seq[i]:
            palLen += 2
            i += 1
            continue
        l.append(palLen)
        s = len(l) - 2
        e = s - palLen
        for j in range(s, e, -1):
            d = j - e - 1
            if l[j] == d:
                palLen = d
                break
            l.append(min(d, l[j]))
        else:
            palLen = 1
        i += 1
    l.append(palLen)
    lLen = len(l)
    s = lLen - 2
    e = s - (2 * seqLen + 1 - lLen)
    for i in range(s, e, -1):
        d = i - e - 1
        l.append(min(d, l[i]))
    return l

def getPalindrome(text):
    lengths = fastLongestPalindromes(text)
    start = 0
    end = 0
    length = 0
    for i in range(len(lengths)):
        if lengths[i] > length:
            length = lengths[i]
            end = i // 2 + (lengths[i] // 2)
            start = i // 2 - (lengths[i] // 2)
            if i % 2 == 1:
                start += 1
    return text[start:end]
This runs in linear time (longer code, but faster than the other answers, at least for long strings).
Source: http://www.akalin.cx/longest-palindrome-linear-time (the first function is copied from there)