This question already has answers here:
Big O, how do you calculate/approximate it?
(24 answers)
Closed 2 years ago.
def is_prime(x):
'''
Function to check if a number is prime
'''
if x == 2:
return True
if x%2 != 0: #Check if number is even since all primes are odd except 2
a = [x % i for i in range(2,x+1)]
b = [i for i in a if i == 0] # Checks to make sure there's only one modulus of 0
if len(b) == 1:
return True
else:
return False
else:
return False
So like yeah please what is the time complexity (all those 0/n things) and how do i find that, a good resource link would be helpful (:
Your complexity is O(x) as you run one loop from 2 to x+1.
You can just check upto sqrt(x). That will bring down the complexity to O(sqrt(x)). (You can break once you find one factor, even though it won't bring down the worst time complexity)
Just change this line -
a = [x % i for i in range(2,math.sqrt(x+1))]
Why up to square root?
You can just google, there are many proofs. The simple one being, if
a = b.c, then the at least there is one divisor of a which is less than sqrt(a), or equal if a is square number a = b*b.
There are many fast heuristic and probabilistic (Miller-Rabin is very famous and frequently used) algorithms for faster prime detection.
Here's one which is deterministic:
The Adleman–Pomerance–Rumely primality test is an algorithm for determining whether a number is prime. Unlike other, more efficient algorithms for this purpose, it avoids the use of random numbers, so it is a deterministic primality test.
It's complexity is log(x)^O(logloglogx)
import copy
import time
from math import gcd # version >= 3.5
# primality test by trial division
def isprime_slow(n):
if n<2:
return False
elif n==2 or n==3:
return True
elif n%2==0:
return False
else:
i = 3
while i*i <= n:
if n%i == 0:
return False
i+=2
return True
# v_q(t): how many time is t divided by q
def v(q, t):
ans = 0
while(t % q == 0):
ans +=1
t//= q
return ans
def prime_factorize(n):
ret = []
p=2
while p*p <= n:
if n%p==0:
num = 0
while n%p==0:
num+=1
n//= p
ret.append((p,num))
p+= 1
if n!=1:
ret.append((n,1))
return ret
# calculate e(t)
def e(t):
s = 1
q_list = []
for q in range(2, t+2):
if t % (q-1) == 0 and isprime_slow(q):
s *= q ** (1+v(q,t))
q_list.append(q)
return 2*s, q_list
# Jacobi sum
class JacobiSum(object):
def __init__(self, p, k, q):
self.p = p
self.k = k
self.q = q
self.m = (p-1)*p**(k-1)
self.pk = p**k
self.coef = [0]*self.m
# 1
def one(self):
self.coef[0] = 1
for i in range(1,self.m):
self.coef[i] = 0
return self
# product of JacobiSum
# jac : JacobiSum
def mul(self, jac):
m = self.m
pk = self.pk
j_ret=JacobiSum(self.p, self.k, self.q)
for i in range(m):
for j in range(m):
if (i+j)% pk < m:
j_ret.coef[(i+j)% pk] += self.coef[i] * jac.coef[j]
else:
r = (i+j) % pk - self.p ** (self.k-1)
while r>=0:
j_ret.coef[r] -= self.coef[i] * jac.coef[j]
r-= self.p ** (self.k-1)
return j_ret
def __mul__(self, right):
if type(right) is int:
# product with integer
j_ret=JacobiSum(self.p, self.k, self.q)
for i in range(self.m):
j_ret.coef[i] = self.coef[i] * right
return j_ret
else:
# product with JacobiSum
return self.mul(right)
# power of JacobiSum(x-th power mod n)
def modpow(self, x, n):
j_ret=JacobiSum(self.p, self.k, self.q)
j_ret.coef[0]=1
j_a = copy.deepcopy(self)
while x>0:
if x%2==1:
j_ret = (j_ret * j_a).mod(n)
j_a = j_a*j_a
j_a.mod(n)
x //= 2
return j_ret
# applying "mod n" to coefficient of self
def mod(self, n):
for i in range(self.m):
self.coef[i] %= n
return self
# operate sigma_x
# verification for sigma_inv
def sigma(self, x):
m = self.m
pk = self.pk
j_ret=JacobiSum(self.p, self.k, self.q)
for i in range(m):
if (i*x) % pk < m:
j_ret.coef[(i*x) % pk] += self.coef[i]
else:
r = (i*x) % pk - self.p ** (self.k-1)
while r>=0:
j_ret.coef[r] -= self.coef[i]
r-= self.p ** (self.k-1)
return j_ret
# operate sigma_x^(-1)
def sigma_inv(self, x):
m = self.m
pk = self.pk
j_ret=JacobiSum(self.p, self.k, self.q)
for i in range(pk):
if i<m:
if (i*x)%pk < m:
j_ret.coef[i] += self.coef[(i*x)%pk]
else:
r = i - self.p ** (self.k-1)
while r>=0:
if (i*x)%pk < m:
j_ret.coef[r] -= self.coef[(i*x)%pk]
r-= self.p ** (self.k-1)
return j_ret
# Is self p^k-th root of unity (mod N)
# if so, return h where self is zeta^h
def is_root_of_unity(self, N):
m = self.m
p = self.p
k = self.k
# case of zeta^h (h<m)
one = 0
for i in range(m):
if self.coef[i]==1:
one += 1
h = i
elif self.coef[i] == 0:
continue
elif (self.coef[i] - (-1)) %N != 0:
return False, None
if one == 1:
return True, h
# case of zeta^h (h>=m)
for i in range(m):
if self.coef[i]!=0:
break
r = i % (p**(k-1))
for i in range(m):
if i % (p**(k-1)) == r:
if (self.coef[i] - (-1))%N != 0:
return False, None
else:
if self.coef[i] !=0:
return False, None
return True, (p-1)*p**(k-1)+ r
# find primitive root
def smallest_primitive_root(q):
for r in range(2, q):
s = set({})
m = 1
for i in range(1, q):
m = (m*r) % q
s.add(m)
if len(s) == q-1:
return r
return None # error
# calculate f_q(x)
def calc_f(q):
g = smallest_primitive_root(q)
m = {}
for x in range(1,q-1):
m[pow(g,x,q)] = x
f = {}
for x in range(1,q-1):
f[x] = m[ (1-pow(g,x,q))%q ]
return f
# sum zeta^(a*x+b*f(x))
def calc_J_ab(p, k, q, a, b):
j_ret = JacobiSum(p,k,q)
f = calc_f(q)
for x in range(1,q-1):
pk = p**k
if (a*x+b*f[x]) % pk < j_ret.m:
j_ret.coef[(a*x+b*f[x]) % pk] += 1
else:
r = (a*x+b*f[x]) % pk - p**(k-1)
while r>=0:
j_ret.coef[r] -= 1
r-= p**(k-1)
return j_ret
# calculate J(p,q)(p>=3 or p,q=2,2)
def calc_J(p, k, q):
return calc_J_ab(p, k, q, 1, 1)
# calculate J_3(q)(p=2 and k>=3)
def calc_J3(p, k, q):
j2q = calc_J(p, k, q)
j21 = calc_J_ab(p, k, q, 2, 1)
j_ret = j2q * j21
return j_ret
# calculate J_2(q)(p=2 and k>=3)
def calc_J2(p, k, q):
j31 = calc_J_ab(2, 3, q, 3, 1)
j_conv = JacobiSum(p, k, q)
for i in range(j31.m):
j_conv.coef[i*(p**k)//8] = j31.coef[i]
j_ret = j_conv * j_conv
return j_ret
# in case of p>=3
def APRtest_step4a(p, k, q, N):
print("Step 4a. (p^k, q = {0}^{1}, {2})".format(p,k,q))
J = calc_J(p, k, q)
# initialize s1=1
s1 = JacobiSum(p,k,q).one()
# J^Theta
for x in range(p**k):
if x % p == 0:
continue
t = J.sigma_inv(x)
t = t.modpow(x, N)
s1 = s1 * t
s1.mod(N)
# r = N mod p^k
r = N % (p**k)
# s2 = s1 ^ (N/p^k)
s2 = s1.modpow(N//(p**k), N)
# J^alpha
J_alpha = JacobiSum(p,k,q).one()
for x in range(p**k):
if x % p == 0:
continue
t = J.sigma_inv(x)
t = t.modpow((r*x)//(p**k), N)
J_alpha = J_alpha * t
J_alpha.mod(N)
# S = s2 * J_alpha
S = (s2 * J_alpha).mod(N)
# Is S root of unity
exist, h = S.is_root_of_unity(N)
if not exist:
# composite!
return False, None
else:
# possible prime
if h%p!=0:
l_p = 1
else:
l_p = 0
return True, l_p
# in case of p=2 and k>=3
def APRtest_step4b(p, k, q, N):
print("Step 4b. (p^k, q = {0}^{1}, {2})".format(p,k,q))
J = calc_J3(p, k, q)
# initialize s1=1
s1 = JacobiSum(p,k,q).one()
# J3^Theta
for x in range(p**k):
if x % 8 not in [1,3]:
continue
t = J.sigma_inv(x)
t = t.modpow(x, N)
s1 = s1 * t
s1.mod(N)
# r = N mod p^k
r = N % (p**k)
# s2 = s1 ^ (N/p^k)
s2 = s1.modpow(N//(p**k), N)
# J3^alpha
J_alpha = JacobiSum(p,k,q).one()
for x in range(p**k):
if x % 8 not in [1,3]:
continue
t = J.sigma_inv(x)
t = t.modpow((r*x)//(p**k), N)
J_alpha = J_alpha * t
J_alpha.mod(N)
# S = s2 * J_alpha * J2^delta
if N%8 in [1,3]:
S = (s2 * J_alpha ).mod(N)
else:
J2_delta = calc_J2(p,k,q)
S = (s2 * J_alpha * J2_delta).mod(N)
# Is S root of unity
exist, h = S.is_root_of_unity(N)
if not exist:
# composite
return False, None
else:
# possible prime
if h%p!=0 and (pow(q,(N-1)//2,N) + 1)%N==0:
l_p = 1
else:
l_p = 0
return True, l_p
# in case of p=2 and k=2
def APRtest_step4c(p, k, q, N):
print("Step 4c. (p^k, q = {0}^{1}, {2})".format(p,k,q))
J2q = calc_J(p, k, q)
# s1 = J(2,q)^2 * q (mod N)
s1 = (J2q * J2q * q).mod(N)
# s2 = s1 ^ (N/4)
s2 = s1.modpow(N//4, N)
if N%4 == 1:
S = s2
elif N%4 == 3:
S = (s2 * J2q * J2q).mod(N)
else:
print("Error")
# Is S root of unity
exist, h = S.is_root_of_unity(N)
if not exist:
# composite
return False, None
else:
# possible prime
if h%p!=0 and (pow(q,(N-1)//2,N) + 1)%N==0:
l_p = 1
else:
l_p = 0
return True, l_p
# in case of p=2 and k=1
def APRtest_step4d(p, k, q, N):
print("Step 4d. (p^k, q = {0}^{1}, {2})".format(p,k,q))
S2q = pow(-q, (N-1)//2, N)
if (S2q-1)%N != 0 and (S2q+1)%N != 0:
# composite
return False, None
else:
# possible prime
if (S2q + 1)%N == 0 and (N-1)%4==0:
l_p=1
else:
l_p=0
return True, l_p
# Step 4
def APRtest_step4(p, k, q, N):
if p>=3:
result, l_p = APRtest_step4a(p, k, q, N)
elif p==2 and k>=3:
result, l_p = APRtest_step4b(p, k, q, N)
elif p==2 and k==2:
result, l_p = APRtest_step4c(p, k, q, N)
elif p==2 and k==1:
result, l_p = APRtest_step4d(p, k, q, N)
else:
print("error")
if not result:
print("Composite")
return result, l_p
def APRtest(N):
t_list = [
2,
12,
60,
180,
840,
1260,
1680,
2520,
5040,
15120,
55440,
110880,
720720,
1441440,
4324320,
24504480,
73513440
]
print("N=", N)
if N<=3:
print("input should be greater than 3")
return False
# Select t
for t in t_list:
et, q_list = e(t)
if N < et*et:
break
else:
print("t not found")
return False
print("t=", t)
print("e(t)=", et, q_list)
# Step 1
print("=== Step 1 ===")
g = gcd(t*et, N)
if g > 1:
print("Composite")
return False
# Step 2
print("=== Step 2 ===")
l = {}
fac_t = prime_factorize(t)
for p, k in fac_t:
if p>=3 and pow(N, p-1, p*p)!=1:
l[p] = 1
else:
l[p] = 0
print("l_p=", l)
# Step 3 & Step 4
print("=== Step 3&4 ===")
for q in q_list:
if q == 2:
continue
fac = prime_factorize(q-1)
for p,k in fac:
# Step 4
result, l_p = APRtest_step4(p, k, q, N)
if not result:
# composite
print("Composite")
return False
elif l_p==1:
l[p] = 1
# Step 5
print("=== Step 5 ===")
print("l_p=", l)
for p, value in l.items():
if value==0:
# try other pair of (p,q)
print("Try other (p,q). p={}".format(p))
count = 0
i = 1
found = False
# try maximum 30 times
while count < 30:
q = p*i+1
if N%q != 0 and isprime_slow(q) and (q not in q_list):
count += 1
k = v(p, q-1)
# Step 4
result, l_p = APRtest_step4(p, k, q, N)
if not result:
# composite
print("Composite")
return False
elif l_p == 1:
found = True
break
i += 1
if not found:
print("error in Step 5")
return False
# Step 6
print("=== Step 6 ===")
r = 1
for t in range(t-1):
r = (r*N) % et
if r!=1 and r!= N and N % r == 0:
print("Composite", r)
return False
# prime!!
print("Prime!")
return True
if __name__ == '__main__':
start_time = time.time()
APRtest(2**521-1) # 157 digit, 18 sec
# APRtest(2**1279-1) # 386 digit, 355 sec
# APRtest(2074722246773485207821695222107608587480996474721117292752992589912196684750549658310084416732550077)
end_time = time.time()
print(end_time - start_time, "sec")
credit: https://github.com/wacchoz/APR_CL/blob/master/APR_CL.py
You only need to test odd numbers, except for 2 which you can test specially before the loop. This doesn't change the complexity, since it's a constant factor, but it reduces the time in half.
You should only test numbers up to math.sqrt(x). This changes the worst case complexity from O(n) to O(sqrt(n)).
You should stop as soon as you find a factor, rather than creating a list of all the x % i. This improves the best case complexity.
import math
def is_prime(x):
'''
Function to check if a number is prime
'''
if x == 2:
return True
if x % 2 == 0:
return False
for i in range(3, int(math.sqrt(x))+1, 2):
if x % i == 0:
return False
return True
Even better than checking all odd numbers is to check only prime numbers, using the Sieve of Eratosthenes.
I am trying to write RSA encryption and decryption in python without Crypto library and in short I have generated public(e,N) and private(d,N) keys to exchange with message and I don't know how to that.The message I want to send is also key:b'gAAAAABenIFsZD5Oa7GPNKPV7yBHSKasuzpMzYPPoXbEqX3cbxO_9-3eP9UdKOXsrQmLSesKkaeKk9VZXI6Qx-iWS8tglsxbRwjgdAWPZKQa8NLyH1ICKJgEihrc-9ybO6WgV_jASgHH0zg4mdEP8XhxQmg6-S96HA=='
Does someone know how to encrypt message with my public key and decrypt it with private ?
import random
def gcd(a, b): # gdc to find proper e
if (b == 0):
return a
else:
return gcd(b, a % b)
def isPrime(num):
if num > 1:
for i in range(2, num):
if (num % i) == 0:
return False;
break
else:
return True
else:
return False;
def pGenerator():
p = 0
while p == 0:
pn = random.randint(0, 40)
if (isPrime(pn)):
p = pn
break
return p
def qGenerator(p):
q = 0
while q == 0:
pn = random.randint(0, 40)
if (isPrime(pn) and pn != p and pn <p):
q = pn
break
return q
def eGenerator( fiN):
e = 0
while e == 0:
pn = random.randint(0, fiN)
if (gcd(pn, fiN) == 1):
e = pn
break
return e
def start():
p = pGenerator()
print(p)
q = qGenerator(p)
print(q)
N = p * q
fiN = (p - 1) * (q - 1)
print(fiN)
e = eGenerator(fiN)
d=multiplicative_inverse(e,fiN)
c=encrypt(e, N, "d")
decrypt(d,N,c)
print(c)
def encrypt(e,n, plaintext):
#Unpack the key into it's components
key=e
#Convert each letter in the plaintext to numbers based on the character using a^b mod m
cipher = [(ord(char) ** key) % n for char in plaintext]
#Return the array of bytes
return cipher
def multiplicative_inverse(e, phi):
d = 0
x1 = 0
x2 = 1
y1 = 1
temp_phi = phi
while e > 0:
temp1 = temp_phi / e
temp2 = temp_phi - temp1 * e
temp_phi = e
e = temp2
x = x2 - temp1 * x1
y = d - temp1 * y1
x2 = x1
x1 = x
d = y1
y1 = y
if temp_phi == 1:
return d + phi
start()
Have you tried rsapy?
Just read its docs on its PyPI page.
Example:
import rsapy
pub, pri = rsapy.genkey(2**512)
print(rsapy.encode("message", pub))
# This also works with b"message"
is there away to solve this problem even the code is work prefect for encrypted and decrypted the data when they are in same file, the problem happen when i divide the code into two parts, one part encryption and second part decryption, but still i'm getting wrong decryption data even i use same public key and n value was generated in encryption part.
suppose:
data='hello'
p=23
q=19
public key=(185,437)
private key=(533,437)
when i use the public key for decryption the data is wrong!! i have also try use private same also wrong!! any suggestion !
encryption code:
import random
def gcd(a, b):
while b != 0:
a, b = b, a % b
return a
def multiplicative_inverse(e, phi):
d = 0
x1 = 0
x2 = 1
y1 = 1
temp_phi = phi
while e > 0:
temp1 = temp_phi//e
temp2 = temp_phi - temp1 * e
temp_phi = e
e = temp2
x = x2- temp1* x1
y = d - temp1 * y1
x2 = x1
x1 = x
d = y1
y1 = y
if temp_phi == 1:
return d + phi
'''
Tests to see if a number is prime.
'''
def is_prime(num):
if num == 2:
return True
if num < 2 or num % 2 == 0:
return False
for n in range(3, int(num**0.5)+2, 2):
if num % n == 0:
return False
return True
def generate_keypair(p, q):
if not (is_prime(p) and is_prime(q)):
raise ValueError('Both numbers must be prime.')
elif p == q:
raise ValueError('p and q cannot be equal')
#n = pq
n = p * q
#Phi is the totient of n
phi = (p-1) * (q-1)
#Choose an integer e such that e and phi(n) are coprime
e = random.randrange(1, phi)
#Use Euclid's Algorithm to verify that e and phi(n) are comprime
g = gcd(e, phi)
while g != 1:
e = random.randrange(1, phi)
g = gcd(e, phi)
#Use Extended Euclid's Algorithm to generate the private key
d = multiplicative_inverse(e, phi)
#Return public and private keypair
#Public key is (e, n) and private key is (d, n)
return ((e, n), (d, n))
def encrypt(pk, plaintext):
#Unpack the key into it's components
key, n = pk
#Convert each letter in the plaintext to numbers based on the character using a^b mod m
cipher = [(ord(char) ** key) % n for char in plaintext]
#Return the array of bytes
return cipher
if __name__ == '__main__':
print ("RSA Encrypter/ Decrypter")
p = int(23)
q = int(19)
print ("Generating your public/private keypairs now . . .")
public, private = generate_keypair(p, q)
print ("Your public key is ", public ," and your private key is ", private)
message = str('hello')
encrypted_msg = encrypt(private, message)
print ("Your encrypted message is: ")
print (''.join(map(lambda x: str(x), encrypted_msg)))
decryption code:
def decrypt(k,pk, ciphertext):
#Unpack the key into its components
key=k
n = pk
#Generate the plaintext based on the ciphertext and key using a^b mod m
plain = [chr((ord(char) ** key) % n) for char in ciphertext]
return ''.join(plain)
if __name__ == '__main__':
'''
Detect if the script is being run directly by the user
'''
print ("RSA Encrypter/ Decrypter")
key = int(533)
n = int(437)
public=(key,n)
message = '271169420420218'
print ("Decrypting message with public key ", public ," . . .")
print ("Your message is:")
print (decrypt(key,n, message))
i'm using python 3.6 spyder
I am following Cormen Leiserson Rivest Stein (clrs) book and came across "kmp algorithm" for string matching. I implemented it using Python (as-is).
However, it doesn't seem to work for some reason. where is my fault?
The code is given below:
def kmp_matcher(t,p):
n=len(t)
m=len(p)
# pi=[0]*n;
pi = compute_prefix_function(p)
q=-1
for i in range(n):
while(q>0 and p[q]!=t[i]):
q=pi[q]
if(p[q]==t[i]):
q=q+1
if(q==m):
print "pattern occurs with shift "+str(i-m)
q=pi[q]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
pi[1]=0
k=0
for q in range(2,m):
while(k>0 and p[k]!=p[q]):
k=pi[k]
if(p[k]==p[q]):
k=k+1
pi[q]=k
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t,p)
This is a class I wrote based on CLRs KMP algorithm, which contains what you are after. Note that only DNA "characters" are accepted here.
class KmpMatcher(object):
def __init__(self, pattern, string, stringName):
self.motif = pattern.upper()
self.seq = string.upper()
self.header = stringName
self.prefix = []
self.validBases = ['A', 'T', 'G', 'C', 'N']
#Matches the motif pattern against itself.
def computePrefix(self):
#Initialize prefix array
self.fillPrefixList()
k = 0
for pos in range(1, len(self.motif)):
#Check valid nt
if(self.motif[pos] not in self.validBases):
self.invalidMotif()
#Unique base in motif
while(k > 0 and self.motif[k] != self.motif[pos]):
k = self.prefix[k]
#repeat in motif
if(self.motif[k] == self.motif[pos]):
k += 1
self.prefix[pos] = k
#Initialize the prefix list and set first element to 0
def fillPrefixList(self):
self.prefix = [None] * len(self.motif)
self.prefix[0] = 0
#An implementation of the Knuth-Morris-Pratt algorithm for linear time string matching
def kmpSearch(self):
#Compute prefix array
self.computePrefix()
#Number of characters matched
match = 0
found = False
for pos in range(0, len(self.seq)):
#Check valid nt
if(self.seq[pos] not in self.validBases):
self.invalidSequence()
#Next character is not a match
while(match > 0 and self.motif[match] != self.seq[pos]):
match = self.prefix[match-1]
#A character match has been found
if(self.motif[match] == self.seq[pos]):
match += 1
#Motif found
if(match == len(self.motif)):
print(self.header)
print("Match found at position: " + str(pos-match+2) + ':' + str(pos+1))
found = True
match = self.prefix[match-1]
if(found == False):
print("Sorry '" + self.motif + "'" + " was not found in " + str(self.header))
#An invalid character in the motif message to the user
def invalidMotif(self):
print("Error: motif contains invalid DNA nucleotides")
exit()
#An invalid character in the sequence message to the user
def invalidSequence(self):
print("Error: " + str(self.header) + "sequence contains invalid DNA nucleotides")
exit()
You might want to try out my code:
def recursive_find_match(i, j, pattern, pattern_track):
if pattern[i] == pattern[j]:
pattern_track.append(i+1)
return {"append":pattern_track, "i": i+1, "j": j+1}
elif pattern[i] != pattern[j] and i == 0:
pattern_track.append(i)
return {"append":pattern_track, "i": i, "j": j+1}
else:
i = pattern_track[i-1]
return recursive_find_match(i, j, pattern, pattern_track)
def kmp(str_, pattern):
len_str = len(str_)
len_pattern = len(pattern)
pattern_track = []
if len_pattern == 0:
return
elif len_pattern == 1:
pattern_track = [0]
else:
pattern_track = [0]
i = 0
j = 1
while j < len_pattern:
data = recursive_find_match(i, j, pattern, pattern_track)
i = data["i"]
j = data["j"]
pattern_track = data["append"]
index_str = 0
index_pattern = 0
match_from = -1
while index_str < len_str:
if index_pattern == len_pattern:
break
if str_[index_str] == pattern[index_pattern]:
if index_pattern == 0:
match_from = index_str
index_pattern += 1
index_str += 1
else:
if index_pattern == 0:
index_str += 1
else:
index_pattern = pattern_track[index_pattern-1]
match_from = index_str - index_pattern
Try this:
def kmp_matcher(t, d):
n=len(t)
m=len(d)
pi = compute_prefix_function(d)
q = 0
i = 0
while i < n:
if d[q]==t[i]:
q=q+1
i = i + 1
else:
if q != 0:
q = pi[q-1]
else:
i = i + 1
if q == m:
print "pattern occurs with shift "+str(i-q)
q = pi[q-1]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
k=1
l = 0
while k < m:
if p[k] <= p[l]:
l = l + 1
pi[k] = l
k = k + 1
else:
if l != 0:
l = pi[l-1]
else:
pi[k] = 0
k = k + 1
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t, p)
KMP stands for Knuth-Morris-Pratt it is a linear time string-matching algorithm.
Note that in python, the string is ZERO BASED, (while in the book the string starts with index 1).
So we can workaround this by inserting an empty space at the beginning of both strings.
This causes four facts:
The len of both text and pattern is augmented by 1, so in the loop range, we do NOT have to insert the +1 to the right interval. (note that in python the last step is excluded);
To avoid accesses out of range, you have to check the values of k+1 and q+1 BEFORE to give them as index to arrays;
Since the length of m is augmented by 1, in kmp_matcher, before to print the response, you have to check this instead: q==m-1;
For the same reason, to calculate the correct shift you have to compute this instead: i-(m-1)
so the correct code, based on your original question, and considering the starting code from Cormen, as you have requested, would be the following:
(note : I have inserted a matching pattern inside, and some debug text that helped me to find logical errors):
def compute_prefix_function(P):
m = len(P)
pi = [None] * m
pi[1] = 0
k = 0
for q in range(2, m):
print ("q=", q, "\n")
print ("k=", k, "\n")
if ((k+1) < m):
while (k > 0 and P[k+1] != P[q]):
print ("entered while: \n")
print ("k: ", k, "\tP[k+1]: ", P[k+1], "\tq: ", q, "\tP[q]: ", P[q])
k = pi[k]
if P[k+1] == P[q]:
k = k+1
print ("Entered if: \n")
print ("k: ", k, "\tP[k]: ", P[k], "\tq: ", q, "\tP[q]: ", P[q])
pi[q] = k
print ("Outside while or if: \n")
print ("pi[", q, "] = ", k, "\n")
print ("---next---")
print ("---end for---")
return pi
def kmp_matcher(T, P):
n = len(T)
m = len(P)
pi = compute_prefix_function(P)
q = 0
for i in range(1, n):
print ("i=", i, "\n")
print ("q=", q, "\n")
print ("m=", m, "\n")
if ((q+1) < m):
while (q > 0 and P[q+1] != T[i]):
q = pi[q]
if P[q+1] == T[i]:
q = q+1
if q == m-1:
print ("Pattern occurs with shift", i-(m-1))
q = pi[q]
print("---next---")
print("---end for---")
txt = " bacbababaabcbab"
ptn = " ababaab"
kmp_matcher(txt, ptn)
(so this would be the correct accepted answer...)
hope that it helps.