problem of decryption data using RSA method - python

is there away to solve this problem even the code is work prefect for encrypted and decrypted the data when they are in same file, the problem happen when i divide the code into two parts, one part encryption and second part decryption, but still i'm getting wrong decryption data even i use same public key and n value was generated in encryption part.
suppose:
data='hello'
p=23
q=19
public key=(185,437)
private key=(533,437)
when i use the public key for decryption the data is wrong!! i have also try use private same also wrong!! any suggestion !
encryption code:
import random
def gcd(a, b):
while b != 0:
a, b = b, a % b
return a
def multiplicative_inverse(e, phi):
d = 0
x1 = 0
x2 = 1
y1 = 1
temp_phi = phi
while e > 0:
temp1 = temp_phi//e
temp2 = temp_phi - temp1 * e
temp_phi = e
e = temp2
x = x2- temp1* x1
y = d - temp1 * y1
x2 = x1
x1 = x
d = y1
y1 = y
if temp_phi == 1:
return d + phi
'''
Tests to see if a number is prime.
'''
def is_prime(num):
if num == 2:
return True
if num < 2 or num % 2 == 0:
return False
for n in range(3, int(num**0.5)+2, 2):
if num % n == 0:
return False
return True
def generate_keypair(p, q):
if not (is_prime(p) and is_prime(q)):
raise ValueError('Both numbers must be prime.')
elif p == q:
raise ValueError('p and q cannot be equal')
#n = pq
n = p * q
#Phi is the totient of n
phi = (p-1) * (q-1)
#Choose an integer e such that e and phi(n) are coprime
e = random.randrange(1, phi)
#Use Euclid's Algorithm to verify that e and phi(n) are comprime
g = gcd(e, phi)
while g != 1:
e = random.randrange(1, phi)
g = gcd(e, phi)
#Use Extended Euclid's Algorithm to generate the private key
d = multiplicative_inverse(e, phi)
#Return public and private keypair
#Public key is (e, n) and private key is (d, n)
return ((e, n), (d, n))
def encrypt(pk, plaintext):
#Unpack the key into it's components
key, n = pk
#Convert each letter in the plaintext to numbers based on the character using a^b mod m
cipher = [(ord(char) ** key) % n for char in plaintext]
#Return the array of bytes
return cipher
if __name__ == '__main__':
print ("RSA Encrypter/ Decrypter")
p = int(23)
q = int(19)
print ("Generating your public/private keypairs now . . .")
public, private = generate_keypair(p, q)
print ("Your public key is ", public ," and your private key is ", private)
message = str('hello')
encrypted_msg = encrypt(private, message)
print ("Your encrypted message is: ")
print (''.join(map(lambda x: str(x), encrypted_msg)))
decryption code:
def decrypt(k,pk, ciphertext):
#Unpack the key into its components
key=k
n = pk
#Generate the plaintext based on the ciphertext and key using a^b mod m
plain = [chr((ord(char) ** key) % n) for char in ciphertext]
return ''.join(plain)
if __name__ == '__main__':
'''
Detect if the script is being run directly by the user
'''
print ("RSA Encrypter/ Decrypter")
key = int(533)
n = int(437)
public=(key,n)
message = '271169420420218'
print ("Decrypting message with public key ", public ," . . .")
print ("Your message is:")
print (decrypt(key,n, message))
i'm using python 3.6 spyder

Related

In python, open.write() gives me an error

This Code runs and saves all the values in my output text file till d but does not saves my value of public_key and private_key.it is a simple code in which i am trying to replicate RSA algorithm and trying to save all the values in a text file with open.write(). Can anyone tell me where it is wrong?
It looks like this:
import random
import time
import os
max_PrimLength = 1000000000000
output_folder_name=f"output/{int(time.time()*100)}"
os.makedirs(f"{output_folder_name}")
details_file=open(f"{output_folder_name}/details.txt","a+")
def egcd(a, b):
if a == 0:
return (b, 0, 1)
else:
g, y, x = egcd(b % a, a)
return (g, x - (b // a) * y, y)
def gcd(a, b):
while b != 0:
a, b = b, a % b
return a
def is_prime(num):
if num == 2:
return True
if num < 2 or num % 2 == 0:
return False
for n in range(3, int(num**0.5)+2, 2):
if num % n == 0:
return False
return True
def generateRandomPrim():
while(1):
ranPrime = random.randint(0,99999)
if is_prime(ranPrime):
return ranPrime
p = generateRandomPrim()
q = generateRandomPrim()
h = p*q
phi = (p-1) * (q-1)
e = random.randint(1, phi)
g = gcd(e,phi)
while g != 1:
e = random.randint(1, phi)
g = gcd(e, phi)
d = egcd(e, phi)[1]
d = d % phi
if(d < 0):
d += phi
print("First Prime Number(p): %d" % p)
details_file.write("First Prime Number(p): %d\n" % p)
print("Second Prime Number(q): %d" % q)
details_file.write("Second Prime Number(q): %d\n" % q)
print("h: %d" %h)
details_file.write("h: %d\n" %h)
print("phi(p-1)(q-1): %d" % phi)
details_file.write("phi(p-1)(q-1): %d\n" % phi)
print("e: %d" % e)
details_file.write("e: %d\n" %e)
print("d: %d" % d)
details_file.write("d: %d\n" % d)
def generate_keyPairs():
return ((e,h),(d,h))
if _name_ == 'main':
public_key,private_key = generate_keyPairs()
print("Public_Key(Traditional_RSA): ",public_key)
details_file.write("Public_Key(Traditional_RSA): \n",public_key)
print("Private_Key(Traditional_RSA): ",private_key)
details_file.write("Private_Key(Traditional_RSA): \n",private_key)
details_file.close()
Trackback Error occured:
TypeError Traceback (most recent call last)
<ipython-input-4-3018b2703be1> in <module>
144 public_key,private_key = generate_keyPairs()
145 print("Public_Key(Traditional_RSA): " ,public_key)
--> 146 details_file.write("Public_Key(Traditional_RSA): \n" ,public_key)
147 print("Private_Key(Traditional_RSA): " ,private_key)
148 details_file.write("Private_Key(Traditional_RSA): \n" ,private_key)
TypeError: write() takes exactly one argument (2 given)
The error is you are giving 2 parameters to write, use this:
...
details_file.write(f"Public_Key(Traditional_RSA): \n{public_key}")
...
details_file.write(f"Private_Key(Traditional_RSA): \n{private_key}")
also the __name__ == "__main__" is used to run the code block only when file has been run directly, i.e
> python this_file_name.py
But if you import this file to another file like:
# other_file.py
import this_file_name
and run that file:
python other_file.py
Then code block under if __name__ == "__main__": will not run.
You are giving 2 arguments to the write() function but it only expects one - the string you want to write to the file. You should format the arguments into a single strings for your last two calls to write(). Something like this:
details_file.write("Public_Key(Traditional_RSA):" + str(public_key) + "\n")
details_file.write("Private_Key(Traditional_RSA):" + str(private_key) + "\n")

How to encrypt and decrypt message RSA in python?

I am trying to write RSA encryption and decryption in python without Crypto library and in short I have generated public(e,N) and private(d,N) keys to exchange with message and I don't know how to that.The message I want to send is also key:b'gAAAAABenIFsZD5Oa7GPNKPV7yBHSKasuzpMzYPPoXbEqX3cbxO_9-3eP9UdKOXsrQmLSesKkaeKk9VZXI6Qx-iWS8tglsxbRwjgdAWPZKQa8NLyH1ICKJgEihrc-9ybO6WgV_jASgHH0zg4mdEP8XhxQmg6-S96HA=='
Does someone know how to encrypt message with my public key and decrypt it with private ?
import random
def gcd(a, b): # gdc to find proper e
if (b == 0):
return a
else:
return gcd(b, a % b)
def isPrime(num):
if num > 1:
for i in range(2, num):
if (num % i) == 0:
return False;
break
else:
return True
else:
return False;
def pGenerator():
p = 0
while p == 0:
pn = random.randint(0, 40)
if (isPrime(pn)):
p = pn
break
return p
def qGenerator(p):
q = 0
while q == 0:
pn = random.randint(0, 40)
if (isPrime(pn) and pn != p and pn <p):
q = pn
break
return q
def eGenerator( fiN):
e = 0
while e == 0:
pn = random.randint(0, fiN)
if (gcd(pn, fiN) == 1):
e = pn
break
return e
def start():
p = pGenerator()
print(p)
q = qGenerator(p)
print(q)
N = p * q
fiN = (p - 1) * (q - 1)
print(fiN)
e = eGenerator(fiN)
d=multiplicative_inverse(e,fiN)
c=encrypt(e, N, "d")
decrypt(d,N,c)
print(c)
def encrypt(e,n, plaintext):
#Unpack the key into it's components
key=e
#Convert each letter in the plaintext to numbers based on the character using a^b mod m
cipher = [(ord(char) ** key) % n for char in plaintext]
#Return the array of bytes
return cipher
def multiplicative_inverse(e, phi):
d = 0
x1 = 0
x2 = 1
y1 = 1
temp_phi = phi
while e > 0:
temp1 = temp_phi / e
temp2 = temp_phi - temp1 * e
temp_phi = e
e = temp2
x = x2 - temp1 * x1
y = d - temp1 * y1
x2 = x1
x1 = x
d = y1
y1 = y
if temp_phi == 1:
return d + phi
start()
Have you tried rsapy?
Just read its docs on its PyPI page.
Example:
import rsapy
pub, pri = rsapy.genkey(2**512)
print(rsapy.encode("message", pub))
# This also works with b"message"

Why does the encryption/decryption in my RSA code not work?

I'm currently coding a simplified RSA algorithm for a project at school but can't get it to work.
I've based the code off of the formulae c = m^e(mod N) and (c^d)mod N. The encryption function works to produce what looks like a feasible output but when I put it into the decryption function it either doesn't return the message correctly or gives me this error:
ValueError: chr() arg not in range(0x110000)
My code:
import random
import math
def is_prime(x):
for i in range(2,int(math.sqrt(x))+1):
if x % i == 0:
return False
break
return True
def gcd(a, b):
if (b == 0):
return a
else:
return gcd(b, a % b)
def generate_p_and_q(p,q):
p_and_q = []
p_and_q.append(p)
p_and_q.append(q)
return p_and_q
def generate_phi(p,q):
p_and_q = generate_p_and_q(p,q)
phi = (p_and_q[0] - 1)*(p_and_q[1] - 1)
return phi
def generate_N(p,q):
p_and_q = generate_p_and_q(p,q)
N = (p_and_q[0])*(p_and_q[1])
return N
def generate_e(p,q):
phi = generate_phi(p,q)
with open('First500Primes.txt') as f:
lines = f.read().splitlines()
for i in lines:
if int(i) > 1 and int(i)< phi:
if gcd(int(i), phi) == 1:
e = int(i)
break
return e
def encrypt_RSA():
encrypted = []
message = input("Enter a message to encrypt:")
message.lower()
with open('First500Primes.txt') as f:
lines = f.read().splitlines()
valid = False
choice = input("Do you want to: \nA: enter a key \nB: use a random key?\n")
if choice.lower() == 'a':
p = int(input("Enter a key - this must be a prime number between 0 and 500:"))
q = int(input("Enter a key - this must be a prime number between 0 and 500:\n"))
while valid != True:
valid = is_prime(p) and is_prime(q)
if valid == False:
print("Your numbers were not prime!")
p = int(input("Enter a key - this must be a prime number between 0 and 500:"))
q = int(input("Enter a key - this must be a prime number between 0 and 500:\n"))
else:
x = random.randint(0, 499)
y = random.randint(0, 499)
p = int(lines[x])
q = int(lines[y])
generate_p_and_q(p,q)
e = generate_e(p,q)
N = generate_N(p,q)
for char in message:
encrypted.append((ord(char) ** e) % N)
result = ''
for i in encrypted:
result = result + str(i)
print("encrypted message: " + result)
info = [encrypted, N, e]
return (info)
encrypt_RSA()
def egcd(a, b):
if a == 0:
return (b, 0, 1)
else:
g, y, x = egcd(b % a, a)
return (g, x - (b // a) * y, y)
def calculate_d(a,m):
g,x,y = egcd(a,m)
if g != 1:
return None
else:
return x%m
def calculate_phi(N):
with open('First500Primes.txt') as f:
lines = f.read().splitlines()
for num in lines:
if N%int(num) == 0:
p = int(num)
q = N/int(num)
phi = (p-1)*(q-1)
return int(phi)
def decrypt_RSA():
encrypted = encrypt_RSA()
encrypted_message, N, e = encrypted[0], encrypted[1], encrypted[2]
print(N)
phi = calculate_phi(N)
d = calculate_d(phi,e)
print("D: " + str(d))
message = []
encrypted_message = (encrypted[0])
for c in encrypted_message:
m = (c**d) % N
print(m)
message.append(chr(m))
print(message)
decrypt_RSA()
I need the code to firstly encrypt the message with the encrypt function then decrypt it with the decrypt function, so the encrypted and original message should be displayed.
Could someone tell me whats wrong with my code (since I'm still in school, it may need to be simplified), any additional feedback would be greatly appreciated.
After a bit of debugging, the problem is that the function calculate_d() does not seem to calculate the right number. It is solved when we invert the params of one of your function. Change this line
d = calculate_d(phi, e)
to this:
d = calculate_d(e, phi)
That got it working for me.
Also, since you asked for suggestions to improve your code, I made a few (a lot) improvements. Some ideas:
I replaced the parts that read the prime number file with a prime number generator, but that is just because I didn't have the file at hand. Choose whichever you like best.
Invoke the main functions inside a if __name__ == '__main__':. Read about it here.
I moved the input prompts outside of the encryption code. Implement those parts as needed (random or prompting user for input) and just pass the result to the function for encryption.
My version:
def generate_primes():
"""
Generate an infinite sequence of prime numbers.
Sieve of Eratosthenes
Code by David Eppstein, UC Irvine, 28 Feb 2002
http://code.activestate.com/recipes/117119/
https://stackoverflow.com/a/568618/9225671
"""
# Maps composites to primes witnessing their compositeness.
# This is memory efficient, as the sieve is not "run forward"
# indefinitely, but only as long as required by the current
# number being tested.
D = {}
# The running integer that's checked for primeness
q = 2
while True:
if q not in D:
# q is a new prime.
# Yield it and mark its first multiple that isn't
# already marked in previous iterations
yield q
D[q * q] = [q]
else:
# q is composite. D[q] is the list of primes that
# divide it. Since we've reached q, we no longer
# need it in the map, but we'll mark the next
# multiples of its witnesses to prepare for larger
# numbers
for p in D[q]:
D.setdefault(p + q, []).append(p)
del D[q]
q += 1
def choose_p_and_q():
p_i = random.randint(0, 100)
q_i = random.randint(0, 100)
p = 0
q = 0
for i, n in enumerate(generate_primes()):
if i <= p_i:
p = n
if i <= q_i:
q = n
if i > p_i and i > q_i:
break
return p, q
def generate_n(p, q):
return p * q
def generate_phi(p, q):
return (p - 1) * (q - 1)
def generate_e(phi):
e = None
for n in generate_primes():
if math.gcd(n, phi) == 1:
e = n
if n >= phi:
if e is None:
raise ValueError('no suitable prime number found; reached {}'.format(n))
# return the highest prime number found
return e
def find_p_and_q_from_n(n):
for i in generate_primes():
if n % i == 0:
p = i
q, remainder = divmod(n, p)
if remainder == 0:
return p, q
def egcd(a, b):
if a == 0:
return b, 0, 1
else:
g, y, x = egcd(b % a, a)
return g, x - (b // a) * y, y
def calculate_d(phi, e):
g, x, _ = egcd(phi, e)
if g == 1:
return x % e
raise ValueError('no modular multiplicative inverse found')
def encrypt_rsa(msg):
p, q = choose_p_and_q()
n = generate_n(p, q)
phi = generate_phi(p, q)
e = generate_e(phi)
print()
print('ENCRYPT')
print('p ', p)
print('q ', q)
print('n ', n)
print('phi ', phi)
print('e ', e)
encrypted_list = []
for char in msg:
m = (ord(char) ** e) % n
encrypted_list.append(m)
print('msg ', list(msg))
print('encrypted_list', encrypted_list)
return encrypted_list, n, e
def decrypt_rsa(encrypted_list, n, e):
p, q = find_p_and_q_from_n(n)
phi = generate_phi(p, q)
d = calculate_d(e, phi)
print()
print('DECRYPT')
print('p ', p)
print('q ', q)
print('n ', n)
print('phi ', phi)
print('e ', e)
print('d ', d)
decrypted_list = []
for elem in encrypted_list:
m = (elem**d) % n
decrypted_list.append(chr(m))
print('decrypted_list', decrypted_list)
if __name__ == '__main__':
msg = input('Enter a message to encrypt:').strip()
data = encrypt_rsa(msg)
decrypt_rsa(*data)

Is my math or my variables wrong in my RSA example?

I was reading up on cryptography, specifically RSA(https://www.khanacademy.org/computing/computer-science/cryptography/modern-crypt/v/rsa-encryption-part-4), and decided to make an example for myself. However, even though I'm pretty sure I got my variables right, I think I got my math wrong. Can someone help me find the error? I tried to put comments everywhere I thought needed an explanation. Written in Python 3.5.2
#m^phi(n) mod n == 1 where m & n dont share a common factor
#since 1^k = 1, m^(k * phi(n)) mod n == 1, too.
#since 1*m = m, m* (m^(k * phi(n)) mod n) == m
#^^^^simplifies to m^(k * phi(n) + 1) mod n == m
#b/c m^(e*d) mod n = m
#m^(e*d) mod n == m^(k * phi(n) + 1) mod n
#e*d = k * phi(n) + 1
#d = (k * phi(n) + 1)/e
from fractions import gcd
import random
i = 1
j = 1
t = 1
def is_prime(a):
return all(a % i for i in range(2, a))
while True:
p1 = random.randrange(10.00000)#gens the 1st random prime
if is_prime(p1):
if p1 == 0 or p1 == 1:
i+=1
continue
else:
print("First Random Prime Found on attempt "+str(i)+": "+str(p1))
break
i+=1
while True:
p2 = random.randrange(10.00000)#gens the 1st random prime
if is_prime(p2):
if p2 == 0 or p2 == 1:
j+=1
continue
else:
print("First Random Prime Found on attempt "+str(j)+": "+str(p2))
break
j+=1
n = p1 * p2
print("n = p1 * p2 = "+str(n))
phi_n = (p1 - 1) * (p2 - 1)#phi(n) = how many numbers below n share no factors w/ n. Given Definition of a prime, phi(any_prime_num) is always any_prime_num - 1.
print("phi_n = (p1 - 1) * (p2 - 1) = "+str(phi_n))
while True:
e = random.randrange(10)#gens the 3rd random prime
if e % 2 != 0:
if phi_n % e == 0:
k+=1
continue
else:
print("Public Random Prime(is e)Found on attempt "+str(t)+": "+str(e))
break
k = random.randrange(e)
print("num used to find d(is k): "+str(k))
d = (k * phi_n + 1)/e
print("PRIVATE key(is d): "+str(d))
#pub_key = [n, e]
#priv_key = [d, k, p1, p2, phi_n]
m = input("Type an int: ")
if gcd(int(m), n) != 1:
quit() #b/c m & n must not share a common factor(apparently)
c = (int(m)**e) % n #cipher text(nums)
print("Encrypted: "+str(c))
u = (c**d) % n #SHOULD be decrypted text(more nums)
print("Decrypted: "+str(u))
if int(m) == int(u):
print("Successful!!")
else:
print("Unsuccessful....")

Implementing Knuth-Morris-Pratt (KMP) algorithm for string matching with Python

I am following Cormen Leiserson Rivest Stein (clrs) book and came across "kmp algorithm" for string matching. I implemented it using Python (as-is).
However, it doesn't seem to work for some reason. where is my fault?
The code is given below:
def kmp_matcher(t,p):
n=len(t)
m=len(p)
# pi=[0]*n;
pi = compute_prefix_function(p)
q=-1
for i in range(n):
while(q>0 and p[q]!=t[i]):
q=pi[q]
if(p[q]==t[i]):
q=q+1
if(q==m):
print "pattern occurs with shift "+str(i-m)
q=pi[q]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
pi[1]=0
k=0
for q in range(2,m):
while(k>0 and p[k]!=p[q]):
k=pi[k]
if(p[k]==p[q]):
k=k+1
pi[q]=k
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t,p)
This is a class I wrote based on CLRs KMP algorithm, which contains what you are after. Note that only DNA "characters" are accepted here.
class KmpMatcher(object):
def __init__(self, pattern, string, stringName):
self.motif = pattern.upper()
self.seq = string.upper()
self.header = stringName
self.prefix = []
self.validBases = ['A', 'T', 'G', 'C', 'N']
#Matches the motif pattern against itself.
def computePrefix(self):
#Initialize prefix array
self.fillPrefixList()
k = 0
for pos in range(1, len(self.motif)):
#Check valid nt
if(self.motif[pos] not in self.validBases):
self.invalidMotif()
#Unique base in motif
while(k > 0 and self.motif[k] != self.motif[pos]):
k = self.prefix[k]
#repeat in motif
if(self.motif[k] == self.motif[pos]):
k += 1
self.prefix[pos] = k
#Initialize the prefix list and set first element to 0
def fillPrefixList(self):
self.prefix = [None] * len(self.motif)
self.prefix[0] = 0
#An implementation of the Knuth-Morris-Pratt algorithm for linear time string matching
def kmpSearch(self):
#Compute prefix array
self.computePrefix()
#Number of characters matched
match = 0
found = False
for pos in range(0, len(self.seq)):
#Check valid nt
if(self.seq[pos] not in self.validBases):
self.invalidSequence()
#Next character is not a match
while(match > 0 and self.motif[match] != self.seq[pos]):
match = self.prefix[match-1]
#A character match has been found
if(self.motif[match] == self.seq[pos]):
match += 1
#Motif found
if(match == len(self.motif)):
print(self.header)
print("Match found at position: " + str(pos-match+2) + ':' + str(pos+1))
found = True
match = self.prefix[match-1]
if(found == False):
print("Sorry '" + self.motif + "'" + " was not found in " + str(self.header))
#An invalid character in the motif message to the user
def invalidMotif(self):
print("Error: motif contains invalid DNA nucleotides")
exit()
#An invalid character in the sequence message to the user
def invalidSequence(self):
print("Error: " + str(self.header) + "sequence contains invalid DNA nucleotides")
exit()
You might want to try out my code:
def recursive_find_match(i, j, pattern, pattern_track):
if pattern[i] == pattern[j]:
pattern_track.append(i+1)
return {"append":pattern_track, "i": i+1, "j": j+1}
elif pattern[i] != pattern[j] and i == 0:
pattern_track.append(i)
return {"append":pattern_track, "i": i, "j": j+1}
else:
i = pattern_track[i-1]
return recursive_find_match(i, j, pattern, pattern_track)
def kmp(str_, pattern):
len_str = len(str_)
len_pattern = len(pattern)
pattern_track = []
if len_pattern == 0:
return
elif len_pattern == 1:
pattern_track = [0]
else:
pattern_track = [0]
i = 0
j = 1
while j < len_pattern:
data = recursive_find_match(i, j, pattern, pattern_track)
i = data["i"]
j = data["j"]
pattern_track = data["append"]
index_str = 0
index_pattern = 0
match_from = -1
while index_str < len_str:
if index_pattern == len_pattern:
break
if str_[index_str] == pattern[index_pattern]:
if index_pattern == 0:
match_from = index_str
index_pattern += 1
index_str += 1
else:
if index_pattern == 0:
index_str += 1
else:
index_pattern = pattern_track[index_pattern-1]
match_from = index_str - index_pattern
Try this:
def kmp_matcher(t, d):
n=len(t)
m=len(d)
pi = compute_prefix_function(d)
q = 0
i = 0
while i < n:
if d[q]==t[i]:
q=q+1
i = i + 1
else:
if q != 0:
q = pi[q-1]
else:
i = i + 1
if q == m:
print "pattern occurs with shift "+str(i-q)
q = pi[q-1]
def compute_prefix_function(p):
m=len(p)
pi =range(m)
k=1
l = 0
while k < m:
if p[k] <= p[l]:
l = l + 1
pi[k] = l
k = k + 1
else:
if l != 0:
l = pi[l-1]
else:
pi[k] = 0
k = k + 1
return pi
t = 'brownfoxlazydog'
p = 'lazy'
kmp_matcher(t, p)
KMP stands for Knuth-Morris-Pratt it is a linear time string-matching algorithm.
Note that in python, the string is ZERO BASED, (while in the book the string starts with index 1).
So we can workaround this by inserting an empty space at the beginning of both strings.
This causes four facts:
The len of both text and pattern is augmented by 1, so in the loop range, we do NOT have to insert the +1 to the right interval. (note that in python the last step is excluded);
To avoid accesses out of range, you have to check the values of k+1 and q+1 BEFORE to give them as index to arrays;
Since the length of m is augmented by 1, in kmp_matcher, before to print the response, you have to check this instead: q==m-1;
For the same reason, to calculate the correct shift you have to compute this instead: i-(m-1)
so the correct code, based on your original question, and considering the starting code from Cormen, as you have requested, would be the following:
(note : I have inserted a matching pattern inside, and some debug text that helped me to find logical errors):
def compute_prefix_function(P):
m = len(P)
pi = [None] * m
pi[1] = 0
k = 0
for q in range(2, m):
print ("q=", q, "\n")
print ("k=", k, "\n")
if ((k+1) < m):
while (k > 0 and P[k+1] != P[q]):
print ("entered while: \n")
print ("k: ", k, "\tP[k+1]: ", P[k+1], "\tq: ", q, "\tP[q]: ", P[q])
k = pi[k]
if P[k+1] == P[q]:
k = k+1
print ("Entered if: \n")
print ("k: ", k, "\tP[k]: ", P[k], "\tq: ", q, "\tP[q]: ", P[q])
pi[q] = k
print ("Outside while or if: \n")
print ("pi[", q, "] = ", k, "\n")
print ("---next---")
print ("---end for---")
return pi
def kmp_matcher(T, P):
n = len(T)
m = len(P)
pi = compute_prefix_function(P)
q = 0
for i in range(1, n):
print ("i=", i, "\n")
print ("q=", q, "\n")
print ("m=", m, "\n")
if ((q+1) < m):
while (q > 0 and P[q+1] != T[i]):
q = pi[q]
if P[q+1] == T[i]:
q = q+1
if q == m-1:
print ("Pattern occurs with shift", i-(m-1))
q = pi[q]
print("---next---")
print("---end for---")
txt = " bacbababaabcbab"
ptn = " ababaab"
kmp_matcher(txt, ptn)
(so this would be the correct accepted answer...)
hope that it helps.

Categories

Resources