Here is my code. In the calculateOptimalLambda() function, I am attempting to declare a copy of n and
store it as m, remove one point from m, and make some calculations and a graph. Then, the loop should
restart, make a fresh copy of m, remove the next point, and so on.
However, when in the next iteration
of the loop, a point has been removed. Eventually, I run out of points to remove, and I get an error.
How do I declare a fresh copy of m so I can remove the next point?
import numpy as np
from matplotlib import pyplot as plt
class Data:
def __init__(self, points, sigma, lamda):
self.points = points
self.sigma = sigma
self.sample = np.random.uniform(-1,1, (points, 2))
self.transformedData = np.ones((points, 5))
self.weight = np.zeros((5,1))
self.lamda = lamda
def changeLamda(self,x):
self.lamda = x
def removePoint(self, x):
self.points = self.points - 1
self.sample = np.delete(self.sample, x, 0)
self.transformedData = np.delete(self.transformedData, x, 0)
def transformedFunction(self, x):
transformedData = np.ones((1, 5))
transformedData[0,1] = x
transformedData[0,2] = 0.5 * (3*x**2 -1)
transformedData[0,3]= 0.5 * (5*x**3 - 3*x)
transformedData[0,4] = 0.125 * (35*x**4 -30*x**2 + 3)
return np.dot(transformedData, self.weight)
def setY(self):
for i in range(len(self.sample[0:,0])):
self.sample[i,1] = np.random.normal(0, self.sigma) + self.sample[i,0]**2
def transform(self):
for i in range(len(self.sample[0:,0])):
self.transformedData[i,1] = self.sample[i,0]
self.transformedData[i,2] = 0.5 * (3*self.sample[i,0]**2 -1)
self.transformedData[i,3]= 0.5 * (5*self.sample[i,0]**3 - 3*self.sample[i,0])
self.transformedData[i,4] = 0.125 * (35*self.sample[i,0]**4 -30*self.sample[i,0]**2 + 3)
def calculateWeight(self):
z = n.transformedData
zProd = np.linalg.inv(np.matmul(np.transpose(z), z) + np.identity(5)*self.lamda)
next1 = np.matmul(zProd,np.transpose(z))
a = self.sample[0:,1]
a = a.reshape((-1, 1))
print(a)
self.weight = np.matmul(next1,a)
def calculateError(self):
error= (np.matmul(self.transformedData, self.weight) - self.sample[1,0:])
return error/self.points
def calculateOptimalLambda(n, L):
a = 0
for i in range(len(L)):
n.changeLamda(L[i])
for x in range(n.getPoints()):
a+=1
plt.subplot(4,5,a)
m = n
m.removePoint(x)
m.calculateWeight()
weight = m.getWeight()
error = m.calculateError()
twoD_plot(m)
print(error)
def twoD_plot(n):
t = np.linspace(-1, 1, 400)
x = np.square(t)
plt.plot(t,x,'b')
error = 0
y = x
for i in range(len(t)):
y[i] = n.transformedFunction(t[i])
error += (y[i] - t[i]**2)**2
"""print(error/len(t))"""
plt.plot(t,y,'r')
plt.scatter(n.getSample()[0:,0],n.getSample()[0:,1], c = 'g', marker = 'o')
n = Data(5,0.1,0)
n.setY()
n.transform()
n.calculateWeight()
L = [1, 0.01, 0.00001, 0]
calculateOptimalLambda(n, L)
plt.show()
With the code below, I'm attempting to implement the Levy-Khintchine formula (https://en.wikipedia.org/wiki/L%C3%A9vy_process#L.C3.A9vy.E2.80.93Khintchine_representation). In the limit of no jumps, the Levy-Khitchine formula reduces to the multivariate normal distribution. My code uses the (multi-dimensional) trapeziodal integration rule (http://mathfaculty.fullerton.edu/mathews/n2003/SimpsonsRule2DMod.html) to approximate the Fourier transform of the characteristic function as a discrete Fourier transform. For the 1-dimensional case, the code works perfectly. For the 2-D case, I can't find what I'm doing wrong.
Does anyone have example numpy.fftn code that correctly implements multivariate_normal pdf?
class LevyKhintchine:
def __init__(self, mean, cov, jump_measure):
self.mean = mean
self.cov = cov
self.jump_measure = jump_measure
self.factors = mean.shape[0]
def logCF(self, k):
rolled = Roll(k)
out = np.empty(Shape(k))
return (self.jump_measure(k) -
Dot(rolled, self.cov, rolled, out)*0.5 +
np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)
def pdf_grid(self, J):
diag = np.diagonal(self.cov)
tmp = np.pi*2/J
dk = np.sqrt(tmp/diag)
dx = np.sqrt(tmp*diag)
k = Grid(np.zeros(self.factors), dk, J)
x0 = self.mean - dx*J*0.5
f = np.exp(self.logCF(k) - Coef(dk, x0, J)*1j)
for n in range(self.factors):
f[ 0] *= 0.5
f[-1] *= 0.5
f = np.rollaxis(f, 0, factors)
pdf = np.fft.fftn(f)
return Grid(x0, dx, J), pdf.real*(np.product(dk)/np.pi)
def Grid(left, width, J):
def Slice(slices, j):
slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
return slices
slices = reduce(Slice, range(len(left)), [])
return np.mgrid[slices]
def Shape(grid):
return np.asarray(grid).shape[1:]
def Roll(grid):
grid = np.asarray(grid)
try:
rolled = np.rollaxis(grid, 0, len(grid)+1)
except ValueError:
rolled = grid
return rolled
def Dot(x, cov, y, out): #x & y are "rolled"
for j in np.ndindex(out.shape):
out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
return out
def Coef(dks, x0s, J):
factors = len(dks)
coef = np.zeros((J,)*factors)
for n, (dk, x0) in enumerate(zip(dks, x0s)):
shape = np.ones(factors, dtype=int)
shape[n] = J
coef += np.arange(J).reshape(shape)*(dk*x0)
return coef
Here's the tests:
from scipy.stats import multivariate_normal
J = 64
factors = 1
mean = np.full((factors,), -1)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
rv0 = multivariate_normal(mean, cov)
x, pdf = rv.pdf_grid(J)
plt.plot(x[0], pdf, x[0], rv0.pdf(Roll(x)))
factors = 2
mean = np.full((factors,), 5)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
x, pdf = rv.pdf_grid(J)
rv0 = multivariate_normal(mean, cov)
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.contourf(x[0], x[1], pdf)
fig3 = plt.figure()
ax3 = fig3.add_subplot(111)
ax3.contourf(x[0], x[1], rv0.pdf(Roll(x)))
I figured it out: in 1-d I can get away with integrating over only positive wave numbers k, in higher dimensions I cannot.
Here's the corrected code:
class LevyKhintchine:
def __init__(self, mean, cov, jump_measure):
self.mean = mean
self.cov = cov
self.jump_measure = jump_measure
self.factors = mean.shape[0]
def logCF(self, k):
rolled = Roll(k)
out = np.empty(Shape(k))
return (self.jump_measure(k) -
Dot(rolled, self.cov, rolled, out)*0.5 +
np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)
def pdf_grid(self, J):
diag = np.diagonal(self.cov)
tmp = np.pi*2/J
dk = np.sqrt(tmp/diag)
dx = np.sqrt(tmp*diag)
k0 = -0.5*dk*J
x0 = -0.5*dx*J + self.mean
k = Grid(k0, dk, J)
x = Grid(x0, dx, J)
f = np.exp(-1j*Coef(dk, x0, J) + self.logCF(k))
for n in range(self.factors):
f[ 0] *= 0.5
f[-1] *= 0.5
f = np.rollaxis(f, 0, factors)
c = ((0.5/np.pi)**self.factors*np.product(dk)*np.exp(-1j*np.dot(k0, x0)))
pdf = np.fft.fftn(f)*np.exp(-1j*Coef(k0, dx, J))*c
return x, pdf.real
def Grid(left, width, J):
def Slice(slices, j):
slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
return slices
slices = reduce(Slice, range(len(left)), [])
return np.mgrid[slices]
def Shape(grid):
return np.asarray(grid).shape[1:]
def Roll(grid):
grid = np.asarray(grid)
try:
rolled = np.rollaxis(grid, 0, len(grid)+1)
except ValueError:
rolled = grid
return rolled
def Dot(x, cov, y, out): #x & y are "rolled"
for j in np.ndindex(out.shape):
out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
return out
def Coef(dks, x0s, J):
factors = len(dks)
coef = np.zeros((J,)*factors)
for n, (dk, x0) in enumerate(zip(dks, x0s)):
shape = np.ones(factors, dtype=int)
shape[n] = J
coef += np.arange(J).reshape(shape)*(dk*x0)
return coef
Here's the tests:
from scipy.stats import multivariate_normal
J = 32
for factors in range(1, 4):
mean = np.full((factors,), -1)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
rv0 = multivariate_normal(mean, cov)
x, pdf = rv.pdf_grid(J)
pdf0 = rv0.pdf(Roll(x))
print np.allclose(pdf, pdf0)
True
True
True
I'm looking for a Python implementation of the SHA-256 hash function. I want to use it to get a better understanding of how the SHA-256 function works, and I think Python is the ideal language for this. Pseudo-code has the limitation that I can't run/test it, to see what my modifications of the code do to the output.
PyPy's source contains a pure-python implementation of SHA-256 here. Poking around in that directory, you'll probably also find pure-python implementations of other standard hashes.
initial_hash_values=[
'6a09e667','bb67ae85','3c6ef372','a54ff53a',
'510e527f','9b05688c','1f83d9ab','5be0cd19'
]
sha_256_constants=[
'428a2f98','71374491','b5c0fbcf','e9b5dba5',
'3956c25b','59f111f1','923f82a4','ab1c5ed5',
'd807aa98','12835b01','243185be','550c7dc3',
'72be5d74','80deb1fe','9bdc06a7','c19bf174',
'e49b69c1','efbe4786','0fc19dc6','240ca1cc',
'2de92c6f','4a7484aa','5cb0a9dc','76f988da',
'983e5152','a831c66d','b00327c8','bf597fc7',
'c6e00bf3','d5a79147','06ca6351','14292967',
'27b70a85','2e1b2138','4d2c6dfc','53380d13',
'650a7354','766a0abb','81c2c92e','92722c85',
'a2bfe8a1','a81a664b','c24b8b70','c76c51a3',
'd192e819','d6990624','f40e3585','106aa070',
'19a4c116','1e376c08','2748774c','34b0bcb5',
'391c0cb3','4ed8aa4a','5b9cca4f','682e6ff3',
'748f82ee','78a5636f','84c87814','8cc70208',
'90befffa','a4506ceb','bef9a3f7','c67178f2'
]
def bin_return(dec):
return(str(format(dec,'b')))
def bin_8bit(dec):
return(str(format(dec,'08b')))
def bin_32bit(dec):
return(str(format(dec,'032b')))
def bin_64bit(dec):
return(str(format(dec,'064b')))
def hex_return(dec):
return(str(format(dec,'x')))
def dec_return_bin(bin_string):
return(int(bin_string,2))
def dec_return_hex(hex_string):
return(int(hex_string,16))
def L_P(SET,n):
to_return=[]
j=0
k=n
while k<len(SET)+1:
to_return.append(SET[j:k])
j=k
k+=n
return(to_return)
def s_l(bit_string):
bit_list=[]
for i in range(len(bit_string)):
bit_list.append(bit_string[i])
return(bit_list)
def l_s(bit_list):
bit_string=''
for i in range(len(bit_list)):
bit_string+=bit_list[i]
return(bit_string)
def rotate_right(bit_string,n):
bit_list = s_l(bit_string)
count=0
while count <= n-1:
list_main=list(bit_list)
var_0=list_main.pop(-1)
list_main=list([var_0]+list_main)
bit_list=list(list_main)
count+=1
return(l_s(list_main))
def shift_right(bit_string,n):
bit_list=s_l(bit_string)
count=0
while count <= n-1:
bit_list.pop(-1)
count+=1
front_append=['0']*n
return(l_s(front_append+bit_list))
def mod_32_addition(input_set):
value=0
for i in range(len(input_set)):
value+=input_set[i]
mod_32 = 4294967296
return(value%mod_32)
def xor_2str(bit_string_1,bit_string_2):
xor_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='0' and bit_string_2[i]=='0':
xor_list.append('0')
if bit_string_1[i]=='1' and bit_string_2[i]=='1':
xor_list.append('0')
if bit_string_1[i]=='0' and bit_string_2[i]=='1':
xor_list.append('1')
if bit_string_1[i]=='1' and bit_string_2[i]=='0':
xor_list.append('1')
return(l_s(xor_list))
def and_2str(bit_string_1,bit_string_2):
and_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='1' and bit_string_2[i]=='1':
and_list.append('1')
else:
and_list.append('0')
return(l_s(and_list))
def or_2str(bit_string_1,bit_string_2):
or_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='0' and bit_string_2[i]=='0':
or_list.append('0')
else:
or_list.append('1')
return(l_s(or_list))
def not_str(bit_string):
not_list=[]
for i in range(len(bit_string)):
if bit_string[i]=='0':
not_list.append('1')
else:
not_list.append('0')
return(l_s(not_list))
'''
SHA-256 Specific Functions:
'''
def Ch(x,y,z):
return(xor_2str(and_2str(x,y),and_2str(not_str(x),z)))
def Maj(x,y,z):
return(xor_2str(xor_2str(and_2str(x,y),and_2str(x,z)),and_2str(y,z)))
def e_0(x):
return(xor_2str(xor_2str(rotate_right(x,2),rotate_right(x,13)),rotate_right(x,22)))
def e_1(x):
return(xor_2str(xor_2str(rotate_right(x,6),rotate_right(x,11)),rotate_right(x,25)))
def s_0(x):
return(xor_2str(xor_2str(rotate_right(x,7),rotate_right(x,18)),shift_right(x,3)))
def s_1(x):
return(xor_2str(xor_2str(rotate_right(x,17),rotate_right(x,19)),shift_right(x,10)))
def message_pad(bit_list):
pad_one = bit_list + '1'
pad_len = len(pad_one)
k=0
while ((pad_len+k)-448)%512 != 0:
k+=1
back_append_0 = '0'*k
back_append_1 = bin_64bit(len(bit_list))
return(pad_one+back_append_0+back_append_1)
def message_bit_return(string_input):
bit_list=[]
for i in range(len(string_input)):
bit_list.append(bin_8bit(ord(string_input[i])))
return(l_s(bit_list))
def message_pre_pro(input_string):
bit_main = message_bit_return(input_string)
return(message_pad(bit_main))
def message_parsing(input_string):
return(L_P(message_pre_pro(input_string),32))
def message_schedule(index,w_t):
new_word = bin_32bit(mod_32_addition([int(s_1(w_t[index-2]),2),int(w_t[index-7],2),int(s_0(w_t[index-15]),2),int(w_t[index-16],2)]))
return(new_word)
'''
This example of SHA_256 works for an input string <56 characters.
'''
def sha_256(input_string):
assert len(input_string) < 56, "This example of SHA_256 works for an input string <56 characters."
w_t=message_parsing(input_string)
a=bin_32bit(dec_return_hex(initial_hash_values[0]))
b=bin_32bit(dec_return_hex(initial_hash_values[1]))
c=bin_32bit(dec_return_hex(initial_hash_values[2]))
d=bin_32bit(dec_return_hex(initial_hash_values[3]))
e=bin_32bit(dec_return_hex(initial_hash_values[4]))
f=bin_32bit(dec_return_hex(initial_hash_values[5]))
g=bin_32bit(dec_return_hex(initial_hash_values[6]))
h=bin_32bit(dec_return_hex(initial_hash_values[7]))
for i in range(0,64):
if i <= 15:
t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
h=g
g=f
f=e
e=mod_32_addition([int(d,2),t_1])
d=c
c=b
b=a
a=mod_32_addition([t_1,t_2])
a=bin_32bit(a)
e=bin_32bit(e)
if i > 15:
w_t.append(message_schedule(i,w_t))
t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
h=g
g=f
f=e
e=mod_32_addition([int(d,2),t_1])
d=c
c=b
b=a
a=mod_32_addition([t_1,t_2])
a=bin_32bit(a)
e=bin_32bit(e)
hash_0 = mod_32_addition([dec_return_hex(initial_hash_values[0]),int(a,2)])
hash_1 = mod_32_addition([dec_return_hex(initial_hash_values[1]),int(b,2)])
hash_2 = mod_32_addition([dec_return_hex(initial_hash_values[2]),int(c,2)])
hash_3 = mod_32_addition([dec_return_hex(initial_hash_values[3]),int(d,2)])
hash_4 = mod_32_addition([dec_return_hex(initial_hash_values[4]),int(e,2)])
hash_5 = mod_32_addition([dec_return_hex(initial_hash_values[5]),int(f,2)])
hash_6 = mod_32_addition([dec_return_hex(initial_hash_values[6]),int(g,2)])
hash_7 = mod_32_addition([dec_return_hex(initial_hash_values[7]),int(h,2)])
final_hash = (hex_return(hash_0),
hex_return(hash_1),
hex_return(hash_2),
hex_return(hash_3),
hex_return(hash_4),
hex_return(hash_5),
hex_return(hash_6),
hex_return(hash_7))
return(final_hash)
Some time ago I was also studying SHA-256 and created pure-python class that implements this hash. If I remember correctly, mostly I've taken algorithm from Wikipedia SHA-256 Pseudocode and partially from some open-source projects.
Algorithm doesn't import any (even standard) modules. Of cause it is much slower than hashlib's variant and only meant for studying.
If you just run the script it executes 1000 tests comparing hashlib's and my variants. Only testing function imports some modules, algorithm's class itself doesn't need any modules. Interface is same as in hashlib's sha256 class. See test() function for examples of usage.
Try it online!
class Sha256:
ks = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
]
hs = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
]
M32 = 0xFFFFFFFF
def __init__(self, m = None):
self.mlen = 0
self.buf = b''
self.k = self.ks[:]
self.h = self.hs[:]
self.fin = False
if m is not None:
self.update(m)
#staticmethod
def pad(mlen):
mdi = mlen & 0x3F
length = (mlen << 3).to_bytes(8, 'big')
padlen = 55 - mdi if mdi < 56 else 119 - mdi
return b'\x80' + b'\x00' * padlen + length
#staticmethod
def ror(x, y):
return ((x >> y) | (x << (32 - y))) & Sha256.M32
#staticmethod
def maj(x, y, z):
return (x & y) ^ (x & z) ^ (y & z)
#staticmethod
def ch(x, y, z):
return (x & y) ^ ((~x) & z)
def compress(self, c):
w = [0] * 64
w[0 : 16] = [int.from_bytes(c[i : i + 4], 'big') for i in range(0, len(c), 4)]
for i in range(16, 64):
s0 = self.ror(w[i - 15], 7) ^ self.ror(w[i - 15], 18) ^ (w[i - 15] >> 3)
s1 = self.ror(w[i - 2], 17) ^ self.ror(w[i - 2], 19) ^ (w[i - 2] >> 10)
w[i] = (w[i - 16] + s0 + w[i - 7] + s1) & self.M32
a, b, c, d, e, f, g, h = self.h
for i in range(64):
s0 = self.ror(a, 2) ^ self.ror(a, 13) ^ self.ror(a, 22)
t2 = s0 + self.maj(a, b, c)
s1 = self.ror(e, 6) ^ self.ror(e, 11) ^ self.ror(e, 25)
t1 = h + s1 + self.ch(e, f, g) + self.k[i] + w[i]
h = g
g = f
f = e
e = (d + t1) & self.M32
d = c
c = b
b = a
a = (t1 + t2) & self.M32
for i, (x, y) in enumerate(zip(self.h, [a, b, c, d, e, f, g, h])):
self.h[i] = (x + y) & self.M32
def update(self, m):
if m is None or len(m) == 0:
return
assert not self.fin, 'Hash already finalized and can not be updated!'
self.mlen += len(m)
m = self.buf + m
for i in range(0, len(m) // 64):
self.compress(m[64 * i : 64 * (i + 1)])
self.buf = m[len(m) - (len(m) % 64):]
def digest(self):
if not self.fin:
self.update(self.pad(self.mlen))
self.digest = b''.join(x.to_bytes(4, 'big') for x in self.h[:8])
self.fin = True
return self.digest
def hexdigest(self):
tab = '0123456789abcdef'
return ''.join(tab[b >> 4] + tab[b & 0xF] for b in self.digest())
def test():
import secrets, hashlib, random
for itest in range(500):
data = secrets.token_bytes(random.randrange(257))
a, b = hashlib.sha256(data).hexdigest(), Sha256(data).hexdigest()
assert a == b, (a, b)
for itest in range(500):
a, b = hashlib.sha256(), Sha256()
for j in range(random.randrange(10)):
data = secrets.token_bytes(random.randrange(129))
a.update(data)
b.update(data)
a, b = a.hexdigest(), b.hexdigest()
assert a == b, (a, b)
print('Sha256 tested successfully.')
if __name__ == '__main__':
test()
If you only want the hash value:
from hashlib import sha256
data = input('Enter plaintext data: ')
output = sha256(data.encode('utf-8'))
print(output)
Python's hashlib also has SHA-1, SHA-384, SHA-512, and MD5 hash functions.
Here is my proposition with redis:
for i in range(len(rserver.keys())):
mdp_hash = rserver.get(rserver.keys()[i])
rserver.set(rserver.keys()[i], hashlib.sha256(mdp_hash.encode()).hexdigest())
Translating http://en.wikipedia.org/wiki/SHA-2#SHA-256_.28a_SHA-2_variant.29_pseudocode to Python should be straight forward.