Optimization of a computation involving a sum and an exponential - python

I am coding in Python, using numpy. I want to optimize a formula which, written out (the original post showed it as a picture for readability; reconstructed here from the code below), is

ans = sum over all i with t_i^(n) < t_(k-1)^(m) of (t_(k-1)^(m) - t_i^(n)) * exp(-beta_(m,n) * (t_(k-1)^(m) - t_i^(n)))

In that example, the times t come from different lists, indicated by the superscript. The corresponding object here is T_t, which is a list of lists.
Here is my original code:
def first_version(m, n, k, T_t, BETA):
    if k == 1:
        return 0
    ans = 0
    for i in range(len(T_t[n])):
        if T_t[n][i] < T_t[m][k - 1]:
            ans += (T_t[m][k - 1] - T_t[n][i]) * np.exp(-BETA[m, n] * (T_t[m][k - 1] - T_t[n][i]))
        else:
            break
    return ans
The break at the end allows me to save some time (the times are sorted). I had the brilliant idea of using the numpy library to improve the performance:
def second_version(m, n, k, T_t, BETA):
    if k == 1:
        return 0
    the_times = np.maximum(T_t[m][k - 1] - np.array(T_t[n]), 0)
    ans = sum(the_times * np.exp(-BETA[m, n] * the_times))
    return ans
For the sake of comparison, the second version runs 100x faster. Is it possible to do better? In particular, I regret the fact that numpy computes the maximum over the whole vector when probably half of it will be 0 at the end.
Do you have any idea how to improve these bits of code?
Edit: I had forgotten a sum in code nr. 2. Including it slows the code down, making it only 20 times faster.

I have 2 main suggestions:
1. Using np.sum() instead of sum() roughly triples the speed of second_version.
2. Using numba.jit increases the speed again, by about 8x. (Actually, you can jit-compile either version and end up with about the same speed.)
Full code example:
import numpy as np
import numba
import timeit

def first_version(m, n, k, T_t, BETA):
    if k == 1:
        return 0
    ans = 0
    for i in range(len(T_t[n])):
        if T_t[n][i] < T_t[m][k - 1]:
            ans += (T_t[m][k - 1] - T_t[n][i]) * np.exp(-BETA[m, n] * (T_t[m][k - 1] - T_t[n][i]))
        else:
            break
    return ans

def second_version(m, n, k, T_t, BETA):
    if k == 1:
        return 0
    the_times = np.maximum(T_t[m][k - 1] - np.array(T_t[n]), 0)
    ans = np.sum(the_times * np.exp(-BETA[m, n] * the_times))
    return ans

def jit_version(m, n, k, T_t, BETA):
    # wrapper makes it so that numba doesn't have to deal with
    # the list-of-arrays data type
    return jit_version_core(k, T_t[m], T_t[n], BETA[m, n])

@numba.jit(nopython=True)
def jit_version_core(k, t1, t2, b):
    if k == 1:
        return 0
    ans = 0
    for i in range(len(t2)):
        if t2[i] < t1[k - 1]:
            ans += (t1[k - 1] - t2[i]) * np.exp(-b * (t1[k - 1] - t2[i]))
        else:
            break
    return ans

N = 10000
t1 = np.cumsum(np.random.random(size=N))
t2 = np.cumsum(np.random.random(size=N))
beta = np.random.random(size=(2, 2))

for fn in ['first_version', 'second_version', 'jit_version']:
    print("------", fn)
    v = globals()[fn](0, 1, len(t1), [t1, t2], beta)
    t = timeit.timeit('%s(0, 1, len(t1), [t1, t2], beta)' % fn, number=100, globals=globals())
    print("output:", v, "time:", t)
And the output:
------ first_version
output: 3.302938986817431 time: 2.900316455983557
------ second_version
output: 3.3029389868174306 time: 0.12064526398899034
------ jit_version
output: 3.302938986817431 time: 0.013476221996825188
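On the asker's regret about computing the maximum over the whole vector: since the times come from np.cumsum, T_t[n] is sorted, so (a sketch under that assumption, not part of the answer above) np.searchsorted can locate the cutoff index and restrict the vectorised work to the non-zero prefix:

def searchsorted_version(m, n, k, T_t, BETA):
    if k == 1:
        return 0
    t = T_t[m][k - 1]
    cut = np.searchsorted(T_t[n], t)  # first index with T_t[n][i] >= t
    dt = t - T_t[n][:cut]             # only the strictly positive differences
    return np.sum(dt * np.exp(-BETA[m, n] * dt))

This plays the same role as the break in first_version, but finds the cutoff in O(log n) instead of a Python-level loop.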


Given a rod of length N, you need to cut it into R pieces, such that each piece's length is positive. How many ways are there to do so?

Description:
Given two positive integers N and R, how many different ways are there to cut a rod of length N into R pieces, such that the length of each piece is a positive integer? Output this answer modulo 1,000,000,007.
Example:
With N = 7 and R = 3, there are 15 ways to cut a rod of length 7 into 3 pieces: (1,1,5), (1,5,1), (5,1,1), (1,2,4), (1,4,2), (2,1,4), (2,4,1), (4,1,2), (4,2,1), (1,3,3), (3,1,3), (3,3,1), (2,2,3), (2,3,2), (3,2,2).
Constraints:
1 <= R <= N <= 200,000
Testcases:
N    R    Output
7    3    15
36   6    324632
81   66   770289477
96   88   550930798
My approach:
I know that the answer is (N-1 choose R-1) mod 1,000,000,007. I have tried several different ways to calculate it, but 7 out of 10 test cases always exceeded the time limit. Here is my code; can anyone tell me what other approach I can use to answer each query in O(1) time?
from math import factorial

def new(n, r):
    D = factorial(n - 1) // (factorial(r - 1) * factorial(n - r))
    return D % 1000000007

if __name__ == '__main__':
    N = [7, 36, 81, 96]
    R = [3, 6, 66, 88]
    answer = [new(n, r) for n, r in zip(N, R)]
    print(answer)
I think there are two big optimizations the problem is looking for you to exploit. The first is caching intermediate values of factorial() to save computational effort across large batches (large T). The second is reducing your value mod 1000000007 incrementally, so your numbers stay small and multiplication stays constant-time. I've updated the example below to precompute a factorial table using a custom function and itertools.accumulate, instead of merely caching the calls in a recursive implementation (which eliminates the recursion-depth issues you were seeing).
from itertools import accumulate

MOD_BASE = 1000000007
N_BOUND = 200000

def modmul(m):
    def mul(x, y):
        return x * y % m
    return mul

# FACTORIALS[i] == i! mod MOD_BASE, for i in 0..N_BOUND
FACTORIALS = [1] + list(accumulate(range(1, N_BOUND + 1), modmul(MOD_BASE)))

def nck(n, k, m):
    numerator = FACTORIALS[n]
    denominator = FACTORIALS[k] * FACTORIALS[n - k]
    # pow(x, -1, m) computes a modular inverse; requires Python 3.8+
    return numerator * pow(denominator, -1, m) % m

def solve(n, k):
    return nck(n - 1, k - 1, MOD_BASE)
Running this against the example:
>>> pairs = [(36, 6), (81, 66), (96, 88)]
>>> print([solve(n, k) for n, k in pairs])
[324632, 770289477, 550930798]
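If you need true O(1) per query after the precomputation, one standard extension (a sketch, not part of the answer above; INV_FACTORIALS and nck_fast are names introduced here) is to also precompute the inverse factorials once, via Fermat's little theorem, so each query no longer needs a modular inverse:

INV_FACTORIALS = [1] * (N_BOUND + 1)
INV_FACTORIALS[N_BOUND] = pow(FACTORIALS[N_BOUND], MOD_BASE - 2, MOD_BASE)
for i in range(N_BOUND, 0, -1):
    # (i-1)!^(-1) == i!^(-1) * i (mod p)
    INV_FACTORIALS[i - 1] = INV_FACTORIALS[i] * i % MOD_BASE

def nck_fast(n, k, m=MOD_BASE):
    if k < 0 or k > n:
        return 0
    return FACTORIALS[n] * INV_FACTORIALS[k] % m * INV_FACTORIALS[n - k] % m

Each call is then just two multiplications and two reductions.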
I literally translated the code from the accepted answer by Ivaylo Strandjev here, and it works much faster:
def get_degree(n, p):
    # returns the exponent with which p appears in n!
    degree_num = 0
    u = p
    temp = n
    while u <= temp:
        degree_num += temp // u
        u *= p
    return degree_num

def degree(a, k, p):
    # computes a^k mod p by binary exponentiation
    res = 1
    cur = a
    while k:
        if k % 2:
            res = (res * cur) % p
        k //= 2
        cur = (cur * cur) % p
    return res

def CNKmodP(n, k, p):
    num_degree = get_degree(n, p) - get_degree(n - k, p)
    den_degree = get_degree(k, p)
    if num_degree > den_degree:
        return 0
    res = 1
    for i in range(n, n - k, -1):
        ti = i
        while ti % p == 0:
            ti //= p
        res = (res * ti) % p
    denom = 1
    for i in range(1, k + 1):
        ti = i
        while ti % p == 0:
            ti //= p
        denom = (denom * ti) % p
    res = (res * degree(denom, p - 2, p)) % p
    return res
To apply this approach, you just need to call
result = CNKmodP(n-1, r-1, 1000000007)
In Java we can use BigInteger, because the factorial values we calculate may not fit in an integer. Additionally, BigInteger provides the built-in methods multiply and divide.
static int CNRmodP(int N, int R, int P) {
    BigInteger ret = BigInteger.ONE;
    for (int i = 0; i < R; i++) {
        ret = ret.multiply(BigInteger.valueOf(N - i))
                 .divide(BigInteger.valueOf(i + 1));
    }
    BigInteger p = BigInteger.valueOf(P);
    // Calculate the modulus
    BigInteger answer = ret.mod(p);
    // Convert BigInteger to int and return it
    return answer.intValue();
}
To apply the above approach, you just need to call
result = CNRmodP(N-1, R-1, 1000000007);

How do I convert the Maclaurin series for tan x into Python code?

I'm trying to calculate the nth partial sum, but it's giving me wrong answers.
import math

def bernoulli(m):
    if m == 0:
        return 1
    else:
        t = 0
        for k in range(0, m):
            t += math.comb(m, k) * bernoulli(k) / (m - k + 1)
        return 1 - t

def pn(n, x):
    sum = 0
    for i in range(n):
        sum += ((bernoulli(2 * i)) / math.factorial(2 * i)) * (-4**i) * (1 - (4**i)) * (x**((2 * i) - 1))
Equation:
tan x = sum_{n=1}^{infinity} [B_{2n} * (-4)^n * (1 - 4^n) / (2n)!] * x^(2n-1)
Here are a few comments:
In Python, the convention is to include the start and exclude the end: list(range(1, 4)) is only [1, 2, 3], not [1, 2, 3, 4]. Your bernoulli loop, for k in range(0, m), is actually already correct for the recurrence B_m = 1 - sum_{k=0}^{m-1} C(m, k) * B_k / (m - k + 1); extending it to range(0, m + 1) would make bernoulli(m) call itself forever. Your pn loop, however, should be for i in range(1, n + 1): the series starts at i = 1, and range(n) drops the last term.
Exponentiation has a higher precedence than most operators: -4**i is parsed as -(4**i), not as (-4)**i (see the quick check after these comments).
sum is already the name of a builtin function in Python. It is very strongly advised not to shadow the names of builtins. Call that variable s or total or something else, not sum.
Also note that pn never returns its result; it needs a return at the end.
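For instance, the precedence point is easy to check directly:

print(-4**2)    # -16, parsed as -(4**2)
print((-4)**2)  # 16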
Finally, the code becomes:
import math

def bernoulli(m):
    if m == 0:
        return 1
    else:
        t = 0
        for k in range(0, m):
            t += math.comb(m, k) * bernoulli(k) / (m - k + 1)
        return 1 - t

def pn(n, x):
    s = 0
    for i in range(1, n + 1):
        s += (bernoulli(2 * i) / math.factorial(2 * i)) * ((-4)**i) * (1 - 4**i) * x**(2 * i - 1)
    return s
And, using the builtin function sum:
import math

def bernoulli(m):
    if m == 0:
        return 1
    else:
        return 1 - sum(math.comb(m, k) * bernoulli(k) / (m - k + 1)
                       for k in range(0, m))

def pn(n, x):
    return sum(bernoulli(2 * i) / math.factorial(2 * i) * (-4)**i * (1 - 4**i) * x**(2 * i - 1)
               for i in range(1, n + 1))
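As a quick sanity check (an addition here, not part of the original answer), the partial sums should approach math.tan for |x| < pi/2:

print(pn(10, 0.5))    # partial sum of the series at x = 0.5
print(math.tan(0.5))  # approximately 0.5463, for comparison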

Implement the function fast modular exponentiation

I am trying to implement the function fast_modular_exponentiation(b, k, m), which computes
b^(2^k) mod m
using only around 2k modular multiplications.
I tried this method:
def FastModularExponentiation(b, k, m):
    res = 1
    b = b % m
    while k > 0:
        if (k & 1) == 1:
            res = (res * b) % m
        k = k >> 1
        b = (b * b) % m
    return res
but I am still stuck on the same problem: if I try b = 2, k = 1, m = 10, my code returns 2. However, the correct answer is:
2^(2^1) mod 10 = 2^2 mod 10 = 4
and I cannot find the reason why.
Update: I finally understood that you do not want regular modular exponentiation (i.e., b^k mod m), but b^(2^k) mod m (as you plainly stated).
Using the regular built-in Python function pow this would be:
def FastModularExponentiation(b, k, m):
    return pow(b, pow(2, k), m)
Or, without using pow:
def FastModularExponentiation(b, k, m):
    b %= m
    for _ in range(k):
        b = b ** 2 % m
    return b
If you know r = phi(m) (Euler's totient function), you could reduce the exponent first: exp = pow(2, k, r) and then calculate pow(b, exp, m). Depending on the input values, this might speed things up.
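For example (a sketch assuming m is prime, so phi(m) = m - 1, and gcd(b, m) = 1, which Euler's theorem requires):

def fast_2k_pow_prime(b, k, m):
    exp = pow(2, k, m - 1)  # reduce the huge exponent 2**k mod phi(m)
    return pow(b, exp, m)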
(This was the original answer when I thought you wanted, b^k mod m)
This is what works for me:
def fast_mod_exp(b, exp, m):
    res = 1
    while exp > 1:
        if exp & 1:
            res = (res * b) % m
        b = b ** 2 % m
        exp >>= 1
    return (b * res) % m
The only significant differences I spot are in the last line, return (b * res) % m, and that my while loop terminates earlier: while exp > 1 (which should be the same thing you do, except it saves an unnecessary squaring operation).
Also note that the built-in function pow will do all that for free (if you supply a third argument):
pow(4, 13, 497)
# 445
def fast_exponentiation(k, x, q):
    # make sure all variables are non-negative
    assert k >= 0 and x >= 0 and q >= 1
    result = 1  # accumulator for the answer
    while x:
        if x % 2 == 1:
            result = (result * k) % q
        k = (k ** 2) % q  # square the base; note ** (power), not ^ (XOR)
        x >>= 1  # bit-shift right, halving x
    return result

Optimizing Mathematical Calculation

I have a model for the four possibilities of purchasing a pair of items (purchasing both, neither, or just one) and need to optimize the (pseudo-) log-likelihood function. Part of this, of course, is the calculation/definition of the pseudo-log-likelihood function.
The following is my code, where Beta is a 2-d vector for each customer (there are U customers and U different beta vectors), X is a 2-d vector for each item (different for each of the N items), and Gamma is a symmetric matrix with a scalar value gamma(i,j) for each pair of items. df is a dataframe of the purchases: one row for each customer and N columns for the items.
It seems to me that all of these loops are inefficient and take up too much time, but I am not sure how to speed up this calculation and would appreciate any help improving it.
Thank you in advance!
def pseudo_likelihood(Args):
    Beta = np.reshape(Args[0:2 * U], (U, 2))
    Gamma = np.reshape(Args[2 * U:], (N, N))
    L = 0
    for u in range(0, U, 1):
        print(datetime.datetime.today(), " for user {}".format(u))
        y = df.loc[u][1:]
        beta_u = Beta[u, :]
        for l in range(N):
            print(datetime.datetime.today(), " for item {}".format(l))
            for i in range(N - 1):
                if i == l:
                    continue
                for j in range(i + 1, N):
                    if y[i] == y[j]:
                        if y[i] == 1:
                            L += np.dot(beta_u, (x_vals.iloc[i, 1:] + x_vals.iloc[j, 1:])) + Gamma[i, j]  # log of the exponent of this expression
                        else:
                            L += np.log(
                                1 - np.exp(np.dot(beta_u, (x_vals.iloc[i, 1:] + x_vals.iloc[j, 1:])) + Gamma[i, j])
                                - np.exp(np.dot(beta_u, x_vals.iloc[i, 1:])) * (
                                    1 - np.exp(np.dot(beta_u, x_vals.iloc[j, 1:])))
                                - np.exp(np.dot(beta_u, x_vals.iloc[j, 1:])) * (
                                    1 - np.exp(np.dot(beta_u, x_vals.iloc[i, 1:]))))
                    else:
                        if y[i] == 1:
                            L += np.dot(beta_u, x_vals.iloc[i, 1:]) + np.log(1 - np.exp(np.dot(beta_u, x_vals.iloc[j, 1:])))
                        else:
                            L += np.dot(beta_u, x_vals.iloc[j, 1:]) + np.log(1 - np.exp(np.dot(beta_u, x_vals.iloc[i, 1:])))
            L -= (N - 2) * np.dot(beta_u, x_vals.iloc[l, 1:])
            for k in range(N):
                if k != l:
                    L -= np.dot(beta_u, x_vals.iloc[k, 1:])
    return -L
To add/clarify - I am using this calculation to optimize and find the beta and gamma parameters that generated the data for this pseudo-likelihood function.
I am using scipy optimize.minimize with the 'Powell' method.
Update, for whoever is interested:
I found numpy.einsum to speed up the calculations here by over 90%.
np.einsum performs matrix/vector operations using Einstein notation. Recall that for two matrices A, B, their product AB has entries
(AB)_ik = sum_j a_ij * b_jk
i.e. the (i, k) element of AB is the sum over j of a_ij * b_jk.
Using the einsum function I could calculate in advance all of the values necessary for the iterative calculation, saving precious time and hundreds, if not thousands, of unnecessary calculations.
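For instance (a small illustration of the notation, not from the original post), 'ij,j' sums over the shared index j, so np.einsum('ij,j', X, beta) is the vector of row-wise dot products, the same as X @ beta:

X = np.arange(6.0).reshape(3, 2)   # three items, two features each
beta = np.array([0.5, -1.0])
print(np.einsum('ij,j', X, beta))  # [-1. -2. -3.], same as X @ beta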
I rewrote the code as follows:
def pseudo_likelihood(Args):
    Beta = np.reshape(Args[0:2 * U], (U, 2))
    Gamma = np.reshape(Args[2 * U:], (N, N))
    exp_gamma = np.exp(Gamma)
    L = 0
    for u in range(U):
        y = df.loc[u][1:]
        beta_u = Beta[u, :]
        beta_dot_x = np.einsum('ij,j', x_vals[['V1', 'V2']], beta_u)
        exp_beta_dot_x = np.exp(beta_dot_x)
        log_one_minus_exp = np.log(1 - exp_beta_dot_x)
        for l in range(N):
            for i in range(N - 1):
                if i == l:
                    continue
                for j in range(i + 1, N):
                    if y[i] == y[j]:
                        if y[i] == 1:
                            L += beta_dot_x[i] + beta_dot_x[j] + Gamma[i, j]  # log of the exponent of this expression
                        else:
                            L += math.log(
                                1 - exp_beta_dot_x[i] * exp_beta_dot_x[j] * exp_gamma[i, j]
                                - exp_beta_dot_x[i] * (1 - exp_beta_dot_x[j])
                                - exp_beta_dot_x[j] * (1 - exp_beta_dot_x[i]))
                    else:
                        if y[i] == 1:
                            L += beta_dot_x[i] + log_one_minus_exp[j]
                        else:
                            L += beta_dot_x[j] + log_one_minus_exp[i]
            L -= (N - 2) * beta_dot_x[l]
            for k in range(N):
                if k != l:
                    L -= beta_dot_x[k]
    return -L

Finding null space of binary matrix in python

In factoring methods based on the quadratic sieve, finding the left null space of a binary matrix (values computed mod 2) is a crucial step. (This is also the null space of the transpose.) Does numpy or scipy have tools to do this quickly?
For reference, here is my current code:
# Row-reduce binary matrix
def binary_rr(m):
    rows, cols = m.shape
    l = 0
    for k in range(min(rows, cols)):
        print(k)
        if l >= cols:
            break
        # Swap with pivot if m[k,l] is 0
        if m[k, l] == 0:
            found_pivot = False
            while not found_pivot:
                if l >= cols:
                    break
                for i in range(k + 1, rows):
                    if m[i, l]:
                        m[[i, k]] = m[[k, i]]  # Swap rows
                        found_pivot = True
                        break
                if not found_pivot:
                    l += 1
            if l >= cols:
                break  # No more rows
        # For rows below pivot, subtract row
        for i in range(k + 1, rows):
            if m[i, l]:
                m[i] ^= m[k]
        l += 1
    return m
It is pretty much a straightforward implementation of Gaussian elimination, but since it's written in python it is very slow.
qwr, I found a very fast Gaussian elimination routine that finishes so quickly that the slow point becomes the quadratic sieving (SIQS) step. The Gaussian elimination functions were taken from skollmann's factorise.py at https://raw.githubusercontent.com/skollmann/PyFactorise/master/factorise.py
I'll soon be working on an SIQS/GNFS implementation from scratch, and hope to write something super quick for Python with multithreading and possibly Cython. In the meantime, if you want something that compiles to C (Alperton's ECM engine) but uses Python, you can use https://github.com/oppressionslayer/primalitytest/, which requires you to cd into the calculators directory and run make before importing p2ecm with from sfactorint import p2ecm. With that you can factorise 60-digit numbers in a few seconds.
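The key idea that makes this elimination fast is representing each column of the GF(2) matrix as one big Python integer, so adding one column to another (mod 2) becomes a single XOR instead of a whole array operation. As a minimal standalone illustration of that trick (a sketch of the general idea, independent of the skollmann code below):

def gf2_row_reduce(rows, cols):
    # each entry of rows is an int whose bit j is the entry in column j
    pivot = 0
    for col in range(cols):
        mask = 1 << col
        for r in range(pivot, len(rows)):
            if rows[r] & mask:
                rows[r], rows[pivot] = rows[pivot], rows[r]
                for r2 in range(len(rows)):
                    if r2 != pivot and rows[r2] & mask:
                        rows[r2] ^= rows[pivot]  # whole-row update in one XOR
                pivot += 1
                break
    return rows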
# Requires sympy and numpy to be installed
# Adjust B and I accordingly. Set for 32-length numbers.
# Usage:
# N = 1009732533765251*1896182711927299
# factorise(N, 5000, 25000000)  # Takes about 45-60 seconds on a newer computer
# N = 1009732533765251*581120948477
# Linear algebra step finishes in 1 second, if that
# N = factorise(N, 5000, 2500000)  # Takes about 5 seconds on a newer computer
# # Out[1]: 581120948477

import math
import numpy as np
from sympy import isprime

#
# siqs_ functions are the Gaussian elimination routines right from
# skollmann's factorise.py. It is the fastest Gaussian elimination that I
# have found in python.
#
def siqs_factor_from_square(n, square_indices, smooth_relations):
    """Given one of the solutions returned by siqs_solve_matrix_opt,
    return the factor f determined by f = gcd(a - b, n), where
    a, b are calculated from the solution such that a*a = b*b (mod n).
    Return f, a factor of n (possibly a trivial one).
    """
    sqrt1, sqrt2 = siqs_calc_sqrts(square_indices, smooth_relations)
    assert (sqrt1 * sqrt1) % n == (sqrt2 * sqrt2) % n
    return math.gcd(abs(sqrt1 - sqrt2), n)
def siqs_find_factors(n, perfect_squares, smooth_relations):
    """Perform the last step of the Self-Initialising Quadratic Sieve.
    Given the solutions returned by siqs_solve_matrix_opt, attempt to
    identify a number of (not necessarily prime) factors of n, and
    return them.
    """
    factors = []
    rem = n
    non_prime_factors = set()
    prime_factors = set()
    for square_indices in perfect_squares:
        fact = siqs_factor_from_square(n, square_indices, smooth_relations)
        if fact != 1 and fact != rem:
            if isprime(fact):
                if fact not in prime_factors:
                    print("SIQS: Prime factor found: %d" % fact)
                    prime_factors.add(fact)
                while rem % fact == 0:
                    factors.append(fact)
                    rem //= fact
                if rem == 1:
                    break
                if isprime(rem):
                    factors.append(rem)
                    rem = 1
                    break
            else:
                if fact not in non_prime_factors:
                    print("SIQS: Non-prime factor found: %d" % fact)
                    non_prime_factors.add(fact)
    if rem != 1 and non_prime_factors:
        non_prime_factors.add(rem)
        for fact in sorted(siqs_find_more_factors_gcd(non_prime_factors)):
            while fact != 1 and rem % fact == 0:
                print("SIQS: Prime factor found: %d" % fact)
                factors.append(fact)
                rem //= fact
            if rem == 1 or isprime(rem):
                break
    if rem != 1:
        factors.append(rem)
    return factors
def add_column_opt(M_opt, tgt, src):
    """For a matrix produced by siqs_build_matrix_opt, add the column
    src to the column target (mod 2).
    """
    M_opt[tgt] ^= M_opt[src]

def find_pivot_column_opt(M_opt, j):
    """For a matrix produced by siqs_build_matrix_opt, return the row of
    the first non-zero entry in column j, or None if no such row exists.
    """
    if M_opt[j] == 0:
        return None
    return lars_last_powers_of_two_trailing(M_opt[j] + 1)
def siqs_build_matrix_opt(M):
    """Convert the given matrix M of 0s and 1s into a list of numbers m
    that correspond to the columns of the matrix.
    The j-th number encodes the j-th column of matrix M in binary:
    the i-th bit of m[j] is equal to M[i][j].
    """
    m = len(M[0])
    cols_binary = [""] * m
    for mi in M:
        for j, mij in enumerate(mi):
            cols_binary[j] += "1" if mij else "0"
    return [int(cols_bin[::-1], 2) for cols_bin in cols_binary], len(M), m
def siqs_solve_matrix_opt(M_opt, n, m):
    """
    Perform the linear algebra step of the SIQS. Perform fast
    Gaussian elimination to determine pairs of perfect squares mod n.
    Use the optimisations described in [1].

    [1] Koç, Çetin K., and Sarath N. Arachchige. 'A Fast Algorithm for
        Gaussian Elimination over GF(2) and its Implementation on the
        GAPP.' Journal of Parallel and Distributed Computing 13.1
        (1991): 118-122.
    """
    row_is_marked = [False] * n
    pivots = [-1] * m
    for j in range(m):
        i = find_pivot_column_opt(M_opt, j)
        if i is not None:
            pivots[j] = i
            row_is_marked[i] = True
            for k in range(m):
                if k != j and (M_opt[k] >> i) & 1:  # test M[i][k] == 1
                    add_column_opt(M_opt, k, j)
    perf_squares = []
    for i in range(n):
        if not row_is_marked[i]:
            perfect_sq_indices = [i]
            for j in range(m):
                if (M_opt[j] >> i) & 1:  # test M[i][j] == 1
                    perfect_sq_indices.append(pivots[j])
            perf_squares.append(perfect_sq_indices)
    return perf_squares
def sqrt_int(N):
    Nsqrt = math.isqrt(N)
    assert Nsqrt * Nsqrt == N
    return Nsqrt

def siqs_calc_sqrts(square_indices, smooth_relations):
    """Given one of the solutions returned by siqs_solve_matrix_opt and
    the corresponding smooth relations, calculate the pair [a, b], such
    that a^2 = b^2 (mod n).
    """
    res = [1, 1]
    for idx in square_indices:
        res[0] *= smooth_relations[idx][0]
        res[1] *= smooth_relations[idx][1]
    res[1] = sqrt_int(res[1])
    return res
def quad_residue(a, n):
    # computes a^((n-1)//2) mod n (Euler's criterion; n should be an odd prime)
    l = 1
    q = (n - 1) // 2
    x = q ** l
    if x == 0:
        return 1
    a = a % n
    z = 1
    while x != 0:
        if x % 2 == 0:
            a = (a ** 2) % n
            x //= 2
        else:
            x -= 1
            z = (z * a) % n
    return z
def STonelli(n, p):
    # Tonelli-Shanks: find r with r^2 = n (mod p)
    assert quad_residue(n, p) == 1, "not a square (mod p)"
    q = p - 1
    s = 0
    while q % 2 == 0:
        q //= 2
        s += 1
    if s == 1:
        r = pow(n, (p + 1) // 4, p)
        return r, p - r
    for z in range(2, p):
        # print(quad_residue(z, p))
        if p - 1 == quad_residue(z, p):
            break
    c = pow(z, q, p)
    r = pow(n, (q + 1) // 2, p)
    t = pow(n, q, p)
    m = s
    t2 = 0
    while (t - 1) % p != 0:
        t2 = (t * t) % p
        for i in range(1, m):
            if (t2 - 1) % p == 0:
                break
            t2 = (t2 * t2) % p
        b = pow(c, 1 << (m - i - 1), p)
        r = (r * b) % p
        c = (b * b) % p
        t = (t * c) % p
        m = i
    return (r, p - r)
def build_smooth_relations(smooth_base, root_base):
    smooth_relations = []
    for xx in range(len(smooth_base)):
        smooth_relations.append((root_base[xx], smooth_base[xx], xx))
    return smooth_relations

def strailing(N):
    return N >> lars_last_powers_of_two_trailing(N)

def lars_last_powers_of_two_trailing(N):
    p, y = 1, 2
    orign = N
    # if orign < 17: N = N % 16
    N = N & 15
    if N == 1:
        if ((orign - 1) & (orign - 2)) == 0:
            return orign.bit_length() - 1
        while orign & y == 0:
            p += 1
            y <<= 1
        return p
    if N in [3, 7, 11, 15]:
        return 1
    if N in [5, 13]:
        return 2
    if N == 9:
        return 3
    return 0
def build_matrix(factor_base, smooth_base):
    factor_base = factor_base.copy()
    factor_base.insert(0, 2)
    sparse_matrix = []
    col = 0
    for xx in smooth_base:
        sparse_matrix.append([])
        for fx in factor_base:
            count = 0
            factor_found = False
            while xx % fx == 0:
                factor_found = True
                xx = xx // fx
                count += 1
            if count % 2 == 0:
                sparse_matrix[col].append(0)
                continue
            else:
                if factor_found == True:
                    sparse_matrix[col].append(1)
                else:
                    sparse_matrix[col].append(0)
        col += 1
    return np.transpose(sparse_matrix)
def get_mod_congruence(root, N, withstats=False):
    r = root - N
    if withstats == True:
        print(f"{root} ≡ {r} mod {N}")
    return r

def primes_sieve2(limit):
    a = np.ones(limit, dtype=bool)
    a[0] = a[1] = False
    for (i, isprime) in enumerate(a):
        if isprime:
            yield i
            for n in range(i * i, limit, i):
                a[n] = False

def remove_singletons(XX):
    no_singletons = []
    for xx in XX:
        if len(xx) != 1:
            no_singletons.append(xx)
    return no_singletons
def fb_sm(N, B, I):
    factor_base, sieve_base, sieve_list, smooth_base, root_base = [], [], [], [], []
    primes = list(primes_sieve2(B))
    i, root = -1, math.isqrt(N)
    for x in primes[1:]:
        if quad_residue(N, x) == 1:
            factor_base.append(x)
    for x in range(I):
        xx = get_mod_congruence((root + x) ** 2, N)
        sieve_list.append(xx)
        if xx % 2 == 0:
            xx = strailing(xx + 1)  # using the lars_last_powers_of_two_trailing bit trick
        sieve_base.append(xx)
    for p in factor_base:
        residues = STonelli(N, p)
        for r in residues:
            for i in range((r - root) % p, len(sieve_list), p):
                while sieve_base[i] % p == 0:
                    sieve_base[i] //= p
    for o in range(len(sieve_list)):
        # This is set to 350, which is only good for numbers
        # of len < 32. Modify to be more dynamic for larger numbers.
        if len(smooth_base) >= 350:
            break
        if sieve_base[o] == 1:
            smooth_base.append(sieve_list[o])
            root_base.append(root + o)
    return factor_base, smooth_base, root_base
def isSquare(hm):
    cr = math.isqrt(hm)
    if cr * cr == hm:
        return True
    return False

def find_square(smooth_base):
    for x in smooth_base:
        if isSquare(x):
            return (True, smooth_base.index(x))
    else:
        return (False, -1)
t_matrix = []
primes = list(primes_sieve2(1000000))

def factorise(N, B=10000, I=10000000):
    global primes, t_matrix
    if isprime(N):
        return N
    for xx in primes:
        if N % xx == 0:
            return xx
    factor_base, smooth_base, root_base = fb_sm(N, B, I)
    issquare, t_matrix = find_square(smooth_base)
    if issquare == True:
        return math.gcd(math.isqrt(smooth_base[t_matrix]) + get_mod_congruence(root_base[t_matrix], N), N)
    t_matrix = build_matrix(factor_base, smooth_base)
    smooth_relations = build_smooth_relations(smooth_base, root_base)
    M_opt, M_n, M_m = siqs_build_matrix_opt(np.transpose(t_matrix))
    perfect_squares = remove_singletons(siqs_solve_matrix_opt(M_opt, M_n, M_m))
    factors = siqs_find_factors(N, perfect_squares, smooth_relations)
    return factors
