I have the following code:
def isPP(n):
pos = [int(i) for i in range(n+1)]
pos = pos[2:] ##to ignore the trivial n** 1 == n case
y = []
for i in pos:
for it in pos:
if i** it == n:
y.append((i,it))
#return list((i,it))
#break
if len(y) <1:
return None
else:
return list(y[0])
Which works perfectly up until ~2000, since I'm storing far too much in memory. What can I do to make it work efficiently for large numbers (say, 50000 or 100000). I tried to make it end after finding one case, but my algorithm is still far too inefficient if the number is large.
Any tips?
A number n is a perfect power if there exists a b and e for which b^e = n. For instance 216 = 6^3 = 2^3 * 3^3 is a perfect power, but 72 = 2^3 * 3^2 is not.
The trick to determining if a number is a perfect power is to know that, if the number is a perfect power, then the exponent e must be less than log2 n, because if e is greater then 2^e will be greater than n. Further, it is only necessary to test prime es, because if a number is a perfect power to a composite exponent it will also be a perfect power to the prime factors of the composite component; for instance, 2^15 = 32768 = 32^3 = 8^5 is a perfect cube root and also a perfect fifth root.
The function isPerfectPower shown below tests each prime less than log2 n by first computing the integer root using Newton's method, then powering the result to check if it is equal to n. Auxiliary function primes compute a list of prime numbers by the Sieve of Eratosthenes, iroot computes the integer kth-root by Newton's method, and ilog computes the integer logarithm to base b by binary search.
def primes(n): # sieve of eratosthenes
i, p, ps, m = 0, 3, [2], n // 2
sieve = [True] * m
while p <= n:
if sieve[i]:
ps.append(p)
for j in range((p*p-3)/2, m, p):
sieve[j] = False
i, p = i+1, p+2
return ps
def iroot(k, n): # assume n > 0
u, s, k1 = n, n+1, k-1
while u < s:
s = u
u = (k1 * u + n // u ** k1) // k
return s
def ilog(b, n): # max e where b**e <= n
lo, blo, hi, bhi = 0, 1, 1, b
while bhi < n:
lo, blo, hi, bhi = hi, bhi, hi+hi, bhi*bhi
while 1 < (hi - lo):
mid = (lo + hi) // 2
bmid = blo * pow(b, (mid - lo))
if n < bmid: hi, bhi = mid, bmid
elif bmid < n: lo, blo = mid, bmid
else: return mid
if bhi == n: return hi
return lo
def isPerfectPower(n): # x if n == x ** y, or False
for p in primes(ilog(2,n)):
x = iroot(p, n)
if pow(x, p) == n: return x
return False
There is further discussion of the perfect power predicate at my blog.
IIRC, it's far easier to iteratively check "Does it have a square root? Does it have a cube root? Does it have a fourth root? ..." You will very quickly get to the point where putative roots have to be between 1 and 2, at which point you can stop.
I think a better way would be implementing this "hack":
import math
def isPP(n):
range = math.log(n)/math.log(2)
range = (int)(range)
result = []
for i in xrange(n):
if(i<=1):
continue
exponent = (int)(math.log(n)/math.log(i))
for j in [exponent-1, exponent, exponent+1]:
if i ** j == n:
result.append([i,j])
return result
print isPP(10000)
Result:
[[10,4],[100,2]]
The hack uses the fact that:
if log(a)/log(b) = c,
then power(b,c) = a
Since this calculation can be a bit off in floating points giving really approximate results, exponent is checked to the accuracy of +/- 1.
You can make necessary adjustments for handling corner cases like n=1, etc.
a relevant improvement would be:
import math
def isPP(n):
# first have a look at the length of n in binary representation
ln = int(math.log(n)/math.log(2)) + 1
y = []
for i in range(n+1):
if (i <= 1):
continue
# calculate max power
li = int(math.log(i)/math.log(2))
mxi = ln / li + 1
for it in range(mxi):
if (it <= 1):
continue
if i ** it == n:
y.append((i,it))
# break if you only need 1
if len(y) <1:
return None
else:
return list(y[0])
Related
I have written an implementation of the Sieve of Eratasthones and the segmented sieve. However, for certain segment sizes, the segmented sieve does not agree with the Sieve of Eratasthones. Here is my code:
https://gist.github.com/RahulRajkumar/c183dae594a17315aeca86f933df18e6
import math
def sieve(n):
"""
compute all prime numbers in [[1,n]] using a sieve of
Eratasthones
(using two optimizations from wikipedia:
1. remembering that you don't need to search above sqrt(n)
2. beginning setting values to 0 at i^2
"""
if isinstance(n,int) and n > 0:
sieve = [k+1 for k in range(0,n)]
for i in sieve:
if i > 1 and i <= math.sqrt(n):
j = 0
while (i**2 + j*i) <= len(sieve):
sieve[i**2 + j*i - 1] = 0
j = j + 1
# composites are set to 0, 1 is left alone
sieve = [m for m in sieve if m not in {0,1}]
return sieve
else:
raise ValueError("n must be a positive integer")
def segmented_sieve(n, segment_size):
"""
Segmented sieve of eratasthones
Use: pick segment_size <= sqrt(n)
"""
primes = sieve(segment_size)
# k is the largest integer such that k * segment_size <= n
k = n // segment_size
for i in range(1,k):
page = [i*segment_size + j for j in range(1, segment_size + 1)]
minrange = page[0]
maxrange = page[-1]
for p in primes:
if p <= math.sqrt(maxrange):
minmult = p * math.ceil(minrange / p)
maxmult = p * math.floor(maxrange / p)
for m in range(minmult, maxmult + 1, p):
try:
page[page.index(m)] = 0
except ValueError:
pass
page_primes = [m for m in page if m not in {0,1}]
primes.extend(page_primes)
return primes
Test values:
sieve(10000) == segmented_sieve(10000,100) # True
sieve(10000) == segmented_sieve(10000,95) # True
sieve(10000) == segmented_sieve(10000,94) # False
I'm not sure what could be causing this. Any advice would be greatly appreciated!
I ran into a problem: The code was very slow for 512 bit odd integers if you use classical division for (p-1)/2. But with floor division it works instantly. Is it caused by float conversion?
def solovayStrassen(p, iterations):
for i in range(iterations):
a = random.randint(2, p - 1)
if gcd(a, p) > 1:
return False
first = pow(a, int((p - 1) / 2), p)
j = (Jacobian(a, p) + p) % p
if first != j:
return False
return True
The full code
import random
from math import gcd
#Jacobian symbol
def Jacobian(a, n):
if (a == 0):
return 0
ans = 1
if (a < 0):
a = -a
if (n % 4 == 3):
ans = -ans
if (a == 1):
return ans
while (a):
if (a < 0):
a = -a
if (n % 4 == 3):
ans = -ans
while (a % 2 == 0):
a = a // 2
if (n % 8 == 3 or n % 8 == 5):
ans = -ans
a, n = n, a
if (a % 4 == 3 and n % 4 == 3):
ans = -ans
a = a % n
if (a > n // 2):
a = a - n
if (n == 1):
return ans
return 0
def solovayStrassen(p, iterations):
for i in range(iterations):
a = random.randint(2, p - 1)
if gcd(a, p) > 1:
return False
first = pow(a, int((p - 1) / 2), p)
j = (Jacobian(a, p) + p) % p
if first != j:
return False
return True
def findFirstPrime(n, k):
while True:
if solovayStrassen(n,k):
return n
n+=2
a = random.getrandbits(512)
if a%2==0:
a+=1
print(findFirstPrime(a,100))
As noted in comments, int((p - 1) / 2) can produce garbage if p is an integer with more than 53 bits. Only the first 53 bits of p-1 are retained when converting to float for the division.
>>> p = 123456789123456789123456789
>>> (p-1) // 2
61728394561728394561728394
>>> hex(_)
'0x330f7ef971d8cfbe022f8a'
>>> int((p-1) / 2)
61728394561728395668881408
>>> hex(_) # lots of trailing zeroes
'0x330f7ef971d8d000000000'
Of course the theory underlying the primality test relies on using exactly the infinitely precise value of (p-1)/2, not some approximation more-or-less good to only the first 53 most-significant bits.
As also noted in a comment, using garbage is likely to make this part return earlier, not later:
if first != j:
return False
So why is it much slower over all? Because findFirstPrime() has to call solovayStrassen() many more times to find garbage that passes by sheer blind luck.
To see this, change the code to show how often the loop is trying:
def findFirstPrime(n, k):
count = 0
while True:
count += 1
if count % 1000 == 0:
print(f"at count {count:,}")
if solovayStrassen(n,k):
return n, count
n+=2
Then add, e.g.,
random.seed(12)
at the start of the main program so you can get reproducible results.
Using floor (//) division, it runs fairly quickly, displaying
(6170518232878265099306454685234429219657996228748920426206889067017854517343512513954857500421232718472897893847571955479036221948870073830638539006377457, 906)
So it found a probable prime on the 906th try.
But with float (/) division, I never saw it succeed by blind luck:
at count 1,000
at count 2,000
at count 3,000
...
at count 1,000,000
Gave up then - "garbage in, garbage out".
One other thing to note, in passing: the + p in:
j = (Jacobian(a, p) + p) % p
has no effect on the value of j. Right? p % p is 0.
I am trying to implement a function primeFac() that takes as input a positive integer n and returns a list containing all the numbers in the prime factorization of n.
I have gotten this far but I think it would be better to use recursion here, not sure how to create a recursive code here, what would be the base case? to start with.
My code:
def primes(n):
primfac = []
d = 2
while (n > 1):
if n%d==0:
primfac.append(d)
# how do I continue from here... ?
A simple trial division:
def primes(n):
primfac = []
d = 2
while d*d <= n:
while (n % d) == 0:
primfac.append(d) # supposing you want multiple factors repeated
n //= d
d += 1
if n > 1:
primfac.append(n)
return primfac
with O(sqrt(n)) complexity (worst case). You can easily improve it by special-casing 2 and looping only over odd d (or special-casing more small primes and looping over fewer possible divisors).
The primefac module does factorizations with all the fancy techniques mathematicians have developed over the centuries:
#!python
import primefac
import sys
n = int( sys.argv[1] )
factors = list( primefac.primefac(n) )
print '\n'.join(map(str, factors))
This is a comprehension based solution, it might be the closest you can get to a recursive solution in Python while being possible to use for large numbers.
You can get proper divisors with one line:
divisors = [ d for d in xrange(2,int(math.sqrt(n))) if n % d == 0 ]
then we can test for a number in divisors to be prime:
def isprime(d): return all( d % od != 0 for od in divisors if od != d )
which tests that no other divisors divides d.
Then we can filter prime divisors:
prime_divisors = [ d for d in divisors if isprime(d) ]
Of course, it can be combined in a single function:
def primes(n):
divisors = [ d for d in range(2,n//2+1) if n % d == 0 ]
return [ d for d in divisors if \
all( d % od != 0 for od in divisors if od != d ) ]
Here, the \ is there to break the line without messing with Python indentation.
I've tweaked #user448810's answer to use iterators from itertools (and python3.4, but it should be back-portable). The solution is about 15% faster.
import itertools
def factors(n):
f = 2
increments = itertools.chain([1,2,2], itertools.cycle([4,2,4,2,4,6,2,6]))
for incr in increments:
if f*f > n:
break
while n % f == 0:
yield f
n //= f
f += incr
if n > 1:
yield n
Note that this returns an iterable, not a list. Wrap it in list() if that's what you want.
Most of the above solutions appear somewhat incomplete. A prime factorization would repeat each prime factor of the number (e.g. 9 = [3 3]).
Also, the above solutions could be written as lazy functions for implementation convenience.
The use sieve Of Eratosthenes to find primes to test is optimal, but; the above implementation used more memory than necessary.
I'm not certain if/how "wheel factorization" would be superior to applying only prime factors, for division tests of n.
While these solution are indeed helpful, I'd suggest the following two functions -
Function-1 :
def primes(n):
if n < 2: return
yield 2
plist = [2]
for i in range(3,n):
test = True
for j in plist:
if j>n**0.5:
break
if i%j==0:
test = False
break
if test:
plist.append(i)
yield i
Function-2 :
def pfactors(n):
for p in primes(n):
while n%p==0:
yield p
n=n//p
if n==1: return
list(pfactors(99999))
[3, 3, 41, 271]
3*3*41*271
99999
list(pfactors(13290059))
[3119, 4261]
3119*4261
13290059
Here is my version of factorization by trial division, which incorporates the optimization of dividing only by two and the odd integers proposed by Daniel Fischer:
def factors(n):
f, fs = 3, []
while n % 2 == 0:
fs.append(2)
n /= 2
while f * f <= n:
while n % f == 0:
fs.append(f)
n /= f
f += 2
if n > 1: fs.append(n)
return fs
An improvement on trial division by two and the odd numbers is wheel factorization, which uses a cyclic set of gaps between potential primes to greatly reduce the number of trial divisions. Here we use a 2,3,5-wheel:
def factors(n):
gaps = [1,2,2,4,2,4,2,4,6,2,6]
length, cycle = 11, 3
f, fs, nxt = 2, [], 0
while f * f <= n:
while n % f == 0:
fs.append(f)
n /= f
f += gaps[nxt]
nxt += 1
if nxt == length:
nxt = cycle
if n > 1: fs.append(n)
return fs
Thus, print factors(13290059) will output [3119, 4261]. Factoring wheels have the same O(sqrt(n)) time complexity as normal trial division, but will be two or three times faster in practice.
I've done a lot of work with prime numbers at my blog. Please feel free to visit and study.
def get_prime_factors(number):
"""
Return prime factor list for a given number
number - an integer number
Example: get_prime_factors(8) --> [2, 2, 2].
"""
if number == 1:
return []
# We have to begin with 2 instead of 1 or 0
# to avoid the calls infinite or the division by 0
for i in xrange(2, number):
# Get remainder and quotient
rd, qt = divmod(number, i)
if not qt: # if equal to zero
return [i] + get_prime_factors(rd)
return [number]
Most of the answer are making things too complex. We can do this
def prime_factors(n):
num = []
#add 2 to list or prime factors and remove all even numbers(like sieve of ertosthenes)
while(n%2 == 0):
num.append(2)
n /= 2
#divide by odd numbers and remove all of their multiples increment by 2 if no perfectlly devides add it
for i in xrange(3, int(sqrt(n))+1, 2):
while (n%i == 0):
num.append(i)
n /= i
#if no is > 2 i.e no is a prime number that is only divisible by itself add it
if n>2:
num.append(n)
print (num)
Algorithm from GeeksforGeeks
prime factors of a number:
def primefactors(x):
factorlist=[]
loop=2
while loop<=x:
if x%loop==0:
x//=loop
factorlist.append(loop)
else:
loop+=1
return factorlist
x = int(input())
alist=primefactors(x)
print(alist)
You'll get the list.
If you want to get the pairs of prime factors of a number try this:
http://pythonplanet.blogspot.in/2015/09/list-of-all-unique-pairs-of-prime.html
def factorize(n):
for f in range(2,n//2+1):
while n%f == 0:
n //= f
yield f
It's slow but dead simple. If you want to create a command-line utility, you could do:
import sys
[print(i) for i in factorize(int(sys.argv[1]))]
Here is an efficient way to accomplish what you need:
def prime_factors(n):
l = []
if n < 2: return l
if n&1==0:
l.append(2)
while n&1==0: n>>=1
i = 3
m = int(math.sqrt(n))+1
while i < m:
if n%i==0:
l.append(i)
while n%i==0: n//=i
i+= 2
m = int(math.sqrt(n))+1
if n>2: l.append(n)
return l
prime_factors(198765430488765430290) = [2, 3, 5, 7, 11, 13, 19, 23, 3607, 3803, 52579]
You can use sieve Of Eratosthenes to generate all the primes up to (n/2) + 1 and then use a list comprehension to get all the prime factors:
def rwh_primes2(n):
# http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Input n>=6, Returns a list of primes, 2 <= p < n """
correction = (n%6>1)
n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6]
sieve = [True] * (n/3)
sieve[0] = False
for i in xrange(int(n**0.5)/3+1):
if sieve[i]:
k=3*i+1|1
sieve[ ((k*k)/3) ::2*k]=[False]*((n/6-(k*k)/6-1)/k+1)
sieve[(k*k+4*k-2*k*(i&1))/3::2*k]=[False]*((n/6-(k*k+4*k-2*k*(i&1))/6-1)/k+1)
return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]]
def primeFacs(n):
primes = rwh_primes2((n/2)+1)
return [x for x in primes if n%x == 0]
print primeFacs(99999)
#[3, 41, 271]
from sets import Set
# this function generates all the possible factors of a required number x
def factors_mult(X):
L = []
[L.append(i) for i in range(2,X) if X % i == 0]
return L
# this function generates list containing prime numbers upto the required number x
def prime_range(X):
l = [2]
for i in range(3,X+1):
for j in range(2,i):
if i % j == 0:
break
else:
l.append(i)
return l
# This function computes the intersection of the two lists by invoking Set from the sets module
def prime_factors(X):
y = Set(prime_range(X))
z = Set(factors_mult(X))
k = list(y & z)
k = sorted(k)
print "The prime factors of " + str(X) + " is ", k
# for eg
prime_factors(356)
Simple way to get the desired solution
def Factor(n):
d = 2
factors = []
while n >= d*d:
if n % d == 0:
n//=d
# print(d,end = " ")
factors.append(d)
else:
d = d+1
if n>1:
# print(int(n))
factors.append(n)
return factors
This is the code I made. It works fine for numbers with small primes, but it takes a while for numbers with primes in the millions.
def pfactor(num):
div = 2
pflist = []
while div <= num:
if num % div == 0:
pflist.append(div)
num /= div
else:
div += 1
# The stuff afterwards is just to convert the list of primes into an expression
pfex = ''
for item in list(set(pflist)):
pfex += str(item) + '^' + str(pflist.count(item)) + ' * '
pfex = pfex[0:-3]
return pfex
I would like to share my code for finding the prime factors of number given input by the user:
a = int(input("Enter a number: "))
def prime(a):
b = list()
i = 1
while i<=a:
if a%i ==0 and i!=1 and i!=a:
b.append(i)
i+=1
return b
c = list()
for x in prime(a):
if len(prime(x)) == 0:
c.append(x)
print(c)
def prime_factors(num, dd=2):
while dd <= num and num>1:
if num % dd == 0:
num //= dd
yield dd
dd +=1
Lot of answers above fail on small primes, e.g. 3, 5 and 7. The above is succinct and fast enough for ordinary use.
print list(prime_factors(3))
[3]
Overall Problem: Project Euler 12 - What is the value of the first triangle number to have over five hundred divisors?
Focus of problem: The divisor function
Language: Python
Description: The function I used is brute and the time it take for the program to find a number with more divisors than x increases almost exponentially with each 10 or 20 numbers highers. I need to get to 500 or more divisors. I've identified that the divisor function is what is hogging down the program. The research I did lead me to divisor functions and specifically the divisor function which is supposed to be a function that will count all the divisors of any integer. Every page I've looked at seems to be directed toward mathematics majors and I only have high-school maths. Although I did come across some page that mentioned allot about primes and the Sieve of Atkins but I could not make the connection between primes and finding all the divisors of any integer nor find anything on the net about it.
Main Question: Could someone explain how to code the divisor function or even provide a sample? Maths concepts make more sense to me when I look at them with code. So much appreciated.
brute force divisor function:
def countdiv(a):
count = 0
for i in range(1,(a/2)+1):
if a % i == 0:
count += 1
return count + 1 # +1 to account for number itself as a divisor
If you need a bruteforce function to calculate Number of Divisors (also known as tau(n))
Here's what it looks like
def tau(n):
sqroot,t = int(n**0.5),0
for factor in range(1,sqroot+1):
if n % factor == 0:
t += 2 # both factor and N/factor
if sqroot*sqroot == n: t = t - 1 # if sqroot is a factor then we counted it twice, so subtract 1
return t
The second method involves a decomposing n into its prime factors (and its exponents).
tau(n) = (e1+1)(e2+1)....(em+1) where n = p1^e1 * p2^e2 .... pm^em and p1,p2..pm are primes
More info here
The third method and much more simpler to understand is simply using a Sieve to calculate tau.
def sieve(N):
t = [0]*(N+1)
for factor in range(1,N+1):
for multiple in range(factor,N+1,factor):
t[multiple]+=1
return t[1:]
Here's it in action at ideone
I agree with the two other answers submitted here in that you will only need to search up to the square root of the number. I have one thing to add to this however. The solutions offered will get you the correct answer in a reasonable amount of time. But when the problems start getting tougher, you will need an even more powerful function.
Take a look at Euler's Totient function. Though it only indirectly applies here, it is incredibly useful in later problems. Another related concept is that of Prime Factorization.
A quick way to improve your algorithm is to find the prime factorization of the number. In the Wikipedia article, they use 36 as an example, whose prime factorization is 2^2 * 3^2. Therefore, knowing this, you can use combinatorics to find the number of factors of 36. With this, you will not actually be computing each factor, plus you'd only have to check divisors 2 and 3 before you're complete.
When searching for divisors of n you never have to search beyond the square root of the number n. Whenever you find a divisor that's less than sqrt(n) there is exactly one matching divisor which is greater than the root, so you can increment your count by 2 (if you find divisor d of n then n/d will be the counterpart).
Watch out for square numbers, though. :) The root will be a divisor that doesn't count twice, of course.
If you're going to solve the Project Euler problems you need some functions that deal with prime numbers and integer factorization. Here is my modest library, which provides primes(n), is_prime(n) and factors(n); the focus is on simplicity, clarity and brevity at the expense of speed, though these functions should be sufficient for Project Euler:
def primes(n):
"""
list of primes not exceeding n in ascending
order; assumes n is an integer greater than
1; uses Sieve of Eratosthenes
"""
m = (n-1) // 2
b = [True] * m
i, p, ps = 0, 3, [2]
while p*p < n:
if b[i]:
ps.append(p)
j = 2*i*i + 6*i + 3
while j < m:
b[j] = False
j = j + 2*i + 3
i += 1; p += 2
while i < m:
if b[i]:
ps.append(p)
i += 1; p += 2
return ps
def is_prime(n):
"""
False if n is provably composite, else
True if n is probably prime; assumes n
is an integer greater than 1; uses
Miller-Rabin test on prime bases < 100
"""
ps = [2,3,5,7,11,13,17,19,23,29,31,37,41,
43,47,53,59,61,67,71,73,79,83,89,97]
def is_spsp(n, a):
d, s = n-1, 0
while d%2 == 0:
d /= 2; s += 1
if pow(a,d,n) == 1:
return True
for r in xrange(s):
if pow(a, d*pow(2,r), n) == n-1:
return True
return False
if n in ps: return True
for p in ps:
if not is_spsp(n,p):
return False
return True
def factors(n):
"""
list of prime factors of n in ascending
order; assumes n is an integer, may be
positive, zero or negative; uses Pollard's
rho algorithm with Floyd's cycle finder
"""
def gcd(a,b):
while b: a, b = b, a%b
return abs(a)
def facts(n,c,fs):
f = lambda(x): (x*x+c) % n
if is_prime(n): return fs+[n]
t, h, d = 2, 2, 1
while d == 1:
t = f(t); h = f(f(h))
d = gcd(t-h, n)
if d == n:
return facts(n, c+1, fs)
if is_prime(d):
return facts(n//d, c+1, fs+[d])
return facts(n, c+1, fs)
if -1 <= n <= 1: return [n]
if n < -1: return [-1] + factors(-n)
fs = []
while n%2 == 0:
n = n//2; fs = fs+[2]
if n == 1: return fs
return sorted(facts(n,1,fs))
Once you know how to factor a number, it is easy to count the number of divisors. Consider 76576500 = 2^2 * 3^2 * 5^3 * 7^1 * 11^1 * 13^1 * 17^1. Ignore the bases and look at the exponents, which are 2, 2, 3, 1, 1, 1, and 1. Add 1 to each exponent, giving 3, 3, 4, 2, 2, 2, and 2. Now multiply that list to get the number of divisors of the original number 76576500: 3 * 3 * 4 * 2 * 2 * 2 * 2 = 576. Here's the function:
def numdiv(n):
fs = factors(n)
f = fs.pop(0); d = 1; x = 2
while fs:
if f == fs[0]:
x += 1
else:
d *= x; x = 2
f = fs.pop(0)
return d * x
You can see these functions at work at http://codepad.org/4j8qp60u, and learn more about how they work at my blog. I'll leave it to you to work out the solution to Problem 12.
I'm attempting to implement the Sieve of Eratosthenes. The output seems to be correct (minus "2" that needs to be added) but if the input to the function is larger than 100k or so it seems to take an inordinate amount of time. What are ways that I can optimize this function?
def sieveErato(n):
numberList = range(3,n,2)
for item in range(int(math.sqrt(len(numberList)))):
divisor = numberList[item]
for thing in numberList:
if(thing % divisor == 0) and thing != divisor:
numberList.remove(thing)
return numberList
Your algorithm is not the Sieve of Eratosthenes. You perform trial division (the modulus operator) instead of crossing-off multiples, as Eratosthenes did over two thousand years ago. Here is an explanation of the true sieving algorithm, and shown below is my simple, straight forward implementation, which returns a list of primes not exceeding n:
def sieve(n):
m = (n-1) // 2
b = [True]*m
i,p,ps = 0,3,[2]
while p*p < n:
if b[i]:
ps.append(p)
j = 2*i*i + 6*i + 3
while j < m:
b[j] = False
j = j + 2*i + 3
i+=1; p+=2
while i < m:
if b[i]:
ps.append(p)
i+=1; p+=2
return ps
We sieve only on the odd numbers, stopping at the square root of n. The odd-looking calculations on j map between the integers being sieved 3, 5, 7, 9, ... and indexes 0, 1, 2, 3, ... in the b array of bits.
You can see this function in action at http://ideone.com/YTaMB, where it computes the primes to a million in less than a second.
You can try the same way Eratosthenes did. Take an array with all numbers you need to check order ascending, go to number 2 and mark it. Now scratch every second number till the end of the array. Then go to 3 and mark it. After that scratch every third number . Then go to 4 - it is already scratched, so skip it. Repeat this for every n+1 which is not already scratched.
In the end, the marked numbers are the prime one. This algorithm is faster, but sometimes need lots of memory. You can optimize it a little by drop all even numbers (cause they are not prime) and add 2 manually to the list. This will twist the logic a little, but will take half the memory.
Here is an illustration of what I'm talking about: http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
Warning: removing elements from an iterator while iterating on it can be dengerous...
You could make the
if(thing % divisor == 0) and thing != divisor:
test lighter by splitting it in the loop that breaks when you arrive to the index of 'divisor' and then the test:
for thing in numberList_fromDivisorOn:
if(thing % divisor == 0):
numberList.remove(thing)
This code takes 2 seconds to generate primes less than 10M
(it is not mine, i found it somewer on google)
def erat_sieve(bound):
if bound < 2:
return []
max_ndx = (bound - 1) // 2
sieve = [True] * (max_ndx + 1)
#loop up to square root
for ndx in range(int(bound ** 0.5) // 2):
# check for prime
if sieve[ndx]:
# unmark all odd multiples of the prime
num = ndx * 2 + 3
sieve[ndx+num:max_ndx:num] = [False] * ((max_ndx-ndx-num-1)//num + 1)
# translate into numbers
return [2] + [ndx * 2 + 3 for ndx in range(max_ndx) if sieve[ndx]]
I followed this link: Sieve of Eratosthenes - Finding Primes Python as suggested by #MAK and I've found that the accepted answer could be improved with an idea I've found in your code:
def primes_sieve2(limit):
a = [True] * limit # Initialize the primality list
a[0] = a[1] = False
sqrt = int(math.sqrt(limit))+1
for i in xrange(sqrt):
isprime = a[i]
if isprime:
yield i
for n in xrange(i*i, limit, i): # Mark factors non-prime
a[n] = False
for (i, isprime) in enumerate(a[sqrt:]):
if isprime:
yield i+sqrt
if given unlimited memory and time, the following code will print all the prime numbers. and it'll do it without using trial division. it is based on the haskell code in the paper: The Genuine Sieve of Eratosthenes by Melissa E. O'Neill
from heapq import heappush, heappop, heapreplace
def sieve():
w = [2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,2,4,6,2,6,4,2,4,2,10,2,10]
for p in [2,3,5,7]: print p
n,o = 11,0
t = []
l = len(w)
p = n
heappush(t, (p*p, n,o,p))
print p
while True:
n,o = n+w[o],(o+1)%l
p = n
if not t[0][0] <= p:
heappush(t, (p*p, n,o,p))
print p
continue
while t[0][0] <= p:
_, b,c,d = t[0]
b,c = b+w[c],(c+1)%l
heapreplace(t, (b*d, b,c,d))
sieve()