Why numpy vectorization is slower than a for loop - python

The code below has two functions that do the same thing: each checks whether the line segment between two points intersects any of a set of circles.
from dataclasses import dataclass
from math import sqrt

from line_profiler import LineProfiler
import numpy as np
@dataclass
class Point:
    """A 2-D point with coordinates ``x`` and ``y``.

    ``@dataclass`` replaces the hand-written ``__init__`` and ``__repr__``;
    the generated repr matches the original ``"Point(x=..., y=...)"`` format
    exactly, and value equality (``==``) is gained as a bonus.
    """

    x: float
    y: float
@dataclass
class Circle:
    """A circle with centre ``ctr`` and radius ``r``.

    ``@dataclass`` generates ``__init__(ctr, r)``; the explicit ``__repr__``
    below is kept (dataclass does not override user-defined dunders) so the
    printed field order stays "r first, then ctr" exactly as before.
    """

    ctr: "Point"
    r: float

    def __repr__(self):
        return f"Circle(r={self.r}, ctr={self.ctr})"
def loop(p1: "Point", p2: "Point", circles: "list[Circle]"):
    """Check the segment p1-p2 against every circle with a scalar loop.

    Returns False as soon as either endpoint lies strictly inside a circle,
    or the line through p1 and p2 crosses a circle at two points that both
    fall inside the segment's x-range; returns True otherwise.
    """
    # Slope/intercept of the infinite line through p1 and p2.
    # NOTE(review): assumes p1.x != p2.x — a vertical segment divides by zero.
    m = (p1.y - p2.y) / (p1.x - p2.x)
    n = p1.y - m * p1.x
    max_x = max(p1.x, p2.x)
    min_x = min(p1.x, p2.x)
    # Quadratic leading coefficient is loop-invariant: hoisted out of the loop
    # (the original recomputed it once per circle).
    a = m ** 2 + 1
    for circle in circles:
        # An endpoint strictly inside this circle blocks the segment outright.
        if sqrt((circle.ctr.x - p1.x) ** 2 + (circle.ctr.y - p1.y) ** 2) < circle.r \
                or sqrt((circle.ctr.x - p2.x) ** 2 + (circle.ctr.y - p2.y) ** 2) < circle.r:
            return False
        # Coefficients of a*x^2 + b*x + c = 0, the line/circle intersection.
        b = 2 * (m * n - m * circle.ctr.y - circle.ctr.x)
        c = circle.ctr.x ** 2 + circle.ctr.y ** 2 + n ** 2 - circle.r ** 2 - 2 * n * circle.ctr.y
        discriminant = b ** 2 - 4 * a * c
        if discriminant <= 0:
            # No real roots: the infinite line misses (or is tangent to) the circle.
            continue
        # Two real roots; take the square root once instead of twice.
        root = sqrt(discriminant)
        x1 = (-b + root) / (2 * a)
        x2 = (-b - root) / (2 * a)
        # The segment is blocked only when both crossings lie within its x-range.
        if min_x <= x1 <= max_x and min_x <= x2 <= max_x:
            return False
    return True
def vectorized(p1: "Point", p2: "Point", circles):
    """Check the segment p1-p2 against all circles at once with numpy.

    ``circles`` is a structured array with float fields 'x', 'y' and 'r'.
    Returns False when any circle blocks the segment (endpoint inside, or
    both line/circle crossings within the segment's x-range), True otherwise.
    """
    # Slope/intercept of the infinite line through p1 and p2.
    # NOTE(review): assumes p1.x != p2.x — a vertical segment divides by zero.
    m = (p1.y - p2.y) / (p1.x - p2.x)
    n = p1.y - m * p1.x
    max_x = max(p1.x, p2.x)
    min_x = min(p1.x, p2.x)
    circle_ctr_x = circles['x']
    circle_ctr_y = circles['y']
    circle_radius = circles['r']
    # Endpoint 1 strictly inside any circle?
    if np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius):
        return False
    # Endpoint 2 strictly inside any circle?
    if np.any(np.sqrt((circle_ctr_x - p2.x) ** 2 + (circle_ctr_y - p2.y) ** 2) < circle_radius):
        return False
    # Coefficients of a*x^2 + b*x + c = 0 for every circle at once (a is scalar).
    a = m ** 2 + 1
    b = 2 * (m * n - m * circle_ctr_y - circle_ctr_x)
    c = circle_ctr_x ** 2 + circle_ctr_y ** 2 + n ** 2 - circle_radius ** 2 - 2 * n * circle_ctr_y
    discriminant = b ** 2 - 4 * a * c
    # Keep only circles whose line intersection has two real roots.
    has_roots = discriminant > 0
    discriminant = discriminant[has_roots]
    if discriminant.size == 0:
        return True
    b = b[has_roots]
    # Take the square root once and reuse it for both roots
    # (the original called np.sqrt on the same array twice).
    root = np.sqrt(discriminant)
    x1 = (-b + root) / (2 * a)
    x2 = (-b - root) / (2 * a)
    # Blocked when, for any circle, both roots fall inside the x-range.
    in_range = (min_x <= x1) & (x1 <= max_x) & (min_x <= x2) & (x2 <= max_x)
    return not np.any(in_range)
# Segment endpoints shared by both implementations.
a = Point(x=-2.47496075130008, y=1.3609840363748935)
b = Point(x=3.4637947060471084, y=-3.7779123453298817)
# Obstacle set: a fixed list of Circle objects for the scalar loop; it is
# converted to a structured numpy array below for the vectorized version.
c = [Circle(r=1.2587063082677084, ctr=Point(x=3.618533781361757, y=2.179925931180058)), Circle(r=0.7625751871124099, ctr=Point(x=-0.3173290200183132, y=4.256206636932641)), Circle(r=0.4926043225930364, ctr=Point(x=-4.626312261120341, y=-1.5754603504419196)), Circle(r=0.6026364956540792, ctr=Point(x=3.775240278691819, y=1.7381168262343072)), Circle(r=1.2804597877349562, ctr=Point(x=4.403273380178893, y=-1.6890127555343681)), Circle(r=1.1562415624767421, ctr=Point(x=-1.0675000352105801, y=-0.23952113329203994)), Circle(r=1.112718432321835, ctr=Point(x=2.500137075066017, y=-2.77748519509295)), Circle(r=0.979889574640609, ctr=Point(x=4.494971251199753, y=-1.0530995423779388)), Circle(r=0.7817624050358268, ctr=Point(x=3.2419454348696544, y=4.3303373486692465)), Circle(r=1.0271176198616367, ctr=Point(x=-0.9740272820753071, y=-4.282195116754338)), Circle(r=1.1585218836700681, ctr=Point(x=-0.42096876790888915, y=2.135161027254492)), Circle(r=1.0242603387003988, ctr=Point(x=2.2617850544260767, y=-4.59942951839469)), Circle(r=1.5704233297828027, ctr=Point(x=-1.1182365440831088, y=4.2411408333943506)), Circle(r=0.37137272043983655, ctr=Point(x=3.280499587987774, y=-4.87871834733383)), Circle(r=1.1829610109115543, ctr=Point(x=-0.27755604766113606, y=-3.68429580935016)), Circle(r=1.0993567600839198, ctr=Point(x=0.23602306761027925, y=0.47530122196024704)), Circle(r=1.3865045367147553, ctr=Point(x=-2.537565761732492, y=4.719766182202855)), Circle(r=0.9492796511909753, ctr=Point(x=-3.7047245796551973, y=-2.501817905967274)), Circle(r=0.9866916911482386, ctr=Point(x=1.3021813533479742, y=4.754952371169189)), Circle(r=0.9053004331885084, ctr=Point(x=-3.4912157984801784, y=-0.5269727600532836)), Circle(r=1.3058987272565075, ctr=Point(x=-1.6983878085276427, y=-2.2910189455221053)), Circle(r=0.5342716756987732, ctr=Point(x=4.948676886704507, y=-1.2467089784975183)), Circle(r=1.0603926633240575, ctr=Point(x=-4.390462974765324, y=0.785568745976325)), Circle(r=0.3448422804513971, 
ctr=Point(x=-1.6459756952994697, y=2.7608629057950362)), Circle(r=0.8521457455807724, ctr=Point(x=-4.503217369041699, y=3.93796926957188)), Circle(r=0.602438849989669, ctr=Point(x=-2.0703406576157493, y=0.6142570312870999)), Circle(r=0.6453692950682722, ctr=Point(x=-0.14802220452893144, y=4.08189682338989)), Circle(r=0.6983361689325062, ctr=Point(x=0.09362196694661651, y=-1.0953438275586391)), Circle(r=1.880331563921456, ctr=Point(x=0.23481661751521776, y=-4.09217120864087)), Circle(r=0.5766225363413416, ctr=Point(x=3.149434524126505, y=-4.639582956406762)), Circle(r=0.6177559628867022, ctr=Point(x=-1.6758918144661683, y=-0.7954935787503492)), Circle(r=0.7347952666955615, ctr=Point(x=-3.1907522890427575, y=0.7048509241855683)), Circle(r=1.2795003337464894, ctr=Point(x=-1.777244415863577, y=2.936422879898364)), Circle(r=0.9181024765780231, ctr=Point(x=4.212544425778317, y=-1.953546993038261)), Circle(r=1.7681384709020282, ctr=Point(x=-1.3702722387909405, y=-1.7013020424154368)), Circle(r=0.5420789771729688, ctr=Point(x=4.063803796292818, y=-3.7159871611415065)), Circle(r=1.3863651881788939, ctr=Point(x=0.7685002210812408, y=-3.994230705171357)), Circle(r=0.5739750223225826, ctr=Point(x=0.08779554290638258, y=4.879912451441914)), Circle(r=1.2019825386919343, ctr=Point(x=-4.206623233886995, y=-1.1617382464768689))]
# Structured dtype for the vectorized version.
# NOTE(review): 'float,float,float' creates three float64 fields whose
# auto-generated names (f0/f1/f2) are then renamed to x/y/r.
circle_dt = np.dtype('float,float,float')
circle_dt.names = ['x', 'y', 'r']
np_c = np.array([(x.ctr.x, x.ctr.y, x.r) for x in c], dtype=circle_dt)
# Profile both implementations line by line. LineProfiler (third-party
# line_profiler package) wraps a function and records per-line timings.
lp1 = LineProfiler()
loop_wrapper = lp1(loop)
loop_wrapper(a, b, c)
lp1.print_stats()
lp2 = LineProfiler()
vectorized_wrapper = lp2(vectorized)
vectorized_wrapper(a, b, np_c)
lp2.print_stats()
One implementation is regular for loop implementation, and the other is vectorized implementation with numpy.
From my small knowledge of vectorization, I would have guessed that the vectorized function would yield better result, but as you can see below that is not the case:
Total time: 4.36e-05 s
Function: loop at line 31
Line # Hits Time Per Hit % Time Line Contents
==============================================================
31 def loop(p1: Point, p2: Point, circles: list[Circle]):
32 1 9.0 9.0 2.1 m = (p1.y - p2.y) / (p1.x - p2.x)
33 1 5.0 5.0 1.1 n = p1.y - m * p1.x
34
35 1 19.0 19.0 4.4 max_x = max(p1.x, p2.x)
36 1 5.0 5.0 1.1 min_x = min(p1.x, p2.x)
37
38 6 30.0 5.0 6.9 for circle in circles:
39 6 73.0 12.2 16.7 if sqrt((circle.ctr.x - p1.x) ** 2 + (circle.ctr.y - p1.y) ** 2) < circle.r \
40 6 62.0 10.3 14.2 or sqrt((circle.ctr.x - p2.x) ** 2 + (circle.ctr.y - p2.y) ** 2) < circle.r:
41 return False
42
43 6 29.0 4.8 6.7 a = m ** 2 + 1
44 6 32.0 5.3 7.3 b = 2 * (m * n - m * circle.ctr.y - circle.ctr.x)
45 6 82.0 13.7 18.8 c = circle.ctr.x ** 2 + circle.ctr.y ** 2 + n ** 2 - circle.r ** 2 - 2 * n * circle.ctr.y
46
47 # compute the intersection points
48 6 33.0 5.5 7.6 discriminant = b ** 2 - 4 * a * c
49 5 11.0 2.2 2.5 if discriminant <= 0:
50 # no real roots, the line does not intersect the circle
51 5 22.0 4.4 5.0 continue
52
53 # two real roots, the line intersects the circle at two points
54 1 7.0 7.0 1.6 x1 = (-b + sqrt(discriminant)) / (2 * a)
55 1 4.0 4.0 0.9 x2 = (-b - sqrt(discriminant)) / (2 * a)
56
57 # check if one point in range
58 1 5.0 5.0 1.1 first = min_x < x1 < max_x
59 1 3.0 3.0 0.7 second = min_x < x2 < max_x
60 1 2.0 2.0 0.5 if first and second:
61 1 3.0 3.0 0.7 return False
62
63 return True
Total time: 0.0001534 s
Function: vectorized at line 66
Line # Hits Time Per Hit % Time Line Contents
==============================================================
66 def vectorized(p1: Point, p2: Point, circles):
67 1 10.0 10.0 0.7 m = (p1.y - p2.y) / (p1.x - p2.x)
68 1 5.0 5.0 0.3 n = p1.y - m * p1.x
69
70 1 7.0 7.0 0.5 max_x = max(p1.x, p2.x)
71 1 4.0 4.0 0.3 min_x = min(p1.x, p2.x)
72
73 1 10.0 10.0 0.7 circle_ctr_x = circles['x']
74 1 3.0 3.0 0.2 circle_ctr_y = circles['y']
75 1 3.0 3.0 0.2 circle_radius = circles['r']
76
77 # Pt 1 inside circle
78 1 652.0 652.0 42.5 if np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius):
79 return False
80 # Pt 2 inside circle
81 1 161.0 161.0 10.5 if np.any(np.sqrt((circle_ctr_x - p2.x) ** 2 + (circle_ctr_y - p2.y) ** 2) < circle_radius):
82 return False
83 # Line intersects with circle in range
84 1 13.0 13.0 0.8 a = m ** 2 + 1
85 1 120.0 120.0 7.8 b = 2 * (m * n - m * circle_ctr_y - circle_ctr_x)
86 1 77.0 77.0 5.0 c = circle_ctr_x ** 2 + circle_ctr_y ** 2 + n ** 2 - circle_radius ** 2 - 2 * n * circle_ctr_y
87
88 # compute the intersection points
89 1 25.0 25.0 1.6 discriminant = b**2 - 4*a*c
90 1 46.0 46.0 3.0 discriminant_bigger_than_zero = discriminant > 0
91 1 56.0 56.0 3.7 discriminant = discriminant[discriminant_bigger_than_zero]
92
93 1 6.0 6.0 0.4 if discriminant.size == 0:
94 return True
95
96 1 12.0 12.0 0.8 b = b[discriminant_bigger_than_zero]
97
98 # two real roots, the line intersects the circle at two points
99 1 77.0 77.0 5.0 x1 = (-b + np.sqrt(discriminant)) / (2 * a)
100 1 28.0 28.0 1.8 x2 = (-b - np.sqrt(discriminant)) / (2 * a)
101
102 # check if both points in range
103 1 96.0 96.0 6.3 in_range = (min_x <= x1) & (x1 <= max_x) & (min_x <= x2) & (x2 <= max_x)
104 1 123.0 123.0 8.0 return not np.any(in_range)
For some reason the non vectorized function runs faster.
My simple guess is that it is because the vectorized function runs over the whole array every time, while the non-vectorized one stops in the middle as soon as it finds a circle intersection.
So my questions are:
Is there a numpy function which doesn't iterate over the whole array but stops when the results are false?
What is the reason the vectorized function takes longer to run?
Any general optimization suggestions would be appreciated

Is there a numpy function which doesn't iterate over the whole array but stops when the results are false?
No. This is a long standing feature requested by Numpy users but it will certainly never be added to Numpy. For simple cases, like returning the first index of a boolean array, Numpy could implement that, but the thing is the boolean array needs to be fully created in the first place. In order to support the general case, Numpy should merge multiple operations and do some kind of lazy computation. This basically means rewriting completely Numpy from scratch for an efficient implementation (which is a huge work).
If you need to do that, there are two main solutions:
operating on chunks so the computation can stop early (at the cost of computing up to len(chunk) additional items);
writing your own fast compiled implementation using Numba or Cython (with views).
What is the reason the vectorized function takes longer to run?
The input is pretty small and Numpy is not optimized for small arrays. Indeed, each call to a Numpy function typically takes 0.4-4 us on a mainstream processor (like my i5-9600KF). This is because Numpy has many checks to do, new arrays to allocate, generic internal iterators to build, etc. As a result, a line like np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius) doing 8 Numpy calls and creating 7 temporary arrays takes about 8 us on my machine. The second similar line takes the same time. Together, they are already slower than the non-vectorized version.
As pointed out in the question and the comments, the non-vectorized function can stop early and this can also help the non-vectorized version to be even faster than the other.
Any general optimization suggestions would be appreciated
Regarding your code, using Numba (with plain loops and Numpy arrays) is certainly a good idea for performance. Note the first call can be slower due to the compilation time (you can provide the signature to do this at loading time or just use an AOT compiler including Cython).
Note that arrays of structures are generally not efficient since they prevent the efficient use of SIMD instructions. They are also certainly not efficiently computed by Numpy since the datatype is dynamically created and the Numpy code is compiled ahead of time (so it cannot provide functions specialized for this specific datatype and has to use generic dynamic operations on each item of the array, which is significantly slower than operating on basic datatypes). Please consider using a structure of arrays. For more information please read this post and more generally this post.

Related

Is there an error with pandas.Dataframe.ewm calculation or I am wrong?

I choose the recursive option in order to calculate weighted moving average starting from the latest calculated value.
According to Documentation :
When adjust=False, the exponentially weighted function is calculated
recursively:
y0 = x0
y(t) = (1-alpha) * y(t-1) + alpha * x(t)
So I have the following code :
import pandas as pd
df = pd.DataFrame({'col1':[1, 1, 2, 3, 3, 5, 8, 9],
})
alpha=0.5
df['ewm'] = df['col1'].ewm(alpha, adjust=False).mean()
which gives :
>>> df
col1 ewm
0 1 1.000000
1 1 1.000000
2 2 1.666667
3 3 2.555556
4 3 2.851852
5 5 4.283951
6 8 6.761317
7 9 8.253772
The problem is that it's not corresponding to following mathematical calculations :
y0 = x0 = 1
y1 = (1-0.5) * y0 + 0.5 * x1 = 0.5 + 0.5 = 1
y2 = (1-0.5) * y1 + 0.5 * x2 = 0.5 + 0.5 * 2 = 1.5
y3 = (1-0.5) * y2 + 0.5 * x3 = 0.5 * 1.5 + 0.5 * 3 = 0.75 + 1.5 = 2.25
...
We do not have the same values. What's wrong ?
As noted in the comments, the parameters should be passed by name (e.g. alpha=0.5); as written, 0.5 is passed positionally as com.
The documentation does not expose this fact clearly.
One must be careful, because no exception is raised when the arguments are not named, yet the calculations are wrong.

I am .5 off when trying to figure this out

x = 1 / 2 + 3 // 3 + 4 ** 2 # Why is this equivalent to 17?
y = 1 / 2 + 3 # This is equivalent to 3.5
z = 1 / 2 + 3 // 3 # This should be equivalent to 3.5 // 3
t = 3.5 // 3 + 4 ** 2 # Why is this 17 but the original statement is 17.5?
Why are the expressions for t and x providing different results? Are they not equivalent?
(Original image)
It's 17.5 because your statement is as follows:
1/2 - this is evaluated to 0.5
3 // 3 - this is evaluated to 1
4 ** 2 - this is evaluated to 16
16 + 1 + 0.5 = 17.5
You need to understand the order of operations in your initial statement:
1 / 2 + 3 // 3 + 4 ** 2
This can be bracketed according to standard order of operations (BODMAS or some variant):
(1 / 2) + (3 // 3) + (4 ** 2)
Which then evaluates as above. Your confusion stems from the fact that 1 / 2 + 3 // 3 is not equivalent to (1/2 + 3) // 3, but instead equivalent to (1/2) + (3 // 3) - they're both division, so they'll both take precedence over the addition operator.

Sampling Without Replacement Probabilities

I am using np.random.choice to do sampling without replacement.
I would like the following code to choose 0 50% of the time, 1 30% of the time, and 2 20% of the time.
import numpy as np
draws = []
for _ in range(10000):
draw = np.random.choice(3, size=2, replace=False, p=[0.5, 0.3, 0.2])
draws.append(draw)
result = np.r_[draws]
How can I correctly choose the parameters for np.random.choice to give me the result that I want?
The numbers I want represent the probability of the events being drawn in either 1st or 2nd position exclusively.
print(np.any(result==0, axis=1).mean()) # 0.83, want 0.8
print(np.any(result==1, axis=1).mean()) # 0.68, want 0.7
print(np.any(result==2, axis=1).mean()) # 0.47, want 0.5
I'm giving two interpretations of the problem. One I prefer ("Timeless") and one I consider technically valid but inferior ("Naive")
Timeless:
Given probabilities x, y, z this approach computes x', y', z' such that if we draw twice independently and discard all equal pairs the frequencies of 0, 1, 2 are x, y, z.
This gives the right total frequencies over both trials and has the added benefit of being simple and being timeless in the sense that first and second trial are equivalent.
For this to hold we must have
(x'y' + x'z') / [2 (x'y' + x'z' + y'z')] = x
(x'y' + y'z') / [2 (x'y' + x'z' + y'z')] = y (1)
(y'z' + x'z') / [2 (x'y' + x'z' + y'z')] = z
If we add two of those and subtract the third we get
x'y' / (x'y' + x'z' + y'z') = x + y - z = 1 - 2 z
x'z' / (x'y' + x'z' + y'z') = x - y + z = 1 - 2 y (2)
y'z' / (x'y' + x'z' + y'z') = -x + y + z = 1 - 2 x
Multiplying 2 of those and dividing by the third
x'^2 / (x'y' + x'z' + y'z') = (1 - 2 z) (1 - 2 y) / (1 - 2 x)
y'^2 / (x'y' + x'z' + y'z') = (1 - 2 z) (1 - 2 x) / (1 - 2 y) (3)
z'^2 / (x'y' + x'z' + y'z') = (1 - 2 x) (1 - 2 y) / (1 - 2 z)
Therefore up to a constant factor
x' ~ sqrt[(1 - 2 z) (1 - 2 y) / (1 - 2 x)]
y' ~ sqrt[(1 - 2 z) (1 - 2 x) / (1 - 2 y)] (4)
z' ~ sqrt[(1 - 2 x) (1 - 2 y) / (1 - 2 z)]
Since we know that x', y', z' must sum to one this is enough to solve.
But: we needn't actually completely solve for x', y', z'. Since we are only interested in unequal pairs, all we need are the conditional probabilities x'y' / (x'y' + x'z' + y'z'), x'z' / (x'y' + x'z' + y'z') and y'z' / (x'y' + x'z' + y'z'). These we can compute using equation (2).
We then halve each of them to get the probabilities for ordered pairs and draw from the six legal pairs with these probabilities.
Naive:
This is based on the (arbitrary in my opinion) postulate that after the first draw with probability x', y', z', the second must have conditional probability 0, y' / (y'+z'), z' / (y'+z') if first was 0 x' / (x'+z'), 0, z' / (x'+z') if first was 1 and probability x' / (x'+y'), y' / (x'+y'), 0) if first was 2.
This has the disadvantage that as far as I can tell there is no simple, closed-form solution and the second and first draws are quite different.
The advantage is that one can use it directly with np.random.choice; this is, however, so slow that in the implementation below I give a workaround that avoids this function.
After some algebra one finds:
1/x' - x' = c (1 - 2x)
1/y' - y' = c (1 - 2y)
1/z' - z' = c (1 - 2z)
where c = 1/x' + 1/y' + 1/z' - 1. This I only managed to solve numerically.
Implementation and results:
And here is the implementation.
import numpy as np
from scipy import optimize
def f_pairs(n, p):
    """Draw ``n`` ordered pairs of distinct values from {0, 1, 2} ("timeless").

    Instead of drawing the two positions sequentially, each ordered pair is
    drawn directly from the six legal (unequal) pairs with the conditional
    probabilities derived in the answer, so both positions share the same
    marginal frequencies.
    """
    p = np.asanyarray(p)
    p /= p.sum()
    # The derivation requires every target probability to be at most 1/2.
    assert np.all(p <= 0.5)
    pp = 1 - 2 * p
    # the following two lines show how to compute x', y', z'
    # pp = np.sqrt(pp.prod()) / pp
    # pp /= pp.sum()
    # (after which pp would contain x', y', z')
    # Enumerate the six ordered unequal pairs: (1,2),(0,2),(0,1) and reverses.
    u, v = np.triu_indices(3, 1)
    u, v = u[::-1], v[::-1]
    pairs = np.column_stack((np.concatenate((u, v)), np.concatenate((v, u))))
    # Halve each unordered-pair probability to split it over the two orders.
    pair_probs = np.concatenate((pp, pp)) / 2
    return pairs[np.random.choice(6, size=(n,), replace=True, p=pair_probs)]
def f_opt(n, p):
    """Draw n pairs with the "naive" sequential scheme, but without calling
    np.random.choice(..., replace=False) once per sample."""
    p = np.asanyarray(p)
    p /= p.sum()
    pp = 1 - 2*p
    # Solve 1/x' - x' = c(1 - 2x) (and the analogous equations for y', z')
    # numerically for the adjusted first-draw probabilities.
    def target(l):
        lp2 = l*pp/2
        return (np.sqrt(1 + lp2**2) - lp2).sum() - 1
    l = optimize.root(target, 8).x
    lp2 = l*pp/2
    pp = np.sqrt(1 + lp2**2) - lp2
    # First position: one vectorized categorical draw with adjusted probs.
    fst = np.random.choice(3, size=(n,), replace=True, p=pp)
    # Second position: choose between the two remaining values using a single
    # uniform sample per row (avoids one choice() call per sample).
    snd = (
        (np.random.random((n,)) < (1 / (1 + (pp[(fst+1)%3] / pp[(fst-1)%3]))))
        + fst + 1) % 3
    return np.c_[fst, snd]
def f_naive(n, p):
    """Draw n pairs with the "naive" sequential scheme, calling
    np.random.choice(..., replace=False) once per sample.

    Slow: the Python loop pays the full choice() overhead n times.
    """
    p = np.asanyarray(p)
    p /= p.sum()
    pp = 1 - 2*p
    # Same numeric solve for the adjusted first-draw probabilities as f_opt.
    def target(l):
        lp2 = l*pp/2
        return (np.sqrt(1 + lp2**2) - lp2).sum() - 1
    l = optimize.root(target, 8).x
    lp2 = l*pp/2
    pp = np.sqrt(1 + lp2**2) - lp2
    return np.array([np.random.choice(3, (2,), replace=False, p=pp)
                     for _ in range(n)])
def check_sol(p, sol):
    """Print observed vs. desired frequencies for a sample of ordered pairs.

    ``p`` holds the three target probabilities and ``sol`` is an (N, 2)
    integer array of drawn pairs. The report format is unchanged.
    """
    total = len(sol)
    print("Frequencies [value: observed, desired]")
    per_column = [np.bincount(sol[:, col], minlength=3) / total for col in (0, 1)]
    c1, c2 = per_column
    print(f"1st column: 0: {c1[0]:8.6f} {p[0]:8.6f} 1: {c1[1]:8.6f} {p[1]:8.6f} 2: {c1[2]:8.6f} {p[2]:8.6f}")
    print(f"2nd column: 0: {c2[0]:8.6f} {p[0]:8.6f} 1: {c2[1]:8.6f} {p[1]:8.6f} 2: {c2[2]:8.6f} {p[2]:8.6f}")
    c = c1 + c2
    print(f"1st or 2nd: 0: {c[0]:8.6f} {2*p[0]:8.6f} 1: {c[1]:8.6f} {2*p[1]:8.6f} 2: {c[2]:8.6f} {2*p[2]:8.6f}")
    print()
    print("2nd column conditioned on 1st column [value 1st: val / prob 2nd]")
    for value in range(3):
        matching = np.flatnonzero(sol[:, 0] == value)
        cond = np.bincount(sol[matching, 1], minlength=3) / len(matching)
        print(f"{value}: 0 / {cond[0]:8.6f} 1 / {cond[1]:8.6f} 2 / {cond[2]:8.6f}")
    print()
# demo
# Target marginal probabilities for values 0, 1, 2 (each must be <= 0.5 for
# the derivation in f_pairs to hold).
p = 0.4, 0.35, 0.25
n = 1000000
# The pure-naive method is far slower, so it gets 10x fewer samples.
print("Method: Naive")
check_sol(p, f_naive(n//10, p))
print("Method: naive, optimized")
check_sol(p, f_opt(n, p))
print("Method: Timeless")
check_sol(p, f_pairs(n, p))
Sample output:
Method: Naive
Frequencies [value: observed, desired]
1st column: 0: 0.449330 0.400000 1: 0.334180 0.350000 2: 0.216490 0.250000
2nd column: 0: 0.349050 0.400000 1: 0.366640 0.350000 2: 0.284310 0.250000
1st or 2nd: 0: 0.798380 0.800000 1: 0.700820 0.700000 2: 0.500800 0.500000
2nd column conditioned on 1st column [value 1st: val / prob 2nd]
0: 0 / 0.000000 1 / 0.608128 2 / 0.391872
1: 0 / 0.676133 1 / 0.000000 2 / 0.323867
2: 0 / 0.568617 1 / 0.431383 2 / 0.000000
Method: naive, optimized
Frequencies [value: observed, desired]
1st column: 0: 0.450606 0.400000 1: 0.334881 0.350000 2: 0.214513 0.250000
2nd column: 0: 0.349624 0.400000 1: 0.365469 0.350000 2: 0.284907 0.250000
1st or 2nd: 0: 0.800230 0.800000 1: 0.700350 0.700000 2: 0.499420 0.500000
2nd column conditioned on 1st column [value 1st: val / prob 2nd]
0: 0 / 0.000000 1 / 0.608132 2 / 0.391868
1: 0 / 0.676515 1 / 0.000000 2 / 0.323485
2: 0 / 0.573727 1 / 0.426273 2 / 0.000000
Method: Timeless
Frequencies [value: observed, desired]
1st column: 0: 0.400756 0.400000 1: 0.349099 0.350000 2: 0.250145 0.250000
2nd column: 0: 0.399128 0.400000 1: 0.351298 0.350000 2: 0.249574 0.250000
1st or 2nd: 0: 0.799884 0.800000 1: 0.700397 0.700000 2: 0.499719 0.500000
2nd column conditioned on 1st column [value 1st: val / prob 2nd]
0: 0 / 0.000000 1 / 0.625747 2 / 0.374253
1: 0 / 0.714723 1 / 0.000000 2 / 0.285277
2: 0 / 0.598129 1 / 0.401871 2 / 0.000000

Why is my program stopping when doing a seemingly infinite loop?

This must be really obvious but I am currently doing a little tutorial that features this code snippet:
n=0
a=1
while a>0:
n=n+1
a=(1.0+2.0**(-n))-1.0
print (n)
And I've tried to run it but it keeps getting stuck at n=53. Why? I just assumed that while would always be true ...
If you change the last line to print(n, a) you can see what's happening more clearly:
n = 0
a = 1
while a > 0:
n = n + 1
a = (1.0 + 2.0 ** (-n)) - 1.0
print(n, a)
Output:
1 0.5
2 0.25
3 0.125
4 0.0625
# ...
50 8.881784197001252e-16
51 4.440892098500626e-16
52 2.220446049250313e-16
53 0.0
As you can see, a is half the size each time through the loop. Eventually, 2.0 ** (-n) is so small that floating point math (which has limited precision) is unable to tell the difference between 1.0 and 1.0 + 2.0 ** (-n):
>>> 1.0 + 2.0 ** -51
1.0000000000000004
>>> 1.0 + 2.0 ** -52
1.0000000000000002
>>> 1.0 + 2.0 ** -53
1.0
… and when that happens, subtracting 1.0 from 1.0 gives you 0.0, and the while loop terminates.

Hotelling's T^2 scores in python

I applied PCA on a data set using matplotlib in Python. However, matplotlib does not provide t-squared scores like Matlab. Is there a way to compute Hotelling's T^2 score like Matlab?
Thanks.
matplotlib's PCA class doesn't include the Hotelling T2 calculation, but it can be done with just a couple lines of code. The following code includes a function to compute the T2 values for each point. The __main__ script applies PCA to the same example as used in Matlab's pca documentation, so you can verify that the function generates the same values as Matlab.
from __future__ import print_function, division
import numpy as np
from matplotlib.mlab import PCA
def hotelling_tsquared(pc):
    """Return the Hotelling T^2 statistic for every observation.

    `pc` should be the object returned by matplotlib.mlab.PCA().
    """
    # Observations as columns: shape (n_features, n_obs).
    observations = pc.a.T
    # Reconstruct the sample covariance matrix from the PCA factorization.
    covariance = pc.Wt.T.dot(np.diag(pc.s)).dot(pc.Wt) / (observations.shape[1] - 1)
    # T2_i = x_i^T C^{-1} x_i, evaluated for all columns at once.
    solved = np.linalg.solve(covariance, observations)
    return (observations * solved).sum(axis=0)
if __name__ == "__main__":
hald_text = """Y X1 X2 X3 X4
78.5 7 26 6 60
74.3 1 29 15 52
104.3 11 56 8 20
87.6 11 31 8 47
95.9 7 52 6 33
109.2 11 55 9 22
102.7 3 71 17 6
72.5 1 31 22 44
93.1 2 54 18 22
115.9 21 47 4 26
83.8 1 40 23 34
113.3 11 66 9 12
109.4 10 68 8 12
"""
hald = np.loadtxt(hald_text.splitlines(), skiprows=1)
ingredients = hald[:, 1:]
pc = PCA(ingredients, standardize=False)
coeff = pc.Wt
np.set_printoptions(precision=4)
# For coeff and latent, compare to
# http://www.mathworks.com/help/stats/pca.html#btjpztu-1
print("coeff:")
print(coeff)
print()
latent = pc.s / (ingredients.shape[0] - 1)
print("latent:" + (" %9.4f"*len(latent)) % tuple(latent))
print()
# For tsquared, compare to
# http://www.mathworks.com/help/stats/pca.html#bti6r0c-1
tsquared = hotelling_tsquared(pc)
print("tsquared:")
print(tsquared)
Output:
coeff:
[[ 0.0678 0.6785 -0.029 -0.7309]
[ 0.646 0.02 -0.7553 0.1085]
[-0.5673 0.544 -0.4036 0.4684]
[ 0.5062 0.4933 0.5156 0.4844]]
latent: 517.7969 67.4964 12.4054 0.2372
tsquared:
[ 5.6803 3.0758 6.0002 2.6198 3.3681 0.5668 3.4818 3.9794 2.6086
7.4818 4.183 2.2327 2.7216]
Even though this is an old question, I am posting the code as it may help someone.
Here is the code, as a bonus this does multiple hotelling tests at once
import numpy as np
from scipy.stats import f as f_distrib
def hotelling_t2(X, Y):
    """Two-sample Hotelling T^2 test, vectorized over many tests at once.

    X and Y are 3D arrays:
      dim 0 — number of features
      dim 1 — number of subjects
      dim 2 — number of mesh nodes or voxels (number of tests)

    Returns (pval, t2), one value per test.
    """
    n_x = X.shape[1]
    n_y = Y.shape[1]
    n_feat = X.shape[0]
    # Per-test means, kept broadcastable against the raw data.
    mean_x = X.mean(1, keepdims=True)
    mean_y = Y.mean(1, keepdims=True)
    dev_x = X - mean_x
    dev_y = Y - mean_y
    # Pooled scatter matrices, one (n_feat x n_feat) matrix per test.
    scatter_x = np.einsum('ijk,ljk->ilk', dev_x, dev_x)
    scatter_y = np.einsum('ijk,ljk->ilk', dev_y, dev_y)
    pooled = (scatter_x + scatter_y) / float(n_x + n_y - 2)
    mean_diff = mean_x - mean_y
    # Solve W^{-1} d per test (batched solve wants the test axis first).
    solved = np.linalg.solve(pooled.transpose(2, 0, 1),
                             mean_diff.transpose(2, 0, 1))
    solved = solved.transpose(1, 2, 0)
    t2 = np.sum(mean_diff * solved, 0)
    t2 = t2 * float(n_x * n_y) / float(n_x + n_y)
    # Convert T^2 to an F statistic and then to a p-value.
    stat = (t2 * float(n_x + n_y - 1 - n_feat) / (float(n_x + n_y - 2) * n_feat))
    pval = 1 - np.squeeze(f_distrib.cdf(stat, n_feat, n_x + n_y - 1 - n_feat))
    return pval, t2

Categories

Resources