When I multiply two big integers using FFT, I find the result of FFT and IFFT is always not right.
method
To realize FFT, I just follow the pseudocode as followed:
the pseudocode of FFT
The equations of FFT and IFFT are as followed. So, when realizing IFFT, I just replace a with y, replace omega with omega ^^ -1 and divide it by n. And, use flag to distinguish them in my function.
For FFT, y will be
For IFFT, a will be
problem
To find the problem, I try to compare the results between numpy.fft and my function.
FFT.
The results of numpy and my function look the same, but the sign of images is the opposite. For example (the second element of case2 below):
my function result: -4-9.65685424949238j
numpy result: -4+9.65685424949238j
IFFT. I just find it wrong, and can't find any rule.
python code
Here is my function FFT, and comparison:
from typing import List
from cmath import pi, exp
from numpy.fft import fft, ifft
def FFT(a: List, flag: bool) -> List:
"""realize DFT using FFT"""
n = len(a)
if n == 1:
return a
# complex root
omg_n = exp(2 * pi * 1j / n)
if flag:
# IFFT
omg_n = 1 / omg_n
omg = 1
# split a into 2 part
a0 = a[::2] # even
a1 = a[1::2] # odd
# corresponding y
y0 = FFT(a0, flag)
y1 = FFT(a1, flag)
# result y
y = [0] * n
for k in range(n // 2):
y[k] = y0[k] + omg * y1[k]
y[k + n // 2] = y0[k] - omg * y1[k]
omg = omg * omg_n
# IFFT
if flag:
y = [i / n for i in y]
return y
if __name__ == '__main__':
test_cases = [
[1, 1],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 4, 2, 9, 0, 0, 3, 8, 9, 1, 4, 0, 0, 0, 0, 0, ],
]
print("test FFT")
for i, case in enumerate(test_cases):
print(f"case{i + 1}", case)
manual_result = FFT(case, False)
numpy_result = fft(case).tolist()
print("manual_result:", manual_result)
print("numpy_result:", numpy_result)
print("difference:", [i - j for i, j in zip(manual_result, numpy_result)])
print()
print("test IFFT")
for i, case in enumerate(test_cases):
print(f"case{i + 1}", case)
manual_result = FFT(case, True)
numpy_result = ifft(case).tolist()
print("manual_result:", manual_result)
print("numpy_result:", numpy_result)
print("difference:", [i - j for i, j in zip(manual_result, numpy_result)])
print()
The FFT output:
test FFT
case1 [1, 1]
manual_result: [2, 0]
numpy_result: [(2+0j), 0j]
difference: [0j, 0j]
case2 [1, 2, 3, 4, 5, 6, 7, 8]
manual_result: [36, (-4-9.65685424949238j), (-4-4.000000000000001j), (-4-1.6568542494923815j), -4, (-4+1.6568542494923806j), (-4+4.000000000000001j), (-3.999999999999999+9.656854249492381j)]
numpy_result: [(36+0j), (-4+9.65685424949238j), (-4+4j), (-4+1.6568542494923806j), (-4+0j), (-4-1.6568542494923806j), (-4-4j), (-4-9.65685424949238j)]
difference: [0j, -19.31370849898476j, -8j, -3.313708498984762j, 0j, 3.313708498984761j, 8j, (8.881784197001252e-16+19.31370849898476j)]
case3 [1, 4, 2, 9, 0, 0, 3, 8, 9, 1, 4, 0, 0, 0, 0, 0]
manual_result: [41, (-12.710780677203363+13.231540329804117j), (12.82842712474619+7.2426406871192865j), (-14.692799048494296+7.4256307475248935j), (1.0000000000000013-12j), (5.763866860359768+6.0114171851517995j), (7.171572875253808+1.2426406871192839j), (-10.360287134662114+11.817326767431025j), -3, (-10.360287134662112-11.817326767431021j), (7.17157287525381-1.2426406871192848j), (5.763866860359771-6.011417185151798j), (0.9999999999999987+12j), (-14.692799048494292-7.425630747524895j), (12.828427124746192-7.242640687119286j), (-12.710780677203362-13.23154032980412j)]
numpy_result: [(41+0j), (-12.710780677203363-13.231540329804115j), (12.82842712474619-7.242640687119286j), (-14.692799048494292-7.4256307475248935j), (1+12j), (5.763866860359768-6.011417185151798j), (7.17157287525381-1.2426406871192857j), (-10.360287134662112-11.81732676743102j), (-3+0j), (-10.360287134662112+11.81732676743102j), (7.17157287525381+1.2426406871192857j), (5.763866860359768+6.011417185151798j), (1-12j), (-14.692799048494292+7.4256307475248935j), (12.82842712474619+7.242640687119286j), (-12.710780677203363+13.231540329804115j)]
difference: [0j, 26.46308065960823j, 14.485281374238571j, (-3.552713678800501e-15+14.851261495049787j), (1.3322676295501878e-15-24j), 12.022834370303597j, (-1.7763568394002505e-15+2.4852813742385695j), (-1.7763568394002505e-15+23.634653534862046j), 0j, -23.63465353486204j, -2.4852813742385704j, (3.552713678800501e-15-12.022834370303595j), (-1.3322676295501878e-15+24j), -14.851261495049789j, (1.7763568394002505e-15-14.485281374238571j), (1.7763568394002505e-15-26.463080659608238j)]
The IFFT result:
test IFFT
case1 [1, 1]
manual_result: [1.0, 0.0]
numpy_result: [(1+0j), 0j]
difference: [0j, 0j]
case2 [1, 2, 3, 4, 5, 6, 7, 8]
manual_result: [0.5625, (-0.0625+0.15088834764831843j), (-0.0625+0.062499999999999986j), (-0.0625+0.025888347648318405j), -0.0625, (-0.0625-0.025888347648318433j), (-0.0625-0.062499999999999986j), (-0.062499999999999986-0.1508883476483184j)]
numpy_result: [(4.5+0j), (-0.5-1.2071067811865475j), (-0.5-0.5j), (-0.5-0.20710678118654757j), (-0.5+0j), (-0.5+0.20710678118654757j), (-0.5+0.5j), (-0.5+1.2071067811865475j)]
difference: [(-3.9375+0j), (0.4375+1.357995128834866j), (0.4375+0.5625j), (0.4375+0.23299512883486598j), (0.4375+0j), (0.4375-0.232995128834866j), (0.4375-0.5625j), (0.4375-1.357995128834866j)]
case3 [1, 4, 2, 9, 0, 0, 3, 8, 9, 1, 4, 0, 0, 0, 0, 0]
manual_result: [0.0400390625, (-0.01241287175508141-0.012921426103324331j), (0.012527760864009951-0.007072891296014926j), (-0.014348436570795205-0.007251592526879778j), (0.0009765625000000013+0.01171875j), (0.005628776230820083-0.005870524594874804j), (0.007003489135990047-0.0012135162960149274j), (-0.01011746790494347-0.011540358171319353j), -0.0029296875, (-0.010117467904943469+0.011540358171319355j), (0.007003489135990049+0.0012135162960149274j), (0.005628776230820081+0.005870524594874803j), (0.0009765624999999987-0.01171875j), (-0.014348436570795205+0.0072515925268797805j), (0.012527760864009953+0.007072891296014926j), (-0.012412871755081408+0.01292142610332433j)]
numpy_result: [(2.5625+0j), (-0.7944237923252102+0.8269712706127572j), (0.8017766952966369+0.45266504294495535j), (-0.9182999405308933+0.46410192172030584j), (0.0625-0.75j), (0.3602416787724855+0.37571357407198736j), (0.44822330470336313+0.07766504294495535j), (-0.647517945916382+0.7385829229644387j), (-0.1875+0j), (-0.647517945916382-0.7385829229644387j), (0.44822330470336313-0.07766504294495535j), (0.3602416787724855-0.37571357407198736j), (0.0625+0.75j), (-0.9182999405308933-0.46410192172030584j), (0.8017766952966369-0.45266504294495535j), (-0.7944237923252102-0.8269712706127572j)]
difference: [(-2.5224609375+0j), (0.7820109205701288-0.8398926967160816j), (-0.7892489344326269-0.45973793424097026j), (0.903951503960098-0.47135351424718563j), (-0.0615234375+0.76171875j), (-0.3546129025416654-0.38158409866686216j), (-0.4412198155673731-0.07887855924097029j), (0.6374004780114385-0.7501232811357581j), (0.1845703125+0j), (0.6374004780114385+0.7501232811357581j), (-0.4412198155673731+0.07887855924097029j), (-0.3546129025416654+0.38158409866686216j), (-0.0615234375-0.76171875j), (0.903951503960098+0.47135351424718563j), (-0.7892489344326269+0.45973793424097026j), (0.7820109205701288+0.8398926967160816j)]
#pjs, Thank you for your reminder that FFT requires len(data) to be a power of 2.
As was pointed out in comments, you used a positive sign in the computation of omg_n. There are different definitions of the DFT, so it isn't wrong by itself. However this would naturally lead to differences if you compare your results with an implementation that uses a negative sign, as is the case with numpy.fft.fft. Adjusting your implementation to also use a negative sign would cover all forward transform cases (leaving only small roundoff errors on the order of ~10-16).
For the inverse transform cases, your implementation ends up scaling the result by 1/n at every stage, instead of only the final stage. To correct this, simply remove the scaling from the recursion, and normalize only on the final stage:
def FFTrecursion(a: List, flag: bool) -> List:
"""Recursion of the FFT implementation"""
n = len(a)
if n == 1:
return a
# complex root
omg_n = exp(-2 * pi * 1j / n)
if flag:
# IFFT
omg_n = 1 / omg_n
omg = 1
# split a into 2 part
a0 = a[::2] # even
a1 = a[1::2] # odd
# corresponding y
y0 = FFTrecursion(a0, flag)
y1 = FFTrecursion(a1, flag)
# result y
y = [0] * n
for k in range(n // 2):
y[k] = y0[k] + omg * y1[k]
y[k + n // 2] = y0[k] - omg * y1[k]
omg = omg * omg_n
return y
def FFT(a: List, flag: bool) -> List:
"""realize DFT using FFT"""
y = FFTrecursion(a, flag)
# IFFT final scaling
if flag:
n = len(a)
y = [i / n for i in y]
return y
I have a 2-dimensional xarray dataset that I want to interpolate on the lon and lot coordinates such that I have a higher resolution, but the values correspond exactly with the original values at each coordinate.
I thought the excellent xr.interp function would be able to do this, but following the example I see some discrepancy between the original and interpolated values. I am increasing the longitude and latitude resolution by 4, and thus would except all air values that occur once in the original dataset, to occur 16 times in the interpolated dataset, but this is not the case.
Does anyone know what the cause is that the original and interpolated dataset do not align and how I could solve it?
ds = xr.tutorial.open_dataset("air_temperature").isel(time=0)
fig, axes = plt.subplots(ncols=2, figsize=(10, 4))
ds_sel=ds.sel(lon=slice(250,260),lat=slice(40,30))
ds.air.plot(ax=axes[0],xlim=(250,260),ylim=(30,40))
axes[0].set_title("Raw data")
# Interpolated data
new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4)
new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4)
dsi = ds.interp(lat=new_lat, lon=new_lon,method="nearest")
dsi_sel=dsi.sel(lon=slice(250,260),lat=slice(40,30))
dsi.air.plot(ax=axes[1],xlim=(250,260),ylim=(30,40))
axes[1].set_title("Interpolated data")
Showing the unique values with
unique, counts = np.unique(ds_sel.air.values, return_counts=True)
print("original values",dict(zip(unique, counts)))
unique, counts = np.unique(dsi_sel.air.values, return_counts=True)
print("interpolated values",dict(zip(unique, counts)))
I get
original values {262.1: 1, 263.1: 1, 263.9: 1, 264.4: 1, 265.19998: 1, 266.6: 1, 266.79: 1, 266.9: 2, 268.29: 1, 269.79: 1, 270.4: 1, 273.0: 1, 273.6: 1, 275.19998: 1, 276.29: 1, 278.0: 1, 278.5: 1, 278.6: 1, 281.5: 1, 282.1: 1, 282.29: 1, 284.6: 1, 286.79: 1, 288.0: 1}
interpolated values {262.1: 4, 263.1: 8, 263.9: 8, 264.4: 8, 265.19998: 4, 266.6: 16, 266.79: 16, 266.9: 24, 268.29: 8, 269.79: 20, 270.4: 10, 273.0: 20, 273.6: 16, 275.19998: 8, 276.29: 20, 278.0: 16, 278.5: 10, 278.6: 8, 281.5: 4, 282.1: 16, 282.29: 8, 284.6: 8, 286.79: 8, 288.0: 4}
I think you're conceptually running up against a fencepost error (see the section on this page: https://en.wikipedia.org/wiki/Off-by-one_error)
You should interpret the xarray coordinates as "midpoints", not as the cell boundaries.
Your new_lon isn't nicely divided into 1/2, 1/4, 1/8, etc.:
print(new_lon)
[200. 200.61611374 201.23222749 201.84834123 202.46445498
203.08056872 203.69668246 204.31279621 204.92890995]
And it doesn't include all the original coordinates.
Taking the "off-by-ones" into account:
new_lon = np.linspace(ds.lon[0], ds.lon[-1], (ds.dims["lon"] - 1) * 4 + 1)
new_lat = np.linspace(ds.lat[0], ds.lat[-1], (ds.dims["lat"] - 1) * 4 + 1)
print(new_lon)
[200. 200.625 201.25 201.875 202.5 203.125 203.75 204.375 205. ]
You can then e.g. inspect the part of the first row of the original and the interpolated one:
selection = ds["air"][0, :3]
selection_i = dsi["air"][0, :9]
print(selection["lon"])
print(selection.values)
print(selection_i["lon"])
print(selection_i.values)
This looks good to me:
[200. 202.5 205. ]
[241.2 242.5 243.5]
[200. 200.625 201.25 201.875 202.5 203.125 203.75 204.375 205. ]
[241.2 241.2 241.2 242.5 242.5 242.5 242.5 243.5 243.5]
Of course, when doing nearest interpolation, you might end up with ties:
0.5 is equally far removed from 0.0 as it is from 1.0 -- and so you inadverntely have to bias either "up" or "down" to get a single nearest value.
Also note that the .plot() command, which draws a Matplotlib QuadMesh has to infer boundaries from midpoints somehow. This can sometimes lead to boundaries being drawn slightly differently from what you might have in mind (especially if the coordinate is unevenly spaced).
Hi I'm trying to figure out how to fit those values with a piecewise linear function. I have read this question but I can't get forward (How to apply piecewise linear fit in Python? ). In this example is show how to implement a piecewise function for a 2 segment case. But I need to do it in a three segment case as in figure.
I'have written this code:
from scipy import optimize
import matplotlib.pyplot as plt
import numpy as np
x1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13, 14, 15,16,17,18,19,20,21], dtype=float)
y1 = np.array([5, 7, 9, 11, 13, 15, 28.92, 42.81, 56.7, 70.59, 84.47, 98.36, 112.25, 126.14, 140.03,145,147,149,151,153,155])
x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13, 14, 15], dtype=float)
y = np.array([5, 7, 9, 11, 13, 15, 28.92, 42.81, 56.7, 70.59, 84.47, 98.36, 112.25, 126.14, 140.03])
def piecewise(x,x0,x1,y0,y1,k0,k1,k2):
return np.piecewise(x , [x <= x0, (x>= x1)] , [lambda x:k0*x + y0-k0*x0, lambda x:k1*(x-(x1+x0))-y1, lambda x:k2*x + y1-k2*x1])
p , e = optimize.curve_fit(piecewise_linear, x1, y1)
xd = np.linspace(0, 15, 100)
plt.figure()
plt.plot(x1, y1, "o")
plt.plot(xd, piecewise_linear(xd, *p))
but this is the output
Any suggestion? I belive that the problem is in return np.piecewise(x , [x <= x0, (x>= x1)] , [lambda x:k0*x + y0-k0*x0, lambda x:k1*(x-(x1+x0))-y1, lambda x:k2*x + y1-k2*x1]) in particular in the second lambda.
EDIT 1:
If I try to different data the solution provided by A.L. I don't get good results.
I get this result:
with
x=[ 16.01690476, 16.13801587, 14.63628571, 15.32664399,
15.8145 , 15.71507143, 15.56107143, 15.553 ,
15.08734524, 14.97275 , 15.51958333, 16.61981859,
16.36589286, 14.78708333, 14.41565476, 13.47763158,
13.42412281, 12.95551378, 13.66601504, 13.63315789,
13.21463659, 13.53464286, 14.60130952, 14.7774881 ,
13.04319048, 12.53385965, 12.65745614, 13.90535714,
14.82412281, 14.6565 , 15.09541667, 13.41434524,
13.66033333, 14.57964286, 13.55416667, 13.43041667,
13.01137566, 12.76429825, 11.55241667, 11.0634881 ,
10.92729762, 11.21625 , 10.72092857, 11.80380952,
12.55233333, 12.11307143, 11.78892857, 12.45458333,
11.05539286, 10.69214286, 10.32566667, 11.3439881 ,
9.69563492, 10.72535714, 10.26180272, 7.77272727,
6.37704082, 8.49666667, 8.5389881 , 5.68547619,
7.00616667, 8.22015873, 10.20315476, 15.35736842,
12.25158333, 11.09622153, 10.4118254 , 9.8602381 ,
10.16727273, 15.10858333, 13.82215539, 12.44719298,
10.92341667, 11.44565476, 11.43333333, 10.5045 ,
11.14357143, 10.37625 , 8.93421769, 9.48444444,
10.43483333, 10.8659881 , 10.96166667, 10.12872619,
9.64663265, 9.29979762, 9.67173469, 8.978322 ,
9.10419501, 9.45411565, 10.46411565, 7.95739229,
8.72616667, 7.03892857, 7.32547619, 7.56441667,
6.61022676, 9.09014739, 10.78141667, 10.85918367,
11.11665476, 10.141 , 9.17760771, 8.27968254,
11.02625 , 12.34809524, 11.17807018, 11.25416667,
11.29236905, 9.28357143, 9.77033333, 11.52086168,
9.8625 , 12.60281955, 12.42785714, 12.11902256,
13.1 , 13.02791667, 13.87779449, 15.09857143,
13.93935185, 13.69821429, 13.39880952, 12.45692982,
12.76921053, 13.23708333, 13.71666667, 15.39807143,
15.27916667, 14.66464286, 13.38694444, 10.97555556,
10.02191667, 11.99608333, 14.26325 , 15.40991667,
15.12908333, 15.76265476, 12.12763158, 15.01641667,
14.39602381, 12.98532143, 14.98807018, 18.30547619,
16.7564966 , 16.82982143, 19.8487013 , 19.18600907]
and
y=[ 2.36846863, 2.73722628, 2.77177583, 2.63930636, 2.80864749,
2.57066667, 2.65277287, 2.57162347, 2.76295667, 2.79835391,
2.60431154, 2.17326401, 2.67740698, 2.47138153, 2.49882574,
2.60987338, 2.69935565, 2.60755362, 2.77702029, 2.62996942,
2.45959517, 2.52750434, 2.73833005, 2.52009 , 2.80933226,
1.63807085, 2.49230099, 2.55441614, 3.19256506, 2.52609288,
1.02931596, 2.40266963, 2.3306463 , 2.69094276, 2.60779985,
2.48351648, 2.45131766, 2.40526763, 2.03952569, 1.86217009,
1.79971848, 1.91772218, 1.85895421, 2.32725731, 2.28189713,
2.11835833, 2.09636517, 2.2230303 , 1.85863317, 1.77550406,
1.68862391, 1.79187765, 1.70887476, 1.81911193, 1.74802483,
1.65776432, 1.58012849, 1.67781494, 1.62451541, 1.60555884,
1.56172214, 1.60083809, 1.65256994, 2.74794704, 2.27089627,
1.80364982, 1.51412482, 1.77738757, 1.56979564, 2.46538633,
2.37679625, 2.40389294, 2.04165763, 1.82086407, 1.90609219,
1.87480978, 1.8877854 , 1.76080074, 1.68369028, 1.57419297,
1.66470126, 1.74522552, 1.72459756, 1.65510503, 1.72131148,
1.6254417 , 1.57091907, 1.68755268, 1.70307911, 1.59445121,
1.74393783, 1.72913779, 1.66883237, 1.59859545, 1.62335831,
1.73378184, 1.62621588, 1.79532164, 1.78289992, 1.79475101,
1.7826266 , 1.68778918, 1.64484127, 1.62332696, 1.75372393,
1.99038021, 1.87268137, 1.86124502, 1.82435911, 1.62927102,
1.66443723, 1.86743516, 1.62745098, 2.20200312, 2.09641026,
2.26649111, 2.63271605, 2.18050721, 2.57138433, 2.51833359,
2.74684184, 2.57209998, 2.63762019, 2.30027877, 2.28471286,
2.40323668, 2.37103313, 2.16414489, 1.01027109, 2.64181007,
2.45467765, 2.05773672, 1.73624917, 2.05233688, 2.70820669,
2.65594222, 2.67445635, 2.37212985, 2.48221803, 2.77655216,
2.62839879, 2.26481307, 2.58005799, 2.1188172 , 2.14017268,
2.16459571, 1.95083406, 1.46224418]
Fitting a piecewise linear function is a nonlinear optimization problem which may have local optimas. The result you see is probably one of the local optimas where your optimization algorithm gets stuck.
One way to solve this problem is to repeat your optimization algorithm with different initial values and take the best fit. I used the mean absolute error (MAE) to compare the different fits against each other.
perr = np.sum(np.abs(y1-piecewise(x1, *p)))
I also changed your piecewise funtion because it was a bit confusing for me. But it still a piecewise function as before
Further think you forgot to extend the x and xd array to the value of 21. (thats why the green line ends early).
from scipy import optimize
import matplotlib.pyplot as plt
import numpy as np
def piecewise(x,x0,x1,y0,y1,k0,k1,k2):
return np.piecewise(x , [x <= x0, np.logical_and(x0<x, x<= x1),x>x1] , [lambda x:k0*x + y0, lambda x:k1*(x-x0)+y1+k0*x0,
lambda x:k2*(x-x1) + y0+y1+k0*x0+k1*(x1-x0)])
x1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13, 14, 15,16,17,18,19,20,21], dtype=float)
y1 = np.array([5, 7, 9, 11, 13, 15, 28.92, 42.81, 56.7, 70.59, 84.47, 98.36, 112.25, 126.14, 140.03,145,147,149,151,153,155])
x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13, 14, 15,16,17,18,19,20,21], dtype=float)
y = np.array([5, 7, 9, 11, 13, 15, 28.92, 42.81, 56.7, 70.59, 84.47, 98.36, 112.25, 126.14, 140.03,145,147,149,151,153,155])
perr_min = np.inf
p_best = None
for n in range(100):
k = np.random.rand(7)*20
p , e = optimize.curve_fit(piecewise, x1, y1,p0=k)
perr = np.sum(np.abs(y1-piecewise(x1, *p)))
if(perr < perr_min):
perr_min = perr
p_best = p
xd = np.linspace(0, 21, 100)
plt.figure()
plt.plot(x1, y1, "o")
y_out = piecewise(xd, *p_best)
plt.plot(xd, y_out)
plt.show()
this gives me:
with p = [ 6.34259491 15.00000023 2.97272604 7.05498314 2.00751828
13.88881542 1.99960597]
Edit1
You edited your question, and this ist the answer to the edited one.
Sorry Iam new at stackoverlfow and not sure if I should post another answer instead
In your second dataset you added noise to data. In my opinion there are two kinds of noises. A gaussian one, which places the points close to the underlying piecewise line and outlier noise which places points far away from the original underlying line.
Under the hood the optimization algorithm you use optimizes the following according to p:
E = sum(square(y-piecewise(x,p)))
http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html#scipy.optimize.curve_fit
The gaussian noise is not very problematic. The optimization you use assumes indirectly this gaussian noise (by minimizing the least square error) and fits the line as good as possible. The real problem comes in with the outliers.
The problem is that outliers are far way from the original function. Even if the optimization tries the optimal parameters, the Energy function E will be not minimal, as your outliers are far away from the original function and this distance is even squared so it shifts away the minimum of the Function E far away from the true parameters of your function.
So whats the solution ?
Get rid of the outliers.
An automized approach to to that is ransac
https://en.wikipedia.org/wiki/RANSAC.
In Brief: You choose a random subset of the original data. You hope the subset has not outliers. You fit your function to the subset and discard the points, which are far way from the fitted function. If enough points survived this step, you take all the surviving points and repeat the fit. The error on this "inlier" set is a measure of the quality of your fit. Then you repeat the whole process and take the best final fit.
I ajusted my script accordingly:
from scipy import optimize
import matplotlib.pyplot as plt
import numpy as np
def piecewise(x,x0,x1,y0,y1,k0,k1,k2):
return np.piecewise(x , [x <= x0, np.logical_and(x0<x, x<= x1),x>x1] , [lambda x:k0*x + y0, lambda x:k1*(x-x0)+y1+k0*x0,
lambda x:k2*(x-x1) + y0+y1+k0*x0+k1*(x1-x0)])
x = np.array(x)
y = np.array(y)
x1 = x
y1 = y
perr_min = np.inf
p_best = None
for n in range(100):
idx = np.random.choice(np.arange(len(x)), 10, replace=False)
x_sample = x[idx]
y_sample = y[idx]
k = np.random.rand(7)*20
try:
p , e = optimize.curve_fit(piecewise, x_sample,y_sample ,p0=k)
each_error = np.abs(y-piecewise(x, *p))
x_inliner = x[each_error < 1]
y_inlier = y[each_error < 1]
if(x_inliner.shape[0] < 0.8 * x.shape[0]):
continue
p_inlier , e_inlier = optimize.curve_fit(piecewise, x_inliner,y_inlier ,p0=p)
perr = np.sum(np.abs(y-piecewise(x, *p_inlier)))
if(perr < perr_min):
perr_min = perr
p_best = p_inlier
except RuntimeError:
pass
xd = np.linspace(0, 21, 100)
plt.figure()
plt.plot(x, y, "o")
y_out = piecewise(xd, *p_best)
plt.plot(xd, y_out)
print p_best
plt.show()
With 100 repetitions I get the following result:
The piecewise-regression python library can fit models with different numbers of breakpoints.
First of all, for demonstration purposes generate some data with 2 breakpoints:
import numpy as np
gradients = [2.5,12,2]
constant = 0
breakpoints = [6, 15]
n_points = 100
np.random.seed(1)
xx = np.linspace(0, 25, n_points)
yy = constant + gradients[0]*xx + np.random.normal(size=n_points)*10
for bp_n in range(len(breakpoints)):
yy += (gradients[bp_n+1] - gradients[bp_n]) * np.maximum(xx - breakpoints[bp_n], 0)
To fit and plot the model:
import piecewise_regression
import matplotlib.pyplot as plt
pw_fit = piecewise_regression.Fit(xx, yy, n_breakpoints=2)
pw_fit.plot()
plt.xlabel("x")
plt.ylabel("y")
plt.show()
It also gives you a statistical analysis:
pw_fit.summary()
It won't work well with the data you provided in your edit, because there are outliers that dominate the error cost function. This will be an issue whichever method you use to fit the data, you need to decide how to handle the outliers in this instance.