Find graphs intersection in Python

Does anyone have an idea of how I can find the intersection of these two graphs? (image below)
energ_ac, price_compvend and energ_ac1, price_compvend1 are sets of x,y values.
The following code gets the values from a database and then plots the two graphs. I can only find the intersection manually, and I want to get it automatically.
import matplotlib.pyplot as plt
import pyodbc
import pandas as pd
import numpy as np
import sys

np.set_printoptions(suppress=True)
np.set_printoptions(threshold=sys.maxsize)

conn = pyodbc.connect(Trusted_Connection='yes', driver='{SQL Server}', server='srv03',
                      database='mercadoOMIE_curvas')  # connection to the DB on SQL Server

SQL_Query = pd.read_sql_query("""SELECT * FROM curva_pbc_uof_2020_1_12 WHERE ("4" = 'C' AND "0" = '1' AND "7" = 'O')""", conn)
df = pd.DataFrame(SQL_Query, columns=['0','1','2','3','4','5','6','7','8'])

# strip the '.' thousands separators and convert ',' decimals;
# '.' is meant literally here, so regex must be False
df['5'] = df['5'].str.replace('.', '', regex=False)
df['6'] = df['6'].str.replace('.', '', regex=False)
df['5'] = pd.to_numeric(df['5'].str.replace(',', '.'), errors='coerce')
df['6'] = pd.to_numeric(df['6'].str.replace(',', '.'), errors='coerce')

# accumulate energy so the x axis is cumulative
energ_ac = np.zeros(len(df['5']))
energ_ac[0] = df['5'][0]
for x in range(1, len(df['5'])):
    energ_ac[x] = energ_ac[x-1] + df['5'][x]
price_compvend = df['6'].to_numpy()
plt.plot(energ_ac, price_compvend)
SQL_Query1 = pd.read_sql_query("""SELECT * FROM curva_pbc_uof_2020_1_12 WHERE ("4" = 'V' AND "0" = '1' AND "7" = 'O')""", conn)
df1 = pd.DataFrame(SQL_Query1, columns=['0','1','2','3','4','5','6','7','8'])
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     print(df1)
df1['5'] = df1['5'].str.replace('.', '', regex=False)
df1['6'] = df1['6'].str.replace('.', '', regex=False)
df1['5'] = pd.to_numeric(df1['5'].str.replace(',', '.'), errors='coerce')
df1['6'] = pd.to_numeric(df1['6'].str.replace(',', '.'), errors='coerce')

energ_ac1 = np.zeros(len(df1['5']))
energ_ac1[0] = df1['5'][0]
for x in range(1, len(df1['5'])):
    energ_ac1[x] = energ_ac1[x-1] + df1['5'][x]
price_compvend1 = df1['6'].to_numpy()
plt.plot(energ_ac1, price_compvend1)
plt.show()

The solution comes from this link: np.array intersection // AttributeError: 'module' object has no attribute 'PiecewisePolynomial'
import scipy.interpolate as interpolate
import scipy.optimize as optimize
import numpy as np
x1 = np.array([1.4,2.1,3,5.9,8,9,23])
y1 = np.array([2.3,3.1,1,3.9,8,9,11])
x2 = np.array([1,2,3,4,6,8,9])
y2 = np.array([4,12,7,1,6.3,8.5,12])
# linear interpolators
opts = {'fill_value': 'extrapolate'}
f1 = interpolate.interp1d(x1,y1,**opts)
f2 = interpolate.interp1d(x2,y2,**opts)
# possible range for an intersection
xmin = np.min((x1,x2))
xmax = np.max((x1,x2))
# number of intersections
xuniq = np.unique((x1,x2))
xvals = xuniq[(xmin<=xuniq) & (xuniq<=xmax)]
# note that it's bad practice to compare floats exactly
# but worst case here is a bit of redundance, no harm
# for each combined interval there can be at most 1 intersection,
# so looping over xvals should hopefully be enough
# one can always err on the safe side and loop over a `np.linspace`
intersects = []
for xval in xvals:
    x0, = optimize.fsolve(lambda x: f1(x) - f2(x), xval)
    if (xmin <= x0 <= xmax
            and np.isclose(f1(x0), f2(x0))
            and not any(np.isclose(x0, intersects))):
        intersects.append(x0)
print(intersects)
print(f1(intersects))
print(f2(intersects))
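
Applied to the arrays from the question, a minimal sketch (assuming the cumulative energ_ac values are strictly increasing, which interp1d needs to behave well) would be:
f1 = interpolate.interp1d(energ_ac, price_compvend, fill_value='extrapolate')
f2 = interpolate.interp1d(energ_ac1, price_compvend1, fill_value='extrapolate')
# a single crossing can be found from one starting guess; several crossings need the loop above
x0, = optimize.fsolve(lambda x: f1(x) - f2(x), np.median(energ_ac))
print('Intersection at ({:.2f}, {:.2f})'.format(x0, float(f1(x0))))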

Use the script below:
diff_vector = abs(price_compvend - price_compvend1)
min_index = np.where(diff_vector == np.min(diff_vector))
print('Intersection point is ({},{})'.format(energ_ac[min_index],
                                             price_compvend[min_index]))
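
Note that this assumes price_compvend and price_compvend1 are sampled at the same x positions and have equal length. If they are not, a sketch of interpolating both curves onto a common grid first (assuming both energ_ac arrays are increasing):
common_x = np.linspace(max(energ_ac.min(), energ_ac1.min()),
                       min(energ_ac.max(), energ_ac1.max()), 1000)
y1 = np.interp(common_x, energ_ac, price_compvend)
y2 = np.interp(common_x, energ_ac1, price_compvend1)
min_index = np.argmin(abs(y1 - y2))
print('Closest approach at ({},{})'.format(common_x[min_index], y1[min_index]))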

You could use the set.intersection() method to get the points the two graphs share. Note that this only finds x,y pairs that appear exactly in both datasets; it will not find crossings that fall between sampled points.
graph_points1 = set(zip(energ_ac,price_compvend))
graph_points2 = set(zip(energ_ac1,price_compvend1))
intersection_points = graph_points1.intersection(graph_points2)
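
Because exact float equality is fragile, a variant that rounds before intersecting may be more robust (the precision of 6 decimals is an arbitrary choice):
graph_points1 = set(zip(np.round(energ_ac, 6), np.round(price_compvend, 6)))
graph_points2 = set(zip(np.round(energ_ac1, 6), np.round(price_compvend1, 6)))
intersection_points = graph_points1 & graph_points2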

Related

Is there a way to make a bit of a gap between the x-axis and the graph? (Python, bqplot)

I created a random number graph, and I want more of a gap between the x-axis and the graph because it is hard to read. Does anyone have an idea how to do that? Thank you so much. Here is my example code.
from bqplot import Axis, LinearScale, OrdinalScale, Scatter, Figure, Tooltip, DateScale, Lines, Bars
# import bql  # Bloomberg-only module, unused in this example
import pandas as pd
from numpy import nan, isnan
from numpy.random import randint
import numpy as np

x_sc_bar_fsa = OrdinalScale()
y_sc_bar_fsa = LinearScale()
tt_bar_fsa = Tooltip(fields=['x', 'y'], formats=['', '.2f'])
bar_fsa = Bars(stroke='white', scales={'x': x_sc_bar_fsa, 'y': y_sc_bar_fsa}, padding=0.5,
               tooltip=tt_bar_fsa, unhovered_style={'opacity': 0.9}, type='grouped')
ax_x_bar_fsa = Axis(scale=x_sc_bar_fsa)
ax_y_bar_fsa = Axis(scale=y_sc_bar_fsa, orientation='vertical', tick_format='0.2f')

# Final output
fig_bar_fsa = Figure(title='Random Number Graph for Testing', marks=[],
                     axes=[ax_x_bar_fsa, ax_y_bar_fsa], padding_x=0)

def plot_bars_fsa(x_data, y_data, label_data,
                  colors=['darkblue', 'royalblue', 'darkgreen', 'darkred', 'red']):
    if (len(x_data) == 0) | (len(y_data) == 0) | (len(label_data) == 0):
        fig_bar_fsa.marks = []  # empty marks if x-data, y-data or label data is missing
    else:  # otherwise display
        bar_fsa.x = x_data
        bar_fsa.y = y_data
        y_sc_bar_fsa.min = 0
        y_sc_bar_fsa.max = np.nanmax([np.nanmax(x) for x in y_data]) * 1.1
        bar_fsa.labels = label_data
        bar_fsa.colors = colors  # defined above
        ax_x_bar_fsa.tick_rotate = -90  # angle of the tick labels on the graph
        ax_x_bar_fsa.tick_style = {'text-anchor': 'end'}
        fig_bar_fsa.marks = [bar_fsa]  # link marks to the bar chart

example_df = pd.DataFrame(data=randint(0, 100, size=(10, 5)),
                          columns=['column_' + str(i) for i in range(5)])
example_df.index = ['namenumber1253', 'namenumber26675', 'namenumber339', 'namenumber2556',
                    'namenumber3109', 'namenumber5894', 'namenumber1355', 'namenumber685890',
                    'namenumber397', 'namenumber85']
plot_bars_fsa(example_df.index.values,
              [example_df[x].values for x in ['column_0', 'column_1', 'column_2', 'column_3', 'column_4']],
              ['column_0', 'column_1', 'column_2', 'column_3', 'column_4'])
fig_bar_fsa
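One way to get more room below the plot is to enlarge the bottom figure margin so the rotated tick labels sit clear of the bars; a minimal untested sketch (fig_margin is a standard bqplot Figure trait, but the pixel values here are guesses to tune):
# enlarge the bottom margin so rotated tick labels have space
fig_bar_fsa.fig_margin = {'top': 60, 'bottom': 120, 'left': 60, 'right': 60}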

How to loop over a list of statistic types and apply them to an xarray.DataArray?

I need to compute a list of statistics through time on an xarray.DataArray and store them in an xarray.Dataset:
import xarray as xr
import numpy as np
import pandas as pd

np.random.seed(1234)
da = xr.DataArray(data=np.random.rand(4, 5, 10),
                  dims=["lon", "lat", "time"],
                  coords={"lon": np.random.uniform(low=-90, high=90, size=4),
                          "lat": np.random.uniform(low=-90, high=90, size=5),
                          "time": pd.date_range(start="2021-01-01", freq="D", periods=10)})
da

first = True
if first:
    first = False
    ds = da.min(dim=['time']).to_dataset(name="min")
else:
    ds = ds.merge(da.min(dim=['time']).to_dataset(name="min"))
ds = ds.merge(da.max(dim=['time']).to_dataset(name="max"))
ds = ds.merge(da.median(dim=['time']).to_dataset(name="median"))
ds = ds.merge(da.mean(dim=['time']).to_dataset(name="mean"))
ds = ds.merge(da.std(dim=['time']).to_dataset(name="std"))
ds
As I need to frequently change the statistics to apply, I tried to use a list of statistics and loop through it:
stats = ('min', 'max', 'median', 'mean', 'std')
first = True
for stat in stats:
    if first:
        first = False
        ds = da.vars()['stat'](dim=['time']).to_dataset(name=vars()['stat'])
    else:
        ds = ds.merge(da.vars()['stat'](dim=['time']).to_dataset(name=vars()['stat']))
But I get an error, AttributeError: 'DataArray' object has no attribute 'vars', when trying to retrieve and apply the statistic type.
Thanks for any hint you could provide.
Thanks to @michael-delgado for pointing out that functions can be stored in a list as well. Here is the answer I came up with:
stats = [np.nanmin, np.nanmax, np.nanmedian, np.nanmean, np.nanstd]
first = True
for i in range(0, len(stats)):
    da_stat = xr.DataArray(stats[i](a=da, axis=2), dims=['lon', 'lat'])
    ds_stat = da_stat.assign_coords(lon=da.lon.values,
                                    lat=da.lat.values).to_dataset(name=stats[i].__name__)
    if first:
        first = False
        ds = ds_stat
    else:
        ds = ds.merge(ds_stat)
ds
The only downside with this solution is that I was not able to use the xarray.DataArray statistics functions, and had to replace them with numpy statistics functions, which doubles the processing time.
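
If you want to keep the xarray methods themselves, a minimal sketch (assuming every name in stats is a valid DataArray reduction method) looks the statistics up by name with getattr:
# call the DataArray method whose name matches each statistic
stats = ['min', 'max', 'median', 'mean', 'std']
ds = xr.Dataset()
for stat in stats:
    ds[stat] = getattr(da, stat)(dim='time')
ds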

Having some trouble understanding x_bins in Seaborn's regplot

I used seaborn.regplot to plot data, but I don't quite understand how the error bars in regplot are calculated. I compared the results with the mean and standard deviation derived from manual calculation. Here is my testing script.
import numpy as np
import pandas as pd
import seaborn as sns

def get_data_XYE(p):
    x_list = []
    lower_list = []
    upper_list = []
    for line in p.lines:
        x_list.append(line.get_xdata()[0])
        lower_list.append(line.get_ydata()[0])
        upper_list.append(line.get_ydata()[1])
    y = 0.5 * (np.asarray(lower_list) + np.asarray(upper_list))
    y_error = np.asarray(upper_list) - y
    x = np.asarray(x_list)
    return x, y, y_error
x = [37.3448,36.6026,42.7795,34.7072,75.4027,226.2615,192.7984,140.8045,242.9952,458.451,640.6542,726.1024,231.7347,107.5605,200.2254,190.0006,314.1349,146.8131,152.4497,175.9096,284.9926,116.9681,118.2953,312.3787,815.8389,458.0146,409.5797,595.5373,188.9955,15.7716,36.1839,244.8689,57.4579,94.8717,112.2237,87.0687,72.79,22.3457,24.1728,29.505,80.8765,252.7454,280.6002,252.9573,348.246,112.705,98.7545,317.0541,300.9573,402.8411,406.6884,56.1286,30.1385,32.9909,497.556,19.3606,20.8409,95.2324,108.6074,15.7753,54.5511,45.5623,64.564,101.1934,81.8459,88.286,58.2642,56.1225,51.2943,38.0649,63.5882,63.6847,120.495,102.4097,49.3255,111.3309,171.6028,58.9526,28.7698,144.6884,180.0661,116.6028,146.2594,199.8702,128.9378,423.2363,119.8537,124.6508,518.8625,306.3023,79.5213,121.0309,116.9346,170.8863,930.361,48.9983,55.039,47.1092,72.0548,75.4045,103.521,83.4134,142.3253,146.6215,121.4467,101.4252,68.4812,291.4275,143.9475,142.647,78.9826,47.094,204.2196,89.0208,82.792,27.1346,142.4764,83.7874,67.3216,112.9531,138.2549,133.3446,86.2659,45.3464,56.1604,43.5882,54.3623,86.296,115.7272,96.5498,111.8081,36.1756,40.2947,34.2532,89.1452,53.9062,36.458,113.9297,176.9962,77.3125,77.8891,64.807,64.1515,127.7242,119.6876,976.2324,322.8454,434.2883,168.6923,250.0284,234.7329,131.0793,152.335,118.8838,243.1772,24.1776,168.6327,170.7541,167.8444,75.9315,110.1045,113.4417,60.5464,66.8956,79.7606,71.6659,72.5251,77.513,207.8019,21.8592,35.2787,169.7698,146.5012,412.9934,248.0708,318.5489,104.1278,184.7592,108.0581,175.2646,169.7698,340.3732,570.3396,23.9853,69.0405,66.7391,67.9435,294.6085,68.0537,77.6344,433.2713,104.3178,229.4615,187.8587,78.1399,121.4737,122.5451,384.5935,38.5232,117.6835,50.3308,318.2513,103.6695,20.7181,321.9601,510.3248,13.4754,16.1188,44.8082,37.7291,733.4587,446.6241,21.1822,287.9603,327.2367,274.1109,195.4713,158.2114,64.4537,26.9857,172.8503]
y = [37,40,30,29,24,23,27,12,21,20,29,28,27,32,23,29,28,22,28,23,24,29,32,18,22,12,12,14,29,31,34,31,22,40,25,36,27,27,29,35,33,25,25,27,27,19,35,26,18,24,25,37,52,47,34,39,40,48,41,44,35,36,53,46,38,44,23,26,26,28,27,21,25,21,20,27,35,24,46,34,22,30,30,30,31,26,25,28,21,31,24,27,33,21,31,33,29,33,32,21,25,22,39,31,34,26,23,18,20,18,34,25,20,12,23,25,21,21,25,31,17,27,28,29,25,24,25,21,24,27,23,22,23,22,22,26,22,19,26,35,33,35,29,26,26,30,22,32,33,33,28,32,26,29,36,37,37,28,24,30,25,20,29,24,33,35,30,32,31,33,40,35,37,24,34,29,27,24,36,26,26,26,27,27,20,17,28,34,18,20,20,18,19,23,20,22,25,32,44,41,39,41,40,44,36,42,31,32,26,29,23,29,29,28,31,22,29,24,28,28,25]
xbreaks = [13.4754, 27.1346, 43.5882, 58.9526, 72.79, 89.1452, 110.1045, 131.0793, 158.2114, 180.0661, 207.8019, 234.7329, 252.9573, 300.9573, 327.2367, 348.246, 412.9934, 434.2883, 458.451, 518.8625, 595.5373, 640.6542, 733.4587, 815.8389, 930.361, 976.2324]
df = pd.DataFrame([x, y]).T
df.columns = ['x', 'y']

# Check the bin average and std using agg
bins = pd.cut(df.x, xbreaks, right=False)
t = df[['x', 'y']].groupby(bins).agg({"x": "mean", "y": ["mean", "std"]})
t.reset_index(inplace=True)
t.columns = ['range_cut', 'x_avg_cut', 'y_avg_cut', 'y_std_cut']
t.index.name = 'id'

# Get the bin average from regplot
seed = 0  # fixed seed for the bootstrap (any value; not defined in the original script)
g = sns.regplot(x='x', y='y', data=df, fit_reg=False, x_bins=xbreaks, seed=seed)
xye = pd.DataFrame(get_data_XYE(g)).T
xye.columns = ['x_regplot', 'y_regplot', 'e_regplot']
xye.index.name = 'id'
t2 = xye.merge(t, on='id', how='left')
t2
You can see that the y and e values from the two approaches are different. I understand that the default x_ci or x_estimator may affect the result of regplot, but I still cannot reproduce these values in Excel by removing some of the lowest and/or highest values in each bin.
In seaborn.regplot, the x_bins values are the centers of each bin, and each original x value is assigned to the nearest bin center, whereas in pandas.cut the breaks define the bin edges.
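A minimal sketch of the difference, with made-up numbers (the variable names here are only illustrative):
import numpy as np
import pandas as pd
centers = np.array([10, 20, 30])
xs = np.array([9, 14, 16, 26])
# regplot-style: each x is assigned to the nearest bin center
nearest = centers[np.argmin(np.abs(xs[:, None] - centers[None, :]), axis=1)]
print(nearest)  # [10 10 20 30]
# pandas.cut-style: the same values treated as bin edges
print(pd.cut(xs, centers))  # 9 falls outside; 14 and 16 land in (10, 20]; 26 in (20, 30]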

Heatmap of values grouped by time in seaborn

I'm plotting the counts of a variable grouped by time as a heatmap. However, when including both hour and minute, the counts are quite low so the resulting heatmap doesn't really provide any real insight. Is it possible to group the counts in a bigger block of time? I'm hoping to test some different periods (5, 10 mins).
I'm also hoping to plot time on the x-axis. Similar to the output attached.
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta

start = datetime(1900, 1, 1, 10, 0, 0)
end = datetime(1900, 1, 1, 13, 0, 0)
seconds = (end - start).total_seconds()
step = timedelta(minutes=1)
array = []
for i in range(0, int(seconds), int(step.total_seconds())):
    array.append(start + timedelta(seconds=i))
array = [i.strftime('%Y-%m-%d %H:%M:%S') for i in array]
df2 = pd.DataFrame(array).rename(columns={0: 'Time'})
df2['Count'] = np.random.uniform(0.0, 0.5, size=len(df2))
df2['Count'] = df2['Count'].round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
g = df2.groupby(['Hour', 'Min', 'Count'])
count_df = g['Count'].nunique().unstack()
count_df.fillna(0, inplace=True)
sns.heatmap(count_df)
For cases like this, I think the easiest approach is to downsample the data with resample; changing the period is then just a matter of changing the resampling rule. The axis labels in the output graph will need to be modified, but I recommend this method.
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
start = datetime(1900,1,1,10,0,0)
end = datetime(1900,1,1,13,0,0)
seconds = (end - start).total_seconds()
step = timedelta(minutes = 1)
array = []
for i in range(0, int(seconds), int(step.total_seconds())):
    array.append(start + timedelta(seconds=i))
array = [i.strftime('%Y-%m-%d %H:%M:%S') for i in array]
df2 = pd.DataFrame(array).rename(columns = {0:'Time'})
df2['Count'] = np.random.uniform(0.0, 0.5, size = len(df2))
df2['Count'] = df2['Count'].round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
df2.set_index('Time', inplace=True)
count_df = df2.resample('10min')['Count'].value_counts().unstack()
count_df.fillna(0, inplace = True)
sns.heatmap(count_df.T)
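Testing a different period is then a one-line change, e.g. count_df = df2.resample('5min')['Count'].value_counts().unstack() for 5-minute blocks.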
The way you could achieve this is by creating a column with numbers that have repeating elements for the number of minutes.
For example:
minutes = 3
x = [0, 1, 2]
np.repeat(x, repeats=minutes, axis=0)
# array([0, 0, 0, 1, 1, 1, 2, 2, 2])
and then group your data using this column.
So your code would look like:
...
minutes = 5
x = [i for i in range(int(df2.shape[0] / minutes))]
df2['group'] = np.repeat(x, repeats=minutes, axis=0)
g = df2.groupby(['group', 'Count'])
count_df = g['Count'].nunique().unstack()
count_df.fillna(0, inplace=True)
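
One caveat (my addition, not part of the original answer): np.repeat must produce exactly df2.shape[0] labels, so if the row count is not divisible by minutes the leftover rows need handling, for example by trimming:
# drop leftover rows when the row count isn't divisible by `minutes`
minutes = 5
n_full = (df2.shape[0] // minutes) * minutes
df2 = df2.iloc[:n_full].copy()
df2['group'] = np.repeat(np.arange(n_full // minutes), repeats=minutes)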

Python - Apply a function over a labeled multidimensional array

I have a numpy array that is labelled using scipy connected component labelling.
import numpy
from scipy import ndimage

a = numpy.zeros((8, 8), dtype=int)  # numpy.int was removed in NumPy 1.24; plain int works
a[1,1] = a[1,2] = a[2,1] = a[2,2] = a[3,1] = a[3,2] = 1
a[5,5] = a[5,6] = a[6,5] = a[6,6] = a[7,5] = a[7,6] = 1
lbl, numpatches = ndimage.label(a)
I want to apply a custom function (calculating a specific value) over all labels within the labelled array, similar to the ndimage algebra functions, for instance:
ndimage.sum(a, lbl, range(1, numpatches+1))
(which in this case returns the sum of the values for each label: [6, 6]).
Is there a way to do this?
You can pass an arbitrary function to ndimage.labeled_comprehension, which is roughly equivalent to
[func(a[lbl == i]) for i in index]
Here is the labeled_comprehension-equivalent of ndimage.sum(a,lbl,range(1,numpatches+1)):
import numpy as np
from scipy import ndimage

a = np.zeros((8, 8), dtype=int)  # np.int was removed in NumPy 1.24
a[1,1] = a[1,2] = a[2,1] = a[2,2] = a[3,1] = a[3,2] = 1
a[5,5] = a[5,6] = a[6,5] = a[6,6] = a[7,5] = a[7,6] = 1
lbl, numpatches = ndimage.label(a)

def func(x):
    return x.sum()

print(ndimage.labeled_comprehension(a, lbl, index=range(1, numpatches+1),
                                    func=func, out_dtype='float', default=None))
# [6. 6.]
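
Any per-label statistic can be plugged in the same way; for example, a sketch computing the mean of each labelled patch on the same data (trivially 1.0 here, since every labelled cell holds a 1):
def patch_mean(x):
    return x.mean()

print(ndimage.labeled_comprehension(a, lbl, index=range(1, numpatches+1),
                                    func=patch_mean, out_dtype=float, default=0.0))
# [1. 1.]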
