Find the intersection of two graphs in Python
Does anyone have any idea how I can find the intersection of these two graphs? (image below)
energ_ac, price_compvend and energ_ac1, price_compvend1 are sets of x, y values.
Please see the following code, which reads the values from a database and then plots the two graphs.
I can only find the intersection manually, and I want to get it automatically.
import matplotlib.pyplot as plt
import pyodbc
import pandas as pd
import numpy as np
import sys

np.set_printoptions(suppress=True)
np.set_printoptions(threshold=sys.maxsize)

# Connection to the database on SQL Server
conn = pyodbc.connect(Trusted_Connection='yes', driver='{SQL Server}', server='srv03',
                      database='mercadoOMIE_curvas')

# First curve (offers flagged 'C')
SQL_Query = pd.read_sql_query("""SELECT * FROM curva_pbc_uof_2020_1_12 WHERE ("4" = 'C' AND "0" = '1' AND "7" = 'O')""", conn)
df = pd.DataFrame(SQL_Query, columns=['0','1','2','3','4','5','6','7','8'])

# The values use '.' as thousands separator and ',' as decimal separator.
# Strip the thousands separator literally (regex=False, so '.' is not treated
# as a regex wildcard), then convert the decimal comma and parse as numbers.
df['5'] = df['5'].str.replace('.', '', regex=False)
df['6'] = df['6'].str.replace('.', '', regex=False)
df['5'] = pd.to_numeric(df['5'].str.replace(',', '.'), errors='coerce')
df['6'] = pd.to_numeric(df['6'].str.replace(',', '.'), errors='coerce')

# Accumulated energy (running sum of column '5')
energ_ac = np.zeros(len(df['5']))
energ_ac[0] = df['5'][0]
for x in range(1, len(df['5'])):
    energ_ac[x] = energ_ac[x-1] + df['5'][x]

price_compvend = df['6'].to_numpy()
plt.plot(energ_ac, price_compvend)

# Second curve (offers flagged 'V')
SQL_Query1 = pd.read_sql_query("""SELECT * FROM curva_pbc_uof_2020_1_12 WHERE ("4" = 'V' AND "0" = '1' AND "7" = 'O')""", conn)
df1 = pd.DataFrame(SQL_Query1, columns=['0','1','2','3','4','5','6','7','8'])

df1['5'] = df1['5'].str.replace('.', '', regex=False)
df1['6'] = df1['6'].str.replace('.', '', regex=False)
df1['5'] = pd.to_numeric(df1['5'].str.replace(',', '.'), errors='coerce')
df1['6'] = pd.to_numeric(df1['6'].str.replace(',', '.'), errors='coerce')

energ_ac1 = np.zeros(len(df1['5']))
energ_ac1[0] = df1['5'][0]
for x in range(1, len(df1['5'])):
    energ_ac1[x] = energ_ac1[x-1] + df1['5'][x]

price_compvend1 = df1['6'].to_numpy()
plt.plot(energ_ac1, price_compvend1)

plt.show()
The solution comes from this question: np.array intersection // AttributeError: 'module' object has no attribute 'PiecewisePolynomial'
import scipy.interpolate as interpolate
import scipy.optimize as optimize
import numpy as np

x1 = np.array([1.4, 2.1, 3, 5.9, 8, 9, 23])
y1 = np.array([2.3, 3.1, 1, 3.9, 8, 9, 11])
x2 = np.array([1, 2, 3, 4, 6, 8, 9])
y2 = np.array([4, 12, 7, 1, 6.3, 8.5, 12])

# linear interpolators
opts = {'fill_value': 'extrapolate'}
f1 = interpolate.interp1d(x1, y1, **opts)
f2 = interpolate.interp1d(x2, y2, **opts)

# possible range for an intersection
xmin = np.min((x1, x2))
xmax = np.max((x1, x2))

# candidate starting points: every x value from either curve
# (comparing floats exactly is bad practice, but the worst case here
# is a bit of redundancy, which does no harm)
xuniq = np.unique((x1, x2))
xvals = xuniq[(xmin <= xuniq) & (xuniq <= xmax)]

# for each combined interval there can be at most one intersection,
# so looping over xvals should be enough; one can always err on the
# safe side and loop over a denser `np.linspace` instead
intersects = []
for xval in xvals:
    x0, = optimize.fsolve(lambda x: f1(x) - f2(x), xval)
    if (xmin <= x0 <= xmax
            and np.isclose(f1(x0), f2(x0))
            and not any(np.isclose(x0, intersects))):
        intersects.append(x0)

print(intersects)
print(f1(intersects))
print(f2(intersects))
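Applied to the curves from the question, the same approach would look roughly like the sketch below. It reuses the arrays built in the question (energ_ac, price_compvend, energ_ac1, price_compvend1), assumes both x arrays are increasing (they are cumulative sums), and the number of starting guesses (200) is an arbitrary choice.
import numpy as np
import scipy.interpolate as interpolate
import scipy.optimize as optimize

# Sketch: piecewise-linear interpolators over the two curves from the question
f1 = interpolate.interp1d(energ_ac, price_compvend, fill_value='extrapolate')
f2 = interpolate.interp1d(energ_ac1, price_compvend1, fill_value='extrapolate')

# Search only where the two x ranges overlap
xmin = max(energ_ac.min(), energ_ac1.min())
xmax = min(energ_ac.max(), energ_ac1.max())

intersects = []
for xval in np.linspace(xmin, xmax, 200):  # starting guesses for the root finder
    x0, = optimize.fsolve(lambda x: f1(x) - f2(x), xval)
    if (xmin <= x0 <= xmax
            and np.isclose(f1(x0), f2(x0))
            and not any(np.isclose(x0, intersects))):
        intersects.append(x0)

print(intersects)       # x coordinate(s) of the crossing(s)
print(f1(intersects))   # corresponding price(s)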
Use the script below. Note that it assumes the two curves are sampled at the same x positions (price_compvend and price_compvend1 must have the same length, aligned on energ_ac), and it returns the sample where the two curves' y values are closest rather than an exact crossing:
diff_vector = abs(price_compvend - price_compvend1)
min_index = np.where(diff_vector == np.min(diff_vector))
print('Intersection point is ({},{})'.format(energ_ac[min_index],
                                             price_compvend[min_index]))
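If the two curves are not sampled at the same x values (in the question, the two curves generally have different lengths), one option is to first interpolate both onto a common grid and then take the closest point. A minimal sketch, assuming energ_ac and energ_ac1 are increasing so np.interp can be used; x_common, y_a and y_b are illustrative names:
import numpy as np

# Common grid over the overlap of the two x ranges
x_common = np.linspace(max(energ_ac.min(), energ_ac1.min()),
                       min(energ_ac.max(), energ_ac1.max()), 1000)
y_a = np.interp(x_common, energ_ac, price_compvend)
y_b = np.interp(x_common, energ_ac1, price_compvend1)

# Grid point where the two curves are closest ~ approximate crossing
min_index = np.argmin(np.abs(y_a - y_b))
print('Intersection point is approximately ({},{})'.format(x_common[min_index],
                                                            y_a[min_index]))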
You could use the set.intersection() method to get the points common to both graphs; note that this only finds (x, y) pairs that appear in both data sets with exactly equal values:
graph_points1 = set(zip(energ_ac,price_compvend))
graph_points2 = set(zip(energ_ac1,price_compvend1))
intersection_points = graph_points1.intersection(graph_points2)
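For example, a small illustration of that exact-match behaviour (toy data, not the curves from the question):
a = set(zip([1.0, 2.0, 3.0], [5.0, 6.0, 7.0]))
b = set(zip([1.5, 2.0, 3.5], [5.5, 6.0, 7.5]))
print(a.intersection(b))  # {(2.0, 6.0)} -- only pairs that match exactly are found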