How to create separate sub-plots - python

These are the outputs I am getting for my PDP:
[output 1][1]
How can I modify the code to create a separate PDP for each feature (6 separate plots, not combined in 1 plot)?
Code Source
https://stats.stackexchange.com/questions/604209/creating-a-partial-dependant-plot-for-a-prediction-function/604288?noredirect=1#comment1121216_604288
Code:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils import check_X_y, check_array
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.inspection import PartialDependenceDisplay
import matplotlib.pyplot as plt
# pandas provides the Excel reader and the display options used below.
import pandas as pd

# Load the spreadsheet that holds the six input columns and the target "R".
data_set = pd.read_excel("Alpha copy.xlsx")
# Use fully-qualified option names: the bare 'max_columns' / 'max_rows'
# patterns can match more than one option in recent pandas releases.
pd.set_option('display.max_columns', 35)
pd.set_option('display.max_rows', 300)
data_set.head(300)
# Features keep the column order B, D, k, fc, Le, N; the target stays a
# one-column DataFrame.
X, y = data_set[["B", "D", "k", "fc", "Le", "N"]], data_set[["R"]]
class DummyRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper whose predictions come from ``objective``.

    ``fit`` only validates and stores the training data; ``predict``
    forwards the first six columns of ``X`` to the externally defined
    ``objective`` function.
    """

    def fit(self, X, y):
        """Validate ``X`` and ``y``, store them, and return ``self``."""
        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=True)
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """Return ``objective`` evaluated column-wise on ``X``."""
        X = check_array(X)
        # X is a NumPy array, not a pandas DataFrame.
        # Make sure the columns of X are in the order required by `objective`
        return objective(X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4], X[:, 5])
# Fit the wrapper on the features and the flattened (1-D) target, then
# draw the partial dependence of all six features in one display.
dummy_reg = DummyRegressor()
dummy_reg.fit(X, y.values.ravel())
display = PartialDependenceDisplay.from_estimator(
    dummy_reg,
    X,
    features=[0, 1, 2, 3, 4, 5],
    feature_names=["B", "D", "k", "fc", "Le", "N"],
)
# Label the y-axis of the first panel with the target name.
display.axes_[0][0].set_ylabel("R")

You can use plt.subplots. Here are a couple of example vectors being plotted on a 2x2 set of graphics.
# Sample series used by the four example panels
a = list(range(1, 7))
b = [value + 1 for value in a]
c = [1, 3, 3, 5, 5, 6]
d = [0, 2, 2, 5, 5, 6]
First setup the plt subplot figure with number of rows and cols you want.
# Setup figure
# Requests a 2x2 grid of axes with figsize (15, 6); the value returned by
# plt.subplots is not kept here.
plt.subplots(nrows=2, ncols=2, figsize=(15,6))
The subplots start at index 1, so here it will be plt.subplot(total num rows, total num cols, index of the graphic).
# Plot 1 (rows, cols, position 1)
plt.subplot(2,2,1)
plt.plot(a, b, c='red')
plt.title('graphic 1')
# Plot 2 (rows, cols, position 2)
plt.subplot(2,2,2)
plt.plot(c, d, c='gold')
plt.title('graphic 2')
# Plot 3 (rows, cols, position 3)
plt.subplot(2,2,3)
plt.plot(c, d, c='green')
plt.title('graphic 3')
# Plot 4 (rows, cols, position 4)
plt.subplot(2,2,4)
plt.plot(c, d, c='black')
plt.title('graphic 4');

Related

Calculate linear trend in each grid cell

I have an xarray dataset of the dimensions time=350, xc=432, yc=432 that contains data on sea ice concentration (variable ice_conc). I want to calculate the linear trend in each grid cell and create a trend map. I have tried using numpy.polyfit:
x=np.linspace(1,350, num=350) #number of days in dataset
# NOTE(review): wrapping the variable in a list adds one more leading
# dimension to what np.polyfit receives as y.
y=[ds.ice_conc] #ice concentration variable in the dataset
# Degree-1 (linear) fit of y against x.
trend = np.polyfit(x, y, 1)
I keep getting this error:
TypeError: expected 1D or 2D array for y
The variable ice_conc is a 3D array with time, xc, yc.
Thank you in advance!
If you have a xarray.Dataset already, you could simply use xarray.Dataset.polyfit!
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
# Build a small synthetic cube: one time series per (x, y) cell.
tsize, xsize, ysize = 10, 2, 2
data = np.full((tsize, xsize, ysize), np.nan)
data[:, 0, 0] = np.linspace(0, 10, tsize)
data[:, 1, 0] = np.logspace(0, 1, tsize)
data[:, 0, 1] = np.logspace(1.5, 0.5, tsize)
data[:, 1, 1] = np.linspace(40, 10, tsize)
# Wrap the cube in a Dataset with dimensions (time, x, y).
ds = xr.Dataset({"data": (["time", "x", "y"], data)})
# Fit a first-degree polynomial along the time dimension of every cell.
result = ds.polyfit(dim="time", deg=1)
# Coefficients are selected by degree: degree=1 is the slope and
# degree=0 the intercept (shown here for cell x=0, y=0).
a = result.data_polyfit_coefficients.sel(degree=1, x=0, y=0).values
b = result.data_polyfit_coefficients.sel(degree=0, x=0, y=0).values
Then we could make a graph like this:
# One panel per grid cell, with y along the rows and x along the columns.
# The grid is created as (ysize, xsize) so that the axs[y, x] lookup below
# stays in bounds when xsize != ysize; squeeze=False keeps axs
# two-dimensional even for a single row or column.
fig, axs = plt.subplots(ysize, xsize, sharex=True, squeeze=False)
fig.set_size_inches(8, 8)
xs = np.arange(tsize)
for x in ds.x:
    for y in ds.y:
        ax = axs[y, x]
        # Slope (degree 1) and intercept (degree 0) fitted for this cell.
        a = result.data_polyfit_coefficients.sel(degree=1, x=x, y=y).values
        b = result.data_polyfit_coefficients.sel(degree=0, x=x, y=y).values
        ax.scatter(xs, data[:, x, y], marker="*",
                   c="tab:blue", label="original data")
        ax.plot(xs, a * xs + b, c="tab:orange", label=f"{a:.1f} * x + {b:.1f}")
        ax.legend()
        ax.grid()
        ax.set_facecolor("lightgray")
        ax.set_title(f"({x.values}, {y.values})")

How to identify certain coordinates (x,y) for its color after matplotlib.pyplot fill function?

Is there any way to input a certain coordinates (x,y) to pyplot and output its filling color in return?
ex. (0,0) -> Red , (0.75,0) -> blue , (1,1) ->white
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(15,15))
x,y,x2,y2=[],[],[],[]
# Sample 300 values of i in [0, 2]; pi*i is the angle, giving a circle of
# radius 1 (x, y) and one of radius 0.5 (x2, y2).
for i in np.linspace(0,2,300):
    x.append(np.cos(np.pi*i))
    y.append(np.sin(np.pi*i))
    x2.append(0.5*np.cos(np.pi*i))
    y2.append(0.5*np.sin(np.pi*i))
# Fill the large circle in blue first, then the small one in red.
plt.fill(x,y,'b')
plt.fill(x2,y2,'r')
Color Image
Here is one possible method. Let's begin with a modified code (based on yours):
# Modified code
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon # additional code
plt.figure(figsize=(15,15))
x,y,x2,y2=[],[],[],[]
# Sample 300 values of i in [0, 2]; pi*i is the angle, giving a circle of
# radius 1 (x, y) and one of radius 0.5 (x2, y2).
for i in np.linspace(0,2,300):
    x.append(np.cos(np.pi*i))
    y.append(np.sin(np.pi*i))
    x2.append(0.5*np.cos(np.pi*i))
    y2.append(0.5*np.sin(np.pi*i))
# grab the fill objects for use later
# .fill return: matplotlib.patches.Polygon object
blue_pgn = plt.fill(x,y,'b') #Plot filled polygons
red_pgn = plt.fill(x2,y2,'r')
plt.show()
It produces the same plot as your code does, but also exposes 2 useful objects, blue_pgn and red_pgn .
This is the second part:
# Create geometries from objects in the plot
blue_geom = Polygon(blue_pgn[0].get_xy())
red_geom = Polygon(red_pgn[0].get_xy())


# create a function
def from_xy_to_color(x, y):
    """Print the fill colour found at the point ``(x, y)``.

    The red polygon is tested before the blue one; a point contained in
    neither polygon is reported as white.
    """
    point_xy = Point(x, y)
    if red_geom.contains(point_xy):
        print("xy:",x,y,", color:","Red")
    elif blue_geom.contains(point_xy):
        print("xy:",x,y,", color:","Blue")
    else:
        print("xy:",x,y,", color:","White")


# test all the points
xys = [[0,0], [.75,0], [1,1]]
for xy in xys:
    #print(xy)
    from_xy_to_color(*xy)
The output from this part:
xy: 0 0 , color: Red
xy: 0.75 0 , color: Blue
xy: 1 1 , color: White
from matplotlib.backends.backend_agg import FigureCanvasAgg
import numpy as np
import matplotlib.pyplot as plt
def circle_coords():
    """Return ``(x, y)`` arrays with 300 points on the unit circle.

    The angle runs from 0 to 2*pi inclusive, so the first and last points
    coincide.
    """
    t = np.linspace(0, 2, 300) * np.pi
    x = np.cos(t)
    y = np.sin(t)
    return x, y
def plot(fig):
    """Draw a blue unit disc and a red half-radius disc on ``fig``.

    Both shapes are filled on the figure's current axes; the red one is
    drawn after the blue one.
    """
    ax = fig.gca()
    x, y = circle_coords()
    ax.fill(x, y, 'b')
    ax.fill(x / 2, y / 2, 'r')
def capture():
    """Render the discs on pyplot's current figure and return its pixels.

    Returns the array produced by ``canvas_to_numpy`` for the drawn
    canvas. The current pyplot figure is closed before returning.
    """
    # The original constructed ``plt.Figure((4, 4), dpi=20)`` here and
    # threw the object away, so it never influenced the rendered size.
    # The statement is dropped; drawing still happens on the figure that
    # ``plt.gcf()`` returns, exactly as before.
    fig = plt.gcf()
    canvas = FigureCanvasAgg(fig)
    plot(fig)
    canvas.draw()
    r = canvas_to_numpy(canvas)
    plt.close()
    return r
def canvas_to_numpy(canvas):
    """Convert the buffer from ``canvas.print_to_buffer()`` to an array.

    ``print_to_buffer`` is expected to return ``(buffer, (width, height))``;
    the bytes are viewed as ``uint8`` and reshaped to
    ``(height, width, 4)``, i.e. four channels per pixel.
    """
    s, (width, height) = canvas.print_to_buffer()
    x = np.frombuffer(s, np.uint8)
    return x.reshape((height, width, 4))
def random_points(shape, n_points=4):
    """Draw ``n_points`` uniformly random ``(x, y)`` positions in an image.

    ``shape`` is ``(height, width)``. The result has shape
    ``(n_points, 2)`` with x in ``[0, width)`` in column 0 and y in
    ``[0, height)`` in column 1.
    """
    height, width = shape
    x = np.random.uniform(0, width, size=n_points)
    y = np.random.uniform(0, height, size=n_points)
    return np.column_stack([x, y])
def main():
    """Capture the rendered discs and annotate the colour at sample pixels.

    The image returned by ``capture`` is shown with ``imshow``; for each
    hard-coded ``(x, y)`` pixel the raw channel values are printed and the
    RGB fraction (value / 255) is written next to a yellow marker.
    """
    arr = capture()
    # Pixel positions as (x, y) = (column, row); the image must be at
    # least 380 pixels wide and 275 pixels high for these to be valid.
    p = [[360, 274],
         [379, 48],
         [117, 216]]
    fig = plt.gcf()
    ax = fig.gca()
    np.set_printoptions(precision=2)
    print(p)
    for x, y in p:
        # The array is indexed row-first, hence arr[y, x]. The fourth
        # channel is unpacked but not displayed.
        r, g, b, _alpha = arr[y, x] / 255
        c = f"{r, g, b}"
        print(arr[y, x])
        ax.text(x, y, c)
        ax.scatter(x, y, c="yellow")
    plt.imshow(arr)
    plt.show()
    plt.close()


main()
See: Matplotlib figure to image as a numpy array

3d plot from two vectors and an array

I have two vectors that store my X and Y values, of lengths 81 and 105, and an (81, 105) array (actually a list of lists) that stores my Z values for those X, Y pairs. What would be the best way to plot this in 3D? This is what I've tried:
# Z and X are loaded from disk; Y is a 1-D vector of 105 points in [0, 5].
Z = np.load('Z.npy')
X = np.load('X.npy')
Y = np.linspace(0, 5, 105)
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')
# NOTE(review): Y is 1-D here and X is described as a vector too, while Z
# is 2-D; presumably plot_surface needs 2-D coordinate grids (e.g. from
# np.meshgrid) — confirm. This is the call that raises the reported error.
ax.plot_surface(X, Y, Z, cmap= 'viridis')
plt.show()
I get the following error : ValueError: shape mismatch: objects cannot be broadcast to a single shape
OK, I got it running. There are some tricks here; I mention them in the code comments.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from random import shuffle
# produce some data.
x = np.linspace(0, 1, 81)
y = np.linspace(0, 1, 105)
# 105 rows of 81 values each, i.e. z is indexed as z[y_index][x_index].
z = [list(range(81)) for _ in range(105)]
array_z = np.array(z)
# Make them randomized.
shuffle(x)
shuffle(y)
# NOTE: array_z was copied from z above, so shuffling the list here does
# not change the values that get plotted.
shuffle(z)
# Match data in x and y: one [x, y, z] row per grid point.
# Be careful how your data is stored in your Z array (rows follow y here).
data = [
    [xi, yj, array_z[j][i]]
    for i, xi in enumerate(x)
    for j, yj in enumerate(y)
]
# Stored in dataframe
results = pd.DataFrame(data, columns=['x', 'y', 'z'])
# Plot the data.
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111, projection='3d')
# A colormap only takes effect when per-point values are supplied through
# ``c``; without it the cmap argument is ignored.
ax.scatter(results.x, results.y, results.z, c=results.z, cmap='viridis')
The picture looks weird because I produced some data. Hope it helps.

How to convert arrays of x,y,z coordinates to 3D path in numpy

Given three 1D arrays of X, Y and Z coordinates, how to convert into a 3D mesh path using numpy?
I managed to do this for 2D using numpy (ie no for loops):
import numpy
def path_2d_numpy(x, y):
    """Return a back-and-forth path over the grid spanned by ``x`` and ``y``.

    The result has shape ``(len(x) * len(y), 2)``. Rows of the grid are
    visited in the order of ``y``, and every second row runs through ``x``
    in reverse so that consecutive points stay adjacent.
    """
    m1, m2 = numpy.meshgrid(x, y)
    # Reverse the x-values of every second row.
    m1[1::2] = m1[1::2, ::-1]
    # All x-values followed by all y-values, regrouped into (x, y) pairs;
    # reshape replaces the original in-place assignment to ``r.shape``.
    r = numpy.append(m1, m2).reshape(2, -1)
    return r.T
from matplotlib import lines
from matplotlib import pyplot
def plot_path_2d(path):
    """Plot an ``(n, 2)`` array of points as a red line with round markers."""
    x, y = path.T
    pyplot.plot(x, y, '-ro', lw=3)
    pyplot.show()
# Example grid: x runs 4, 3, 2, 1 and y runs 1 through 5.
x = numpy.linspace(4, 1, num=4)
y = numpy.linspace(1, 5, num=5)
# Build the path over the grid and draw it.
path = path_2d_numpy(x, y)
plot_path_2d(path)
which outputs:
...but was unable to do it for 3D. Showing pure python solution (ie without numpy):
import numpy
def path_3d(x, y, z):
    """Return a back-and-forth path through the grid of ``x``, ``y``, ``z``.

    The result has shape ``(len(x) * len(y) * len(z), 3)`` with one
    ``(x, y, z)`` point per row. ``z`` varies slowest and ``x`` fastest.
    The x direction flips after every x sweep and the y direction after
    every z layer, so consecutive points are always grid neighbours.
    """
    nb_points = len(x) * len(y) * len(z)
    path = numpy.empty((nb_points, 3))
    xord, yord, i = True, True, 0
    for zi in z:
        for yi in y[::1 if yord else -1]:
            for xi in x[::1 if xord else -1]:
                path[i] = xi, yi, zi
                i += 1
            # Next x sweep runs in the opposite direction.
            xord = not xord
        # Next z layer walks y in the opposite direction.
        yord = not yord
    return path
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D
def plot_path_3d(path):
    """Plot an ``(n, 3)`` array of points as a blue 3D line with markers."""
    fig = pyplot.figure()
    # Request the 3D axes through add_subplot: newer Matplotlib releases
    # no longer accept keyword arguments such as ``projection`` in
    # ``Figure.gca``.
    ax = fig.add_subplot(111, projection='3d')
    xx, yy, zz = path.T
    ax.plot(xx, yy, zz, '-bo', lw=3)
    pyplot.show()
# Example grid: x runs 4..1, y runs 1..5 and z takes three values in [-3, 0].
x = numpy.linspace(4, 1, num=4)
y = numpy.linspace(1, 5, num=5)
z = numpy.linspace(-3, 0, num=3)
# Build the 3D path over the grid and draw it.
path = path_3d(x, y, z)
plot_path_3d(path)
which outputs:
Essentially, what I am looking for is a numpy implementation of path_3d, like the one I wrote for path_2d_numpy.
I need this because the actual arrays I am dealing with are quite big. Doing it without numpy is just too slow.
How's this look?
import numpy as np
def path_3d_numpy(x, y, z):
    """Return a continuous back-and-forth path through a 3D grid.

    The result has shape ``(len(x) * len(y) * len(z), 3)`` with one
    ``(x, y, z)`` point per row. ``y`` varies slowest, then ``x``, and
    ``z`` fastest. Consecutive points are always grid neighbours.
    """
    # With meshgrid's default 'xy' indexing the first two axes are swapped:
    # shape = (ny, nx, nz, 3)
    coords = np.stack(np.meshgrid(x, y, z), axis=-1)
    # Walk x backwards on every second y-row.
    coords[1::2] = coords[1::2, ::-1]
    # Flip z on every second z-sweep, counted across the whole path.
    # Keying the flip on the x index alone breaks continuity when len(x)
    # is odd, because the sweep parity then shifts from one y-row to the
    # next.
    ny, nx, nz = coords.shape[:3]
    sweeps = coords.reshape(ny * nx, nz, 3)
    sweeps[1::2] = sweeps[1::2, ::-1]
    return sweeps.reshape(-1, 3)  # flatten out the other axes
Doesn't iterate the points in quite the same order as yours, but you could fix that simply by swapping some indices around
Similarly, your 2d case could be written as
def path_2d_numpy(x, y):
    """Return a back-and-forth path over the grid spanned by ``x`` and ``y``.

    The result has shape ``(len(x) * len(y), 2)`` with one ``(x, y)``
    point per row. Every second row of the grid is walked in reverse.
    """
    # shape = (ny, nx, 2) with meshgrid's default 'xy' indexing
    coords = np.stack(np.meshgrid(x, y), axis=-1)
    coords[1::2] = coords[1::2, ::-1]
    return coords.reshape(-1, 2)
For some real overkill, you can extend this to N dimensions:
def path_nd(*args):
    """Return a continuous back-and-forth path through an N-D grid.

    Each positional argument is the 1-D coordinate vector of one
    dimension. The result has shape ``(number_of_grid_points, N)`` and
    lists the grid points so that consecutive rows are grid neighbours.
    """
    coords = np.stack(np.meshgrid(*args), axis=-1)
    N = len(args)
    shape = coords.shape
    # Work from the slowest axis to the fastest. At each level, reverse
    # axis i+1 on every second sweep, counted over all slower axes
    # together. Keying the reversal on the parity of axis i alone breaks
    # continuity whenever that axis has odd length.
    for i in range(N - 1):
        leading = int(np.prod(shape[:i + 1]))
        trailing = int(np.prod(shape[i + 2:]))
        sweeps = coords.reshape(leading, shape[i + 1], trailing)
        sweeps[1::2] = sweeps[1::2, ::-1]
        coords = sweeps.reshape(shape)
    return coords.reshape(-1, N)

scipy: interpolation, cubic & linear

I'm trying to interpolate my set of data (the first column is the time, the third column is the actual data):
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
# Read the space-delimited file; column 0 is used as x and column 2 as y.
data = np.genfromtxt("data.csv", delimiter=" ")
x = data[:, 0]
y = data[:, 2]
# Evaluate the interpolant on 1000 evenly spaced points spanning x.
xx = np.linspace(x.min(), x.max(), 1000)
# interp1d is called without ``kind`` here; the cubic alternative the
# question compares against is kept commented out below.
y_smooth = interp1d(x, y)(xx)
#y_smooth = interp1d(x, y, kind="cubic")(xx)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(xx, y_smooth, "r-")
plt.show()
but I see some strange difference between linear and cubic interpolation.
Here is the result for linear:
Here is the same for cubic:
I'm not sure, why is graph jumping all the time and y_smooth contains incorrect values?
ipdb> y_smooth_linear.max()
141.5481144
ipdb> y_smooth_cubic.max()
1.2663431888584225e+18
Can anybody explain to me, how can I change my code to achieve correct interpolation?
UPD: here is the data.csv file
Your data contains several y values for the same x value. This violates the assumptions of most interpolation algorithms.
Either discard the rows with duplicate x values, average the y values for each individual x, or obtain a better resolution for the x values such that they aren't the same anymore.
Given cfh's observation that x has duplicate values, you could use np.unique
to select a unique value of y for each x:
# x2 holds the distinct x values; idx holds, for each of them, the
# position of its first occurrence in x.
x2, idx = np.unique(x, return_index=True)
# Keep the y value found at each of those first occurrences.
y2 = y[idx]
return_index=True causes np.unique to return not only the unique values, x2, but also the locations, idx, of the unique xs in the original x array. Note that this selects the first value of y for each unique x.
If you'd like to average all the y values for each unique x, you could use
stats.binned_statistic:
import scipy.stats as stats
# inv maps every element of x to the index of its value in x2.
x2, inv = np.unique(x, return_inverse=True)
# Average y within bins over the inv labels, using inv.max()+1 bins
# (one per distinct x value).
y2, bin_edges, binnumber = stats.binned_statistic(
x=inv, values=y, statistic='mean', bins=inv.max()+1)
return_inverse=True tells np.unique to return indices from which the
original array can be reconstructed. Those indices can also serve as categorical
labels or "factors", which is how they are being used in the call to
binned_statistic above.
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import scipy.stats as stats
data = np.genfromtxt("data.csv", delimiter=" ")
# NOTE(review): y is read from column 1 here, whereas the question reads
# column 2 — confirm which column holds the signal.
x, y = data[:, 0], data[:, 1]

# Distinct x values, the first occurrence of each, and the label of every
# sample's x value.
x2, idx, inv = np.unique(x, return_index=True, return_inverse=True)
# Option 1: keep the first y seen for each distinct x.
y_uniq = y[idx]
# Option 2: average all y values that share the same x.
y_ave, bin_edges, binnumber = stats.binned_statistic(
    x=inv, values=y, statistic='mean', bins=inv.max() + 1)

xx = np.linspace(x.min(), x.max(), 1000)
linear_fit = interp1d(x, y)
cubic_first = interp1d(x2, y_uniq, kind="cubic")
cubic_mean = interp1d(x2, y_ave, kind="cubic")
y_smooth = linear_fit(xx)
y_smooth2 = cubic_first(xx)
y_smooth3 = cubic_mean(xx)

# One panel per interpolation variant, sharing the x-axis.
fig, ax = plt.subplots(nrows=3, sharex=True)
curves = [
    (y_smooth, "r-", 'linear'),
    (y_smooth2, "b-", 'cubic (first y)'),
    (y_smooth3, "b-", 'cubic (ave y)'),
]
for panel, (values, fmt, label) in zip(ax, curves):
    panel.plot(xx, values, fmt, label=label)
for panel in ax:
    panel.legend(loc='best')
plt.show()

Categories

Resources