AttributeError: 'History' object has no attribute 'topl' - python

This is the model I trained:
# Train
batch_size = 128
epochs = 500
topl = model.fit(x_train,
                 y_train,
                 batch_size=batch_size,
                 epochs=epochs,
                 callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10),
                 validation_split=0.2)
# Evaluate model
scores = model.evaluate(x_test, y_test)
print('Test loss: {} - Accuracy: {}'.format(*scores))
And this is the plotting function I'm trying to use:
def plot_topl(topl):
    plt.figure(figsize=(15, 5))
    # Auxiliary info and funcs
    best_epoch = np.argmin(topl.topl['val_accuracy'])
    epochs = topl.epoch
    smooth = lambda y: np.polyval(np.polyfit(epochs, y, deg=5), epochs)
    # Plot training & validation accuracy values
    plt.plot(smooth(topl.topl['accuracy']), c='C0', alpha=0.7, lw=3)
    plt.plot(smooth(topl.topl['val_accuracy']), c='C1', alpha=0.7, lw=3)
    plt.axvline(best_epoch, label='best_epoch', c='k', ls='--', alpha=0.3)
    # Empirical values
    plt.plot(topl.topl['accuracy'], label='train_accuracy', c='C0')
    plt.plot(topl.topl['val_accuracy'], label='val_accuracy', c='C1')
    plt.ylabel('accuracy')
    plt.xlabel('epochs')
    plt.legend()

plot_topl(topl)
Why doesn't it work? It raises the same error for every line containing the word topl.
TY
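For reference: model.fit returns a keras.callbacks.History object, and the per-epoch metrics live in its history dict attribute, not under the name you assign the return value to. So every topl.topl[...] lookup should be topl.history[...]. A minimal sketch of the corrected function (same plotting logic, just reading from topl.history, and switching np.argmin to np.argmax since a higher val_accuracy is better):

def plot_topl(topl):
    # topl is the History object returned by model.fit;
    # topl.history maps metric names to per-epoch lists.
    plt.figure(figsize=(15, 5))
    best_epoch = np.argmax(topl.history['val_accuracy'])  # best = highest val accuracy
    epochs = topl.epoch
    smooth = lambda y: np.polyval(np.polyfit(epochs, y, deg=5), epochs)
    plt.plot(smooth(topl.history['accuracy']), c='C0', alpha=0.7, lw=3)
    plt.plot(smooth(topl.history['val_accuracy']), c='C1', alpha=0.7, lw=3)
    plt.axvline(best_epoch, label='best_epoch', c='k', ls='--', alpha=0.3)
    plt.plot(topl.history['accuracy'], label='train_accuracy', c='C0')
    plt.plot(topl.history['val_accuracy'], label='val_accuracy', c='C1')
    plt.ylabel('accuracy')
    plt.xlabel('epochs')
    plt.legend()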

Related

Plotting each epoch live during training

I am trying to follow this tutorial to plot each epoch live: https://medium.com/geekculture/how-to-plot-model-loss-while-training-in-tensorflow-9fa1a1875a5.
I don't know why it is not plotting each epoch, so please help me fix it. It seems it cannot read the logs.
import numpy as np
from tensorflow import keras
from matplotlib import pyplot as plt
from IPython.display import clear_output

class PlotLearning(keras.callbacks.Callback):
    """
    Callback to plot the learning curves of the model during training.
    """
    def on_train_begin(self, logs={}):
        self.metrics = {}
        for metric in logs:
            self.metrics[metric] = []

    def on_epoch_end(self, epoch, logs={}):
        # Storing metrics
        for metric in logs:
            if metric in self.metrics:
                self.metrics[metric].append(logs.get(metric))
            else:
                self.metrics[metric] = [logs.get(metric)]

        # Plotting
        metrics = [x for x in logs if 'val' not in x]
        f, axs = plt.subplots(1, len(metrics), figsize=(15, 5))
        clear_output(wait=True)
        for i, metric in enumerate(metrics):
            axs[i].plot(range(1, epoch + 2),
                        self.metrics[metric],
                        label=metric)
            if logs['val_' + metric]:
                axs[i].plot(range(1, epoch + 2),
                            self.metrics['val_' + metric],
                            label='val_' + metric)
            axs[i].legend()
            axs[i].grid()
        plt.tight_layout()
        plt.show()
# Example of what the logs dict looks like at the end of an epoch:
logs == {
    'accuracy': 0.98,
    'loss': 0.1
}

callbacks_list = [PlotLearning()]

model.fit(x_train,
          y_train,
          epochs=nb_epochs,
          batch_size=batch_size,
          validation_data=(x_test, y_test),
          verbose=1,
          shuffle=True,
          class_weight=class_weight,
          callbacks=callbacks_list)
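I can't reproduce the exact failure, but two pitfalls are worth ruling out with this callback: when only one metric is tracked, plt.subplots(1, 1) returns a bare Axes rather than an array, so axs[i] fails; and logs['val_' + metric] raises a KeyError when a metric has no validation counterpart. A minimal sketch that guards both (the class name PlotLearningRobust is mine; everything else follows the question's setup):

import numpy as np
from tensorflow import keras
from matplotlib import pyplot as plt
from IPython.display import clear_output

class PlotLearningRobust(keras.callbacks.Callback):
    """Live learning-curve plot that tolerates a single metric and missing val_* keys."""

    def on_train_begin(self, logs=None):
        self.metrics = {}

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        # Store every metric reported this epoch
        for metric, value in logs.items():
            self.metrics.setdefault(metric, []).append(value)

        metrics = [m for m in logs if not m.startswith('val_')]
        clear_output(wait=True)
        f, axs = plt.subplots(1, len(metrics), figsize=(15, 5))
        axs = np.atleast_1d(axs)  # indexable even when there is only one subplot
        for i, metric in enumerate(metrics):
            axs[i].plot(range(1, epoch + 2), self.metrics[metric], label=metric)
            if 'val_' + metric in self.metrics:
                axs[i].plot(range(1, epoch + 2),
                            self.metrics['val_' + metric],
                            label='val_' + metric)
            axs[i].legend()
            axs[i].grid()
        plt.tight_layout()
        plt.show()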

Learning-curve for classification in python

To plot the learning curve in a regression problem, we use RMSE as the evaluation measure, like:
def plot_learning_curves(model, X_train, y_train, X_val, y_val):
    plt.figure(figsize=(15, 5))
    train_errors, val_errors = [], []
    for m in range(5, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(metrics.mean_squared_error(y_train_predict, y_train[:m]))
        val_errors.append(metrics.mean_squared_error(y_val_predict, y_val))
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=1, label="training data")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=1, label="validation data")
    plt.legend(loc="upper right", fontsize=10)
    plt.xlabel("Size", fontsize=10)
    plt.ylabel("RMSE", fontsize=10)
    plt.title("Learning Curves")
    plt.show()
However, I would like to have a learning curve for a classification problem, and I know that accuracy should be the analysis metric instead of RMSE.
So far, I've only found code snippets for this problem that use the learning_curve function from sklearn, like:
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit

def plot_learning_curve(estimator, X, y, ax=None, cv=None, n_jobs=4,
                        train_sizes=np.linspace(.1, 1.0, 5)):
    train_sizes, train_scores, test_scores = \
        learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    # Plot learning curve
    ax.set_xlabel("Training examples")
    ax.set_ylabel("Score")
    ax.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
    ax.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
    ax.legend(loc="best")
    return plt
However, as you can see in this solution, we have to pass the whole X and y datasets, and it uses cross-validation to split them into train and test sets.
But I've already split my dataset and applied a lot of preprocessing to x_train and x_test.
So I intend to use my own train and test sets, just like in the code I wrote for the regression problem based on the RMSE metric (without using sklearn's learning_curve).
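The same loop structure carries over if RMSE is swapped for accuracy and the pre-split data is passed in directly. A minimal sketch (the function name, the step argument, and the legend placement are my choices; everything else mirrors the regression version):

from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np

def plot_learning_curves_classification(model, X_train, y_train, X_val, y_val, step=50):
    # Refit the classifier on growing slices of the training set and track accuracy.
    sizes, train_acc, val_acc = [], [], []
    for m in range(step, len(X_train) + 1, step):
        model.fit(X_train[:m], y_train[:m])
        train_acc.append(accuracy_score(y_train[:m], model.predict(X_train[:m])))
        val_acc.append(accuracy_score(y_val, model.predict(X_val)))
        sizes.append(m)
    plt.figure(figsize=(15, 5))
    plt.plot(sizes, train_acc, "r-+", linewidth=1, label="training data")
    plt.plot(sizes, val_acc, "b-", linewidth=1, label="validation data")
    plt.legend(loc="lower right", fontsize=10)
    plt.xlabel("Training set size", fontsize=10)
    plt.ylabel("Accuracy", fontsize=10)
    plt.title("Learning Curves")
    plt.show()

A step larger than 1 just keeps the number of refits manageable; setting step=1 reproduces the per-sample granularity of the regression version.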

Training and Testing Model PollyPlot

I'm trying to plot a polynomial fit with Matplotlib/Seaborn. I am new to data science, and I'm having trouble with this bit of code:
def PollyPlot(xtrain, xtest, y_train, y_test, lr, poly_transform):
    width = 12
    height = 10
    plt.figure(figsize=(width, height))
    # training data
    # testing data
    # lr: linear regression object
    # poly_transform: polynomial transformation object
    xmax = max([xtrain.values.max(), xtest.values.max()])
    xmin = min([xtrain.values.min(), xtest.values.min()])
    x = np.arange(xmin, xmax, 0.1)
    plt.plot(xtrain, y_train, 'ro', label='Training Data')
    plt.plot(xtest, y_test, 'go', label='Test Data')
    plt.plot(x, lr.predict(poly_transform.fit_transform(x.reshape(-1, 1))),
             label='Predicted Function')
    plt.ylim([-10000, 60000])
    plt.ylabel('Price')
    plt.legend()
This is the function that plots the polynomial fit. However, when I call the function with:
PollyPlot(x_train[['horsepower']], x_test[['horsepower']], y_train, y_test, poly, pr)
I get the following error:
InvalidIndexError: (slice(None, None, None), None)
Any assistance given would be greatly appreciated.
I have made 2 changes:
I've removed the .values from the max/min calculation inside the function, because I will convert xtrain and xtest to NumPy arrays (the InvalidIndexError appears to come from matplotlib trying to index the 2-D DataFrame, which plain NumPy arrays avoid):
def PollyPlot(xtrain, xtest, y_train, y_test, lr, poly_transform):
    width = 12
    height = 10
    plt.figure(figsize=(width, height))
    # training data
    # testing data
    # lr: linear regression object
    # poly_transform: polynomial transformation object
    xmax = max([xtrain.max(), xtest.max()])
    xmin = min([xtrain.min(), xtest.min()])
    x = np.arange(xmin, xmax, 0.1)
    plt.plot(xtrain, y_train, 'ro', label='Training Data')
    plt.plot(xtest, y_test, 'go', label='Test Data')
    plt.plot(x, lr.predict(poly_transform.fit_transform(x.reshape(-1, 1))),
             label='Predicted Function')
    plt.ylim([-10000, 60000])
    plt.ylabel('Price')
    plt.legend()
To call it, I have converted x_train, x_test, y_train, and y_test to NumPy arrays:

PollyPlot(x_train[['horsepower']].to_numpy(), x_test[['horsepower']].to_numpy(),
          y_train.to_numpy(), y_test.to_numpy(), poly, pr)
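An alternative sketch (not part of the original answer) is to coerce the inputs inside the function with np.asarray, so existing call sites that pass DataFrames keep working unchanged:

import numpy as np
import matplotlib.pyplot as plt

def PollyPlot(xtrain, xtest, y_train, y_test, lr, poly_transform):
    # Flatten any pandas objects to 1-D NumPy arrays so matplotlib
    # never has to index a 2-D DataFrame.
    xtrain, xtest = np.asarray(xtrain).ravel(), np.asarray(xtest).ravel()
    y_train, y_test = np.asarray(y_train).ravel(), np.asarray(y_test).ravel()
    plt.figure(figsize=(12, 10))
    x = np.arange(min(xtrain.min(), xtest.min()), max(xtrain.max(), xtest.max()), 0.1)
    plt.plot(xtrain, y_train, 'ro', label='Training Data')
    plt.plot(xtest, y_test, 'go', label='Test Data')
    plt.plot(x, lr.predict(poly_transform.fit_transform(x.reshape(-1, 1))),
             label='Predicted Function')
    plt.ylim([-10000, 60000])
    plt.ylabel('Price')
    plt.legend()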

Representation of a training and validation metric in a pipeline

I have a problem. I want to plot my RMSE values. However, I now use a pipeline, because I use cross-validation and other steps such as feature selection.
My question is: is there a way to get this plot from the pipeline (without training the model a second time)? How can I display the training and validation RMSE nicely in a diagram?
Pipeline
dfListingsFeature_regression = pd.read_csv(r"https://raw.githubusercontent.com/Coderanker3/dataset4/main/listings_cleaned.csv")

d = {True: 1, False: 0, np.nan: np.nan}
dfListingsFeature_regression['host_is_superhost'] = dfListingsFeature_regression[
    'host_is_superhost'].map(d).astype('int')

X = dfListingsFeature_regression.drop(columns=['host_id', 'id', 'price'])  # Features
y = dfListingsFeature_regression['price']  # Target variable
print(dfListingsFeature_nor.shape)

steps = [('feature_selection', SelectFromModel(estimator=LogisticRegression(max_iter=10000))),
         ('lasso', Lasso(alpha=0.4))]

pipeline = Pipeline(steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)

parameteres = {}
grid = GridSearchCV(pipeline, param_grid=parameteres, cv=5)
grid.fit(X_train, y_train)

print("score = %3.2f" % (grid.score(X_test, y_test)))
print('Training set score: ' + str(grid.score(X_train, y_train)))
print('Test set score: ' + str(grid.score(X_test, y_test)))

y_pred = grid.predict(X_test)
print("RMSE Val:", metrics.mean_squared_error(y_test, y_pred, squared=False))

y_train_predict = grid.predict(X_train)
print("Train:", metrics.mean_squared_error(y_train, y_train_predict, squared=False))

r2 = metrics.r2_score(y_test, y_pred)
print(r2)
Plot
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_learning_curves(model, X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=10)
    train_errors, val_errors = [], []
    for m in range(1, 500 + 1):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure(figsize=(10, 10))
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("Training set size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)

%%time
lin_reg = Lasso(alpha=0.1)
plot_learning_curves(lin_reg, X, y)
#plt.axis([0, 80, 0, 3])
plt.show()
You don't have to fit() your model again in plot_learning_curves. You can simply use your fitted pipeline to predict values for both the training and validation sets and then plot your learning curve.
Your function should look as follows, without the model.fit():
def plot_learning_curves(model, X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=10)
    train_errors, val_errors = [], []
    for m in range(1, 500 + 1):
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure(figsize=(10, 10))
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("Training set size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)
Then you should call this function with your fitted model as the parameter.
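For example (a sketch assuming the fitted GridSearchCV object named grid from the question; after fitting, it delegates predict() to its best pipeline, so it can be passed directly):

plot_learning_curves(grid, X, y)
plt.show()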

How to draw the best-fit plane for multivariate regression in scikit-learn?

I am not from a software background, but I am learning regression techniques to predict motor data.
I have 3D data for which I have used multivariate regression.
The result is fine, but now I want to visualize the best-fit plane for this data.
The following is the code, which I copied and pasted from different sites, to try to visualize my data:
X_final = df3[['Ampere', 'Voltage']]
y_final = df3[['ReactivePower']].copy()  # copy column data into y_final
X_final = X_final.dropna()
y_final = y_final.dropna()

X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=0)

lr = LinearRegression().fit(X_train, y_train)
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

# print score
print("lr.coef_: {}".format(lr.coef_))
print("lr.intercept_: {}".format(lr.intercept_))
print('lr train score %.3f, lr test score: %.3f' % (
    lr.score(X_train, y_train),
    lr.score(X_test, y_test)))

# Visualize the data for multiple linear regression
x_surf, y_surf = np.meshgrid(np.linspace(df3.Voltage.min(), df3.Voltage.max()),
                             np.linspace(df3.Ampere.min(), df3.Ampere.max()))
y_train_pred_random = y_train_pred[np.random.choice(y_train_pred.shape[0], 2500, replace=False), :]
y_train_pred_random = np.array(y_train_pred_random)
y_train_pred1 = y_train_pred_random.reshape(x_surf.shape)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df3['Voltage'], df3['Ampere'], df3['ReactivePower'], c='red', marker='o', alpha=0.5)
ax.plot_surface(x_surf, y_surf, y_train_pred1, rstride=1, cstride=1, color='b', alpha=0.3)
ax.set_xlabel('Voltage')
ax.set_ylabel('Ampere')
ax.set_zlabel('Reactive Power')
plt.show()
When I run the visualization code I get the following graph.
Please help.
Yeah, I solved it myself with some reference online.
Here is the code:
# Test/train split, multivariate
X_final = df3[['Ampere', 'Voltage']]
y_final = df3[['ReactivePower']].copy()  # copy column data into y_final
X_final = X_final.dropna()
y_final = y_final.dropna()

X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=0)

lr = LinearRegression().fit(X_train, y_train)
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

# print score
print("lr.coef_: {}".format(lr.coef_))
print("lr.intercept_: {}".format(lr.intercept_))
print('lr train score %.3f, lr test score: %.3f' % (
    lr.score(X_train, y_train),
    lr.score(X_test, y_test)))

# Visualize the data for multiple linear regression
x_surf, y_surf = np.meshgrid(np.linspace(df3.Ampere.min(), df3.Ampere.max()),
                             np.linspace(df3.Voltage.min(), df3.Voltage.max()))
z_surf = lr.coef_[0, 0] * x_surf + lr.coef_[0, 1] * y_surf + lr.intercept_

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df3['Ampere'], df3['Voltage'], df3['ReactivePower'], c='red', marker='o', alpha=0.5)
ax.plot_surface(x_surf, y_surf, z_surf, rstride=1, cstride=1, color='b', alpha=0.3)
ax.set_xlabel('Ampere')
ax.set_ylabel('Voltage')
ax.set_zlabel('Reactive Power')
plt.show()
Here is the plot.
Thanks.
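As a design note, an equivalent way to build the plane (a sketch assuming the same df3 and the fitted lr) is to let the model predict on the flattened grid instead of multiplying out lr.coef_ by hand; this also generalizes to models whose coefficients are less convenient to index:

import numpy as np
import pandas as pd

# Grid over the two features, in the same column order used for fitting (['Ampere', 'Voltage'])
x_surf, y_surf = np.meshgrid(np.linspace(df3.Ampere.min(), df3.Ampere.max()),
                             np.linspace(df3.Voltage.min(), df3.Voltage.max()))
grid = pd.DataFrame({'Ampere': x_surf.ravel(), 'Voltage': y_surf.ravel()})
z_surf = lr.predict(grid).reshape(x_surf.shape)  # same surface as the manual coef_/intercept_ formula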
