Getting the ML model output to a React front end - Python

I've written an ML model that predicts diseases when symptoms are entered. I'm not quite sure how to get the final ML model output (prediction) to display in a ReactJS frontend. Below is my model:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
# from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle
import requests
import json
# Reading the train.csv by removing the last column since it's an empty column
DATA_PATH = "D:/Final Year Project/Diabetdeck-V3/flask-server/dataset/Training.csv"
data = pd.read_csv(DATA_PATH).dropna(axis = 1)
# Checking whether the dataset is balanced or not
disease_counts = data["prognosis"].value_counts()
temp_df = pd.DataFrame({
    "Disease": disease_counts.index,
    "Counts": disease_counts.values
})
# plt.figure(figsize = (18,8))
# sns.barplot(x = "Disease", y = "Counts", data = temp_df)
# plt.xticks(rotation=90)
# plt.show()
# Encoding the target value into numerical value using LabelEncoder
encoder = LabelEncoder()
data["prognosis"] = encoder.fit_transform(data["prognosis"])
X = data.iloc[:,:-1]
y = data.iloc[:, -1]
X_training_data, X_testing_data, y_training_data, y_testing_data = train_test_split(
    X, y, test_size=0.2, random_state=24)
# Defining scoring metric for k-fold cross validation
def cv_scoring(estimator, X, y):
    return accuracy_score(y, estimator.predict(X))
# Initializing Models
models = {
    "SVC": SVC(),
    # "Gaussian NB": GaussianNB(),
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=18)
    # "Linear Regression": LinearRegression()
}
# Producing cross validation score for the models
for model_name in models:
    model = models[model_name]
    scores = cross_val_score(model, X, y, cv=10,
                             n_jobs=-1,
                             scoring=cv_scoring)
# Training and testing SVM Classifier
svm_model = SVC()
svm_model.fit(X_training_data, y_training_data)
preds = svm_model.predict(X_testing_data)
# pickle.dump(svm_model, open('model.pkl','wb'))
# Training and testing Linear Regression
# lr_model = LinearRegression()
# lr_model.fit(X_training_data, y_training_data)
# preds = lr_model.predict(X_testing_data)
lr_model = LogisticRegression(C=0.1, penalty='l2', solver='liblinear')
lr_model.fit(X_training_data, y_training_data)
lr_model.score(X_training_data, y_training_data)
preds = lr_model.predict(X_testing_data)
# pickle.dump(lr_model, open('model.pkl','wb'))
# Training and testing Random Forest Classifier
rf_model = RandomForestClassifier(random_state=18)
rf_model.fit(X_training_data, y_training_data)
preds = rf_model.predict(X_testing_data)
# pickle.dump(rf_model, open('model.pkl','wb'))
# Training the models on whole data
final_svm_model = SVC()
final_lr_model = LogisticRegression()
final_rf_model = RandomForestClassifier(random_state=18)
final_svm_model.fit(X, y)
pickle.dump(final_svm_model, open('model.pkl','wb'))
final_lr_model.fit(X, y)
pickle.dump(final_lr_model, open('model.pkl','wb'))
final_rf_model.fit(X, y)
pickle.dump(final_rf_model, open('model.pkl','wb'))
# Reading the test data
test_data = pd.read_csv("D:/Final Year Project/Diabetdeck-V3/flask-server/dataset/Testing.csv").dropna(axis=1)
test_X = test_data.iloc[:, :-1]
test_Y = encoder.transform(test_data.iloc[:, -1])
# Making predictions by taking the mode of the predictions made by all the classifiers
svm_preds = final_svm_model.predict(test_X)
lr_preds = final_lr_model.predict(test_X)
rf_preds = final_rf_model.predict(test_X)
final_preds = [mode([i, j, k])[0][0] for i, j, k in zip(svm_preds, lr_preds, rf_preds)]
symptoms = X.columns.values
# Creating a symptom index dictionary to encode the input symptoms into numerical form
symptom_index = {}
for index, value in enumerate(symptoms):
    symptom = " ".join([i.capitalize() for i in value.split("_")])
    symptom_index[symptom] = index
data_dict = {
    "symptom_index": symptom_index,
    "predictions_classes": encoder.classes_
}
model = pickle.load(open('model.pkl', 'rb'))
# Defining the Function
# Input: string containing symptoms separated by commas
# Output: Generated predictions by models
def predictDisease(symptoms):
    symptoms = symptoms.split(",")
    # creating input data for the models
    input_data = [0] * len(data_dict["symptom_index"])
    for symptom in symptoms:
        index = data_dict["symptom_index"][symptom]
        input_data[index] = 1
    # reshaping the input data and converting it into a suitable format for model predictions
    input_data = np.array(input_data).reshape(1, -1)
    # generating individual outputs
    rf_prediction = data_dict["predictions_classes"][final_rf_model.predict(input_data)[0]]
    lr_prediction = data_dict["predictions_classes"][final_lr_model.predict(input_data)[0]]
    svm_prediction = data_dict["predictions_classes"][final_svm_model.predict(input_data)[0]]
    # making the final prediction by taking the mode of all predictions
    final_prediction = mode([rf_prediction, lr_prediction, svm_prediction])[0][0]
    predictions = {
        "rf_model_prediction": rf_prediction,
        "lr_model_prediction": lr_prediction,
        "svm_model_prediction": svm_prediction,
        "final_prediction": final_prediction
    }
    if final_prediction == 'Diabetes ':
        # return final_prediction
        return "You have Type 1 Diabetes"
    else:
        # print("Not Diabetes")
        # return final_prediction
        return "You do not have Diabetes"
# Testing the function
print(predictDisease("Itching,Skin Rash,Nodal Skin Eruptions"))
# print(predictDisease("Polyuria,Increased Appetite,Excessive Hunger"))
It would be great if someone could guide me on how to display the final output of the model (the prediction) in a ReactJS frontend. Do I have to do this using Flask, or is there another way to send it straight to the frontend?
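The usual route is to expose the prediction over HTTP and let React call that endpoint, because the pickled scikit-learn models cannot run inside the browser. Below is a minimal sketch, assuming the training script above is importable as a module named model_api.py (a hypothetical name) and that Flask and flask-cors are installed:
# app.py - hypothetical minimal Flask wrapper around predictDisease
from flask import Flask, request, jsonify
from flask_cors import CORS

from model_api import predictDisease  # hypothetical module name for the script above

app = Flask(__name__)
CORS(app)  # allow requests from the React dev server during development

@app.route("/predict", methods=["POST"])
def predict():
    payload = request.get_json(force=True)
    symptoms = payload.get("symptoms", "")  # e.g. "Itching,Skin Rash,Nodal Skin Eruptions"
    return jsonify({"prediction": predictDisease(symptoms)})

if __name__ == "__main__":
    app.run(port=5000, debug=True)
The React side would then POST the comma-separated symptom string to http://localhost:5000/predict with fetch or axios and render the prediction field of the JSON response. FastAPI or Django would work just as well; the only requirement is some HTTP layer between the pickled model and the frontend.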

Related

NLP classification with sparse and numerical features crashes

I have a dataset of 10 million English shows, which has been cleaned and lemmatized, together with their classification into different category types such as comedy, documentary, action, etc.
I also have a feature called duration, which is the length of the TV show.
Data can be found here
I perform TF-IDF vectorization on the titles, which returns a sparse matrix, and normalization on the duration column.
Then I want to feed the data to a logistic regression classifier.
Side question: is there a better way to handle combining a sparse matrix and a numerical column? (A sketch follows after the code below.)
When I try to do it using todense() or toarray(), it works, but when I pass the result to the logistic regression function, the notebook crashes. If I don't include the duration column, so that I don't have to apply toarray() or todense(), it works perfectly. Is this a memory issue?
This is my code:
import os
import pandas as pd
from sklearn import metrics
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
def normalize(df, col=''):
    mms = MinMaxScaler()
    mms_col = mms.fit_transform(df[[col]])
    return mms_col
def tfidf(X, col=''):
    tfidf_vectorizer = TfidfVectorizer(max_df=0.8, max_features=10000)
    return tfidf_vectorizer.fit_transform(X[col])
def get_training_data(df):
    df = shuffle(pd.read_csv(df).dropna())
    data = df[['name_title', 'Duration']]
    X_duration = normalize(data, col='Duration')
    X_sparse = tfidf(data, col='name_title')
    X = pd.DataFrame(X_sparse.toarray())
    X['Duration'] = X_duration
    y = df['target']
    return X, y
def logistic_regression(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    lr = LogisticRegression(C=100.0, random_state=1, solver='lbfgs', multi_class='ovr')
    lr.fit(X_train, y_train)
    y_predict = lr.predict(X_test)
    print(y_predict)
    print("Logistic Regression Accuracy %.3f" % metrics.accuracy_score(y_test, y_predict))
data_path = '../data/'
X, y = get_training_data(os.path.join(data_path, 'podcasts_en_processed.csv'))
print(X.shape) # this prints (971426, 10001)
logistic_regression(X, y)
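A dense 971,426 x 10,001 float64 matrix needs roughly 78 GB of RAM, so the crash is almost certainly a memory issue. One way to avoid densifying the TF-IDF matrix is to append the scaled duration as a sparse column with scipy.sparse.hstack; LogisticRegression accepts the resulting sparse matrix directly. A minimal sketch, reusing the question's normalize and tfidf helpers:
from scipy.sparse import hstack, csr_matrix

def get_training_data_sparse(path):
    df = shuffle(pd.read_csv(path).dropna())
    data = df[['name_title', 'Duration']]
    X_duration = normalize(data, col='Duration')  # dense column, shape (n_samples, 1)
    X_sparse = tfidf(data, col='name_title')      # sparse TF-IDF matrix
    # stack the dense column onto the sparse matrix without calling toarray()
    X = hstack([X_sparse, csr_matrix(X_duration)]).tocsr()
    y = df['target']
    return X, y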

How can I fix the errors in the "Fit the PCA transformer on the training data and transform the data" line and in "model = svm.SVC(kernel='linear')"?

I have created this Support Vector Machine model, but I get errors at the "Fit the PCA transformer on the training data and transform the data" line and at "model = svm.SVC(kernel='linear')".
The first error is:
NameError: name 'x_train' is not defined
The second error is:
NameError: name 'svm' is not defined
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import seaborn as sns  # for data visualization
train_df = pd.read_csv('diabetes_data_upload.csv')
train_df.head()
# checking total of rows and columns
train_df.shape
# Transforming the Gender into 0 and 1
train_df["Gender"] = train_df["Gender"].map({"Male": 0, "Female": 1}).astype(int)
#Rounding the Age
train_df["Age"] = train_df.Age.round()
# Separating the data to predict the missing ages
X_train = train_df[train_df.Age.notnull()][['Age','Gender','weakness','Obesity', 'class']]
X_test = train_df[train_df.Age.isnull()][['Age','Gender','weakness','Obesity', 'class']]
y = train_df.Age.dropna()
# Just confirming if there is no more ages missing
train_df.Age.isnull().sum()
# Taking only the features that is important for now
X = train_df[['Gender', 'Age', 'weakness']]
# Taking the labels
Y = train_df['class']
# Splitting into 80% training set and 20% testing set so we can see our accuracy
X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
# Declaring the SVC with no tunning
print(X_train.shape)
print(Y_train.shape)
from sklearn.decomposition import PCA
# Create a PCA transformer with 3 components
pca = PCA(n_components=3)
# Fit the PCA transformer on the training data and transform the data
x_train_pca = pca.fit_transform(x_train)
# Transform the test data using the PCA transformer fitted on the training data
x_test_pca = pca.transform(x_test)
# Fit the classifier on the transformed training data
classifier.fit(x_train_pca, y_train)
# Predict the labels for the transformed test data
predictions = classifier.predict(x_test_pca)
# Calculate the accuracy of the model
accuracy = classifier.score(x_test_pca, y_test)
print("Accuracy:", accuracy)
#make it binary classification problem
X = X[np.logical_or(Y==0,Y==1)]
Y = Y[np.logical_or(Y==0,Y==1)]
model = svm.SVC(kernel='linear')
clf = model.fit(X, Y)
# The equation of the separating plane is given by all x so that np.dot(svc.coef_[0], x) + b = 0.
# Solve for w3 (z)
z = lambda x,y: (-clf.intercept_[0]-clf.coef_[0][0]*x -clf.coef_[0][1]*y) / clf.coef_[0][2]
tmp = np.linspace(-5,5,30)
x,y = np.meshgrid(tmp,tmp)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot3D(X[Y==0,0], X[Y==0,1], X[Y==0,2],'ob')
ax.plot3D(X[Y==1,0], X[Y==1,1], X[Y==1,2],'sr')
ax.plot_surface(x, y, z(x,y))
ax.view_init(30, 60)
plt.show()
The two errors you mention can be solved as follows:
1. NameError: name 'x_train' is not defined
This is because you are using x_train instead of the X_train variable you defined right above. Remember, variable names are case sensitive.
x_train_pca = pca.fit_transform(x_train) # your code
x_train_pca = pca.fit_transform(X_train) # change it to this
2. NameError: name 'svm' is not defined
This is because you import the SVC class directly with from sklearn.svm import SVC, but when instantiating the model you refer to it as svm.SVC.
model = svm.SVC(kernel='linear') # your code
model = SVC(kernel='linear') # change it to this
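As a further note, even after these two fixes the script will stop at classifier.fit(x_train_pca, y_train): classifier is never defined, the train_test_split above names the training labels Y_train (not y_train), and matplotlib is never imported for the 3D plot. A minimal sketch of the missing pieces, assuming an SVC is intended here as it is later in the code:
from sklearn.svm import SVC
import matplotlib.pyplot as plt  # needed later for the 3D plot

# define the estimator before calling fit; the linear kernel mirrors the later svm.SVC(kernel='linear')
classifier = SVC(kernel='linear')
classifier.fit(x_train_pca, Y_train)  # the split assigns the training labels to Y_train, not y_train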

How to use mutual information correctly

I want to use principal component analysis-mutual information (PCA-MI) to build a data representation from a source (values from a smart insole) and an output variable (values from a force plate). PCA is used to determine the principal components Ni, keeping as many as needed for the cumulative variance to exceed 98% of the source information measured from the 89 insole sensors. MI is generally used for selecting input variables for predictive models because it is a good indicator of the relationship between input variables and output variables. I want to get results like the flowchart below.
I then tried the code below, but I can't reproduce what's in the flowchart.
import pandas as pd
import numpy as np
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
# load the dataset
def load_dataset(filename):
    # load the dataset as a pandas DataFrame
    data = read_csv(filename, header=None)
    # retrieve numpy array
    dataset = data.values
    y = dataset
    return y
def load_dataset2(filename):
    # load the dataset as a pandas DataFrame
    data2 = read_csv(filename, header=None)
    # retrieve numpy array
    dataset2 = data2.values
    X = dataset2
    return X
# feature selection
def select_features(X_train, y_train, X_test):
    # configure to select a subset of features
    fs = SelectKBest(score_func=mutual_info_classif, k=4)
    # learn relationship from training data
    fs.fit(X_train, y_train)
    # transform train input data
    X_train_fs = fs.transform(X_train)
    # transform test input data
    X_test_fs = fs.transform(X_test)
    return X_train_fs, X_test_fs, fs
# load the dataset
Insole = pd.read_csv('1119_Rwalk40s1_list.txt', header=None, low_memory=False)
SIData = np.asarray(Insole)
df = pd.read_csv('1119_Rwalk40s1.csv', low_memory=False)
columns = ['Fx','Fy','Fz','Mx','My','Mz']
selected_df = df[columns]
FCDatas = selected_df
SmartInsole = np.array(SIData)
FCData = np.array(FCDatas)
scaler_x = MinMaxScaler(feature_range=(0, 1))
scaler_x.fit(SmartInsole)
xscale = scaler_x.transform(SmartInsole)
scaler_y = MinMaxScaler(feature_range=(0, 1))
scaler_y.fit(FCData)
yscale = scaler_y.transform(FCData)
SIDataPCA = xscale
pca = PCA(n_components=89)
pca.fit(SIDataPCA)
SIdata_pca = pca.transform(SIDataPCA)
X = SIdata_pca
y = yscale
# split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# feature selection
X_train_fs, X_test_fs, fs = select_features(X_train, y_train, X_test)
# fit the model
model = LogisticRegression(solver='liblinear')
model.fit(X_train_fs, y_train)
# evaluate the model
yhat = model.predict(X_test_fs)
# evaluate predictions
accuracy = accuracy_score(y_test, yhat)
print('Accuracy: %.2f' % (accuracy*100))
How can I get the correct PCA-MI result data?
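A hedged sketch of one way to implement the 98% cumulative-variance criterion described above: scikit-learn's PCA accepts a float n_components as a variance fraction, and because the force-plate targets (Fx ... Mz) are continuous, mutual_info_regression is the appropriate MI estimator rather than mutual_info_classif (which expects discrete class labels). The names xscale and yscale are the scaled arrays from the code above.
import numpy as np
from sklearn.decomposition import PCA
from sklearn.feature_selection import mutual_info_regression

# keep just enough components to explain 98% of the insole-sensor variance
pca = PCA(n_components=0.98)
X_pca = pca.fit_transform(xscale)
print("components kept:", pca.n_components_)

# rank the retained components by mutual information with one target column (e.g. Fz)
mi = mutual_info_regression(X_pca, yscale[:, 2])
ranking = np.argsort(mi)[::-1]
print("top components by MI:", ranking[:10])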

How to output Shap values in probability and make force_plot from binary classifier

I need to plot how each feature impacts the predicted probability for each sample from my LightGBM binary classifier. So I need to output SHAP values in probability, instead of the normal SHAP values. The package does not appear to have any option to output in terms of probability.
The example code below is what I use to generate a dataframe of SHAP values and do a force_plot for the first data sample. Does anyone know how I should modify the code to change the output?
I'm new to SHAP values and the SHAP package. Thanks a lot in advance.
import pandas as pd
import numpy as np
import shap
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = lgbm.LGBMClassifier()
model.fit(X_train, y_train)
explainer = shap.TreeExplainer(model)
shap_values = explainer(X_train)
# force plot of first row for class 1
class_idx = 1
row_idx = 0
expected_value = explainer.expected_value[class_idx]
shap_value = shap_values[:, :, class_idx].values[row_idx]
shap.force_plot(base_value=expected_value, shap_values=shap_value,
                features=X_train.iloc[row_idx, :], matplotlib=True)
# dataframe of shap values for class 1
shap_df = pd.DataFrame(shap_values[:, :, 1].values, columns=shap_values.feature_names)
TL;DR:
You can plot results in probability space by passing link="logit" to the force_plot method:
import pandas as pd
import numpy as np
import shap
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from scipy.special import expit
shap.initjs()
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = lgbm.LGBMClassifier()
model.fit(X_train, y_train)
explainer_raw = shap.TreeExplainer(model)
shap_values = explainer_raw(X_train)
# force plot of first row for class 1
class_idx = 1
row_idx = 0
expected_value = explainer_raw.expected_value[class_idx]
shap_value = shap_values[:, :, class_idx].values[row_idx]
shap.force_plot(
    base_value=expected_value,
    shap_values=shap_value,
    features=X_train.iloc[row_idx, :],
    link="logit",
)
Expected output:
Alternatively, you may achieve the same with the following, explicitly specifying the model_output="probability" that you want to explain:
explainer = shap.TreeExplainer(
    model,
    data=X_train,
    feature_perturbation="interventional",
    model_output="probability",
)
shap_values = explainer(X_train)
# force plot of first row for class 1
class_idx = 1
row_idx = 0
shap_value = shap_values.values[row_idx]
shap.force_plot(
    base_value=expected_value,
    shap_values=shap_value,
    features=X_train.iloc[row_idx, :]
)
Expected output:
However, to understand what's happening here, it might be more interesting to find out where these figures come from:
Our target proba for the point of interest:
model_proba = model.predict_proba(X_train.iloc[[row_idx]])
model_proba
# array([[0.00275887, 0.99724113]])
Base case raw from model given X_train as background (note, LightGBM outputs raw for class 1):
model.predict(X_train, raw_score=True).mean()
# 2.4839751932445577
Base case raw from SHAP (note, they are symmetric):
bv = explainer_raw(X_train).base_values[0]
bv
# array([-2.48397519, 2.48397519])
Raw SHAP values for the point of interest:
sv_0 = explainer_raw(X_train).values[row_idx].sum(0)
sv_0
# array([-3.40619584, 3.40619584])
Proba inferred from SHAP values (via sigmoid):
shap_proba = expit(bv + sv_0)
shap_proba
# array([0.00275887, 0.99724113])
Check:
assert np.allclose(model_proba, shap_proba)
Please ask questions if something is not clear.
Side notes
Probabilities might be misleading if you're analyzing the raw effect sizes of different features, because the sigmoid is non-linear and saturates after reaching a certain threshold.
Some people expect to see SHAP values in probability space as well, but this is not feasible because:
SHAP values are additive by construction (to be precise SHapley Additive exPlanations are average marginal contributions over all possible feature coalitions)
exp(a + b) != exp(a) + exp(b)
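A quick numeric check of that last point, reusing the variables defined in the answer above (explainer_raw, bv, row_idx, class_idx, expit): additivity holds in raw log-odds space, but not after the sigmoid is applied per feature:
raw_parts = explainer_raw(X_train).values[row_idx][:, class_idx]  # per-feature raw contributions for class 1
print(expit(bv[class_idx] + raw_parts.sum()))         # matches model_proba for class 1
print(expit(bv[class_idx]) + expit(raw_parts).sum())  # does not: the sigmoid breaks additivity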
You may find useful:
Feature importance in a binary classification and extracting SHAP values for one of the classes only answer
How to interpret base_value of GBT classifier when using SHAP? answer
You can consider running your output values through a softmax() function. For reference, it is defined as:
def get_softmax_probabilities(x):
    return np.exp(x) / np.sum(np.exp(x)).reshape(-1, 1)
and there is a scipy implementation as well:
from scipy.special import softmax
The output from softmax() will be probabilities proportional to the (relative) values in vector x, which are your SHAP values.
import pandas as pd
import numpy as np
import shap
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print('X_train: ',X_train.shape)
print('X_test: ',X_test.shape)
model = lgbm.LGBMClassifier()
model.fit(X_train, y_train)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train)
# plot
# shap.summary_plot(shap_values[class_idx], X_train, plot_type='bar')
# shap.summary_plot(shap_values[class_idx], X_train)
# shap_value = shap_values[:,:,class_idx].values[row_idx]
# shap.force_plot (base_value = expected_value, shap_values = shap_value, features = X_train.iloc[row_idx, :], matplotlib=True)
# # dataframe of shap values for class 1
# shap_df = pd.DataFrame(shap_values[:,:, 1 ].values, columns = shap_values.feature_names)
# verification
def verification(index_number, class_idx):
    print('-----------------------------------')
    print('index_number: ', index_number)
    print('class_idx: ', class_idx)
    print('')
    y_base = explainer.expected_value[class_idx]
    print('y_base: ', y_base)
    player_explainer = pd.DataFrame()
    player_explainer['feature_value'] = X_train.iloc[index_number].values
    player_explainer['shap_value'] = shap_values[class_idx][index_number]
    print('verification: ')
    print('y_base + sum_of_shap_values: %.2f' % (y_base + player_explainer['shap_value'].sum()))
    print('y_pred: %.2f' % (y_train[index_number]))
j = 10  # index
verification(j, 0)
verification(j, 1)
# show:
# X_train: (455, 30)
# X_test: (114, 30)
# -----------------------------------
# index_number: 10
# class_idx: 0
# y_base: -2.391423081639827
# verification:
# y_base + sum_of_shap_values: -9.40
# y_pred: 1.00
# -----------------------------------
# index_number: 10
# class_idx: 1
# y_base: 2.391423081639827
# verification:
# y_base + sum_of_shap_values: 9.40
# y_pred: 1.00
# Of -9.40 and 9.40, class_idx 1 takes the maximum value, matching y_pred = 1, so the result is correct.
This should achieve what you need and verifies the reliability of the results.

Poor accuracy score for Semi-Supervised Support Vector Machine

I am using a semi-supervised approach with a Support Vector Machine in Python for image classification on the PASCAL VOC 2007 data.
I have tried the default parameters from the library and also tuned them, but I get extremely bad accuracy of only about 2%.
Below is my code:
import pandas as pd
import numpy as np
from numpy import concatenate
from sklearn import datasets
from sklearn import decomposition
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
color_layout_features = pd.read_pickle("color_layout_descriptor.pkl")
bow_surf = pd.read_pickle("bow_surf.pkl")
color_hist_features = pd.read_pickle("hist.pkl")
labels = pd.read_pickle("labels.pkl")
# Feat. Scaling
def scale(X, x_min, x_max):
    nom = (X - X.min(axis=0)) * (x_max - x_min)
    denom = X.max(axis=0) - X.min(axis=0)
    denom[denom == 0] = 1
    return x_min + nom / denom
# normalization
def normalize(x):
    return (x - np.min(x)) / (np.max(x) - np.min(x))
color_layout_features_scaled = scale(color_layout_features, 0, 1)
color_hist_features_scaled = scale(color_hist_features, 0, 1)
bow_surf_scaled = scale(bow_surf, 0, 1)
features = np.hstack([color_layout_features_scaled, color_hist_features_scaled, bow_surf_scaled])
# define dataset
X, Y = features, labels
X = normalize(X)
pca = decomposition.PCA(n_components=100)
pca.fit(X)
X = pca.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=1, stratify=Y)
# split train into labeled and unlabeled
X_train_lab, X_test_unlab, y_train_lab, y_test_unlab = train_test_split(X_train, y_train, test_size=0.30, random_state=1, stratify=y_train)
# create the training dataset input
X_train_mixed = concatenate((X_train_lab, X_test_unlab))
# create "no label" for unlabeled data
nolabel = [-1 for _ in range(len(y_test_unlab))]
# recombine training dataset labels
y_train_mixed = concatenate((y_train_lab, nolabel))
from semisupervised import S3VM
model = S3VM(kernel="Linear", C = 1e-2, gamma = 0.5, lamU = 1.0, probability=True)
#model.fit(X_train_mixed, _train_mixed)
model.fit(np.vstack((X_train_lab, X_test_unlab)), np.append(y_train_lab, nolabel))
#model.fit(np.vstack((label_X_train, unlabel_X_train)), np.append(label_y_train, unlabel_y))
# predict
predict = model.predict(X_test)
acc = metrics.accuracy_score(y_test, predict)
# metric
print("accuracy", acc*100)
accuracy 2.6692291266282298
I am using a Transductive SVM (TSVM) from the semisupervised library, but I'm not sure what I am doing wrong, since even after tweaking the parameters I still get the same result. I referred to https://github.com/rosefun/SemiSupervised/blob/master/semisupervised/TSVM.py for the implementation. Any inputs would be helpful.
Please consider that, according to the linked documentation, "The unlabeled samples should be labeled as -1".
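As a sanity check on the ~2% figure, it may also help to compare against scikit-learn's built-in self-training wrapper on the exact same split. A minimal sketch reusing the question's variables (SVC needs probability=True so the wrapper can threshold its pseudo-labels, and -1 again marks the unlabeled rows):
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC

st_model = SelfTrainingClassifier(SVC(kernel='linear', C=1.0, probability=True))
st_model.fit(np.vstack((X_train_lab, X_test_unlab)), np.append(y_train_lab, nolabel))
st_preds = st_model.predict(X_test)
print("self-training accuracy", metrics.accuracy_score(y_test, st_preds) * 100)
If this also scores near chance level, the problem is more likely in the features or labels than in the TSVM implementation.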
