I am just Starting this naive Bayes Theoram and Getting stuck by this please tell me what to do,
this is my code ::
import sklearn
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
df = (pd.read_csv("C:\\Users\\dhana\\Downloads\\Mechanics\\train.csv"))
X = []
Y = []
for i in df.Age:
X.append(str(i))
for j in df.Fare:
Y.append(j)
X_train, X_test, y_train, y_test = train_test_split(X,Y,test_size=0.3,random_state= 17)
model = GaussianNB()
model.fit(X_train,y_train)
print(model)
y_expect = y_test
y_pred = model.predict(X_test)
print(accuracy_score(y_expect,y_pred))```
Related
I'm practicing about classifications. I could see iris has both attribute when I printed it. But I still getting same error.
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
iris = datasets.load_iris(),
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)
print(classification_report(y_test, y_pred))
Remove the , at the end of
iris = datasets.load_iris(),
Otherwise iris becomes a tuple with one element. Your line is identical to:
iris = (datasets.load_iris(),)
Ideally I should get same result as score is nothing but R-Square. But not sure why results are coming different.
from sklearn.datasets import california_housing
data = california_housing.fetch_california_housing()
data.data.shape
data.feature_names
data.target_names
import pandas as pd
house_data = pd.DataFrame(data.data, columns=data.feature_names)
house_data.describe()
house_data['Price'] = data.target
X = house_data.iloc[:, 0:8].values
y = house_data.iloc[:, -1].values
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0)
# Fitting Simple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
#Check R-square on training data
from sklearn.metrics import mean_squared_error, r2_score
y_pred = linear_model.predict(X_test)
print(linear_model.score(X_test, y_test))
print(r2_score(y_pred, y_test))
Output
0.5957643114594776
0.34460597952465033
from the docs: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html
sklearn.metrics.r2_score(y_true, y_pred,...)
You are passing y_true and y_pred the wrong way around. If you switch them you get the correct result.
print(linear_model.score(X_test, y_test))
print(r2_score(y_test, y_pred))
0.5957643114594777
0.5957643114594777
Trying to do logisticregression with Dask but get the error
Could not find signature for add_intercept: <DataFrame>"
my code:
from dask_ml.linear_model import LogisticRegression
from dask_ml.model_selection import train_test_split
from dask_ml.feature_extraction.text import HashingVectorizer
from dask_ml.datasets import make_classification
dask_df = dd.from_pandas(df2,chunksize=50)
dask_df.head()
X_train, X_test, y_train, y_test = train_test_split(df2[features], df2[label], test_size = 0.20, random_state = 42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr.decision_function(X_test)
This is for an assignment where the SVM methods has to be used for model accuracy.
There were 3 parts, wrote the below code
import sklearn.datasets as datasets
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split
digits = datasets.load_digits();
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30, stratify=y)
print(X_train.shape)
print(X_test.shape)
from sklearn.svm import SVC
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test,y_test))
But after this, the question is as below
Perform Standardization of digits.data and store the transformed data
in variable digits_standardized.
Hint : Use required utility from sklearn.preprocessing. Once again,
split digits_standardized into two sets names X_train and X_test.
Also, split digits.target into two sets Y_train and Y_test.
Hint: Use train_test_split method from sklearn.model_selection; set
random_state to 30; and perform stratified sampling. Build another SVM
classifier from X_train set and Y_train labels, with default
parameters. Name the model as svm_clf2.
Evaluate the model accuracy on testing data set and print it's score.
On top of the above code, tried writing this, but seems to be failing. Can anyone help on how the data can be standardized.
std_scale = preprocessing.StandardScaler().fit(X_train)
X_train_std = std_scale.transform(X_train)
X_test_std = std_scale.transform(X_test)
svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test,y_test))
Tried the below. Seems to be working.
import sklearn.datasets as datasets
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
digits = datasets.load_digits();
X = digits.data
scaler = StandardScaler()
scaler.fit(X)
digits_standardized = scaler.transform(X)
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(digits_standardized, y, random_state=30, stratify=y)
#print(X_train.shape)
#print(X_test.shape)
from sklearn.svm import SVC
svm_clf2 = SVC().fit(X_train, y_train)
print("Accuracy ",svm_clf2.score(X_test,y_test))
Try this as final code includes all Tasks
import sklearn.datasets as datasets
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30, stratify=y)
print(X_train.shape)
print(X_test.shape)
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test,y_test))
scaler = StandardScaler()
scaler.fit(X)
digits_standardized = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(digits_standardized, y, random_state=30, stratify=y)
svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test,y_test))
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
x=np.array([0.1,0.2,0.7,8.0,45.0,56.0,66.0,0.7,0.6,64.0])
y=np.array([0,0,0,1,1,1,1,0,0,1])
x = np.array(x).reshape((1, -1))
y = np.array(y).reshape((1, -1))
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.4, train_size=0.5, random_state=7, stratify=y)
knn = KNeighborsClassifier()
knn.fit(y_train, x_train)
y_train_predict = knn.predict(x_train)
y_test_predict = knn.predict(x_test)
print(y_train_predict)
print(y_test_predict)
Error:
With n_samples=1, test_size=0.4 and train_size=0.5, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Try:
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)