'str' object has no attribute 'dropna' - python

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import linear_model
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from google.colab import files
df = files.upload()
df='Dataset.csv'
df=df.dropna()
AttributeError Traceback (most recent call last)
in ()
----> 1 df=df.dropna()
AttributeError: 'str' object has no attribute 'dropna'

You are not loading the file into a DataFrame; you are just assigning the file name (a string) to df, and a string has no dropna method. Use this instead:
df = pd.read_csv('Dataset.csv')
df = df.dropna()

Related

AttributeError 'GridSearchCV' object has no attribute 'cv_results_'

I'm working through the load_boston() data for a scikit-learn tutorial. I'm running into this attribute error:
AttributeError 'GridSearchCV' object has no attribute 'cv_results_'
Does anyone know if there is a bug? I am using version 1.1.1 of scikit-learn.
import sklearn
from sklearn.datasets import load_boston
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
import matplotlib.pylab as plt
import pandas as pd
from sklearn.model_selection import cross_val_score
print(sklearn.__version__)
X, y = load_boston(return_X_y=True)
mod = KNeighborsRegressor().fit(X, y)
pipe = Pipeline([
("scale", StandardScaler()),
("model", KNeighborsRegressor(n_neighbors=3))
])
print(pipe.get_params())
mod1 = GridSearchCV(estimator=pipe, param_grid={'model__n_neighbors': [1,2,3,4,5,6,7,8,9,10]},cv = 3)
pipe.fit(X, y)
pred = pipe.predict(X)
df = pd.DataFrame(mod1.cv_results_)
plt.scatter(pred, y) #pred instead of X
plt.title("Boston Housing Market")
plt.show()
The point is that cv_results_ is an attribute of the fitted GridSearchCV instance, while you've only fitted the pipeline (its base estimator). Therefore, you should fit mod1 to make it work.
import sklearn
from sklearn.datasets import load_boston
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
import matplotlib.pylab as plt
import pandas as pd
from sklearn.model_selection import cross_val_score
X, y = load_boston(return_X_y=True)
mod = KNeighborsRegressor().fit(X,y)
pipe = Pipeline([
("scale", StandardScaler()),
("model", KNeighborsRegressor(n_neighbors=3))
])
print(pipe.get_params())
mod1 = GridSearchCV(estimator=pipe,param_grid={'model__n_neighbors':
[1,2,3,4,5,6,7,8,9,10]},cv = 3)
mod1.fit(X, y)
df = pd.DataFrame(mod1.cv_results_)
Be aware, though, that the .fit() method of GridSearchCV does not return the fitted base estimator (it fits clones of it internally, so pipe itself stays unfitted). Therefore, you won't be able to call pipe.predict(X) if you just replace pipe.fit(X, y) with mod1.fit(X, y); call mod1.predict(X) or use mod1.best_estimator_ instead.

Tensorflow from tensor slices attribute error

I am trying to convert a pandas DataFrame into a TensorFlow dataset to build a model on, but from_tensor_slices raises an error. Any idea how to fix it, or is there another way to use a pandas DataFrame in a TensorFlow model?
Thanks in advance.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
tf.compat.v1.disable_eager_execution()
df = pd.read_csv('insurance.csv')
X = pd.get_dummies(df, columns = ['sex', 'smoker', 'region'])
y = X.pop('charges')
ds = tf.data.Dataset.from_tensor_slices((X.values, y.values))
Error:
Traceback (most recent call last):
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\training\tracking\tracking.py", line 269, in
__del__
File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-
packages\tensorflow\python\framework\ops.py", line 4011, in as_default
AttributeError: 'NoneType' object has no attribute 'get_controller'
I have solved it now, and I am posting the answer for those who get the same error.
You need to add the line below before the from_tensor_slices() line:
tf.compat.v1.enable_eager_execution()

KeyError: "['Close'] not found in axis"

I am making an algorithm with ML in Python but I came across this error.
KeyError: "['Close'] not found in axis"
Here is my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import plot_roc_curve
plt.style.use("seaborn")
%matplotlib inline
data = pd.read_csv("data.csv")
data.head()
X = data.drop(columns="Close", axis=1)
y = data["Close"].values

Importing Max_Error from sklearn.metrics

I'm attempting to import max error from sklearn.metrics
from sklearn.metrics import max_error
However, I receive the following message:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-27-861960ac8e03> in <module>
1 from sklearn.metrics import explained_variance_score
----> 2 from sklearn.metrics import max_error
3 from sklearn.metrics import mean_squared_error
4 from sklearn.metrics import r2_score
ImportError: cannot import name 'max_error' from 'sklearn.metrics' (/opt/conda/lib/python3.7/site-packages/sklearn/metrics/__init__.py)
Any advice on how to fix this?

Scikit-learn binarize categorical data

I've been trying to load a CSV file into scikit-learn via pandas and set the target column to be a list of 20 categorical variables. I tried using label_binarize, but that didn't seem to do any good, so after some reading I switched to LabelEncoder, but it doesn't appear to change much.
from io import StringIO
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc, confusion_matrix
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.preprocessing import label_binarize, MultiLabelBinarizer, LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
#loading the data
data=pd.read_csv("data.csv")
y = data.iloc[:,19]
X = data.iloc[:,1:18+20:22]
#Binarize the output
le = LabelEncoder()
le.fit(["0-1","1-1.5","1.5-2","2-2.5","2.5-3","3-3.5","3.5-4","4-4.5","4.5-5","5-5.5","5.5-6","6-6.5","6.5-7","7-7.5","7.5-8","8-8.5","8.5-9","9-9.5","9.5-10","10+"
])
LabelEncoder()
le.transform(y)
y = label_binarize(y, le)
n_classes = y.shape[1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
random_state=0)
model3 = KNeighborsClassifier(n_neighbors=7)
yet when I run this I get:
Traceback (most recent call last):
File "file, line 30, in <module>
le.transform(y)
File "C:\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py", line 149, in transform
classes = np.unique(y)
File "\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py", line 198, in unique
ar.sort()
TypeError: '>' not supported between instances of 'str' and 'float'
Is this kind of target data even possible for scikit?
OK, so to solve this issue I found that you need to surround the categorical data itself with quotation marks, like this: "0-1"
Otherwise Python would read it as the numeric expression 0-1 rather than as a string and get confused. With the quotes in place, the data loads correctly.

Categories

Resources