Python: shapes not aligned

I have searched Google for help on this error and tried using dummies, but so far nothing has helped. My code is:
df_movies_train_reg = df_movies[df_movies['Release Year'] < 2014]
df_movies_test_reg = df_movies[df_movies['Release Year'] > 2013]
df_train = pd.DataFrame()
df_test = pd.DataFrame()
df_train['x'] = df_movies_train_reg['Theaters in First Week'].copy()
df_train['y'] = df_movies_train_reg['Worldwide Gross'].copy()
df_test['x'] = df_movies_test_reg['Theaters in First Week'].copy()
df_test['y'] = df_movies_test_reg['Worldwide Gross'].copy()
df_train['x^2'] = df_train['x'] * df_train['x']
X_reg = df_train[['x', 'x^2']]
y_reg = df_train[['y']]
reg = LinearRegression()
model = reg.fit(X_reg, y_reg)
score = model.score(df_test[['x']], df_test[['y']])
The dataset has 2328 rows; the training set left after filtering by year is 2037 rows, and the test set is 291 rows. The error:
ValueError Traceback (most recent call last)
<ipython-input-116-aee4ec5bd647> in <module>
50 model = reg.fit(X_reg, y_reg)
51
---> 52 score = model.score(df_test[['x']], df_test[['y']])
53
54
~\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
326
327 from .metrics import r2_score
--> 328 return r2_score(y, self.predict(X), sample_weight=sample_weight,
329 multioutput='variance_weighted')
330
~\Anaconda3\lib\site-packages\sklearn\linear_model\base.py in predict(self, X)
211 Returns predicted values.
212 """
--> 213 return self._decision_function(X)
214
215 _preprocess_data = staticmethod(_preprocess_data)
~\Anaconda3\lib\site-packages\sklearn\linear_model\base.py in _decision_function(self, X)
196 X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
197 return safe_sparse_dot(X, self.coef_.T,
--> 198 dense_output=True) + self.intercept_
199
200 def predict(self, X):
~\Anaconda3\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
171 return ret
172 else:
--> 173 return np.dot(a, b)
174
175
ValueError: shapes (291,1) and (2,1) not aligned: 1 (dim 1) != 2 (dim 0)
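The model was fit on two features (x and x^2), so model.coef_ has shape (1, 2), but score() was given only the single x column; a (291, 1) matrix cannot be dotted with the (2, 1) transposed coefficients. A minimal sketch of the fix, building the same squared feature on the test set before scoring:
# build x^2 on the test set so it matches the training features
df_test['x^2'] = df_test['x'] * df_test['x']
score = model.score(df_test[['x', 'x^2']], df_test[['y']])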

Related

Sklearn: TypeError seen while fitting the data into the Logistic Regression model

I get the following error while doing fit_transform using Logistic Regression:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_train_tfidf.shape
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(solver = 'lbfgs')
clf.fit(X_train_tfidf,y_train)
I went through this thread, LabelEncoder: TypeError: '>' not supported between instances of 'float' and 'str', but that too did not help. Any help would be greatly appreciated.
TypeError: '<' not supported between instances of 'float' and 'str'
As per the above link, I also don't have any null values:
X_train.isnull().value_counts()
False 2584
Name: Headline, dtype: int64
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-65-e676010d2b44> in <module>
3 clf = LogisticRegression(solver = 'lbfgs')
4
----> 5 clf.fit(X_train_tfidf,y_train)
~/Desktop/Anaconda/anaconda3/envs/nlp_course/lib/python3.7/site-packages/sklearn/linear_model/logistic.py in fit(self, X, y, sample_weight)
1284 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C",
1285 accept_large_sparse=solver != 'liblinear')
-> 1286 check_classification_targets(y)
1287 self.classes_ = np.unique(y)
1288 n_samples, n_features = X.shape
~/Desktop/Anaconda/anaconda3/envs/nlp_course/lib/python3.7/site-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
166 y : array-like
167 """
--> 168 y_type = type_of_target(y)
169 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
170 'multilabel-indicator', 'multilabel-sequences']:
~/Desktop/Anaconda/anaconda3/envs/nlp_course/lib/python3.7/site-packages/sklearn/utils/multiclass.py in type_of_target(y)
285 return 'continuous' + suffix
286
--> 287 if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
288 return 'multiclass' + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
289 else:
~/Desktop/Anaconda/anaconda3/envs/nlp_course/lib/python3.7/site-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
231 ar = np.asanyarray(ar)
232 if axis is None:
--> 233 ret = _unique1d(ar, return_index, return_inverse, return_counts)
234 return _unpack_tuple(ret)
235
~/Desktop/Anaconda/anaconda3/envs/nlp_course/lib/python3.7/site-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
279 aux = ar[perm]
280 else:
--> 281 ar.sort()
282 aux = ar
283 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'float' and 'str'
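The traceback fails while np.unique tries to sort y, which happens when the labels mix strings with floats; a single NaN (a float) among string labels is enough. Note that the null check above was run on X_train (the Headline column), not on the labels. A hedged diagnostic, assuming y_train is a pandas Series:
print(y_train.isnull().sum())              # any missing labels?
print(y_train.map(type).value_counts())    # should show exactly one type
# one possible fix: drop rows with missing labels before vectorizing
mask = y_train.notnull()
X_train, y_train = X_train[mask], y_train[mask]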

MemoryError when using pandas_profiling profile_report

I'm trying to profile an Excel file. It is a very small data set, only 30 columns and 535 rows, but when I run the profile_report function it stops at a different percentage each time, always with the same message:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-41-283dd2cb2000> in <module>
1 df=pd.read_excel(path_working+'Documents/Information/'+'sample.xlsx')
2 profile = df.profile_report(title='Sample Exploratory')
----> 3 profile.to_file(path_working+'sample.html')
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_file(self, output_file, silent)
276 create_html_assets(output_file)
277
--> 278 data = self.to_html()
279
280 if output_file.suffix != ".html":
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in to_html(self)
384
385 """
--> 386 return self.html
387
388 def to_json(self) -> str:
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in html(self)
199 def html(self):
200 if self._html is None:
--> 201 self._html = self._render_html()
202 return self._html
203
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in _render_html(self)
306 from pandas_profiling.report.presentation.flavours import HTMLReport
307
--> 308 report = self.report
309
310 disable_progress_bar = not config["progress_bar"].get(bool)
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in report(self)
193 def report(self):
194 if self._report is None:
--> 195 self._report = get_report_structure(self.description_set)
196 return self._report
197
~\anaconda3\lib\site-packages\pandas_profiling\profile_report.py in description_set(self)
172 def description_set(self):
173 if self._description_set is None:
--> 174 self._description_set = describe_df(
175 self.title, self.df, self.summarizer, self.typeset, self._sample
176 )
~\anaconda3\lib\site-packages\pandas_profiling\model\describe.py in describe(title, df, summarizer, typeset, sample)
72 total=number_of_tasks, desc="Summarize dataset", disable=disable_progress_bar
73 ) as pbar:
---> 74 series_description = get_series_descriptions(df, summarizer, typeset, pbar)
75
76 pbar.set_postfix_str("Get variable types")
~\anaconda3\lib\site-packages\pandas_profiling\model\summary.py in get_series_descriptions(df, summarizer, typeset, pbar)
97 # TODO: use `Pool` for Linux-based systems
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
---> 99 for i, (column, description) in enumerate(
100 executor.imap_unordered(multiprocess_1d, args)
101 ):
~\anaconda3\lib\multiprocessing\pool.py in next(self, timeout)
866 if success:
867 return value
--> 868 raise value
869
870 __next__ = next # XXX
~\anaconda3\lib\multiprocessing\pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
~\anaconda3\lib\site-packages\pandas_profiling\model\summary.py in multiprocess_1d(args)
76 """
77 column, series = args
---> 78 return column, describe_1d(series, summarizer, typeset)
79
80 pool_size = config["pool_size"].get(int)
~\anaconda3\lib\site-packages\pandas_profiling\model\summary.py in describe_1d(series, summarizer, typeset)
50 vtype = typeset.detect_type(series)
51
---> 52 return summarizer.summarize(series, dtype=vtype)
53
54
~\anaconda3\lib\site-packages\pandas_profiling\model\summarizer.py in summarize(self, series, dtype)
54 """
55 summarizer_func = compose(self.summary_map.get(dtype, []))
---> 56 _, summary = summarizer_func(series, {"type": dtype})
57 return summary
58
~\anaconda3\lib\site-packages\pandas_profiling\model\handler.py in func2(*x)
21 return f(*x)
22 else:
---> 23 return f(*res)
24
25 return func2
~\anaconda3\lib\site-packages\pandas_profiling\model\handler.py in func2(*x)
21 return f(*x)
22 else:
---> 23 return f(*res)
24
25 return func2
~\anaconda3\lib\site-packages\pandas_profiling\model\handler.py in func2(*x)
21 return f(*x)
22 else:
---> 23 return f(*res)
24
25 return func2
~\anaconda3\lib\site-packages\pandas_profiling\model\handler.py in func2(*x)
17 def func(f, g):
18 def func2(*x):
---> 19 res = g(*x)
20 if type(res) == bool:
21 return f(*x)
~\anaconda3\lib\site-packages\pandas_profiling\model\summary_algorithms.py in inner(series, summary)
70 if not summary["hashable"]:
71 return series, summary
---> 72 return fn(series, summary)
73
74 return inner
~\anaconda3\lib\site-packages\visions\utils\series_utils.py in inner(series, state, *args, **kwargs)
40 return False
41
---> 42 return fn(series, state, *args, **kwargs)
43
44 return inner
~\anaconda3\lib\site-packages\pandas_profiling\model\summary_algorithms.py in describe_numeric_1d(series, summary)
208
209 if chi_squared_threshold > 0.0:
--> 210 stats["chi_squared"] = chi_square(finite_values)
211
212 stats["range"] = stats["max"] - stats["min"]
~\anaconda3\lib\site-packages\pandas_profiling\model\summary_helpers.py in chi_square(values, histogram)
352 def chi_square(values=None, histogram=None):
353 if histogram is None:
--> 354 histogram, _ = np.histogram(values, bins="auto")
355 return dict(chisquare(histogram)._asdict())
356
<__array_function__ internals> in histogram(*args, **kwargs)
~\anaconda3\lib\site-packages\numpy\lib\histograms.py in histogram(a, bins, range, normed, weights, density)
790 a, weights = _ravel_and_check_weights(a, weights)
791
--> 792 bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
793
794 # Histogram is an integer or a float array depending on the weights.
~\anaconda3\lib\site-packages\numpy\lib\histograms.py in _get_bin_edges(a, bins, range, weights)
444
445 # bin edges must be computed
--> 446 bin_edges = np.linspace(
447 first_edge, last_edge, n_equal_bins + 1,
448 endpoint=True, dtype=bin_type)
<__array_function__ internals> in linspace(*args, **kwargs)
~\anaconda3\lib\site-packages\numpy\core\function_base.py in linspace(start, stop, num, endpoint, retstep, dtype, axis)
126
127 delta = stop - start
--> 128 y = _nx.arange(0, num, dtype=dt).reshape((-1,) + (1,) * ndim(delta))
129 # In-place multiplication y *= delta/div is faster, but prevents the multiplicant
130 # from overriding what class is produced, and thus prevents, e.g. use of Quantities,
MemoryError: Unable to allocate 1.75 EiB for an array with shape (251938683619878560,) and data type float64
I ran the same code in a different Python installation and it ran fine.
Thank you all in advance and let me know if you need more information.
This is a bug in numpy.histogram (https://github.com/numpy/numpy/issues/10297), also reported on SO (Numpy histogram extremely slow on small data set).
The error is caused by the call to np.histogram(values, bins="auto"). When the input contains very large values, the "auto" method can fail while trying to generate an enormous number of bins that cannot fit in RAM.
As a workaround, you can remove the large values manually before generating the report.
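A hedged sketch of that workaround (the column name is hypothetical; substitute whichever numeric column holds the extreme values):
cap = df['large_value_column'].quantile(0.99)  # hypothetical column name
df = df[df['large_value_column'] <= cap]       # drop the extreme rows
profile = df.profile_report(title='Sample Exploratory')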

Error during data scaling in pandas data frame

I have a dataset in CSV format. I am trying to scale my dataset, but I am getting an error. As I understand it, I need to convert from 3D to 2D, but I am not sure how to do that.
Example of my dataset:
63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
56.0,1.0,2.0,120.0,236.0,0.0,0.0,178.0,0.0,0.8,1.0,0.0,3.0,0
62.0,0.0,4.0,140.0,268.0,0.0,2.0,160.0,0.0,3.6,3.0,2.0,3.0,3
57.0,0.0,4.0,120.0,354.0,0.0,0.0,163.0,1.0,0.6,1.0,0.0,3.0,0
63.0,1.0,4.0,130.0,254.0,0.0,2.0,147.0,0.0,1.4,2.0,1.0,7.0,2
53.0,1.0,4.0,140.0,203.0,1.0,2.0,155.0,1.0,3.1,3.0,0.0,7.0,1
57.0,1.0,4.0,140.0,192.0,0.0,0.0,148.0,0.0,0.4,2.0,0.0,6.0,0
56.0,0.0,2.0,140.0,294.0,0.0,2.0,153.0,0.0,1.3,2.0,0.0,3.0,0
56.0,1.0,3.0,130.0,256.0,1.0,2.0,142.0,1.0,0.6,2.0,1.0,6.0,2
44.0,1.0,2.0,120.0,263.0,0.0,0.0,173.0,0.0,0.0,1.0,0.0,7.0,0
52.0,1.0,3.0,172.0,199.0,1.0,0.0,162.0,0.0,0.5,1.0,0.0,7.0,0
57.0,1.0,3.0,150.0,168.0,0.0,0.0,174.0,0.0,1.6,1.0,0.0,3.0,0
48.0,1.0,2.0,110.0,229.0,0.0,0.0,168.0,0.0,1.0,3.0,0.0,7.0,1
54.0,1.0,4.0,140.0,239.0,0.0,0.0,160.0,0.0,1.2,1.0,0.0,3.0,0
My code:
import pandas as pd
from sklearn.preprocessing import StandardScaler
df = pd.read_csv('processed_cleveland_data.csv')
ss = StandardScaler()
df_scaled = pd.DataFrame(ss.fit_transform(df),columns = df.columns)
Error:
ValueError
Traceback (most recent call last)
<ipython-input-5-6db223ceefcd> in <module>
4 df = pd.read_csv('processed_cleveland_data.csv')
5 ss = StandardScaler()
----> 6 df_scaled = pd.DataFrame(ss.fit_transform(df),columns = df.columns)
~\Miniconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
697 if y is None:
698 # fit method of arity 1 (unsupervised transformation)
--> 699 return self.fit(X, **fit_params).transform(X)
700 else:
701 # fit method of arity 2 (supervised transformation)
~\Miniconda3\lib\site-packages\sklearn\preprocessing\_data.py in fit(self, X, y, sample_weight)
728 # Reset internal state before fitting
729 self._reset()
--> 730 return self.partial_fit(X, y, sample_weight)
731
732 def partial_fit(self, X, y=None, sample_weight=None):
~\Miniconda3\lib\site-packages\sklearn\preprocessing\_data.py in partial_fit(self, X, y, sample_weight)
764 """
765 first_call = not hasattr(self, "n_samples_seen_")
--> 766 X = self._validate_data(X, accept_sparse=('csr', 'csc'),
767 estimator=self, dtype=FLOAT_DTYPES,
768 force_all_finite='allow-nan', reset=first_call)
~\Miniconda3\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
419 out = X
420 elif isinstance(y, str) and y == 'no_validation':
--> 421 X = check_array(X, **check_params)
422 out = X
423 else:
~\Miniconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\Miniconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
614 array = array.astype(dtype, casting="unsafe", copy=False)
615 else:
--> 616 array = np.asarray(array, order=order, dtype=dtype)
617 except ComplexWarning as complex_warning:
618 raise ValueError("Complex data not supported\n"
~\Miniconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
~\Miniconda3\lib\site-packages\pandas\core\generic.py in __array__(self, dtype)
1897
1898 def __array__(self, dtype=None) -> np.ndarray:
-> 1899 return np.asarray(self._values, dtype=dtype)
1900
1901 def __array_wrap__(
~\Miniconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
ValueError: could not convert string to float: '?'
Use na_values to convert ? to missing values:
df = pd.read_csv('processed_cleveland_data.csv', na_values='?')
#if csv has no header
#df = pd.read_csv('processed_cleveland_data.csv', na_values='?', header=None)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
df_scaled = pd.DataFrame(ss.fit_transform(df),columns = df.columns)
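A possible follow-up, assuming the scaled frame feeds a model: as the traceback shows, StandardScaler validates with force_all_finite='allow-nan', so the converted '?' values survive as NaN in df_scaled and may need imputing first, e.g.:
df = df.fillna(df.mean())   # simple mean imputation; other strategies apply
df_scaled = pd.DataFrame(ss.fit_transform(df), columns=df.columns)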

XGBRegressor.fit() throws TypeError: Expected sequence or array-like, got <class 'xgboost.core.DMatrix'>

When trying to fit my XGBRegressor model I got TypeError: Expected sequence or array-like, got <class 'xgboost.core.DMatrix'> on the x parameter, even though it is of type numpy.ndarray:
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
model = XGBRegressor(max_depth=5, learning_rate=0.001, n_estimators=5000)
eval_set = [(X_test, y_test)]
model.fit(X_train, y_train, eval_set=eval_set, eval_metric=r2_score, early_stopping_rounds=20, verbose=True)
Error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-50-4bb3ebe8ef00> in <module>
----> 1 model.fit(X_train, y_train, eval_set=eval_set, eval_metric=r2_score, early_stopping_rounds=20, verbose=True)
~/anaconda3/envs/ds/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, callbacks)
376 evals_result=evals_result, obj=obj, feval=feval,
377 verbose_eval=verbose, xgb_model=xgb_model,
--> 378 callbacks=callbacks)
379
380 if evals_result:
~/anaconda3/envs/ds/lib/python3.6/site-packages/xgboost/training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
214 evals=evals,
215 obj=obj, feval=feval,
--> 216 xgb_model=xgb_model, callbacks=callbacks)
217
218
~/anaconda3/envs/ds/lib/python3.6/site-packages/xgboost/training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
82 # check evaluation result.
83 if len(evals) != 0:
---> 84 bst_eval_set = bst.eval_set(evals, i, feval)
85 if isinstance(bst_eval_set, STRING_TYPES):
86 msg = bst_eval_set
~/anaconda3/envs/ds/lib/python3.6/site-packages/xgboost/core.py in eval_set(self, evals, iteration, feval)
1175 if feval is not None:
1176 for dmat, evname in evals:
-> 1177 feval_ret = feval(self.predict(dmat), dmat)
1178 if isinstance(feval_ret, list):
1179 for name, val in feval_ret:
~/anaconda3/envs/ds/lib/python3.6/site-packages/sklearn/metrics/regression.py in r2_score(y_true, y_pred, sample_weight, multioutput)
532 """
533 y_type, y_true, y_pred, multioutput = _check_reg_targets(
--> 534 y_true, y_pred, multioutput)
535 check_consistent_length(y_true, y_pred, sample_weight)
536
~/anaconda3/envs/ds/lib/python3.6/site-packages/sklearn/metrics/regression.py in _check_reg_targets(y_true, y_pred, multioutput)
73
74 """
---> 75 check_consistent_length(y_true, y_pred)
76 y_true = check_array(y_true, ensure_2d=False)
77 y_pred = check_array(y_pred, ensure_2d=False)
~/anaconda3/envs/ds/lib/python3.6/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
229 """
230
--> 231 lengths = [_num_samples(X) for X in arrays if X is not None]
232 uniques = np.unique(lengths)
233 if len(uniques) > 1:
~/anaconda3/envs/ds/lib/python3.6/site-packages/sklearn/utils/validation.py in <listcomp>(.0)
229 """
230
--> 231 lengths = [_num_samples(X) for X in arrays if X is not None]
232 uniques = np.unique(lengths)
233 if len(uniques) > 1:
~/anaconda3/envs/ds/lib/python3.6/site-packages/sklearn/utils/validation.py in _num_samples(x)
136 else:
137 raise TypeError("Expected sequence or array-like, got %s" %
--> 138 type(x))
139 if hasattr(x, 'shape'):
140 if len(x.shape) == 0:
TypeError: Expected sequence or array-like, got <class 'xgboost.core.DMatrix'>
# XGBoost Classifier
from xgboost import XGBClassifier
xgb_classifier = XGBClassifier()
xgb_classifier.fit(X_train, y_train)
y_pred_xgb = xgb_classifier.predict(X_test)
accuracy_score(y_test, y_pred_xgb)
I found what was causing this error: I had set the wrong eval_metric=r2_score.
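Concretely, a callable passed as eval_metric is forwarded to xgboost as feval, which (as the traceback shows) is invoked as feval(preds, DMatrix); sklearn's r2_score then receives the raw DMatrix and raises. Two hedged alternatives: use a built-in metric name, or wrap R^2 in the feval signature:
model.fit(X_train, y_train, eval_set=eval_set, eval_metric='rmse',
          early_stopping_rounds=20, verbose=True)
# or, if R^2 is really wanted (note: early stopping assumes lower is better):
def r2_eval(preds, dmatrix):
    return 'r2', r2_score(dmatrix.get_label(), preds)
model.fit(X_train, y_train, eval_set=eval_set, eval_metric=r2_eval,
          early_stopping_rounds=20, verbose=True)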

Jupyter: Creating Decision Tree, TypeError: '<' not supported between instances of 'str' and 'float'

I'm creating a decision tree in a Jupyter notebook. When I started building the tree and passing in the features and target class, Jupyter gave me an error in the cell for the decision tree: dt = c.fit(X_train, y_train)
This is the error:
TypeError
Traceback (most recent call last)
<ipython-input-10-0f9186b7935c> in <module>()
----> 1 dt = c.fit(X_train, y_train)
~\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
788 sample_weight=sample_weight,
789 check_input=check_input,
--> 790 X_idx_sorted=X_idx_sorted)
791 return self
792
~\Anaconda3\lib\site-packages\sklearn\tree\tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
138
139 if is_classification:
--> 140 check_classification_targets(y)
141 y = np.copy(y)
142
~\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
167 y : array-like
168 """
--> 169 y_type = type_of_target(y)
170 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
171 'multilabel-indicator', 'multilabel-sequences']:
~\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in type_of_target(y)
286 return 'continuous' + suffix
287
--> 288 if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
289 return 'multiclass' + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
290 else:
~\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
221 ar = np.asanyarray(ar)
222 if axis is None:
--> 223 return _unique1d(ar, return_index, return_inverse, return_counts)
224 if not (-ar.ndim <= axis < ar.ndim):
225 raise ValueError('Invalid axis kwarg specified for unique')
~\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
281 aux = ar[perm]
282 else:
--> 283 ar.sort()
284 aux = ar
285 flag = np.concatenate(([True], aux[1:] != aux[:-1]))
TypeError: '<' not supported between instances of 'str' and 'float'
I'm confused because my data set features are clean; they are all int, and the target class is the only categorical column.
Can someone tell me what is happening and what to do so that dt = c.fit(X_train, y_train) will work? Here c = DecisionTreeClassifier(min_samples_split=100); c is a decision tree classifier.
This is my sample data set:
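A hedged diagnostic, assuming y_train is the target passed to c.fit(): the sort inside np.unique fails when labels mix strings and floats, and a single NaN (a float) among string categories triggers exactly this TypeError:
import pandas as pd
y = pd.Series(y_train)
print(y.isnull().sum())            # any missing labels?
print(y.map(type).value_counts())  # should show exactly one type
# one possible fix: drop (or fill) the missing labels before fitting
mask = y.notnull()
dt = c.fit(X_train[mask], y[mask])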
