Speech Emotion Recognition: ValueError Traceback (most recent call last)

I tried speech emotion recognition (SER) in machine learning from a site called DataFlair, and for the following code,
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x, y = [], []
    for file in glob.glob("D:\archive\Actor_01\03-01-01-01-01-01-01.wav"):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
#DataFlair - Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.25)
I got this error:
ValueError Traceback (most recent call last)
Input In [10], in <cell line: 2>()
1 #DataFlair - Split the dataset
----> 2 x_train,x_test,y_train,y_test=load_data(test_size=0.25)
Input In [9], in load_data(test_size)
10 x.append(feature)
11 y.append(emotion)
---> 12 return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py:2420, in train_test_split(test_size, train_size, random_state, shuffle, stratify, *arrays)
2417 arrays = indexable(*arrays)
2419 n_samples = _num_samples(arrays[0])
-> 2420 n_train, n_test = _validate_shuffle_split(
2421 n_samples, test_size, train_size, default_test_size=0.25
2422 )
2424 if shuffle is False:
2425 if stratify is not None:
File ~\anaconda3\lib\site-packages\sklearn\model_selection\_split.py:2098, in _validate_shuffle_split(n_samples, test_size, train_size, default_test_size)
2095 n_train, n_test = int(n_train), int(n_test)
2097 if n_train == 0:
-> 2098 raise ValueError(
2099 "With n_samples={}, test_size={} and train_size={}, the "
2100 "resulting train set will be empty. Adjust any of the "
2101 "aforementioned parameters.".format(n_samples, test_size, train_size)
2102 )
2104 return n_train, n_test
ValueError: With n_samples=0, test_size=0.25 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
What are these errors and how do I overcome them? It works just fine on the website: https://data-flair.training/blogs/python-mini-project-speech-emotion-recognition/
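The traceback reports n_samples=0, which means glob found no files and x stayed empty. The pattern points to a single hard-coded file, and because it is not a raw string, the backslash sequences (\a, \0) are interpreted as escape characters, so it very likely matches nothing. A minimal sketch of a likely fix, assuming the RAVDESS actor folders live under D:\archive (that path is an assumption; adjust it to your dataset):

import glob, os

# Raw string keeps the backslashes literal; the wildcards pick up every actor folder and .wav file.
files = glob.glob(r"D:\archive\Actor_*\*.wav")
print(len(files))                      # if this prints 0, the path itself is wrong
for file in files[:5]:
    print(os.path.basename(file))

If len(files) stays 0, load_data appends nothing to x, np.array(x) has zero samples, and train_test_split raises exactly the ValueError shown above.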

Related

ValueError: if 'bert' is selected model, then preprocess_mode='bert' should be used and vice versa

I have a problem. I got the following error: "ValueError: if 'bert' is selected model, then preprocess_mode='bert' should be used and vice versa". But I do not see the problem. What is wrong with my code?
%%time
#Importing
import ktrain
from ktrain import text

(x_train_bert, y_train_bert), (x_val_bert, y_val_bert), preproc = text.texts_from_array(
    x_train=train_X.tolist(), y_train=train_y.tolist(),
    x_test=test_X.tolist(), y_test=test_y.tolist(),
    class_names=df_complete['forwarder_name'].unique(),
    preprocess_mode='bert',
    lang='en',
    maxlen=65,
    max_features=35000)

model = text.text_classifier(name='bert', train_data=(train_X, train_y), preproc=preproc)
learner = ktrain.get_learner(model, train_data=(train_X, train_y), val_data=(test_X, test_y), batch_size=6)
Complete error
model = text.text_classifier(name='bert', train_data=(train_X, train_y), preproc=preproc)
#learner = ktrain.get_learner(model,train_data=(train_X, train_y), val_data=(test_X, test_y), batch_size=6)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [111], in <cell line: 1>()
----> 1 model = text.text_classifier(name='bert', train_data=(train_X, train_y), preproc=preproc)
File ~\AppData\Roaming\Python\Python39\site-packages\ktrain\text\models.py:589, in text_classifier(name, train_data, preproc, multilabel, metrics, verbose)
585 if preproc is not None and not preproc.get_classes():
586 raise ValueError(
587 "preproc.get_classes() is empty, but required for text classification"
588 )
--> 589 return _text_model(
590 name,
591 train_data,
592 preproc=preproc,
593 multilabel=multilabel,
594 classification=True,
595 metrics=metrics,
596 verbose=verbose,
597 )
File ~\AppData\Roaming\Python\Python39\site-packages\ktrain\text\models.py:109, in _text_model(name, train_data, preproc, multilabel, classification, metrics, verbose)
107 is_bert = U.bert_data_tuple(train_data)
108 if (is_bert and name != BERT) or (not is_bert and name == BERT):
--> 109 raise ValueError(
110 "if '%s' is selected model, then preprocess_mode='%s' should be used and vice versa"
111 % (BERT, BERT)
112 )
113 is_huggingface = U.is_huggingface(data=train_data)
114 if (is_huggingface and name not in HUGGINGFACE_MODELS) or (
115 not is_huggingface and name in HUGGINGFACE_MODELS
116 ):
ValueError: if 'bert' is selected model, then preprocess_mode='bert' should be used and vice versa
There's a typo in your code. The problem is that you're using train_X and train_y (not preprocessed for BERT) instead of x_train_bert and y_train_bert (which were processed for BERT).
Use this instead:
model = text.text_classifier(name='bert', train_data=(x_train_bert, y_train_bert), preproc=preproc)
learner = ktrain.get_learner(model, train_data=(x_train_bert, y_train_bert), val_data=(x_val_bert, y_val_bert), batch_size=6)

How do I solve a KeyError: 8 while using PyTorch?

from torch.utils.data import (TensorDataset, DataLoader, RandomSampler,
                              SequentialSampler)

def data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size=50):
    """
    Convert train and validation sets to torch.Tensors and load them to DataLoader.
    """
    # Convert data type to torch.Tensor
    train_inputs, val_inputs, train_labels, val_labels = \
        tuple(torch.tensor(data) for data in
              [train_inputs, val_inputs, train_labels, val_labels])

    # Specify batch_size
    batch_size = 50

    # Create DataLoader for training data
    train_data = TensorDataset(train_inputs, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=batch_size)

    # Create DataLoader for validation data
    val_data = TensorDataset(val_inputs, val_labels)
    val_sampler = SequentialSampler(val_data)
    val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

    return train_dataloader, val_dataloader
The code works fine when the train_inputs and val_inputs tensors are of type int64, but doesn't when the type is int32.
Can someone tell me what's wrong here?
ERROR:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File ~\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File ~\Anaconda3\lib\site-packages\pandas\_libs\index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas\_libs\hashtable_class_helper.pxi:2131, in pandas._libs.hashtable.Int64HashTable.get_item()
File pandas\_libs\hashtable_class_helper.pxi:2140, in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 8
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [31], in <cell line: 6>()
2 train_inputs, val_inputs, train_labels, val_labels = train_test_split(
3 input_ids, labels, test_size=0.1, random_state=42)
5 # Load data to PyTorch DataLoader
----> 6 train_dataloader, val_dataloader = data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size=50)
Input In [28], in data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size)
6 """Convert train and validation sets to torch.Tensors and load them to
7 DataLoader.
8 """
10 # Convert data type to torch.Tensor
11 train_inputs, val_inputs, train_labels, val_labels =\
---> 12 tuple(torch.tensor(data) for data in
13 [train_inputs, val_inputs, train_labels, val_labels])
15 # Specify batch_size
16 batch_size = 50
Input In [28], in <genexpr>(.0)
6 """Convert train and validation sets to torch.Tensors and load them to
7 DataLoader.
8 """
10 # Convert data type to torch.Tensor
11 train_inputs, val_inputs, train_labels, val_labels =\
---> 12 tuple(torch.tensor(data) for data in
13 [train_inputs, val_inputs, train_labels, val_labels])
15 # Specify batch_size
16 batch_size = 50
File ~\Anaconda3\lib\site-packages\pandas\core\series.py:958, in Series.__getitem__(self, key)
955 return self._values[key]
957 elif key_is_scalar:
--> 958 return self._get_value(key)
960 if is_hashable(key):
961 # Otherwise index.get_value will raise InvalidIndexError
962 try:
963 # For labels that don't resolve as scalars like tuples and frozensets
File ~\Anaconda3\lib\site-packages\pandas\core\series.py:1069, in Series._get_value(self, label, takeable)
1066 return self._values[label]
1068 # Similar to Index.get_value, but we do not fall back to positional
-> 1069 loc = self.index.get_loc(label)
1070 return self.index._get_values_for_loc(self, loc, label)
File ~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 8
I was using the same code on my dataset and had the same issue. I did two things: I changed the random_state to something other than 42 (which probably wasn't what fixed it), and I also converted my labels to an np.array, and now it works.
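For context, the traceback shows torch.tensor triggering a pandas Series lookup with key 8; after train_test_split shuffles the data, that label may no longer be in the split's index, hence KeyError: 8. A minimal self-contained sketch of the fix described above, with toy stand-ins for the asker's input_ids and labels (the data and sizes here are made up for illustration):

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# Toy int32 data standing in for the real input_ids and labels.
input_ids = pd.Series(np.arange(20), dtype="int32")
labels = pd.Series(np.arange(20), dtype="int32")

train_inputs, val_inputs, train_labels, val_labels = train_test_split(
    input_ids, labels, test_size=0.1, random_state=42)

# Fix: drop the pandas index by converting to NumPy before building tensors.
train_inputs = torch.tensor(train_inputs.to_numpy())
train_labels = torch.tensor(train_labels.to_numpy())
print(train_inputs.dtype, train_labels.shape)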

TypeError: Invalid parameters passed: {'n_samples': 16000}

I am trying to use train_test_split from the package scikit-learn, but I am having trouble with a parameter.
Here is my code:
files = glob.glob(DATA_DIR + "*.wav")
X_train, X_val = train_test_split(files, n_samples=16000, test_size=0.2, train_size=0.8, random_state=SEED)
print('# Training examples: {}'.format(len(X_train)))
print('# Validation examples: {}'.format(len(X_val)))
Here is the output:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-14b557c632f0> in <module>
1 files = glob.glob(DATA_DIR + "*.wav")
----> 2 X_train, X_val = train_test_split(files, n_samples=16000, test_size=0.2, train_size=0.8, random_state=SEED)
3
4 print('# Training examples: {}'.format(len(X_train)))
5 print('# Validation examples: {}'.format(len(X_val)))
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in train_test_split(*arrays, **options)
2092
2093 if options:
-> 2094 raise TypeError("Invalid parameters passed: %s" % str(options))
2095
2096 arrays = indexable(*arrays)
TypeError: Invalid parameters passed: {'n_samples': 16000}
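train_test_split has no n_samples parameter, which is exactly what the TypeError reports; it only accepts the arrays plus test_size, train_size, random_state, shuffle and stratify. A minimal sketch of a likely fix, capping the file list separately if only 16000 files are wanted (DATA_DIR and SEED below are placeholders for the asker's values):

import glob
from sklearn.model_selection import train_test_split

DATA_DIR = "data/"   # placeholder; use the real DATA_DIR
SEED = 42            # placeholder; use the real SEED

files = glob.glob(DATA_DIR + "*.wav")[:16000]   # cap the sample count here instead of passing n_samples
X_train, X_val = train_test_split(files, test_size=0.2, train_size=0.8, random_state=SEED)

print('# Training examples: {}'.format(len(X_train)))
print('# Validation examples: {}'.format(len(X_val)))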

Xgboost 'DataFrame' object has no attribute 'num_row'

I am working on a multi-class classification problem using xgboost.
The shape of my data is
print(train_ohe.shape, test_ohe.shape)
# (43266, 190) (18543, 190)
Custom F1 eval function and model training code
def f1_eval(y_pred, dtrain):
    y_true = dtrain.get_label()
    err = 1 - f1_score(y_true, np.round(y_pred), average='weighted')
    return 'f1_err', err

def train_model(algo, train, test, predictors, useTrainCV=True,
                cv_folds=5, early_stopping_rounds=50):
    if useTrainCV:
        xgb_param = algo.get_params()
        xgb_train = xgb.DMatrix(train[predictors].values, label=train[target].values)
        xgb_test = xgb.DMatrix(test[predictors].values)
        print(xgb_train.num_row())
        print(xgb_test.num_row())
        cv_result = xgb.cv(xgb_param,
                           train,
                           num_boost_round=xgb_param['n_estimators'],
                           nfold=cv_folds,
                           metrics='f1_eval',
                           early_stopping_rounds=early_stopping_rounds)
        algo.set_params(n_estimators=cv_result.shape[0])

    # Fit algorithm on data
    algo.fit(train[predictors], train[target], eval_metric=f1_eval)

    # Predict train data
    train_predictions = algo.predict(train[predictors])
    train_pred_prob = algo.predict_proba(train[predictors])[:, 1]

    # Report model performance
    print("Model performance")
    print("F1 Score Train {}".format(f1_score(train[target].values, train_predictions)))

    # Predict test data
    test_predictions = algo.predict(test[predictors])

    # Performance
    print("F1 Score Test {}".format(f1_score(test[target].values, test_predictions)))
Here is my XGBClassifier code. I am trying to find the number of estimators for a high learning rate.
target = 'Complaint-Status'
predictors = [x for x in train_ohe.columns if x not in target]

xgb1 = XGBClassifier(learning_rate=0.1,
                     n_estimators=1000,
                     max_depth=5,
                     min_child_weight=1,
                     gamma=0,
                     subsample=0.8,
                     colsample_bytree=0.8,
                     objective='multi:softmax',
                     nthread=8,
                     scale_pos_weight=1,
                     seed=145)

train_model(xgb1, train_ohe, test_ohe, predictors)
I am getting the following AttributeError saying 'DataFrame' object has no attribute 'num_row' in the xgb.cv line in the train_model function.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-116-5933227c171d> in <module>
18 seed=145)
19 print(xgb1.get_params())
---> 20 train_model(xgb1, train_ohe, test_ohe, predictors)
21 # xgb_param = xgb1.get_params()
22 # cv_folds=5
<ipython-input-114-a9df39c19abf> in train_model(algo, train, test, predictors, useTrainCV, cv_folds, early_stopping_rounds)
19 nfold=cv_folds,
20 metrics='f1_eval',
---> 21 early_stopping_rounds=early_stopping_rounds)
22 algo.set_params(n_estimators=cv_result.shape[0])
23
/opt/virtual_env/py3/lib/python3.6/site-packages/xgboost/training.py in cv(params, dtrain, num_boost_round, nfold, stratified, folds, metrics, obj, feval, maximize, early_stopping_rounds, fpreproc, as_pandas, verbose_eval, show_stdv, seed, callbacks, shuffle)
413 results = {}
414 cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc,
--> 415 stratified, folds, shuffle)
416
417 # setup callbacks
/opt/virtual_env/py3/lib/python3.6/site-packages/xgboost/training.py in mknfold(dall, nfold, param, seed, evals, fpreproc, stratified, folds, shuffle)
246 # Do standard k-fold cross validation
247 if shuffle is True:
--> 248 idx = np.random.permutation(dall.num_row())
249 else:
250 idx = np.arange(dall.num_row())
/opt/virtual_env/py3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
4374 if self._info_axis._can_hold_identifiers_and_holds_name(name):
4375 return self[name]
-> 4376 return object.__getattribute__(self, name)
4377
4378 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'num_row'
I saw your post when I was searching around for the same error.
The second parameter, train, in this code:
cv_result = xgb.cv(xgb_param,
                   train,
                   num_boost_round=xgb_param['n_estimators'],
                   nfold=cv_folds,
                   metrics='f1_eval',
                   early_stopping_rounds=early_stopping_rounds)
algo.set_params(n_estimators=cv_result.shape[0])
should be a DMatrix, such as
train = xgb.DMatrix(X_train, y_train)
Hope this helps.
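Concretely, train_model already builds that DMatrix as xgb_train, so the changed call could look like the sketch below. It reuses the names defined inside train_model and routes the custom metric through feval, since 'f1_eval' is not a built-in xgboost metric name; this is a sketch, not a verified end-to-end fix.

cv_result = xgb.cv(xgb_param,
                   xgb_train,                     # the DMatrix built above, not the raw DataFrame
                   num_boost_round=xgb_param['n_estimators'],
                   nfold=cv_folds,
                   feval=f1_eval,                 # custom evaluation functions go through feval
                   early_stopping_rounds=early_stopping_rounds)
algo.set_params(n_estimators=cv_result.shape[0])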

XGBoostError: b'[19:12:58] src/metric/rank_metric.cc:89: Check failed: (preds.size()) == (info.labels.size()) label size predict size not match'

I am training an XGBClassifier on my training set.
My training features have shape (45001, 10338) and are a numpy array, and my training labels have shape (45001,) [I have 1161 unique labels, so I have label-encoded them] and are also a numpy array.
The documentation clearly says that I can create a DMatrix from a numpy array, so I am using the above-mentioned training features and labels as numpy arrays straight away. But I am getting the following error:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-30-3de36245534e> in <module>()
13 scale_pos_weight=1,
14 seed=27)
---> 15 modelfit(xgb1, train_x, train_y)
<ipython-input-27-9d215eac135e> in modelfit(alg, train_data_features, train_labels, useTrainCV, cv_folds, early_stopping_rounds)
6 xgtrain = xgb.DMatrix(train_data_features, label=train_labels)
7 cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
----> 8 metrics='auc',early_stopping_rounds=early_stopping_rounds)
9 alg.set_params(n_estimators=cvresult.shape[0])
10
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/training.py in cv(params, dtrain, num_boost_round, nfold, stratified, folds, metrics, obj, feval, maximize, early_stopping_rounds, fpreproc, as_pandas, verbose_eval, show_stdv, seed, callbacks)
399 for fold in cvfolds:
400 fold.update(i, obj)
--> 401 res = aggcv([f.eval(i, feval) for f in cvfolds])
402
403 for key, mean, std in res:
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/training.py in <listcomp>(.0)
399 for fold in cvfolds:
400 fold.update(i, obj)
--> 401 res = aggcv([f.eval(i, feval) for f in cvfolds])
402
403 for key, mean, std in res:
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/training.py in eval(self, iteration, feval)
221 def eval(self, iteration, feval):
222 """"Evaluate the CVPack for one iteration."""
--> 223 return self.bst.eval_set(self.watchlist, iteration, feval)
224
225
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/core.py in eval_set(self, evals, iteration, feval)
865 _check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration,
866 dmats, evnames, len(evals),
--> 867 ctypes.byref(msg)))
868 return msg.value
869 else:
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/core.py in _check_call(ret)
125 """
126 if ret != 0:
--> 127 raise XGBoostError(_LIB.XGBGetLastError())
128
129
XGBoostError: b'[19:12:58] src/metric/rank_metric.cc:89: Check failed: (preds.size()) == (info.labels.size()) label size predict size not match'
Please find my model code below:
def modelfit(alg, train_data_features, train_labels, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgb_param['num_class'] = 1161
        xgtrain = xgb.DMatrix(train_data_features, label=train_labels)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
                          metrics='auc', early_stopping_rounds=early_stopping_rounds)
        alg.set_params(n_estimators=cvresult.shape[0])

    #Fit the algorithm on the data
    alg.fit(train_data_features, train_labels, eval_metric='auc')

    #Predict training set:
    dtrain_predictions = alg.predict(train_data_features)
    dtrain_predprob = alg.predict_proba(train_data_features)[:, 1]

    #Print model report:
    print("\nModel Report")
    print("Accuracy : %.4g" % metrics.accuracy_score(train_labels, dtrain_predictions))
Where am I going wrong here?
My classifier is as follows:
xgb1 = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=50,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    nthread=4,
    scale_pos_weight=1,
    seed=27)
EDIT - 2
After changing the evaluation metric, I get:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-9-30c62a886c2e> in <module>()
13 scale_pos_weight=1,
14 seed=27)
---> 15 modelfit(xgb1, train_x_trail, train_y_trail)
<ipython-input-8-9d215eac135e> in modelfit(alg, train_data_features, train_labels, useTrainCV, cv_folds, early_stopping_rounds)
6 xgtrain = xgb.DMatrix(train_data_features, label=train_labels)
7 cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
----> 8 metrics='auc',early_stopping_rounds=early_stopping_rounds)
9 alg.set_params(n_estimators=cvresult.shape[0])
10
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/training.py in cv(params, dtrain, num_boost_round, nfold, stratified, folds, metrics, obj, feval, maximize, early_stopping_rounds, fpreproc, as_pandas, verbose_eval, show_stdv, seed, callbacks)
398 evaluation_result_list=None))
399 for fold in cvfolds:
--> 400 fold.update(i, obj)
401 res = aggcv([f.eval(i, feval) for f in cvfolds])
402
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/training.py in update(self, iteration, fobj)
217 def update(self, iteration, fobj):
218 """"Update the boosters for one iteration"""
--> 219 self.bst.update(self.dtrain, iteration, fobj)
220
221 def eval(self, iteration, feval):
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/core.py in update(self, dtrain, iteration, fobj)
804
805 if fobj is None:
--> 806 _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle))
807 else:
808 pred = self.predict(dtrain)
/home/carnd/anaconda3/envs/dl/lib/python3.5/site-packages/xgboost/core.py in _check_call(ret)
125 """
126 if ret != 0:
--> 127 raise XGBoostError(_LIB.XGBGetLastError())
128
129
XGBoostError: b'[03:43:03] src/objective/multiclass_obj.cc:42: Check failed: (info.labels.size()) != (0) label set cannot be empty'
The original error you get is because this metric was not designed for multi-class classification (see here).
You could use the scikit-learn wrapper of xgboost to overcome this issue. I modified your code with this wrapper to produce a similar function. I am not sure why you are doing a grid search, though, as you are not enumerating over parameters; instead, you are using the parameters you specified in xgb1. Here is the modified code:
import xgboost as xgb
import sklearn
import numpy as np
from sklearn.model_selection import GridSearchCV

def modelfit(alg, train_data_features, train_labels, useTrainCV=True, cv_folds=5):
    if useTrainCV:
        params = alg.get_xgb_params()
        xgb_param = dict([(key, [params[key]]) for key in params])
        boost = xgb.sklearn.XGBClassifier()
        cvresult = GridSearchCV(boost, xgb_param, cv=cv_folds)
        cvresult.fit(X, y)
        alg = cvresult.best_estimator_

    #Fit the algorithm on the data
    alg.fit(train_data_features, train_labels)

    #Predict training set:
    dtrain_predictions = alg.predict(train_data_features)
    dtrain_predprob = alg.predict_proba(train_data_features)[:, 1]

    #Print model report:
    print("\nModel Report")
    print("Accuracy : %.4g" % sklearn.metrics.accuracy_score(train_labels, dtrain_predictions))

xgb1 = xgb.sklearn.XGBClassifier(
    learning_rate=0.1,
    n_estimators=50,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    nthread=4,
    scale_pos_weight=1,
    seed=27)

X = np.random.normal(size=(200, 30))
y = np.random.randint(0, 5, 200)

modelfit(xgb1, X, y)
The output that I get is
Model Report
Accuracy : 1
Note that I used a much smaller data size. With the size that you mentioned, the algorithm may be very slow.
The error is because you are trying to use the AUC evaluation metric for multiclass classification, but AUC is only applicable to two-class problems. In the xgboost implementation, "auc" expects the prediction size to be the same as the label size, while your multiclass prediction size would be 45001*1161. Use either the "mlogloss" or "merror" multiclass metrics.
P.S.: currently, xgboost would be rather slow with so many classes, as there is some inefficiency with prediction caching during training.
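As a concrete illustration of that suggestion, here is a sketch of the cv call inside modelfit with a multiclass metric (the variable names are the ones modelfit defines; this is a sketch, not a verified end-to-end fix):

cvresult = xgb.cv(xgb_param,                      # already carries 'num_class': 1161
                  xgtrain,
                  num_boost_round=alg.get_params()['n_estimators'],
                  nfold=cv_folds,
                  metrics='mlogloss',             # or 'merror'; both are defined for multiclass
                  early_stopping_rounds=early_stopping_rounds)
alg.set_params(n_estimators=cvresult.shape[0])

The eval_metric='auc' passed later to alg.fit would need the same change, for the same reason.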
