Memory error on Random Forest Classifier prediction - python

I have fitted a Random Forest classifier on my dataset, which contains 7 features and about 1 million rows.
Following is my code:
randForestClassifier = RandomForestClassifier(n_estimators=10, max_depth=3)
randForestClassifier.fit(X_train, y)
pred = randForestClassifier.predict(featues_test)
I am getting a MemoryError when I call the predict method of my classifier. How do I fix it?
Following is the complete log:
randForestClassifier.predict(featues_test)
Traceback (most recent call last):
File "<ipython-input-15-0b7612d6e958>", line 1, in <module>
randForestClassifier.predict(featues_test)
File "C:\Python27\lib\site-packages\sklearn\ensemble\forest.py", line 462, in predict
proba = self.predict_proba(X)
File "C:\Python27\lib\site-packages\sklearn\ensemble\forest.py", line 513, in predict_proba
for e in self.estimators_)
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 659, in __call__
self.dispatch(function, args, kwargs)
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 406, in dispatch
job = ImmediateApply(func, args, kwargs)
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 140, in __init__
self.results = func(*args, **kwargs)
File "C:\Python27\lib\site-packages\sklearn\ensemble\forest.py", line 106, in _parallel_helper
return getattr(obj, methodname)(*args, **kwargs)
File "C:\Python27\lib\site-packages\sklearn\tree\tree.py", line 592, in predict_proba
proba = self.tree_.predict(X)
File "sklearn/tree/_tree.pyx", line 3207, in sklearn.tree._tree.Tree.predict (sklearn\tree\_tree.c:24468)
File "sklearn/tree/_tree.pyx", line 3209, in sklearn.tree._tree.Tree.predict (sklearn\tree\_tree.c:24340)
MemoryError

Yes, you are getting the MemoryError at randForestClassifier.predict(featues_test), as shown by the stack trace:
File "<ipython-input-15-0b7612d6e958>", line 1, in <module>
randForestClassifier.predict(featues_test)
The remaining lines of the stack trace show that the problem comes from sklearn, in the C code: sklearn\tree\_tree.c:24340
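If the test set itself fits in memory, one common workaround (a sketch, not part of the original answer; it assumes featues_test is a NumPy array) is to predict in chunks, so the temporary arrays allocated inside each predict call stay small:

import numpy as np

# Predict a tenth of the rows at a time (the chunk count is arbitrary)
preds = np.concatenate([
    randForestClassifier.predict(chunk)
    for chunk in np.array_split(featues_test, 10)
])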

FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan

There are other Stack Overflow questions similar to this one, but I still couldn't find the correct fix.
import numpy as np
from pyod.models.iforest import IForest
from sklearn.model_selection import GridSearchCV

def _scorerForUnSupervised(estimator, X):
    return np.mean(estimator.decision_function(X))

ifclassifier = IForest(behaviour='new',
                       max_samples="auto",
                       random_state=np.random.RandomState(42),
                       verbose=1,
                       n_jobs=-1)

IF_Hyperparams = {'n_estimators': [100, 200], 'contamination': [0.01, 0.05], 'bootstrap': [True, False]}

ifgrid = GridSearchCV(ifclassifier,
                      IF_Hyperparams,
                      scoring=_scorerForUnSupervised,
                      cv=3,
                      n_jobs=-1)
grid_result = ifgrid.fit(train_data)
fit throws the warning, but I am also getting an OSError:
C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\model_selection\_validation.py:615: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\model_selection\_validation.py", line 596, in _fit_and_score
estimator.fit(X_train, **fit_params)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\pyod\models\iforest.py", line 230, in fit
self.detector_.fit(X=X, y=None, sample_weight=None)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\ensemble\_iforest.py", line 278, in fit
super()._fit(X, y, max_samples,
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\ensemble\_bagging.py", line 370, in _fit
all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose,
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 1041, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\_parallel_backends.py", line 531, in apply_async
future = self._workers.submit(SafeFunction(func))
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\reusable_executor.py", line 177, in submit
return super(_ReusablePoolExecutor, self).submit(
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1122, in submit
self._ensure_executor_running()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1096, in _ensure_executor_running
self._adjust_process_count()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1087, in _adjust_process_count
p.start()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\process.py", line 39, in _Popen
return Popen(process_obj)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\popen_loky_win32.py", line 54, in __init__
prep_data = spawn.get_preparation_data(
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\spawn.py", line 86, in get_preparation_data
_resource_tracker.ensure_running()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 102, in ensure_running
if self._check_alive():
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 182, in _check_alive
self._send('PROBE', '', '')
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 209, in _send
nbytes = os.write(self._fd, msg)
OSError: [Errno 22] Invalid argument
The data and parameters are good; there are no missing values or typos in the parameters.
The data is:
train_data: [[0.39646672]
[0.32037798]
[0.09515201]
[0.08167625]
[0.06491372]
[0.07173377]
[0.16557108]
[0.62966311]
[1. ]
[0.06244864]
....
I would like to know why that warning appears. Can I suppress it?
Also, I am not sure why I am getting "OSError: [Errno 22] Invalid argument". I have seen this before with n_jobs=-1, and switching to n_jobs=1 made the error go away, but in this case the OSError is still thrown even with n_jobs=1.
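One way to debug the FitFailedWarning (a sketch; error_score and FitFailedWarning are standard scikit-learn features, but whether they expose the cause here is an assumption) is to make GridSearchCV raise the underlying exception instead of scoring the failed fold as nan, and to silence the warning only once the cause is understood:

import warnings
from sklearn.exceptions import FitFailedWarning
from sklearn.model_selection import GridSearchCV

# Surface the real exception instead of a nan score for the failed fold
ifgrid = GridSearchCV(ifclassifier, IF_Hyperparams,
                      scoring=_scorerForUnSupervised,
                      cv=3, n_jobs=1, error_score='raise')

# Once the cause is understood, the warning itself can be suppressed
warnings.filterwarnings("ignore", category=FitFailedWarning)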

ML models result in `AttributeError: 'OneHotEncoder' object has no attribute '_infrequent_enabled'` [closed]

I am trying to run the ServingMLFastCelery project, which is also available and explained on the Towards Data Science website.
The machine learning model works perfectly, but when I test the complete project, this error appears:
[2022-05-18 11:37:45,306: ERROR/MainProcess] Task celery_task_app.tasks.Churn raised unexpected: AttributeError("'OneHotEncoder' object has no attribute '_infrequent_enabled'")
Traceback (most recent call last):
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\celery\app\trace.py", line 405, in trace_task
R = retval = fun(*args, **kwargs)
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\tasks.py", line 30, in __call__
return self.run(*args, **kwargs)
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\tasks.py", line 42, in predict_churn_single
pred_array = self.model.predict([data])
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\ml\model.py", line 27, in predict
predictions = self.model.predict_proba(df)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\pipeline.py", line 523, in predict_proba
Xt = transform.transform(Xt)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\compose\_column_transformer.py", line 746, in transform
Xs = self._fit_transform(
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\compose\_column_transformer.py", line 604, in _fit_transform
return Parallel(n_jobs=self.n_jobs)(
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 1044, in __call__
while self.dispatch_one_batch(iterator):
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\_parallel_backends.py", line 572, in __init__
self.results = batch()
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\utils\fixes.py", line 117, in __call__
return self.function(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\pipeline.py", line 853, in _transform_one
res = transformer.transform(X)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\preprocessing\_encoders.py", line 888, in transform
self._map_infrequent_categories(X_int, X_mask)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\preprocessing\_encoders.py", line 726, in _map_infrequent_categories
if not self._infrequent_enabled:
AttributeError: 'OneHotEncoder' object has no attribute '_infrequent_enabled'
The part of the prediction model that uses OneHotEncoder is:
preprocessing_pipeline = ColumnTransformer(transformers=[
    ('num', StandardScaler(), NUMERICAL_FEATURES),
    ('cat', OneHotEncoder(sparse=False), CATEGORICAL_FEATURES)
])
df_new = pd.DataFrame(preprocessing_pipeline.fit_transform(df))
I tried some solutions available on the internet, but none worked for this case.
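One likely cause (an assumption on my part, not confirmed in the question): the pipeline was pickled with one scikit-learn version and unpickled with another. The _infrequent_enabled attribute is set on OneHotEncoder at fit time starting around scikit-learn 1.1, so an encoder fitted under an older version lacks it when a newer version's transform() runs. A minimal check:

import sklearn

# Compare against the version that trained/pickled the model;
# refitting (or re-pickling) under the serving version usually resolves it.
print(sklearn.__version__)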

I am getting this error of torch.FloatTensor and torch.cuda.FloatTensor mismatch

I am getting this error while running the training code of a model.
Traceback (most recent call last):
File "train.py", line 273, in <module>
train_loss[epoch - 1] = process_epoch(
File "train.py", line 240, in process_epoch
loss = loss_fn(model, batch)
File "train.py", line 221, in <lambda>
loss_fn = lambda model, batch: weak_loss(model, batch, normalization="softmax")
File "train.py", line 171, in weak_loss
corr4d = model(batch).to("cuda")
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/ncnet/lib/model.py", line 263, in forward
feature_A = self.FeatureExtraction(tnf_batch['source_image'])
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/ncnet/lib/model.py", line 84, in forward
features = self.model(image_batch)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 353, in forward
return self._conv_forward(input, self.weight)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 349, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
CUDA is available on the system. Where do I need to make changes in the code?
Your input needs to be sent to the correct device:
>>> corr4d = model(batch.cuda())
which will copy the batch to the GPU device ('cuda:0' by default).
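A more general pattern (a sketch; model and batch stand in for the objects from the question) is to resolve the device once and move both the model and each batch to it before the forward pass:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)   # move the parameters once
batch = batch.to(device)   # move every input batch before calling the model
corr4d = model(batch)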

Problems with StackingRegressor

This error occurred when I used scikit-learn to perform model fusion on 7 sub-models. I checked the official documentation, but there was no relevant information.
code:
# model fusion
estimators = [('DT', model_dt_x), ('KNN', model_knn_x), ('SVR', model_svr_x), ('ANN', model_ann_x), ('RF', model_rf_x), ('GBDT', model_gbdt_x), ('XGBT', model_xgbt_x)]
stacking_regressor = StackingRegressor(estimators=estimators, final_estimator=RidgeCV())
stacking_regressor.fit(X_train, y_train)
error:
Traceback (most recent call last):
File "i:/Lab/20210xxx/ex.py", line 86, in <module>
stacking_regressor.fit(X_train, y_train)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py", line 680, in fit
return super().fit(X, y, sample_weight)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py", line 148, in fit
for est in all_estimators if est != 'drop'
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 921, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 182, in
apply_async
result = ImmediateResult(func)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 549, in __init__
self.results = batch()
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py", line 40, in
_fit_single_estimator
estimator.fit(X, y)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 641, in fit
return self._fit(X, y, incremental=False)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 321, in _fit
self._validate_hyperparameters()
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 385, in _validate_hyperparameters
if self.max_fun <= 0:
TypeError: '<=' not supported between instances of 'NoneType' and 'int'
Problem solved. The last line of the traceback shows that max_fun was None when MLPRegressor validated its hyperparameters, so setting the offending parameters explicitly on the copied sub-models removes the error (the same fix was applied to ccp_alpha for the tree-based models):
model_ann_x = MLPRegressor(**model_ann.get_params())
model_ann_x.set_params(max_fun=15000)   # max_fun came through as None
model_rf_x = RandomForestRegressor(**model_rf.get_params())
model_rf_x.set_params(ccp_alpha=0.0)    # same fix for ccp_alpha
model_gbdt_x = GradientBoostingRegressor(**model_gbdt.get_params())
model_gbdt_x.set_params(ccp_alpha=0.0)  # same fix for ccp_alpha
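To spot such parameters before fitting (a sketch; model_ann, model_rf, and model_gbdt are the fitted sub-models from the question), one can list everything get_params() returns as None, since validation code like `if self.max_fun <= 0` fails on None. Note that some None values are legitimate defaults (e.g. random_state); the suspicious ones are those a newer version validates numerically:

for name, est in [('ANN', model_ann), ('RF', model_rf), ('GBDT', model_gbdt)]:
    none_params = {k: v for k, v in est.get_params().items() if v is None}
    print(name, none_params)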

How to solve the problem of PyTorch stack?

I want to run a Python program using PyTorch. How can I make each tensor in the batch the same size? The following problem appears:
Traceback (most recent call last):
File "C:\Users\hp\Downloads\efficient_densenet_pytorch-master\demoEmotion.py", line 311, in <module>
fire.Fire(demo)
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\fire\core.py", line 138, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\fire\core.py", line 468, in _Fire
target=component.__name__)
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\fire\core.py", line 672, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "C:\Users\hp\Downloads\efficient_densenet_pytorch-master\demoEmotion.py", line 289, in demo
n_epochs=n_epochs, batch_size=batch_size, seed=seed)
File "C:\Users\hp\Downloads\efficient_densenet_pytorch-master\demoEmotion.py", line 168, in train
n_epochs=n_epochs,
File "C:\Users\hp\Downloads\efficient_densenet_pytorch-master\demoEmotion.py", line 42, in train_epoch
for batch_idx, (input, target) in enumerate(loader):
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\dataloader.py", line 346, in __next__
data = self._next_data()
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\dataloader.py", line 386, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\_utils\fetch.py", line 47, in fetch
return self.collate_fn(data)
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\_utils\collate.py", line 87, in default_collate
return [default_collate(samples) for samples in transposed]
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\_utils\collate.py", line 87, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\_utils\collate.py", line 72, in default_collate
return default_collate([torch.as_tensor(b) for b in batch])
File "C:\Users\hp\Anaconda3\envs\tf-gpu\lib\site-packages\torch\utils\data\_utils\collate.py", line 63, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [650] at entry 0 and [108] at entry 1
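A common fix (a sketch, assuming the dataset returns 1-D feature tensors of varying length, as the sizes [650] and [108] in the error suggest, together with scalar targets) is to pad the samples to a common length in a custom collate_fn passed to the DataLoader:

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is a list of (input, target) pairs; inputs are 1-D tensors of varying length
    inputs, targets = zip(*batch)
    inputs = pad_sequence([torch.as_tensor(x) for x in inputs],
                          batch_first=True)   # zero-pads to the longest sample
    return inputs, torch.as_tensor(targets)   # assumes scalar targets

# loader = DataLoader(dataset, batch_size=batch_size, collate_fn=pad_collate)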
