Problems with StackingRegressor - python

This error occurred when I used scikit-learn to perform model fusion on 7 sub-models. I checked the official documentation but found no relevant information.
code:
# model fusion
estimators = [('DT', model_dt_x), ('KNN', model_knn_x), ('SVR', model_svr_x), ('ANN', model_ann_x), ('RF', model_rf_x), ('GBDT', model_gbdt_x), ('XGBT', model_xgbt_x)]
stacking_regressor = StackingRegressor(estimators=estimators, final_estimator=RidgeCV())
stacking_regressor.fit(X_train, y_train)
error:
Traceback (most recent call last):
File "i:/Lab/20210xxx/ex.py", line 86, in <module>
stacking_regressor.fit(X_train, y_train)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py", line 680, in fit
return super().fit(X, y, sample_weight)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py", line 148, in fit
for est in all_estimators if est != 'drop'
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 921, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 182, in
apply_async
result = ImmediateResult(func)
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 549, in __init__
self.results = batch()
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "C:\Users\xxx\Anaconda3\lib\site-packages\joblib\parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py", line 40, in
_fit_single_estimator
estimator.fit(X, y)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 641, in fit
return self._fit(X, y, incremental=False)
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 321, in _fit
self._validate_hyperparameters()
File "C:\Users\xxx\Anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py",
line 385, in _validate_hyperparameters
if self.max_fun <= 0:
TypeError: '<=' not supported between instances of 'NoneType' and 'int'

Problem solved:
model_ann_x = MLPRegressor(**model_ann.get_params())
model_ann_x.set_params(max_fun=15000)  # set explicitly so validation no longer sees None
model_rf_x = RandomForestRegressor(**model_rf.get_params())
model_rf_x.set_params(ccp_alpha=0.0)  # set explicitly so validation no longer sees None
model_gbdt_x = GradientBoostingRegressor(**model_gbdt.get_params())
model_gbdt_x.set_params(ccp_alpha=0.0)  # set explicitly so validation no longer sees None
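A plausible reading of why this works, offered as an assumption rather than something the docs confirm: the sub-models were built by copying get_params() from estimators created under a different scikit-learn version, so hyperparameters that version did not know about (max_fun, ccp_alpha) came through as None, and StackingRegressor clones and re-validates every sub-estimator inside fit. A minimal sketch of a more general cleanup along the same lines:
from sklearn.neural_network import MLPRegressor

# Hypothetical variant of the fix above: drop None-valued params so the
# rebuilt estimator falls back to the current version's defaults.
params = {k: v for k, v in model_ann.get_params().items() if v is not None}
model_ann_x = MLPRegressor(**params)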

Related

FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan

There are other Stack Overflow questions similar to this one, but I still couldn't find the correct fix.
def _scorerForUnSupervised(estimator, X):
    return np.mean(estimator.decision_function(X))

ifclassifier = IForest(behaviour='new',
                       max_samples="auto",
                       random_state=np.random.RandomState(42),
                       verbose=1,
                       n_jobs=-1)
IF_Hyperparams = {'n_estimators': [100, 200], 'contamination': [0.01, 0.05], 'bootstrap': [True, False]}
ifgrid = GridSearchCV(ifclassifier,
                      IF_Hyperparams,
                      scoring=_scorerForUnSupervised,
                      cv=3,
                      n_jobs=-1)
grid_result = ifgrid.fit(train_data)
fit throws the warning, but I am also getting an OSError:
C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\model_selection\_validation.py:615: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\model_selection\_validation.py", line 596, in _fit_and_score
estimator.fit(X_train, **fit_params)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\pyod\models\iforest.py", line 230, in fit
self.detector_.fit(X=X, y=None, sample_weight=None)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\ensemble\_iforest.py", line 278, in fit
super()._fit(X, y, max_samples,
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\sklearn\ensemble\_bagging.py", line 370, in _fit
all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose,
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 1041, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\_parallel_backends.py", line 531, in apply_async
future = self._workers.submit(SafeFunction(func))
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\reusable_executor.py", line 177, in submit
return super(_ReusablePoolExecutor, self).submit(
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1122, in submit
self._ensure_executor_running()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1096, in _ensure_executor_running
self._adjust_process_count()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\process_executor.py", line 1087, in _adjust_process_count
p.start()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\process.py", line 39, in _Popen
return Popen(process_obj)
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\popen_loky_win32.py", line 54, in __init__
prep_data = spawn.get_preparation_data(
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\spawn.py", line 86, in get_preparation_data
_resource_tracker.ensure_running()
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 102, in ensure_running
if self._check_alive():
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 182, in _check_alive
self._send('PROBE', '', '')
File "C:\Users\AD\AppData\Local\Programs\Python\Python39\Lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 209, in _send
nbytes = os.write(self._fd, msg)
OSError: [Errno 22] Invalid argument
The data and parameters are good; there are no missing values or typos in the parameters.
The data is:
train_data: [[0.39646672]
[0.32037798]
[0.09515201]
[0.08167625]
[0.06491372]
[0.07173377]
[0.16557108]
[0.62966311]
[1. ]
[0.06244864]
....
I would like to know why that warning is coming. Can I suppress it?
Also, I am not sure why I am getting "OSError: [Errno 22] Invalid argument". I have seen this before with n_jobs=-1, and switching to n_jobs=1 made it go away, but in this case the OSError is still thrown even with n_jobs=1.
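On the suppression question specifically, here is a sketch using the standard warnings machinery (this only hides the FitFailedWarning message; it does not repair the fits that are failing underneath, and it will not touch the OSError):
import warnings
from sklearn.exceptions import FitFailedWarning

# Silence only FitFailedWarning, and only for the duration of the fit.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=FitFailedWarning)
    grid_result = ifgrid.fit(train_data)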

ML model results in `AttributeError: 'OneHotEncoder' object has no attribute '_infrequent_enabled'` [closed]

I am trying to run ServingMLFastCelery, which is also available and explained on the Towards Data Science website.
The machine learning model works perfectly, but when I test the complete project, this error appears:
[2022-05-18 11:37:45,306: ERROR/MainProcess] Task celery_task_app.tasks.Churn raised unexpected: AttributeError("'OneHotEncoder' object has no attribute '_infrequent_enabled'")
Traceback (most recent call last):
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\celery\app\trace.py", line 405, in trace_task
R = retval = fun(*args, **kwargs)
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\tasks.py", line 30, in __call__
return self.run(*args, **kwargs)
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\tasks.py", line 42, in predict_churn_single
pred_array = self.model.predict([data])
File "C:\Users\diego\codes\ServingMLFastCelery\celery_task_app\ml\model.py", line 27, in predict
predictions = self.model.predict_proba(df)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\pipeline.py", line 523, in predict_proba
Xt = transform.transform(Xt)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\compose\_column_transformer.py", line 746, in transform
Xs = self._fit_transform(
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\compose\_column_transformer.py", line 604, in _fit_transform
return Parallel(n_jobs=self.n_jobs)(
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 1044, in __call__
while self.dispatch_one_batch(iterator):
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
self._dispatch(tasks)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\_parallel_backends.py", line 572, in __init__
self.results = batch()
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\joblib\parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\utils\fixes.py", line 117, in __call__
return self.function(*args, **kwargs)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\pipeline.py", line 853, in _transform_one
res = transformer.transform(X)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\preprocessing\_encoders.py", line 888, in transform
self._map_infrequent_categories(X_int, X_mask)
File "c:\users\diego\anaconda3\envs\k38\lib\site-packages\sklearn\preprocessing\_encoders.py", line 726, in _map_infrequent_categories
if not self._infrequent_enabled:
AttributeError: 'OneHotEncoder' object has no attribute '_infrequent_enabled'
The part of the prediction model that uses OneHotEncoder is:
preprocessing_pipeline = ColumnTransformer(transformers=[
    ('num', StandardScaler(), NUMERICAL_FEATURES),
    ('cat', OneHotEncoder(sparse=False), CATEGORICAL_FEATURES)
])
df_new = pd.DataFrame(preprocessing_pipeline.fit_transform(df))
I tried some solutions available on the internet, but none worked for this case.
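One detail that may be worth ruling out (an assumption based on the attribute name, not something the question confirms): attributes like _infrequent_enabled are private and version-dependent, so an encoder pickled under one scikit-learn release and loaded under another can lack them. A quick check to run in both the training environment and the Celery worker:
import sklearn

# The two environments should report the same version; a mismatch would
# explain a fitted encoder missing newer private attributes.
print(sklearn.__version__)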

I am getting this error of torch.FloatTensor and torch.cuda.FloatTensor mismatch

I am getting this error while running the training code of a model.
Traceback (most recent call last):
File "train.py", line 273, in <module>
train_loss[epoch - 1] = process_epoch(
File "train.py", line 240, in process_epoch
loss = loss_fn(model, batch)
File "train.py", line 221, in <lambda>
loss_fn = lambda model, batch: weak_loss(model, batch, normalization="softmax")
File "train.py", line 171, in weak_loss
corr4d = model(batch).to("cuda")
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/ncnet/lib/model.py", line 263, in forward
feature_A = self.FeatureExtraction(tnf_batch['source_image'])
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/ncnet/lib/model.py", line 84, in forward
features = self.model(image_batch)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 353, in forward
return self._conv_forward(input, self.weight)
File "/home/srtf/anaconda3/envs/ncnet/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 349, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
CUDA is available on the system. Where do I need to make changes in the code?
Your input needs to be sent to the correct device:
>>> corr4d = model(batch.cuda())
This will copy the batch to the GPU device ('cuda:0' by default).
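A slightly fuller sketch of the same idea, assuming batch is a tensor as in the snippet above: keep the model's weights and every input on one device.
import torch

# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)          # move the weights once
corr4d = model(batch.to(device))  # move each batch alongside them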

get_flat_tensor_specs nest.flatten(element_spec), []) results in AttributeError: 'list' object has no attribute '_flat_tensor_specs'

AttributeError: 'list' object has no attribute '_flat_tensor_specs'
problem:
Currently, when creating an iterative process with build_federated_averaging_process, I am able to pass in a functools.partial successfully. However, there must be an error in the code below that produces the structure needed for the model_fn to go through correctly.
what I have tried:
Looking at the input data. This custom dataset is built for an autoencoder, and the original TF solution never used labels. The solution is built to take in the training data, train the model, then work on the test data, and validate on the validation set to produce a threshold. There might be an issue with the underlying production.
source:
df, y_train = get_train_data(sysarg)
x_train, x_opt, x_test = np.split(df.sample(frac=1, random_state=17),
                                  [int(1 / 3 * len(df)), int(2 / 3 * len(df))])
x_train, x_opt, x_test = create_scalar(x_opt, x_test, x_train)
input_spec = tf.nest.map_structure(tf.TensorSpec.from_tensor,
                                   [tf.convert_to_tensor(x_train),
                                    tf.convert_to_tensor(y_train)])
assign_weights_fn = compression_process_adapter.CompressionServerState.assign_weights_to_keras_model
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn=functools.partial(model_builder,
                               input_dim=sysarg,
                               input_spec=input_spec),
    client_optimizer_fn=client_optimizer_fn,
    server_optimizer_fn=server_optimizer_fn,
)
iterative_process = compression_process_adapter.CompressionProcessAdapter(iterative_process)
Traceback:
Traceback (most recent call last):
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-47998fd56829>", line 1, in <module>
runfile('B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/train_v04.py', args=['--experiment_name=temp', '--client_batch_size=20', '--client_optimizer=sgd', '--client_learning_rate=0.2', '--server_optimizer=sgd', '--server_learning_rate=1.0', '--total_rounds=200', '--rounds_per_eval=1', '--rounds_per_checkpoint=50', '--rounds_per_profile=0', '--root_output_dir=B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/logs/fed_out/'], wdir='B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection')
File "B:\tools and software\PyCharm 2020.1\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "B:\tools and software\PyCharm 2020.1\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/train_v04.py", line 306, in <module>
app.run(main)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\absl\app.py", line 299, in run
_run_main(main, args)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\absl\app.py", line 250, in _run_main
sys.exit(main(argv))
File "B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/train_v04.py", line 299, in main
train_main()
File "B:/projects/openProjects/githubprojects/BotnetTrafficAnalysisFederaedLearning/anomaly-detection/train_v04.py", line 262, in train_main
server_optimizer_fn=server_optimizer_fn,
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\learning\federated_averaging.py", line 211, in build_federated_averaging_process
stateful_delta_aggregate_fn, stateful_model_broadcast_fn)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\learning\framework\optimizer_utils.py", line 521, in build_model_delta_optimizer_process
model_broadcast_state_type=model_broadcast_state_type)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\learning\framework\optimizer_utils.py", line 368, in _build_one_round_computation
@tff.tf_computation(dataset_type, model_weights_type)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\wrappers\computation_wrapper.py", line 337, in <lambda>
return lambda fn: _wrap(fn, arg_type, self._wrapper_fn)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\wrappers\computation_wrapper.py", line 89, in _wrap
concrete_fn = wrapper_fn(fn, parameter_type, unpack=None)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\wrappers\computation_wrapper_instances.py", line 51, in _tf_wrapper_fn
target_fn, parameter_type, ctx_stack)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\tensorflow_serialization.py", line 274, in serialize_py_fn_as_tf_computation
result = target(*args)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\utils\function_utils.py", line 517, in <lambda>
return lambda arg: _unpack_and_call(fn, arg_types, kwarg_types, arg)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\core\impl\utils\function_utils.py", line 510, in _unpack_and_call
return fn(*args, **kwargs)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\learning\framework\optimizer_utils.py", line 381, in _compute_local_training_and_client_delta
client_output = client_delta_fn(dataset, initial_model_weights)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\def_function.py", line 627, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\def_function.py", line 506, in _initialize
*args, **kwds))
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\function.py", line 2446, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\function.py", line 2777, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\function.py", line 2667, in _create_graph_function
capture_by_value=self._capture_by_value),
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\framework\func_graph.py", line 981, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\def_function.py", line 441, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\eager\function.py", line 3299, in bound_method_wrapper
return wrapped_fn(*args, **kwargs)
File "B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\framework\func_graph.py", line 968, in wrapper
raise e.ag_error_metadata.to_exception(e)
Additional error output:
AttributeError: in user code:
B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow_federated\python\learning\federated_averaging.py:90 __call__ *
num_examples_sum = dataset.reduce(
B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py:1932 reduce **
add_to_graph=False)
B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py:3210 __init__
self._input_structure),
B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\data\util\structure.py:270 get_flat_tensor_specs
nest.flatten(element_spec), [])
B:\tools and software\Anaconda\envs\bookProjects\lib\site-packages\tensorflow\python\data\util\structure.py:269 <lambda>
return functools.reduce(lambda state, value: state + value._flat_tensor_specs,
AttributeError: 'list' object has no attribute '_flat_tensor_specs'
GitHub link
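A small diagnostic sketch using only public TensorFlow APIs: flatten input_spec the same way the failing structure.py code does and confirm every leaf is a TypeSpec rather than a plain Python list.
import tensorflow as tf

# Each leaf should be a tf.TensorSpec (a TypeSpec subclass); a plain list
# surviving the flatten is a likely source of the AttributeError above.
for leaf in tf.nest.flatten(input_spec):
    print(type(leaf), isinstance(leaf, tf.TypeSpec))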

Getting error AttributeError: 'bool' object has no attribute 'transpose' when attempting to fit machine learning model

I am trying to create a machine learning model to predict who would survive on the Titanic. Every time I try to fit my model, I get this error:
Traceback (most recent call last):
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\ptvsd_launcher.py", line 48, in <module>
main(ptvsdArgs)
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 432, in main
run()
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 316, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Python\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Python\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Python\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "d:\Kaggle\Titanic\titanic4.py", line 100, in <module>
cat_cols2 = pd.DataFrame(OneHot1.fit_transform(new_df[cat_columns]))
File "D:\Python\lib\site-packages\pandas\core\frame.py", line 2806, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "D:\Python\lib\site-packages\pandas\core\indexing.py", line 1552, in _get_listlike_indexer
self._validate_read_indexer(
File "D:\Python\lib\site-packages\pandas\core\indexing.py", line 1640, in _validate_read_indexer
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Index(['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], dtype='object')] are in the [columns]"
PS D:\Kaggle\Titanic> cd 'd:\Kaggle\Titanic'; ${env:PYTHONIOENCODING}='UTF-8'; ${env:PYTHONUNBUFFERED}='1'; & 'D:\Python\python.exe' 'c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\ptvsd_launcher.py' '--default' '--client' '--host' 'localhost' '--port' '60778' 'd:\Kaggle\Titanic\titanic4.py'
Traceback (most recent call last):
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\ptvsd_launcher.py", line 48, in <module>
main(ptvsdArgs)
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 432, in main
run()
File "c:\Users\seand\.vscode\extensions\ms-python.python-2020.6.89148\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 316, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Python\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Python\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Python\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "d:\Kaggle\Titanic\titanic4.py", line 143, in <module>
my_pipeline.fit(new_df,y)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 330, in fit
Xt = self._fit(X, y, **fit_params_steps)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 292, in _fit
X, fitted_transformer = fit_transform_one_cached(
File "D:\Python\lib\site-packages\joblib\memory.py", line 352, in __call__
return self.func(*args, **kwargs)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 740, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "D:\Python\lib\site-packages\sklearn\compose\_column_transformer.py", line 531, in fit_transform
result = self._fit_transform(X, y, _fit_transform_one)
File "D:\Python\lib\site-packages\sklearn\compose\_column_transformer.py", line 458, in _fit_transform
return Parallel(n_jobs=self.n_jobs)(
File "D:\Python\lib\site-packages\joblib\parallel.py", line 1032, in __call__
while self.dispatch_one_batch(iterator):
File "D:\Python\lib\site-packages\joblib\parallel.py", line 847, in dispatch_one_batch
self._dispatch(tasks)
File "D:\Python\lib\site-packages\joblib\parallel.py", line 765, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "D:\Python\lib\site-packages\joblib\_parallel_backends.py", line 206, in apply_async
result = ImmediateResult(func)
File "D:\Python\lib\site-packages\joblib\_parallel_backends.py", line 570, in __init__
self.results = batch()
File "D:\Python\lib\site-packages\joblib\parallel.py", line 252, in __call__
return [func(*args, **kwargs)
File "D:\Python\lib\site-packages\joblib\parallel.py", line 252, in <listcomp>
return [func(*args, **kwargs)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 740, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 367, in fit_transform
Xt = self._fit(X, y, **fit_params_steps)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 292, in _fit
X, fitted_transformer = fit_transform_one_cached(
File "D:\Python\lib\site-packages\joblib\memory.py", line 352, in __call__
return self.func(*args, **kwargs)
File "D:\Python\lib\site-packages\sklearn\pipeline.py", line 740, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "D:\Python\lib\site-packages\sklearn\base.py", line 693, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "D:\Python\lib\site-packages\sklearn\impute\_base.py", line 459, in transform
coordinates = np.where(mask.transpose())[::-1]
AttributeError: 'bool' object has no attribute 'transpose'
The code I am running is the following:
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectFromModel
from itertools import combinations
import pandas as pd
import numpy as np

# read in data
training_data = pd.read_csv('train.csv')
testing_data = pd.read_csv('test.csv')

# separate X and y
X_train_full = training_data.copy()
y = X_train_full.Survived
X_train_full.drop(['Survived'], axis=1, inplace=True)
y_test = testing_data

# get all str columns
cat_columns1 = [cname for cname in X_train_full.columns if
                X_train_full[cname].dtype == "object"]

interactions = pd.DataFrame(index=X_train_full)
# create new features
for combination in combinations(cat_columns1, 2):
    imputer = SimpleImputer(strategy='constant')
    new_col_name = '_'.join(combination)
    col1 = X_train_full[combination[0]]
    col2 = X_train_full[combination[1]]
    col1 = np.array(col1).reshape(-1, 1)
    col2 = np.array(col2).reshape(-1, 1)
    col1 = imputer.fit_transform(col1)
    col2 = imputer.fit_transform(col2)
    new_vals = col1 + '_' + col2
    OneHot = OneHotEncoder()
    interactions[new_col_name] = OneHot.fit_transform(new_vals)
interactions = interactions.reset_index(drop=True)

# create new dataframe with new features included
new_df = X_train_full.join(interactions)

# do the same for the test file
interactions2 = pd.DataFrame(index=y_test)
for combination in combinations(cat_columns1, 2):
    imputer = SimpleImputer(strategy='constant')
    new_col_name = '_'.join(combination)
    col1 = y_test[combination[0]]
    col2 = y_test[combination[1]]
    col1 = np.array(col1).reshape(-1, 1)
    col2 = np.array(col2).reshape(-1, 1)
    col1 = imputer.fit_transform(col1)
    col2 = imputer.fit_transform(col2)
    new_vals = col1 + '_' + col2
    OneHot = OneHotEncoder()
    interactions2[new_col_name] = OneHot.fit_transform(new_vals)
    interactions2[new_col_name] = new_vals
interactions2 = interactions2.reset_index(drop=True)
y_test = y_test.join(interactions2)

# get names of cat columns (with new features added)
cat_columns = [cname for cname in new_df.columns if
               new_df[cname].dtype == "object"]
# select numerical columns
num_columns = [cname for cname in new_df.columns if
               new_df[cname].dtype in ['int64', 'float64']]

# set up pipeline
numerical_transformer = SimpleImputer(strategy='constant')
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, num_columns),
        ('cat', categorical_transformer, cat_columns)
    ])
model = XGBClassifier()
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                              ('model', model)])
# fit model
my_pipeline.fit(new_df, y)
The csv files I am reading are available from Kaggle at this link :
https://www.kaggle.com/c/titanic/data
I cannot figure out what is causing this problem. Any help would be much appreciated.
This probably happens because your data contains pd.NA values. pd.NA was introduced in pandas 1.0.0 but is still marked as experimental.
SimpleImputer will ultimately run data == np.nan, which would usually return a numpy array. Instead, it returns a single boolean scalar when data contains pd.NA values.
An example:
import pandas as pd
import numpy as np
test_pd_na = pd.DataFrame({"A": [1, 2, 3, pd.NA]})
test_np_nan = pd.DataFrame({"A": [1, 2, 3, np.nan]})
test_np_nan.to_numpy() == np.nan
> array([[False],
[False],
[False],
[False]])
test_pd_na.to_numpy() == np.nan
> False
The solution would be to convert all pd.NA values to np.nan before running SimpleImputer. You can use .replace({pd.NA: np.nan}) on your data frames for this purpose. The downside is obviously that you lose the benefits pd.NA brings, such as integer columns with missing data, instead of those columns being converted to float columns.
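A minimal sketch of that workaround applied to the code from the question, with the conversion done just before fitting:
import numpy as np
import pandas as pd

# Convert experimental pd.NA markers to np.nan so SimpleImputer's
# missing-value mask comes back as a numpy array, not a scalar.
new_df = new_df.replace({pd.NA: np.nan})
my_pipeline.fit(new_df, y)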
