Custom function transformer not performing as expected - sklearn pipeline

Custom function transformer not performing as expected - sklearn pipeline - python

I'm writing a custom transformer for a scikit-learn Pipeline. The transformer seems to work on its own, and the fit() and transform() methods work individually, but when I include it in a pipeline, it raises an error stating:
AttributeError: 'NoneType' object has no attribute 'transform'
For reference, here is the code for my custom transformer:
# Custom transformer from the question (indentation was lost in extraction).
# fit() builds a FeatureUnion of the raw features plus per-column-group
# interaction terms; transform() hands back whatever is stored in self.Xt.
class feature_union(TransformerMixin, BaseEstimator):
def __init__(self):
# Slot for the transformed matrix; nothing below ever assigns to it again.
self.Xt = None
self.PI2_categories = ['D3', 'D4', 'A6', 'A5', 'D1', 'D2', 'A8', 'B2', 'E1',
'A1', 'A2', 'C1', 'C4', 'A7', 'C2', 'C3', 'A4', 'A3', 'B1']
def fit(self, X, y=None):
# Each *_idx list holds the positional indices of the columns of X whose
# names belong to one feature group. X.columns is read here, so X must
# expose column labels at fit time.
product_columns = ['Product_Info_1', 'Product_Info_3', 'Product_Info_5', 'Product_Info_6', 'Product_Info_7'] + self.PI2_categories
product_idx = [col for col in range(X.shape[1]) if X.columns[col] in product_columns]
personal_columns = ['Ins_Age', 'Ht', 'Wt', 'BMI']
personal_idx = [col for col in range(X.shape[1]) if X.columns[col] in personal_columns]
medical_hist_columns = ["Medical_History_{}".format(x) for x in range(1, 42, 1)]
medical_hist_idx = [col for col in range(X.shape[1]) if X.columns[col] in medical_hist_columns]
family_hist_columns = ["Family_Hist_{}".format(x) for x in range(1, 6, 1)]
family_hist_idx = [col for col in range(X.shape[1]) if X.columns[col] in family_hist_columns]
insured_info_columns = ["InsuredInfo_{}".format(x) for x in range(1, 8, 1)]
insured_info_idx = [col for col in range(X.shape[1]) if X.columns[col] in insured_info_columns]
insurance_hist_columns = ["Insurance_History_{}".format(x) for x in range(1, 10, 1)]
insurance_hist_idx = [col for col in range(X.shape[1]) if X.columns[col] in insurance_hist_columns]
employment_info_columns = ["Employment_Info_{}".format(x) for x in range(1, 7, 1)]
employment_info_idx = [col for col in range(X.shape[1]) if X.columns[col] in employment_info_columns]
medical_keyword_columns = ["Medical_Keyword_{}".format(x) for x in range(1, 49, 1)]
medical_keyword_idx = [col for col in range(X.shape[1]) if X.columns[col] in medical_keyword_columns]
# The next two lines repeat the two lines above verbatim.
medical_keyword_columns = ["Medical_Keyword_{}".format(x) for x in range(1, 49, 1)]
medical_keyword_idx = [col for col in range(X.shape[1]) if X.columns[col] in medical_keyword_columns]
# Column selectors wrapped by FunctionTransformer below. They use positional
# X[:, idx] indexing.
# NOTE(review): presumably relies on FunctionTransformer passing an array
# rather than the labelled X used above - confirm.
get_original_features = lambda X: X
get_product_columns = lambda X: X[:, product_idx]
get_personal_columns = lambda X: X[:, personal_idx]
get_medical_hist_columns = lambda X: X[:, medical_hist_idx]
get_family_hist_columns = lambda X: X[:, family_hist_idx]
get_insured_info_columns = lambda X: X[:, insured_info_idx]
get_insurance_hist_columns = lambda X: X[:, insurance_hist_idx]
get_employment_info_columns = lambda X: X[:, employment_info_idx]
get_medical_keyword_columns = lambda X: X[:, medical_keyword_idx]
# get_medical_and_family is defined but not referenced by the union below.
get_medical_and_family = lambda X: X[:, medical_keyword_idx + medical_hist_idx + family_hist_idx]
# One branch per feature group: select the group's columns, then expand them
# into interaction-only polynomial terms of the given degree.
union = FeatureUnion([
("original_features", FunctionTransformer(get_original_features)),
("product_interaction", Pipeline([('select_product', FunctionTransformer(get_product_columns)),
('product_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
("personal_interaction", Pipeline([('select_personal', FunctionTransformer(get_personal_columns)),
('personal_interaction', PolynomialFeatures(4, include_bias=False, interaction_only=True))
])),
("medical_hist_interaction", Pipeline([('select_medical', FunctionTransformer(get_medical_hist_columns)),
('medical_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
("family_hist_interaction", Pipeline([('select_family_hist', FunctionTransformer(get_family_hist_columns)),
('family_hist_interaction', PolynomialFeatures(5, include_bias=False, interaction_only=True))
])),
("insured_info_interaction", Pipeline([('select_insured_info', FunctionTransformer(get_insured_info_columns)),
('insured_info_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
("insurance_hist_interaction", Pipeline([('select_insurance_hist', FunctionTransformer(get_insurance_hist_columns)),
('insurance_hist_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
("employment_info_interaction", Pipeline([('select_employment_info', FunctionTransformer(get_employment_info_columns)),
('employment_info_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
("medical_keyword_interaction", Pipeline([('select_medical_keyword', FunctionTransformer(get_medical_keyword_columns)),
('medical_keyword_interaction', PolynomialFeatures(2, include_bias=False, interaction_only=True))
])),
])
# The result is bound to the local name Xt only; self.Xt keeps the None set
# in __init__, and the fitted `union` object is discarded when fit() returns.
Xt = union.fit_transform(X)
# Returns self.Xt (still None) rather than self, so anything chained onto
# fit()'s return value operates on None - this matches the reported
# "'NoneType' object has no attribute 'transform'".
return self.Xt
def transform(self, X, y=None):
# Ignores the X argument and returns the stored self.Xt.
Xt = self.Xt
return Xt
And when I use it in a pipeline like this:
pipeline_feat_union = Pipeline([('preprocess', preprocess()),
('feat_union', feature_union()),
('classifier', GaussianNB())])
It raises the following error:
AttributeError: 'NoneType' object has no attribute 'transform'

When writing custom transformer for a sklearn pipeline, your fit() method needs to return self or something with a similar interface, like so:
class Intercept(BaseEstimator, TransformerMixin):
    """Skeleton of a custom transformer usable as a Pipeline step.

    The essential contract shown here is that ``fit`` returns ``self`` so
    that calls chained onto its result (such as ``.transform``) work.
    """

    def __init__(self):
        # maybe do some initialization here, if your transformer needs it
        pass  # a method needs a statement as its body; a comment alone does not parse

    def fit(self, X, y=None):
        """Learn whatever state the transformer needs and return ``self``."""
        # Do something here to "fit" your transformer
        return self  # Always return self or something with a similar interface.

    def transform(self, X, y=None):
        """Return the transformed version of ``X``."""
        # apply your transformation here
        return some_awesome_transformation(X)
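and for reference, the line that is most likely throwing the exception is the one inside scikit-learn's TransformerMixin.fit_transform, `return self.fit(X, y, **fit_params).transform(X)`: it calls .transform() on whatever fit() returned (which is helpful because you can see why you need to return self in the fit() method)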

I ran into the same problem. The GaussianNB() class doesn't have a transform method defined.
But you don't need to use the transform method at all if you are including your classifier in the pipeline. The only two methods that you need are the fit method and the predict method.
pipeline_feat_union.fit(X_train, y_train)
pipeline_feat_union.predict(X_train)

Related

Transform y_train in scikit learn pipeline

I have a train/test split whose training part is named X_train and y_train, which I pass to a scikit-learn pipeline. Is it possible to have a custom step that transforms only y_train, i.e. removes NaNs and infs from y_train?
class columnDropperTransformer():
    """Pipeline step that removes a fixed set of columns from its input."""

    def __init__(self, columns):
        # Labels handed to ``X.drop`` in ``transform``.
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned; return the instance itself."""
        return self

    def transform(self, X, y=None):
        """Log the configured columns and return ``X`` without them."""
        to_drop = self.columns
        print('---- Dropping ID cols :', to_drop)
        return X.drop(to_drop, axis=1)
print('---- Making pipeline')
drop_cols = Pipeline(steps=[
("columnDropper", columnDropperTransformer(id_cols))
])
feature_remover = Pipeline(steps=[
("columnDropper", missingRemover())
])
fill_na_zero_transformer = Pipeline(steps=[
('zero_imputer', SimpleImputer(strategy='constant', fill_value=0))
])
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy = "constant", fill_value=-1, add_indicator=True)),
('scaler', StandardScaler())
])
class SkipSimpleImputer(SimpleImputer):
    """SimpleImputer that imputes only the 'MARITAL_STATUS' column.

    All other columns pass through untouched. When the column is absent the
    input is returned unchanged.

    No ``__init__`` override is defined on purpose: a ``**kwargs``-only
    constructor hides the imputer's parameters from ``get_params``, so a
    clone made by ColumnTransformer or Pipeline would lose ``strategy`` and
    ``fill_value``. Inheriting ``SimpleImputer.__init__`` keeps the same
    keyword-argument call style working.
    """

    def fit(self, X, y=None):
        """Fit the imputer on the 'MARITAL_STATUS' column when it is present.

        Returns ``self``.
        """
        if 'MARITAL_STATUS' in X.columns:
            # Double brackets keep the selection 2-D, as SimpleImputer expects.
            super().fit(X[['MARITAL_STATUS']], y)
        return self

    def transform(self, X, y=None):
        """Return ``X`` with 'MARITAL_STATUS' imputed, if that column exists."""
        if 'MARITAL_STATUS' not in X.columns:
            return X
        print('\t---- MARITAL STATUS found in skipsimpleimpute, all cols are: ', X.columns)
        # Work on a copy so the caller's frame is not modified in place.
        X = X.copy()
        imputed = super().transform(X[['MARITAL_STATUS']])
        X[['MARITAL_STATUS']] = imputed
        return X
categorical_transformer = Pipeline(steps=[
('categorical_imputer', SkipSimpleImputer(strategy="constant", fill_value='Unknown')),
('encoder', OneHotEncoder(handle_unknown='ignore'))
])
preprocess_ppl = ColumnTransformer(
transformers=[
('encode', categorical_transformer, make_column_selector(dtype_include=object)),
('zero_impute', fill_na_zero_transformer, lambda X: [col for col in fill_zero_cols if col in X.columns] ),
('numeric', numeric_transformer, lambda X: [col for col in num_cols if col in X.columns])
]
)
pipeline2 = Pipeline(
steps=[
('dropper', drop_cols),
('remover',feature_remover),
("preprocessor", preprocess_ppl),
("estimator", customOLS(sm.OLS, LinearRegression()))
]
)
Could this be done via a custom column transformer or via a pipeline-step transformer? In a custom ColumnTransformer we return X; how do we update y?

ColumnTransformer fit_transform not working with pipeline

I am writing a pipeline with custom transformer. When calling fit_transform of categorical pipeline I am getting the desired result but when calling fit_transform of ColumnTransformer, whatever I have initialised in init of custom transformer is getting lost.
Note: not including code of numericalTransformer for readability
# Transformer from the question (indentation was lost in extraction). It
# derives year/month/day columns from a 'date' column and turns
# 'yr_renovated' and 'view' into 'Yes'/'No' flags.
class categoryTransformer(BaseEstimator, TransformerMixin):
# NOTE(review): the default for use_dates is a list literal, so a single list
# object is shared by every instance created without the argument.
def __init__(self, use_dates=['year', 'month', 'day']):
# The constructor parameter is `use_dates` but it is stored as `_use_dates`;
# the output shown after this snippet prints None for the second instance.
self._use_dates = use_dates
print('==========>',self._use_dates)
def fit(self, X, y=None):
return self
# The three helpers below slice the string form of a value:
# characters 0-3, 4-5 and 6-7 respectively.
def get_year(self, obj):
return str(obj)[:4]
def get_month(self, obj):
return str(obj)[4:6]
def get_day(self, obj):
return str(obj)[6:8]
# Maps the string '0' to 'No' and every other value to 'Yes'.
def create_boolean(self, obj):
if obj == '0':
return 'No'
else:
return 'Yes'
def transform(self, X, y=None):
print(self._use_dates)
for spec in self._use_dates:
print(spec)
# Builds and executes the statement
# X.loc[:,'<spec>'] = X['date'].apply(self.get_<spec>) for each entry,
# so every entry must have a matching get_<spec> method on this class.
exec("X.loc[:,'{}'] = X['date'].apply(self.get_{})".format(spec, spec))
X = X.drop('date', axis=1)
X.loc[:,'yr_renovated'] = X['yr_renovated'].apply(self.create_boolean)
X.loc[:, 'view'] = X['view'].apply(self.create_boolean)
# Returns the underlying values rather than the labelled frame.
return X.values
cat_pipe = Pipeline([
('cat_transform', categoryTransformer()),
('one_hot', OneHotEncoder(sparse=False))])
num_pipe = Pipeline([
('num_transform', numericalTransformer()),
('imputer', SimpleImputer(strategy = 'median')),
('std_scaler', StandardScaler())])
full_pipe = ColumnTransformer([
('num', num_pipe, numerical_features),
('cat', cat_pipe, categorical_features)])
cat_pipe.fit_transform(data[categorical_features])#working fine
df2 = full_pipe.fit_transform(X_train)# __init__ initialisation lost
"output"
==========> ['year', 'month', 'day']
['year', 'month', 'day']
year
month
day
==========> None
None
After that comes a long traceback that I am not able to debug. A workaround is to create use_dates=['year', 'month', 'day'] inside the transform function itself, but I want to understand why this is happening.

The parameters of __init__ need to have the same names as the attributes that get set (so use_dates and _use_dates is the problem).
This is required for cloning to work properly, and ColumnTransformer clones all its transformers before fitting.
https://scikit-learn.org/stable/developers/develop.html#instantiation

How use leave one out encoding in sklearn pipelines

I would like to test different encoding strategies as implemented in categorical encoding package using sklearn pipelines.
I mean something like this:
num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]
num_pipeline = Pipeline([
('selector', DataFrameSelector(num_attribs)),
('imputer', Imputer(strategy="median")),
('std_scaler', StandardScaler()),
])
cat_pipeline = Pipeline([
('selector', DataFrameSelector(cat_attribs)),
('cat_encoder', LeaveOneOutEncoder()),
])
from sklearn.pipeline import FeatureUnion
full_pipeline = FeatureUnion(transformer_list=[
("num_pipeline", num_pipeline),
("cat_pipeline", cat_pipeline),
])
housing_prepared = full_pipeline.fit_transform(housing)
housing_prepared
But I get an error:
TypeError: fit() missing 1 required positional argument: 'y'
Can anyone suggest a solution?

Let me show just part of the code, the way I do it. I added XGBRegressor because I think you may want to predict housing prices.
class MultiColumn(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps only the configured columns of its input."""

    def __init__(self, columns=None):
        # Column names used to index ``X`` in ``transform``.
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned; return the instance itself."""
        return self

    def transform(self, X):
        """Return ``X[self.columns]``."""
        selected = X[self.columns]
        return selected
NUMERIC = df[['var1', 'var2']]
CATEGORICAL = df[['var3', 'var4']]
class Imputation(BaseEstimator, TransformerMixin):
    """Fill missing values using the medians of the module-level ``NUMERIC`` frame."""

    def fit(self, X, y=None, **fit_params):
        """Nothing is learned; return the instance itself."""
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Call ``fit`` and then return ``transform(X)``."""
        self.fit(X, y, **fit_params)
        return self.transform(X)

    def transform(self, X, y=None, **fit_params):
        """Return ``X`` with missing entries replaced by ``NUMERIC.median()``."""
        # The medians come from the global NUMERIC frame, not from X.
        medians = NUMERIC.median()
        return X.fillna(medians)
class Cat(BaseEstimator, TransformerMixin):
    """Encode rows with a DictVectorizer fitted on the module-level ``CATEGORICAL`` frame."""

    def fit(self, X, y=None, **fit_params):
        """Nothing is learned; return the instance itself."""
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Call ``fit`` and then return ``transform(X)``."""
        self.fit(X, y, **fit_params)
        return self.transform(X)

    def transform(self, X, y=None, **fit_params):
        """Return the dense encoding of ``X``, with NaN entries set to 1."""
        # A fresh vectorizer is built and fitted on CATEGORICAL on every call.
        vectorizer = DictVectorizer(sparse = False)
        vectorizer.fit(CATEGORICAL.T.to_dict().values())
        # One dict per row of X, keyed by column name.
        row_dicts = X.T.to_dict().values()
        encoded = vectorizer.transform(row_dicts)
        encoded[np.isnan(encoded)] = 1
        return encoded
And Pipeline
# Two feature branches (numeric, categorical) are combined with FeatureUnion
# and the result is fed to the regressor.
pipeline = Pipeline([
    # Use FeatureUnion to combine the features
    ('union', FeatureUnion(
        transformer_list=[
            # numeric
            ('numeric', Pipeline([
                ('selector', MultiColumn(columns=['var1', 'var2'])),
                ('imp', Imputation()),
                ('scaling', preprocessing.StandardScaler(with_mean=0.)),
            ])),
            # categorical
            ('categorical', Pipeline([
                ('selector', MultiColumn(columns=['var3', 'var4'])),
                ('one_hot', Cat()),
                # Every step has to be a (name, estimator) pair; a bare
                # parenthesised estimator is not a tuple.
                ('cat_imputer', CategoricalImputer()),
            ])),
        ])),
    ('model_fitting', xgb.XGBRegressor()),
])

Your categorical encoder (LeaveOneOutEncoder) needs the target variable to adjust and replace the new labels (levels) for your variables defined in cat_attribs. So, you just need to invoke fit_transform method joined with y_train:
housing_prepared = full_pipeline.fit_transform(housing, y_train)

Keras: Metric and Loss with different input

When creating custom loss and metric function in a keras model it assumes for both cases that inputs are of (y_true, y_pred):
def custom_loss(y_true, y_pred):
.
return loss
def custom_metric(y_true, y_pred):
.
return metric
And the input of y_pred is the output of the Model. Example:
model = Model(inputs = [input1,..inputN], outputs=loss)
model.compile(loss=costum_loss, metrics=costum_metric)
In this case above for both loss and metric the y_pred will be the loss.
What if I want different inputs for custom_loss and for custom_metric? Is there a way to do it?
Edit:
More specifically, I want my loss to be:
# Builds the loss from three tensors using Lambda/Reshape layers.
# `similarity` and `max_margin_loss` are not defined in this snippet.
def warp_loss(X):
# X is unpacked into exactly three items.
z, positive_entity, negatives_entities = X
positiveSim = Lambda(lambda x: similarity(x[0], x[1]), output_shape=(1,), name="positive_sim")([z, positive_entity])
# Inserts a length-1 axis after the first axis, using z's static second dimension.
z_reshaped = Reshape((1, z.shape[1].value))(z)
# NOTE(review): output_shape reads `negatives_titles`, which is not one of the
# names unpacked above - possibly `negatives_entities` was meant; confirm.
negativeSim = Lambda(lambda x: similarity(x[0], x[1]), output_shape=(negatives_titles.shape[1].value, 1,), name="negative_sim")([z_reshaped, negatives_entities])
loss = Lambda(lambda x: max_margin_loss(x[0], x[1]), output_shape=(1,), name="max_margin")([positiveSim, negativeSim])
return loss
def mean_loss(y_true, y_pred):
return K.mean(y_pred - 0 * y_true)
and the metric:
# Builds the metric from the same three tensors as warp_loss; the first four
# statements are identical to those in warp_loss.
def metric(X):
z, positive_entity, negatives_entities = X
positiveSim = Lambda(lambda x: similarity(x[0], x[1]), output_shape=(1,), name="positive_sim")([z, positive_entity])
z_reshaped = Reshape((1, z.shape[1].value))(z)
# NOTE(review): `negatives_titles` is not one of the names unpacked above -
# possibly `negatives_entities` was meant; confirm.
negativeSim = Lambda(lambda x: similarity(x[0], x[1]), output_shape=(negatives_titles.shape[1].value, 1,), name="negative_sim")([z_reshaped, negatives_entities])
# Counts, along axis 1, the entries where positiveSim is greater than negativeSim.
position = K.sum(K.cast(K.greater(positiveSim, negativeSim), dtype="int32"), axis=1, keepdims=True)
# _NUMBER_OF_NEGATIVE_EXAMPLES is defined outside this snippet.
accuracy = Lambda(lambda x: x / _NUMBER_OF_NEGATIVE_EXAMPLES)(position)
return accuracy
def mean_acc(y_true, y_pred):
return K.mean(y_pred - 0 * y_true)
So the first 4 lines are the same and after the two functions change. Could it be possible to use a Callback to print mean_acc?

You don't need the loss to be part of your model, you can make your model output its own outputs and later you apply the loss.
Here is working code (it could be optimized to avoid repeating operations in both the metric and the loss, by adding the common part to the model).
I had some issues with your shapes, then I made it with arbitrary shapes. Your original lines are commented.
This code works for Keras 2.0.8, with Tensorflow 1.3.0. I suspect you're using Theano, right?
from keras.layers import *
from keras.models import *
import keras.backend as K
def get_divisor(x):
    """Return the Euclidean norm of ``x`` taken over its last axis."""
    squared_sum = K.sum(K.square(x), axis=-1)
    return K.sqrt(squared_sum)
def similarity(a, b):
    """Return the cosine similarity of ``a`` and ``b`` over the last axis.

    The denominator is floored at ``K.epsilon()`` to avoid dividing by zero.
    """
    dot = K.sum(a * b, axis=-1)
    norm_product = get_divisor(a) * get_divisor(b)
    safe_norm_product = K.maximum(norm_product, K.epsilon())
    return dot / safe_norm_product
def max_margin_loss(positive, negative):
    """Return the hinge terms ``max(0, 1 + negative - positive)`` summed over the last axis.

    The summed axis is kept with size 1.
    """
    hinge = K.maximum(0.0, 1.0 + negative - positive)
    return K.sum(hinge, axis=-1, keepdims=True)
def warp_loss(X):
    """Compute the max-margin loss from ``X[0]``, ``X[1]`` and ``X[2]``.

    ``X[0]`` is compared with ``X[1]`` (positive) and, after gaining an
    extra axis at position 1, with ``X[2]`` (negatives).
    """
    anchor, pos_vec, neg_vecs = X[0], X[1], X[2]
    pos_sim = similarity(anchor, pos_vec)
    # expand_dims replaces the Reshape layers of the question's version.
    anchor_expanded = K.expand_dims(anchor, axis=1)
    neg_sim = K.expand_dims(similarity(anchor_expanded, neg_vecs), axis=-1)
    return max_margin_loss(pos_sim, neg_sim)
def warp_metricsX(X):
    """Compute the ranking metric from ``X[0]``, ``X[1]`` and ``X[2]``.

    Counts, along axis 1, how often the positive similarity exceeds a
    negative similarity, then divides that count by the constant 30.
    """
    anchor, pos_vec, neg_vecs = X[0], X[1], X[2]
    pos_sim = similarity(anchor, pos_vec)
    anchor_expanded = K.expand_dims(anchor, axis=1)
    neg_sim = K.expand_dims(similarity(anchor_expanded, neg_vecs), axis=-1)
    wins = K.cast(K.greater(pos_sim, neg_sim), dtype="int32")
    position = K.sum(wins, axis=1, keepdims=True)
    # 30 stands in for the question's _NUMBER_OF_NEGATIVE_EXAMPLES constant.
    return position / 30
def mean_loss(yTrue,yPred):
    """Keras loss: the mean of ``warp_loss(yPred)``; ``yTrue`` is not used."""
    per_sample = warp_loss(yPred)
    return K.mean(per_sample)
def warp_metrics(yTrue,yPred):
    """Keras metric: ``warp_metricsX(yPred)``; ``yTrue`` is not used."""
    metric_value = warp_metricsX(yPred)
    return metric_value
def build_nn_model():
    """Build and compile the three-input, three-output model.

    The embedding sizes and input lengths are fixed literals; they replace
    the lookup-table shapes and length constant used in the question.
    The model outputs the predicted vector together with the positive and
    negative embeddings, and is compiled with ``mean_loss`` and
    ``warp_metrics``.
    """
    # Two embedding tables: one for the text input, one shared by the
    # positive and negative inputs.
    text_embedding = Embedding(200,25)
    entity_embedding = Embedding(200,25)

    text_in = Input(shape=(30,), dtype='int32',name="text")
    positive_in = Input(shape=(1,), dtype='int32', name="positive")
    negatives_in = Input(shape=(10,), dtype='int32', name="negatives")

    text_vectors = text_embedding(text_in)
    # The positive input has length 1, so its embedding is reshaped to (25,).
    positive_vector = Reshape((25,))(entity_embedding(positive_in))
    negative_vectors = entity_embedding(negatives_in)

    # Text branch: convolution -> global max pooling -> tanh -> dropout -> dense.
    conv = Convolution1D(
        filters=1000,
        kernel_size=5,
        activation="tanh",
        name="conv_layer_mp",
        padding="valid")
    hidden = conv(text_vectors)
    hidden = GlobalMaxPooling1D(name="max_pool_mp")(hidden)
    hidden = Activation("tanh")(hidden)
    hidden = Dropout(0.2, name="dropout_mp")(hidden)
    z = Dense(25, name="predicted_vec")(hidden)  # activation="linear"

    model = Model(
        inputs=[text_in, positive_in, negatives_in],
        outputs=[z, positive_vector, negative_vectors],
    )
    model.compile(loss=mean_loss, optimizer='adam',metrics=[warp_metrics])
    return model

Tensorflow input function with batch size and shuffle

I am trying to build tensorflow input function with tf.train.batch(). I have dataframe for train, eval and prediction. So input_fn should take argument for df, batch_size. In df there are continuous and categorical columns.
Revised code:
COLUMNS = ['atemp', 'holiday', 'humidity', 'season', 'temp', 'weather', 'windspeed', 'workingday', 'hour', 'weekday', 'month', 'label']
CONTINUOUS_COLUMNS = ['atemp', 'humidity', 'temp', 'windspeed',]
CATEGORICAL_COLUMNS =[ 'holiday', 'season', 'weather',
'workingday', 'weekday', 'month', 'hour' ]
LEARNING_RATE = 0.1
LABEL_COLUMN = 'label'
batch_size = 128
data_set = pd.read_excel('bike_str.xlsx')
# Split the data into a training set, an eval set and a pred set.
train_set = data_set[:9500]
eval_set = data_set[9500:10800]
pred_set = data_set[10800:]
## Eval and Prediction labels:
eval_label = eval_set['label']
pred_label = pred_set['label']
Input_fn:
def batch_input_fn(df, batch_size, shuffle=False):
    """Return an ``input_fn`` closure that yields batches built from ``df``.

    Args:
        df: frame holding the CONTINUOUS_COLUMNS, CATEGORICAL_COLUMNS and
            LABEL_COLUMN columns.
        batch_size: batch size passed to ``tf.train.batch``.
        shuffle: forwarded to ``tf.train.slice_input_producer``. It used to
            be read as an undefined free variable; it is now an explicit
            parameter so callers can pass ``shuffle=True`` for training and
            leave it off for eval/prediction.

    Returns:
        The zero-argument ``input_fn``. This return value is itself the
        callable an estimator should receive; wrapping the call to this
        factory in another ``lambda`` makes the estimator get a function
        back where it expects a (features, labels) pair.
    """
    def input_fn():
        """Input builder function."""
        # Creates a dictionary mapping from each continuous feature column name (k) to
        # the values of that column stored in a constant Tensor.
        continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
        # Creates a dictionary mapping from each categorical feature column name (k)
        # to the values of that column stored in a tf.SparseTensor.
        categorical_cols = {
            k: tf.SparseTensor(
                indices=[[i, 0] for i in range(df[k].size)],
                values=df[k].values,
                dense_shape=[df[k].size, 1])
            for k in CATEGORICAL_COLUMNS}
        # Merges the two dictionaries into one.
        x = dict(continuous_cols)
        x.update(categorical_cols)
        # Converts the label column into a constant Tensor.
        y = tf.constant(df[LABEL_COLUMN].values)
        # Slice single examples, shuffled only when requested.
        # NOTE(review): x is a dict here - confirm slice_input_producer accepts
        # a dict entry in its tensor list.
        sliced_input = tf.train.slice_input_producer([x, y], shuffle=shuffle)
        return tf.train.batch(sliced_input, batch_size=batch_size, num_threads=3)
    return input_fn
## Continuous base columns.
atemp = tf.contrib.layers.real_valued_column('atemp')
humidity = tf.contrib.layers.real_valued_column('humidity')
temp = tf.contrib.layers.real_valued_column('temp')
windspeed = tf.contrib.layers.real_valued_column('windspeed')
## Categoric base columns:
### To define a feature column for a categorical feature, we can create a SparseColumn
holiday = tf.contrib.layers.sparse_column_with_keys(column_name="holiday", keys=["no", "yes"])
season = tf.contrib.layers.sparse_column_with_keys(column_name="season", keys=["spring", "summer", "fall","winter"])
feat_dnn = [atemp_b, humidity_b, windspeed_b, temp_b,
tf.contrib.layers.embedding_column(holiday, dimension=1)
]
dnnregressor = tf.contrib.learn.DNNRegressor(feature_columns= feat_dnn,
hidden_units=[512,256, 512],
optimizer=tf.train.FtrlOptimizer(
learning_rate=0.250, l1_regularization_strength=0.8, l2_regularization_strength=0.8),
activation_fn =tf.nn.relu, dropout = 0.08)
dnnregressor.fit(input_fn= lambda: batch_input_fn(train_set, batch_size, shuffle = True), steps=1000 )
When calling directly batch_input_fn, error is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-8-9c356159093d> in <module>()
----> 1 dnnregressor.fit(input_fn= lambda: batch_input_fn(train_set, batch_size), steps=15000 )
C:\Python\Anaconda\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
287 'in a future version' if date is None else ('after %s' % date),
288 instructions)
--> 289 return func(*args, **kwargs)
290 return tf_decorator.make_decorator(func, new_func, 'deprecated',
291 _add_deprecated_arg_notice_to_docstring(
C:\Python\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
453 hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps))
454
--> 455 loss = self._train_model(input_fn=input_fn, hooks=hooks)
456 logging.info('Loss for final step: %s.', loss)
457 return self
C:\Python\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py in _train_model(self, input_fn, hooks)
951 random_seed.set_random_seed(self._config.tf_random_seed)
952 global_step = contrib_framework.create_global_step(g)
--> 953 features, labels = input_fn()
954 self._check_inputs(features, labels)
955 model_fn_ops = self._get_train_ops(features, labels)
TypeError: 'function' object is not iterable
The following code seems to work, but here the tensors are not a list of dicts:
def batched_input_fn(dataset_x, dataset_y, batch_size):
    """Return an ``input_fn`` closure that batches two float32 arrays."""
    def _input_fn():
        # Both arrays become float32 constants with their own shapes.
        features = tf.constant(dataset_x, shape=dataset_x.shape, dtype=tf.float32)
        labels = tf.constant(dataset_y, shape=dataset_y.shape, dtype=tf.float32)
        # Slice the pair into single examples, then regroup them into batches.
        example = tf.train.slice_input_producer([features, labels])
        batch = tf.train.batch(example, batch_size=batch_size)
        return batch
    return _input_fn

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Custom function transformer not performing as expected - sklearn pipeline - python

Related

Transform y_train in scikit learn pipeline

ColumnTransformer fit_transform not working with pipeline

How use leave one out encoding in sklearn pipelines

Keras: Metric and Loss with different input

Tensorflow input function with batch size and shuffle

Categories

Resources