This is how I define a neural network:
import tensorflow as tf
class MyFun:
def __init__(self, x, y, sizes, activations, scope):
with tf.variable_scope(scope):
last_out = tf.concat([x, y], axis=1)
for l, size in enumerate(sizes):
last_out = tf.layers.dense(last_out, size, activation=activations[l])
self.vars = tf.trainable_variables(scope=scope)
self.output = last_out
I need to preprocess the inputs x and y (both placeholders) into features before feeding them into the network. More specifically, I want to use quadratic features, i.e.,
new_input = [1, x, y, x**2, y**2, cross(x,y)]
where cross(x,y) includes the product between all elements of [x, y], i.e.,
cross(x,y) = [x_1*x_2, x_1*x_3, ..., x_1*y_1, ...]
How can I do it elegantly? Is there an equivalent of sklearn.preprocessing.PolynomialFeatures?
Here is one option:
# Suppose your placeholders are one-dimensional vectors, with sizes 3 and 7:
x = tf.placeholder(tf.float32,shape=[3])
y = tf.placeholder(tf.float32, shape=[7])
# concat the constant 1.0 with x and y:
z = tf.concat((tf.constant(1.0,shape=(1,)),x,y),axis=0)
# construct all products of pairs z[i]*z[j] with i <= j:
new_input = [z[i]*z[j] for i in range(1+3+7) for j in range(i, 1+3+7)]
# convert the list of tensors to a tensor (optional):
new_input = tf.stack(new_input)
EDIT 1
Extending this to the case where x and y have a batch dimension:
x = tf.placeholder(tf.float32,shape=[None,3])
y = tf.placeholder(tf.float32, shape=[None,7])
# use 1.0 + 0*x[:,:1] instead of tf.constant(1.0) so the ones column gets the batch dimension
z = tf.concat((1.0+0*x[:,:1],x,y),axis=1)
new_input = [z[:,i]*z[:,j] for i in range(1+3+7) for j in range(i, 1+3+7)]
new_input = tf.stack(new_input,1)
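As an aside, the same features can be built without the Python double loop; below is a minimal vectorized sketch under the same assumptions (a batched z of width n = 1 + 3 + 7), using an outer product and an upper-triangular mask:
import numpy as np
n = 1 + 3 + 7  # width of z = [1, x, y]
# all pairwise products z_i * z_j for each batch element: shape (batch, n, n)
outer = tf.einsum('bi,bj->bij', z, z)
# keep each product only once (i <= j), matching the list comprehension above
mask = np.triu(np.ones((n, n), dtype=bool))
new_input = tf.boolean_mask(outer, mask, axis=1)  # shape (batch, n*(n+1)/2)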
Is there any operation that can achieve the following:
import torch
batch_size = 2
seq_len = 2
dim = 3
# batch of sequences of embedding vecs:
x = torch.rand([batch_size, seq_len, dim])
# batch of target embedding vecs:
y = torch.rand([batch_size, dim])
# the computation I want to achieve:
print(torch.outer(x[0][0], y[0]))
print(torch.outer(x[0][1], y[0]))
print(torch.outer(x[1][0], y[1]))
print(torch.outer(x[1][1], y[1]))
print()
What I've tried but failed: torch.einsum('bij, bj->bij', x, y).
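For what it's worth, a batched outer product like this can usually be written by giving the two vectors different output indices in einsum; here is a small sketch with the shapes from above (the 'bsi,bj->bsij' subscripts are my own suggestion, not something from the question):
import torch
batch_size, seq_len, dim = 2, 2, 3
x = torch.rand([batch_size, seq_len, dim])
y = torch.rand([batch_size, dim])
# outer(x[b][s], y[b]) for every b and s -> shape (batch_size, seq_len, dim, dim)
out = torch.einsum('bsi,bj->bsij', x, y)
# sanity check against the loop version above
assert torch.allclose(out[0, 0], torch.outer(x[0][0], y[0]))
assert torch.allclose(out[1, 1], torch.outer(x[1][1], y[1]))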
I am trying to implement mini-batch gradient descent for logistic regression. However, when I test it on my data set with labels {-1, 1}, the prediction is almost always either 1 or -1, leaving me with a test score of around 50% (the true labels are split roughly 50/50 between -1 and 1), when the target is above 95%.
Can anyone help spot the mistake(s) in my code below?
def logistic(z):
"""
Helper function
Applies the logistic function 1/(1+e^{-z}) to each entry of the input vector z.
Args:
z: numpy array shape (,d)
Returns:
logi: numpy array shape (,d) each entry transformed by the logistic function
"""
logi = np.zeros(z.shape)
logi = np.array([1 / (1+np.exp(-z[i])) for i in range(len(z))])
assert logi.shape == z.shape
return logi
class LogisticRegressionClassifier():
def __init__(self):
self.w = None
def fit(self, X, y, w=None, lr=0.1, batch_size=16, epochs=10):
"""
Run mini-batch Gradient Descent for logistic regression
use batch_size data points to compute gradient in each step.
Args:
X: np.array shape (n,d) dtype float32 - Features
y: np.array shape (,n) dtype int32 - Labels
w: np.array shape (,d) dtype float32 - Initial parameter vector
lr: scalar - learning rate for gradient descent
batch_size: number of elements to use in minibatch
epochs: Number of scans through the data
sets:
w: numpy array shape (,d) learned weight vector w
history: list/np.array len epochs
"""
if w is None: w = np.zeros(X.shape[1])
history = []
n = np.size(X, 0)
for i in range(epochs):
b = batch_size
X_ = np.copy(X)
X_shuf = np.take(X_,np.random.permutation(X_.shape[0]),axis=0,out=X_)
for i in range(n//b):
sample = X_shuf[b*i:(i+1)*b]
g = (1/b)*sum([-y[i]*sample[i,:]*sigmoid(-y[i]*np.dot(w,sample[i,:])) for i in range(b)])
w = np.array(w - lr*g)
history.append(w)
self.w = w
self.history = history
return w
def predict(self, X):
""" Classify each data element in X
Args:
X: np.array shape (n,d) dtype float - Features
Returns:
p: numpy array shape (n, ) dtype int32, class predictions on X (0, 1)
"""
z = np.dot(X,self.w.T)
print(z)
out = logistic(z)
return out
def score(self, X, y):
""" Compute model accuracy on Data X with labels y
Args:
X: np.array shape (n,d) dtype float - Features
y: np.array shape (n,) dtype int - Labels
Returns:
s: float, number of correct predictions divided by n.
"""
s = 0
n = np.size(X,0)
pred = self.predict(X)
pred_labels = []
for i in range(n):
if pred[i] > 0.5:
pred_labels += [1]
if pred[i] <= 0.5:
pred_labels += [-1]
for i in range(n):
if pred_labels[i] == y[i]:
s += 1
return s / n
You forgot to shuffle the labels alongside the training data. If you have
[3, 1] [-1]
[2, 3] [ 1]
After you shuffle the training data (but not the labels), they become mismatched
[2, 3] [-1]
[3, 1] [ 1]
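One minimal way to shuffle features and labels together is to use a single permutation for both (variable names are hypothetical; this would go inside the epoch loop):
perm = np.random.permutation(n)   # one permutation per epoch
X_shuf = X[perm]                  # shuffled features
y_shuf = y[perm]                  # labels shuffled in exactly the same way
# then slice both X_shuf and y_shuf with b*i:(i+1)*b inside the mini-batch loop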
Consider a data frame with folder names and corresponding labels. Each folder contains an arbitrary number of images from video files. I'm looking for a way to sample a sequence of images from each folder with tf.data.Dataset to train an action recognition model. Something like this:
ds = tf.data.Dataset.from_tensor_slices(list_of_folders)
def read_and_preprocess_images_from_folder(folder):
list_of_image_names = some_function_to_list_files(folder)
list_length = len(list_of_image_names)
upper_boundary = list_length - sequence_length
random_start_index = tf.random_uniform(shape=[], minval=0, maxval=upper_boundary, dtype=tf.int64)
random_sequence = list_of_image_names[random_start_index:random_start_index+sequence_length]
return convert_sequence_to_image_tensor(random_sequence)
What I've done so far:
df = pd.DataFrame({'folder': ['folder_0', 'folder_1'], 'target': [0, 1]})
ds = tf.data.Dataset.from_tensor_slices((df.folder.values, df.target.values))
def load_and_preprocess_image_sequence(folder):
x = tf.io.matching_files('/path/to/folders/' + folder + '/*.jpg')
x = tf.map_fn(lambda x: preprocess_image(tf.read_file(x)), x, dtype=tf.float32)
return x
def preprocess_image(x):
x = tf.image.decode_jpeg(x, channels=3)
x = tf.image.resize_images(x, size=(IMAGE_SIZE,IMAGE_SIZE))
return x
def load_and_preprocess_from_folder_label(folder, label):
return load_and_preprocess_image_sequence(folder), label
train_ds = train_ds.map(load_and_preprocess_from_folder_label)
And I get:
<DatasetV1Adapter shapes: ((?, 224, 224, 3), ()), types: (tf.float32, tf.int64)>
The problem is that tf.io.matching_files returns a tensor with an undefined shape when used inside tf.data.Dataset; it only has a defined shape during eager execution.
I tried to solve this problem differently. Knowing that the images in every folder follow the same naming pattern ['0001.jpg', '0002.jpg', ...], I tried np.random.randint, but it produces the same result every time, because it is evaluated only once when the graph is built rather than once per element:
def load_and_preprocess_image_sequence(folder):
random_start_index = np.random.randint(0,upper_boundary)
x = []
for i in range(random_start_index, random_start_index+sequence_length):
x.append('/path/to/folders/' + folder + f'/{i:04d}.jpg')
x = [tf.read_file(i) for i in x]
x = [preprocess_image(i) for i in x]
x = tf.stack(x, axis=0)
return x
It works fine, except that random_start_index is the same every time.
In order to solve the randomness issue I have to use tf.random_uniform:
def load_and_preprocess_image_sequence(folder):
random_start_index = tf.random_uniform(shape=[], minval=0, maxval=upper_boundary, dtype=tf.int64)
range = tf.map_fn(lambda x: x + random_start_index, tf.range(sequence_length, dtype=tf.int64))
And I get a tensor of consecutive numbers starting at a random index, with length equal to sequence_length. The problem now is that tf.strings.format is rather limited and cannot reproduce Python formatting such as f'{i:04d}.jpg'.
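(As an aside, tf.strings.as_string can zero-pad numbers through its width and fill arguments, which would make this approach workable; a small sketch, reusing the folder and index tensors from above:)
# zero-pad each index to four digits, e.g. 3 -> "0003"
names = tf.strings.as_string(range, width=4, fill='0')
paths = '/path/to/folders/' + folder + '/' + names + '.jpg'  # broadcasted string concatenation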
I was able to solve this. Here is an example:
x = tf.io.matching_files(folder + '/*.jpg')
max_start_index = tf.cast(len(x) - SEQUENCE_LEN, tf.int64)
if max_start_index == 0:
random_start_index = max_start_index
else:
random_start_index = tf.random.uniform(shape=[], minval=0, maxval=max_start_index, dtype=tf.int64)
x = x[random_start_index:random_start_index + SEQUENCE_LEN]
x = tf.map_fn(lambda x: load_image(x), x, dtype=tf.uint8)
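For completeness, a sketch of one way this snippet might sit inside the dataset pipeline, reusing preprocess_image from above; tf.maximum stands in for the if so that maxval stays positive (SEQUENCE_LEN is assumed to be defined and every folder is assumed to hold at least SEQUENCE_LEN frames):
def load_sequence(folder, label):
    x = tf.io.matching_files('/path/to/folders/' + folder + '/*.jpg')
    # keep maxval strictly positive; with exactly SEQUENCE_LEN files the start index is 0
    max_start = tf.maximum(tf.shape(x)[0] - SEQUENCE_LEN, 1)
    start = tf.random.uniform(shape=[], minval=0, maxval=max_start, dtype=tf.int32)
    seq = x[start:start + SEQUENCE_LEN]
    seq = tf.map_fn(lambda f: preprocess_image(tf.io.read_file(f)), seq, dtype=tf.float32)
    return seq, label
ds = ds.map(load_sequence)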
I want to create my own dataset class based on Skorch's Dataset class, because I want to distinguish categorical columns from continuous columns. The categorical columns will be passed through embedding layers in the model. The result is weird: the training shows NaN, like this:
epoch train_loss valid_loss dur
------- ------------ ------------ ------
1 nan nan 0.2187
2 nan nan 0.1719
3 nan nan 0.1719
4 nan nan 0.1562
5 nan nan 0.1406
Can you help me fix it? I am using data from this Kaggle competition:
Here
from skorch import NeuralNetRegressor
from skorch.dataset import Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
class TabularDataset(Dataset):
def __init__(self, data, cat_cols=None, output_col=None):
self.n = data.shape[0]
if output_col:
self.y = data[output_col].astype(np.float32).values.reshape(-1, 1)
else:
self.y = np.zeros((self.n, 1))
self.cat_cols = cat_cols if cat_cols else []
self.cont_cols = [col for col in data.columns
if col not in self.cat_cols + [output_col]]
if self.cont_cols:
self.cont_X = data[self.cont_cols].astype(np.float32).values
else:
self.cont_X = np.zeros((self.n, 1))
if self.cat_cols:
self.cat_X = data[self.cat_cols].astype(np.int64).values
else:
self.cat_X = np.zeros((self.n, 1))
def __len__(self):
# Denotes the total number of samples
return self.n
def __getitem__(self, idx):
# generates one sample of data
return [self.cont_X[idx], self.cat_X[idx]], self.y[idx]
class FeedForwardNN(nn.Module):
def __init__(self, emb_dims, no_of_cont, lin_layer_sizes,
output_size, emb_dropout, lin_layer_dropouts):
"""
Parameters
----------
emb_dims: List of two element tuples
This list will contain a two element tuple for each
categorical feature. The first element of a tuple will
denote the number of unique values of the categorical
feature. The second element will denote the embedding
dimension to be used for that feature.
no_of_cont: Integer
The number of continuous features in the data.
lin_layer_sizes: List of integers.
The size of each linear layer. The length will be equal
to the total number
of linear layers in the network.
output_size: Integer
The size of the final output.
emb_dropout: Float
The dropout to be used after the embedding layers.
lin_layer_dropouts: List of floats
The dropouts to be used after each linear layer.
"""
super().__init__()
# Embedding layers
self.emb_layers = nn.ModuleList([nn.Embedding(x, y)
for x, y in emb_dims])
no_of_embs = sum([y for x, y in emb_dims])
self.no_of_embs = no_of_embs
self.no_of_cont = no_of_cont
# Linear Layers
first_lin_layer = nn.Linear(self.no_of_embs + self.no_of_cont,
lin_layer_sizes[0])
self.lin_layers = \
nn.ModuleList([first_lin_layer] + \
[nn.Linear(lin_layer_sizes[i], lin_layer_sizes[i + 1])
for i in range(len(lin_layer_sizes) - 1)])
for lin_layer in self.lin_layers:
nn.init.kaiming_normal_(lin_layer.weight.data)
# Output Layer
self.output_layer = nn.Linear(lin_layer_sizes[-1],
output_size)
nn.init.kaiming_normal_(self.output_layer.weight.data)
# Batch Norm Layers
self.first_bn_layer = nn.BatchNorm1d(self.no_of_cont)
self.bn_layers = nn.ModuleList([nn.BatchNorm1d(size)
for size in lin_layer_sizes])
# Dropout Layers
self.emb_dropout_layer = nn.Dropout(emb_dropout)
self.droput_layers = nn.ModuleList([nn.Dropout(size)
for size in lin_layer_dropouts])
def forward(self, X):
cont_data = X[0]
cat_data = X[1]
if self.no_of_embs != 0:
x = [emb_layer(cat_data[:, i])
for i, emb_layer in enumerate(self.emb_layers)]
x = torch.cat(x, 1)
x = self.emb_dropout_layer(x)
if self.no_of_cont != 0:
normalized_cont_data = self.first_bn_layer(cont_data)
if self.no_of_embs != 0:
x = torch.cat([x, normalized_cont_data], 1)
else:
x = normalized_cont_data
for lin_layer, dropout_layer, bn_layer in \
zip(self.lin_layers, self.droput_layers, self.bn_layers):
x = F.relu(lin_layer(x))
x = bn_layer(x)
x = dropout_layer(x)
x = self.output_layer(x)
return x
# Read data
data = pd.read_csv("data/train.csv", usecols=["SalePrice", "MSSubClass", "MSZoning", "LotFrontage", "LotArea",
"Street", "YearBuilt", "LotShape", "1stFlrSF", "2ndFlrSF"]).dropna()
categorical_features = ["MSSubClass", "MSZoning", "Street", "LotShape", "YearBuilt"]
output_feature = "SalePrice"
# Label-encode categorical features
label_encoders = {}
for cat_col in categorical_features:
label_encoders[cat_col] = LabelEncoder()
data[cat_col] = label_encoders[cat_col].fit_transform(data[cat_col])
# feed Forward NN
cat_dims = [int(data[col].nunique()) for col in categorical_features]
emb_dims = [(x, min(50, (x + 1) // 2)) for x in cat_dims]
net = FeedForwardNN(emb_dims, no_of_cont=4, lin_layer_sizes=[50, 100],
output_size=1, emb_dropout=0.04,
lin_layer_dropouts=[0.001, 0.01])
# Fit
ds = TabularDataset(data=data, cat_cols=categorical_features,
output_col=output_feature)
X = data.drop(['SalePrice'], axis=1)
y = data['SalePrice'].values.reshape(-1, 1)
net = NeuralNetRegressor(
net,
max_epochs=5,
lr=0.1,
dataset=ds
)
net.fit(X, y)
For anyone experiencing a similar issue with classification problems: the loss function (criterion) used by skorch by default is NLLLoss, which computes the log for you (see the doc and the related issue). It therefore expects a Softmax layer (in the multi-class case) as the last step of your architecture, so that the module produces probabilities.
You can either:
Add a Softmax layer to produce probabilities and leave the default NLLLoss;
Change the default loss to CrossEntropyLoss:
net = NeuralNetClassifier(
...
criterion=torch.nn.CrossEntropyLoss
)
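For reference, a minimal sketch of the first option (the module and layer sizes here are hypothetical), ending the module with a Softmax so it outputs probabilities for the default criterion:
import torch.nn as nn
class ClassifierModule(nn.Module):
    def __init__(self, n_features, n_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 64),
            nn.ReLU(),
            nn.Linear(64, n_classes),
            nn.Softmax(dim=-1),  # probabilities, as expected with the default NLLLoss
        )
    def forward(self, X):
        return self.net(X)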
The problem is not with skorch but with your data. You have to scale your inputs and, in this case especially the targets, to avoid huge losses and exploding gradients. As a start, I suggest using sklearn.preprocessing.StandardScaler, for example:
from sklearn.preprocessing import StandardScaler
class TabularDataset(Dataset):
def __init__(self, data, cat_cols=None, output_col=None):
self.n = data.shape[0]
# [...]
if output_col:
scaler_y = StandardScaler()
self.y = data[output_col].astype(np.float32).values.reshape(-1, 1)
scaler_y.fit(self.y)
self.y = scaler_y.transform(self.y)
# [...]
if self.cont_cols:
scaler_X_cont = StandardScaler()
self.cont_X = data[self.cont_cols].astype(np.float32).values
scaler_X_cont.fit(self.cont_X)
self.cont_X = scaler_X_cont.transform(self.cont_X)
# [...]
As a side note, you don't need X and y when you have a dataset that provides the actual data; you can simply pass the dataset to net.fit (the exception is when you need a stratified CV split):
net = NeuralNetRegressor(
net,
max_epochs=5,
lr=0.00001,
)
net.fit(ds, y=None)
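Keep in mind that with the targets scaled inside the dataset, the regressor also predicts in scaled units; a small sketch of mapping predictions back, assuming scaler_y is kept accessible outside the dataset:
y_pred_scaled = net.predict(ds)
y_pred = scaler_y.inverse_transform(y_pred_scaled)  # back to the original SalePrice scale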
I'm working on a permutation equivariant layer for Keras, based on this paper https://arxiv.org/pdf/1612.04530.pdf and previous work by Josef Ondrej found here.
The layer itself is a Keras Model consisting of multiple layers:
from keras import backend as K
from keras import losses
from keras.layers import Average, Add, Concatenate, Maximum, Input, Dense, Lambda
from keras.models import Model
from keras.engine.topology import Layer
def PermutationEquivariant(input_shape, layer_size, tuple_dim = 2, reduce_fun = "sum", dense_params = {}):
"""
Implements a permutation equivariant layer.
Each batch in our data consists of `input_shape[0]` observations
each with `input_shape[1]` features.
Args:
input_shape -- A pair of `int` - (number of observations in one batch x
number of features of each observation). The batch dimension is not included.
layer_size -- `int`. Size of dense layer applied to each tuple of observations.
tuple_dim -- A `int`, how many observations to put in one tuple.
reduce_fun -- A `string`, type of function to "average" over all tuples starting with the same index.
Returns:
g -- A keras Model - the permutation equivariant layer.
It consists of one tuple layer that creates all possible `tuple_dim`-tuples
of observations, sorted on an axis along which the first index is constant.
The same dense layer is applied on every tuple and then some symmetric pooling function is applied
across all tuples with the same first index (for example mean or maximum).
"""
inputs = Input(shape=input_shape)## input_shape: batch_size x row x col
## SeperatedTuple layer
x = SeperatedTuples(tuple_dim, input_shape = input_shape)(inputs)## out_shape: batch_size x row x row ** (tuple_dim-1) x tuple_dim*col
## Dense layer -- implemented with a conv layer
# Use the same dense layer for each tuple
dense_input_shape = (tuple_dim*input_shape[1], ) # batch_size x tuple_dim*col
dense_layer = Dense(input_shape = dense_input_shape, units=layer_size, **dense_params)
# iterate through rows
x_i_list = []
for i in range(input_shape[0]):
xi_j_list = []
# applying the dense layer to each tuple where first index equals i
# here we could also use a 1x1 convolution. Instead of reusing
# the dense layer for each tuple, we would be reusing the kernels
for j in range(input_shape[0] ** (tuple_dim-1)):
input_ij = Lambda(lambda x : x[:,i,j,:], output_shape=(tuple_dim*input_shape[-1],))(x) ##out_shape: batch_size x tuple_dim * col
xi_j_list += [dense_layer(input_ij)] ## xi_j_list-shape: row x batch_size x layer_size
## Pooling layer
# Pooling the list of the dense outputs of all the tuples where first index equals i to out_shape: batch_size x layer_size
# note that axis=0 because in previous step row-axis comes before batch_size-axis
# Use Lambda Wrapper to preserve the output being a Keras Tensor
if reduce_fun == "mean":
pooling_layer = Average(axis=1)
#pooling_layer = Lambda(lambda x : K.mean(x, axis = 0))
elif reduce_fun == "max":
pooling_layer = Maximum()
#pooling_layer = Lambda(lambda x : K.max(x, axis = 0))
elif reduce_fun == "sum":
pooling_layer = Add()
#pooling_layer = Lambda(lambda x : K.sum(x, axis = 0))
else:
raise ValueError("Invalid value for argument `reduce_fun` provided. ")
xi = pooling_layer(xi_j_list) ## xi-shape: batch_size x layer_size
x_i_list += [xi]
# x_i_list-shape:
# Concatenate the results of each row
x = Lambda(lambda x : K.stack(x, axis=1), output_shape = (input_shape[0], layer_size))(x_i_list) ## out_shape: batch_size x row x layer_size
model = Model(inputs=inputs, outputs=x)
return model
class SeperatedTuples(Layer):
"""
Creates all possible tuples of rows of 2D tensor, with an additional axis
along which the first elements are constant.
In the case of tuple_dim = 2, from one input batch:
x_1,
x_2,
...
x_n,
where x_i are rows of the tensor, it creates 3D output tensor:
[[x_1 | x_1, x_1 | x_2 ... x_1 | x_n],
[x_2 | x_1, x_2 | x_2 ... x_2 | x_n],
...
... x_n | x_n]]
Args:
tuple_dim -- A `int`. Dimension of one tuple (i.e. how many rows from the input
tensor to combine to create a row in output tensor)
input_shape -- A `tuple` of `int`. In the most frequent case where our data
has shape (batch_size x num_rows x num_cols) this should be (num_rows x num_cols).
"""
def __init__(self, tuple_dim = 2, **kwargs):
self.tuple_dim = tuple_dim
super(SeperatedTuples, self).__init__(**kwargs)
def create_indices(self, n, k = 2):
"""
Creates all integer valued coordinate k-tuples in k dimensional hypercube with edge size n.
for example n = 4, k = 2
returns [[0, 0], [0, 1], [0, 2], [0, 3],
[1, 0], [1, 1], [1, 2], [1, 3],
...
[3, 0], [3, 1], [3, 2], [3, 3]]
Args:
n -- A `int`, edge size of the hypercube.
k -- A `int`, dimension of the hypercube.
Returns:
indices_n_k -- A `list` of `list` of `int`. Each inner list represents coordinates of one integer point
in the hypercube.
"""
if k == 0:
indices_n_k = [[]]
else:
indices_n_k_minus_1 = self.create_indices(n, k-1)
indices_n_k = [[i] + indices_n_k_minus_1[c] for i in range(n) for c in range(n**(k-1))]
return indices_n_k
def create_seperated_indices(self, n, k = 2):
"""
Same as create_indices, just that there is an additional axis along which the first value of the tuples is constant
for example n = 4, k = 2
returns [[[0, 0], [0, 1], [0, 2], [0, 3]],
[[1, 0], [1, 1], [1, 2], [1, 3]],
...
[[3, 0], [3, 1], [3, 2], [3, 3]]]
shape: row x row x k
"""
indices = self.create_indices(n,k)
seperated_indices = [indices[i:i + n] for i in range(0, len(indices), n)]
return seperated_indices
def build(self, input_shape):
# Create indexing tuple
self.gathering_indices = self.create_seperated_indices(input_shape[-2], self.tuple_dim)
super(SeperatedTuples, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
"""
input_dim : batch_size x rows x cols
output_dim : batch_size x rows x rows ** (tuple_dim-1) x cols * tuple_dim
"""
stacks_of_tuples = K.map_fn(
fn = lambda z : ## z shape: row x col
K.stack(
[K.concatenate(
[K.reshape(
K.gather(z, i), ## shape: tuple_dim x col
shape = (1,-1)
) ## shape: 1 x tuple_dim*col
for i in indices # i-dim: tuple_dim, indices-shape: row x tuple_dim
], ## shape: row x 1 x tuple_dim*col
axis = 0
) ## shape: row x tuple_dim*col
for indices in self.gathering_indices # gathering_indices-shape: row x row x tuple_dim
],
axis=0), ## shape: row x row x tuple_dim*col
elems = x ## shape: batch_size x row x col
) ## shape: batch_size x row x row x tuple_dim*col
return stacks_of_tuples
def compute_output_shape(self, input_shape):
"""
input_shape: batch_size x rows x cols
output_shape: batch_size x rows x rows ** (tuple_dim-1) x cols * tuple_dim
"""
output_shape = list(input_shape)
output_shape[-1] = output_shape[-1] * self.tuple_dim
output_shape[-2] = output_shape[-2] ** self.tuple_dim
return tuple(output_shape)
When testing the PermutationEquivariant layer on its own, everything seems to work fine (run 1). However, when I try to incorporate it into a larger model, the outputs just repeat themselves (run 2).
from keras.models import Model
from keras.layers import Input, Lambda
import numpy as np
# parameters for Permutational Equivariant layer
input_shape = (2,5)
dense_params = {'kernel_initializer': 'glorot_normal', 'bias_initializer': 'glorot_normal', 'activation': 'tanh'}
sample = np.random.random((1,) + input_shape)
# run 1: Using only the PermutationEquivariant layer as a model by itself seems to work
model_1 = PermutationEquivariant(input_shape=input_shape, layer_size=10, tuple_dim=2, reduce_fun="sum", dense_params = dense_params)
model_1.compile(optimizer='sgd', loss='categorical_crossentropy')
print("model_1: \n", model_1.predict(sample))
#model_1:
#[[[-1.0494264 -1.6808903 1.2861781 -0.90004706 1.6178854
# 1.6686234 -1.5724193 1.2454509 0.3730019 -1.4580158 ]
# [-1.3904197 -1.467866 1.0848606 -1.2094728 1.6304723
# 1.6369174 -1.4074551 0.58116794 0.292305 -1.7162979 ]]]
# run 2: Incorporating the PermutationEquivariant layer inside another model makes the output constant along the first axis
inputs = Input(shape=input_shape)
x = PermutationEquivariant(input_shape=input_shape, layer_size=10, tuple_dim=2, reduce_fun="sum", dense_params = dense_params)(inputs)
model_2 = Model(inputs=inputs,outputs = x)
model_2.compile(optimizer='sgd', loss='categorical_crossentropy')
print("model_2: \n", model_2.predict(sample))
#model_2:
# [[[ 0.72823656 1.2213255 -0.28404936 1.4711846 -0.49544945
# 1.7930243 -0.7502286 1.892496 -1.675402 -0.2252224 ]
# [ 0.72823656 1.2213255 -0.28404936 1.4711846 -0.49544945
# 1.7930243 -0.7502286 1.892496 -1.675402 -0.2252224 ]]]
I have tried Theano and TensorFlow as backends, both with the same result. Does anybody have an idea why it behaves differently when nested inside another model, or what I am missing? I appreciate any help!