I'm trying to move my scikit-learn Python script over to TensorFlow and keep getting stuck on errors. Please help!
import pandas as pd
import numpy as np
import tensorflow as tf
# read csv
df = pd.read_csv("/Downloads/iris-2.csv", header=0)
# get header names as array
features = list(df.columns.values)
label = features.pop()
classes = len(df[label].unique())
# encode target
X = df[features]
y = df[label]
# convert feature headers into tf
for index, value in enumerate(features):
    features[index] = tf.feature_column.numeric_column(value)
# initialize classifier
classifier = tf.estimator.DNNClassifier(
    feature_columns=features,
    hidden_units=[10, 10],
    n_classes=classes)
# train the classifier
dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
dataset = dataset.shuffle(1000).repeat().batch(0)
data = dataset.make_one_shot_iterator().get_next()
classifier.train(input_fn=lambda:data,steps=3)
predictions = classifier.predict([5.1,3.0,4.2,1.2])
print(predictions)
The latest error I'm stuck on is:
ValueError: Passed Tensor("dnn/head/weighted_loss/Sum:0", shape=(), dtype=float32) should have graph attribute that is equal to current graph <tensorflow.python.framework.ops.Graph object at 0x10dd9a190>.
Here's the dataset I'm using: https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/d546eaee765268bf2f487608c537c05e22e4b221/iris.csv
The input tensors (your data and dataset variables) cannot be precomputed. They need to be created inside the function passed as input_fn in the call to train(), so that the tensors live in the graph the Estimator (classifier) builds during that call. So for your last block you could use:
# train the classifier
def my_input_fn():
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    # the batch size must be positive; batch(0) would raise an error
    dataset = dataset.shuffle(1000).repeat().batch(32)
    return dataset.make_one_shot_iterator().get_next()
classifier.train(input_fn=my_input_fn, steps=3)
Note that the last two lines will still fail: classifier.predict() also expects an input_fn, not a raw list of feature values.
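A minimal sketch of a prediction input_fn, assuming the column names in the linked CSV are sepal_length, sepal_width, petal_length, petal_width (adjust the keys to your file's actual header); predict() returns a generator, hence the list() call:
def predict_input_fn():
    # keys must match the feature column names built from the CSV header
    example = {
        'sepal_length': [5.1],
        'sepal_width': [3.0],
        'petal_length': [4.2],
        'petal_width': [1.2],
    }
    return tf.data.Dataset.from_tensor_slices(example).batch(1)

predictions = classifier.predict(input_fn=predict_input_fn)
print(list(predictions))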
I am trying to convert an Estimator LinearClassifier to TFLite. However, the code throws an error, and I can't figure out what I'm doing wrong.
Here is my code:
import pandas as pd
import tensorflow as tf
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')
#create feature columns. For testing I am using only numeric ones
NUMERIC_COLUMNS = ['age', 'fare']
feature_columns = []
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name,
                                                            dtype=tf.float32))
# Use entire batch since this is such a small dataset.
NUM_EXAMPLES = len(y_train)
def make_input_fn(X, y, n_epochs=None, shuffle=True):
    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
        if shuffle:
            dataset = dataset.shuffle(NUM_EXAMPLES)
        # For training, cycle through the dataset as many times as needed (n_epochs=None).
        dataset = dataset.repeat(n_epochs)
        # In-memory training doesn't use batching.
        dataset = dataset.batch(NUM_EXAMPLES)
        return dataset
    return input_fn
# Training and evaluation input functions.
train_input_fn = make_input_fn(dftrain[NUMERIC_COLUMNS], y_train)
eval_input_fn = make_input_fn(dfeval[NUMERIC_COLUMNS], y_eval, shuffle=False, n_epochs=1)
linear_est = tf.estimator.LinearClassifier(feature_columns)
# Train model.
linear_est.train(train_input_fn, max_steps=100)
# Evaluation.
result = linear_est.evaluate(eval_input_fn)
The model trains and evaluates fine:
print(pd.Series(result))
accuracy 0.659091
accuracy_baseline 0.625000
auc 0.667095
auc_precision_recall 0.589936
average_loss 0.619227
label/mean 0.375000
loss 0.619227
precision 0.764706
prediction/mean 0.336755
recall 0.131313
global_step 100.000000
dtype: float64
Now the saving part:
serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    tf.feature_column.make_parse_example_spec(feature_columns))
model_dir = 'model_data'
path = linear_est.export_saved_model(model_dir, serving_input_fn)
When I use:
converter = tf.lite.TFLiteConverter.from_saved_model(path)
tflite_model = converter.convert()
it throws this error:
ValueError: This converter can only convert a single ConcreteFunction. Converting multiple functions is under development.
I have also tried:
saved_model_obj = tf.saved_model.load(export_dir=path)
concrete_func = saved_model_obj.signatures['serving_default']
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
tflite_model = converter.convert()
And the error is:
ConverterError: See console for info.
2020-02-18 16:23:15.446583: I tensorflow/lite/toco/import_tensorflow.cc:193] Unsupported data type in placeholder op: 20
2020-02-18 16:23:15.446687: F tensorflow/lite/toco/import_tensorflow.cc:2706] Check failed: status.ok() Input_content string_val doesn't have the right dimensions for this string tensor
(while processing node 'head/AsString')
Fatal Python error: Aborted
Please help.
Please refer to the following info:
"Some of the operators in the model are not supported by the standard TensorFlow Lite runtime. If those are native TensorFlow operators, you might be able to use the extended runtime by passing --enable_select_tf_ops, or by setting target_ops=TFLITE_BUILTINS,SELECT_TF_OPS when calling tf.lite.TFLiteConverter(). Otherwise, if you have a custom implementation for them you can disable this error with --allow_custom_ops, or by setting allow_custom_ops=True when calling tf.lite.TFLiteConverter(). Here is a list of builtin operators you are using: ADD, ADD_N, ARG_MAX, EXPAND_DIMS, FULLY_CONNECTED, RESHAPE, SOFTMAX, TILE. Here is a list of operators for which you will need custom implementations: As
String."
For me, this problem was solved after adding option "--allow_custom_ops":
tflite_convert --enable_v1_converter --allow_custom_ops --output_file=xxx --saved_model_dir=xxx --saved_model_signature_key='predict'
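If you prefer to stay in the Python API, the equivalent options can be set on the converter object. A sketch based on the question's second attempt (allow_custom_ops and target_spec.supported_ops are standard TFLiteConverter attributes; whether the converted model then runs depends on your runtime actually providing the custom/select ops):
saved_model_obj = tf.saved_model.load(export_dir=path)
concrete_func = saved_model_obj.signatures['serving_default']
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
# let ops without a builtin TFLite kernel (e.g. AsString) pass through as custom ops
converter.allow_custom_ops = True
# optionally fall back to select TensorFlow ops for unsupported builtins
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
tflite_model = converter.convert()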
I'm using the tf.keras API in TensorFlow2. I have 100,000 images or so that are saved as TFRecords (128 images per record). Each record has an input image, target image, and frame index. I can't find a clean way to keep the frame index with the prediction.
Here is an example, except I build a dataset with NumPy arrays instead of reading from TFRecords:
import tensorflow as tf
from tensorflow import keras
import numpy as np
# build dummy tf.data.Dataset
x = np.random.random(10000).astype(np.float32)
y = x + np.random.random(10000).astype(np.float32) * 0.1
idx = np.arange(10000, dtype=np.uint16)
np.random.shuffle(idx) # frames are random in my TFRecord files
ds = tf.data.Dataset.from_tensor_slices((x, y, idx))
# pretend ds returned from TFRecord
ds = ds.map(lambda f0, f1, f2: (f0, f1)) # strip off idx
ds = ds.batch(32)
# build and train model
x = keras.Input(shape=(1,))
y_hat = keras.layers.Dense(1)(x) # i.e. linear regression
model = keras.Model(x, y_hat)
model.compile('sgd', 'mse')
history = model.fit(ds, epochs=5)
# predict 1 batch
model.predict(ds, steps=1)
Short of reading through the dataset again to extract the indices (which is prone to error), is there a clean way to keep prediction correspondence with image index? In TF1.x it was straightforward. But I'd like to take advantage of clean Keras compile(), fit(), predict() API in TF2.
OK, I was overthinking this; it's pretty easy, actually. Just add the index to the dataset when you are making predictions, and pull the indices out as you iterate through the batches:
import tensorflow as tf
from tensorflow import keras
import numpy as np
def build_dataset(mode):
    np.random.seed(1)
    x = np.random.random(10000).astype(np.float32)
    y = x + np.random.random(10000).astype(np.float32) * 0.1
    idx = np.arange(10000, dtype=np.uint16)
    if mode == 'train':
        ds = tf.data.Dataset.from_tensor_slices((x, y))
        ds = ds.shuffle(128)
    else:
        ds = tf.data.Dataset.from_tensor_slices((x, idx))
    ds = ds.batch(32)
    return ds
# build and train simple linear regression model
x_tf = keras.Input(shape=(1,))
yhat_tf = keras.layers.Dense(1)(x_tf)
model = keras.Model(x_tf, yhat_tf)
model.compile(optimizer='sgd', loss='mse')
ds = build_dataset('train')
history = model.fit(ds, epochs=5)
# predict 1 batch
ds = build_dataset('predict')
for batch in ds:
    x_tf, indices_tf = batch
    yhat_np = model.predict(x_tf)
    break
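To cover the whole set rather than a single batch, the same idea extends to accumulating predictions and indices across batches; a sketch under the same dummy-data assumptions, continuing from the code above:
indices, preds = [], []
for x_batch, idx_batch in build_dataset('predict'):
    preds.append(model.predict(x_batch))
    indices.append(idx_batch.numpy())
indices = np.concatenate(indices)
preds = np.concatenate(preds).squeeze()
# preds[i] is now the prediction for frame indices[i]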
I have a train_x.csv and a train_y.csv, and I'd like to train a model using the Dataset API and the Keras interface. This is what I'm trying to do:
import numpy as np
import pandas as pd
import tensorflow as tf
tf.enable_eager_execution()
N_FEATURES = 10
N_SAMPLES = 100
N_OUTPUTS = 2
BATCH_SIZE = 8
EPOCHS = 5
# prepare fake data
train_x = pd.DataFrame(np.random.rand(N_SAMPLES, N_FEATURES))
train_x.to_csv('train_x.csv', index=False)
train_y = pd.DataFrame(np.random.rand(N_SAMPLES, N_OUTPUTS))
train_y.to_csv('train_y.csv', index=False)
train_x = tf.data.experimental.CsvDataset('train_x.csv', [tf.float32] * N_FEATURES, header=True)
train_y = tf.data.experimental.CsvDataset('train_y.csv', [tf.float32] * N_OUTPUTS, header=True)
dataset = ... # What to do here?
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(N_OUTPUTS, input_shape=(N_FEATURES,)),
    tf.keras.layers.Activation('linear'),
])
model.compile('sgd', 'mse')
model.fit(dataset, steps_per_epoch=N_SAMPLES/BATCH_SIZE, epochs=EPOCHS)
What's the right way to implement this dataset?
I tried the Dataset.zip API, like dataset = tf.data.Dataset.zip((train_x, train_y)), but it doesn't seem to work (code here and error here). I also read this answer; it works, but I'd like a non-functional way of declaring the model.
The problem is the input shape of your Dense layer: it should match the shape of your input tensor, which here is 1.
tf.keras.layers.Dense(N_OUTPUTS, input_shape=(features_shape,))
Also, you might run into problems with the steps_per_epoch parameter of model.fit(); it should be of type int.
model.fit(dataset, steps_per_epoch=int(N_SAMPLES/BATCH_SIZE), epochs=EPOCHS)
Edit 1:
In case you need multiple labels, you can do:
def parse_fn(data, labels):
    return data, tf.stack(labels, axis=0)

dataset = tf.data.Dataset.zip((train_x, train_y))
dataset = dataset.map(parse_fn)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.repeat()
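One caveat (my assumption, not part of the original answer): CsvDataset yields each column as a separate scalar tensor, so the features likely need the same stacking treatment before they can feed a layer expecting shape (N_FEATURES,). A hedged variant of the parse function:
def parse_fn(data, labels):
    # stack per-column scalars into one feature vector and one label vector
    return tf.stack(data, axis=0), tf.stack(labels, axis=0)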
Summary: according to the documentation, Keras model.fit() should accept a tf.data.Dataset as input (I am using TF version 1.12.0). I can train my model if I do the training steps manually, but using model.fit() on the same model I get an error I cannot resolve.
Here is a sketch of what I did: my dataset, which is too big to fit in memory, consists of many files, each with a different number of rows of (100 features, label). I'd like to use tf.data to build my data pipeline:
def data_loader(filename):
    '''load a single data file with many rows'''
    features, labels = load_hdf5(filename)
    ...
    return features, labels

def make_dataset(filenames, batch_size):
    '''read files one by one, pick individual rows, batch them and repeat'''
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.map(  # Problem here! See edit for solution
        lambda filename: tuple(tf.py_func(data_loader, [filename], [tf.float32, tf.float32])))
    dataset = dataset.flat_map(
        lambda features, labels: tf.data.Dataset.from_tensor_slices((features, labels)))
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    dataset = dataset.prefetch(1000)
    return dataset
_BATCH_SIZE = 128
training_set = make_dataset(training_files, batch_size=_BATCH_SIZE)
I'd like to try a very basic logistic regression model:
inputs = tf.keras.layers.Input(shape=(100,))
outputs = tf.keras.layers.Dense(1, activation='softmax')(inputs)
model = tf.keras.Model(inputs, outputs)
If I train it manually everything works fine, e.g.:
labels = tf.placeholder(tf.float32)
loss = tf.reduce_mean(tf.keras.backend.categorical_crossentropy(labels, outputs))
train_step = tf.train.GradientDescentOptimizer(.05).minimize(loss)
iterator = training_set.make_one_shot_iterator()
next_element = iterator.get_next()
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(training_size // _BATCH_SIZE):
        x, y = sess.run(next_element)
        train_step.run(feed_dict={inputs: x, labels: y})
However, if I instead try to use model.fit like this:
model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
model.fit(training_set.make_one_shot_iterator(),
          steps_per_epoch=training_size // _BATCH_SIZE,
          epochs=1,
          verbose=1)
I get the error ValueError: Cannot take the length of Shape with unknown rank. inside Keras's _standardize_user_data function.
I have tried quite a few things but could not resolve the issue. Any ideas?
Edit: based on #kvish's answer, the solution was to change the map from a lambda to a function that would specify the correct tensor dimensions, e.g.:
def data_loader(filename):
    def loader_impl(filename):
        features, labels, _ = load_hdf5(filename)
        ...
        return features, labels
    features, labels = tf.py_func(loader_impl, [filename], [tf.float32, tf.float32])
    features.set_shape((None, 100))
    labels.set_shape((None, 1))
    return features, labels
and now all that's needed is to call this function from map:
dataset = dataset.map(data_loader)
tf.py_func probably produces tensors with an unknown shape, which Keras cannot infer. You can set the shape of each returned tensor with its set_shape(your_shape) method, and that helps Keras infer the shape of the result.
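A minimal standalone TF 1.x illustration of what set_shape changes, using a dummy loader in place of the asker's load_hdf5:
import numpy as np
import tensorflow as tf

def dummy_loader(_):
    # stand-in for load_hdf5: 4 rows of 100 features, 4 labels
    return np.ones((4, 100), np.float32), np.zeros((4, 1), np.float32)

features, labels = tf.py_func(dummy_loader, [tf.constant('file')], [tf.float32, tf.float32])
print(features.shape)  # <unknown> -- this is what trips up Keras
features.set_shape((None, 100))
labels.set_shape((None, 1))
print(features.shape)  # (?, 100)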
I get a TypeError when attempting to train a TensorFlow random forest using TensorForestEstimator.
TypeError: Input 'input_data' of 'CountExtremelyRandomStats' Op has type float64 that does not match expected type of float32.
I've tried using Python 2.7 and Python 3, and I've tried using tf.cast() to put everything in float32, but it doesn't help. I have checked the data type on execution and it's float32. The problem doesn't seem to be the data I provide (a CSV of all floats), so I'm not sure where to go from here.
Any suggestions of things I can try would be much appreciated.
Code:
# Build an estimator.
def build_estimator(model_dir):
    params = tensor_forest.ForestHParams(
        num_classes=2, num_features=760,
        num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes)
    graph_builder_class = tensor_forest.RandomForestGraphs
    if FLAGS.use_training_loss:
        graph_builder_class = tensor_forest.TrainingLossForest
    # Use the SKCompat wrapper, which gives us a convenient way to split in-memory data into batches.
    return estimator.SKCompat(random_forest.TensorForestEstimator(
        params, graph_builder_class=graph_builder_class, model_dir=model_dir))
# Train and evaluate the model.
def train_and_eval():
    # load datasets
    training_set = pd.read_csv('/Users/carl/Dropbox/Docs/Python/randomforest_balanced_train.csv', dtype=np.float32, header=None)
    test_set = pd.read_csv('/Users/carl/Dropbox/Docs/Python/randomforest_balanced_test.csv', dtype=np.float32, header=None)
    print('###########')
    print(training_set.loc[:, 1].dtype)  # this prints float32
    # load labels
    training_labels = pd.read_csv('/Users/carl/Dropbox/Docs/Python/randomforest_balanced_train_class.csv', dtype=np.int32, names=LABEL, header=None)
    test_labels = pd.read_csv('/Users/carl/Dropbox/Docs/Python/randomforest_balanced_test_class.csv', dtype=np.int32, names=LABEL, header=None)
    # define the path where the model will be stored - default is current directory
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)
    # build the random forest estimator
    est = build_estimator(model_dir)
    tf.cast(training_set, tf.float32)  # error occurs with/without casts
    tf.cast(test_set, tf.float32)
    # train the forest to fit the training data
    est.fit(x=training_set, y=training_labels)  # this line throws the error
You are using tf.cast incorrectly:
tf.cast(training_set, tf.float32)  # error occurs with/without casts
should be
training_set = tf.cast(training_set, tf.float32)
tf.cast is not an in-place method; it is a TensorFlow op like any other, so its result needs to be assigned (and run) to have any effect.
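As an aside (my assumption, not part of the original answer): since the SKCompat wrapper's fit() takes in-memory NumPy data rather than graph tensors, a plain NumPy-level cast of the pandas DataFrames may be the more direct fix here. A sketch, continuing from the question's code:
# cast the DataFrames to float32/int32 NumPy arrays before fitting,
# instead of wrapping them in graph-mode tf.cast ops
train_x = training_set.values.astype(np.float32)
train_y = training_labels.values.astype(np.int32)
est.fit(x=train_x, y=train_y)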