Image processing in Tensor flow TFX pipelines

Image processing in Tensor flow TFX pipelines - python

I am trying to get a Tensorflow TFX pipeline up and running using the MNIST dataset.
# Imports
import pandas as pd
import numpy as np
from keras.datasets import mnist
import tensorflow as tf
from tfx import v1 as tfx
import os
from tfx.components import ImportExampleGen
from platform import python_version
python_version() #'3.8.8'
# Load the data - 60,000 training examples and 10,000 testing examples
(train_x, train_y), (test_x, test_y) = mnist.load_data()
Setup pipeline paths
_pipeline_root = './pipeline'
_data_root = './data'
if not os.path.isdir(_pipeline_root) and not os.path.isdir(_data_root):
!mkdir {_pipeline_root}
!mkdir {_data_root}
Write the data to TF.record format and save in eval and train dirs. NOTE that the MNIST data starts as a numpy array 28x28 and is converted to a bytestring to enable it to be encoded as part of the Tf.record.
def _bytes_feature(value):
"""Returns a bytes_list from a string / byte."""
if isinstance(value, type(tf.constant(0))): # if value ist tensor
value = value.numpy() # get value of tensor
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
"""Returns an int64_list from a bool / enum / int / uint."""
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def serialize_array(array):
array = tf.io.serialize_tensor(array)
return array
def image_label_to_tf_train(image, label):
image_shape = np.shape(image)
#define the dictionary -- the structure -- of our single example
data = {
'height': _int64_feature(image_shape[0]),
'width': _int64_feature(image_shape[1]),
'raw_image' : _bytes_feature(serialize_array(image)),
'label' : _int64_feature(label)
}
#create an Example, wrapping the single features
return tf.train.Example(features=tf.train.Features(feature=data))
def write_images_to_tfr_short(images, labels, filename:str="images", folder = ""):
if not os.path.isdir(folder):
!mkdir {folder}
filename= folder + "/" + filename+".tfrecords"
writer = tf.io.TFRecordWriter(filename) #create a writer that'll store our data to disk
count = 0
for index in range(len(images)):
#get the data we want to write
current_image = images[index]
current_label = labels[index]
out = image_label_to_tf_train(image=current_image, label=current_label)
writer.write(out.SerializeToString())
count += 1
writer.close()
print(f"Wrote {count} elements to TFRecord")
return count
The next stage is to call the transform component which uses the preprocessing_fn. This function should process all the data so for example divide the image array by 255 is a standard feature process. But the image is still as a bytestring and I can't for the life of me figure out how to turn it back into an array. The below is what I have tried.
def preprocessing_fn(inputs):
"""tf.transform's callback function for preprocessing inputs.
Args:
inputs: map from feature keys to raw not-yet-transformed features.
Returns:
Map from string feature key to transformed feature operations.
"""
# Initialize outputs dictionary
outputs = {}
raw_image_dataset = inputs[_IMAGE_KEY]
img = tf.io.decode_raw(raw_image_dataset, tf.int64)
outputs[_IMAGE_KEY] = img
outputs[_LABEL_KEY] = tf.cast(inputs[_LABEL_KEY], tf.int64)
return outputs
I get the following error:
WARNING:root:This output type hint will be ignored and not used for type-checking purposes. Typically, output type hints for a PTransform are single (or nested) types wrapped by a PCollection, PDone, or None. Got: Tuple[Dict[str, Union[NoneType, _Dataset]], Union[Dict[str, Dict[str, PCollection]], NoneType], int] instead.
WARNING:root:This output type hint will be ignored and not used for type-checking purposes. Typically, output type hints for a PTransform are single (or nested) types wrapped by a PCollection, PDone, or None. Got: Tuple[Dict[str, Union[NoneType, _Dataset]], Union[Dict[str, Dict[str, PCollection]], NoneType], int] instead.
WARNING:root:Make sure that locally built Python SDK docker image has Python 3.8 interpreter.
INFO:tensorflow:Assets written to: ./pipeline/Transform/transform_graph/225/.temp_path/tftransform_tmp/26150ae80de847fab932efeb0f0c610f/assets
INFO:tensorflow:Assets written to: ./pipeline/Transform/transform_graph/225/.temp_path/tftransform_tmp/26150ae80de847fab932efeb0f0c610f/assets
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.PerWindowInvoker.invoke_process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window()
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/core.py in <lambda>(x, *args, **kwargs)
1636 if fn_takes_side_inputs(fn):
-> 1637 wrapper = lambda x, *args, **kwargs: [fn(x, *args, **kwargs)]
1638 else:
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/beam/impl.py in _create_v2_saved_model(tensor_replacement_map, base_temp_dir, preprocessing_fn, input_signature, baseline_analyzers_fingerprint, output_keys_to_name_map)
662 saved_model_dir = beam_common.get_unique_temp_path(base_temp_dir)
--> 663 impl_helper.trace_and_write_v2_saved_model(saved_model_dir, preprocessing_fn,
664 input_signature, base_temp_dir,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/impl_helper.py in trace_and_write_v2_saved_model(saved_model_dir, preprocessing_fn, input_signature, base_temp_dir, baseline_analyzers_fingerprint, tensor_replacement_map, output_keys_to_name_map)
893 analyzer_nodes.TENSOR_REPLACEMENTS):
--> 894 metadata = _trace_and_get_metadata(concrete_transform_fn, structured_inputs,
895 preprocessing_fn, base_temp_dir,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/impl_helper.py in _trace_and_get_metadata(concrete_transform_fn, structured_inputs, preprocessing_fn, base_temp_dir, tensor_replacement_map)
805 return dataset_metadata.DatasetMetadata(
--> 806 schema=schema_inference.infer_feature_schema_v2(
807 concrete_transform_fn.structured_outputs,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in infer_feature_schema_v2(features, concrete_metadata_fn, evaluate_schema_overrides)
255 metadata)
--> 256 return _infer_feature_schema_common(
257 features,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _infer_feature_schema_common(features, tensor_ranges, feature_annotations, global_annotations, is_evaluation_complete)
300 min=min_value, max=max_value, is_categorical=True)
--> 301 feature_spec = _feature_spec_from_batched_tensors(features,
302 is_evaluation_complete)
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _feature_spec_from_batched_tensors(tensors, is_evaluation_complete)
128 dim is None for dim in shape.as_list()[1:]):
--> 129 raise ValueError(
130 'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart '
ValueError: Feature raw_image (Tensor("Identity_1:0", shape=(None, 1, None), dtype=int64)) had invalid shape (None, 1, None) for FixedLenFeature: apart from the batch dimension, all dimensions must have known size
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-37-7beafa4fe436> in <module>
3 schema=schema_gen.outputs['schema'],
4 module_file=os.path.abspath(_mnist_transform_module))
----> 5 context.run(transform, enable_cache=False)
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run_if_ipython(*args, **kwargs)
61 # __IPYTHON__ variable is set by IPython, see
62 # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 63 return fn(*args, **kwargs)
64 else:
65 absl.logging.warning(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
181 telemetry_utils.LABEL_TFX_RUNNER: runner_label,
182 }):
--> 183 execution_id = launcher.launch().execution_id
184
185 return execution_result.ExecutionResult(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
198 # be immutable in this context.
199 # output_dict can still be changed, specifically properties.
--> 200 self._run_executor(execution_decision.execution_id,
201 copy.deepcopy(execution_decision.input_dict),
202 execution_decision.output_dict,
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
71 # be immutable in this context.
72 # output_dict can still be changed, specifically properties.
---> 73 executor.Do(
74 copy.deepcopy(input_dict), output_dict, copy.deepcopy(exec_properties))
/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in Do(self, input_dict, output_dict, exec_properties)
581 # remove the `_pip_dependencies` attribute.
582 with udf_utils.TempPipInstallContext(self._pip_dependencies):
--> 583 TransformProcessor().Transform(label_inputs, label_outputs, status_file)
584 logging.debug('Cleaning up temp path %s on executor success', temp_path)
585 io_utils.delete_dir(temp_path)
/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in Transform(***failed resolving arguments***)
1114 materialization_format = (
1115 transform_paths_file_formats[-1] if materialize_output_paths else None)
-> 1116 self._RunBeamImpl(analyze_data_list, transform_data_list, preprocessing_fn,
1117 stats_options_updater_fn, force_tf_compat_v1,
1118 input_dataset_metadata, transform_output_path,
/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in _RunBeamImpl(self, analyze_data_list, transform_data_list, preprocessing_fn, stats_options_updater_fn, force_tf_compat_v1, input_dataset_metadata, transform_output_path, raw_examples_data_format, temp_path, input_cache_dir, output_cache_dir, disable_statistics, per_set_stats_output_paths, materialization_format, analyze_paths_count, stats_output_paths, make_beam_pipeline_fn)
1496 for dataset in transform_data_list:
1497 infix = 'TransformIndex{}'.format(dataset.index)
-> 1498 (dataset.transformed
1499 | 'EncodeAndSerialize[{}]'.format(infix) >> beam.ParDo(
1500 self._RecordBatchToExamplesFn(transformed_schema_proto))
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in __exit__(self, exc_type, exc_val, exc_tb)
594 try:
595 if not exc_type:
--> 596 self.result = self.run()
597 self.result.wait_until_finish()
598 finally:
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in run(self, test_runner_api)
571 finally:
572 shutil.rmtree(tmpdir)
--> 573 return self.runner.run_pipeline(self, self._options)
574 finally:
575 if not is_in_ipython():
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/direct/direct_runner.py in run_pipeline(self, pipeline, options)
129 runner = BundleBasedDirectRunner()
130
--> 131 return runner.run_pipeline(pipeline, options)
132
133
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in run_pipeline(self, pipeline, options)
197 options.view_as(pipeline_options.ProfilingOptions))
198
--> 199 self._latest_run_result = self.run_via_runner_api(
200 pipeline.to_runner_api(default_environment=self._default_environment))
201 return self._latest_run_result
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in run_via_runner_api(self, pipeline_proto)
208 # TODO(pabloem, BEAM-7514): Create a watermark manager (that has access to
209 # the teststream (if any), and all the stages).
--> 210 return self.run_stages(stage_context, stages)
211
212 #contextlib.contextmanager
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in run_stages(self, stage_context, stages)
393 )
394
--> 395 stage_results = self._run_stage(
396 runner_execution_context, bundle_context_manager)
397
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in _run_stage(self, runner_execution_context, bundle_context_manager)
658 while True:
659 last_result, deferred_inputs, fired_timers, watermark_updates = (
--> 660 self._run_bundle(
661 runner_execution_context,
662 bundle_context_manager,
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in _run_bundle(self, runner_execution_context, bundle_context_manager, data_input, data_output, input_timers, expected_timer_output, bundle_manager)
781 expected_timer_output)
782
--> 783 result, splits = bundle_manager.process_bundle(
784 data_input, data_output, input_timers, expected_timer_output)
785 # Now we collect all the deferred inputs remaining from bundle execution.
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py in process_bundle(self, inputs, expected_outputs, fired_timers, expected_output_timers, dry_run)
1092 process_bundle_descriptor.id,
1093 cache_tokens=[next(self._cache_token_generator)]))
-> 1094 result_future = self._worker_handler.control_conn.push(process_bundle_req)
1095
1096 split_results = [] # type: List[beam_fn_api_pb2.ProcessBundleSplitResponse]
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/portability/fn_api_runner/worker_handlers.py in push(self, request)
376 self._uid_counter += 1
377 request.instruction_id = 'control_%s' % self._uid_counter
--> 378 response = self.worker.do_instruction(request)
379 return ControlFuture(request.instruction_id, response)
380
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py in do_instruction(self, request)
578 if request_type:
579 # E.g. if register is set, this will call self.register(request.register))
--> 580 return getattr(self, request_type)(
581 getattr(request, request_type), request.instruction_id)
582 else:
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py in process_bundle(self, request, instruction_id)
616 with self.maybe_profile(instruction_id):
617 delayed_applications, requests_finalization = (
--> 618 bundle_processor.process_bundle(instruction_id))
619 monitoring_infos = bundle_processor.monitoring_infos()
620 monitoring_infos.extend(self.state_cache_metrics_fn())
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py in process_bundle(self, instruction_id)
993 element.timer_family_id, timer_data)
994 elif isinstance(element, beam_fn_api_pb2.Elements.Data):
--> 995 input_op_by_transform_id[element.transform_id].process_encoded(
996 element.data)
997
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py in process_encoded(self, encoded_windowed_values)
219 decoded_value = self.windowed_coder_impl.decode_from_stream(
220 input_stream, True)
--> 221 self.output(decoded_value)
222
223 def monitoring_infos(self, transform_id, tag_to_pcollection_id):
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.Operation.output()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.Operation.output()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.SingletonConsumerSet.receive()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner._reraise_augmented()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.SimpleInvoker.invoke_process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common._OutputProcessor.process_outputs()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.SingletonConsumerSet.receive()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner._reraise_augmented()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.SimpleInvoker.invoke_process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common._OutputProcessor.process_outputs()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.SingletonConsumerSet.receive()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/worker/operations.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.worker.operations.DoOperation.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner._reraise_augmented()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.DoFnRunner.process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.PerWindowInvoker.invoke_process()
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/common.cpython-38-x86_64-linux-gnu.so in apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window()
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/core.py in <lambda>(x, *args, **kwargs)
1635 from apache_beam.transforms.util import fn_takes_side_inputs
1636 if fn_takes_side_inputs(fn):
-> 1637 wrapper = lambda x, *args, **kwargs: [fn(x, *args, **kwargs)]
1638 else:
1639 wrapper = lambda x: [fn(x)]
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/beam/impl.py in _create_v2_saved_model(tensor_replacement_map, base_temp_dir, preprocessing_fn, input_signature, baseline_analyzers_fingerprint, output_keys_to_name_map)
661 """
662 saved_model_dir = beam_common.get_unique_temp_path(base_temp_dir)
--> 663 impl_helper.trace_and_write_v2_saved_model(saved_model_dir, preprocessing_fn,
664 input_signature, base_temp_dir,
665 baseline_analyzers_fingerprint,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/impl_helper.py in trace_and_write_v2_saved_model(saved_model_dir, preprocessing_fn, input_signature, base_temp_dir, baseline_analyzers_fingerprint, tensor_replacement_map, output_keys_to_name_map)
892 if not concrete_transform_fn.graph.get_collection(
893 analyzer_nodes.TENSOR_REPLACEMENTS):
--> 894 metadata = _trace_and_get_metadata(concrete_transform_fn, structured_inputs,
895 preprocessing_fn, base_temp_dir,
896 tensor_replacement_map)
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/impl_helper.py in _trace_and_get_metadata(concrete_transform_fn, structured_inputs, preprocessing_fn, base_temp_dir, tensor_replacement_map)
804 evaluate_schema_overrides=True)
805 return dataset_metadata.DatasetMetadata(
--> 806 schema=schema_inference.infer_feature_schema_v2(
807 concrete_transform_fn.structured_outputs,
808 concrete_metadata_fn,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in infer_feature_schema_v2(features, concrete_metadata_fn, evaluate_schema_overrides)
254 tensor_annotations, global_annotations = _get_schema_annotations_v2(
255 metadata)
--> 256 return _infer_feature_schema_common(
257 features,
258 tensor_ranges,
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _infer_feature_schema_common(features, tensor_ranges, feature_annotations, global_annotations, is_evaluation_complete)
299 domains[name] = schema_pb2.IntDomain(
300 min=min_value, max=max_value, is_categorical=True)
--> 301 feature_spec = _feature_spec_from_batched_tensors(features,
302 is_evaluation_complete)
303
/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _feature_spec_from_batched_tensors(tensors, is_evaluation_complete)
127 if is_evaluation_complete and any(
128 dim is None for dim in shape.as_list()[1:]):
--> 129 raise ValueError(
130 'Feature {} ({}) had invalid shape {} for FixedLenFeature: apart '
131 'from the batch dimension, all dimensions must have known size'
ValueError: Feature raw_image (Tensor("Identity_1:0", shape=(None, 1, None), dtype=int64)) had invalid shape (None, 1, None) for FixedLenFeature: apart from the batch dimension, all dimensions must have known size [while running 'Analyze/CreateSavedModel[tf_v2_only]/CreateSavedModel']
I know the label feature is working as I can call the below code and get a print as so....
transform = tfx.components.Transform(
examples=example_gen.outputs['examples'],
schema=schema_gen.outputs['schema'],
module_file=os.path.abspath(_mnist_transform_module))
context.run(transform, enable_cache=False)
# Get the URI of the output artifact representing the transformed examples
train_uri = os.path.join(transform.outputs['transformed_examples'].get()[0].uri, 'Split-train')
# Get the list of files in this directory (all compressed TFRecord files)
tfrecord_filenames = [os.path.join(train_uri, name)
for name in os.listdir(train_uri)]
# Create a `TFRecordDataset` to read these files
dataset = tf.data.TFRecordDataset(tfrecord_filenames, compression_type="GZIP")
# Decode the first record and print output
for tfrecord in dataset.take(1):
serialized_example = tfrecord.numpy()
example = tf.train.Example()
example.ParseFromString(serialized_example)
print(example)
IF I remove the lines:
img = tf.io.decode_raw(raw_image_dataset, tf.int64)
outputs[_IMAGE_KEY] = img
I get printed
features {
feature {
key: "label"
value {
int64_list {
value: 5
}
}
}
}
This shows what I am doing to the label feature is working but I really can't figure how to transform the image bytes. Part of the issue is I'm not completely sure what the format is as it's just a tensor which is pretty opaque. It seems given the label operation I'm operating on a column of data effectively but again, can't figure the correct operation or syntax

For any future viewers this works
raw_image_dataset = tf.map_fn(fn = lambda x : tf.io.parse_tensor(x[0], tf.uint8, name=None), elems = raw_image_dataset, fn_output_signature = tf.TensorSpec((28,28),dtype=tf.uint8, name=None), infer_shape = True)
raw_image_dataset = tf.cast(raw_image_dataset, tf.int64)
outputs[_IMAGE_KEY] = raw_image_dataset

So I think I solved this using
raw_image_dataset = inputs[_IMAGE_KEY]
raw_image_dataset = tf.map_fn(fn = lambda x : tf.io.decode_image(x[0]) , elems = raw_image_dataset, dtype=tf.uint8)
Theres something about the data going in as a batch so needing to map it and also using the right component of the resulting tensor "x[0]", I'm still not 100% sure on why this is the case but it seems to run.
Now I'm struggling with TFX as it won't let me output features that are different to the what went in...

Related

"ValueError: Unable to create tensor" when trying to train a hugging face transformer

I am trying to use the "visheratin/t5-efficient-mini-grammar-correction" pre-trained model for grammar correction and I would like to add my own training examples.
I've loaded the model:
model = AutoModelForSeq2SeqLM.from_pretrained("visheratin/t5-efficient-mini-grammar-correction")
tokenizer = AutoTokenizer.from_pretrained("visheratin/t5-efficient-mini-grammar-correction")
set the training arguments:
training_args = TrainingArguments(
output_dir="./models",
num_train_epochs=3,
per_device_train_batch_size=8,
learning_rate=3e-5,
weight_decay=0.01,
)
and created the training data:
training_examples = [ ('input text 1', 'output text 1'), ('input text 2', 'output text 2') ]
train_data = []
for input_text, target_text in training_examples:
input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True, padding=True)
target_ids = tokenizer.encode(target_text, return_tensors="pt", truncation=True, padding=True)
train_data.append({
'input_ids': input_ids,
'attention_mask': torch.ones_like(input_ids),
'labels': target_ids,
})
but when I go to train:
trainer = Trainer(
model=model,
tokenizer=tokenizer,
args=training_args,
train_dataset=train_data,
)
trainer.train()
I get this error:
ValueError: Unable to create tensor, you should probably activate truncation and/or
padding with 'padding=True' 'truncation=True' to have batched tensors with the same
length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs
type `list` where type `int` is expected).
I already have 'padding=True' and 'truncation=True' to tokenizer.encode() and as far as I can tell I do not have any excessive nesting in my features.
This is the full Traceback:
ValueError Traceback (most recent call last)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:715, in BatchEncoding.convert_to_tensors(self, tensor_type, prepend_batch_axis)
714 if not is_tensor(value):
--> 715 tensor = as_tensor(value)
717 # Removing this for now in favor of controlling the shape with `prepend_batch_axis`
718 # # at-least2d
719 # if tensor.ndim > 2:
720 # tensor = tensor.squeeze(0)
721 # elif tensor.ndim < 2:
722 # tensor = tensor[None, :]
ValueError: expected sequence of length 37 at dim 2 (got 44)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[104], line 1
----> 1 trainer.train()
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/trainer.py:1501, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1496 self.model_wrapped = self.model
1498 inner_training_loop = find_executable_batch_size(
1499 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1500 )
-> 1501 return inner_training_loop(
1502 args=args,
1503 resume_from_checkpoint=resume_from_checkpoint,
1504 trial=trial,
1505 ignore_keys_for_eval=ignore_keys_for_eval,
1506 )
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/trainer.py:1723, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1720 self._load_rng_state(resume_from_checkpoint)
1722 step = -1
-> 1723 for step, inputs in enumerate(epoch_iterator):
1724
1725 # Skip past any already trained steps if resuming training
1726 if steps_trained_in_current_epoch > 0:
1727 steps_trained_in_current_epoch -= 1
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/torch/utils/data/dataloader.py:628, in _BaseDataLoaderIter.__next__(self)
625 if self._sampler_iter is None:
626 # TODO(https://github.com/pytorch/pytorch/issues/76750)
627 self._reset() # type: ignore[call-arg]
--> 628 data = self._next_data()
629 self._num_yielded += 1
630 if self._dataset_kind == _DatasetKind.Iterable and \
631 self._IterableDataset_len_called is not None and \
632 self._num_yielded > self._IterableDataset_len_called:
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/torch/utils/data/dataloader.py:671, in _SingleProcessDataLoaderIter._next_data(self)
669 def _next_data(self):
670 index = self._next_index() # may raise StopIteration
--> 671 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
672 if self._pin_memory:
673 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py:61, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
59 else:
60 data = self.dataset[possibly_batched_index]
---> 61 return self.collate_fn(data)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/trainer_utils.py:696, in RemoveColumnsCollator.__call__(self, features)
694 def __call__(self, features: List[dict]):
695 features = [self._remove_columns(feature) for feature in features]
--> 696 return self.data_collator(features)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/data/data_collator.py:249, in DataCollatorWithPadding.__call__(self, features)
248 def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
--> 249 batch = self.tokenizer.pad(
250 features,
251 padding=self.padding,
252 max_length=self.max_length,
253 pad_to_multiple_of=self.pad_to_multiple_of,
254 return_tensors=self.return_tensors,
255 )
256 if "label" in batch:
257 batch["labels"] = batch["label"]
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2985, in PreTrainedTokenizerBase.pad(self, encoded_inputs, padding, max_length, pad_to_multiple_of, return_attention_mask, return_tensors, verbose)
2982 batch_outputs[key] = []
2983 batch_outputs[key].append(value)
-> 2985 return BatchEncoding(batch_outputs, tensor_type=return_tensors)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:210, in BatchEncoding.__init__(self, data, encoding, tensor_type, prepend_batch_axis, n_sequences)
206 n_sequences = encoding[0].n_sequences
208 self._n_sequences = n_sequences
--> 210 self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
File ~/opt/miniconda3/envs/ub_nbdev2/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:731, in BatchEncoding.convert_to_tensors(self, tensor_type, prepend_batch_axis)
726 if key == "overflowing_tokens":
727 raise ValueError(
728 "Unable to create tensor returning overflowing tokens of different lengths. "
729 "Please see if a fast version of this tokenizer is available to have this feature available."
730 )
--> 731 raise ValueError(
732 "Unable to create tensor, you should probably activate truncation and/or padding with"
733 " 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your"
734 f" features (`{key}` in this case) have excessive nesting (inputs type `list` where type `int` is"
735 " expected)."
736 )
738 return self
ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).
Could someone please help me understand what could be causing this error?

StackingClassifier Raises Exception 'numpy.ndarray' object has no attribute 'columns'

I am trying to train a StackingClassifier in Sklearn, but I keep running into this error where the fit method seems to be complaining about me having passed it numpy arrays. To my knowledge, this is how all the fit methods in sklearn are supposed to work. I read and followed the example from the documentation and expanded on it to include a more complex and comprehensive pipeline that would process categorical, ordinal, scalar, and text data.
Sorry in advance for the lengthy code sample, but I felt it was necessary to provide a complete reproducible example. Simply breaking down the pipeline into its constituent estimators and test those individually did not raise any exceptions, so I figure that the error somehow comes from the gestalt estimator.
Select Features
categorical_data = [
"race",
"gender",
"admission_type_id",
"discharge_disposition_id",
"admission_source_id",
"insulin",
"diabetesMed",
"change",
"payer_code",
"A1Cresult",
"metformin",
"repaglinide",
"nateglinide",
"chlorpropamide",
"glimepiride",
"glipizide",
"glyburide",
"tolbutamide",
"pioglitazone",
"rosiglitazone",
"acarbose",
"miglitol",
"tolazamide",
"glyburide.metformin",
"glipizide.metformin",
]
ordinal_data = [
"age"
]
scalar_data = [
"num_medications",
"time_in_hospital",
"num_lab_procedures",
"num_procedures",
"number_outpatient",
"number_emergency",
"number_inpatient",
"number_diagnoses",
]
text_data = [
"diag_1_desc",
"diag_2_desc",
"diag_3_desc"
]
Create Column Transformers
impute_trans = compose.make_column_transformer(
(
impute.SimpleImputer(
strategy="constant",
fill_value="missing"
),
categorical_data
)
)
encode_trans = compose.make_column_transformer(
(
preprocessing.OneHotEncoder(
sparse=False,
handle_unknown="ignore"
),
categorical_data
),
(
preprocessing.OrdinalEncoder(),
ordinal_data
)
)
scalar_trans = compose.make_column_transformer(
(preprocessing.StandardScaler(), scalar_data),
)
text_trans = compose.make_column_transformer(
(TfidfVectorizer(ngram_range=(1,2)), "diag_1_desc"),
(TfidfVectorizer(ngram_range=(1,2)), "diag_2_desc"),
(TfidfVectorizer(ngram_range=(1,2)), "diag_3_desc"),
)
Create Estimators
cat_pre_pipe = make_pipeline(impute_trans, encode_trans)
logreg = LogisticRegression(
solver = "saga",
penalty="elasticnet",
l1_ratio=0.5,
max_iter=1000
)
text_pipe = make_pipeline(text_trans, logreg)
scalar_pipe = make_pipeline(scalar_trans, logreg)
cat_pipe = make_pipeline(cat_pre_pipe, logreg)
estimators = [
("cat", cat_pipe),
("text", text_pipe),
("scalar", scalar_pipe)
]
Create Stacking Classifier
stack_clf = StackingClassifier(
estimators=estimators,
final_estimator=logreg
)
diabetes_data = pd.read_csv("8k_diabetes.csv", delimiter=',')
x_train, x_test, y_train, y_test = train_test_split(
pd.concat([
preprocess_dataframe(diabetes_data[text_data]),
diabetes_data[categorical_data + scalar_data]
], axis=1),
diabetes_data["readmitted"].astype(int)
)
stack_clf.fit(x_train, y_train)
Complete Stack Trace
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/utils/__init__.py:409, in _get_column_indices(X, key)
408 try:
--> 409 all_columns = X.columns
410 except AttributeError:
AttributeError: 'numpy.ndarray' object has no attribute 'columns'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Input In [19], in <cell line: 1>()
----> 1 stack_clf.fit(x_train, y_train)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py:488, in StackingClassifier.fit(self, X, y, sample_weight)
486 self._le = LabelEncoder().fit(y)
487 self.classes_ = self._le.classes_
--> 488 return super().fit(X, self._le.transform(y), sample_weight)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py:158, in _BaseStacking.fit(self, X, y, sample_weight)
153 stack_method = [self.stack_method] * len(all_estimators)
155 # Fit the base estimators on the whole training data. Those
156 # base estimators will be used in transform, predict, and
157 # predict_proba. They are exposed publicly.
--> 158 self.estimators_ = Parallel(n_jobs=self.n_jobs)(
159 delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)
160 for est in all_estimators
161 if est != "drop"
162 )
164 self.named_estimators_ = Bunch()
165 est_fitted_idx = 0
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/parallel.py:1043, in Parallel.__call__(self, iterable)
1034 try:
1035 # Only set self._iterating to True if at least a batch
1036 # was dispatched. In particular this covers the edge
(...)
1040 # was very quick and its callback already dispatched all the
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1046 while self.dispatch_one_batch(iterator):
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/parallel.py:861, in Parallel.dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/parallel.py:779, in Parallel._dispatch(self, batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
782 # grow. To ensure correct results ordering, .insert is
783 # used (rather than .append) in the following line
784 self._jobs.insert(job_idx, job)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/_parallel_backends.py:208, in SequentialBackend.apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/_parallel_backends.py:572, in ImmediateResult.__init__(self, batch)
569 def __init__(self, batch):
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/parallel.py:262, in BatchedCalls.__call__(self)
258 def __call__(self):
259 # Set the default nested backend to self._backend but do not set the
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/parallel.py:262, in <listcomp>(.0)
258 def __call__(self):
259 # Set the default nested backend to self._backend but do not set the
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/utils/fixes.py:216, in _FuncWrapper.__call__(self, *args, **kwargs)
214 def __call__(self, *args, **kwargs):
215 with config_context(**self.config):
--> 216 return self.function(*args, **kwargs)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/ensemble/_base.py:42, in _fit_single_estimator(estimator, X, y, sample_weight, message_clsname, message)
40 else:
41 with _print_elapsed_time(message_clsname, message):
---> 42 estimator.fit(X, y)
43 return estimator
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/pipeline.py:390, in Pipeline.fit(self, X, y, **fit_params)
364 """Fit the model.
365
366 Fit all the transformers one after the other and transform the
(...)
387 Pipeline with fitted steps.
388 """
389 fit_params_steps = self._check_fit_params(**fit_params)
--> 390 Xt = self._fit(X, y, **fit_params_steps)
391 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
392 if self._final_estimator != "passthrough":
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/pipeline.py:348, in Pipeline._fit(self, X, y, **fit_params_steps)
346 cloned_transformer = clone(transformer)
347 # Fit or load from cache the current transformer
--> 348 X, fitted_transformer = fit_transform_one_cached(
349 cloned_transformer,
350 X,
351 y,
352 None,
353 message_clsname="Pipeline",
354 message=self._log_message(step_idx),
355 **fit_params_steps[name],
356 )
357 # Replace the transformer of the step with the fitted
358 # transformer. This is necessary when loading the transformer
359 # from the cache.
360 self.steps[step_idx] = (name, fitted_transformer)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/joblib/memory.py:349, in NotMemorizedFunc.__call__(self, *args, **kwargs)
348 def __call__(self, *args, **kwargs):
--> 349 return self.func(*args, **kwargs)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/pipeline.py:893, in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/pipeline.py:434, in Pipeline.fit_transform(self, X, y, **fit_params)
432 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
433 if hasattr(last_step, "fit_transform"):
--> 434 return last_step.fit_transform(Xt, y, **fit_params_last_step)
435 else:
436 return last_step.fit(Xt, y, **fit_params_last_step).transform(Xt)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py:672, in ColumnTransformer.fit_transform(self, X, y)
670 self._check_n_features(X, reset=True)
671 self._validate_transformers()
--> 672 self._validate_column_callables(X)
673 self._validate_remainder(X)
675 result = self._fit_transform(X, y, _fit_transform_one)
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py:352, in ColumnTransformer._validate_column_callables(self, X)
350 columns = columns(X)
351 all_columns.append(columns)
--> 352 transformer_to_input_indices[name] = _get_column_indices(X, columns)
354 self._columns = all_columns
355 self._transformer_to_input_indices = transformer_to_input_indices
File ~/anaconda3/envs/assignment2/lib/python3.8/site-packages/sklearn/utils/__init__.py:411, in _get_column_indices(X, key)
409 all_columns = X.columns
410 except AttributeError:
--> 411 raise ValueError(
412 "Specifying the columns using strings is only "
413 "supported for pandas DataFrames"
414 )
415 if isinstance(key, str):
416 columns = [key]
ValueError: Specifying the columns using strings is only supported for pandas DataFrames
Full Pipeline Diagram

Your categorical pipeline chains two column transformers together. After the first one, the output is a numpy array, but then the second one cannot select transformers by column name as you've requested. Notice the final error message is more informative here, ValueError: Specifying the columns using strings is only supported for pandas DataFrames.
I'd suggest using one column transformer with separate pipelines instead of one pipeline with multiple columntransformers for this reason.

TF 2.6: Input 'resource' of 'AssignVariableOp' Op has type float32 that does not match expected type of resource

I am applying an autoencoder to categorical data. My code used to work just fine, but after updating to TF 2.6, it throws this error while attempting to save my model: Input 'resource' of 'AssignVariableOp' Op has type float32 that does not match expected type of resource.
The data file can be found here: https://drive.google.com/file/d/1Jnblr3Ik88V_qd3N5EDkTkBtFu_E7ku1/view?usp=sharing
It is in tsv format.
Here is the code and full error traceback:
import csv
import sys
import numpy as np
import math
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Reshape, Dropout
from tensorflow.keras.regularizers import l1
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.callbacks as kcb
def adjust_count(x) :
if x<2 :
return x
else :
return x-1
mutations = {}
mut_info = {}
sampleIDs = []
mapping = {
"0|0":0,
"0|1":1,
"1|0":2,
"1|1":3
}
SNP_vcf = "/my/dir/in/chr1_SNPs_50785418_51253612_fh.vcf"
outdir = "/my/dir/out/"
threshold = 1
chromosome, foo, start, end, _ = SNP_vcf.split('/').pop().split('.').pop(0).split('_') # foo will be "SNPs"
start=int(start)
end=int(end)
with open(SNP_vcf, newline='') as csvfile:
csvread = csv.reader(csvfile, delimiter='\t', quotechar='"')
for myline in csvread: #reads csvfile line by line, storing array of cells into myline
if(myline[0][0] == '#'):
del(myline[0:9])
for mycell in myline :
sampleIDs.append(mycell)
else :
chrom, pos, notID, ref, alt = myline[0:5] #get info of the SNP
ID = chrom + "_" + pos + "." + ref + "_" + alt
chrom = int(chrom)
pos = int(pos) #these will be stored as str otherwise
info = myline[7].split(';') # get info stored as string
dinfo = {} # we need 'AF', not always at same position in string
for v in info : # so convert the list into a dict to get it after
t = v.split('=')
if(len(t)>1) :
dinfo[t[0]] = t[1]
mut_info[ID] = [[chrom, pos], #position info
[ref, alt], #nucletodie info
min(float(dinfo['AF']), 1-float(dinfo['AF'])), #global allele frequency (min(x, 1-x) because some have freq>50%)
-1]
mutations[ID] = [] #will be used to store the mutation in pop data
del(myline[0:9])
for mycell in myline :
mutations[ID].append(mapping[mycell]) # ONE HOT ENCODING
mut_info[ID][3] = sum(map(adjust_count, mutations[ID]))
print("Read "+str(len(mutations))+" mutations from "+str(start)+" to "+str(end)+" on "+ chromosome)
tokeep = []
if threshold < 1 :
for k,v in mut_info.items() :
if v[2] >= threshold :
tokeep.append(k)
else:
for k,v in mut_info.items() :
if v[3] >= threshold :
tokeep.append(k)
print(str(len(tokeep))+" mutations passed the frequency threshold of "+str(threshold))
x = []
for k in tokeep :
x.append(mutations[k])
x_arr = np.array(x)
x_tr = np.transpose(x_arr)
test_index = np.random.choice(range(len(x_tr)), math.floor(len(x_tr)/5), replace=False)
x_test = to_categorical(x_tr[test_index])
x_train = to_categorical(np.delete(x_tr, test_index, 0))
print("Total data is of shape "+str(x_tr.shape))
print("Training data is of shape "+str(x_train.shape))
print("Testing data is of shape " + str(x_test.shape))
input_size = len(tokeep)
hidden_1_size = math.ceil(0.5*input_size)
hidden_2_size = math.ceil(0.2*input_size)
code_size = math.ceil(0.05*input_size)
print("Input size: "+str(input_size))
print("First hidden layer size: "+str(hidden_1_size))
print("Second hidden layer size: "+str(hidden_2_size))
print("Code size: "+str(code_size))
input_data = Input(shape=x_train[1].shape)
f = Flatten()(input_data)
hidden_1 = Dense(hidden_1_size, activation='relu')(f)
e_drop = Dropout(0.4)(hidden_1)
hidden_2 = Dense(hidden_2_size, activation='relu')(e_drop)
code = Dense(code_size, activation='relu')(hidden_2)
hidden_2_rev = Dense(hidden_2_size, activation='relu')(code)
d_drop = Dropout(0.4)(hidden_2_rev)
hidden_1_rev = Dense(hidden_1_size, activation='relu')(d_drop)
output_data = Dense(input_size*4, activation='softmax')(hidden_1_rev)
rshp_output_data = Reshape((-1,4))(output_data)
autoencoder = Model(input_data, rshp_output_data)
encoder = Model(input_data, code)
encoded_input = Input(shape=(code_size,))
decoder = Model(encoded_input, autoencoder.layers[-1](autoencoder.layers[-2](autoencoder.layers[-3](autoencoder.layers[-4](autoencoder.layers[-5](encoded_input))))) )
autoencoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[tf.keras.metrics.CategoricalAccuracy()]) # when data is OHE
history = autoencoder.fit( x_train, x_train, epochs=1000, shuffle=True, validation_data=(x_test, x_test),
callbacks=kcb.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True) )
encoder.save(outdir + "encoder_fh_"+chromosome+"_"+str(start)+"_"+str(end))
--------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
516 else:
--> 517 values = ops.convert_to_tensor(
518 values,
~/.local/lib/python3.8/site-packages/tensorflow/python/profiler/trace.py in wrapped(*args, **kwargs)
162 return func(*args, **kwargs)
--> 163 return func(*args, **kwargs)
164
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1532 if dtype is not None and not dtype.is_compatible_with(value.dtype):
-> 1533 raise ValueError(
1534 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
ValueError: Tensor conversion requested dtype resource for Tensor with dtype float32: <tf.Tensor 'dense_6/kernel/Read/ReadVariableOp:0' shape=(10000, 1250) dtype=float32>
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-16-e76ca5687b87> in <module>
1 ## SAVE OUPUT
2 # autoencoder.save(outdir + "autoencoder_"+chromosome+"_"+str(start)+"_"+str(end))
----> 3 encoder.save(outdir + "encoder_fh_"+chromosome+"_"+str(start)+"_"+str(end))
~/.local/lib/python3.8/site-packages/keras/engine/training.py in save(self, filepath, overwrite, include_optimizer, save_format, signatures, options, save_traces)
2143 """
2144 # pylint: enable=line-too-long
-> 2145 save.save_model(self, filepath, overwrite, include_optimizer, save_format,
2146 signatures, options, save_traces)
2147
~/.local/lib/python3.8/site-packages/keras/saving/save.py in save_model(model, filepath, overwrite, include_optimizer, save_format, signatures, options, save_traces)
147 else:
148 with generic_utils.SharedObjectSavingScope():
--> 149 saved_model_save.save(model, filepath, overwrite, include_optimizer,
150 signatures, options, save_traces)
151
~/.local/lib/python3.8/site-packages/keras/saving/saved_model/save.py in save(model, filepath, overwrite, include_optimizer, signatures, options, save_traces)
88 with K.deprecated_internal_learning_phase_scope(0):
89 with utils.keras_option_scope(save_traces):
---> 90 saved_nodes, node_paths = save_lib.save_and_return_nodes(
91 model, filepath, signatures, options)
92
~/.local/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py in save_and_return_nodes(obj, export_dir, signatures, options, experimental_skip_checkpoint)
1226
1227 _, exported_graph, object_saver, asset_info, saved_nodes, node_paths = (
-> 1228 _build_meta_graph(obj, signatures, options, meta_graph_def))
1229 saved_model.saved_model_schema_version = (
1230 pywrap_libexport.SAVED_MODEL_SCHEMA_VERSION)
~/.local/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py in _build_meta_graph(obj, signatures, options, meta_graph_def)
1397
1398 with save_context.save_context(options):
-> 1399 return _build_meta_graph_impl(obj, signatures, options, meta_graph_def)
~/.local/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py in _build_meta_graph_impl(obj, signatures, options, meta_graph_def)
1349 wrapped_functions)
1350 object_saver = util.TrackableSaver(checkpoint_graph_view)
-> 1351 asset_info, exported_graph = _fill_meta_graph_def(
1352 meta_graph_def, saveable_view, signatures,
1353 options.namespace_whitelist, options.experimental_custom_gradients)
~/.local/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py in _fill_meta_graph_def(meta_graph_def, saveable_view, signature_functions, namespace_whitelist, save_custom_gradients)
872 "to get the full error message.")
873
--> 874 saver_def = saver.to_proto()
875 meta_graph_def.saver_def.CopyFrom(saver_def)
876 graph_def = exported_graph.as_graph_def(add_shapes=True)
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/functional_saver.py in to_proto(self)
180 shape=[], dtype=dtypes.string, name="saver_filename")
181 save_tensor = self._traced_save(filename_tensor)
--> 182 restore_op = self._traced_restore(filename_tensor).op
183 return saver_pb2.SaverDef(
184 filename_tensor_name=filename_tensor.name,
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
758 self._concrete_stateful_fn = (
--> 759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
760 *args, **kwds))
761
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
3068
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3461
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3465
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3296 arg_names = base_arg_names + missing_arg_names
3297 graph_function = ConcreteFunction(
-> 3298 func_graph_module.func_graph_from_py_func(
3299 self._name,
3300 self._python_function,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
~/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in bound_method_wrapper(*args, **kwargs)
3983 if tf_inspect.ismethod(wrapped_fn):
3984 wrapped_fn = six.get_unbound_function(wrapped_fn)
-> 3985 return wrapped_fn(weak_instance(), *args, **kwargs)
3986
3987 # If __wrapped__ was replaced, then it is always an unbound function.
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/functional_saver.py in _traced_restore(self, file_prefix)
200 autograph=False)
201 def _traced_restore(self, file_prefix):
--> 202 restore_ops = self.restore(file_prefix)
203 with ops.device("cpu:0"):
204 with ops.control_dependencies(restore_ops.values()):
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/functional_saver.py in restore(self, file_prefix, options)
337 restore_ops = tf_function_restore()
338 else:
--> 339 restore_ops = restore_fn()
340
341 for callback in self._after_restore_callbacks:
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/functional_saver.py in restore_fn()
321 for device, saver in sorted(self._single_device_savers.items()):
322 with ops.device(device):
--> 323 restore_ops.update(saver.restore(file_prefix, options))
324
325 return restore_ops
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/functional_saver.py in restore(self, file_prefix, options)
113 for saveable, restored_tensors in zip(self._saveable_objects,
114 structured_restored_tensors):
--> 115 restore_ops[saveable.name] = saveable.restore(
116 restored_tensors, restored_shapes=None)
117 return restore_ops
~/.local/lib/python3.8/site-packages/tensorflow/python/training/saving/saveable_object_util.py in restore(self, restored_tensors, restored_shapes)
129 with ops.device(self._var_device):
130 restored_tensor = array_ops.identity(restored_tensor)
--> 131 return resource_variable_ops.shape_safe_assign_variable_handle(
132 self.handle_op, self._var_shape, restored_tensor)
133
~/.local/lib/python3.8/site-packages/tensorflow/python/ops/resource_variable_ops.py in shape_safe_assign_variable_handle(handle, shape, value, name)
308 value_tensor = ops.convert_to_tensor(value)
309 shape.assert_is_compatible_with(value_tensor.shape)
--> 310 return gen_resource_variable_ops.assign_variable_op(
311 handle, value_tensor, name=name)
312
~/.local/lib/python3.8/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py in assign_variable_op(resource, value, name)
152 pass # Add nodes to the TensorFlow graph.
153 # Add nodes to the TensorFlow graph.
--> 154 _, _, _op, _outputs = _op_def_library._apply_op_helper(
155 "AssignVariableOp", resource=resource, value=value, name=name)
156 return _op
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
542 (input_name, op_type_name, observed))
543 if input_arg.type != types_pb2.DT_INVALID:
--> 544 raise TypeError("%s expected type of %s." %
545 (prefix, dtypes.as_dtype(input_arg.type).name))
546 else:
TypeError: Input 'resource' of 'AssignVariableOp' Op has type float32 that does not match expected type of resource.
I tried specifying the dtype when creating x_train and x_test, but it does not affect this type mismatch error. What can I do to fix it?

TensorFlow: TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

Let's say I have this data frame saved in a parquet format
import numpy as np
import pandas as pd
data = pd.DataFrame(dict(
a=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
b=[1.0, 1.0, 1.0, np.NaN, 0.0, np.NaN],
c=[0.9, np.NaN, 1.0, 0.0, 0.0, 0.0]
))
data.to_parquet('data.parquet')
along with a dictionary that tells me which values I should use for the imputation. Then I can write a preprocessing function.
import tensorflow as tf
impute_dictionary = dict(b=1.0, c=0.0)
def preprocessing_fn(inputs):
outputs = inputs.copy()
for key, value in impute_dictionary.items():
outputs[key] = tf.where(
tf.math.is_nan(outputs[key]),
tf.constant(value, shape=outputs[key].shape),
outputs[key]
)
return outputs
and use it in Apache Beam pipeline
import tempfile
import apache_beam as beam
import tensorflow_transform.beam as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata, schema_utils
temp = tempfile.gettempdir()
RAW_DATA_FEATURE_SPEC = dict(
[(name, tf.io.FixedLenFeature([], tf.float32)) for name in ['a', 'b', 'c']]
)
RAW_DATA_METADATA = dataset_metadata.DatasetMetadata(schema_utils.schema_from_feature_spec(RAW_DATA_FEATURE_SPEC))
with beam.Pipeline() as pipeline:
with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
raw_data = pipeline | 'ReadTrainData' >> beam.io.ReadFromParquet('data.parquet')
raw_dataset = (raw_data, RAW_DATA_METADATA)
transformed_dataset, transform_fn = (raw_dataset | tft_beam.AnalyzeAndTransformDataset(preprocessing_fn))
transformed_data, transformed_metadata = transformed_dataset
transformed_data_coder = tft.coders.ExampleProtoCoder(transformed_metadata.schema)
I get this error: TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'
It seems that outputs[key].shape is (None,)
Any suggestion?
Package versions:
tensorflow==2.1.0
tensorflow-transform==0.21.0
pandas==1.0.0
numpy==1.18.1
apache-beam==2.19.0
Entire error message:
WARNING: Logging before flag parsing goes to stderr.
W0204 10:36:03.793034 140735593104256 interactive_environment.py:113] Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
W0204 10:36:03.793169 140735593104256 interactive_environment.py:125] You have limited Interactive Beam features since your ipython kernel is not connected any notebook frontend.
W0204 10:36:03.929135 140735593104256 impl.py:360] Tensorflow version (2.1.0) found. Note that Tensorflow Transform support for TF 2.0 is currently in beta, and features such as tf.function may not work as intended.
W0204 10:36:03.929914 140735593104256 impl.py:360] Tensorflow version (2.1.0) found. Note that Tensorflow Transform support for TF 2.0 is currently in beta, and features such as tf.function may not work as intended.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-465b3f61784c> in <module>
17 raw_data = pipeline | 'ReadTrainData' >> beam.io.ReadFromParquet('data.parquet')
18 raw_dataset = (raw_data, RAW_DATA_METADATA)
---> 19 transformed_dataset, transform_fn = (raw_dataset | tft_beam.AnalyzeAndTransformDataset(preprocessing_fn))
20 transformed_data, transformed_metadata = transformed_dataset
21 transformed_data_coder = tft.coders.ExampleProtoCoder(transformed_metadata.schema)
/usr/local/lib/python3.7/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
547 pvalueish = _SetInputPValues().visit(pvalueish, replacements)
548 self.pipeline = p
--> 549 result = p.apply(self, pvalueish, label)
550 if deferred:
551 return result
/usr/local/lib/python3.7/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
575 transform.type_check_inputs(pvalueish)
576
--> 577 pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
578
579 if type_options is not None and type_options.pipeline_type_check:
/usr/local/lib/python3.7/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
193 m = getattr(self, 'apply_%s' % cls.__name__, None)
194 if m:
--> 195 return m(transform, input, options)
196 raise NotImplementedError(
197 'Execution of [%s] not implemented in runner %s.' % (transform, self))
/usr/local/lib/python3.7/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
223 def apply_PTransform(self, transform, input, options):
224 # The base case of apply is to call the transform's expand.
--> 225 return transform.expand(input)
226
227 def run_transform(self,
/usr/local/lib/python3.7/site-packages/tensorflow_transform/beam/impl.py in expand(self, dataset)
861 # e.g. caching the values of expensive computations done in AnalyzeDataset.
862 transform_fn = (
--> 863 dataset | 'AnalyzeDataset' >> AnalyzeDataset(self._preprocessing_fn))
864
865 if Context.get_use_deep_copy_optimization():
/usr/local/lib/python3.7/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
987
988 def __ror__(self, pvalueish, _unused=None):
--> 989 return self.transform.__ror__(pvalueish, self.label)
990
991 def expand(self, pvalue):
/usr/local/lib/python3.7/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
547 pvalueish = _SetInputPValues().visit(pvalueish, replacements)
548 self.pipeline = p
--> 549 result = p.apply(self, pvalueish, label)
550 if deferred:
551 return result
/usr/local/lib/python3.7/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
534 try:
535 old_label, transform.label = transform.label, label
--> 536 return self.apply(transform, pvalueish)
537 finally:
538 transform.label = old_label
/usr/local/lib/python3.7/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
575 transform.type_check_inputs(pvalueish)
576
--> 577 pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
578
579 if type_options is not None and type_options.pipeline_type_check:
/usr/local/lib/python3.7/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
193 m = getattr(self, 'apply_%s' % cls.__name__, None)
194 if m:
--> 195 return m(transform, input, options)
196 raise NotImplementedError(
197 'Execution of [%s] not implemented in runner %s.' % (transform, self))
/usr/local/lib/python3.7/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
223 def apply_PTransform(self, transform, input, options):
224 # The base case of apply is to call the transform's expand.
--> 225 return transform.expand(input)
226
227 def run_transform(self,
/usr/local/lib/python3.7/site-packages/tensorflow_transform/beam/impl.py in expand(self, dataset)
808 input_values, input_metadata = dataset
809 result, cache = super(AnalyzeDataset, self).expand((input_values, None,
--> 810 None, input_metadata))
811 assert not cache
812 return result
/usr/local/lib/python3.7/site-packages/tensorflow_transform/beam/impl.py in expand(self, dataset)
681 copied_inputs = impl_helper.copy_tensors(input_signature)
682
--> 683 output_signature = self._preprocessing_fn(copied_inputs)
684
685 # At this point we check that the preprocessing_fn has at least one
<ipython-input-2-205d9abf4136> in preprocessing_fn(inputs)
9 outputs[key] = tf.where(
10 tf.math.is_nan(outputs[key]),
---> 11 tf.constant(value, shape=outputs[key].shape),
12 outputs[key]
13 )
/usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py in constant(value, dtype, shape, name)
256 """
257 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 258 allow_broadcast=True)
259
260
/usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
294 tensor_util.make_tensor_proto(
295 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 296 allow_broadcast=allow_broadcast))
297 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
298 const_tensor = g._create_op_internal( # pylint: disable=protected-access
/usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
446 # If shape is None, numpy.prod returns None when dtype is not set, but
447 # raises exception when dtype is set to np.int64
--> 448 if shape is not None and np.prod(shape, dtype=np.int64) == 0:
449 nparray = np.empty(shape, dtype=np_dt)
450 else:
<__array_function__ internals> in prod(*args, **kwargs)
/usr/local/lib/python3.7/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims, initial, where)
2960 """
2961 return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out,
-> 2962 keepdims=keepdims, initial=initial, where=where)
2963
2964
/usr/local/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
88 return reduction(axis=axis, out=out, **passkwargs)
89
---> 90 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
91
92
TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

The problem is that I set the shape in tf.constant(value, shape=outputs[key].shape). I should have only used tf.constant(value, dtype=tf.float32).

ValueError when running MaskedAutoregressiveFlow example

I am trying to run the example for MaskedAutoregressiveFlow at https://www.tensorflow.org/api_docs/python/tf/contrib/distributions/bijectors/MaskedAutoregressiveFlow. It's a plain copy from the docs but I receive the following error. I've tried event_shape=[dims, 1] but that doesn't seem to help (different error). I'm not sure what to make of it.
Has anyone seen this as well?
import tensorflow as tf
import tensorflow.contrib.distributions as tfd
from tensorflow.contrib.distributions import bijectors as tfb
dims = 5
# A common choice for a normalizing flow is to use a Gaussian for the base
# distribution. (However, any continuous distribution would work.) E.g.,
maf = tfd.TransformedDistribution(
distribution=tfd.Normal(loc=0., scale=1.),
bijector=tfb.MaskedAutoregressiveFlow(
shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
hidden_layers=[512, 512])),
event_shape=[dims])
x = maf.sample() # Expensive; uses `tf.while_loop`, no Bijector caching.
maf.log_prob(x) # Almost free; uses Bijector caching.
maf.log_prob(0.) # Cheap; no `tf.while_loop` despite no Bijector caching.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-3b2fcb2af309> in <module>()
11
12
---> 13 x = maf.sample() # Expensive; uses `tf.while_loop`, no Bijector caching.
14 maf.log_prob(x) # Almost free; uses Bijector caching.
15 maf.log_prob(0.) # Cheap; no `tf.while_loop` despite no Bijector caching.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/distributions/distribution.py in sample(self, sample_shape, seed, name)
687 samples: a `Tensor` with prepended dimensions `sample_shape`.
688 """
--> 689 return self._call_sample_n(sample_shape, seed, name)
690
691 def _log_prob(self, value):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/distributions/transformed_distribution.py in _call_sample_n(self, sample_shape, seed, name, **kwargs)
411 # work, it is imperative that this is the last modification to the
412 # returned result.
--> 413 y = self.bijector.forward(x, **kwargs)
414 y = self._set_sample_static_shape(y, sample_shape)
415
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/distributions/bijector_impl.py in forward(self, x, name)
618 NotImplementedError: if `_forward` is not implemented.
619 """
--> 620 return self._call_forward(x, name)
621
622 def _inverse(self, y):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/distributions/bijector_impl.py in _call_forward(self, x, name, **kwargs)
599 if mapping.y is not None:
600 return mapping.y
--> 601 mapping = mapping.merge(y=self._forward(x, **kwargs))
602 self._cache(mapping)
603 return mapping.y
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py in _forward(self, x)
245 y0 = array_ops.zeros_like(x, name="y0")
246 # call the template once to ensure creation
--> 247 _ = self._shift_and_log_scale_fn(y0)
248 def _loop_body(index, y0):
249 """While-loop body for autoregression calculation."""
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/template.py in __call__(self, *args, **kwargs)
358 custom_getter=self._custom_getter) as vs:
359 self._variable_scope = vs
--> 360 result = self._call_func(args, kwargs)
361 return result
362
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/template.py in _call_func(self, args, kwargs)
300 trainable_at_start = len(
301 ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
--> 302 result = self._func(*args, **kwargs)
303
304 if self._variables_created:
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py in _fn(x)
478 activation=activation,
479 *args,
--> 480 **kwargs)
481 x = masked_dense(
482 inputs=x,
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py in masked_dense(inputs, units, num_blocks, exclusive, kernel_initializer, reuse, name, *args, **kwargs)
386 *args,
387 **kwargs)
--> 388 return layer.apply(inputs)
389
390
/usr/local/lib/python3.6/dist-packages/tensorflow/python/layers/base.py in apply(self, inputs, *args, **kwargs)
807 Output tensor(s).
808 """
--> 809 return self.__call__(inputs, *args, **kwargs)
810
811 def _add_inbound_node(self,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
671
672 # Check input assumptions set before layer building, e.g. input rank.
--> 673 self._assert_input_compatibility(inputs)
674 if input_list and self._dtype is None:
675 try:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/layers/base.py in _assert_input_compatibility(self, inputs)
1195 ', found ndim=' + str(ndim) +
1196 '. Full shape received: ' +
-> 1197 str(x.get_shape().as_list()))
1198 # Check dtype.
1199 if spec.dtype is not None:
ValueError: Input 0 of layer dense_1 is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: [5]
originally defined at:
File "<ipython-input-2-3b2fcb2af309>", line 9, in <module>
hidden_layers=[512, 512])),
File "/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py", line 499, in masked_autoregressive_default_template
"masked_autoregressive_default_template", _fn)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/template.py", line 152, in make_template
**kwargs)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Image processing in Tensor flow TFX pipelines - python

Related

"ValueError: Unable to create tensor" when trying to train a hugging face transformer

StackingClassifier Raises Exception 'numpy.ndarray' object has no attribute 'columns'

TF 2.6: Input 'resource' of 'AssignVariableOp' Op has type float32 that does not match expected type of resource

TensorFlow: TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

ValueError when running MaskedAutoregressiveFlow example

Categories

Resources