C2W2 Assignment. Exercise 7

I keep getting the same ValueError.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-70-9e8eaa918618> in <module>
      9 
     10 # Run the component
---> 11 context.run(transform)

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run_if_ipython(*args, **kwargs)
     65       # __IPYTHON__ variable is set by IPython, see
     66       # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 67       return fn(*args, **kwargs)
     68     else:
     69       absl.logging.warning(

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
    180         telemetry_utils.LABEL_TFX_RUNNER: runner_label,
    181     }):
--> 182       execution_id = launcher.launch().execution_id
    183 
    184     return execution_result.ExecutionResult(

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
    200       absl.logging.info('Running executor for %s',
    201                         self._component_info.component_id)
--> 202       self._run_executor(execution_decision.execution_id,
    203                          execution_decision.input_dict,
    204                          execution_decision.output_dict,

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
     65         executor_context)  # type: ignore
     66 
---> 67     executor.Do(input_dict, output_dict, exec_properties)

/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in Do(self, input_dict, output_dict, exec_properties)
    415       label_outputs[labels.CACHE_OUTPUT_PATH_LABEL] = cache_output
    416     status_file = 'status_file'  # Unused
--> 417     self.Transform(label_inputs, label_outputs, status_file)
    418     absl.logging.debug('Cleaning up temp path %s on executor success',
    419                        temp_path)

/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in Transform(***failed resolving arguments***)
    933     materialization_format = (
    934         transform_paths_file_formats[-1] if materialize_output_paths else None)
--> 935     self._RunBeamImpl(analyze_data_list, transform_data_list,
    936                       preprocessing_fn, input_dataset_metadata,
    937                       transform_output_path, raw_examples_data_format,

/opt/conda/lib/python3.8/site-packages/tfx/components/transform/executor.py in _RunBeamImpl(self, analyze_data_list, transform_data_list, preprocessing_fn, input_dataset_metadata, transform_output_path, raw_examples_data_format, temp_path, input_cache_dir, output_cache_dir, compute_statistics, per_set_stats_output_paths, materialization_format, analyze_paths_count)
   1057 
   1058         transform_fn, cache_output = (
-> 1059             (input_analysis_data, input_cache,
   1060              analyze_data_tensor_adapter_config)
   1061             | 'Analyze' >> tft_beam.AnalyzeDatasetWithCache(

/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
   1056 
   1057   def __ror__(self, pvalueish, _unused=None):
-> 1058     return self.transform.__ror__(pvalueish, self.label)
   1059 
   1060   def expand(self, pvalue):

/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
    571     pvalueish = _SetInputPValues().visit(pvalueish, replacements)
    572     self.pipeline = p
--> 573     result = p.apply(self, pvalueish, label)
    574     if deferred:
    575       return result

/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
    644       try:
    645         old_label, transform.label = transform.label, label
--> 646         return self.apply(transform, pvalueish)
    647       finally:
    648         transform.label = old_label

/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
    687         transform.type_check_inputs(pvalueish)
    688 
--> 689       pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
    690 
    691       if type_options is not None and type_options.pipeline_type_check:

/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
    186       m = getattr(self, 'apply_%s' % cls.__name__, None)
    187       if m:
--> 188         return m(transform, input, options)
    189     raise NotImplementedError(
    190         'Execution of [%s] not implemented in runner %s.' % (transform, self))

/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
    216   def apply_PTransform(self, transform, input, options):
    217     # The base case of apply is to call the transform's expand.
--> 218     return transform.expand(input)
    219 
    220   def run_transform(self,

/opt/conda/lib/python3.8/site-packages/tensorflow_transform/beam/impl.py in expand(self, dataset)
   1168     input_values_pcoll_dict = dataset[1] or dict()
   1169     analyzer_cache.validate_dataset_keys(input_values_pcoll_dict.keys())
-> 1170     return super(AnalyzeDatasetWithCache,
   1171                  self).expand(self._make_parent_dataset(dataset))
   1172 

/opt/conda/lib/python3.8/site-packages/tensorflow_transform/beam/impl.py in expand(self, dataset)
   1106     # deferred manner, once the analyzer outputs are known.
   1107     if self._use_tf_compat_v1:
-> 1108       schema = schema_inference.infer_feature_schema(structured_outputs, graph)
   1109     else:
   1110       metadata_fn = schema_inference.get_traced_metadata_fn(

/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in infer_feature_schema(features, graph, session)
    148     feature_annotations[name] = tensor_annotations.get(hashable_values, [])
    149 
--> 150   return _infer_feature_schema_common(features, modified_tensor_ranges,
    151                                       feature_annotations, global_annotations)
    152 

/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _infer_feature_schema_common(features, tensor_ranges, feature_annotations, global_annotations)
    224       domains[name] = schema_pb2.IntDomain(
    225           min=min_value, max=max_value, is_categorical=True)
--> 226   feature_spec = _feature_spec_from_batched_tensors(features)
    227 
    228   schema_proto = schema_utils.schema_from_feature_spec(feature_spec, domains)

/opt/conda/lib/python3.8/site-packages/tensorflow_transform/schema_inference.py in _feature_spec_from_batched_tensors(tensors)
     71       shape = tensor.get_shape()
     72       if shape.ndims in [None, 0]:
---> 73         raise ValueError(
     74             'Feature {} ({}) had invalid shape {} for FixedLenFeature: must '
     75             'have rank at least 1'.format(name, tensor, shape))

ValueError: Feature traffic_volume_xf (Tensor("Cast_2:0", shape=(), dtype=int64)) had invalid shape () for FixedLenFeature: must have rank at least 1

Here is the code for exercise 6:
[image]

And the code for exercise 7:
[image]

What could be the issue?

The ValueError was complaining about the traffic_volume, so it is possible one of the last two transformation statements was not quite right.

One thing to try is to comment out the last two transformation statements related to the traffic_volume and see whether the context.run would execute with the other transformation steps. If the run works, you can then focus on the traffic_volume transformation.

Hope this helps.

2 Likes

Thank you so much! Indeed, I made a mistake when defining the traffic_volume. Here is the correct line:
traffic_volume = _fill_in_missing(tf.cast(inputs[_VOLUME_KEY], tf.float32))

3 Likes

Nicely done! Keep the great work going. :slight_smile:

I had a `preprocessing_fn not in user_module` error, and this corrected line finally took care of it. :relieved: :thanks:

1 Like