C2W3 - Exercise 2 RuntimeError on TRAINING_DIR_FSELECT

Hello,

I am receiving a "Split pattern ... does not match any files" RuntimeError from CsvExampleGen when targeting TRAINING_DIR_FSELECT.

I have verified that fselect\dataset.csv exists. I have also deleted the data and pipeline folders, as well as the checkpoints, without success.

Any help would be appreciated.

### START CODE HERE

# Instantiate ExampleGen with the input CSV dataset
example_gen = tfx.components.CsvExampleGen(input_base=TRAINING_DATA_FSELECT)

# Run the component using the InteractiveContext instance
context.run(example_gen)

### END CODE HERE
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-82-ceea169f9728> in <module>
      5 
      6 # Run the component using the InteractiveContext instance
----> 7 context.run(example_gen)
      8 
      9 ### END CODE HERE

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run_if_ipython(*args, **kwargs)
     61       # __IPYTHON__ variable is set by IPython, see
     62       # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 63       return fn(*args, **kwargs)
     64     else:
     65       absl.logging.warning(

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
    181         telemetry_utils.LABEL_TFX_RUNNER: runner_label,
    182     }):
--> 183       execution_id = launcher.launch().execution_id
    184 
    185     return execution_result.ExecutionResult(

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
    198       # be immutable in this context.
    199       # output_dict can still be changed, specifically properties.
--> 200       self._run_executor(execution_decision.execution_id,
    201                          copy.deepcopy(execution_decision.input_dict),
    202                          execution_decision.output_dict,

/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
     71     # be immutable in this context.
     72     # output_dict can still be changed, specifically properties.
---> 73     executor.Do(
     74         copy.deepcopy(input_dict), output_dict, copy.deepcopy(exec_properties))

/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in Do(self, input_dict, output_dict, exec_properties)
    272     logging.info('Generating examples.')
    273     with self._make_beam_pipeline() as pipeline:
--> 274       example_splits = self.GenerateExamplesByBeam(pipeline, exec_properties)
    275 
    276       # pylint: disable=expression-not-assigned, no-value-for-parameter

/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in GenerateExamplesByBeam(self, pipeline, exec_properties)
    194         buckets.append(total_buckets)
    195       example_splits = (
--> 196           pipeline
    197           | 'InputToRecord' >>
    198           # pylint: disable=no-value-for-parameter

/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
   1089 
   1090   def __ror__(self, pvalueish, _unused=None):
-> 1091     return self.transform.__ror__(pvalueish, self.label)
   1092 
   1093   def expand(self, pvalue):

/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
    613     pvalueish = _SetInputPValues().visit(pvalueish, replacements)
    614     self.pipeline = p
--> 615     result = p.apply(self, pvalueish, label)
    616     if deferred:
    617       return result

/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
    650       try:
    651         old_label, transform.label = transform.label, label
--> 652         return self.apply(transform, pvalueish)
    653       finally:
    654         transform.label = old_label

/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
    696         transform.type_check_inputs(pvalueish)
    697 
--> 698       pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
    699 
    700       if type_options is not None and type_options.pipeline_type_check:

/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
    183       m = getattr(self, 'apply_%s' % cls.__name__, None)
    184       if m:
--> 185         return m(transform, input, options)
    186     raise NotImplementedError(
    187         'Execution of [%s] not implemented in runner %s.' % (transform, self))

/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
    213   def apply_PTransform(self, transform, input, options):
    214     # The base case of apply is to call the transform's expand.
--> 215     return transform.expand(input)
    216 
    217   def run_transform(self,

/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/csv_example_gen/executor.py in expand(self, pipeline)
    181     csv_files = fileio.glob(self._csv_pattern)
    182     if not csv_files:
--> 183       raise RuntimeError('Split pattern {} does not match any files.'.format(
    184           self._csv_pattern))
    185 

RuntimeError: Split pattern ./data/training/fselect/dataset.csv/* does not match any files.

Are you sure this is the week 2 assignment and not the week 3 assignment?

Hi, you are correct. I have updated the title to read week 3.

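Thanks for confirming. Please look at the way you are creating CsvExampleGen: input_base should be set to the directory containing the CSV files, not the path of a single CSV file. The component appends a `/*` split pattern to input_base, which is why the error reports `./data/training/fselect/dataset.csv/*` — it is treating dataset.csv as a directory and finding nothing inside it.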

Thanks! That fixed the issue!