Hello,
I am receiving a split error on ExampleGen when targeting TRAINING_DIR_FSELECT.
I have verified the fselect\dataset.csv exists. I have also deleted the data and pipeline folders. As well as the checkpoints without success.
Any help would be appreciated.
### START CODE HERE
# Instantiate ExampleGen with the input CSV dataset
example_gen = tfx.components.CsvExampleGen(input_base=TRAINING_DATA_FSELECT)
# Run the component using the InteractiveContext instance
context.run(example_gen)
### END CODE HERE
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-82-ceea169f9728> in <module>
5
6 # Run the component using the InteractiveContext instance
----> 7 context.run(example_gen)
8
9 ### END CODE HERE
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run_if_ipython(*args, **kwargs)
61 # __IPYTHON__ variable is set by IPython, see
62 # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 63 return fn(*args, **kwargs)
64 else:
65 absl.logging.warning(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
181 telemetry_utils.LABEL_TFX_RUNNER: runner_label,
182 }):
--> 183 execution_id = launcher.launch().execution_id
184
185 return execution_result.ExecutionResult(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
198 # be immutable in this context.
199 # output_dict can still be changed, specifically properties.
--> 200 self._run_executor(execution_decision.execution_id,
201 copy.deepcopy(execution_decision.input_dict),
202 execution_decision.output_dict,
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
71 # be immutable in this context.
72 # output_dict can still be changed, specifically properties.
---> 73 executor.Do(
74 copy.deepcopy(input_dict), output_dict, copy.deepcopy(exec_properties))
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in Do(self, input_dict, output_dict, exec_properties)
272 logging.info('Generating examples.')
273 with self._make_beam_pipeline() as pipeline:
--> 274 example_splits = self.GenerateExamplesByBeam(pipeline, exec_properties)
275
276 # pylint: disable=expression-not-assigned, no-value-for-parameter
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in GenerateExamplesByBeam(self, pipeline, exec_properties)
194 buckets.append(total_buckets)
195 example_splits = (
--> 196 pipeline
197 | 'InputToRecord' >>
198 # pylint: disable=no-value-for-parameter
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
1089
1090 def __ror__(self, pvalueish, _unused=None):
-> 1091 return self.transform.__ror__(pvalueish, self.label)
1092
1093 def expand(self, pvalue):
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
613 pvalueish = _SetInputPValues().visit(pvalueish, replacements)
614 self.pipeline = p
--> 615 result = p.apply(self, pvalueish, label)
616 if deferred:
617 return result
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
650 try:
651 old_label, transform.label = transform.label, label
--> 652 return self.apply(transform, pvalueish)
653 finally:
654 transform.label = old_label
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
696 transform.type_check_inputs(pvalueish)
697
--> 698 pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
699
700 if type_options is not None and type_options.pipeline_type_check:
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
183 m = getattr(self, 'apply_%s' % cls.__name__, None)
184 if m:
--> 185 return m(transform, input, options)
186 raise NotImplementedError(
187 'Execution of [%s] not implemented in runner %s.' % (transform, self))
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
213 def apply_PTransform(self, transform, input, options):
214 # The base case of apply is to call the transform's expand.
--> 215 return transform.expand(input)
216
217 def run_transform(self,
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/csv_example_gen/executor.py in expand(self, pipeline)
181 csv_files = fileio.glob(self._csv_pattern)
182 if not csv_files:
--> 183 raise RuntimeError('Split pattern {} does not match any files.'.format(
184 self._csv_pattern))
185
RuntimeError: Split pattern ./data/training/fselect/dataset.csv/* does not match any files.