Hello everyone!
I’m trying to complete the assignment for week 3 of MLOps course 2.
I’m stuck at Exercise 2. When I execute it, I get the following error: RuntimeError: Files in same split ./data/training/* have different header.
I saw there is a cell above intended to solve this error, but no matter how many times I execute it, the error still appears.
I’m passing the TRAINING_DIR value to the example_gen instance.
I appreciate any help provided as I am unable to find a solution for the exercise.
Many thanks!
PS: Below is the stack trace of the error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-28-fe2882f96379> in <module>
7
8 # Run the component using the InteractiveContext instance
----> 9 context.run(example_gen)
10
11 ### END CODE HERE
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run_if_ipython(*args, **kwargs)
61 # __IPYTHON__ variable is set by IPython, see
62 # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 63 return fn(*args, **kwargs)
64 else:
65 absl.logging.warning(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
181 telemetry_utils.LABEL_TFX_RUNNER: runner_label,
182 }):
--> 183 execution_id = launcher.launch().execution_id
184
185 return execution_result.ExecutionResult(
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
198 # be immutable in this context.
199 # output_dict can still be changed, specifically properties.
--> 200 self._run_executor(execution_decision.execution_id,
201 copy.deepcopy(execution_decision.input_dict),
202 execution_decision.output_dict,
/opt/conda/lib/python3.8/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
71 # be immutable in this context.
72 # output_dict can still be changed, specifically properties.
---> 73 executor.Do(
74 copy.deepcopy(input_dict), output_dict, copy.deepcopy(exec_properties))
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in Do(self, input_dict, output_dict, exec_properties)
272 logging.info('Generating examples.')
273 with self._make_beam_pipeline() as pipeline:
--> 274 example_splits = self.GenerateExamplesByBeam(pipeline, exec_properties)
275
276 # pylint: disable=expression-not-assigned, no-value-for-parameter
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/base_example_gen_executor.py in GenerateExamplesByBeam(self, pipeline, exec_properties)
194 buckets.append(total_buckets)
195 example_splits = (
--> 196 pipeline
197 | 'InputToRecord' >>
198 # pylint: disable=no-value-for-parameter
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
1089
1090 def __ror__(self, pvalueish, _unused=None):
-> 1091 return self.transform.__ror__(pvalueish, self.label)
1092
1093 def expand(self, pvalue):
/opt/conda/lib/python3.8/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
613 pvalueish = _SetInputPValues().visit(pvalueish, replacements)
614 self.pipeline = p
--> 615 result = p.apply(self, pvalueish, label)
616 if deferred:
617 return result
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
650 try:
651 old_label, transform.label = transform.label, label
--> 652 return self.apply(transform, pvalueish)
653 finally:
654 transform.label = old_label
/opt/conda/lib/python3.8/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
696 transform.type_check_inputs(pvalueish)
697
--> 698 pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
699
700 if type_options is not None and type_options.pipeline_type_check:
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
183 m = getattr(self, 'apply_%s' % cls.__name__, None)
184 if m:
--> 185 return m(transform, input, options)
186 raise NotImplementedError(
187 'Execution of [%s] not implemented in runner %s.' % (transform, self))
/opt/conda/lib/python3.8/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
213 def apply_PTransform(self, transform, input, options):
214 # The base case of apply is to call the transform's expand.
--> 215 return transform.expand(input)
216
217 def run_transform(self,
/opt/conda/lib/python3.8/site-packages/tfx/components/example_gen/csv_example_gen/executor.py in expand(self, pipeline)
187 for csv_file in csv_files[1:]:
188 if io_utils.load_csv_column_names(csv_file) != column_names:
--> 189 raise RuntimeError(
190 'Files in same split {} have different header.'.format(
191 self._csv_pattern))
RuntimeError: Files in same split ./data/training/* have different header.