In the Week 1 > Validating Data > Lab, I think tfdv has an internal bug, as the code is not running…
# Generate training dataset statistics
train_stats = tfdv.generate_statistics_from_dataframe(train_df)
gives the error below:
ValueError Traceback (most recent call last)
<ipython-input-24-69ff4c174551> in <module>
9 df = df.explode('feat_2').reset_index(drop=True)
10
---> 11 train_stats = tfdv.generate_statistics_from_dataframe(df)
12 schema = tfdv.infer_schema(statistics=train_stats)
13 tfdv.display_schema(schema=schema)
2 frames
/usr/local/lib/python3.9/dist-packages/tensorflow_data_validation/utils/stats_gen_lib.py in generate_statistics_from_dataframe(dataframe, stats_options, n_jobs)
228
229 if n_jobs == 1:
--> 230 merged_partial_stats = _generate_partial_statistics_from_df(
231 dataframe, stats_options, stats_generators)
232 else:
/usr/local/lib/python3.9/dist-packages/tensorflow_data_validation/utils/stats_gen_lib.py in _generate_partial_statistics_from_df(dataframe, stats_options, stats_generators)
273 stats_options_modified.schema = schema
274 record_batch_with_list_arrays = table_util.CanonicalizeRecordBatch(
--> 275 pa.RecordBatch.from_pandas(dataframe))
276 return stats_impl.generate_partial_statistics_in_memory(
277 record_batch_with_list_arrays, stats_options_modified, stats_generators)
/usr/local/lib/python3.9/dist-packages/pyarrow/table.pxi in pyarrow.lib.RecordBatch.from_pandas()
ValueError: need more than 2 values to unpack