Loading Custom Image Dataset with TFX as TFRecord

I’m trying to use TFX on a custom dataset (images and labels), so I’m using ImportExampleGen with a .tfrecord file as follows:

# Create the component that imports existing examples from TFRecord_DIR_PATH.
example_gen = ImportExampleGen(input_base=TFRecord_DIR_PATH)

# Run the component (context is presumably a TFX InteractiveContext — confirm).
context.run(example_gen)

# Take the first artifact of the 'examples' output channel; the [0] index
# raises IndexError when .get() returns an empty list.
artifact = example_gen.outputs['examples'].get()[0]

I get IndexError: list index out of range, because example_gen.outputs['examples'].get() returns an empty list ([]).

Here is the code that converts the images to a TFRecord file:

# Walk over every label name, load its images, resize them and remember the
# integer class id assigned to each image.
# Fix: the colon that ends the `for` header must come before the trailing
# comment; previously it was inside the comment, which is a SyntaxError.
for idx, d in enumerate(str_labels):  # LABELS IS DIR NAME (STR)
    # NOTE(review): this pattern does not interpolate `d`, so every label
    # globs the same files — presumably "PATH" is a placeholder; confirm.
    imgs = glob.glob(f"..\\PATH\\*.*g")
    str2int[d] = idx  # map the label name to its integer id
    for img_path in tqdm.tqdm(imgs):
        # Reverse the channel axis (OpenCV loads colour images as BGR by
        # default, so this presumably converts to RGB — confirm).
        image = cv2.imread(img_path)[:,:,::-1]
        all_imgs.append(cv2.resize(image, (144, 96)))
        labels.append(idx)

# Stack into arrays; the added trailing axis gives labels shape (num_images, 1).
all_imgs = np.array(all_imgs)
labels = np.array(labels)[...,np.newaxis]

# Serialize each (label, image) pair as one tf.train.Example and append it to
# the TFRecord file.
with tf.io.TFRecordWriter("TFRecord_DIR_PATH/NAME.tfrecord") as writer:
  for label, img in zip(labels, all_imgs):
    # The image is stored as the raw bytes of a serialized tensor.
    serialized_img = tf.io.serialize_tensor(img)
    label_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
    image_feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[serialized_img.numpy()])
    )
    record = tf.train.Example(
        features=tf.train.Features(
            feature={"label": label_feature, "feature": image_feature}
        )
    )
    writer.write(record.SerializeToString())

I’m using TF==2.7.1 and TFX==1.6.0

I can’t find the mistake, so I hope you can. Thank you.

Here’s an example:

Write path

def create_tfrecords(input_root,
                     output_filename):
    """Write every PNG found under ``input_root`` into one TFRecord file.

    Each image is resized to 64x64, flattened into a list of floats and
    stored under the ``'image'`` key. The name of the image's parent
    directory is stored, encoded to bytes, under the ``'label'`` key.

    Args:
        input_root: Directory that is searched recursively for ``*.png`` files.
        output_filename: Path of the TFRecord file to write.
    """
    with tf.io.TFRecordWriter(output_filename) as writer:
        for png_path in pathlib.Path(input_root).glob('**/*.png'):
            resized = tf.keras.utils.load_img(png_path).resize((64, 64))
            pixel_values = tf.keras.utils.img_to_array(resized).flatten().tolist()
            # The parent directory name doubles as the class label.
            class_name = png_path.parent.name.encode()

            image_feature = tf.train.Feature(
                float_list=tf.train.FloatList(value=pixel_values))
            label_feature = tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[class_name]))
            record = tf.train.Example(
                features=tf.train.Features(
                    feature={'image': image_feature, 'label': label_feature}))
            writer.write(record.SerializeToString())

# Convert the PNG files under the 'happy-or-sad' directory into images.tfrecord.
create_tfrecords('happy-or-sad', 'images.tfrecord')

Read path

def decode_record(record_bytes):
  """Parse one serialized tf.train.Example into a dict of tensors.

  Args:
    record_bytes: A single serialized example read from the TFRecord file.

  Returns:
    A dict with an ``'image'`` float32 tensor of shape (64*64*3, 1) and a
    scalar string ``'label'`` tensor.
  """
  feature_spec = {
      'image': tf.io.FixedLenFeature(shape=(64*64*3, 1), dtype=tf.dtypes.float32),
      'label': tf.io.FixedLenFeature(shape=(), dtype=tf.dtypes.string),
  }
  return tf.io.parse_single_example(serialized=record_bytes, features=feature_spec)
def read_tfrecords(file_path):
    """Load all examples from a TFRecord file into a list.

    Args:
        file_path: Path of the TFRecord file to read.

    Returns:
        A list of dicts, one per record, each holding ``'image'`` (a float32
        tensor reshaped to (64, 64, 3)) and ``'label'`` (the decoded string).
    """
    def _to_entry(parsed):
        # Decode the label first, then restore the flat pixels to an image shape.
        text_label = parsed['label'].numpy().decode()
        pixels = tf.constant(parsed['image'], dtype=tf.float32, shape=(64, 64, 3))
        return {'image': pixels, 'label': text_label}

    dataset = tf.data.TFRecordDataset(file_path).map(decode_record)
    return [_to_entry(parsed) for parsed in dataset]


# Read every stored example back from the file written above.
records = read_tfrecords('images.tfrecord')
# Inspect one record (index 2 is an arbitrary choice): print its label ...
print(records[2]['label'])
# ... and convert its tensor back to an image (as the last expression of a
# notebook cell the result is presumably rendered inline — confirm).
tf.keras.utils.array_to_img(records[2]['image'])

Please see the attached notebook for the full code.
tf-records-demo.ipynb (11.5 KB)