I’m trying to use TFX on a custom dataset (images and labels), so I’m using ImportExampleGen
with a TFRecord (.tfrecord)
file, as follows:
# Create the example-ingestion component, pointing it at the directory
# that is expected to hold the TFRecord file(s).
example_gen = ImportExampleGen(input_base=TFRecord_DIR_PATH)
# Execute the component (presumably `context` is a TFX InteractiveContext — confirm).
context.run(example_gen)
# Take the first 'examples' output artifact; the [0] index raises
# IndexError if .get() returns an empty list.
artifact = example_gen.outputs['examples'].get()[0]
I get IndexError: list index out of range
because example_gen.outputs['examples'].get()
returns an empty list ([]).
Here is the code that converts the images to a TFRecord file:
# Collect resized images and their integer class ids.
# Each entry of str_labels is a class directory name (str).
# NOTE(review): str2int, all_imgs and labels are assumed to be initialised
# earlier (a dict and two lists) — confirm against the full script.
for idx, d in enumerate(str_labels):
    imgs = glob.glob(f"..\\PATH\\*.*g")
    str2int[d] = idx  # remember which integer id stands for this class name
    for img_path in tqdm.tqdm(imgs):
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; slicing
            # None would raise, so skip such files instead of crashing.
            continue
        # Reverse the channel axis (OpenCV loads BGR, so this yields RGB).
        image = image[:, :, ::-1]
        all_imgs.append(cv2.resize(image, (144, 96)))
        labels.append(idx)

all_imgs = np.array(all_imgs)
# Add a trailing axis so labels has one row of shape (1,) per image.
labels = np.array(labels)[..., np.newaxis]

# Write one tf.train.Example per (label, image) pair.
with tf.io.TFRecordWriter("TFRecord_DIR_PATH/NAME.tfrecord") as tfrecord:
    for lbl, img in zip(labels, all_imgs):
        # Each row of labels is a length-1 array; unwrap it to a plain
        # Python int so Int64List receives a scalar value.
        label = int(lbl[0])
        # Serialize the whole image tensor into a single bytes value.
        feature = tf.io.serialize_tensor(img)
        features = {
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
            "feature": tf.train.Feature(bytes_list=tf.train.BytesList(value=[feature.numpy()])),
        }
        example = tf.train.Example(features=tf.train.Features(feature=features))
        tfrecord.write(example.SerializeToString())
I’m using TF==2.7.1
and TFX==1.6.0.
I can’t find the mistake, so I hope you can. Thank you.
Here’s an example:
Write path
def create_tfrecords(input_root,
                     output_filename,
                     image_size=(64, 64),
                     pattern='**/*.png'):
    """Serialize the images found under ``input_root`` into one TFRecord file.

    Every file matching ``pattern`` is loaded, resized to ``image_size``,
    flattened to a list of floats and stored under the ``'image'`` key. The
    name of the file's parent directory is stored as bytes under ``'label'``.

    Args:
        input_root: Directory that is searched for image files.
        output_filename: Path of the TFRecord file to write.
        image_size: Size passed to the loaded image's ``resize`` call.
            Defaults to ``(64, 64)``. Any reader must parse ``'image'``
            with a length that matches this size.
        pattern: Glob pattern, relative to ``input_root``, selecting the
            image files. Defaults to ``'**/*.png'`` (recursive PNG search).
    """
    with tf.io.TFRecordWriter(output_filename) as writer:
        for image_path in pathlib.Path(input_root).glob(pattern):
            image = tf.keras.utils.load_img(image_path)
            image = image.resize(image_size)
            # FloatList needs a flat sequence of Python floats.
            pixels = tf.keras.utils.img_to_array(image).flatten().tolist()
            # The containing directory's name serves as the class label.
            label = image_path.parent.name.encode()
            features = tf.train.Features(feature={
                'image': tf.train.Feature(float_list=tf.train.FloatList(value=pixels)),
                'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])),
            })
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
# Write the images found under 'happy-or-sad' to 'images.tfrecord'.
create_tfrecords('happy-or-sad', 'images.tfrecord')
Read path
def decode_record(record_bytes):
    """Parse one serialized Example into a dict with 'image' and 'label'.

    'image' is parsed as float32 with shape (64*64*3, 1) and 'label' as a
    scalar string.
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature(shape=(64*64*3, 1), dtype=tf.dtypes.float32),
        'label': tf.io.FixedLenFeature(shape=(), dtype=tf.dtypes.string),
    }
    return tf.io.parse_single_example(record_bytes, feature_spec)
def read_tfrecords(file_path):
    """Read a TFRecord file and return its records as a list of dicts.

    Each dict holds the image as a float32 tensor of shape (64, 64, 3)
    under 'image' and the decoded label string under 'label'.
    """
    def _to_entry(parsed):
        # Decode the label bytes to str and give the flat pixel tensor
        # its (64, 64, 3) shape.
        name = parsed['label'].numpy().decode()
        pixels = tf.constant(parsed['image'], dtype=tf.float32, shape=(64, 64, 3))
        return {'image': pixels, 'label': name}

    dataset = tf.data.TFRecordDataset(file_path).map(decode_record)
    return [_to_entry(parsed) for parsed in dataset]
# Load all records back from 'images.tfrecord'.
records = read_tfrecords('images.tfrecord')
# Show one record (index 2 is a fixed, arbitrary pick, not a random draw).
print(records[2]['label'])
# Convert the stored image tensor back into an image object
# (in a notebook, this is presumably rendered as the cell output).
tf.keras.utils.array_to_img(records[2]['image'])
Please see the attached notebook for the full code.
tf-records-demo.ipynb (11.5 KB)