How to input image data with text files of bounding box and labels in Keras Tensorflow API/Key error 'class'

Hello!

I am trying to run the MobileNet code on my custom data, where each image comes with a label/text file that contains the bounding boxes and label names (category information). I wrote some code to build a dataframe that looks like the one below.

My question is when I try to run the train batch command - “train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_dataframe(dataframe= dfTrain,directory=train_path, xcol=‘filename’,ycol=[‘label’,‘xmin’,‘ymin’,‘xmax’,‘ymax’], target_size=(640,640), batch_size=10, class_mode=‘sparse’)”

I am getting an error saying "KeyError: 'class'"; the full error text is further below. Could you please guide me on how to pass this kind of data, with labels stored in text files, to train a MobileNet model? The dataframe looks like this:
filename label xmin ymin xmax ymax
0 img_01_3402617700_00001.jpg 3_yueyawan 1738 806 1948 993
1 img_01_3402617700_01009.jpg 3_yueyawan 1756 298 1967 858
2 img_01_3436789500_00004.jpg 9_zhehen 981 182 2046 249
3 img_01_3436789500_00004.jpg 9_zhehen 478 179 711 244
4 img_01_3436814600_00259.jpg 6_siban 746 17 1011 993
… … … … … … …
3558 img_08_4406743300_00483.jpg 6_siban 690 260 840 675
3559 img_08_4406743300_00486.jpg 6_siban 959 484 1017 889
3560 img_08_4406743300_00698.jpg 6_siban 579 348 831 684
3561 img_08_4406743300_00699.jpg 6_siban 571 571 853 843
3562 img_08_4406772100_00002.jpg 3_yueyawan 46 609 294 998

Below is the code I modified to create the dataframe:
def genDf(namegane, annotation_dir="annotations"):
    """Build a dataframe of labelled bounding boxes for the images in a folder.

    For every file listed in ``namegane``, the XML file with the same base
    name is parsed from ``annotation_dir``. Each ``<object>`` element whose
    ``<name>`` starts with a numeric prefix (e.g. ``3_yueyawan``) contributes
    one row, so an image with several such objects yields several rows.

    Args:
        namegane: Directory whose entries are the image files to describe.
        annotation_dir: Directory holding the ``<image base name>.xml``
            annotation files. Defaults to ``"annotations"``, the location
            the original code hard-coded.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``label``,
        ``xmin``, ``ymin``, ``xmax`` and ``ymax``. The coordinates are kept
        as the raw text read from the XML (strings, not numbers).

    Raises:
        FileNotFoundError: If an image has no matching annotation file.
    """
    rows = []
    for image_name in os.listdir(namegane):
        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b"), whereas
        # split(".")[0] would truncate it to "a" and look up the wrong XML.
        stem = os.path.splitext(image_name)[0]
        tree = et.parse(os.path.join(annotation_dir, stem + ".xml"))
        for obj in tree.iterfind("object"):
            label = obj.find("name").text
            # Only labels of the form "<digits>_<name>" are real categories.
            if not label.split("_")[0].isdigit():
                continue
            box = obj.find("bndbox")
            rows.append((
                image_name,
                label,
                box.find("xmin").text,
                box.find("ymin").text,
                box.find("xmax").text,
                box.find("ymax").text,
            ))
    return pd.DataFrame(
        rows,
        columns=['filename', 'label', 'xmin', 'ymin', 'xmax', 'ymax'],
    )

Error:
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_dataframe(dataframe= dfTrain,directory=train_path, xcol=‘filename’,ycol=[‘label’,‘xmin’,‘ymin’,‘xmax’,‘ymax’], target_size=(640,640), batch_size=10, class_mode=‘sparse’)
1
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_dataframe(dataframe= dfTrain,directory=train_path, xcol=‘filename’,ycol=[‘label’,‘xmin’,‘ymin’,‘xmax’,‘ymax’], target_size=(640,640), batch_size=10, class_mode=‘sparse’)

KeyError Traceback (most recent call last)
File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\pandas\core\indexes\base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
→ 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\pandas_libs\index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\pandas_libs\index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()

File pandas_libs\hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas_libs\hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: ‘class’

The above exception was the direct cause of the following exception:

KeyError Traceback (most recent call last)
Input In [15], in ()
----> 1 train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_dataframe(dataframe= dfTrain,directory=train_path, xcol=‘filename’,ycol=[‘label’,‘xmin’,‘ymin’,‘xmax’,‘ymax’], target_size=(640,640), batch_size=10, class_mode=‘sparse’)

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\keras\preprocessing\image.py:1117, in ImageDataGenerator.flow_from_dataframe(self, dataframe, directory, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation, validate_filenames, **kwargs)
1111 if ‘drop_duplicates’ in kwargs:
1112 tf_logging.warning(
1113 'drop_duplicates is deprecated, you can drop duplicates ’
1114 ‘by using the pandas.DataFrame.drop_duplicates method.’,
1115 DeprecationWarning)
→ 1117 return DataFrameIterator(
1118 dataframe,
1119 directory,
1120 self,
1121 x_col=x_col,
1122 y_col=y_col,
1123 weight_col=weight_col,
1124 target_size=target_size,
1125 color_mode=color_mode,
1126 classes=classes,
1127 class_mode=class_mode,
1128 data_format=self.data_format,
1129 batch_size=batch_size,
1130 shuffle=shuffle,
1131 seed=seed,
1132 save_to_dir=save_to_dir,
1133 save_prefix=save_prefix,
1134 save_format=save_format,
1135 subset=subset,
1136 interpolation=interpolation,
1137 validate_filenames=validate_filenames)

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\keras\preprocessing\image.py:571, in DataFrameIterator.init(self, dataframe, directory, image_data_generator, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, subset, interpolation, dtype, validate_filenames)
548 def init(
549 self,
550 dataframe,
(…)
569 dtype=‘float32’,
570 validate_filenames=True):
→ 571 super(DataFrameIterator, self).init(
572 dataframe=dataframe,
573 directory=directory,
574 image_data_generator=image_data_generator,
575 x_col=x_col,
576 y_col=y_col,
577 weight_col=weight_col,
578 target_size=target_size,
579 color_mode=color_mode,
580 classes=classes,
581 class_mode=class_mode,
582 batch_size=batch_size,
583 shuffle=shuffle,
584 seed=seed,
585 data_format=data_format,
586 save_to_dir=save_to_dir,
587 save_prefix=save_prefix,
588 save_format=save_format,
589 subset=subset,
590 interpolation=interpolation,
591 dtype=dtype,
592 validate_filenames=validate_filenames
593 )

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\keras_preprocessing\image\dataframe_iterator.py:138, in DataFrameIterator.init(self, dataframe, directory, image_data_generator, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, subset, interpolation, dtype, validate_filenames)
136 self.dtype = dtype
137 # check that inputs match the required class_mode
→ 138 self._check_params(df, x_col, y_col, weight_col, classes)
139 if validate_filenames: # check which image files are valid and keep them
140 df = self._filter_valid_filepaths(df, x_col)

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\keras_preprocessing\image\dataframe_iterator.py:195, in DataFrameIterator._check_params(self, df, x_col, y_col, weight_col, classes)
193 # check labels are string if class_mode is binary or sparse
194 if self.class_mode in {‘binary’, ‘sparse’}:
→ 195 if not all(df[y_col].apply(lambda x: isinstance(x, str))):
196 raise TypeError('If class_mode="{}", y_col="{}" column ’
197 ‘values must be strings.’
198 .format(self.class_mode, y_col))
199 # check that if binary there are only 2 different classes

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\pandas\core\frame.py:3505, in DataFrame.getitem(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
→ 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]

File ~\anaconda3\anaconda3_\envs\tensorflow2\lib\site-packages\pandas\core\indexes\base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
→ 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)

KeyError: ‘class’

y_col string or list, column/s in dataframe that has the target data.

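Your call passes `ycol` without the underscore. Because `flow_from_dataframe` accepts extra keyword arguments, the misspelled name is silently ignored and `y_col` falls back to its default, "class" — a column your dataframe does not have, hence the KeyError. The same applies to `x_col` (you wrote `xcol`).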

Thank you very much! After making the change you suggested and setting class_mode to "raw", my code is working now.