How to load an image dataset in python for an image classification problem? shape issue

Hi everyone,

I am using this class with the code below to load my train and test datasets of thermal images and render it in the format suitable for a Conv2D deep learning model.

I have a folder, Dataset, with the Train and Test subfolders containing images. for example, in the Train folder, I have three subfolders, Cat, Car and Man each containing about 1700 images.

Using this class has given me a wrong shape - shape=(0, 200, 200, 3), dtype=float64) for my train images while I expect it to be (shape=(1700, 200, 200, 3), dtype=float64)?

I mean the first dimension should be the number of images right?

my code.

import os
import pickle
import numpy as np

path_train = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train"
path_test = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Test"

# defining the class

class MasterImage(object):

    def __init__(self,PATH='', IMAGE_SIZE = 50):
        self.PATH = PATH
        self.IMAGE_SIZE = IMAGE_SIZE

        self.image_data = []
        self.x_data = []
        self.y_data = []
        self.CATEGORIES = []

        # This will get List of categories
        self.list_categories = []

    def get_categories(self):
        for path in os.listdir(self.PATH):
            if '.DS_Store' in path:
                pass
            else:
                self.list_categories.append(path)
        print("Found Categories ",self.list_categories,'\n')
        return self.list_categories

    def Process_Image(self):
        try:
            """
            Return Numpy array of image
            :return: X_Data, Y_Data
            """
            self.CATEGORIES = self.get_categories()
            for categories in self.CATEGORIES:                                                  # Iterate over categories

                train_folder_path = os.path.join(self.PATH, categories)                         # Folder Path
                class_index = self.CATEGORIES.index(categories)                                 # this will get index for classification

                for img in os.listdir(train_folder_path):                                       # This will iterate in the Folder
                    new_path = os.path.join(train_folder_path, img)                             # image Path

                    try:        # if any image is corrupted
                        image_data_temp = cv2.imread(new_path)                 # Read Image as numbers
                        image_temp_resize = cv2.resize(image_data_temp,(self.IMAGE_SIZE,self.IMAGE_SIZE))
                        self.image_data.append([image_temp_resize,class_index])
                        random.shuffle(self.image_data)
                    except:
                        pass

            data = np.asanyarray(self.image_data)

            # Iterate over the Data
            for x in data:
                self.x_data.append(x[0])        # Get the X_Data
                self.y_data.append(x[1])        # get the label

            X_Data = np.asarray(self.x_data) / (255.0)      # Normalize Data
            Y_Data = np.asarray(self.y_data)

            # reshape x_Data

            X_Data = X_Data.reshape(-1, self.IMAGE_SIZE, self.IMAGE_SIZE, 3)

            return X_Data, Y_Data
        except:
            print("Failed to run Function Process Image ")

    def pickle_image(self):

        """
        :return: None Creates a Pickle Object of DataSet
        """
        # Call the Function and Get the Data
        X_Data,Y_Data = self.Process_Image()

        # Write the Entire Data into a Pickle File
        pickle_out = open('X_Data','wb')
        pickle.dump(X_Data, pickle_out)
        pickle_out.close()

        # Write the Y Label Data
        pickle_out = open('Y_Data', 'wb')
        pickle.dump(Y_Data, pickle_out)
        pickle_out.close()

        print("Pickled Image Successfully ")
        return X_Data,Y_Data

    def load_dataset(self):

        try:
            # Read the Data from Pickle Object
            X_Temp = open('X_Data','rb')
            X_Data = pickle.load(X_Temp)

            Y_Temp = open('Y_Data','rb')
            Y_Data = pickle.load(Y_Temp)

            print('Reading Dataset from PIckle Object')

            return X_Data,Y_Data

        except:
            print('Could not Found Pickle File ')
            print('Loading File and Dataset  ..........')

            X_Data,Y_Data = self.pickle_image()
            return X_Data,Y_Data


# loading data

dstrain = MasterImage(PATH = path_train,IMAGE_SIZE = 100)
dstrain

(train_images, train_labels) = dstrain.load_dataset()


print('Train: X_images=%s, y_labels=%s' % (train_images.shape, train_labels.shape))
Train: X_images=(0, 100, 100, 3), y_labels=(0,)

Can someone advise me on how to get the correct shape? Or maybe you know of alternative ways to do it.

the image data is similar to this one.

Thank you very much for any help with this.

Might be your cv2 is not properly reading in the files. Can you check if you have cv2 installed?

In your load_dataset() method, it looks like you are using the relative pathnames “X_Data” and “Y_Data” to find the files. You didn’t mention those names in your previous description. Also note that because those pathnames are “relative”, the behavior of this code will depend on what your “current working directory” is at the point that you run it, right? So maybe the problem is that no files were actually found? Add more print statements to watch the progress …

1 Like

thank you so much for your advice. I have printed out all the methods, and changed to open(’…\SeekThermal\X_Data’,‘rb’) and it worked :slight_smile: