Hi everyone,
I am using this class with the code below to load my train and test datasets of thermal images and render it in the format suitable for a Conv2D deep learning model.
I have a folder, Dataset, with the Train and Test subfolders containing images. for example, in the Train folder, I have three subfolders, Cat, Car and Man each containing about 1700 images.
Using this class has given me a wrong shape - shape=(0, 200, 200, 3), dtype=float64) for my train images while I expect it to be (shape=(1700, 200, 200, 3), dtype=float64)?
I mean the first dimension should be the number of images right?
my code.
import os
import pickle
import numpy as np
path_train = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train"
path_test = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Test"
# defining the class
class MasterImage(object):
def __init__(self,PATH='', IMAGE_SIZE = 50):
self.PATH = PATH
self.IMAGE_SIZE = IMAGE_SIZE
self.image_data = []
self.x_data = []
self.y_data = []
self.CATEGORIES = []
# This will get List of categories
self.list_categories = []
def get_categories(self):
for path in os.listdir(self.PATH):
if '.DS_Store' in path:
pass
else:
self.list_categories.append(path)
print("Found Categories ",self.list_categories,'\n')
return self.list_categories
def Process_Image(self):
try:
"""
Return Numpy array of image
:return: X_Data, Y_Data
"""
self.CATEGORIES = self.get_categories()
for categories in self.CATEGORIES: # Iterate over categories
train_folder_path = os.path.join(self.PATH, categories) # Folder Path
class_index = self.CATEGORIES.index(categories) # this will get index for classification
for img in os.listdir(train_folder_path): # This will iterate in the Folder
new_path = os.path.join(train_folder_path, img) # image Path
try: # if any image is corrupted
image_data_temp = cv2.imread(new_path) # Read Image as numbers
image_temp_resize = cv2.resize(image_data_temp,(self.IMAGE_SIZE,self.IMAGE_SIZE))
self.image_data.append([image_temp_resize,class_index])
random.shuffle(self.image_data)
except:
pass
data = np.asanyarray(self.image_data)
# Iterate over the Data
for x in data:
self.x_data.append(x[0]) # Get the X_Data
self.y_data.append(x[1]) # get the label
X_Data = np.asarray(self.x_data) / (255.0) # Normalize Data
Y_Data = np.asarray(self.y_data)
# reshape x_Data
X_Data = X_Data.reshape(-1, self.IMAGE_SIZE, self.IMAGE_SIZE, 3)
return X_Data, Y_Data
except:
print("Failed to run Function Process Image ")
def pickle_image(self):
"""
:return: None Creates a Pickle Object of DataSet
"""
# Call the Function and Get the Data
X_Data,Y_Data = self.Process_Image()
# Write the Entire Data into a Pickle File
pickle_out = open('X_Data','wb')
pickle.dump(X_Data, pickle_out)
pickle_out.close()
# Write the Y Label Data
pickle_out = open('Y_Data', 'wb')
pickle.dump(Y_Data, pickle_out)
pickle_out.close()
print("Pickled Image Successfully ")
return X_Data,Y_Data
def load_dataset(self):
try:
# Read the Data from Pickle Object
X_Temp = open('X_Data','rb')
X_Data = pickle.load(X_Temp)
Y_Temp = open('Y_Data','rb')
Y_Data = pickle.load(Y_Temp)
print('Reading Dataset from PIckle Object')
return X_Data,Y_Data
except:
print('Could not Found Pickle File ')
print('Loading File and Dataset ..........')
X_Data,Y_Data = self.pickle_image()
return X_Data,Y_Data
# loading data
dstrain = MasterImage(PATH = path_train,IMAGE_SIZE = 100)
dstrain
(train_images, train_labels) = dstrain.load_dataset()
print('Train: X_images=%s, y_labels=%s' % (train_images.shape, train_labels.shape))
Train: X_images=(0, 100, 100, 3), y_labels=(0,)
Can someone advise me on how to get the correct shape? Or maybe you know of alternative ways to do it.
the image data is similar to this one.
Thank you very much for any help with this.