I am using the OpenCV library to generate an ML model that recognises the handwritten digits from the MNIST dataset.
While training the model, the values in the output layer almost always end up as NaN or Inf. I tried changing the model's parameters, such as the number of iterations and the learning rate, but with little or no success. I also tried training on a smaller subset of the data, but that doesn't seem to work either. Does anybody have an idea what I'm doing wrong?
Here is the code for the model setup:
cv::Ptr<cv::ml::ANN_MLP> mlp = cv::ml::ANN_MLP::create();
mlp->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM, 1, 1);
int inputLayerSize = imagesData[0].total();
int hiddenLayerSize = 100;
int outputLayerSize = 10;
cv::Mat layers = (cv::Mat_<int>(3, 1)<<inputLayerSize, hiddenLayerSize, outputLayerSize);
mlp->setLayerSizes(layers);
int numSamples = imagesData.size();
cv::Mat trainingData(numSamples, inputLayerSize, CV_32F);
cv::Mat labelData(numSamples, outputLayerSize, CV_32F);
for (int i = 0; i < numSamples; i++) {
cv::Mat image = imagesData[i].reshape(1, 1);
image.convertTo(trainingData.row(i), CV_32F);
cv::Mat label = cv::Mat::zeros(1, outputLayerSize, CV_32F);
label.at<float>(0, labelsData[i]) = 1.0;
label.copyTo(labelData.row(i));
}
cv::TermCriteria termCrit(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 100, 0.001);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(cv::ml::ANN_MLP::BACKPROP, 0.001, 0.1);
mlp->train(trainingData, cv::ml::ROW_SAMPLE, labelData);