Poor Performance in Training the Network with Images

Rachid95 · May 8, 2024, 8:13pm

Hello everyone,

I’ve encountered difficulties during the initial training of my neural network with images, and the results were very disappointing. I’m seeking your advice and suggestions to improve the performance of my model in this particular phase of learning.

Below, you’ll find the relevant code. Any assistance would be greatly appreciated. Thank you in advance for your contributions.

import math
import torch
import logging
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

class GRUModel(nn.Module):
    """Thin wrapper around ``nn.GRU`` with custom weight initialisation.

    The GRU is built with ``batch_first=True``. ``forward`` returns the
    per-step outputs and discards the final hidden state.
    """

    def __init__(self, input_dim=96, hidden_size=128, n_layers=1):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        self.init_weights()

    def forward(self, x):
        """Run ``x`` through the GRU and return only the output sequence."""
        sequence_out, _last_hidden = self.gru(x)
        return sequence_out

    def init_weights(self):
        """Initialise every GRU parameter according to its name.

        Biases are zeroed, input-to-hidden weights get Kaiming-normal
        values and hidden-to-hidden weights get an orthogonal matrix.
        """
        # First matching rule wins, mirroring an if/elif chain.
        rules = (
            ('bias', nn.init.zeros_),
            ('weight_ih', nn.init.kaiming_normal_),
            ('weight_hh', nn.init.orthogonal_),
        )
        for param_name, tensor in self.gru.named_parameters():
            for fragment, initializer in rules:
                if fragment in param_name:
                    initializer(tensor)
                    break

class SpatialAttention(nn.Module):
    """Additive attention pooling over dimension 1 of the input.

    Each position along dim 1 is scored with ``tanh(x) @ W``. The scores
    are normalised with a softmax over dim 1, and the input is summed
    along dim 1 using those weights.

    Args:
        input_dim: Size of the last dimension of the input; ``W`` has
            shape ``(input_dim, 1)``.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.W = nn.Parameter(torch.empty(input_dim, 1))
        self.softmax = nn.Softmax(dim=1)
        # W is applied as ``x @ W``, so the number of summed inputs is
        # ``input_dim`` (dim 0 of W). torch's fan computation treats dim 1
        # as fan_in, which is 1 here; the default mode would therefore give
        # every weight a std of ~0.58 regardless of input_dim and saturate
        # the softmax. ``fan_out`` selects dim 0, scaling by input_dim.
        nn.init.kaiming_normal_(self.W, a=math.sqrt(5), mode='fan_out')

    def forward(self, x):
        """Return the attention-weighted sum of ``x`` over dim 1.

        Assumes ``x`` is ``(batch, steps, input_dim)`` - TODO confirm
        against the caller; the result then has shape
        ``(batch, input_dim)``.
        """
        scores = torch.matmul(torch.tanh(x), self.W)
        # One weight per position along dim 1; weights sum to 1.
        alpha = self.softmax(scores)
        return torch.sum(x * alpha, dim=1)

class Dense_Layer(nn.Module):
    """Two-way classification head: linear projection followed by softmax.

    Args:
        in_chans: Number of input features of the linear layer.
        dropout_p: Accepted but not used anywhere in this class.
    """

    def __init__(self, in_chans=96, dropout_p=0.25):
        super().__init__()
        # NOTE(review): dropout_p is never read; no dropout layer is built.
        self.fc_out = nn.Linear(in_features=in_chans, out_features=2)
        self.softmax = nn.Softmax(dim=1)
        self.init_weights()

    def init_weights(self):
        """Kaiming-normal weights and zero bias for the output layer."""
        head = self.fc_out
        nn.init.kaiming_normal_(head.weight)
        nn.init.zeros_(head.bias)

    def forward(self, x):
        """Return softmax probabilities over dim 1 for the two classes.

        NOTE(review): the output is already normalised. If the training
        loop feeds it to a loss that applies its own (log-)softmax, such as
        ``nn.CrossEntropyLoss``, the normalisation is applied twice -
        verify against the training code.
        """
        return self.softmax(self.fc_out(x))

class VisionEmbedding(nn.Module):
    """Encode a sequence of 2048-d image features into one 256-d vector.

    Pipeline: ``Linear(2048, 512) + ReLU`` -> ``GRUModel(512 -> 256)`` ->
    ``SpatialAttention(256)``.

    Args:
        configs: Accepted for interface compatibility; not read here.
    """

    def __init__(self, configs):
        super().__init__()
        self.fc = nn.Sequential(nn.Linear(2048, 512), nn.ReLU())
        self.hm = GRUModel(input_dim=512, hidden_size=256)
        self.self_attention = SpatialAttention(input_dim=256)

    def forward(self, x):
        """Embed ``x['local_image']``.

        Args:
            x: Mapping with a ``'local_image'`` tensor. Assumed to be laid
                out as ``(batch, steps, ..., 2048)`` with optional size-1
                axes - TODO confirm against the data loader.

        Returns:
            The output of the attention pooling layer.
        """
        frames = x['local_image']
        feats = frames.squeeze()
        # A bare squeeze() drops *every* size-1 axis, so a batch of one (or
        # a one-frame clip) loses an axis and the GRU/attention would run
        # over the wrong dimensions. Restore the axes that were collapsed.
        if frames.dim() > 0 and frames.shape[0] == 1:
            feats = feats.unsqueeze(0)  # batch axis
        if feats.dim() == 2:
            feats = feats.unsqueeze(1)  # single time step
        out = self.fc(feats)
        out = self.hm(out)
        out = self.self_attention(out)
        return out

class IntentPrediction(nn.Module):
    """Full model: a ``VisionEmbedding`` encoder feeding a ``Dense_Layer``.

    Args:
        configs: Forwarded unchanged to ``VisionEmbedding``.
    """

    def __init__(self, configs=None):
        super().__init__()
        self.vision_encoder = VisionEmbedding(configs)
        self.dense_layer = Dense_Layer(in_chans=256)

    def forward(self, x):
        """Encode ``x`` and return the dense layer's output for it."""
        embedding = self.vision_encoder(x)
        return self.dense_layer(embedding)

TMosh · May 8, 2024, 8:50pm

Can I ask why you have a GRU in the middle of this model?

Rachid95 · May 8, 2024, 9:16pm

I included the GRU layer to capture temporal dependencies within the sequence of video frames, enhancing the model’s ability to understand the temporal dynamics present in the data.

Topic		Replies	Views
Training model test output not good AI Discussions ai-discussions , introductions , project	2	184	April 25, 2024
Question about LSTM model, please help AI Discussions ai-discussions	1	96	February 22, 2024
Help Me With NN Model Accuracy AI Discussions ai-discussions , project	33	318	January 3, 2025
Classification model using NN AI Discussions ai-discussions	1	102	February 15, 2024
Planar Data Classification Exercise 8 Neural Networks and Deep Learning coursera-platform	2	554	August 26, 2021

Poor Performance in Training the Network with Images

Related topics