Hello everyone,
I’ve encountered difficulties during the initial training of my neural network with images, and the results were very disappointing. I’m seeking your advice and suggestions to improve the performance of my model in this particular phase of learning.
Below, you’ll find the relevant code. Any assistance would be greatly appreciated. Thank you in advance for your contributions.
import math
import torch
import logging
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
class GRUModel(nn.Module):
    """GRU sequence encoder that returns the output of every time step.

    The underlying ``nn.GRU`` is built with ``batch_first=True``.

    Args:
        input_dim: Size of each input feature vector.
        hidden_size: Size of the GRU hidden state.
        n_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_dim=96, hidden_size=128, n_layers=1):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        self.init_weights()

    def forward(self, x):
        """Run ``x`` through the GRU and return its per-step outputs.

        The final hidden state that ``nn.GRU`` also returns is discarded.
        """
        sequence_out, _last_hidden = self.gru(x)
        return sequence_out

    def init_weights(self):
        """Re-initialise every GRU parameter according to its role.

        Input-to-hidden weights get Kaiming-normal values, hidden-to-hidden
        weights get an orthogonal matrix, and all biases are set to zero.
        """
        for param_name, tensor in self.gru.named_parameters():
            if 'weight_ih' in param_name:
                nn.init.kaiming_normal_(tensor)
                continue
            if 'weight_hh' in param_name:
                nn.init.orthogonal_(tensor)
                continue
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0.0)
class SpatialAttention(nn.Module):
    """Attention pooling that collapses dimension 1 of the input.

    A score is computed for every position along dimension 1 as
    ``tanh(x) @ W``, the scores are normalised with a softmax over that
    dimension, and the input is summed along it using those weights.

    Args:
        input_dim: Size of the last dimension of the input (the feature size).
    """

    def __init__(self, input_dim):
        super().__init__()
        # Learnable scoring vector, stored as a column: (input_dim, 1).
        self.W = nn.Parameter(torch.empty(input_dim, 1))
        self.softmax = nn.Softmax(dim=1)
        # For a 2-D tensor PyTorch takes fan_in = size(1) and fan_out = size(0).
        # With W shaped (input_dim, 1) the default fan_in mode would use a fan
        # of 1 and give a std of ~0.58 no matter how large input_dim is, which
        # makes the attention scores large and the softmax close to one-hot at
        # the start of training. Scaling by fan_out (= input_dim) gives the
        # 1/sqrt(input_dim) scaling that was intended.
        nn.init.kaiming_normal_(self.W, a=math.sqrt(5), mode='fan_out')

    def forward(self, x):
        """Return the attention-weighted sum of ``x`` over dimension 1.

        Assumes ``x`` is batched as (batch, steps, input_dim) - TODO confirm
        against the caller; the softmax and the sum both run over dim 1.

        Returns:
            Tensor with dimension 1 removed, e.g. (batch, input_dim).
        """
        scores = torch.matmul(torch.tanh(x), self.W)  # (..., steps, 1)
        weights = self.softmax(scores)                # normalised over dim 1
        return torch.sum(x * weights, dim=1)
class Dense_Layer(nn.Module):
    """Two-class output head: one linear layer followed by a softmax.

    Args:
        in_chans: Number of input features.
        dropout_p: Accepted but not used anywhere in this class.

    NOTE(review): ``forward`` returns probabilities, not logits. If the
    training loss is ``nn.CrossEntropyLoss`` (which applies log-softmax
    itself), the softmax would be applied twice - confirm against the
    training loop.
    """

    def __init__(self, in_chans=96, dropout_p=0.25):
        super().__init__()
        self.fc_out = nn.Linear(in_features=in_chans, out_features=2)
        self.softmax = nn.Softmax(dim=1)
        self.init_weights()

    def init_weights(self):
        """Kaiming-normal weights and zero bias for the linear layer."""
        weight, bias = self.fc_out.weight, self.fc_out.bias
        nn.init.kaiming_normal_(weight)
        nn.init.zeros_(bias)

    def forward(self, x):
        """Project ``x`` to two scores and normalise them over dim 1."""
        scores = self.fc_out(x)
        return self.softmax(scores)
class VisionEmbedding(nn.Module):
    """Encode a sequence of 2048-d image features into a single 256-d vector.

    Pipeline: ``Linear(2048, 512) + ReLU`` on every step, then
    ``GRUModel(512 -> 256)`` over the steps, then ``SpatialAttention(256)``
    to pool the steps into one vector per sample.

    Args:
        configs: Not used by this class.
    """

    def __init__(self, configs):
        super().__init__()
        self.fc = nn.Sequential(nn.Linear(2048, 512), nn.ReLU())
        self.hm = GRUModel(input_dim=512, hidden_size=256)
        self.self_attention = SpatialAttention(input_dim=256)

    def forward(self, x):
        """Embed ``x['local_image']`` and return one vector per sample.

        Assumes ``x['local_image']`` is (batch, steps, ..., 2048) where any
        extra axes are singletons - TODO confirm against the data loader.

        Returns:
            Tensor of shape (batch, 256).
        """
        feats = x['local_image']
        # A bare .squeeze() also removes the batch axis when batch size is 1
        # (and the step axis when there is a single step). The GRU would then
        # treat the input as unbatched and the attention would pool over the
        # wrong axis. Reshape explicitly so dim 0 is always the batch and
        # dim 1 is always the sequence.
        feats = feats.reshape(feats.shape[0], -1, self.fc[0].in_features)
        out = self.fc(feats)
        out = self.hm(out)
        return self.self_attention(out)
class IntentPrediction(nn.Module):
    """Full model: a ``VisionEmbedding`` encoder followed by a ``Dense_Layer`` head.

    Args:
        configs: Forwarded unchanged to ``VisionEmbedding``.
    """

    def __init__(self, configs=None):
        super().__init__()
        self.vision_encoder = VisionEmbedding(configs)
        self.dense_layer = Dense_Layer(in_chans=256)

    def forward(self, x):
        """Encode ``x`` with the vision encoder and return the head's output."""
        return self.dense_layer(self.vision_encoder(x))