Hi. I’m new to PyTorch and I’m working on the bAbI data set. I have some code that works when I train, but not when I evaluate. During training the accuracy improves over time, but during evaluation it doesn’t: it peaks below 48% accuracy, while in training I can actually achieve 99 or 100% accuracy. The data set comes in two parts: a set of 1000 questions and a separate test set of 1000 more. I have split the test set in half for validation and testing, which gives me train/validation/test proportions of 50/25/25. My code is messy and I want to include as little of it as possible. The PyTorch modules are composed of some ‘gru’ and some ‘linear’ layers and some other components (like embeddings). My basic question is: how do I make sure that dropout computations (and similar things) are removed when I call the ‘eval()’ method on a model? What is the proper way of calling eval()? I will include some module code below. This isn’t everything.
class EpisodicAttn(nn.Module):
    """Gate module that turns a list of feature tensors into one gate vector.

    The tensors given to :meth:`forward` are concatenated along dim 1 and
    flattened into a single row, reduced to one scalar by ``W_1`` + tanh and
    expanded to ``hidden_size`` values by ``W_2`` + sigmoid.

    Args:
        hidden_size: Length of the gate vector returned by :meth:`forward`.
        a_list_size: Number of tensors expected in ``concat_list``.
    """

    def __init__(self, hidden_size, a_list_size=5):
        super(EpisodicAttn, self).__init__()
        self.hidden_size = hidden_size
        self.a_list_size = a_list_size
        self.W_1 = nn.Linear(self.a_list_size * hidden_size, 1)
        self.W_2 = nn.Linear(1, hidden_size)
        # Not used by forward() in this class; presumably applied by the
        # enclosing model -- verify against the caller.
        self.next_mem = nn.Linear(3 * hidden_size, hidden_size)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from +/- 1/sqrt(hidden_size)."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        # In-place init under no_grad() instead of reaching through `.data`.
        with torch.no_grad():
            for weight in self.parameters():
                weight.uniform_(-stdv, stdv)

    def forward(self, concat_list):
        """Return the gate vector for one list of feature tensors.

        Args:
            concat_list: Sequence of exactly ``a_list_size`` tensors. After
                concatenation along dim 1 they are flattened to one row, so
                together they must hold ``a_list_size * hidden_size`` values
                (the input width of ``W_1``).

        Returns:
            1-D tensor of ``hidden_size`` sigmoid activations.

        Raises:
            AssertionError: If ``concat_list`` has the wrong length.
        """
        assert len(concat_list) == self.a_list_size
        # NOTE(review): intermediates are stored on the module, as in the
        # original, in case other code reads them. They keep the most recent
        # autograd graph referenced; switch to locals if nothing reads them.
        self.c_list_z = torch.cat(concat_list, dim=1).view(1, -1)
        self.l_1 = torch.tanh(self.W_1(self.c_list_z))
        self.l_2 = self.W_2(self.l_1)
        # torch.sigmoid rather than F.sigmoid, which warns as deprecated on
        # several PyTorch releases.
        self.G = torch.sigmoid(self.l_2)[0]
        return self.G
class CustomGRU(nn.Module):
    """Single recurrent step with a reset gate that returns the candidate state.

    Computes ``r = sigmoid(Wr(fact) + Ur(C))`` and returns
    ``tanh(W(fact) + r * U(C))``. No update gate is applied inside this
    module.

    Args:
        input_size: Size of the last dimension of ``fact``.
        hidden_size: Size of the last dimension of ``C`` and of the result.
    """

    def __init__(self, input_size, hidden_size):
        super(CustomGRU, self).__init__()
        self.hidden_size = hidden_size
        self.Wr = nn.Linear(input_size, hidden_size)
        self.Ur = nn.Linear(hidden_size, hidden_size)
        self.W = nn.Linear(input_size, hidden_size)
        self.U = nn.Linear(hidden_size, hidden_size)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from +/- 1/sqrt(hidden_size)."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        # In-place init under no_grad() instead of reaching through `.data`.
        with torch.no_grad():
            for weight in self.parameters():
                weight.uniform_(-stdv, stdv)

    def forward(self, fact, C):
        """Return the candidate hidden state for one step.

        Args:
            fact: Input tensor whose last dimension is ``input_size``.
            C: Previous state tensor whose last dimension is ``hidden_size``.

        Returns:
            Tensor with last dimension ``hidden_size``, values in (-1, 1).
        """
        # torch.sigmoid / torch.tanh rather than the F.* aliases, which warn
        # as deprecated on several PyTorch releases.
        r = torch.sigmoid(self.Wr(fact) + self.Ur(C))
        h_tilda = torch.tanh(self.W(fact) + r * self.U(C))
        return h_tilda
class MemRNN(nn.Module):
    """Wrapper around a single-layer, unidirectional ``nn.GRU``.

    The GRU's input width and hidden width are both ``hidden_size`` and it is
    built with ``batch_first=False``.
    """

    def __init__(self, hidden_size):
        super(MemRNN, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=False,
            bidirectional=False,
        )

    def forward(self, input, hidden=None):
        """Run the GRU and return its ``(output, hidden)`` pair unchanged.

        Args:
            input: Tensor passed straight to the GRU.
            hidden: Optional initial hidden state; ``None`` lets the GRU use
                its default.
        """
        gru_output, final_hidden = self.gru(input, hidden)
        return gru_output, final_hidden
class Encoder(nn.Module):
    """Embedding lookup followed by a (optionally bidirectional) GRU.

    Args:
        source_vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding width; also the GRU input width.
        hidden_dim: GRU hidden width per direction.
        n_layers: Number of stacked GRU layers.
        dropout: Dropout probability handed to ``nn.GRU``. ``nn.GRU`` applies
            it only between stacked layers, so it is forwarded only when
            ``n_layers > 1``.
        bidirectional: Build a bidirectional GRU; its two output halves are
            summed in :meth:`forward`.
        embedding: Optional NumPy array copied into the embedding weights.
    """

    def __init__(self, source_vocab_size, embed_dim, hidden_dim,
                 n_layers, dropout, bidirectional=False, embedding=None):
        super(Encoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.embed = nn.Embedding(source_vocab_size, embed_dim, padding_idx=1)
        # A non-zero dropout on a one-layer GRU does nothing and only makes
        # PyTorch emit a UserWarning, so pass it on for stacked GRUs only.
        inter_layer_dropout = dropout if n_layers > 1 else 0
        self.gru = nn.GRU(embed_dim, hidden_dim, n_layers,
                          dropout=inter_layer_dropout,
                          bidirectional=bidirectional)
        if embedding is not None:
            # Copy in place under no_grad() instead of going through `.data`.
            with torch.no_grad():
                self.embed.weight.copy_(torch.from_numpy(embedding))
            print('embedding encoder')

    def forward(self, source, hidden=None):
        """Embed ``source`` and run it through the GRU.

        Args:
            source: Integer index tensor. The GRU is built with the default
                ``batch_first=False``, so dim 0 of the embedded tensor is
                treated as the sequence axis -- assumes ``source`` is
                ``(seq_len, batch)``; TODO confirm against the caller.
            hidden: Optional initial hidden state for the GRU.

        Returns:
            ``(encoder_out, encoder_hidden)``. When bidirectional,
            ``encoder_out`` is the sum of the two direction halves and so has
            last dimension ``hidden_dim``; ``encoder_hidden`` is returned as
            the GRU produced it.
        """
        embedded = self.embed(source)
        encoder_out, encoder_hidden = self.gru(embedded, hidden)
        # Sum the two direction halves; the alternative would be to keep the
        # concatenated features.
        if self.bidirectional:
            encoder_out = (encoder_out[:, :, :self.hidden_dim] +
                           encoder_out[:, :, self.hidden_dim:])
        return encoder_out, encoder_hidden