Hello.
I have a classification task. The input is a sequence of 3x256x256 images. I want to pass each frame through a CNN, feed the CNN output into an RNN, and then, at the last timestep, obtain class probabilities.
Here is my model:
class Model(nn.Module):
    """CNN + stacked-GRU classifier for image sequences.

    Each 3x256x256 frame is encoded by a pretrained AlexNet feature
    extractor, the flattened features are fed through two GRUCells
    (the caller invokes ``forward`` once per timestep, threading the
    hidden state through), and the second cell's hidden state is mapped
    to ``output_size`` scores.  In training mode the raw logits are
    returned (presumably paired with a logit-based loss such as
    BCEWithLogitsLoss -- TODO confirm against the caller's criterion);
    in eval mode a sigmoid is applied so the output reads as
    per-class probabilities.
    """

    def __init__(self, hidden_size, output_size):
        super(Model, self).__init__()
        # Hyperparameters
        self.hidden_size = hidden_size
        self.output_size = output_size
        # AlexNet's conv stack maps a 3x256x256 input to 256x7x7 feature maps.
        self.feature_size = 256 * 7 * 7
        self.dropout_ratio = 0.5
        # Learnable initial hidden states for the two GRU cells
        # (expanded per batch in new_hidden()).
        self.initial_hidden1 = nn.Parameter(torch.zeros(512))
        self.initial_hidden2 = nn.Parameter(torch.zeros(hidden_size))
        alex = alexnet(pretrained=True)
        self.feature_extractor = alex.features
        self.hidden1 = nn.GRUCell(self.feature_size, 512)
        self.hidden2 = nn.GRUCell(512, hidden_size)
        partial_hidden = round(hidden_size / 2)
        self.classifier = nn.Sequential(
            nn.Dropout(self.dropout_ratio),
            nn.Linear(hidden_size, partial_hidden),
            nn.ReLU(inplace=True),
            nn.Dropout(self.dropout_ratio),
            nn.Linear(partial_hidden, output_size),
        )

    def forward(self, input, hidden=None):
        """Process one timestep.

        input:  (batch, 3, 256, 256) frame tensor for this timestep.
        hidden: tuple of the two GRU hidden states from the previous
                timestep, or None to start from the learned initial state.
        Returns ``(output, (hidden1, hidden2))``; pass the hidden tuple
        back in on the next timestep.
        """
        if hidden is None:
            hidden = self.new_hidden(input.size(0))
        features = self.feature_extractor(input)
        # Flatten per sample.  Anchoring the batch dimension explicitly
        # (instead of view(-1, feature_size)) raises immediately if the
        # spatial size ever changes, rather than silently folding the
        # batch axis into a wrong shape.
        features = features.view(features.size(0), -1)
        hidden1 = self.hidden1(features, hidden[0])
        hidden2 = self.hidden2(hidden1, hidden[1])
        output = self.classifier(hidden2)
        if not self.training:
            # Expose probabilities at inference time; training consumes logits.
            output = torch.sigmoid(output)
        return output, (hidden1, hidden2)

    def new_hidden(self, batch=None):
        """Return the learned initial hidden states.

        With ``batch`` given, each state is repeated to (batch, size);
        otherwise the raw 1-D parameters are returned.
        """
        hidden1 = self.initial_hidden1
        if batch is not None:
            hidden1 = hidden1.repeat(batch, 1)
        hidden2 = self.initial_hidden2
        if batch is not None:
            hidden2 = hidden2.repeat(batch, 1)
        return hidden1, hidden2
Here is how I train it:
batch_size = 16
seq_length = 30

for epoch in range(epochs):
    model.train()
    for batch_idx, batch in enumerate(train_dataloader):
        model.zero_grad()
        # frames:  [16, 30, 3, 256, 256]
        # targets: [16, 39]
        frames, targets = batch
        frames = frames.cuda()
        targets = Variable(targets.cuda())
        # Unroll the recurrence manually, one frame at a time.
        hidden = None
        for t in range(frames.size(1)):
            output, hidden = model(Variable(frames[:, t]), hidden)
        # Only the final-timestep output ([16, 39]) contributes to the loss.
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
After each epoch, accuracy is measured on test data:
model.eval()
for batch_idx, batch in enumerate(loader):
    frames, targets = batch
    frames = frames.cuda()
    targets = Variable(targets.cuda(), volatile=True)
    # Run the whole sequence, keeping only the last timestep's output.
    hidden = None
    for t in range(frames.size(1)):
        output, hidden = model(Variable(frames[:, t], volatile=True), hidden)
    print(output)
However, I get exactly the same output for every item in the batch (each sequence receives identical prediction values). This seems strange, because in the training loop the outputs do differ across the sequences in a batch. And although the loss changes as training progresses, the evaluation output does not change at all.
I noticed that even when I changed the convolutional part of my model, nothing changed.
What am I doing wrong?
Thank you in advance.