Hi there. I was wondering why does the loss in my self-build LeNet5 stay constant?
Below is the code.
# LeNet5
class LeNet5(nn.Module):
def __init__(self):
super(LeNet5, self).__init__()
self.model = nn.Sequential(
nn.Conv2d(3, 6, kernel_size=3, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.ReLU(),
nn.Conv2d(6, 16, kernel_size=3, stride=1),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.ReLU(),
nn.Flatten(),
# MLP
nn.Linear(400, 120, bias=True),
nn.ReLU(),
nn.Linear(120, 84, bias=True),
nn.ReLU(),
nn.Dropout(keep_prob),
nn.Linear(84, 2),
)
def forward(self, x):
x = self.model(x)
x_softmax = F.softmax(x, dim=1)
return x_softmax
# instance
model = LeNet5()
# optimizer & loss function
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
loss_function = nn.CrossEntropyLoss()
#
def train(
model,
data_loader,
optimizer,
loss_function,
epoch,
log_interval=200 # print out info every 200 batches
):
model.train()
for batch_idx, (x, label) in enumerate(data_loader):
optimizer.zero_grad()
output = model(x)
loss = loss_function(output, label)
loss.backward()
optimizer.step()
if batch_idx % log_interval == 0:
print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
epoch, batch_idx*len(x), len(data_loader.dataset),
100. * batch_idx/len(data_loader), loss.data.item()
# "batch_idx/len(data_loader)" = 当前的batch index除以batch总数
))
#
def validate(
model,
data_loader,
loss_function,
epoch,
loss_vector,
accuracy_vector
):
'''applied at train dataset as well as validate dataset'''
model.eval()
loss, correct = 0, 0
for x, label in data_loader: # retrieve tensor dataset and label
output = model(x)
loss += loss_function(output, label).data.item() # "loss_function" is pre-defined, which should be applied in this section
pred = output.data.max(1)[1]
correct += pred.eq(label.data).cpu().sum()
# save cal
loss /= len(data_loader)
# print(data_loader.dataset)
loss_vector.append(loss) # for plotting
accuracy = 100. * correct.to(torch.float32) / len(data_loader.dataset)
accuracy_vector.append(accuracy)
print('Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
loss, correct, len(data_loader.dataset), accuracy
))
#
def main(model, train_loader, validate_loader, optimizer, loss_function, num_epochs):
lossv, accv = [], []
train_lossv, train_accv = [], []
for epoch in range(1, num_epochs+1):
train(model, train_loader, optimizer, loss_function, epoch)
print("Train set:")
validate(model, train_loader, loss_function, epoch, train_lossv, train_accv)
print("Validate set:")
validate(model, validate_loader, loss_function, epoch, lossv, accv)
return {'validate':[lossv, accv], 'train':[train_lossv, train_accv]}
I’ll really appreciate it if someone could give me a hint!