I've made a simple model and it seems to train properly (the training loss decays), but when I look at the BatchNorm layers' running means and variances, they don't seem to have been updated, i.e., they're still the all-zeros and all-ones vectors, respectively. `track_running_stats` is set to `True` for all the BN layers.
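This is how I'm checking the stats (a quick sketch; `model` here is the ResNet-18 defined at the bottom of the snippet):

```python
import torch.nn as nn

# print the running statistics of every BatchNorm layer
for name, module in model.named_modules():
    if isinstance(module, nn.BatchNorm2d):
        print(name, module.track_running_stats)  # True for every layer
        print(module.running_mean[:4])           # still all zeros
        print(module.running_var[:4])            # still all ones
```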
The relevant code snippet is below:
import time

import torch
import torch.nn as nn
import torchvision

# AverageMeter, ProgressMeter, accuracy, and get_complete_probabilities
# are helper utilities defined elsewhere
def train(train_loader, model, criterion, optimizer, device, binary_classification=False):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1],
        prefix="Train: ")

    # switch to train mode (BN layers should update their running stats in this mode)
    model.train()

    end = time.time()
    for batch_index, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        # BCEWithLogitsLoss expects float targets
        if isinstance(criterion, nn.BCEWithLogitsLoss):
            target = target.float()
        target = target.squeeze().to(device)

        # compute logits and loss
        logits = model(images).squeeze()
        loss = criterion(logits, target)

        # measure accuracy and record loss; reshaping needed for the accuracy calculation
        probabilities = get_complete_probabilities(torch.sigmoid(logits)) if binary_classification \
            else torch.softmax(logits, dim=1)
        acc1 = accuracy(probabilities, target, topk=(1,))[0]
        losses.update(loss.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time per batch
        batch_time.update(time.time() - end)
        end = time.time()

        progress.display(batch_index)
    return losses.avg, top1.avg
# criterion, optimizer, device, train_loader, args, and start_epoch are defined elsewhere
model = torchvision.models.resnet18()
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(device)  # model needs to be on the same device as the inputs

for epoch in range(start_epoch, start_epoch + args.num_epochs):
    avg_loss, avg_acc1 = train(train_loader, model, criterion,
                               optimizer, device)
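For what it's worth, my understanding is that even a single forward pass in train mode should shift the running stats, which is why the all-zeros/all-ones values surprise me. A minimal standalone check on random data (no actual training needed):

```python
import torch
import torch.nn as nn
import torchvision

m = torchvision.models.resnet18()
m.fc = nn.Linear(m.fc.in_features, 2)
m.train()  # running stats only update in train mode

with torch.no_grad():
    _ = m(torch.randn(8, 3, 224, 224))  # one forward pass on dummy inputs

# after a single batch, the first BN layer's stats should have moved
print(m.bn1.running_mean[:4])  # no longer all zeros
print(m.bn1.running_var[:4])   # no longer all ones
```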