Finetune questions

When I try to use the official guide to finetune a model, an error happens in the train method.
The train method:

from datetime import datetime

import torch
import torch.nn as nn
from torch.autograd import Variable  # pre-0.4 autograd wrapper


def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
    if torch.cuda.is_available():
        net = net.cuda()
    prev_time = datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        net.train()
        for im, label in train_data:
            if torch.cuda.is_available():
                im_train = Variable(im.cuda())  # (bs, 3, h, w)
                label_train = Variable(label.cuda())  # (bs, h, w)
            else:
                # inputs and class targets do not need requires_grad=True;
                # gradients are tracked through the network's parameters
                im_train = Variable(im)
                label_train = Variable(label)
            # forward
            output = net(im_train)
            loss = criterion(output, label_train)
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.data[0]  # pre-0.4 way to get a Python number (loss.item() on newer PyTorch)
            train_acc += get_acc(output, label_train)

        cur_time = datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            net.eval()
            for im, label in valid_data:
                if torch.cuda.is_available():
                    # volatile=True is the pre-0.4 way to run inference without
                    # building the autograd graph (torch.no_grad() on newer PyTorch)
                    im_val = Variable(im.cuda(), volatile=True)
                    label_val = Variable(label.cuda(), volatile=True)
                else:
                    im_val = Variable(im, volatile=True)
                    label_val = Variable(label, volatile=True)
                output = net(im_val)
                loss = criterion(output, label_val)
                valid_loss += loss.data[0]
                valid_acc += get_acc(output, label_val)
            epoch_str = (
                "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
                % (epoch, train_loss / len(train_data),
                   train_acc / len(train_data), valid_loss / len(valid_data),
                   valid_acc / len(valid_data)))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str)

# create model
model = MobileNet()
model = torch.nn.DataParallel(model).cuda()
params = torch.load('model_params.pth.tar')['state_dict']
model.load_state_dict(params)
# only keep grad on the fc layer
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(2048, 5)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, weight_decay=1e-4)
train(model, train_data, valid_data, 30, optimizer, criterion)

1. Why is there an error:
RuntimeError: element 0 of variables does not require grad and does not have a grad_fn
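
For context, backward() raises this exact message whenever the value it is called on was computed entirely from tensors that do not require grad, so there is no grad_fn to backpropagate through. A minimal sketch reproducing it (using the current tensor API rather than Variable):

import torch

x = torch.ones(1)     # requires_grad defaults to False
loss = (x * 2).sum()  # built only from non-grad tensors, so loss has no grad_fn
loss.backward()       # RuntimeError: element 0 of tensors does not require grad ...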

2. Should I just remove this code:

for param in model.parameters():
    param.requires_grad = False
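
Removing it would train the whole network rather than just the head, so it is worth noting where the error actually comes from: because the model is wrapped in DataParallel, model.fc = nn.Linear(2048, 5) attaches the new layer to the wrapper rather than to the underlying network, so the forward pass runs entirely on frozen parameters and the loss has no grad_fn. A minimal sketch of the usual freeze-then-replace pattern (assuming the wrapped MobileNet exposes its classifier as fc; that attribute name is a guess here):

import torch
import torch.nn as nn

# freeze the pretrained backbone on the wrapped module...
for param in model.module.parameters():
    param.requires_grad = False

# ...then replace the head on model.module, not on the DataParallel wrapper;
# a fresh Linear layer requires grad by default
model.module.fc = nn.Linear(2048, 5).cuda()

# give the optimizer only the parameters that still require gradients
optimizer = torch.optim.SGD(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-2, weight_decay=1e-4)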

3. Why do we need optimizer.zero_grad() in training?
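
On question 3: PyTorch accumulates gradients into each parameter's .grad across backward() calls instead of replacing them, so without optimizer.zero_grad() every step would apply the sum of all previous steps' gradients. A tiny sketch of the accumulation (current tensor API):

import torch

w = torch.ones(1, requires_grad=True)
(2 * w).sum().backward()
print(w.grad)   # tensor([2.])
(2 * w).sum().backward()
print(w.grad)   # tensor([4.]) -- accumulated, not replaced
w.grad.zero_()  # this is what optimizer.zero_grad() does for each parameter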


Have you solved the problem?

Yes, I solved this by removing the unnecessary keys from the pretrained state dict before loading it.

import torchvision.models as models

# There are two kinds of pretrained models; here we load a torchvision one
vgg16 = models.vgg16(pretrained=True)
pretrained_dict = vgg16.state_dict()
model_dict = model.state_dict()

# 1. filter out unnecessary keys
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict) 
# 3. load the new state dict
model.load_state_dict(model_dict)
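
As a quick sanity check (just a sketch built on the variables above), you can print which checkpoint keys were filtered out in step 1 and confirm that only the mismatched layers were dropped:

# keys present in the torchvision checkpoint but absent from our model
dropped = sorted(set(vgg16.state_dict().keys()) - set(pretrained_dict.keys()))
print(dropped)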

My GitHub sample for MobileNet.