When I try to follow the official guide to fine-tune a model, an error occurs in the train method.
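For reference, the pattern from the guide that I tried to adapt looks roughly like this (the guide uses resnet18; I am quoting it from memory, so details may differ):

import torchvision
import torch.nn as nn
import torch.optim as optim

model_conv = torchvision.models.resnet18(pretrained=True)
# freeze everything except the classifier replaced below
for param in model_conv.parameters():
    param.requires_grad = False
# parameters of newly constructed modules have requires_grad=True by default
model_conv.fc = nn.Linear(model_conv.fc.in_features, 2)

criterion = nn.CrossEntropyLoss()
# only the parameters of the final layer are optimized
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)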
The train method:
import torch
import torch.nn as nn
from torch.autograd import Variable
from datetime import datetime

def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
    if torch.cuda.is_available():
        net = net.cuda()
    prev_time = datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        net = net.train()
        for im, label in train_data:
            if torch.cuda.is_available():
                im_train = Variable(im.cuda())      # (bs, 3, h, w)
                label_train = Variable(label.cuda())  # (bs, h, w)
            else:
                im_train = Variable(im, requires_grad=True)
                label_train = Variable(label, requires_grad=True)
            # forward
            output = net(im_train)
            loss = criterion(output, label_train)
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
            train_acc += get_acc(output, label_train)
        cur_time = datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            net = net.eval()
            for im, label in valid_data:
                if torch.cuda.is_available():
                    im_val = Variable(im.cuda(), volatile=True)
                    label_val = Variable(label.cuda(), volatile=True)
                else:
                    im_val = Variable(im, volatile=True)
                    label_val = Variable(label, volatile=True)
                output = net(im_val)
                loss = criterion(output, label_val)
                valid_loss += loss.data[0]
                valid_acc += get_acc(output, label_val)
            epoch_str = (
                "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
                % (epoch, train_loss / len(train_data),
                   train_acc / len(train_data), valid_loss / len(valid_data),
                   valid_acc / len(valid_data)))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str)
# create model
model = MobileNet()
model = torch.nn.DataParallel(model).cuda()
params = torch.load('model_params.pth.tar')['state_dict']
model.load_state_dict(params)
# only keep grad on fc layer
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(2048, 5)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, weight_decay=1e-4)
train(model, train_data, valid_data, 30, optimizer, criterion)
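To narrow things down, I can reproduce the same error message with a tiny standalone snippet that has nothing to do with my model (x and w below are throwaway Variables, just for illustration):

import torch
from torch.autograd import Variable

# neither Variable requires grad, so the result has no grad_fn
x = Variable(torch.randn(2, 3))
w = Variable(torch.randn(3, 4))
out = x.mm(w).sum()
out.backward()  # RuntimeError: element 0 of variables does not require grad and does not have a grad_fn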
1. Why is there an error:
RuntimeError: element 0 of variables does not require grad and does not have a grad_fn
2. Should I just remove this code?
for param in model.parameters():
    param.requires_grad = False
3. Why do we need optimizer.zero_grad() during training? (The snippet at the end of this post shows the behavior I am asking about.)
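For question 3, here is a small sketch of the behavior I am asking about: gradients accumulate across backward() calls unless the buffers are zeroed (x is a throwaway Variable, not from my code above):

import torch
from torch.autograd import Variable

x = Variable(torch.ones(2), requires_grad=True)

y = (2 * x).sum()
y.backward()
print(x.grad)        # 2, 2

# a second backward() without zeroing: new gradients are added on top
z = (3 * x).sum()
z.backward()
print(x.grad)        # 5, 5  (2 + 3)

# zero the gradient buffers first, then backward again
x.grad.data.zero_()
w = (3 * x).sum()
w.backward()
print(x.grad)        # 3, 3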