when runed classical cnn or simple resnet ,process finished with exit 0.
but when run resnet,train code like:
criterion=nn.CrossEntropyLoss().cuda()
optimizer=torch.optim.SGD(model.parameters(),args.lr,momentum=args.momentum,
nesterov=True,
weight_decay=args.weight_decay)
for epoch in range(args.start_epoch, args.epochs):
adjust_learning_rate(optimizer, epoch)
train(train_loader, model, criterion, optimizer, epoch)
prec1 = validate(val_loader, model, criterion, epoch)
# remember best prec@1 and save checkpoint
is_best = prec1 > best_prec1
best_prec1 = max(prec1, best_prec1)
print('Best accuracy: ', best_prec1)
def train(train_loader, model, criterion, optimizer, epoch):
""“Train for one epoch on the training set”""
batch_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
model.train()
end = time.time()
for i, (input, target) in enumerate(train_loader):
target = target.cuda(async=True)
input = input.cuda()
input_var = torch.autograd.Variable(input)
target_var = torch.autograd.Variable(target)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
# measure accuracy and record loss
prec1 = accuracy(output.data, target, topk=(1,))[0]
losses.update(loss.data[0], input.size(0))
top1.update(prec1[0], input.size(0))
# compute gradient and do SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
print('Epoch: [{0}][{1}/{2}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
epoch, i, len(train_loader), batch_time=batch_time,
loss=losses, top1=top1))
# log to TensorBoard
if args.tensorboard:
log_value('train_loss', losses.avg, epoch)
log_value('train_acc', top1.avg, epoch)
def validate(val_loader, model, criterion, epoch):
""“Perform validation on the validation set”""
batch_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
# switch to evaluate mode
model.eval()
end = time.time()
for i, (input, target) in enumerate(val_loader):
target = target.cuda(async=True)
input = input.cuda()
input_var = torch.autograd.Variable(input, volatile=True)
target_var = torch.autograd.Variable(target, volatile=True)
# compute output
output = model(input_var)
loss = criterion(output, target_var)
# measure accuracy and record loss
prec1 = accuracy(output.data, target, topk=(1,))[0]
losses.update(loss.data[0], input.size(0))
top1.update(prec1[0], input.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
print('Test: [{0}/{1}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
i, len(val_loader), batch_time=batch_time, loss=losses,
top1=top1))
print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))
# log to TensorBoard
if args.tensorboard:
log_value('val_loss', losses.avg, epoch)
log_value('val_acc', top1.avg, epoch)
return top1.avg
class AverageMeter(object):
""“Computes and stores the average and current value”""
def init(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def adjust_learning_rate(optimizer, epoch):
""“Sets the learning rate to the initial LR decayed by 10 after 150 and 225 epochs”""
lr = args.lr * (0.1 ** (epoch // 150)) * (0.1 ** (epoch // 225))
# log to TensorBoard
if args.tensorboard:
log_value(‘learning_rate’, lr, epoch)
for param_group in optimizer.param_groups:
param_group[‘lr’] = lr
def accuracy(output, target, topk=(1,)):
""“Computes the precision@k for the specified values of k”""
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].view(-1).float().sum(0)
res.append(correct_k.mul_(100.0 / batch_size))
return res
if name == ‘main’:
main()
Forward reply.THANKS.