# This is the entire code.
from __future__ import print_function
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
import time
from models import *
from torch.optim import Adam, SGD, NAdam
from optimizers import *
from sls import *
from tensorboardX import SummaryWriter
from torch_lr_finder import LRFinder
import pandas as pd
# Module-level TensorBoard writer; it is created at import time and targets ./log.
writer = SummaryWriter('./log')
# `os` is already imported near the top of the file; this repeat is harmless.
import os
# Sets CUDA_LAUNCH_BLOCKING=1 for this process.
# NOTE(review): presumably enabled so CUDA errors surface at the failing call
# (synchronous kernel launches); this usually slows training - confirm it is still wanted.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
def get_parser():
    """Build the command-line parser for the CIFAR10/100 training script.

    Returns:
        argparse.ArgumentParser: parser exposing the dataset, model,
        optimizer and schedule options. Note that ``--lr-gamma`` is stored
        as ``args.lr_gamma`` (argparse turns dashes into underscores).
    """
    parser = argparse.ArgumentParser(description='PyTorch CIFAR10/100 Training')

    # Data / model / schedule selection.
    parser.add_argument('--dataset', default='cifar10', type=str,
                        help='dataset = [cifar10/cifar100]')
    parser.add_argument('--total_epoch', default=200, type=int,
                        help='Total number of training epochs')
    parser.add_argument('--decay_epoch', default=150, type=int,
                        help='Number of epochs to decay learning rate')
    parser.add_argument('--model', default='resnet', type=str, help='model',
                        choices=['resnet', 'densenet', 'vgg', 'wideresnet'])
    parser.add_argument('--optim', default='adam2', type=str, help='optimizer')
    parser.add_argument('--run', default=0, type=int, help='number of runs')

    # Optimizer hyper-parameters.
    parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
    # The help text used to be a copy of --lr's; this value is the factor the
    # learning rate is multiplied by at each decay step.
    parser.add_argument('--lr-gamma', default=0.1, type=float,
                        help='multiplicative learning rate decay factor')
    parser.add_argument('--final_lr', default=0.1, type=float,
                        help='final learning rate of AdaBound')
    parser.add_argument('--gamma', default=1e-3, type=float,
                        help='convergence speed term of AdaBound')
    parser.add_argument('--eta', default=0.001, type=float, help='Gradient noise parameter')
    parser.add_argument('--eps', default=1e-8, type=float, help='eps for var adam')
    parser.add_argument('--momentum', default=0.9, type=float, help='momentum term')
    parser.add_argument('--beta', default=0.999, type=float, help='coefficients beta')
    parser.add_argument('--beta1', default=0.9, type=float, help='Adam coefficients beta_1')
    parser.add_argument('--beta2', default=0.999, type=float, help='Adam coefficients beta_2')
    parser.add_argument('--lambdaA', default=0.5, type=float, help='MAS coefficients lambdaA')
    parser.add_argument('--lambdaS', default=0.5, type=float, help='MAS coefficients lambdaS')

    # Run control.
    parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
    parser.add_argument('--batchsize', type=int, default=256, help='batch size')
    parser.add_argument('--weight_decay', default=5e-4, type=float,
                        help='weight decay for optimizers')
    parser.add_argument('--reset', action='store_true',
                        help='whether reset optimizer at learning rate decay')
    return parser
def build_dataset(args):
    """Create the CIFAR train/test data loaders selected by ``args``.

    Args:
        args: parsed options; ``args.dataset`` ('cifar10' or 'cifar100') and
            ``args.batchsize`` are read. If ``None``, the command line is
            parsed with ``get_parser()`` (the previous behaviour, which
            always re-parsed and ignored the argument).

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles
        and applies random crop + horizontal flip; the test loader does not.

    Raises:
        ValueError: if ``args.dataset`` is not a supported dataset name
            (previously this surfaced as an UnboundLocalError at ``return``).
    """
    # print('==> Preparing data..')
    if args is None:
        args = get_parser().parse_args()

    # dataset name -> (class name in torchvision.datasets, channel means, channel stds)
    dataset_specs = {
        'cifar10': ('CIFAR10', (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        'cifar100': ('CIFAR100', (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    }
    if args.dataset not in dataset_specs:
        raise ValueError('Unsupported dataset {!r}; expected one of {}'.format(
            args.dataset, sorted(dataset_specs)))

    class_name, mean, std = dataset_specs[args.dataset]
    dataset_cls = getattr(torchvision.datasets, class_name)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    trainset = dataset_cls(root='./data', train=True, download=True,
                           transform=transform_train)
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batchsize, shuffle=True, num_workers=2)

    testset = dataset_cls(root='./data', train=False, download=True,
                          transform=transform_test)
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=args.batchsize, shuffle=False, num_workers=2)

    return train_loader, test_loader
def get_ckpt_name(comp='Effect', model='resnet', dataset='cifar10', batchsize=128, optimizer='sgd', lr=0.1, weight_decay=5e-4, decay_epoch=150, total_epoch=200,
                  run=0):
    """Return the file name that identifies one experiment configuration.

    The name is the dash-joined sequence
    ``comp-dataset-optimizer-lr-<lr>-batch-<batchsize>-model-total-<total_epoch>-decay-<decay_epoch>-WD-<weight_decay>-run-<run>``,
    with every value rendered through ``'{}'.format``.
    """
    pieces = [
        comp, dataset, optimizer,
        'lr', lr,
        'batch', batchsize,
        model,
        'total', total_epoch,
        'decay', decay_epoch,
        'WD', weight_decay,
        'run', run,
    ]
    return '-'.join('{}'.format(piece) for piece in pieces)
def load_checkpoint(ckpt_name):
    """Load the checkpoint stored at ``checkpoint/<ckpt_name>``.

    Args:
        ckpt_name: file name of the checkpoint inside the ``checkpoint``
            directory (relative to the current working directory).

    Returns:
        Whatever ``torch.load`` returns for that file.

    Raises:
        AssertionError: if the ``checkpoint`` directory or the requested
            file does not exist (the directory is checked first).
    """
    ckpt_dir = 'checkpoint'
    ckpt_path = os.path.join(ckpt_dir, ckpt_name)

    # Directory check comes first so a missing directory gets its own message.
    assert os.path.isdir(ckpt_dir), 'Error: no checkpoint directory found!'
    assert os.path.exists(ckpt_path), 'Error: checkpoint {} not found'.format(ckpt_name)

    checkpoint = torch.load(ckpt_path)
    return checkpoint
def build_model(args, device, ckpt=None):
    """Instantiate the network named by ``args.model`` and place it on ``device``.

    Args:
        args: parsed options; only ``args.model`` is read.
        device: target device; when it equals the string 'cuda' the network
            is wrapped in ``torch.nn.DataParallel`` and cudnn benchmarking
            is switched on.
        ckpt: optional checkpoint mapping; when truthy its ``'net'`` entry is
            loaded as the state dict (after any DataParallel wrapping).

    Returns:
        The constructed network.

    Raises:
        KeyError: if ``args.model`` has no entry in the factory table below.
            NOTE(review): 'densenet' is accepted by ``--model`` but has no
            entry here.
    """
    # print('==> Building model..')
    model_factories = dict(
        resnet=ResNet34,
        vgg=vgg11,
        wideresnet=Wide_ResNet,  # for CIFAR 100
    )
    factory = model_factories[args.model]
    net = factory().to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if ckpt:
        net.load_state_dict(ckpt['net'])

    return net
def create_optimizer(args, model_params):
    """Create the optimizer selected by ``args.optim``.

    ``args.optim`` is lower-cased in place before the lookup. Only 'sgd' is
    supported by this function.

    Args:
        args: parsed options; ``optim``, ``lr`` and ``weight_decay`` are read.
        model_params: iterable of parameters to optimize.

    Returns:
        torch.optim.SGD: SGD with learning rate ``args.lr``, weight decay
        ``args.weight_decay`` and momentum fixed at 0.

    Raises:
        ValueError: if ``args.optim`` is not supported. Previously the
            function printed a message and returned ``None``, which only
            failed later on the first ``optimizer.param_groups`` access.
    """
    args.optim = args.optim.lower()
    if args.optim == 'sgd':
        # NOTE(review): momentum is hard-coded to 0 and args.momentum is not
        # consulted here - confirm this is intentional.
        return optim.SGD(model_params, args.lr, momentum=0,
                         weight_decay=args.weight_decay)
    raise ValueError('Optimizer not found: {!r}'.format(args.optim))
def get_lr(optimizer):
    """Return the ``'lr'`` value of the optimizer's first parameter group.

    Returns ``None`` when ``optimizer.param_groups`` is empty. Only the first
    group is inspected.
    """
    # Lazy generator: nothing beyond the first group is ever evaluated.
    group_rates = (group['lr'] for group in optimizer.param_groups)
    return next(group_rates, None)
def train(net, epoch, device, data_loader, optimizer, criterion, args):
    """Run one training epoch and report its loss and accuracy.

    Args:
        net: model to train; switched to training mode.
        epoch: epoch index, used only for the progress print.
        device: device the inputs and targets are moved to.
        data_loader: iterable of ``(inputs, targets)`` batches.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.
        criterion: loss callable taking ``(outputs, targets)``.
        args: parsed options; kept for interface compatibility, not read.

    Returns:
        tuple: ``(train_loss, accuracy)``, where ``train_loss`` is the SUM of
        the per-batch loss values (not a mean) and ``accuracy`` is the
        percentage of correctly classified samples. An empty ``data_loader``
        gives ``(0, 0.0)`` instead of raising ZeroDivisionError.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        # NOTE(review): an earlier comment here suggested custom optimizers may
        # return loss/step size from step(); the return value is not used.
        optimizer.step()

        train_loss += loss.item()
        # Predicted class = index of the largest output along dim 1.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    # Guard against an empty loader, which used to divide by zero.
    accuracy = 100. * correct / total if total else 0.0
    print('train loss: %.3f' % train_loss)
    return train_loss, accuracy
def test(net, epoch, device, data_loader, criterion):
net.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(data_loader):
inputs, targets = inputs.to(device), targets.to(device)
outputs = net(inputs)
loss = criterion(outputs, targets)
test_loss += loss.item()
_, predicted = outputs.max(1)
total += targets.size(0)
correct += predicted.eq(targets).sum().item()
accuracy = 100. * correct / total
Test_losses = test_loss
print('test acc: %.3f' % accuracy)
return Test_losses, accuracy
def adjust_learning_rate(optimizer, epoch, step_size=150, gamma=0.1, reset = False):
    """Apply step decay to every parameter group's learning rate.

    On each positive ``epoch`` that is a multiple of ``step_size`` the ``'lr'``
    of every group in ``optimizer.param_groups`` is multiplied by ``gamma``.
    If ``reset`` is also true, ``optimizer.reset()`` is called once after the
    learning rates have been updated. On all other epochs nothing changes.
    """
    groups = optimizer.param_groups
    at_decay_boundary = epoch % step_size == 0 and epoch > 0
    if not at_decay_boundary:
        return

    for group in groups:
        group['lr'] *= gamma

    if reset:
        optimizer.reset()
def main():
    """Parse the command line, train for the requested epochs and save curves.

    After every epoch the accumulated train/test losses and accuracies are
    written with ``torch.save`` to ``curve/<ckpt_name>``. With ``--resume``
    the start epoch and the previous curves are read back from
    ``checkpoint/<ckpt_name>`` and ``curve/<ckpt_name>``.

    NOTE(review): this function never writes to ``checkpoint/``, so
    ``--resume`` depends on a checkpoint produced elsewhere; the optimizer
    state and any learning-rate decay already applied are not restored.
    """
    parser = get_parser()
    args = parser.parse_args()

    train_loader, test_loader = build_dataset(args)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Only pass keywords get_ckpt_name actually accepts; the previous call also
    # passed lambdaA/lambdaS/eta/final_lr/momentum and failed with TypeError.
    ckpt_name = get_ckpt_name(model=args.model, dataset=args.dataset,
                              batchsize=args.batchsize, optimizer=args.optim,
                              lr=args.lr, weight_decay=args.weight_decay,
                              decay_epoch=args.decay_epoch,
                              total_epoch=args.total_epoch, run=args.run)
    # print('ckpt_name')
    curve_path = os.path.join('curve', ckpt_name)

    if args.resume:
        ckpt = load_checkpoint(ckpt_name)
        start_epoch = ckpt['epoch']

        curve = torch.load(curve_path)
        train_losses = curve['train_loss']
        test_losses = curve['test_loss']
        train_accuracies = curve['train_acc']
        test_accuracies = curve['test_acc']
    else:
        ckpt = None
        start_epoch = -1  # so the loop below starts at epoch 0
        train_losses = []
        test_losses = []
        train_accuracies = []
        test_accuracies = []

    net = build_model(args, device, ckpt=ckpt)
    # Use torch.nn explicitly: `nn` is not imported by name in this file.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = create_optimizer(args, net.parameters())

    start_time = time.time()
    for epoch in range(start_epoch + 1, args.total_epoch):
        start = time.time()
        #scheduler.step()
        adjust_learning_rate(optimizer, epoch, step_size=args.decay_epoch,
                             gamma=args.lr_gamma, reset=args.reset)
        train_loss, train_acc = train(net, epoch, device, train_loader,
                                      optimizer, criterion, args)
        test_loss, test_acc = test(net, epoch, device, test_loader, criterion)
        end = time.time()
        print('Time {}'.format(end-start))

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        # Persist the curves after every epoch so an interrupted run keeps them.
        if not os.path.isdir('curve'):
            os.mkdir('curve')
        torch.save({'train_loss': train_losses, 'test_loss': test_losses,
                    'train_acc': train_accuracies, 'test_acc': test_accuracies},
                   curve_path)

    end_time = time.time()
    print('End Time: {}'.format(end_time - start_time))


if __name__ == '__main__':
    main()