Now, I’m revising this code
below is main.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import torch
import argparse
import data
import util
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from models import nin
from torch.autograd import Variable
def save_state(model, best_acc):
print('==> Saving model ...')
state = {
'best_acc': best_acc,
'state_dict': model.state_dict(),
}
for key in state['state_dict'].keys():
if 'module' in key:
state['state_dict'][key.replace('module.', '')] = \
state['state_dict'].pop(key)
torch.save(state, 'models/nin.pth.tar')
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(trainloader):
# process the weights including binarization
bin_op.binarization()
# forwarding
data, target = Variable(data.cuda()), Variable(target.cuda())
optimizer.zero_grad()
output = model(data)
# backwarding
loss = criterion(output, target)
loss.backward()
# restore weights
bin_op.restore()
bin_op.updateBinaryGradWeight()
optimizer.step()
if batch_idx % 100 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLR: {}'.format(
epoch, batch_idx * len(data), len(trainloader.dataset),
100. * batch_idx / len(trainloader), loss.data.item(),
optimizer.param_groups[0]['lr']))
return
def test():
global best_acc
model.eval()
test_loss = 0
correct = 0
bin_op.binarization()
for data, target in testloader:
data, target = Variable(data.cuda()), Variable(target.cuda())
output = model(data)
test_loss += criterion(output, target).data.item()
pred = output.data.max(1, keepdim=True)[1]
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
bin_op.restore()
acc = 100. * float(correct) / len(testloader.dataset)
'''
if acc > best_acc:
best_acc = acc
save_state(model, best_acc)
'''
test_loss /= len(testloader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
test_loss * 128., correct, len(testloader.dataset),
100. * float(correct) / len(testloader.dataset)))
print('Best Accuracy: {:.2f}%\n'.format(best_acc))
return
def adjust_learning_rate(optimizer, epoch):
update_list = [120, 200, 240, 280]
if epoch in update_list:
for param_group in optimizer.param_groups:
param_group['lr'] = param_group['lr'] * 0.1
return
if __name__=='__main__':
# prepare the options
parser = argparse.ArgumentParser()
parser.add_argument('--cpu', action='store_true',
help='set if only CPU is available')
parser.add_argument('--data', action='store', default='./data/',
help='dataset path')
parser.add_argument('--arch', action='store', default='nin',
help='the architecture for the network: nin')
parser.add_argument('--lr', action='store', default='0.01',
help='the intial learning rate')
parser.add_argument('--pretrained', action='store', default=None,
help='the path to the pretrained model')
parser.add_argument('--evaluate', action='store_true',
help='evaluate the model')
args = parser.parse_args()
print('==> Options:',args)
# set the seed
torch.manual_seed(1)
torch.cuda.manual_seed(1)
'''
# prepare the data
if not os.path.isfile(args.data+'/train_data'):
# check the data path
raise Exception\
('Please assign the correct data path with --data <DATA_PATH>')
trainset = data.dataset(root=args.data, train=True)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
shuffle=True, num_workers=2)
testset = data.dataset(root=args.data, train=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
shuffle=False, num_workers=2)
'''
# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
trainset = torchvision.datasets.CIFAR10(
root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=512, shuffle=True, num_workers=12)#num_work = # of CPU we use
testset = torchvision.datasets.CIFAR10(
root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
testset, batch_size=100, shuffle=False, num_workers=12)
# define classes
classes = ('plane', 'car', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# define the model
print('==> building model',args.arch,'...')
if args.arch == 'nin':
model = nin.Net()
else:
raise Exception(args.arch+' is currently not supported')
# initialize the model
if not args.pretrained:
print('==> Initializing model parameters ...')
best_acc = 0
for m in model.modules():
if isinstance(m, nn.Conv2d):
m.weight.data.normal_(0, 0.05)
m.bias.data.zero_()
else:
print('==> Load pretrained model form', args.pretrained, '...')
pretrained_model = torch.load(args.pretrained)
best_acc = pretrained_model['best_acc']
model.load_state_dict(pretrained_model['state_dict'])
if not args.cpu:
model.cuda()
model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
print(model)
# define solver and criterion
base_lr = float(args.lr)
param_dict = dict(model.named_parameters())
params = []
for key, value in param_dict.items():
params += [{'params':[value], 'lr': base_lr,
'weight_decay':0.00001}]
optimizer = optim.Adam(params, lr=0.10,weight_decay=0.00001)
criterion = nn.CrossEntropyLoss()
# define the binarization operator
bin_op = util.BinOp(model)
# do the evaluation if specified
if args.evaluate:
test()
exit(0)
# start training
for epoch in range(1, 320):
adjust_learning_rate(optimizer, epoch)
train(epoch)
test()
and here is nin.py
(revised)
import torch.nn as nn
import torch
import torch.nn.functional as F
class BinActive(torch.autograd.Function):
def forward(self, input):
self.save_for_backward(input)
size = input.size()
mean = torch.mean(input.abs(), 1, keepdim=True)
input = input.sign()
return input, mean
def backward(self, grad_output, grad_output_mean):
input, = self.saved_tensors
grad_input = grad_output.clone()
grad_input[input.gt(1)] = 0
grad_input[input.lt(-1)] = 0
return grad_input
binactive = BinActive.apply
'''
def BinActive(input):
size = input.size()
mean = torch.mean(input.abs(), 1, keepdim=True)
input = input.sign()
return input, mean
'''
class BinConv2d(nn.Module):
def __init__(self, input_channels, output_channels,
kernel_size=-1, stride=-1, padding=-1, dropout=0):
super(BinConv2d, self).__init__()
self.layer_type = 'BinConv2d'
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dropout_ratio = dropout
self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
self.bn.weight.data = self.bn.weight.data.zero_().add(1.0)
if dropout!=0:
self.dropout = nn.Dropout(dropout)
self.conv = nn.Conv2d(input_channels, output_channels,
kernel_size=kernel_size, stride=stride, padding=padding)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.bn(x)
x, mean = binactive(x)
if self.dropout_ratio!=0:
x = self.dropout(x)
x = self.conv(x)
x = self.relu(x)
return x
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.xnor = nn.Sequential(
nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(192, eps=1e-4, momentum=0.1, affine=False),
nn.ReLU(inplace=True),
BinConv2d(192, 160, kernel_size=1, stride=1, padding=0),
BinConv2d(160, 96, kernel_size=1, stride=1, padding=0),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
BinConv2d( 96, 192, kernel_size=5, stride=1, padding=2, dropout=0.5),
BinConv2d(192, 192, kernel_size=1, stride=1, padding=0),
BinConv2d(192, 192, kernel_size=1, stride=1, padding=0),
nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
BinConv2d(192, 192, kernel_size=3, stride=1, padding=1, dropout=0.5),
BinConv2d(192, 192, kernel_size=1, stride=1, padding=0),
nn.BatchNorm2d(192, eps=1e-4, momentum=0.1, affine=False),
nn.Conv2d(192, 10, kernel_size=1, stride=1, padding=0),
nn.ReLU(inplace=True),
nn.AvgPool2d(kernel_size=8, stride=1, padding=0),
)
def forward(self, x):
for m in self.modules():
if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
if hasattr(m.weight, 'data'):
m.weight.data.clamp_(min=0.01)
x = self.xnor(x)
x = x.view(x.size(0), 10)
return x
this is an error code
Traceback (most recent call last):
File "main.py", line 205, in <module>
train(epoch)
File "main.py", line 44, in train
loss.backward()
File "/home/mel/.local/lib/python3.7/site-packages/torch/tensor.py", line 221, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/mel/.local/lib/python3.7/site-packages/torch/autograd/__init__.py", line 132, in backward
allow_unreachable=True) # allow_unreachable flag
File "/home/mel/.local/lib/python3.7/site-packages/torch/autograd/function.py", line 89, in apply
return self._forward_cls.backward(self, *args) # type: ignore
File "/home/mel/.local/lib/python3.7/site-packages/torch/autograd/function.py", line 201, in backward
raise NotImplementedError("You must implement the backward function for custom"
NotImplementedError: You must implement the backward function for custom autograd.Function.
I’ve been googled for 3 hours, but can’t find a solution.
Is it correct to add Binactiv'backward
to the loss function?