Hi, I ran into this problem when I tried to remove some filters and then retrain the network, even though I had moved both the model and the data to CUDA. The error occurs at the backward stage, when I call loss.backward().
here is my code:
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9, weight_decay=0.0005)
def train(model, train_loader, criterion, optimizer, device, epoch):
model.train()
for idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output,target)
loss.backward()
optimizer.step()
if idx % 5 == 0:
print("Epoch {} [{}/{} ({:.2f}%)]\tLoss: {:.6f}"
.format(epoch,idx*len(data),len(train_loader.dataset),100.0*idx/len(train_loader),loss.item()))
def test(model,test_loader,criterion,device):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data,target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
loss = criterion(output,target)
test_loss += loss.item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
.format(test_loss, correct, len(test_loader.dataset),100. * correct / len(test_loader.dataset)))
def index_pruned(model, layer):
l = []
for i in range(model.features[layer].weight.size(0)):
l.append(abs(model.features[layer].weight[i,:,:,:]).sum())
return l.index(min(l))
def prune_filter(model, layer, next_layer, indexes):
new_conv = torch.nn.Conv2d(in_channels=model.features[layer].in_channels,
out_channels=model.features[layer].out_channels-1,
kernel_size=model.features[layer].kernel_size,
stride=model.features[layer].stride,
padding=model.features[layer].padding)
new_conv.weight[0:indexes,:,:,:] = model.features[layer].weight[0:indexes,:,:,:]
new_conv.weight[indexes:,:,:,:] = model.features[layer].weight[indexes+1,:,:,:]
new_conv.bias[0:indexes] = model.features[layer].bias[0:indexes]
new_conv.bias[indexes:] = model.features[layer].bias[indexes+1:]
model.features[layer] = new_conv
if layer != 10:
next_new_conv = torch.nn.Conv2d(in_channels=model.features[next_layer].in_channels-1,
out_channels=model.features[next_layer].out_channels,
kernel_size=model.features[next_layer].kernel_size,
stride=model.features[next_layer].stride,
padding=model.features[next_layer].padding)
next_new_conv.weight[:,0:indexes,:,:] = model.features[next_layer].weight[:,0:indexes,:,:]
next_new_conv.weight[:,indexes:,:,:] = model.features[next_layer].weight[:,indexes+1:,:,:]
model.features[next_layer] = next_new_conv
elif layer == 10:
params = int(model.classifier[0].in_features / (model.features[10].out_channels+1))
new_fc1 = torch.nn.Linear(in_features=int(model.classifier[0].in_features-params),
out_features=int(model.classifier[0].out_features))
new_fc1.weight[:,0:indexes*params] = model.classifier[0].weight[:,0:indexes*params]
new_fc1.weight[:,:params*indexes] = model.classifier[0].weight[:,:(indexes+1)*params]
new_fc1.bias = model.classifier[0].bias
model.classifier[0]=new_fc1
return model
def main(model, train_loader,test_loader,criterion,optimizer,
pretrained=False,prune=False,save=False,pruneFilter=False):
device = 'cuda'
if pretrained == True:
model.load_state_dict(torch.load('AlexNet_pruned.pt'))
for params in model.parameters():
params.requires_grad = True
model.to(device)
'''
if prune == True:
threshold = 0.02
for epoch in range(1, 10):
for name, p in model.named_parameters():
if 'weight' in name:
m = mask(p.data, threshold)
p.data = p.data.mul_(m)
threshold = threshold + 0.01
#for epoch in range(2):
# train(model, train_loader, criterion, optimizer, device, epoch)
# test(model,test_loader,criterion,device)
for name, p in model.named_parameters():
if 'weight' in name:
m = mask(p.data, threshold)
p.data = p.data.mul_(m)
'''
if pruneFilter == True:
#conv0:
for num_filters_pruned in range(16):
model=prune_filter(model=model, layer=0, next_layer=3, indexes=index_pruned(model,0))
if num_filters_pruned %4 == 0:
model=model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
#conv2:
for num_filters_pruned in range(81):
model=prune_filter(model=model, layer=3, next_layer=6, indexes=index_pruned(model, 3))
if num_filters_pruned %10 == 0:
model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
#conv3:
for num_filters_pruned in range(192):
model=prune_filter(model=model, layer=6, next_layer=8, indexes=index_pruned(model, 6))
if num_filters_pruned %10 == 0:
model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
#conv4:
for num_filters_pruned in range(128):
model=prune_filter(model=model, layer=8, next_layer=10, indexes=index_pruned(model, 8))
if num_filters_pruned %10 == 0:
model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
#conv5:
for num_filters_pruned in range(128):
model=prune_filter(model=model, layer=10, next_layer=None, indexes=index_pruned(model, 10))
if num_filters_pruned %10 == 0:
model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
torch.save(model.state_dict(),'AlexNet_filers_pruned.pt')
RuntimeError: Function CudnnConvolutionBackward returned an invalid gradient at index 1 - expected type torch.FloatTensor but got torch.cuda.FloatTensor