Hey all,
I got following error:
RuntimeError: expected stride to be a single integer value or a list of 3 values to match the convolution dimensions, but got stride=[1, 1]
all I’ve found is to use unqueeze, but I already did. I’m just wondering because I my training setup everything works fine.
Train:
# Write a function that determines a threshold for input/output neurons to be set to zero
# (the ones which have been reduced in absolute value using the sparsity constraint).
# You can use the function topk (similar to nth_element in C++),
# which outputs both the values and indices sorted around a chosen quantile/percentile.
# Here we simply use the median to set 50% of values to zero.
# When applied correctly (as incoming & outgoing mask) for each Conv2d layer,
# it reduces the nonzero parameters by ~75% (the first incoming & last outgoing Conv2d are not masked).
# Note that BatchNorm has four tensors and two index masks have to be applied as follows:
# B = A[idx_next,:,:,:][:,idx_prev,:,:]
# Now you can replace all Conv2d and BatchNorm2d layers with smaller filters
# (and copy their weights) so that we have the following sequence of channels:
# 3, 32, 64, (2x)128, (3x)256, 512
def flop50k(net_trained,percentOut):
percentIn=1-percentOut
for feature_idx in range(29):
if isinstance(net_trained.features[feature_idx],(nn.BatchNorm2d)):
if feature_idx != 26:
sizeIn = int(net_trained.features[feature_idx].bias.size(0)*percentIn)
#layerBias = net_trained.features[feature_idx].bias
layerWeight = net_trained.features[feature_idx].weight
#bias_flopk = torch.topk(layerBias,sizeIn, largest=True)[1]
weight_flopk = torch.topk(layerWeight,sizeIn, largest=True)[1].sort()[0]
#net_trained.features[feature_idx].weight[weight_flopk] = 0
#net_trained.features[feature_idx].bias[bias_flopk] = 0
#net_trained.features[feature_idx-1].weight[bias_flopk,:,:,:] = 0
#net_trained.features[feature_idx-1].bias[bias_flopk,:,:,:] = 0
#save params of BatchNorm where the weights are above the treshold
#totalparams = net_trained.features[feature_idx].total_params
weightBatch = net_trained.features[feature_idx].weight[weight_flopk]
biasBatch = net_trained.features[feature_idx].bias[weight_flopk]
runnngmeanBatch = net_trained.features[feature_idx].running_mean[weight_flopk]
runningvarBatch = net_trained.features[feature_idx].running_var[weight_flopk]
#creating slimmer BatchNorm
net_trained.features[feature_idx] = nn.BatchNorm2d(int(sizeIn),
eps=1e-05, momentum=0.1,
affine=True,
track_running_stats=True)
net_trained.features[feature_idx].weight.data = weightBatch
net_trained.features[feature_idx].bias.data = biasBatch
net_trained.features[feature_idx].running_mean.data = runnngmeanBatch
net_trained.features[feature_idx].running_var.data = runningvarBatch
#adapt the joint Cov2d
weightConv = net_trained.features[feature_idx-1].weight
biasConv = net_trained.features[feature_idx-1].bias
# size(0) outchannel size(1) inchannel
sizeConv = weightConv.size()
#create new Conv layer
#don't reduce the first input channel size
if feature_idx == 1:
net_trained.features[feature_idx-1] = nn.Conv2d(sizeConv[1],
int(sizeConv[0]*percentIn),
kernel_size=3,
stride=1,
padding=1)
net_trained.features[feature_idx-1].weight.data = weightConv[weight_flopk,:,:,:]
net_trained.features[feature_idx-1].bias.data = biasConv[weight_flopk]
elif feature_idx == 26:
#Batch und conv auf 512
net_trained.features[feature_idx-1] = nn.Conv2d(int(sizeConv[1]*percentIn),
512,
kernel_size=3,
stride=1,
padding=1)
net_trained.features[feature_idx-1].weight.data = weightConv[:,weight_flopk_prev.view(1,-1),:,:]
net_trained.features[feature_idx-1].bias.data = biasConv[weight_flopk]
else:
net_trained.features[feature_idx-1] = nn.Conv2d(int(sizeConv[1]*percentIn),
int(sizeConv[0]*percentIn),
kernel_size=3,
stride=1,
padding=1)
net_trained.features[feature_idx-1].weight.data = weightConv[weight_flopk.view(-1,1),weight_flopk_prev.view(1,-1),:,:]
net_trained.features[feature_idx-1].bias.data = biasConv[weight_flopk]
#save the index for the inchannels of the next layer
weight_flopk_prev = weight_flopk
return net_trained
#net2.eval()
net3=copy.deepcopy(net2)
with torch.no_grad():
# SAVE THE TRAINED MODEL
net4 = flop50k(net3,0.5)
print(net3)
print(net4)
torch.save(net4.cpu(),'mdl_4_net4.pth')
Test:
# Evaluate the slimmed network (you could observe a slight improvement to ~92%)
# and confirm that the required computations are reduced to 12 GFlops
with torch.no_grad():
net4 = torch.load('mdl_4_net4.pth')
net4.eval()
test_num = img_test.size(0)
test_acc = 0.0
# LOOP over the testdata set
for i in range(test_num):
test_img = img_test[i,:,:,:].unsqueeze(0)
print(test_img.size())
test_label =label_test[0,i].cuda()
print(test_label.size())
output = net4(test_img).cuda()
accuracy = torch.mean((output.argmax(1)==test_label).float()).cuda()
test_acc += accuracy
test_acc /= test_num
print('Test accuracy:', test_acc)