Hello, I am new to Pytorch and I am trying to write a custom binary XNOR layer which can replace conv2d if input and weight are binary. (algorithm implementation are not related to my problem, please don’t spend to much time looking deep into it. I don’t want waste your time)
I have a small sample here. The problem is loss never change, and weight doesn’t update.
I find out in optimizer.step(), p.grad == none. (it should not be none)
I have been stuck here for three days, Could someone please help me and give me some idea, or share a custom layer implementation(have weight involved)
Here is the code. Thanks for your time.
class myBinConv2d(torch.nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
super(myBinConv2d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.s = stride
self.p = padding
self.weight = nn.Parameter(Variable(torch.Tensor(1,1,kernel_size,kernel_size)))
def forward(self,input):
a = input.data[0,0,:,:]
isize = len(a[0])
if self.p!=0:
a = np.pad(a, (self.p,self.p), 'constant', constant_values=(0, 0))
wsize =len(self.weight.data[0,0,:,:][0])
numOfItmInFilter = len(self.weight.data[0,0,:,:][0])*len(self.weight.data[0,0,:,:][0])
nisize = len(a[0])
outputOfConv = torch.from_numpy(np.zeros(shape=(1,1,(isize-wsize+2*self.p)/self.s+1,(isize-wsize+2*self.p)/self.s+1)))
wc = 0
hc = 0
i=0
j=0
while (i+wsize) <= nisize :
while j+wsize <= nisize:
tempa = np.uint64(a[i:i+wsize,j:j+wsize])
tempb = np.uint64(self.weight.data[0,0,:,:])
outputOfConv[0][0][hc][wc] = np.count_nonzero(np.bitwise_xor(tempa,tempb))*2 - numOfItmInFilter
j = (j+1)*self.s
wc= wc+1
j=0
wc=0
hc = hc+1
i = (i+1)*self.s
return Variable(outputOfConv,requires_grad = True)
def extra_repr(self):
return 'in_channels={}, out_channels={}'.format(
self.in_channels, self.out_channels)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.myBinConv2d = myBinConv2d(1, 1, kernel_size=3)
def forward(self, x):
x = self.myBinConv2d(x)
return x
x_data = np.loadtxt("/home/zhiming/Desktop/outfile.txt", dtype='i', delimiter=' ')
y_data = x_data[0:30,0:30]
x_data = Variable(torch.Tensor([[x_data]]))
y_data = Variable(torch.Tensor([[y_data]]).double())
model = Net()
criterion = torch.nn.MSELoss(size_average=False)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
for epoch in range(5):
start = time.time()
y_pred = model(x_data)
end = time.time()
a = list(model.parameters())[0]
loss = criterion(y_pred, y_data)
print(epoch, loss.data," loss data")
print(a)
optimizer.zero_grad()
loss.backward()
optimizer.step()
Here is the print for weight matrix and loss(alway 71100)
1 tensor(71100., dtype=torch.float64) loss data
Parameter containing:
tensor([[[[-9.5033e-27, 4.5581e-41, 4.0389e+30],
[ 3.0618e-41, -3.3686e-26, 4.5581e-41],
[-3.4935e-26, 4.5581e-41, 6.7262e-44]]]])
2 tensor(71100., dtype=torch.float64) loss data
Parameter containing:
tensor([[[[-9.5033e-27, 4.5581e-41, 4.0389e+30],
[ 3.0618e-41, -3.3686e-26, 4.5581e-41],
[-3.4935e-26, 4.5581e-41, 6.7262e-44]]]])
3 tensor(71100., dtype=torch.float64) loss data
Parameter containing:
tensor([[[[-9.5033e-27, 4.5581e-41, 4.0389e+30],
[ 3.0618e-41, -3.3686e-26, 4.5581e-41],
[-3.4935e-26, 4.5581e-41, 6.7262e-44]]]])
4 tensor(71100., dtype=torch.float64) loss data
Parameter containing:
tensor([[[[-9.5033e-27, 4.5581e-41, 4.0389e+30],
[ 3.0618e-41, -3.3686e-26, 4.5581e-41],
[-3.4935e-26, 4.5581e-41, 6.7262e-44]]]])