This is how it looks, from what I understood from all of your posts:
import torch
import torch.nn as nn

class Myconv2DFunction(torch.autograd.Function):
    # Note that both forward and backward are @staticmethods
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, weight, bias=None):
        ctx.save_for_backward(input, weight, bias)
        # input dim [batch_size, channels, height, width]
        batch_size, channels, h, w = input.shape
        image = input  # the input image batch
        #print (image)
        #print (image.size())
        #print (image[0])
        kh, kw = 3, 3  # kernel size
        dh, dw = 2, 2  # stride
        filt = weight  # the filter, passed in from the Module as an nn.Parameter so it can be trained; indexed below as [channels, kh, kw]
        #print (filt)
        patches = image.unfold(2, kh, dh).unfold(3, kw, dw)
        #print (patches.shape)
        patches = patches.contiguous().view(batch_size, channels, -1, kh, kw)
        #print (patches.shape)
        patches = patches.permute(0, 2, 1, 3, 4)
        patches = patches.contiguous().view(-1, channels, kh, kw)
        #print (patches.shape)
        #print ("Filter shape", filt.shape)
        #print (patches)
        #print (patches, "\n", "\n", "\n", filt)
        #print ("Builtin multiplication")
        dummy = patches  # keep a reference to the un-multiplied patches for the loop below
        patches = patches * filt  # the same thing is done below with 4 nested loops and a custom operation
        #print (patches)
        #print ("Custom multiplication starts here:")
        #print (patches[0][0][0][0])
        #print (filt)
        for b in range(dummy.size(0)):  # one iteration per extracted patch (batch_size * number of output positions)
            for c in range(channels):
                for height in range(kh):
                    for width in range(kw):
                        patches[b][c][height][width] = \
                            dummy[b][c][height][width] * filt[c][height][width]  # I want to replace this multiply (*) with my function mymult(num1, num2)
        #print (patches)
        patches = patches.sum(1)  # the values before and after this look the same; what is it doing?
        #print (patches)
        output = patches  # is this right?
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output
    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        # This is a pattern that is very convenient - at the top of backward
        # unpack saved_tensors and initialize all gradients w.r.t. inputs to
        # None. Thanks to the fact that additional trailing Nones are
        # ignored, the return statement is simple even when the function has
        # optional inputs.
        input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None
        # These needs_input_grad checks are optional and there only to
        # improve efficiency. If you want to make your code simpler, you can
        # skip them. Returning gradients for inputs that don't require it is
        # not an error.
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_output.t().mm(input)
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum(0).squeeze(0)
        return grad_input, grad_weight, grad_bias
class Myconv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        super(Myconv2D, self).__init__()
        self.fn = Myconv2DFunction.apply  # the autograd Function defined above
        # weight tensor = out_channels × in_channels/groups × kH × kW (groups = 1 here)
        # NOTE: forward above indexes the filter as filt[c][height][width] with kh = kw = 3,
        # so this shape still has to be reconciled with that
        self.weight = nn.Parameter(torch.randn(out_channels, in_channels, kernel_size, kernel_size))

    def forward(self, x):
        x = self.fn(x, self.weight)
        return x
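As a sanity check on my own understanding, here is a small self-contained sketch (toy shapes made up by me, a single output channel) of what I think the unfold / multiply / sum pipeline in forward is building towards, compared against F.conv2d:

import torch
import torch.nn.functional as F

batch_size, channels, h, w = 2, 3, 9, 9
kh, kw = 3, 3                                    # kernel size
dh, dw = 2, 2                                    # stride
image = torch.randn(batch_size, channels, h, w)
filt = torch.randn(channels, kh, kw)             # one output channel

# extract sliding patches, same steps as in forward above
patches = image.unfold(2, kh, dh).unfold(3, kw, dw)          # [B, C, oh, ow, kh, kw]
oh, ow = patches.shape[2], patches.shape[3]
patches = patches.contiguous().view(batch_size, channels, -1, kh, kw)
patches = patches.permute(0, 2, 1, 3, 4).contiguous().view(-1, channels, kh, kw)

# multiply every patch by the filter and sum over channels AND kernel positions
out = (patches * filt).sum(dim=(1, 2, 3)).view(batch_size, oh, ow)

# reference: built-in conv with the same filter and stride
ref = F.conv2d(image, filt.unsqueeze(0), stride=(dh, dw)).squeeze(1)
print(torch.allclose(out, ref, atol=1e-5))       # True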
The multiplication results come out the same for patches = patches * filt and for the custom 4-nested-loop version in the forward method of class Myconv2DFunction(torch.autograd.Function).
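Here is a small standalone check of that equality, with toy shapes, just to convince myself:

import torch

num_patches, channels, kh, kw = 4, 3, 3, 3
patches = torch.randn(num_patches, channels, kh, kw)
filt = torch.randn(channels, kh, kw)

vectorized = patches * filt                      # broadcasts filt over the patch dimension

looped = torch.empty_like(patches)
for b in range(num_patches):
    for c in range(channels):
        for i in range(kh):
            for j in range(kw):
                looped[b, c, i, j] = patches[b, c, i, j] * filt[c, i, j]

print(torch.allclose(vectorized, looped))        # True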
After that there is an addition, patches = patches.sum(1). I am not sure what it is doing, and I would like to replace that addition with my own function as well.
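If sum(1) is just reducing dim 1 (the channel dimension), adding up the per-channel products at each kernel position, then I imagine both the multiply and that addition could be swapped for my own scalar functions, roughly like this (mymult and myadd are only placeholders for whatever I actually write):

import torch

def mymult(a, b):          # placeholder for my custom multiply
    return a * b

def myadd(a, b):           # placeholder for my custom add
    return a + b

num_patches, channels, kh, kw = 4, 3, 3, 3
patches = torch.randn(num_patches, channels, kh, kw)
filt = torch.randn(channels, kh, kw)

reference = (patches * filt).sum(1)              # built-in multiply + channel sum

out = torch.zeros(num_patches, kh, kw)
for b in range(num_patches):
    for i in range(kh):
        for j in range(kw):
            acc = 0.0
            for c in range(channels):            # the sum(1) part, one channel at a time
                acc = myadd(acc, mymult(patches[b, c, i, j].item(), filt[c, i, j].item()))
            out[b, i, j] = acc

print(torch.allclose(out, reference, atol=1e-6))   # True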
Can you please have a look at it? I will be using this in the previous code:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = Myconv2D(1, 6, 5)   # the custom module defined above, used in place of nn.Conv2d
        self.conv2 = Myconv2D(6, 16, 5)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.classifier = nn.Linear(84, 10)
        self.features = nn.Sequential(*list(self.children()))
Is this the right direction? I am really thankful for your time and effort; I would not even have been able to start on this without your comments.