If you want to define a custom layer that uses other layers inside it, for example:
def custom_layer(self, x):
    """Sketch of a composite layer: add two conv branches, then apply ReLU.

    Meant to be a method of an object that owns ``conv1`` and ``conv2``.
    The original sketch read ``self`` without declaring it (a NameError on
    any call) and elided the layer inputs as ``...``.

    Args:
        self: Object exposing callable ``conv1`` and ``conv2`` attributes.
        x: Input handed to both branches.

    Returns:
        ``F.relu(self.conv1(x) + self.conv2(x))``.
    """
    # NOTE(review): assumes both branches consume the same input -- the
    # original elided the arguments, so confirm against the intended design.
    branch1 = self.conv1(x)
    branch2 = self.conv2(x)
    # The branch outputs are summed element-wise before the non-linearity.
    return F.relu(branch1 + branch2)
and you want to build a very deep network out of many of these custom layers, how would you get around having to define
self.conv1 = nn.Conv2d…
self.conv2 = nn.Conv2d…
and so on, in the `__init__` method of your model class? Also, two custom layers must NOT share weights.
EDIT: OK, the above idea is wrong, and I've done some reading of the PyTorch documentation. Here is an adaptation of the examples from the docs:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
class model_block(torch.nn.Module):
    """Two-layer fully connected block: Linear -> ReLU -> Linear -> ReLU.

    Every instance constructs its own ``linear1`` and ``linear2`` modules in
    ``__init__``, so two separately constructed blocks hold separate
    parameter tensors.

    Args:
        D_in: Size of each input sample.
        H: Size of the hidden representation between the two layers.
        D_out: Size of each output sample.
    """

    def __init__(self, D_in, H, D_out):
        super(model_block, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """Apply both linear layers in sequence, each followed by ReLU.

        Args:
            x: Tensor whose last dimension has size ``D_in``.

        Returns:
            Tensor whose last dimension has size ``D_out``; values are
            non-negative because of the final ReLU.
        """
        hidden = F.relu(self.linear1(x))
        # The second layer must consume the hidden activation. Feeding it
        # the raw input ``x`` would bypass ``linear1`` entirely and only
        # avoids a shape error when H happens to equal D_in.
        return F.relu(self.linear2(hidden))
class Net(torch.nn.Module):
    """Network that chains two ``model_block`` instances back to back.

    ``block1`` and ``block2`` are built by two separate ``model_block(...)``
    calls, one per group of dimensions.

    Args:
        D_in1, H1, D_out1: Dimensions forwarded to the first block.
        D_in2, H2, D_out2: Dimensions forwarded to the second block.
    """

    def __init__(self, D_in1, H1, D_out1, D_in2, H2, D_out2):
        super(Net, self).__init__()
        first_dims = (D_in1, H1, D_out1)
        second_dims = (D_in2, H2, D_out2)
        # Construction order is kept: block1 first, then block2.
        self.block1 = model_block(*first_dims)
        self.block2 = model_block(*second_dims)

    def forward(self, x):
        """Run ``x`` through ``block1`` and feed the result to ``block2``."""
        return self.block2(self.block1(x))
# N is batch size; D_in1/D_in2 are the input dimensions of the two blocks;
# H1/H2 are their hidden dimensions; D_out1/D_out2 are their output dimensions.
# D_out1 must equal D_in2 because block1's output is fed straight to block2.
N, D_in1, H1, D_out1 = 64, 1000, 1000, 10
D_in2, H2, D_out2 = 10, 10, 10

# Create random Tensors to hold inputs and targets. The input width is the
# first block's input size and the target width is the last block's output
# size (the original referenced undefined names D_in / D_out here).
x = torch.randn(N, D_in1)
y = torch.randn(N, D_out2)

# Construct our model by instantiating the class defined above
model = Net(D_in1, H1, D_out1, D_in2, H2, D_out2)

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the four
# nn.Linear modules (two per block) which are members of the model.
# reduction='sum' is the current spelling of the deprecated size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss. The loss is a 0-dim tensor, so read it with
    # .item(); indexing it as loss.data[0] fails on current PyTorch.
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
This code runs. I just need to know: if I define my blocks like that, can I be absolutely certain that two blocks do not share weights within the network?