Hi,
Is the following the right way to share a layer between two different networks, or is it better to have a separate module for the shared layer?
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class A(nn.Module):
    def __init__(self):
        super(A, self).__init__()
        hsize_fc = 5
        self.hsiz_rec = 7
        self.fc = nn.Linear(3, hsize_fc)
        self.rec_layer = nn.GRU(hsize_fc,
                                self.hsiz_rec,
                                bidirectional=False,
                                batch_first=True,
                                num_layers=1)

    def forward(self, x):
        bsize = x.shape[0]
        x = self.fc(x)
        xs = x.view(bsize, 1, -1)
        ih = torch.zeros(1, bsize, self.hsiz_rec)
        _, h = self.rec_layer(xs, ih)
        return h.squeeze(0)
class B(nn.Module):
    def __init__(self, shared_re):
        super(B, self).__init__()
        hsize_fc = 5
        self.hsiz_rec = 7
        self.fc = nn.Linear(3, hsize_fc)
        self.rec_layer = shared_re

    def forward(self, x):
        bsize = x.shape[0]
        x = self.fc(x)
        xs = x.view(bsize, 1, -1)
        ih = torch.zeros(1, bsize, self.hsiz_rec)
        #### shared layer
        _, h = self.rec_layer(xs, ih)
        ####
        return h.squeeze(0)
net_A = A()
net_B = B(shared_re=net_A.rec_layer)
optim_A = optim.Adam(net_A.parameters())
optim_B = optim.Adam(net_B.parameters())

dummy = torch.randn(5, 7)

x_A = torch.rand(5, 3)
y_A_hat = net_A(x_A)
loss_A = F.mse_loss(y_A_hat, dummy)

x_B = torch.rand(5, 3)
y_B_hat = net_B(x_B)
loss_B = F.mse_loss(y_B_hat, dummy)

###### A
optim_A.zero_grad()
loss_A.backward()
optim_A.step()

##### B
optim_B.zero_grad()
loss_B.backward()
optim_B.step()
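For reference, here is a rough sketch of what I mean by the other option, i.e. a separate module for the shared layer (SharedRec and Net are placeholder names I made up; the idea is that both networks hold the same SharedRec instance):

class SharedRec(nn.Module):
    # wraps the shared GRU so it can be handed to several networks
    def __init__(self, in_size=5, hid_size=7):
        super(SharedRec, self).__init__()
        self.hid_size = hid_size
        self.gru = nn.GRU(in_size, hid_size, batch_first=True, num_layers=1)

    def forward(self, xs):
        bsize = xs.shape[0]
        ih = torch.zeros(1, bsize, self.hid_size)
        _, h = self.gru(xs, ih)
        return h.squeeze(0)

class Net(nn.Module):
    def __init__(self, shared):
        super(Net, self).__init__()
        self.fc = nn.Linear(3, 5)
        self.shared = shared  # same SharedRec instance registered in both networks

    def forward(self, x):
        x = self.fc(x)
        return self.shared(x.view(x.shape[0], 1, -1))

shared = SharedRec()
net_C = Net(shared)
net_D = Net(shared)
print(net_C.shared is net_D.shared)  # True, so both optimizers see the same GRU parameters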
In addition, if the shared layer is implemented as above, it will be updated when the optimizer runs for either A or B, is that correct? (i.e., running either of the following will change/update the shared layer's weights.)
optim_B.zero_grad()
loss_B.backward()
optim_B.step()
###### A
optim_A.zero_grad()
loss_A.backward()
optim_A.step()
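In case it helps to state what I expect, this is how I would check it (just a sketch, assuming loss_B has not already been backpropagated):

before = next(net_A.rec_layer.parameters()).clone()
optim_B.zero_grad()
loss_B.backward()
optim_B.step()
after = next(net_A.rec_layer.parameters())
print(torch.equal(before, after))  # I expect False, i.e. optim_B also moved A's GRU weights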
Thanks