For my purposes, I defined a layer that performs elementwise multiplication. I implemented it in the following way:
import numpy as np
import torch
import torch.nn as nn

class VectorLinear(nn.Module):
    def __init__(self, N, keep_bias=True):
        super(VectorLinear, self).__init__()
        self.keep_bias = keep_bias
        self.weight = nn.Parameter(torch.randn([1, N]))   # initialize weight
        if self.keep_bias:
            self.bias = nn.Parameter(torch.randn([1, N]))  # initialize bias
        self.reset_parameters()  # self-defined initialization

    def forward(self, input):
        if self.keep_bias:
            return input * self.weight + self.bias
        else:
            return input * self.weight

    def reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.normal_(p, std=0.01)
            else:
                nn.init.normal_(p, std=0.01)
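As a quick sanity check, the layer behaves as expected on its own (the shapes here are only illustrative):

# quick sanity check of VectorLinear in isolation (shapes are just illustrative)
layer = VectorLinear(4, keep_bias=True)
x = torch.randn(8, 4)   # a batch of 8 vectors of length 4
y = layer(x)            # elementwise weight * x + bias, broadcast over the batch
print(y.shape)          # torch.Size([8, 4])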
Because I want to define many such layers iteratively, I use a for loop in the following way:
class MyNet(nn.Module):
    def __init__(self, in_dim, num_subcarriers, num_layer, Loss_scalar=10, Residule=False, Keep_Bias=False, BN=True, training_method='unsupervised', device=MainDevice):
        super(MyNet, self).__init__()
        self.in_dim = in_dim
        self.training_method = training_method
        self.device = device
        self.Rsdl = Residule
        self.dobn = BN
        self.layers_x = []
        self.layers_KL = []
        self.bns = []
        self.num_layer = num_layer
        self.scalar = Loss_scalar
        self.num_subcarriers = num_subcarriers
        for i in range(num_layer):  # define all layers
            layer = VectorLinear(N, keep_bias=Keep_Bias)
            self.layers_x.append(layer)
            setattr(self, 'layer_x_%i' % i, layer)
            for k in range(num_subcarriers):
                layerk = VectorLinear(N, keep_bias=Keep_Bias)
                layer_id = str(i) + str(k)
                setattr(self, 'layer_bzx_' + layer_id, layerk)  # another method is to use nn.ModuleList
                self.layers_KL.append(layerk)
            if self.dobn:
                bn_layer = nn.BatchNorm1d(self.in_dim, momentum=0.2)
                setattr(self, 'bn_layers%i' % i, bn_layer)
                self.bns.append(bn_layer)
    def forward(self, BB, zB, x, z, B):
        # batch_size = zB.size()[0]
        LOSS = []
        x_est = torch.randn_like(x, requires_grad=True)
        for l in range(self.num_layer):
            out_x = self.layers_x[l](x_est)
            # Bzx_sum = torch.zeros_like(x, device=self.device)
            for k in range(self.num_subcarriers):
                index = l * self.num_subcarriers + k
                aux_term = torch.bmm(x_est.unsqueeze(1), BB[:, :, :, k]).squeeze() - zB[:, :, k]
                out_x += self.layers_KL[index](aux_term)
            x_est = out_x
            if self.dobn:
                x_est = self.bns[l](x_est)
            if l < self.num_layer - 1:
                x_est = torch.nn.functional.relu(x_est)
            else:
                x_est = torch.tanh(x_est)
            if self.training_method == 'supervised':
                dis = torch.mean(torch.square(x - x_est))
            else:
                dis_sum = 0
                for k in range(self.num_subcarriers):
                    diff = z[:, :, k] - torch.matmul(x_est.unsqueeze(1), B[:, :, :, k]).squeeze()
                    dis_sum += torch.mean(torch.square(diff))
            LOSS.append(self.scalar * np.log(l + 1) * dis_sum)
        return x_est, LOSS
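(For completeness, the nn.ModuleList alternative mentioned in the comment above would look roughly like the sketch below inside __init__; this is just a sketch, not what I currently run.)

# sketch of the nn.ModuleList alternative mentioned in the comment above;
# modules held in an nn.ModuleList are registered with the parent module,
# so model.to(device) also moves their parameters
self.layers_x = nn.ModuleList(
    [VectorLinear(N, keep_bias=Keep_Bias) for _ in range(num_layer)])
self.layers_KL = nn.ModuleList(
    [VectorLinear(N, keep_bias=Keep_Bias)
     for _ in range(num_layer * num_subcarriers)])
if self.dobn:
    self.bns = nn.ModuleList(
        [nn.BatchNorm1d(self.in_dim, momentum=0.2) for _ in range(num_layer)])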
Now, there is a strange problem. The above code works well for some values of (num_subcarriers, num_layer), but for other values it raises a two-device error like this:
Traceback (most recent call last):
File "/scratch/project_2005641/THz_DNN/THz_Huge.py", line 384, in <module>
Loss_cache, Lr_list = train_model()
File "/scratch/project_2005641/THz_DNN/THz_Huge.py", line 94, in train_model
x_est, loss_list = myModel(batch_BB.to(MainDevice), batch_Bz.to(MainDevice), batch_X.to(MainDevice),
File "/usr/local/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/project_2005641/THz_DNN/FuncLbr.py", line 890, in forward
out_x += self.layers_KL[index](aux_term)
File "/usr/local/lib64/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/project_2005641/THz_DNN/FuncLbr.py", line 445, in forward
return input*self.weight + self.bias
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
I can't figure out why this happens; I only changed the values of (num_subcarriers, num_layer). Has anyone experienced this before? I suspect it might be related to the use of nn.Parameter.