Hi,
I don’t understand why loss.grad is None and all the weights and bias in the mlps are None. If the gradients are all None, I think the output loss should not decrease, but it is decreasing and then converged within about 20 epoch. Is there anything wrong? Hope my problem explaination is enough for understanding. Thanks for any help.
The codes for the network:
class ReproduceGNN(torch.nn.Module):
def __init__(self, in_channels_bs, in_channels_ue, in_channels_h, out_channels_bs, out_channels_ue, out_channels_h,
d, N, num_layers):
super(ReproduceGNN, self).__init__()
#initialize the first layer
self.layer1 = PreprocessingLayer(d)
# initialize the `num_layers` ReproduceLayer layers
self.middle_layers = []
for i in range(num_layers):
layer = ReproduceLayer(in_channels_bs, in_channels_ue, in_channels_h, out_channels_bs, out_channels_ue,
out_channels_h)
self.middle_layers.append(layer)
self.lastLayer = PostProcessingLayer(d, N)
self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
self.to(self.device) # move the entire model to cuda
def forward(self, P, Noise, H):
#feed into the preprocessing layer
F_bs, F_ue, E = self.layer1(P, Noise, H)
if self.device.type == "cuda":
F_bs = F_bs.to(self.device)
F_ue = F_ue.to(self.device)
E = E.to(self.device)
#feed into the updating layer
for layer in self.middle_layers:
F_bs, F_ue, E = layer(F_bs, F_ue, E)
if self.device.type == "cuda":
F_bs = F_bs.to(self.device)
F_ue = F_ue.to(self.device)
E = E.to(self.device)
V_out = self.lastLayer(E, P)
if self.device.type == "cuda":
V_out = V_out.to(self.device)
return V_out
The code for the costomized Loss function and train function:
def Loss(V_out, H, Noise):
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
M, K, N = V_out.size()
R = torch.zeros(K)
temp = torch.zeros(K)
interference = torch.zeros(K)
H = H.to(torch.complex64)
if device.type == "cuda":
Noise = Noise.to(device)
for k in range(K):
Signal = 0.0
for l in range(K):
interference_k = 0.0
if l != k:
for m in range(M):
interference_k = interference_k + torch.matmul(torch.conj(H[m, k, :]).t(), V_out[m,l,:])
interference[k] = interference[k] + abs(interference_k)**2
for m in range(M):
Signal = Signal + torch.matmul(torch.conj(H[m, k, :]).t(), V_out[m,k,:])
temp[k] = abs(Signal) ** 2 / (interference[k] + Noise[k]) #SINR
R[k] = torch.log2(1+temp[k])
Rsum = -torch.sum(R)
return Rsum
def train(P, Noise, dataset, num_epochs, lr):
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = ReproduceGNN(in_channels_bs=64, in_channels_ue=64, in_channels_h=64, out_channels_bs=64, out_channels_ue=64,
out_channels_h=64, d=64, N=2, num_layers=2)
model.to(device)
optimizer = torch.optim.RMSprop(model.parameters(), lr)
train_size = int(1 * len(dataset))
print(f'train size: {train_size}')
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
batch_size = 64
num_batches = (train_size + batch_size - 1) // (batch_size*num_epochs)
print(f'num_batches: {num_batches}')
# define lists to save loss and rate for each epoch
losses = []
rate = []
for epoch in range(num_epochs):
running_loss = 0.0
for batch_idx in range(num_batches): # number of minibatches = num_batches
model.train()
start_idx = (epoch * num_batches + batch_idx) * batch_size
end_idx = start_idx + batch_size
subset_indices = range(start_idx, end_idx)
subset = Subset(train_dataset, subset_indices)
subset_loader = DataLoader(subset, batch_size=batch_size, shuffle=True)
for data in subset_loader:
channel = data
if device.type == "cuda":
channel = channel.to(device)
optimizer.zero_grad()
loss = 0.0
batch_n, M, K, N = channel.size()
for b in range(batch_n):
input_H = channel[b, :, :, :]
if device.type == "cuda":
input_H = input_H.to(device)
V_out = model(P, Noise, input_H)
loss = loss + Loss(V_out, input_H, Noise)
#print(f'batch loss: {loss}')
loss /= batch_n
if device.type == "cuda":
loss = loss.cpu()
running_loss += loss.item()
print(V_out.grad)
for name, param in model.named_parameters():
if param.grad is None:
print(f'Parameter: {name}, Gradient: {param.grad}')
loss.backward()
optimizer.step()
epoch_loss = running_loss/num_batches
losses.append(epoch_loss)
rate.append(-epoch_loss)
print(f'Epoch: {epoch + 1:03d}, Training Loss: {epoch_loss:.4f}')