I have a problem when using my network. On the first pass through the training loop I can get the gradient with respect to the inputs, but the second time through, the backward pass fails.
I have tested the forward pass and backpropagation for the first iteration; backpropagation is fine for a single loop, so I am not sure what goes wrong after that.
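My current guess, though I am not certain, is that writing into the same persistent tensor in place on every step keeps the previous step's graph alive, so the second backward pass reaches weights that the optimizer has already modified in place. This toy snippet (shapes made up for illustration, unrelated to my real model) should fail on its second iteration in the same way:

import torch

w = torch.randn(3, requires_grad=True)
opt = torch.optim.SGD([w], lr=0.1)
buf = torch.zeros(3)  # persistent buffer reused across iterations, like my A0_B below
for step in range(2):
    buf[0] = (w * w).sum()            # in-place write chains onto last step's graph
    loss = buf.sum()
    opt.zero_grad()
    loss.backward(retain_graph=True)  # second iteration hits the in-place error
    opt.step()                        # updates w in place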
My code is as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
#import torchvision
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from gumbel_softmax import gsm
torch.autograd.set_detect_anomaly(True)
#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter()
def max_matrix_multi(A, B):
    """Max-plus (tropical) matrix product: res[i][j] = max_k (A[i][k] + B[k][j])."""
    length_A = len(A)
    length_B = len(B)
    width_B = len(B[0])
    res = [[float("-inf") for i in range(width_B)] for j in range(length_A)]
    for i in range(length_A):
        for j in range(width_B):
            temp = []
            for k in range(length_B):
                temp.append(A[i][k] + B[k][j])
            res[i][j] = max(temp)
    return res
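# A tiny hand-checked example of max_matrix_multi (max-plus product,
# res[i][j] = max_k (A[i][k] + B[k][j])):
#   max_matrix_multi([[0, 1], [2, 3]], [[0, 1], [1, 0]])  ->  [[2, 1], [4, 3]]
#   e.g. entry [0][0] = max(0 + 0, 1 + 1) = 2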
# Initialize all locations as input variables
matrix_location = torch.LongTensor([[0,0],
[0,2],
[1,0],
[1,2],
[1,4],
[2,0],
[2,2],
[2,4],
[2,6],
[3,0],
[3,2],
[3,4],
[3,6],
[4,1],
[4,5],
[5,1],
[5,5],
[5,7],
[6,1],
[6,3],
[6,5],
[6,7],
[7,1],
[7,3],
[7,5],
[7,7]])
matrix_positions = matrix_location.numpy()  # plain NumPy view of the locations (.data is legacy)
input_size = matrix_location.shape[1] * 8 * matrix_location.shape[0]
hidden1_size = 300
hidden2_size = 140
hidden3_size = 60
output_size = matrix_location.shape[0]
class SamplingNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(SamplingNet, self).__init__()
        self.input_size = input_size
        self.hidden1_size = 300
        self.hidden2_size = 140
        self.hidden3_size = 60
        self.output_size = output_size
        # Four fully connected layers, weights initialized from N(0, 0.3)
        self.InputLayer = nn.Linear(self.input_size, self.hidden1_size)
        self.InputLayer.weight.data.normal_(0, 0.3)
        self.Hidden1Layer = nn.Linear(self.hidden1_size, self.hidden2_size)
        self.Hidden1Layer.weight.data.normal_(0, 0.3)
        self.Hidden2Layer = nn.Linear(self.hidden2_size, self.hidden3_size)
        self.Hidden2Layer.weight.data.normal_(0, 0.3)
        self.OutputLayer = nn.Linear(self.hidden3_size, self.output_size)
        self.OutputLayer.weight.data.normal_(0, 0.3)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self, locations):
        Information = self.InputLayer(locations)
        Information = self.tanh(Information)
        hidInfo = self.Hidden1Layer(Information)
        hidInfo = self.tanh(hidInfo)
        extendInfo = self.Hidden2Layer(hidInfo)
        extendInfo = self.relu(extendInfo)
        output_lamda = self.OutputLayer(extendInfo)
        return output_lamda
class TestNet(nn.Module):
    def __init__(self):
        super(TestNet, self).__init__()
        self.forw = torch.nn.Linear(416, 26)

    def forward(self, ins):
        return self.forw(ins)
#Poisson = SamplingNet(input_size, output_size)
#position = matrix_location[0]
#lamda = Poisson(position)
#print(lamda)
# Construct the A0_B matrix for the external inputs
s_ = -10000.0  # large negative value standing in for minus infinity
A0_B = torch.FloatTensor([[s_ for _ in range(8)] for _ in range(8)])
# For each position in the A0_B matrix there is a Poisson-style rate lamda.
# The entry's value is taken directly as lamda * (1 + 0.5 * uniform(0, 1)).
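# Worked example: if lamda = 2.0 and the uniform draw is 0.4,
# the entry becomes 2.0 * (1 + 0.5 * 0.4) = 2.4.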
learning_rate = 0.01
Poisson = SamplingNet(input_size, output_size)
Distribution_Optimizer = torch.optim.Adam(Poisson.parameters(), lr=learning_rate)
# Read simulation data into the system
source_data = pd.read_csv('SimulationData.csv',usecols=['u1','u2','u31','u32','u41','u42',
'u51','u52','q1','q2','q3','y1','q4',
'q5','q6','y2'])
training_data = source_data.values.tolist()
external_inputs = torch.FloatTensor([element[0:8] for element in training_data])
states = torch.FloatTensor([element[8:16] for element in training_data])
#img_grid = torchvision.utils.make_grid(states)
STEPS = len(external_inputs)
qval = torch.zeros(STEPS, 8)
criterion = nn.L1Loss()
loss_list = []
soft_max = nn.Softmax(dim=0)
gumbel_softmax = gsm(t=0.01).apply
EPOCHS = 100
for epoch in range(EPOCHS):
    sum_loss = 0
    target = torch.randn(26)  # note: not used below
    for step in range(STEPS):
        qstep = torch.zeros(8, dtype=torch.float32)
        # One-hot encode the locations
        one_hot_location = F.one_hot(matrix_location, num_classes=8).view(-1).float()
        one_hot_location.requires_grad = True
        lamda_vector = Poisson(one_hot_location)
        # writer.add_graph(Poisson, one_hot_location)
        # writer.close()
        for p in range(matrix_location.shape[0]):
            i_index = int(matrix_positions[p][0])
            j_index = int(matrix_positions[p][1])
            lamda = lamda_vector[p] * 1.0
            epion = torch.rand(1).detach()  # uniform noise, detached from the graph
            # In-place write into the persistent A0_B tensor; this is the line
            # I suspect ties each step's graph to the previous one.
            A0_B[i_index][j_index] = lamda * (1 + 0.5 * epion)
        A0B = A0_B.clone()
        # Forward pass
        for index in range(len(A0B)):
            transition_timing = (A0B[index] + external_inputs[step]) * 1.0
            qstep[index] = torch.matmul(soft_max(gumbel_softmax(soft_max(transition_timing))), transition_timing)
        qval[step] = qstep.clone()
        # if step % 5 == 0:
        #     print(qval[step])
        # Backpropagation
        qlog = torch.log(qstep.clone())
        loss = criterion(qlog, torch.log(states[step]))
        sum_loss += loss.item()
        # for name, parms in Poisson.named_parameters():
        #     print('--name:', name, '--grad_requires:', parms.requires_grad, '--grad_value:', parms.grad)
        Distribution_Optimizer.zero_grad()
        loss.backward(retain_graph=True)
        oldvals = Poisson.InputLayer.weight * 1.0
        print(one_hot_location.grad)
        Distribution_Optimizer.step()
        # print(((oldvals == Poisson.InputLayer.weight).sum().float()) / 124800)
    # writer.add_scalar('sum_loss', sum_loss, epoch)
    loss_list.append(sum_loss)
    print('Epoch: %.f, Loss: %.4f' % ((epoch + 1), sum_loss))
plt.plot(loss_list)
plt.show()
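The variant I plan to try next, though I do not know yet whether it is the correct fix, rebuilds the timing matrix from scratch on every step, so no autograd history survives between iterations and retain_graph=True should not be needed (names match my code above):

for step in range(STEPS):
    lamda_vector = Poisson(one_hot_location)
    A0B = torch.full((8, 8), s_)  # fresh tensor every step, no leftover graph
    for p in range(matrix_location.shape[0]):
        i_index = int(matrix_positions[p][0])
        j_index = int(matrix_positions[p][1])
        epion = torch.rand(1).detach()
        A0B[i_index, j_index] = lamda_vector[p] * (1 + 0.5 * epion)
    # ... forward pass and loss exactly as above ...
    Distribution_Optimizer.zero_grad()
    loss.backward()  # plain backward; each step owns its own graph
    Distribution_Optimizer.step()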