I appreciate your fast reply, I tried with
There are several errors.
- Weights are not being updated.
- It seems does not solve my error.
C:\Users\lli4\Anaconda3\lib\site-packages\torch\autograd\__init__.py:130: UserWarning: Error detected in MulBackward0. Traceback of forward call that caused the error:
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 300, in <module>
agent.train(product_pomdp)
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 156, in train
self.value.lam = self.value.lam + self.value.c * e_v
(Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward(
Traceback (most recent call last):
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 300, in <module>
agent.train(product_pomdp)
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 125, in train
loss.backward()
C:\Users\lli4\Anaconda3\lib\site-packages\torch\autograd\__init__.py:130: UserWarning: Error detected in MulBackward0. Traceback of forward call that caused the error:
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 300, in <module>
agent.train(product_pomdp)
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 156, in train
self.value.lam = self.value.lam + self.value.c * e_v
(Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward(
Traceback (most recent call last):
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 300, in <module>
agent.train(product_pomdp)
File "D:/workspace/POMDP_CODE/ubvo/ubvo.py", line 125, in train
loss.backward()
File "C:\Users\lli4\Anaconda3\lib\site-packages\torch\tensor.py", line 221, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "C:\Users\lli4\Anaconda3\lib\site-packages\torch\autograd\__init__.py", line 130, in backward
Variable._execution_engine.run_backward(
RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.
def train(self, env):
torch.autograd.set_detect_anomaly(True)
self.env = env
# set hyperparameters
self.num_traj = paras.num_traj # the number of trajectories
self.max_time = paras.max_time
self.max_inner_iter = paras.max_inner_iter
self.max_outer_iter = paras.max_outer_iter
self.beta = paras.beta
self.eta = paras.eta
# Create the network
input_size = 66 # 64 inputs for b, 1 input for q, 1 input for a
hidden_size = 128
output_size = 1
net = Network(input_size, hidden_size, output_size)
# create a value obj
self.value = PytorchValue(net, env)
self.value.mu = paras.mu # temperature
self.value.lam = paras.lam
self.value.c = paras.c
# print the parameter's shape
for name, param in self.value.model.named_parameters():
print(name, '\t\t', param.shape)
OPTIMIZER_CONSTRUCTOR = torch.optim.SGD # This is the SGD algorithm.
### TensorBoard Writer Setup ###
log_name = str(self.eta) + str(OPTIMIZER_CONSTRUCTOR.__name__)
writer = SummaryWriter(log_dir="../logs/" + log_name)
print("To see tensorboard, run: tensorboard --logdir=logs/")
# add model into the tensorboard
x = torch.randn(1, input_size)
writer.add_graph(net, x)
# Create the optimizer
optimizer = OPTIMIZER_CONSTRUCTOR(self.value.model.parameters(), lr=self.eta)
# Create the learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=paras.max_inner_iter, gamma=0.9)
outer_step = 0
epochs = 0
e_v = np.Inf
while outer_step < self.max_outer_iter:
o_e_v = e_v # set the old expectation as the last round of the expectation
inner_step = 0
print("-------------------------------------------------------------------------------------------")
print("Outer iteration", '\t\t', outer_step, '\t\t', 'C', '\t\t', self.value.c, '\t\t', 'lambda', '\t\t',
self.value.lam)
while inner_step < self.max_inner_iter:
epochs += 1
inner_step += 1
# sample some trajectories
trajs = list()
for i in range(self.num_traj):
y = self.env.sample_belief()
trajs.append(self.env.sample_trajectory(y, self.value, self.max_time))
# compute the loss
loss = self.entire_l(trajs)
loss.backward()
# check if the weights are updated
a = list(self.value.model.parameters())[0].clone()
with torch.no_grad():
optimizer.zero_grad()
optimizer.step()
b = list(self.value.model.parameters())[0].clone()
print("Weights Updated: ", not torch.equal(a.data, b.data))
# compute the expectation of h of g
# TODO: there is a problem here, I change the tensor here.
e_v = self.e_vio(trajs)
print('Inner iteration', '\t\t', inner_step, '\t\t', 'loss:', loss.data, '\t\t', 'e_v:', e_v)
writer.add_scalar('Expected violation', e_v, global_step=epochs)
writer.add_scalar('L', loss, global_step=epochs)
writer.add_scalar('Value of' + str((self.env.b0, 0)), self.value.getValue((self.env.b0, 0)),
global_step=epochs)
# Decay Learning Rate
scheduler.step()
# if the expectation is not decreased, then increase the penalty term
if abs(e_v) > 0.9 * abs(o_e_v):
self.value.c = self.beta * self.value.c # c is increasing to infinity
else:
self.value.c = self.value.c
# update the slack variable
self.value.lam = self.value.lam + self.value.c * e_v
outer_step += 1
print("Finish the training!")
writer.close()
pass