This code is adapted from the DQN tutorial, which runs fine on my machine. This modified version, however, core-dumps inside loss.backward(). Am I doing something dumb?
Thank you in advance for any assistance.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch import LongTensor
from torch.autograd import Variable

class DQN(nn.Module):
    def __init__(self):
        super(DQN, self).__init__()
        # Three conv + batch-norm stages, then a linear head over the flattened features
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)
        self.head = nn.Linear(192, 3)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
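As a sanity check on the nn.Linear(192, 3) head, I worked through the conv output sizes for my 26x70 inputs with out = (in - kernel) // stride + 1 (a quick helper, not part of the model). The width comes out fine, but the height drops below the 5x5 kernel at conv3; I'm not sure whether that's related to the crash:

    # Sanity-check the flattened feature size feeding nn.Linear(192, 3).
    # Valid (no-padding) conv output size: out = (in - kernel) // stride + 1
    def conv_out(size, kernel=5, stride=2):
        return (size - kernel) // stride + 1

    h, w = 26, 70
    for _ in range(3):
        h, w = conv_out(h), conv_out(w)
        print(h, w)
    # prints: 11 33 / 4 15 / 0 6  -- height underflows at conv3 (4 < kernel 5),
    # while 192 = 32 * 1 * 6 would need the final map to be 1 x 6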
model = DQN()
optimizer = optim.RMSprop(model.parameters())

action_batch = Variable(LongTensor([[0], [1], [2], [0], [1], [2], [0], [1], [2], [0]]))  # not used in this repro
next_states = Variable(torch.rand(10, 3, 26, 70), volatile=True)  # no grad needed for targets
state_batch = Variable(torch.rand(10, 3, 26, 70))

state_action_values = model(state_batch)
next_state_values = model(next_states)
next_state_values.volatile = False  # try to clear the volatile flag before building the loss
expected_state_action_values = (next_state_values * 0.999) + 1.0

loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
optimizer.zero_grad()
model.zero_grad()
loss.backward()  # <-- core dump happens here
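In case it's relevant, the part I changed most from the tutorial is the volatile handling: next_state_values comes out of a volatile forward pass, and I then flip .volatile back to False on it. The alternative I was considering is sketched below: rebuild the target from the raw tensor with Variable(next_state_values.data), so the loss graph never touches the volatile forward pass. This is my reading of what the tutorial's volatile-clearing intends; I haven't confirmed it's equivalent.

    # Sketch: pull the raw tensor out of the volatile Variable and wrap it in a
    # fresh Variable (requires_grad=False, volatile=False), so the TD target
    # carries no graph history. 0.999 is the discount; 1.0 stands in for the
    # reward batch, as in the snippet above.
    next_state_values = model(next_states)  # volatile forward pass
    targets = Variable(next_state_values.data * 0.999 + 1.0)
    loss = F.smooth_l1_loss(state_action_values, targets)
    loss.backward()  # gradients flow only through state_action_values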