class CrititcNetwork(nn.Module):
    """DDPG critic that scores a (state, action) pair with a single Q-value.

    The state goes through a three-stage ``conv_block`` encoder, is
    flattened, and then passes through two fully connected layers with layer
    normalisation.  The action is embedded by its own linear layer.  Both
    feature vectors are concatenated and mapped to one output unit.

    Args:
        beta: Learning rate.  Accepted for interface compatibility but not
            used here; no optimizer is created inside this class.
        fc1_dim: Width of the first fully connected layer.
        fc2_dim: Width of the second fully connected layer and of the action
            embedding.
        x_dim: First argument of the first ``conv_block`` (presumably the
            number of input channels; ``conv_block`` is defined elsewhere).
        hid_dim: Size passed between the encoder's ``conv_block`` stages.
        z_dim: Second argument of the last ``conv_block``.
        n_actions: Size of the action vector.
        name: Prefix of the checkpoint file name.
        chkpt_dir: Directory in which the checkpoint file is stored.
        encoder_out_dim: Number of features per sample after the encoder
            output is flattened.  Defaults to 6400, the value that used to be
            hard-coded; it has to match the actual input resolution.
    """

    def __init__(self, beta, fc1_dim, fc2_dim, x_dim, hid_dim, z_dim,
                 n_actions, name,
                 chkpt_dir='C:/Users/Bahar/PycharmProjects/pythonProject7',
                 encoder_out_dim=6400):
        super(CrititcNetwork, self).__init__()
        self.fc1_dim = fc1_dim
        self.fc2_dim = fc2_dim
        self.x_dim = x_dim
        self.hid_dim = hid_dim
        self.z_dim = z_dim
        self.n_actions = n_actions
        # NOTE(review): ActorNetwork uses the suffix '__ddpg'; confirm which
        # suffix is intended before relying on existing checkpoint files.
        self.checkpoint_file = os.path.join(chkpt_dir, name + 'ddpg')

        self.encoder = nn.Sequential(
            conv_block(x_dim, hid_dim),
            conv_block(hid_dim, hid_dim),
            conv_block(hid_dim, z_dim),
        )

        # size()[0] of an nn.Linear weight is out_features, so the bounds
        # below are 1/sqrt(out_features), exactly as in the original code.
        self.fc1 = nn.Linear(encoder_out_dim, self.fc1_dim)
        f1 = 1 / np.sqrt(self.fc1.weight.data.size()[0])
        self._init_uniform(self.fc1, f1)
        self.bn1 = nn.LayerNorm(self.fc1_dim)

        self.fc2 = nn.Linear(self.fc1_dim, self.fc2_dim)
        f2 = 1 / np.sqrt(self.fc2.weight.data.size()[0])
        self._init_uniform(self.fc2, f2)
        self.bn2 = nn.LayerNorm(self.fc2_dim)

        self.action_value = nn.Linear(self.n_actions, self.fc2_dim)

        # The output layer starts with very small weights.
        f3 = 0.003
        self.q = nn.Linear(self.fc2_dim * 2, 1)
        self._init_uniform(self.q, f3)

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    @staticmethod
    def _init_uniform(layer, bound):
        """Fill ``layer``'s weight and bias in place from U(-bound, bound)."""
        torch.nn.init.uniform_(layer.weight.data, -bound, bound)
        torch.nn.init.uniform_(layer.bias.data, -bound, bound)

    def forward(self, state, action):
        """Return the Q-value estimate for each (state, action) pair.

        Args:
            state: Batch of encoder inputs; the encoder output is flattened
                to ``(batch, -1)`` before the first linear layer.
            action: 2-D tensor whose last dimension is ``n_actions``.

        Returns:
            Tensor with one value per sample, shape ``(batch, 1)``.
        """
        state_value = self.encoder(state)
        state_value = state_value.view(state_value.shape[0], -1)
        state_value = F.relu(self.bn1(self.fc1(state_value)))
        state_value = self.bn2(self.fc2(state_value))
        action_value = F.relu(self.action_value(action))
        # One ReLU over the concatenation also activates the state branch,
        # which is why no ReLU follows bn2 above.
        joint = F.relu(torch.cat([state_value, action_value], dim=1))
        return self.q(joint)

    def save_checkpoint(self):
        """Write the network's ``state_dict`` to ``self.checkpoint_file``."""
        print('...saving checkpoint...')
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        """Restore the network's ``state_dict`` from ``self.checkpoint_file``."""
        print('...loading checkpoint...')
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine (and the other way round).
        self.load_state_dict(
            torch.load(self.checkpoint_file, map_location=self.device))
class ActorNetwork(nn.Module):
    """DDPG actor that maps a state to an action vector in ``[-1, 1]``.

    The state goes through a three-stage ``conv_block`` encoder, is
    flattened, and then passes through two fully connected layers with layer
    normalisation and ReLU.  A final linear layer followed by ``tanh``
    produces the action.

    Args:
        alpha: Learning rate.  Accepted for interface compatibility but not
            used here; no optimizer is created inside this class.
        fc1_dim: Width of the first fully connected layer.
        fc2_dim: Width of the second fully connected layer.
        x_dim: First argument of the first ``conv_block`` (presumably the
            number of input channels; ``conv_block`` is defined elsewhere).
        hid_dim: Size passed between the encoder's ``conv_block`` stages.
        z_dim: Second argument of the last ``conv_block``.
        n_actions: Size of the produced action vector.
        name: Prefix of the checkpoint file name.
        chkpt_dir: Directory in which the checkpoint file is stored.
        encoder_out_dim: Number of features per sample after the encoder
            output is flattened.  Defaults to 6400, the value that used to be
            hard-coded; it has to match the actual input resolution.
    """

    def __init__(self, alpha, fc1_dim, fc2_dim, x_dim, hid_dim, z_dim,
                 n_actions, name,
                 chkpt_dir='C:/Users/Bahar/PycharmProjects/pythonProject7',
                 encoder_out_dim=6400):
        super(ActorNetwork, self).__init__()
        self.fc1_dim = fc1_dim
        self.fc2_dim = fc2_dim
        self.x_dim = x_dim
        self.hid_dim = hid_dim
        self.z_dim = z_dim
        self.n_actions = n_actions
        self.checkpoint_file = os.path.join(chkpt_dir, name + '__ddpg')

        self.encoder = nn.Sequential(
            conv_block(x_dim, hid_dim),
            conv_block(hid_dim, hid_dim),
            conv_block(hid_dim, z_dim),
        )

        # size()[0] of an nn.Linear weight is out_features, so the bounds
        # below are 1/sqrt(out_features), exactly as in the original code.
        self.fc1 = nn.Linear(encoder_out_dim, self.fc1_dim)
        f1 = 1 / np.sqrt(self.fc1.weight.data.size()[0])
        self._init_uniform(self.fc1, f1)
        self.bn1 = nn.LayerNorm(self.fc1_dim)

        self.fc2 = nn.Linear(self.fc1_dim, self.fc2_dim)
        f2 = 1 / np.sqrt(self.fc2.weight.data.size()[0])
        self._init_uniform(self.fc2, f2)
        self.bn2 = nn.LayerNorm(self.fc2_dim)

        # The output layer starts with very small weights.
        f3 = 0.003
        self.mu = nn.Linear(self.fc2_dim, self.n_actions)
        self._init_uniform(self.mu, f3)

        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    @staticmethod
    def _init_uniform(layer, bound):
        """Fill ``layer``'s weight and bias in place from U(-bound, bound)."""
        torch.nn.init.uniform_(layer.weight.data, -bound, bound)
        torch.nn.init.uniform_(layer.bias.data, -bound, bound)

    def forward(self, state):
        """Return the action for each state in the batch.

        Args:
            state: Batch of encoder inputs; the encoder output is flattened
                to ``(batch, -1)`` before the first linear layer.

        Returns:
            Tensor of shape ``(batch, n_actions)`` with values in ``[-1, 1]``.
        """
        x = self.encoder(state)
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        # torch.tanh replaces the deprecated F.tanh.
        return torch.tanh(self.mu(x))

    def save_checkpoint(self):
        """Write the network's ``state_dict`` to ``self.checkpoint_file``."""
        print('...saving checkpoint...')
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        """Restore the network's ``state_dict`` from ``self.checkpoint_file``."""
        print('...loading checkpoint...')
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine (and the other way round).
        self.load_state_dict(
            torch.load(self.checkpoint_file, map_location=self.device))
# Critic update step (fragment of a learning method; `self`, `state`,
# `action`, `reward` and `new_state` come from the enclosing code).
#
# The TD target is bootstrapped from the *target* networks and is a constant
# for the optimisation, so it is computed without building a graph.  This
# prevents the loss from back-propagating into target_actor / target_critic.
# Assumes self.critic exposes `.device`, as CrititcNetwork does.
device = self.critic.device
with torch.no_grad():
    target_actions = self.target_actor(new_state)
    critic_value_ = self.target_critic(new_state, target_actions)
    # One reward per sample, shaped (batch, 1) to line up with the critic
    # output; this works for any batch size instead of a hard-coded 8.
    reward_column = torch.as_tensor(
        reward, dtype=torch.float, device=device).view(-1, 1)
    # NOTE(review): no terminal-state mask is applied, as in the original;
    # confirm whether episodes can end inside a batch.
    target = reward_column + self.gamma * critic_value_

self.critic.train()
self.critic_optimizer.zero_grad()
# Only the online critic's prediction carries gradients.
critic_value = self.critic(state, action.to(device))
critic_loss = F.mse_loss(critic_value, target)
critic_loss.backward()
self.critic_optimizer.step()
Hi,
I'm trying to train my DDPG agent in an image-classification setting. Everything looks fine, but I'm getting this error: `Function AddmmBackward returned an invalid gradient at index 1 - got [8, 6400] but expected shape compatible with [8, 13440]`. I have no clue why this error is happening. I would appreciate it if you could help me solve this problem.