I will add more code so you can take a look at it:

this is my first network: an autoencoder:

class DEC_AE(nn.Module):
    """Fully connected autoencoder for 28x28 single-channel images.

    The encoder flattens each image and maps it through
    784 -> 500 -> 500 -> 2000 -> ``num_features``. The decoder mirrors that
    path back to 784 values and reshapes them to ``(N, 1, 28, 28)``.

    Args:
        num_classes: Accepted by the constructor but not used by any layer
            defined in this class.
        num_features: Width of the embedding produced by the encoder.
    """

    def __init__(self, num_classes, num_features):
        super().__init__()
        self.dropout = nn.Dropout(p=0.1)

        # Encoder layers, applied in numeric order.
        self.fc1 = nn.Linear(28 * 28, 500)
        self.fc2 = nn.Linear(500, 500)
        self.fc3 = nn.Linear(500, 2000)
        self.fc4 = nn.Linear(2000, num_features)

        self.relu = nn.ReLU()

        # Decoder layers are numbered to mirror the encoder, so they are
        # applied in reverse order: fc_d4 first, fc_d1 last.
        self.fc_d1 = nn.Linear(500, 28 * 28)
        self.fc_d2 = nn.Linear(500, 500)
        self.fc_d3 = nn.Linear(2000, 500)
        self.fc_d4 = nn.Linear(num_features, 2000)

        # Stored on the module; no method of this class reads it.
        self.alpha = 1.0

        # True  -> forward() runs encoder + decoder.
        # False -> forward() returns the embedding only.
        self.pretrainMode = True

        # Xavier-initialise every weight matrix; biases keep their defaults.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                torch.nn.init.xavier_uniform_(m.weight)

    def setPretrain(self, mode):
        """Switch between pretraining (encoder + decoder) and encoder-only.

        Args:
            mode: ``True`` to run the decoder as well; ``False`` to make
                ``forward`` return only the embedding.
        """
        self.pretrainMode = mode

    def forward(self, x):
        """Encode a batch of images and, in pretrain mode, reconstruct it.

        Args:
            x: Batch of images; it is flattened to rows of 784 values.

        Returns:
            The embedding tensor alone when ``pretrainMode`` is ``False``;
            otherwise the tuple ``(embedding, reconstruction)`` where the
            reconstruction has shape ``(N, 1, 28, 28)``.
        """
        x = x.view(-1, 1 * 28 * 28)
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        x_e = x

        # Outside pretraining only the embedding is needed. The identity
        # check means only the literal ``False`` disables the decoder.
        if self.pretrainMode is False:
            return x

        # Encoder is done; the decoder follows.
        x = self.relu(self.fc_d4(x))
        x = self.relu(self.fc_d3(x))
        x = self.relu(self.fc_d2(x))
        x = self.fc_d1(x)
        x_de = x.view(-1, 1, 28, 28)
        return x_e, x_de

loading the AE:

def load_checkpoint(filepath, model):
    """Load saved weights into ``model`` and leave it ready for training.

    Args:
        filepath: Path of a checkpoint written with ``torch.save``; it must
            hold a mapping with a ``'state_dict'`` entry.
        model: Module whose parameters are overwritten in place.

    Returns:
        The same ``model`` object, with every parameter trainable and the
        module switched to training mode.
    """
    # NOTE: torch.load unpickles the file -- only open checkpoints you trust.
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['state_dict'])

    # Make sure nothing stays frozen after the load.
    for parameter in model.parameters():
        parameter.requires_grad = True

    model.train()
    return model

And this is the architecture of the second network (I am testing with both an nn.Sequential model and a model that inherits from nn.Module):

def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity):
    """Build a feedforward network whose outputs are the logits.

    Args:
        sizes: Layer widths, input first and output last. Each adjacent pair
            becomes one ``nn.Linear``.
        activation: Module class instantiated after every hidden layer.
        output_activation: Module class instantiated after the last layer.

    Returns:
        An ``nn.Sequential`` of alternating linear and activation modules;
        it is empty when ``sizes`` has fewer than two entries.
    """
    layers = []
    last = len(sizes) - 2
    for j, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        act = activation if j < last else output_activation
        layers += [nn.Linear(n_in, n_out), act()]
    return nn.Sequential(*layers)

and loss is calculated this way:

logits_net = mlp(sizes=[obs_dim]+hidden_sizes+[n_acts])  # policy network: widths are obs_dim, then the hidden_sizes list, then n_acts logits

```python
def get_policy(obs):
    """Return the categorical action distribution for ``obs``.

    ``obs`` is fed to the module-level ``logits_net`` and the result is used
    as the unnormalised log-probabilities of a ``Categorical``.
    """
    logits = logits_net(obs)
    return Categorical(logits=logits)
def get_action(obs):
    """Sample one action for ``obs`` and return it as a Python number.

    ``.item()`` only works on a one-element tensor, so ``obs`` must yield
    exactly one sample (a single observation, not a larger batch).
    """
    return get_policy(obs).sample().item()
def Logp(obs, act):
    """Return the log-probability of ``act`` under the policy for ``obs``.

    The result comes straight from ``Categorical.log_prob``, so it stays
    connected to whatever autograd graph produced ``obs``.
    """
    logp = get_policy(obs).log_prob(act)
    return logp
def compute_loss(logp, weights):
    """Return the policy-gradient loss ``-(logp * weights).mean()``.

    The product uses ordinary broadcasting, so ``logp`` and ``weights``
    should have matching shapes; a ``(N, 1)`` against ``(N,)`` pair would
    silently expand to ``(N, N)`` before the mean is taken.
    """
    return -(logp * weights).mean()
```

```
# One optimiser per network: SGD updates the autoencoder, Adam the policy net.
opt = MultipleOptimizer(
    SGD(model.parameters(), lr=1, momentum=0.9),
    Adam(logits_net.parameters(), lr=lr),
)

# Buffers for one update; start empty so the update only sees fresh samples.
batch_obs, batch_acts, batch_weights, batch_logp = [], [], [], []

for i, (x, label) in enumerate(train_loader):
    # Assumes model.setPretrain(False) was called, so a single tensor (the
    # embedding) comes back rather than an (embedding, reconstruction) tuple.
    feat = model(x.cuda())

    # Detached NumPy copy, used only for bookkeeping and the reward.
    obs = feat.detach().cpu().numpy()
    batch_obs.append(obs.copy())

    # Feed the policy the *attached* tensor. Going through NumPy here would
    # cut the autograd graph and the autoencoder would never get gradients.
    # .cpu() and .float() are both differentiable.
    policy_in = feat.cpu().float()

    # act in the environment
    act = get_action(policy_in)
    # log probability of the sampled action
    logp = Logp(policy_in, act=torch.as_tensor(act, dtype=torch.int32))
    rew = reward(obs, act + 2)

    # save action, reward
    batch_acts.append(act)
    batch_weights.append(rew)  # episode rewards
    batch_logp.append(logp)

# Single update after the whole pass over train_loader.
opt.zero_grad()
# Flatten to 1-D so each log-prob pairs with its own weight; a stacked
# (N, 1) tensor would otherwise broadcast against (N,) weights to (N, N).
batch_logp = torch.stack(batch_logp, dim=0).reshape(-1)
batch_loss = compute_loss(
    logp=batch_logp,
    weights=torch.as_tensor(batch_weights, dtype=torch.float32),
)
batch_loss.backward()
opt.step()
```