import torch
import torchvision
from torch import nn
from torch import optim
from torch.utils.data import dataset, dataloader

# Preprocessing shared by the training and validation splits: convert each
# image to a tensor, then normalise its single channel.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST training-set mean and
# standard deviation -- confirm before reusing with another dataset.
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: mini-batches of 64, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        '/files/',
        train=True,
        download=True,
        transform=mnist_transform,
    ),
    batch_size=64,
    shuffle=True,
)

# Held-out split (train=False), evaluated in batches of 1000.
val_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        '/files/',
        train=False,
        download=True,
        transform=mnist_transform,
    ),
    batch_size=1000,
    shuffle=True,
)

# model

class CNN(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    The network is two ``Conv2d -> ReLU -> MaxPool2d`` stages followed by a
    flatten step and a dense head ``Linear -> BatchNorm1d -> Linear``.

    The head expects 320 flattened features, which corresponds to a
    single-channel 28x28 input (20 feature maps of size 4x4 after the two
    conv/pool stages).
    """

    def __init__(self):
        """Build the convolutional feature extractor and the dense head."""
        super().__init__()
        self.conv = nn.Sequential(
            # Non-RGB image, hence in_channels = 1; out_channels is the
            # number of feature maps produced by the layer.
            nn.Conv2d(1, 10, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(10, 20, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.flatten = nn.Flatten()
        self.dense = nn.Sequential(
            nn.Linear(320, 50),
            # The Linear output is 2D (batch, features), so the matching
            # normalisation layer is BatchNorm1d. BatchNorm2d requires a 4D
            # (batch, channels, H, W) input and raises
            # "ValueError: expected 4D input (got 2D input)" here.
            nn.BatchNorm1d(50),
            # NOTE(review): there is no non-linearity between the two Linear
            # layers; kept as in the original architecture.
            nn.Linear(50, 10),
        )

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, 10)``.

        Args:
            x: Batch of images laid out as ``(batch, 1, H, W)``; H = W = 28
                is required for the flattened size to equal 320.
        """
        x = self.conv(x)
        x = self.flatten(x)
        x = self.dense(x)
        return x

model = CNN()

# CrossEntropyLoss takes the raw class scores produced by the model together
# with the integer class labels.
loss_fn = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters())

epochs = 20

# Per-batch loss values accumulated across all epochs.
train_loss = []

val_loss = []

for t in range(epochs):

    # --- training pass ---
    # Training mode: BatchNorm normalises with the statistics of the current
    # batch and updates its running estimates.
    model.train()
    for feature, label in train_loader:
        # forward
        pred = model(feature)
        # compute the objective function
        output = loss_fn(pred, label)
        # clear the gradients left over from the previous step
        optimizer.zero_grad()
        # backward propagation
        output.backward()
        # update
        optimizer.step()
        train_loss.append(output.item())

    # Loss of the last training batch of this epoch.
    print(f'Epoch {t + 1}, train loss: {output.item()}')

    # --- validation pass ---
    # Evaluation mode: BatchNorm uses its running statistics and leaves them
    # untouched, so validation does not alter the model.
    model.eval()
    with torch.no_grad():
        for feature2, label2 in val_loader:
            # forward
            pred2 = model(feature2)
            # compute the objective function
            output2 = loss_fn(pred2, label2)
            val_loss.append(output2.item())

    # Loss of the last validation batch of this epoch.
    print(f'Epoch {t + 1}, val loss: {output2.item()}')

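# ValueError: expected 4D input (got 2D input)
# ^ This is the error nn.BatchNorm2d raises when it is applied to the 2D
#   (batch, features) output of an nn.Linear layer.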