I have the following example: an MNIST classifier, where I'm trying to compute the training and validation losses correctly.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision
# Download MNIST. Note: reading dl.dataset.data below bypasses the
# ToTensor/Normalize transforms, so they never actually run here.
dl = DataLoader(
    torchvision.datasets.MNIST('/data/mnist', train=True, download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize((0.5,), (0.5,))
                               ])), shuffle=False)
tensor = dl.dataset.data                 # raw uint8 images, shape (60000, 28, 28)
tensor = tensor.to(dtype=torch.float32)
tr = tensor.reshape(tensor.size(0), -1)  # flatten to (60000, 784)
tr = tr / 128                            # rough rescaling of the 0-255 pixel values
targets = dl.dataset.targets
targets = targets.to(dtype=torch.long)
# Python slices exclude the end index, so 50000-1 silently dropped a sample;
# use the full ranges instead.
x_train = tr[0:50000]
y_train = targets[0:50000]
x_valid = tr[50000:60000]
y_valid = targets[50000:60000]
bs = 64
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, drop_last=False, shuffle=True)
valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
loaders = {'train': train_dl, 'valid': valid_dl}
class M(nn.Module):
    """Single linear layer: 784 pixels -> 10 class logits."""
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        return self.lin(xb)
model = M()
criterion = nn.CrossEntropyLoss()
epochs = 4
lr = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
for epoch in range(epochs):
    train_loss = 0.0
    valid_loss = 0.0
    print(f"Epoch {epoch}")

    model.train()
    for i, (data, target) in enumerate(loaders['train']):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        train_loss += loss.item()   # accumulate the mean loss of each batch
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f"Batch {i}, loss {loss.item()}")

    model.eval()
    with torch.no_grad():           # no gradients needed during validation
        for data, target in loaders['valid']:
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item()

    print("number of train batches:", len(loaders['train']))
    train_loss = train_loss / len(loaders['train'])   # mean of the batch means
    valid_loss = valid_loss / len(loaders['valid'])
    print(f"Train loss: {train_loss}")
    print(f"Validation loss: {valid_loss}")
Questions
- Have I calculated the training and validation loss here the correct way?
- Is "batch loss" still a legitimate term for the per-batch value I print?
- What is the "running loss"? Is it the sum of all batch losses over an epoch? (See the sketch after this list.)
- According to this thread, does the train_loss / validation_loss computed this way have a standard name?
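
On the running-loss question, my understanding is that "running loss" usually means the batch losses accumulated (summed) as the epoch progresses. One subtlety: with drop_last=False the last batch can be smaller than the rest (here the validation loader's last batch has 10000 mod 128 = 16 samples), so dividing the summed batch means by len(loader), as in the code above, is only an approximation of the true per-sample mean. Below is a minimal sketch that weights each batch loss by its batch size instead; the evaluate helper name is my own for illustration, not a PyTorch API.

import torch

def evaluate(model, criterion, loader):
    """Hypothetical helper: exact per-sample mean loss over a loader."""
    model.eval()
    running_loss = 0.0   # running loss: batch losses summed over the epoch
    n_samples = 0
    with torch.no_grad():
        for data, target in loader:
            output = model(data)
            loss = criterion(output, target)            # mean over this batch
            running_loss += loss.item() * data.size(0)  # undo the batch mean
            n_samples += data.size(0)
    return running_loss / n_samples                     # exact per-sample mean

For the training loader the difference is tiny (50000 = 781 * 64 + 16, so only the final batch is short), but the weighted version is exact either way.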