# Training and Validation Loss Too High and Not Converging

I am trying to calculate the training and validation loss; however, I am getting an extremely high value that is not converging.

# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

batch_size = 32

# Sort the samples by their second element (x[1], presumably the class
# label), then cut the sorted sequence at `pivot`: the first 40000 samples
# become the training subset and everything after becomes the validation
# subset.
pivot = 40000
cifar10 = sorted(cifar10, key=lambda x: x[1])
train_set = torch.utils.data.Subset(cifar10, range(pivot))
val_set = torch.utils.data.Subset(cifar10, range(pivot, len(cifar10)))

class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    ``forward`` returns 10 raw scores per sample; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # kernel_size=5 with padding=2 keeps the spatial size unchanged, so
        # only the pooling layer downsamples.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, padding=2)
        # torch.nn has no MaxPooling2D; the equivalent layer is MaxPool2d.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding=2)
        # Flattened conv output: 16 channels x 8 x 8 after two 2x2 pools.
        # Assumes 32x32 inputs (CIFAR-10 sized) -- TODO confirm.
        self.fc1 = nn.Linear(8 * 8 * 16, 120)
        # NOTE(review): a 2-unit layer in front of the 10-way output is a
        # very narrow bottleneck -- confirm this width is intended.
        self.fc2 = nn.Linear(120, 2)
        self.fc3 = nn.Linear(2, 10)

    def forward(self, x):
        """Run a batch of 3-channel images through the network."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # torch has no Flatten function; torch.flatten(x, 1) keeps the batch
        # dimension and flattens the rest.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Network()

criterion = nn.CrossEntropyLoss()
# The learning rate was written as 1e3 (one thousand), which makes SGD with
# momentum diverge immediately; 1e-3 is the conventional value for this setup.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

# Train for 5 epochs, logging the mean per-batch training and validation
# loss to Weights & Biases once per epoch.
with wandb.init(project='Tier-1-Test', save_code=True) as run:
    for epoch in range(5):
        current_loss = 0.0

        model.train()

        for i, data in enumerate(train_loader):
            images, labels = data

            # Clear gradients left over from the previous batch; without
            # this, backward() keeps adding to them.
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            # .item() yields a plain float, so the running sum does not keep
            # every batch's autograd graph alive.
            current_loss += loss.item()

        run.log({'train_loss': current_loss / (i + 1)})

        model.eval()

        current_loss = 0.0

        # No gradients are needed while evaluating.
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                images, labels = data
                outputs = model(images)

                loss = criterion(outputs, labels)

                current_loss += loss.item()

        run.log({'val_loss': current_loss / (i + 1)})

You are sorting the labels in:

```python
cifar10 = sorted(cifar10, key=lambda x: x[1])
```

and then splitting this sorted dataset into a training and a validation `Subset`, which moves classes 0-7 into the `train_set` and classes 8-9 into the `val_set` based on the `pivot`.
Your model thus has never seen any classes present in the validation split and I would expect it to fail.

1 Like

Oh alright, yeah, that makes sense! This is probably a dumb question because I am missing something, but I changed the range of the val_set by combining it with the first row, and I do not think it got the classes still. Is there a better way to do this?

```python
cifar10 = sorted(cifar10, key=lambda x: x[1])
train_set = torch.utils.data.Subset(cifar10, range(pivot))
val_set_1 = torch.utils.data.Subset(cifar10, range(0))
val_set_2 = torch.utils.data.Subset(cifar10, range(pivot, len(cifar10)))
val_set = torch.utils.data.ConcatDataset([val_set_1, val_set_2])
```

`range(0)` is an empty range and this `val_set_1` is also empty, so your code snippet doesn’t change anything.
You could either remove the sorting, split the train and val indices manually by making sure both sets see all classes, or you could also use e.g. `sklearn.model_selection.train_test_split` with the `stratify` argument.