I recently struggled with result that when train with batch size more than 1, my loss could not reduce a lot as the output from mini_batch seems always get similar output while the label is quite different. I prototype this with a simple model and get similar problem, I wonder if anyone know what’s wrong with my code, or should we always train with same labels for one batch. thanks!
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class TestDataSet(Dataset):
def __init__(self):
self.data_list = torch.rand((6, 1, 4, 4), dtype=torch.float32)
self.groundtruth_list = torch.arange(6, dtype=torch.float32) * 10
def __len__(self):
return 6
def __getitem__(self, idx):
data = self.data_list[idx]
groundtruth = self.groundtruth_list[idx]
return data, groundtruth
trainset = TestDataSet()
trainloader = DataLoader(dataset=trainset, batch_size=3, shuffle=False, num_workers=0)
class TestNet(nn.Module):
def __init__(self):
super(TestNet, self).__init__()
self.feature = nn.Sequential(
nn.Conv2d(1, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size = 3, stride=1, padding=1), nn.ReLU(inplace=True))
self.linear = nn.Linear(32*4*4, 1)
self.__initialize()
def __initialize(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.feature(x)
x = x.view(-1, 512)
x = self.linear(x)
return x
net = TestNet()
print(net)
net = TestNet()
import torch.optim as optim
criterion = nn.L1Loss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
num_epochs = 1000
for epoch in range(num_epochs):
running_loss = 0.0
for batch_idx, (inputs, labels) in enumerate(trainloader):
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
if epoch % 100 == 99:
print("epoch {} output = {}".format(epoch, outputs))
loss.backward()
optimizer.step()
running_loss += loss.item()
running_loss = running_loss / 2
if epoch % 100 == 99:
print("epoch {} running loss = {:.4f}".format(epoch, running_loss))