I am running into an error because a batch size of 128 does not fit in my GPU memory. However, I do not want to reduce the batch size too much, because then training becomes too unstable. Is it possible to feed the batch of 128 in sub-batches of 8, compute the loss on each sub-batch, and apply the weight update only after 16x8 = 128 samples have been processed? That way I would never have to fit all 128 instances in memory at once, but the weights would still be updated once per 128 instances. (I sketch the loop I have in mind at the end of this post.)
Here’s a minimal reproducible example of my current setup:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn import Module
x = torch.rand((1000, 3, 30, 30))
y = torch.randint(0, 2, size=(1000,)).long()
class ConvNet(Module):
    def __init__(self):
        super().__init__()
        a = 32
        b = 64
        c = 128
        self.conv1 = nn.Conv2d(3, a, 3)
        self.conv2 = nn.Conv2d(a, b, 3)
        self.conv3 = nn.Conv2d(b, c, 3)
        # 30x30 input becomes 2x2 feature maps after three conv + pool stages
        self.fc1 = nn.Linear(2 * 2 * c, 1024)
        self.fc2 = nn.Linear(1024, 2048)
        self.fc3 = nn.Linear(2048, 2)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # pass self.training so dropout is disabled in eval mode
        x = F.dropout(x, 0.5, training=self.training)
        x = self.fc3(x)
        return x
net = ConvNet()
if torch.cuda.is_available():
    net = net.cuda()  # move the model to the GPU so it matches the inputs below
optimizer = optim.Adam(net.parameters(), lr=0.0001)
loss_function = nn.CrossEntropyLoss()
class DataSet(Dataset):
    def __init__(self):
        self.len = x.shape[0]
        self.x_train = x
        self.y_train = y

    def __getitem__(self, index):
        # index the stored tensors rather than the module-level globals
        return self.x_train[index], self.y_train[index]

    def __len__(self):
        return self.len
data = DataSet()
train_loader = DataLoader(dataset=data, batch_size=64, shuffle=True)
epochs = 2
steps = 0
train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    net.train()
    for images, labels in train_loader:
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        log_ps = net(images)
        loss = loss_function(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("[Epoch: {}/{}] ".format(e + 1, epochs),
          "[Training Loss: {:.3f}] ".format(running_loss / len(train_loader)))