I have 4gb ram ,2gb ram gpu and when i am trying lenet-5 for kaggle facial keypoints dataset i m getting RuntimeError: CUDA error: out of memory. What should I do and what is causing this?

```
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
# 1 input image channel, 6 output channels, 5x5 square convolution
# kernel
self.conv1 = nn.Conv2d(1, 32, 3)
self.conv2 = nn.Conv2d(6, 64, 2)
self.conv3 = nn.Conv2d(64, 128, 2)
self.fc1 = nn.Linear(128 * 5 * 5, 500)
self.fc2 = nn.Linear(500, 500)
self.fc3 = nn.Linear(500, 30)
def forward(self, x):
# Max pooling over a (2, 2) window
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
# If the size is a square you can only specify a single number
x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = self.fc3(x)
return x
def num_flat_features(self, x):
size = x.size()[1:] # all dimensions except the batch dimension
num_features = 1
for s in size:
num_features *= s
return num_features
dtype = torch.float
device = torch.device("cuda:0")
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 2140, 9216, 100, 30
def train(model,x,y,criterion,optimizer):
model.train()
y_pred = model(x)
loss = criterion(y_pred, y)
print('train-loss',t, loss.item(),end=' ')
optimizer.zero_grad()
loss.backward()
optimizer.step()
return loss.item()
def valid(model,x_valid,y_valid,criterion):
model.eval()
y_pred = model(x_valid)
loss = criterion(y_pred, y_valid)
print('test-loss',t, loss.item(),end=' ')
return loss.item()
# Create random Tensors to hold inputs and outputs
X_train=X_train.reshape(-1, 1, 96, 96)
X_valid=X_valid.reshape(-1, 1, 96, 96)
x_train = torch.tensor(torch.from_numpy(X_train),device=device,dtype=dtype)
y_train = torch.tensor(torch.from_numpy(Y_train),device=device,dtype=dtype)
x_valid = torch.tensor(torch.from_numpy(X_valid),device=device,dtype=dtype)
y_valid = torch.tensor(torch.from_numpy(Y_valid),device=device,dtype=dtype)
model = LeNet().to(device)
loss_train=[]
loss_valid=[]
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(400):
loss_train.append(train(model,x_train,y_train,criterion,optimizer))
loss_valid.append(valid(model,x_valid,y_valid,criterion))
print()
```