I think I’ve done something wrong: my training uses too much GPU memory.
Before training I made sure that no memory was allocated,
but it dies with the following error:
RuntimeError: CUDA out of memory. Tried to allocate 14.00 MiB (GPU 0; 7.93 GiB total capacity; 6.30 GiB already allocated; 25.75 MiB free; 6.78 GiB reserved in total by PyTorch)
I do not know why 6.3 GiB is already allocated.
Is there something I am doing wrong?
This is my training function.
def train(input, target):
    """Run the model over one sequence and return the summed per-step loss.

    Relies on the module-level ``model`` and ``criterion`` objects.

    Args:
        input: Sequence tensor indexed along its first dimension. Each step
            ``input[i]`` is given a leading length-1 dimension before being
            passed to ``model``.
        target: Per-step targets, indexed in lockstep with ``input``.

    Returns:
        The sum of ``criterion(output, target[i])`` over all steps. When
        ``input`` has no steps the plain integer ``0`` is returned.

    Note:
        The returned loss is the sum of every step's loss and is still
        attached to the autograd graph built across all steps, so the
        memory held by that graph grows with the sequence length. A caller
        that only wants the number (logging, running averages) should take
        ``loss.item()`` or ``loss.detach()`` rather than keep the returned
        tensor alive.
    """
    # initializing grad and hidden state
    hidden = model.init_hidden()

    # ``detach()`` is the supported replacement for the legacy ``.data``
    # attribute: same values, no gradient flowing into the inputs, but
    # autograd can still detect unsafe in-place modifications.
    steps = input.detach()
    targets = target.detach()

    loss = 0
    for i in range(steps.size(0)):
        # The hidden state is threaded from one step to the next, so the
        # graph of step i references the graphs of all earlier steps.
        output, hidden = model(steps[i].unsqueeze(0), hidden)
        loss += criterion(output, targets[i])

    # backpropagation
    # optimizing
    # learning rate scheduling
    return loss
This is my GRU unit:
import torch.nn as nn
import torch
class GRU(nn.Module):
    """Multi-layer GRU followed by an ELU activation and a linear head.

    Args:
        input_size: Number of features of each input step.
        hidden_size: Number of features of the GRU hidden state.
        output_size: Number of features produced by the linear head.
        batch_size: Batch size used when building the initial hidden state.
        device: Device on which the initial hidden state is allocated.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, batch_size, device, num_layers):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        # nn.GRU applies dropout only between stacked layers; passing a
        # non-zero value with a single layer has no effect and only
        # triggers a warning, so it is disabled in that case.
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=False,
            dropout=0.2 if num_layers > 1 else 0.0,
        )
        self.elu = nn.ELU()
        self.fc = nn.Linear(hidden_size, output_size)
        self.device = device

    def forward(self, x, h):
        """Advance the GRU and project the top layer's final hidden state.

        Args:
            x: Input of shape ``(seq_len, batch, input_size)`` (the GRU is
                built with ``batch_first=False``).
            h: Hidden state of shape ``(num_layers, batch, hidden_size)``.

        Returns:
            A tuple ``(out, h)`` where ``out`` has shape
            ``(batch, output_size)`` and ``h`` is the updated hidden state.
        """
        _, h = self.gru(x, h)
        # Use the LAST layer's hidden state. Index 0 is the bottom layer,
        # which would leave every upper layer without any influence on the
        # output (and without gradient). For a single layer both coincide.
        out = self.fc(self.elu(h[-1]))
        return out, h

    def init_hidden(self):
        """Return an all-zero hidden state allocated on ``self.device``.

        Returns:
            Tensor of shape ``(num_layers, batch_size, hidden_size)``.
        """
        # Allocate directly on the target device instead of creating the
        # tensor on the CPU and copying it over.
        return torch.zeros(
            self.num_layers, self.batch_size, self.hidden_size, device=self.device
        )