Need help with low GPU utilization when training Seq2Seq

Hello PyTorch!
I’m training a Seq2Seq model on a 2080 Ti and I can’t get the GPU fully utilized right now.

  • Input dimension, output dimension = 1
  • Hidden dimension = 20
  • Sequence length (in and out) = 10
  • Batch size = 100, 200 batches in total

Here is my code for the Dataset, Model, and Training:

I’m using a single GPU; memory usage is 951 MB and utilization is 22%.

Please share any advice or tips for making fuller use of the memory and GPU!

Thanks.


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset = myDataset()
train_loader = DataLoader(dataset=dataset, batch_size=100, shuffle=True, drop_last=True, num_workers=4)
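(myDataset is my own data and isn’t shown here; the sketch below is just a placeholder with the same shapes — each sample is a (10, 1) input sequence and a (10, 1) target sequence, and 20,000 samples gives the 200 batches of 100 mentioned above.)

# Placeholder dataset -- random data standing in for my real data, shapes only.
class myDataset(torch.utils.data.Dataset):
    def __init__(self, n_samples=20000):
        self.x = torch.randn(n_samples, 10, 1)   # (samples, seq_len, input_dim)
        self.y = torch.randn(n_samples, 10, 1)   # (samples, seq_len, output_dim)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]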


class Encoder(nn.Module):

    def __init__(self, input_dim, hidden_dim, num_layers=1, dropout=0, bidirectional=False):
        super(Encoder, self).__init__()
        self.encoder = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)

    def forward(self, x, hidden):
        # x: (seq_len, batch, input_dim), since batch_first is left at False
        encoder_output, encoder_state = self.encoder(x, hidden)
        return encoder_output, encoder_state
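(Quick shape check of the encoder on its own, using the dimensions listed above; input is sequence-first.)

enc = Encoder(input_dim=1, hidden_dim=20)
dummy = torch.randn(10, 100, 1)        # (seq_len, batch, input_dim)
out, (h, c) = enc(dummy, None)
print(out.shape)                       # torch.Size([10, 100, 20])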

class Decoder(nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0, bidirectional=False):
        super(Decoder, self).__init__()
        self.decoder = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden):
        # x: (1, batch, hidden_dim) -- one decoding step at a time
        decoder_output, next_hidden = self.decoder(x, hidden)

        # apply the output projection to each slice along dim 1 (the batch dimension here)
        outputs = []
        for i in range(decoder_output.size(1)):
            outputs += [self.linear(decoder_output[:, i, :])]
        return torch.stack(outputs, dim=1).squeeze(), decoder_output, next_hidden
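(One decoder step, fed a (1, batch, hidden) tensor as in the model below; after the squeeze the output is one value per batch element.)

dec = Decoder(input_dim=20, hidden_dim=20, output_dim=1)
step_in = torch.randn(1, 100, 20)      # (1, batch, hidden_dim)
step_out, dec_out, hidden = dec(step_in, None)
print(step_out.shape, dec_out.shape)   # torch.Size([100]) torch.Size([1, 100, 20])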

class Model(nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, output_length=10):
        super(Model, self).__init__()
        self.encoder = Encoder(input_dim, hidden_dim, num_layers=num_layers)
        self.decoder = Decoder(hidden_dim, hidden_dim, output_dim, num_layers=num_layers)
        self.output_length = output_length
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim

    def forward(self, x):
        # x: (seq_len, batch, input_dim)
        encoder_output, encoder_state = self.encoder(x, None)

        # last encoder output becomes the first decoder input: (1, batch, hidden_dim)
        decoder_input = torch.unsqueeze(encoder_output[-1], 0)

        seq = []
        next_hidden = None
        next_input = decoder_input

        for _ in range(self.output_length):
            output, next_input, next_hidden = self.decoder(next_input, next_hidden)
            seq += [output]
        return torch.stack(seq, dim=0).squeeze(), torch.unsqueeze(encoder_output[-1], 0)
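(Whole model on one dummy batch: a (10, 100, 1) input gives a (10, 100) prediction plus the (1, 100, 20) fixed vector.)

m = Model(input_dim=1, hidden_dim=20, output_dim=1, output_length=10)
pred, fixed = m(torch.randn(10, 100, 1))
print(pred.shape, fixed.shape)         # torch.Size([10, 100]) torch.Size([1, 100, 20])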

# Dimensions from the list above: input/output dim 1, hidden dim 20, output length 10
model = Model(input_dim=1, hidden_dim=20, output_dim=1, num_layers=1, output_length=10).to(device)

loss_func = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 20

total_batch = len(train_loader)
print('total_batch = {}'.format(total_batch))

model.train()
train_loss = []

for epoch in range(epochs):

    avg_cost = 0.0

    for nums, data in enumerate(train_loader):
        temp_x, temp_y = data

        x = torch.FloatTensor(temp_x)
        y = torch.FloatTensor(temp_y)

        # (batch, seq_len, feature) -> (seq_len, batch, feature), since batch_first=False
        x = x.permute(1, 0, 2)
        y = y.permute(1, 0, 2)

        optimizer.zero_grad()

        prediction, fixed_vector = model(x.to(device))   # rnn output: (seq_len, batch)
        prediction = prediction.unsqueeze(2)
        loss = loss_func(prediction, y.to(device))

        loss.backward()
        optimizer.step()
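In case it matters, here is roughly how I’m thinking of timing data loading vs. the GPU work to see whether the loader is starving the GPU (just a sketch; the split points are my own guess):

import time

torch.cuda.synchronize()
data_time, compute_time = 0.0, 0.0
t0 = time.time()
for nums, data in enumerate(train_loader):
    t1 = time.time()
    data_time += t1 - t0                     # time spent waiting on the DataLoader

    temp_x, temp_y = data
    x = temp_x.float().permute(1, 0, 2).to(device)
    y = temp_y.float().permute(1, 0, 2).to(device)

    optimizer.zero_grad()
    prediction, _ = model(x)
    loss = loss_func(prediction.unsqueeze(2), y)
    loss.backward()
    optimizer.step()

    torch.cuda.synchronize()                 # make the GPU work count toward compute_time
    t0 = time.time()
    compute_time += t0 - t1

print('data: {:.2f}s  compute: {:.2f}s'.format(data_time, compute_time))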