Hello PyTorch!
I’m training a Seq2Seq model on a 2080 Ti and I cannot use the GPU fully right now.
- Input dimension, output dimension = 1
- Hidden dimension = 20
- Sequence length (in and out) = 10
- batch size = 100, totally 200 batches
Here is my code for Dataset, Model and Training
I’m using a single GPU; memory usage and utilization are 951 MB and 22% respectively.
Please give any advice or tips for using the memory and GPU fully!!!
Thanks.
dataset = myDataset()
train_loader = DataLoader(dataset=dataset, batch_size=100, shuffle=True, drop_last=True, num_workers=4)
class Encoder(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers=1, dropout=0, bidirectional=False):
super(Encoder, self).__init__()
self.encoder = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
def forward(self, x, hidden):
encoder_output, encoder_state = self.encoder(x, hidden)
return encoder_output, encoder_state
class Decoder(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0, bidirectional=False):
super(Decoder, self).__init__()
self.decoder = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
self.linear = nn.Linear(hidden_dim, output_dim)
def forward(self, x, hidden):
decoder_output, next_hidden = self.decoder(x, hidden)
outputs = []
for i in range(decoder_output.size()[1]):
outputs += [self.linear(decoder_output[:, i, :])]
return torch.stack(outputs, dim=1).squeeze(), decoder_output, next_hidden
class Model(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, output_length=10):
super(Model, self).__init__()
self.encoder = Encoder(input_dim, hidden_dim, num_layers=num_layers)
self.decoder = Decoder(hidden_dim, hidden_dim, output_dim, num_layers=num_layers)
self.output_length = output_length
self.num_layers = num_layers
self.hidden_dim = hidden_dim
def forward(self, x):
encoder_output, encdoer_state = self.encoder(x, None)
decoder_input = torch.unsqueeze(encoder_output[-1], 0)
seq = []
next_hidden=None
next_input = decoder_input
for _ in range(self.output_length):
output, next_input, next_hidden = self.decoder(next_input, next_hidden)
seq += [output]
return torch.stack(seq, dim=0).squeeze(), torch.unsqueeze(encoder_output[-1],0)
loss_func = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 20
total_batch = len(train_loader)
print(‘total_batch = {}’.format(total_batch))
model.train()
train_loss = []
for epoch in range(epochs):
avg_cost = 0.0
for nums, data in enumerate(train_loader):
temp_x, temp_y = data
x = torch.FloatTensor(temp_x)
y = torch.FloatTensor(temp_y)
x = np.transpose(x, (1,0,2))
y = np.transpose(y, (1,0,2))
optimizer.zero_grad()
prediction, fixed_vector = model(x.to(device)) # rnn output
prediction = prediction.unsqueeze(2)
loss = loss_func(prediction, y.to(device))
loss.backward()
optimizer.step()