I have built a simple LSTM autoencoder for sequence data. When I run it on the CPU, the cross-entropy loss converges to a nice low value of about 0.075. If I run the exact same code on a machine with CUDA, using the GPU, the loss bounces around 1.36 and never converges to anything comparably low.
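For reference, here is a minimal sketch of how the two runs could be seeded identically so they are as comparable as possible (seed_everything is my own helper name and the seed value 0 is arbitrary):

import random
import numpy as np
import torch as T

def seed_everything(seed: int = 0):
    # Fix every RNG in play so CPU and GPU runs start from the same state.
    random.seed(seed)
    np.random.seed(seed)
    T.manual_seed(seed)  # also seeds all CUDA devices on PyTorch 1.10
    T.backends.cudnn.deterministic = True
    T.backends.cudnn.benchmark = False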
This is my Dataset (the DataLoader itself is created in the training section below):
import copy

import numpy as np
import pandas as pd
import torch as T
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

class SequenceDataset(Dataset):
    def __init__(self, data_path, device):
        self.data_file = pd.read_csv(data_path)
        self.sequences = self.data_file["text"]
        self.labels = T.tensor(self.data_file["label"], dtype=T.float).to(device)
        self.n_examples = len(self.labels)
        self.tokens = T.zeros([self.n_examples, len(self.sequences[0])], dtype=T.long).to(device)
        tokenizer = get_tokenizer(None)  # plain whitespace split

        def yield_tokens(data_iterator):
            for t in data_iterator:
                yield tokenizer(t)

        # Character-level vocabulary: the sequences use a 4-letter alphabet.
        self.vocab = build_vocab_from_iterator(yield_tokens(" ".join(self.sequences)))
        for i, d in enumerate(self.sequences):
            # " ".join(d) spaces out the characters so the whitespace
            # tokenizer yields one token per character.
            self.tokens[i] = T.tensor(self.vocab(tokenizer(" ".join(d))))

    def __len__(self):
        return self.n_examples

    def __getitem__(self, item):
        return {'text': self.tokens[item], 'label': self.labels[item]}
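For what it's worth, a single item from the dataset looks like this ("data.csv" is a placeholder path; my sequences are 69 characters long):

ds = SequenceDataset("data.csv", device)       # placeholder path
item = ds[0]
print(item['text'].shape, item['text'].dtype)  # torch.Size([69]) torch.int64
print(item['label'])                           # scalar float label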
This is my Autoencoder:
class Encoder(nn.Module):
    def __init__(self, n_features, hidden_size):
        super(Encoder, self).__init__()
        # 4 embedding rows: one per character in the vocabulary.
        self.embedding = nn.Embedding(4, n_features)
        self.lstm_enc = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_size,
            batch_first=True
        )

    def forward(self, x):
        embedded = self.embedding(x)
        _, (h, _) = self.lstm_enc(embedded)
        enc = h.squeeze(dim=0)  # [1, batch, hidden] -> [batch, hidden]
        return enc

class Decoder(nn.Module):
    def __init__(self, n_features, hidden_size):
        super(Decoder, self).__init__()
        self.lstm_dec = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            batch_first=True
        )
        self.linearlayer = nn.Linear(hidden_size, n_features)

    def forward(self, x):
        # Repeat the encoding once per time step (sequence length 69).
        enc = x.unsqueeze(1).repeat(1, 69, 1)
        out, _ = self.lstm_dec(enc)
        out = self.linearlayer(out)
        return out

class LSTM_AutoEncoder(nn.Module):
    def __init__(self, n_features, hidden_size):
        super(LSTM_AutoEncoder, self).__init__()
        self.encoder = Encoder(n_features, hidden_size)
        self.decoder = Decoder(n_features, hidden_size)

    def forward(self, x):
        h = self.encoder(x)
        out = self.decoder(h)
        return out
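To make the shapes explicit, here is a dummy forward pass on the CPU (batch size 8 and hidden_size 32 are placeholders; n_features is 4 to match the vocabulary size):

dummy = T.randint(0, 4, (8, 69), dtype=T.long)       # 8 token sequences of length 69
ae = LSTM_AutoEncoder(n_features=4, hidden_size=32)  # placeholder hyperparameters
logits = ae(dummy)
print(logits.shape)  # torch.Size([8, 69, 4]): per-position class logits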
This is how I set up the model, optimizer, and loss:
model = LSTM_AutoEncoder(n_features, hidden_size)
model = model.to(device)
best_model_wts = copy.deepcopy(model.state_dict())
#------
optimizer = T.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss(reduction='mean').to(device)
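Since nn.CrossEntropyLoss expects raw logits of shape [N, C] together with long class indices, n_features has to equal the vocabulary size (4 here). A toy call just to illustrate the shapes it is fed:

logits = T.randn(8, 4)             # [batch, n_classes] raw (unnormalized) logits
targets = T.randint(0, 4, (8,))    # [batch] long class indices
print(criterion(logits, targets))  # scalar: mean cross entropy over the batch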
And this is my training loop:
history = dict(train=[])
best_loss = 2
SD = SequenceDataset(file, device)                        # load the data
DL = DataLoader(SD, batch_size=batch_size, shuffle=True)  # batch it

for epoch in range(1, n_epochs + 1):
    model.train()
    train_losses = []
    for batch in DL:
        optimizer.zero_grad()
        actual = batch['text'].to(device)
        pred = model(actual)
        # Cross entropy per sequence position (69 tokens), averaged over positions.
        l = [criterion(pred[:, i], actual[:, i]) for i in range(69)]
        loss = sum(l) / len(l)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
    train_loss = np.mean(train_losses)
    history['train'].append(train_loss)
    model.eval()
    if train_loss < best_loss:
        best_loss = train_loss
        best_model_wts = copy.deepcopy(model.state_dict())
    if epoch % 100 == 0:
        print(f'Epoch {epoch}: CE loss {train_loss}')
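As an aside, I believe the per-position loop is equivalent to a single vectorized call, since CrossEntropyLoss also accepts logits of shape [N, C, d] with [N, d] targets and averages over every position:

# Sketch of the vectorized form: move the class dimension to dim 1.
loss = criterion(pred.permute(0, 2, 1), actual)  # logits [B, 4, 69], targets [B, 69]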
I am using CUDA 11.5 and PyTorch 1.10.