Hello, I am a beginner and I want to build a word-prediction model with an LSTM, but I get the following error and I can't find the cause. Thank you for your help!
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1536, 200]] is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
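The hint mentions anomaly detection; as far as I understand from the PyTorch docs, it is enabled once near the top of the script and makes the backward error include a traceback of the forward operation that produced the failing tensor:

import torch

# Enable autograd anomaly detection; backward errors then point at the
# forward operation that created the tensor whose gradient failed.
torch.autograd.set_detect_anomaly(True)

My full code is below.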
import torch
import torch.nn as nn
import string
import random
import math
import re
import unidecode

print(torch.__version__)

def char_tensor(s):
    # Map every character of s to its index in all_characters
    tensor = torch.zeros(len(s)).long()
    for c in range(len(s)):
        tensor[c] = all_characters.index(s[c])
    return tensor
def clean_text(text):
    # Keep only letters and spaces
    text = text.replace('\n', ' ')
    text = re.sub('[^A-Za-z ]+', '', text)
    return text
def word_to_class(word, word_dict):
    word_class = word_dict[word]
    word_class_t = torch.zeros(1) + word_class
    return word_class_t

def one_hot_encode(y, output_size):
    one_hot = torch.zeros(output_size)
    one_hot[y] = 1
    return one_hot
def create_batches(X_train, y_train, choices, word_dict, output_size, batch_size):
    x_batch = []
    y_batch = []
    for i in range(batch_size):
        idx = choices.pop()
        x_batch.append(char_tensor(X_train[idx]))
        # Use the y_train argument, not the global y_data
        num = word_to_class(y_train[idx], word_dict)
        one_hot = one_hot_encode(int(num), output_size)
        y_batch.append(one_hot)
    xt_batch = torch.stack(x_batch)
    yt_batch = torch.stack(y_batch)
    return xt_batch, yt_batch, choices
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        out, (hidden, cell) = self.lstm(x, (hidden, cell))
        output = self.fc(out)
        return output, (hidden, cell)

    def init_hidden(self, batch_size):
        # The states must match the LSTM's hidden size, not the input size
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell
class SentimentNet(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, device, drop_prob=0.5):
        super(SentimentNet, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.device = device
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        out = self.dropout(lstm_out)
        # Classify from the last time step only
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(self.device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(self.device))
        return hidden
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get characters from string.printable
all_characters = string.printable
n_characters = len(all_characters)

# Read a large text file (can be any text file, not limited to just names)
text = unidecode.unidecode(open("wonderland.txt").read())
text_clean = clean_text(text)
text_clean = text_clean.split()

# Build fixed-length character sequences; the target is the word that follows
seq_length = 100
y_data = []
data = []
part = ''
for idx in range(0, len(text_clean) - 1):
    part += (text_clean[idx] + " ")
    new_length = len(part) + len(text_clean[idx + 1])
    if new_length > seq_length:
        part = part[:-1]
        part = part.rjust(seq_length)
        y_data.append(text_clean[idx + 1])
        data.append(part)
        part = ''

print("data size", len(data))
print("y_data size", len(y_data))
# Train/test split (inputs come from data, labels from y_data)
train_size = round(len(data) * 0.9)
X_train = data[:train_size]
y_train = y_data[:train_size]
X_test = data[train_size:]
y_test = y_data[train_size:]

unique_word = set(text_clean)
output_size = len(unique_word)
num_epochs = 5000
batch_size = 16
hidden_size = 128 * 3
num_layers = 3
lr = 0.3
iterations = math.floor(len(X_train) / batch_size)
train_data_size = iterations * batch_size
word_dict = {word: idx for idx, word in enumerate(unique_word)}

# vocab_size is the size of the character set the embedding indexes into
model = SentimentNet(n_characters, output_size, 200, hidden_size, num_layers, device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
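A side note I am not sure about: as far as I know, nn.CrossEntropyLoss expects integer class indices and applies log-softmax internally, so the one-hot encoding plus the torch.max(y_batch, 1)[1] round-trip in the loop below could probably be dropped. A sketch only (batch_indices is a placeholder for whatever indices the batch was drawn with):

# Sketch: build integer class targets directly instead of one-hot vectors
y_idx = torch.tensor([word_dict[y_train[i]] for i in batch_indices], dtype=torch.long)
loss = criterion(output, y_idx.to(device))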
print("=> Starting training")
best_loss = float("inf")
for epoch in range(1, num_epochs + 1):
hidden = model.init_hidden(batch_size=batch_size)
epoch_loss = 0
choices = list(range(train_data_size))
for idx in range(iterations):
x_batch, y_batch, choices = create_batches(X_train, y_train, choices, word_dict, output_size, batch_size)
x_batch = x_batch.to(device)
y_batch = y_batch.to(device).long()
#x_batch = x_batch.unsqueeze(0)
print("Input size ", x_batch.shape)
# output, (hidden, cell) = model(x_batch, hidden, cell)
output, hidden = model(x_batch, hidden)
print(output.shape)
print("target", y_batch.shape)
output = output.float()
y_batch = y_batch.long()
loss = criterion(output, torch.max(y_batch, 1)[1]).to(device)
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
#epoch_loss += loss.item()
epoch_loss = epoch_loss / iterations
print("Epoch {} loss {}".format(epoch, epoch_loss ))