Hi everyone,
I am trying to train an RNN based off the code here
The error is pretty easy to intepret, the model is expecting 3 dimensions, but I am only giving it 1. However, I only have 1 input array for each output. So that being said, I could use some insight as to where the error is coming from. Thanks!
My input are 300d word embeddings and my output are one hot encoded vectors of length 11, where the model makes a classification choice in each of the 11 output dimensions.
I define my vanilla RNN as follows.
class VanillaRNN(nn.Module):
def __init__(self, input_size, output_size, hidden_dim, n_layers):
super(VanillaRNN, self).__init__()
# Defining some parameters
self.hidden_dim = hidden_dim
self.n_layers = n_layers
#Defining the layers
# RNN Layer
self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
# Fully connected layer
self.fc = nn.Linear(hidden_dim, output_size)
def forward(self, inputs):
batch_size = inputs.size(0)
# Initializing hidden state for first input using method defined below
hidden = self.init_hidden(batch_size)
# Passing in the input and hidden state into the model and obtaining outputs
out, hidden = self.rnn(inputs, hidden)
# Reshaping the outputs such that it can be fit into the fully connected layer
out = out.contiguous().view(-1, self.hidden_dim)
out = self.fc(out)
return out, hidden
def init_hidden(self, batch_size):
# This method generates the first hidden state of zeros which we'll use in the forward pass
# We'll send the tensor holding the hidden state to the device we specified earlier as well
hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
return hidden
and my training loop as follows
def plot_train_val(x, train, val, train_label,
val_label, title, y_label,
color):
plt.plot(x, train, label=train_label, color=color)
plt.plot(x, val, label=val_label, color=color, linestyle='--')
plt.legend(loc='lower right')
plt.xlabel('epoch')
plt.ylabel(y_label)
plt.title(title)
def count_parameters(model):
parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
return parameters
def init_weights(m):
if type(m) in (nn.Linear, nn.Conv1d):
nn.init.xavier_uniform_(m.weight)
# Training functioN
def train(model, device, train_loader, valid_loader, epochs, learning_rate):
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loss, validation_loss = [], []
train_acc, validation_acc = [], []
for epoch in range(epochs):
#train
model.train()
running_loss = 0.
correct, total = 0, 0
steps = 0
for idx, batch in enumerate(train_loader):
text = batch["Sample"].to(device)
target = batch['Class'].to(device)
target = torch.autograd.Variable(target).long()
text, target = text.to(device), target.to(device)
# add micro for coding training loop
optimizer.zero_grad()
output, hideden = model(text)
print(output.shape, target.shape, target.view(-1).shape)
loss = criterion(output, target.view(-1))
loss.backward()
optimizer.step()
steps += 1
running_loss += loss.item()
# get accuracy
_, predicted = torch.max(output, 1)
print(predicted)
#predicted = torch.round(output.squeeze())
total += target.size(0)
correct += (predicted == target).sum().item()
train_loss.append(running_loss/len(train_loader))
train_acc.append(correct/total)
print(f'Epoch: {epoch + 1}, '
f'Training Loss: {running_loss/len(train_loader):.4f}, '
f'Training Accuracy: {100*correct/total: .2f}%')
# evaluate on validation data
model.eval()
running_loss = 0.
correct, total = 0, 0
with torch.no_grad():
for idx, batch in enumerate(valid_loader):
text = batch["Sample"].to(device)
print(type(text), text.shape)
target = batch['Class'].to(device)
target = torch.autograd.Variable(target).long()
text, target = text.to(device), target.to(device)
optimizer.zero_grad()
output = model(text)
loss = criterion(output, target)
running_loss += loss.item()
# get accuracy
_, predicted = torch.max(output, 1)
#predicted = torch.round(output.squeeze())
total += target.size(0)
correct += (predicted == target).sum().item()
validation_loss.append(running_loss/len(valid_loader))
validation_acc.append(correct/total)
print (f'Validation Loss: {running_loss/len(valid_loader):.4f}, '
f'Validation Accuracy: {100*correct/total: .2f}%')
return train_loss, train_acc, validation_loss, validation_acc
When I run the model with the following, I get the error provided below. Thanks in advance for any help.
# Model hyperparamters
#vocab_size = len(word_array)
learning_rate = 1e-3
output_size = 11
input_size = 300
epochs = 10
hidden_dim = 100
n_layers = 2
# Initialize model, training and testing
set_seed(SEED)
vanilla_rnn_model = VanillaRNN(input_size, output_size, hidden_dim, n_layers)
#vanilla_rnn_model = VanillaRNN(output_size, input_size, RNN_size, fc_size, DEVICE)
vanilla_rnn_model.to(DEVICE)
vanilla_rnn_start_time = time.time()
vanilla_train_loss, vanilla_train_acc, vanilla_validation_loss, vanilla_validation_acc = train(vanilla_rnn_model,
DEVICE,
train_loader,
valid_loader,
epochs = epochs,
learning_rate = learning_rate)
The error
RuntimeError: input must have 3 dimensions, got 1