Hi, I’m training an RNN for intent prediction. There are 11 actions and 29 slots for each action.
for example:
I’d like 2 tickets to see Zoolander 2 tomorrow at Regal Meridian 16 theater in Seattle at 9:25 PM
request(ticket;moviename=Zoolander 2;date=tomorrow;theater=Regal Meridian 16;city=Seattle;starttime=9:25 PM;numberofpeople=2)
I used TfidfVectorizer to transform the text into vectors. Below is the code for training.
Code:
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--slots', dest='run_mode', type=int, default=0, help='only predict action')
args = parser.parse_args()
params = vars(args)
run_mode = params['run_mode']
raw = load_file()
if(run_mode == 1):
features_numpy, targets_numpy = Data_Preproccess(raw)
elif(run_mode == 0):
features_numpy, targets_numpy = Data_Preproccess2(raw)
# train test split. Size of train data is 80% and size of test data is 20%.
features_train, features_test, targets_train, targets_test = train_test_split(features_numpy, targets_numpy, test_size = 0.2, random_state = 42)
# create feature and targets tensor for train set. As you remember we need variable to accumulate gradients. Therefore first we create tensor, then we will create variable
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train).type(torch.LongTensor) # data type is long
print(featuresTrain.shape)
print(targetsTrain.shape)
# create feature and targets tensor for test set.
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(targets_test).type(torch.LongTensor) # data type is long
# batch_size, epoch and iteration
batch_size = 100
n_iters = 2500
num_epochs = n_iters / (len(features_train) / batch_size)
num_epochs = int(num_epochs)
# Pytorch train and test sets
train = torch.utils.data.TensorDataset(featuresTrain,targetsTrain)
test = torch.utils.data.TensorDataset(featuresTest,targetsTest)
# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = False)
# Create RNN
#print(len(train))
input_dim = 4773 # input dimension
hidden_dim = 100 # hidden layer dimension
layer_dim = 2 # number of hidden layers
output_dim = 11*29 # output dimension
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
# Cross Entropy Loss
error = nn.CrossEntropyLoss()
# SGD Optimizer
learning_rate = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
seq_dim = 1
loss_list = []
iteration_list = []
accuracy_list = []
count = 0
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
train = Variable(images.view(-1, seq_dim, input_dim))
labels = Variable(labels)
# Clear gradients
optimizer.zero_grad()
# Forward propagation
outputs = model(train.float())
# Calculate softmax and cross entropy loss
loss = error(outputs, labels)
# Calculating gradients
loss.backward()
# Update parameters
optimizer.step()
count += 1
if count % 250 == 0:
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images = Variable(images.view(-1, seq_dim, input_dim))
# Forward propagation
outputs = model(images)
# Get predictions from the maximum value
predicted = torch.max(outputs.data, 1)[1]
# Total number of labels
total += labels.size(0)
correct += (predicted == labels).sum()
accuracy = 100 * correct / float(total)
# store loss and iteration
loss_list.append(loss.data)
iteration_list.append(count)
accuracy_list.append(accuracy)
if count % 500 == 0:
# Print Loss
print('Iteration: {} Loss: {} Accuracy: {} %'.format(count, loss.data[0], accuracy))
Error:
Traceback (most recent call last):
File "train.py", line 85, in <module>
loss = error(outputs, labels)
File "/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 477, in __call__
result = self.forward(*input, **kwargs)
File "/anaconda3/lib/python3.6/site-packages/torch/nn/modules/loss.py", line 862, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File "/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py", line 1550, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py", line 1407, in nll_loss
return torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: multi-target not supported at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/THNN/generic/ClassNLLCriterion.c:21
I can see that the problem is at loss = error(outputs, labels)
, since outputs should be 1D. However, I printed its shape and the result is torch.Size([100, 319]). How should I fix this?
Thankyou.