Hi there, I’m working on an RNN model to predict whether a line of a text file is a title or not.
I have 11 input features that give information about the line, such as font weight, whether the text contains capital letters, whether it is part of a numbered list, and so on…
The output should be 0 if the line is not a title or 1 if it is a title.
Train data contains 1467 lines with target=0 (not title) and 1467 with target=1 (title). I know it’s small but I plan to add much more data later.
My problem is that the loss and accuracy don’t evolve as the epochs go by: I keep getting the same numbers, so it looks like the model learns nothing. I can’t figure out why. Is there something in the code that does not behave as it should? I was expecting a poor score because of the lack of data, but here there is no evolution at all as training progresses.
Output shows :
Iteration: 500 Loss: 0.6921280026435852 Accuracy: 48.72231674194336 %
Iteration: 1000 Loss: 0.6927993297576904 Accuracy: 48.72231674194336 %
Iteration: 1500 Loss: 0.6930286884307861 Accuracy: 48.72231674194336 %
Iteration: 2000 Loss: 0.6939061880111694 Accuracy: 48.72231674194336 %
Iteration: 2500 Loss: 0.6928155422210693 Accuracy: 48.72231674194336 %
Iteration: 3000 Loss: 0.6931646466255188 Accuracy: 48.72231674194336 %
Here is my RNN model :
class RNNModel(nn.Module):
    """Recurrent classifier: an ``nn.RNN`` (ReLU) followed by a linear readout.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        layer_dim: Number of stacked RNN layers.
        output_dim: Number of scores produced by the readout layer.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        # Number of hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers
        self.layer_dim = layer_dim
        # RNN; batch_first=True means inputs are indexed (batch, seq, feature)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True,
                          nonlinearity='relu')
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout scores computed from the last time step of ``x``.

        Args:
            x: Input tensor indexed as (batch, seq, feature).

        Returns:
            Tensor of shape (batch, output_dim) with unnormalized scores.
        """
        # Zero initial hidden state, allocated from ``x`` so that it always
        # shares the input's device and dtype (a plain ``torch.zeros`` would
        # stay on the CPU as float32 and break GPU or double-precision runs).
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.rnn(x, h0)
        # Only the output of the final time step feeds the readout layer.
        return self.fc(out[:, -1, :])
And here is where the training happens :
dataset = ContractTitlesDataset(csv_file="train.csv")

# Train/test split: 80% of the rows for training, 20% for evaluation.
# The fixed random_state keeps the split reproducible between runs.
features_train, features_test, targets_train, targets_test = train_test_split(
    dataset.features,
    dataset.targets,
    test_size=0.2,
    random_state=42,
)

# Feature tensors are float; target tensors are long because
# nn.CrossEntropyLoss expects class indices.
featuresTrain = torch.from_numpy(features_train.values).type(torch.FloatTensor)
targetsTrain = torch.from_numpy(targets_train.values).type(torch.LongTensor)

featuresTest = torch.from_numpy(features_test.values).type(torch.FloatTensor)
targetsTest = torch.from_numpy(targets_test.values).type(torch.LongTensor)

# batch_size, epoch and iteration: the epoch count is derived so that roughly
# n_iters optimizer steps are taken in total.
batch_size = 100
n_iters = 100000
num_epochs = int(n_iters / (len(features_train) / batch_size))

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test = torch.utils.data.TensorDataset(featuresTest, targetsTest)

# Data loaders.
# NOTE(review): training batches are served in a fixed order (shuffle=False);
# consider shuffle=True for SGD - left unchanged here to preserve behavior.
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

# Create RNN
input_dim = 11   # input dimension
hidden_dim = 10  # hidden layer dimension
layer_dim = 1    # number of hidden layers
output_dim = 2   # output dimension
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# Cross Entropy Loss (applies log-softmax to the model outputs internally)
error = nn.CrossEntropyLoss()

# SGD Optimizer
learning_rate = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

loss_list = []
iteration_list = []
accuracy_list = []
count = 0
for epoch in range(num_epochs):
    for features, labels in train_loader:
        # (batch, input_dim) -> (batch, 1, input_dim): every row is fed to the
        # RNN as a sequence of length one. A separate name is used so the
        # `train` dataset above is not overwritten.
        inputs = features.view(-1, 1, input_dim)

        # Clear gradients
        optimizer.zero_grad()

        # Forward propagation
        outputs = model(inputs)

        # Flatten labels to 1-D so both (batch,) and (batch, 1) targets work.
        loss = error(outputs, labels.view(-1))

        # Calculating gradients
        loss.backward()

        # Update parameters
        optimizer.step()

        count += 1

        if count % 250 == 0:
            # Calculate accuracy on the held-out set. Distinct variable names
            # keep the training batch intact, and no_grad avoids building an
            # autograd graph during evaluation.
            correct = 0
            total = 0
            model.eval()
            with torch.no_grad():
                for test_features, test_labels in test_loader:
                    test_outputs = model(test_features.view(-1, 1, input_dim))

                    # Predicted class = index of the largest score
                    predicted = torch.max(test_outputs, 1)[1]

                    total += test_labels.size(0)
                    # Same flattening as in training, so the comparison is
                    # element-wise whatever the stored target shape is.
                    correct += (predicted == test_labels.view(-1)).sum().item()
            model.train()

            accuracy = 100 * correct / float(total)

            # Store plain Python numbers rather than tensors
            loss_list.append(loss.item())
            iteration_list.append(count)
            accuracy_list.append(accuracy)

            if count % 500 == 0:
                # Print Loss
                print('Iteration: {} Loss: {} Accuracy: {} %'.format(count, loss.item(), accuracy))