RuntimeError: stack expects a non-empty TensorList

import torch
import torch.nn as nn
import torch.nn.functional as F

import spacy
import torchtext
from torchtext.data import TabularDataset, BucketIterator, Field

import pandas as pd
import numpy as np

df_train = pd.read_csv('V:\\pythonproject\\nlp\\New folder\\headlines\\train.csv')
df_test = pd.read_csv('V:\\pythonproject\\nlp\\New folder\\headlines\\test.csv')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

spacy_eng = spacy.load('en')  # spaCy 2.x shortcut; on spaCy 3.x use spacy.load('en_core_web_sm')
stop_words = spacy.lang.en.stop_words.STOP_WORDS

def tokenize(text):
    # Compare lowercased tokens: Field(lower=True) lowercases only after
    # tokenization, so capitalized stop words like 'The' would otherwise slip through
    return [tok.text for tok in spacy_eng.tokenizer(text) if tok.text.lower() not in stop_words]

text = Field(tokenize=tokenize, lower=True, batch_first=True)
label = Field(sequential=False, use_vocab=False)
auth = Field(sequential=False, use_vocab=False)
#short_d = Field(tokenize=tokenize, lower=True, batch_first=True)

fields = {'headline': ('t1', text), 'short_description': ('t2', text),
          'author': ('au', auth), 'label': ('l', label)}

train_data, test_data = TabularDataset.splits(path='V:\\pythonproject\\nlp\\New folder\\headlines',
                                              train='train.csv', test='test.csv',
                                              format='csv', fields=fields)
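
Since the fields are given as a dict, the keys must match the CSV header names exactly. A quick sanity check (a sketch, reusing the DataFrames loaded above) to compare both sides:

# The fields dict keys must match the CSV header exactly
print(df_train.columns.tolist())
print(vars(train_data.examples[0]).keys())   # should contain t1, t2, au, l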

text.build_vocab(train_data, max_size=50000, min_freq=1)
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data), batch_size=4, device=device, sort=False, shuffle=False
)
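
Before building the models, it is worth pulling a single batch to confirm every field is populated with the expected shape; an empty or missing text field is the kind of thing that can surface later as a "stack expects a non-empty TensorList" error. A minimal check:

# Pull one batch and inspect shapes before training
sample_batch = next(iter(train_iterator))
print(sample_batch.t1.shape, sample_batch.t2.shape)   # (batch_size, seq_len) each
print(sample_batch.au.shape, sample_batch.l.shape)    # (batch_size,) each
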
class RNN_LSTM(nn.Module):
    def __init__(self, input_size, embed_size, hidden_size, num_layers):
        super(RNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embed_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        #self.fc_out = nn.Linear(hidden_size, 41)

    def forward(self, x):
        # Initial hidden and cell states (nn.LSTM would default these to zeros anyway)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        embedded = self.embedding(x)
        outputs, _ = self.rnn(embedded, (h0, c0))
        #prediction = F.softmax(self.fc_out(outputs[:, -1, :]), dim=1)

        # Use the output at the last time step as a fixed-size sentence encoding
        return outputs[:, -1, :]
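
A quick smoke test of the encoder (a sketch with made-up sizes, not part of the pipeline): a batch of token ids of shape (batch, seq_len) should come out as a (batch, hidden_size) encoding.

# Hypothetical sizes: vocab of 100, embed 16, hidden 32, one layer
enc = RNN_LSTM(100, 16, 32, 1).to(device)
dummy = torch.randint(0, 100, (4, 10)).to(device)
print(enc(dummy).shape)   # torch.Size([4, 32])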

# hyperparameters
input_size = len(text.vocab)
hidden_size = 128
num_layers = 1
embedding_size = 100
learning_rate = 0.005
num_epochs = 6

model_headline = RNN_LSTM(input_size, embedding_size, hidden_size, num_layers).to(device)
model_short_description = RNN_LSTM(input_size, embedding_size, hidden_size, num_layers).to(device)

class NN(nn.Module):
    def __init__(self):
        super().__init__()

        # Author ids are embedded directly; size the table to cover both splits
        self.emb = nn.Embedding(max(df_train['author'].max(), df_test['author'].max()) + 1, 128)
        self.fc_head = nn.Linear(128, 32)
        self.fc_short = nn.Linear(128, 16)
        self.fc_emb = nn.Linear(128, 8)
        self.fc1 = nn.Linear(56, 128)  # 32 + 16 + 8 concatenated features
        self.fc2 = nn.Linear(128, 41)  # 41 target classes

    def forward(self, model_headline, model_short_description, headlines, short_description, authors):
        x_head = model_headline(headlines)
        x_short_descript = model_short_description(short_description)
        emb = self.emb(authors)
        x_head = self.fc_head(x_head)
        x_short_descript = self.fc_short(x_short_descript)
        x_authors = self.fc_emb(emb)
        x = torch.cat((x_head, x_short_descript, x_authors), dim=1)
        # Return raw logits: CrossEntropyLoss applies log-softmax internally, so an
        # explicit softmax here would squash the gradients. The ReLU keeps the two
        # stacked linear layers from collapsing into a single linear map.
        return self.fc2(F.relu(self.fc1(x)))

model = NN().to(device=device)

criterion = torch.nn.CrossEntropyLoss()
# Include the encoder parameters as well; otherwise the two LSTMs never receive updates
params = (list(model.parameters())
          + list(model_headline.parameters())
          + list(model_short_description.parameters()))
optimizer = torch.optim.Adam(params, lr=learning_rate)
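
An alternative worth considering (a sketch under the same hyperparameters, with a hypothetical class name): register the two encoders as submodules of the classifier, so a single model.parameters(), .to(device), and .train()/.eval() call covers everything.

class HeadlineClassifier(nn.Module):   # hypothetical combined module
    def __init__(self):
        super().__init__()
        self.headline_enc = RNN_LSTM(input_size, embedding_size, hidden_size, num_layers)
        self.short_enc = RNN_LSTM(input_size, embedding_size, hidden_size, num_layers)
        self.emb = nn.Embedding(max(df_train['author'].max(), df_test['author'].max()) + 1, 128)
        self.fc_head = nn.Linear(hidden_size, 32)
        self.fc_short = nn.Linear(hidden_size, 16)
        self.fc_emb = nn.Linear(128, 8)
        self.fc1 = nn.Linear(56, 128)
        self.fc2 = nn.Linear(128, 41)

    def forward(self, headlines, short_description, authors):
        x = torch.cat((self.fc_head(self.headline_enc(headlines)),
                       self.fc_short(self.short_enc(short_description)),
                       self.fc_emb(self.emb(authors))), dim=1)
        return self.fc2(F.relu(self.fc1(x)))   # raw logits

With this layout the train/evaluate loops would call model(head, short, author) directly and the rnn_models list disappears.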


def train(model, rnn_models, iterator, optimizer, criterion):
    model.train()
    for m in rnn_models:
        m.train()
    epoch_loss = 0
    for batch_idx, batch in enumerate(iterator):
        head = batch.t1.to(device=device)
        short = batch.t2.to(device=device)
        author = batch.au.to(device=device)
        targets = batch.l.to(device=device)

        scores = model(rnn_models[0], rnn_models[1], head, short, author)
        loss = criterion(scores, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)


def evaluate(model, rnn_models, iterator, criterion):
    model.eval()
    for m in rnn_models:
        m.eval()
    epoch_loss = 0
    with torch.no_grad():   # no gradients needed during evaluation
        for batch_idx, batch in enumerate(iterator):
            head = batch.t1.to(device=device)
            short = batch.t2.to(device=device)
            author = batch.au.to(device=device)
            targets = batch.l.to(device=device)

            scores = model(rnn_models[0], rnn_models[1], head, short, author)
            loss = criterion(scores, targets)

            epoch_loss += loss.item()

    return epoch_loss / len(iterator)
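
Both loops track only the loss; if accuracy is also wanted, a minimal helper (a sketch, assuming integer class targets as above) could be accumulated alongside epoch_loss:

def batch_accuracy(scores, targets):
    # fraction of examples whose highest-scoring class matches the target
    preds = scores.argmax(dim=1)
    return (preds == targets).float().mean().item()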


import time

for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = train(model, [model_headline, model_short_description], train_iterator, optimizer, criterion)
    valid_loss = evaluate(model, [model_headline, model_short_description], test_iterator, criterion)
    end_time = time.time()

    print(f'Epoch: {epoch + 1:02} | Epoch Time: {(end_time - start_time) / 60:.2f} min')
    print(f'\tTrain Loss: {train_loss:.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f}')


The dataset is the News Category Dataset from Kaggle.