RuntimeError: stack expects each tensor to be equal size

I apologize in advance if this has been asked before. I genuinely did not understand the solutions I found, since my application is NLP rather than computer vision with images.

import torch
from torch.utils import data
from sklearn.model_selection import train_test_split

MAX_LEN = 160
BATCH_SIZE = 16
EPOCHS = 10

class GPReviewDataset(data.Dataset):
    def __init__(self, review, target, tokenizer, max_len):
        self.review = review
        self.target = target
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.review)
    
    def __getitem__(self, item):
        review = str(self.review[item])
        
        encoding = self.tokenizer.encode_plus(text=review,
                                              max_length=self.max_len,
                                              add_special_tokens=True, padding='max_length',
                                              return_attention_mask=True,
                                              return_token_type_ids=False, return_tensors='pt')
        
        return {'review': review,
                'input_ids': encoding['input_ids'].flatten(),            # drop the extra batch dim added by return_tensors='pt'
                'attention_mask': encoding['attention_mask'].flatten(),
                'targets': torch.tensor(self.target[item], dtype=torch.long)}
   

# 80% train, 10% validation, 10% test
free_df_train, free_df_test = train_test_split(free_df, test_size=0.2)
free_df_val, free_df_test = train_test_split(free_df_test, test_size=0.5)


def create_data_loader(df, tokenizer, max_len, batch_size):
    # build the dataset from the DataFrame passed in (not the full free_df),
    # so each split gets its own loader
    ds = GPReviewDataset(review=df.content.to_numpy(),
                         target=df['score'].to_numpy(),
                         tokenizer=tokenizer,
                         max_len=max_len)
    
    return data.DataLoader(ds, batch_size=batch_size, 
                           num_workers=0)


train_data_loader = create_data_loader(free_df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(free_df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(free_df_test, tokenizer, MAX_LEN, BATCH_SIZE)


sample = next(iter(train_data_loader))  # renamed from `data` so it does not shadow torch.utils.data

I wrote a function later on that takes train_data_loader during training, but it raises the RuntimeError shown below. It seems like the proper solution is to use some sort of collate_fn; however, I am confused about how exactly to apply that function.
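From what I have read, a collate_fn along these lines is what people mean (a rough sketch using torch.nn.utils.rnn.pad_sequence, assuming 0 is the tokenizer's pad token id), but I am not sure it is the right fix for my case:

from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is a list of the dicts returned by GPReviewDataset.__getitem__
    return {
        'review': [item['review'] for item in batch],
        # pad every sequence in the batch up to the longest one
        'input_ids': pad_sequence([item['input_ids'] for item in batch],
                                  batch_first=True, padding_value=0),
        'attention_mask': pad_sequence([item['attention_mask'] for item in batch],
                                       batch_first=True, padding_value=0),
        'targets': torch.stack([item['targets'] for item in batch]),
    }

# it would be passed to the loader like this:
# data.DataLoader(ds, batch_size=batch_size, num_workers=0, collate_fn=pad_collate)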

The full traceback is below:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<timed exec> in <module>

<ipython-input-26-8ba1e19dd195> in train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples)
      4     correct_predictions = 0
      5 
----> 6     for i in data_loader:
      7         input_ids = i['input_ids'].to(device)
      8         attention_mask = i['attention_mask'].to(device)

~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    361 
    362     def __next__(self):
--> 363         data = self._next_data()
    364         self._num_yielded += 1
    365         if self._dataset_kind == _DatasetKind.Iterable and \

~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
    401     def _next_data(self):
    402         index = self._next_index()  # may raise StopIteration
--> 403         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    404         if self._pin_memory:
    405             data = _utils.pin_memory.pin_memory(data)

~\Anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
     45         else:
     46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)

~\Anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py in default_collate(batch)
     72         return batch
     73     elif isinstance(elem, container_abcs.Mapping):
---> 74         return {key: default_collate([d[key] for d in batch]) for key in elem}
     75     elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
     76         return elem_type(*(default_collate(samples) for samples in zip(*batch)))

~\Anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py in <dictcomp>(.0)
     72         return batch
     73     elif isinstance(elem, container_abcs.Mapping):
---> 74         return {key: default_collate([d[key] for d in batch]) for key in elem}
     75     elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
     76         return elem_type(*(default_collate(samples) for samples in zip(*batch)))

~\Anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py in default_collate(batch)
     53             storage = elem.storage()._new_shared(numel)
     54             out = elem.new(storage)
---> 55         return torch.stack(batch, 0, out=out)
     56     elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
     57             and elem_type.__name__ != 'string_':

RuntimeError: stack expects each tensor to be equal size, but got [160] at entry 0 and [376] at entry 5

I suspect tokenizer.encode_plus is the problem. Can you show the output of
1)

tokenizer.encode_plus(text="I am in pytorch", 
                                         max_length=50,
                                         add_special_tokens=True, padding='max_length', 
                                         return_attention_mask=True, 
                                         return_token_type_ids=False, return_tensors='pt')

and
2)

tokenizer.encode_plus(text="I am not in pytorch", 
                                         max_length=50,
                                         add_special_tokens=True, padding='max_length', 
                                         return_attention_mask=True, 
                                         return_token_type_ids=False, return_tensors='pt')
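Both of those short sentences will just come back padded to length 50, so their shapes will look identical. The [376] entry in the traceback suggests the real trouble is a review that tokenizes to more than max_len tokens: padding='max_length' pads short sequences, but on the setup that produced this traceback it does not cut long ones down. A quick check with the same tokenizer:

long_review = "this app keeps crashing " * 100   # tokenizes to far more than 50 tokens

encoding = tokenizer.encode_plus(text=long_review,
                                 max_length=50,
                                 add_special_tokens=True, padding='max_length',
                                 return_attention_mask=True,
                                 return_token_type_ids=False, return_tensors='pt')

print(encoding['input_ids'].shape)   # comes back longer than [1, 50] here, because nothing truncates it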

I found the error. I needed to add truncation=True to the encode_plus arguments; without it, reviews longer than max_len keep their full length, so the batch contains tensors of different sizes and default_collate cannot stack them.
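For anyone else who lands here, the only change needed inside __getitem__ is the truncation flag:

encoding = self.tokenizer.encode_plus(text=review,
                                      max_length=self.max_len,
                                      add_special_tokens=True, padding='max_length',
                                      truncation=True,              # cut reviews longer than max_len down to max_len
                                      return_attention_mask=True,
                                      return_token_type_ids=False, return_tensors='pt')

With that, every input_ids and attention_mask tensor is exactly max_len long, and the default collate function can stack the batch without a custom collate_fn.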