TypeError: '<' not supported between instances of 'Example' and 'Example'

I’m building a sentiment analysis model for Korean text.
But I got this error: “TypeError: ‘<’ not supported between instances of ‘Example’ and ‘Example’”

This is a simplified version of my code:

import torch
from torchtext import data
from torchtext import datasets
from soynlp.tokenizer import MaxScoreTokenizer
# Fixed seed so that repeated runs of this script are comparable.
SEED = 1234

# Seed PyTorch's random number generator.
torch.manual_seed(SEED)
# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

# Note: after tokenizing, some texts will be null (empty).
# NOTE(review): `tokenizer`, `generate_bigrams` and `stop_words` are not
# defined in this excerpt — presumably they are defined elsewhere.
TEXT = data.Field(tokenize = tokenizer, preprocessing = generate_bigrams, stop_words = stop_words)
# Label field; labels are stored as floats.
LABEL = data.LabelField(dtype = torch.float)

from torchtext.data import TabularDataset
# (column name, Field) pairs passed to TabularDataset below.
# Presumably the CSV columns are text first, then label — verify against the file.
fields = [(“text”, TEXT),(“label”, LABEL)]

# Load the train and test splits from CSV using the fields defined above.
# NOTE: `test` points at the same file as `train`; per the post this was done
# on purpose while trying to track down the error.
train_data, test_data = data.TabularDataset.splits(
path = ‘’,
train = ‘train.csv’,
test = ‘train.csv’,
format = ‘csv’,
fields = fields,
skip_header = True
)

import torchtext
# Load pretrained word vectors from 'wiki.ko.vec', caching under ./Downloads/.
vec = torchtext.vocab.Vectors(‘wiki.ko.vec’, cache=’./Downloads/’)
# Build the text vocabulary from the training split only and attach the vectors.
# NOTE(review): MAX_VOCAB_SIZE is not defined in this excerpt.
# unk_init is the initializer applied to vectors of words that are missing
# from the pretrained vectors (here: in-place normal initialization).
TEXT.build_vocab(train_data,
max_size = MAX_VOCAB_SIZE,
vectors = vec,
unk_init = torch.Tensor.normal_)
# Build the label vocabulary from the training split.
LABEL.build_vocab(train_data)

BATCH_SIZE = 32

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device(‘cuda’ if torch.cuda.is_available() else ‘cpu’)

# Create one iterator per split.
# NOTE(review): no sort_key is passed here. The traceback shows the test
# iterator calling sorted(self.dataset, key=self.sort_key); without a key the
# Example objects are compared directly, which raises the TypeError.
train_iterator, test_iterator = data.BucketIterator.splits(
(train_data, test_data),
batch_size = BATCH_SIZE,
device = device)
# Print every batch of the training iterator.
print(‘Train:’)
for batch in train_iterator:
print(batch)

# Print every batch of the test iterator.
# Per the traceback, the TypeError is raised when this loop starts iterating.
print(‘Test:’)
for batch in test_iterator:
print(batch)

And then I got this error:

Test:

TypeError Traceback (most recent call last)
in
12
13 print(‘Test:’)
—> 14 for batch in test_iterator:
15 print(batch)

/anaconda3/lib/python3.7/site-packages/torchtext/data/iterator.py in __iter__(self)
140 def __iter__(self):
141 while True:
–> 142 self.init_epoch()
143 for idx, minibatch in enumerate(self.batches):
144 # fast-forward if loaded from state

/anaconda3/lib/python3.7/site-packages/torchtext/data/iterator.py in init_epoch(self)
116 self._random_state_this_epoch = self.random_shuffler.random_state
117
–> 118 self.create_batches()
119
120 if self._restored_from_state:

/anaconda3/lib/python3.7/site-packages/torchtext/data/iterator.py in create_batches(self)
240 def create_batches(self):
241 if self.sort:
–> 242 self.batches = batch(self.data(), self.batch_size,
243 self.batch_size_fn)
244 else:

/anaconda3/lib/python3.7/site-packages/torchtext/data/iterator.py in data(self)
101 """Return the examples in the dataset in order, sorted, or shuffled."""
102 if self.sort:
–> 103 xs = sorted(self.dataset, key=self.sort_key)
104 elif self.shuffle:
105 xs = [self.dataset[i] for i in self.random_shuffler(range(len(self.dataset)))]

TypeError: ‘<’ not supported between instances of ‘Example’ and ‘Example’
I don’t know why the error happened; I even used the same file for both train and test to try to find the cause!

Thank you in advance.

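This happens because the torchtext dataset iterator tries to sort the dataset (see `sorted(self.dataset, key=self.sort_key)` in the traceback), which implies comparing two elements. Since no `sort_key` was provided, Python falls back to comparing the `Example` objects directly with `<`, which `Example` does not support.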

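Apparently this is a known “feature”: you have to provide a sorting function to the constructor of the BucketIterator, as seen in this issue. For example, pass `sort_key=lambda x: len(x.text)` to `data.BucketIterator.splits(...)` so that examples are ordered by the length of their text.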

1 Like