Hi all, sorry for basic question.
Is it a correct way to build_vocab()?
import torch
from torchtext.data import Dataset, Example, Field
from torchtext.data import Iterator, BucketIterator
TEXT = Field(sequential=True, tokenize=lambda x: x.split(), lower=True)
LABEL = Field(sequential=False, use_vocab=False)
data = [("The mountain is hight", "A"), ("Work is quite interesting", "B")]
fs = [('text', LABEL), ('category', TEXT)]
examples = list(map(lambda x: Example.fromlist(list(x), fields=fs), data))
dt = Dataset(examples, fields=fs)
TEXT.build_vocab(dt, vectors="glove.6B.100d")
print(len(TEXT.vocab))
for el in data:
tokens = el[0].split()
print(tokens)
for t in tokens:
print(TEXT.vocab.stoi[t])
Asking because in my working code (above is a test) all indices are set to 0, as also in this example.