Error in simple text classification model


I'm stuck while trying to implement a very basic model.

The error occurs on the last line, when calculating the loss.

I have been trying to follow an example, but using my own artificially generated text data.

I thought the error was in how I generate batches (the dimensionality of the batches), but maybe it is somewhere else.

First I generate random sentences:

import torch
import torch.nn as nn
import numpy as np
from torchtext.data import Dataset, Example, Field
from torchtext.data import Iterator, BucketIterator

N = 100
words = ['world', 'hello', 'country', 'moon', 'planet', 'earth']
random_sent_lengths=np.random.randint(1, 10, N)

def generate_art_data():
    """ Generate random sentences from random words along with random target.
    for i in range(N):
        rand_sent = np.random.choice(words, random_sent_lengths[i])
        rand_y = np.random.randint(0,2,3) # Num of classes = 3
        yield (" ".join(rand_sent), rand_y)

Then I create the Dataset:

def create_datasets():
    """ Create datasets from the generated random data. """
    data = generate_art_data()
    TEXT = Field(sequential=True, tokenize=lambda x: x.split(), use_vocab=True, lower=True, fix_length=1000)
    LABEL = Field(sequential=False, use_vocab=False)

    trn_fields = [('text', TEXT), ('target', LABEL)]
    examples = list(
        map(lambda x: Example.fromlist(list(x), fields=trn_fields), data))
    dt_train = Dataset(examples, fields=trn_fields)
    trn, vld = dt_train.split(split_ratio=0.7)
    TEXT.build_vocab(trn)  # needed so that len(TEXT.vocab) works later
    return (trn, vld, TEXT)

Then I create the Iterators:

def create_iterators(batch_sizes=(3, 3)):
    """ Create Iterator objects from the generated datasets. """
    trn, vld, T = create_datasets()
    train_iter = Iterator(trn, batch_size=batch_sizes[0], sort_key=lambda x: len(x.text))
    val_iter = Iterator(vld, batch_size=batch_sizes[1], sort_key=lambda x: len(x.text))

    return train_iter, val_iter, T

BATCH_SIZES = (4, 4)

train_iter, val_iter, T = create_iterators(BATCH_SIZES)

After that I define the model:

EMBED_DIM      = 4
VOCAB_SIZE     = len(T.vocab)

class SuperSimpleModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_class):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fc = nn.Linear(embed_dim, num_class)

    def forward(self, text):
        embedded = self.embedding(text)
        return self.fc(embedded)
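Note that the text batches come out of the Field as (fix_length, batch) index tensors, so a model like the one above produces a 3-D output. A minimal standalone shape check (with made-up sizes, independent of torchtext):

```python
import torch
import torch.nn as nn

embedding = nn.Embedding(10, 4)  # vocab_size=10, embed_dim=4 (made-up values)
fc = nn.Linear(4, 3)             # num_class=3

text = torch.randint(0, 10, (1000, 4))  # (fix_length, batch_size) of token indices
output = fc(embedding(text))
print(output.shape)  # torch.Size([1000, 4, 3]): (fix_length, batch, num_class)
```

This (fix_length, batch, num_class) output is not the (batch, num_class) shape that nn.CrossEntropyLoss expects, which is one likely source of the error.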


model = SuperSimpleModel(VOCAB_SIZE, EMBED_DIM, num_class=3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

And I try to run it (at least one forward step):

for i, batch in enumerate(train_iter):
    if i < 5:
        output = model(batch.text)
        loss = criterion(output, batch.target)

And it produces an error. Where am I wrong?

Can you copy/paste the error messages?

In general, if your data are saved in a list, you should be able to use the same approach as the sentiment analysis example above. You can follow the example here to build the vocab and datasets.
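One hint about the loss itself: nn.CrossEntropyLoss expects logits of shape (batch, num_classes) and a target of shape (batch,) holding integer class indices, not one-hot or multi-label vectors like the length-3 random targets generated above. A small standalone illustration of the expected shapes:

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

logits = torch.randn(4, 3)            # (batch_size, num_classes) raw scores
targets = torch.tensor([0, 2, 1, 1])  # (batch_size,) integer class indices
loss = criterion(logits, targets)
print(loss.dim())  # 0: the loss is a scalar
```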