Hey there!
I’m relatively new to PyTorch, and I’ve been working on a neural network to do sentence classification with a text embedding model.
Here is my code:
# Load train, test, and dev data
data = MessagesDataset('./training/data_dev.csv')
# Create Model Object, specify parameters
net = LanguageClassifier()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# Create dataloader object to manage the data
dataloader = DataLoader(data, batch_size=1, shuffle=True, num_workers=5)
# For each epoch
for epoch in range(1):
# Reset Gradients and loss statistics
optimizer.zero_grad()
# For each training example
for data, label in enumerate(dataloader):
# Send data through neural net, calculate loss, and back propagate
output = net(data)
loss = criterion(output, label)
loss.backward()
optimizer.step()
And here’s my dataset object:
# Class for loading the messages dataset
class MessagesDataset(Dataset):
def __init__(self, csv_file):
self.messages_frame = pd.read_csv(csv_file)
self.embedding_model = gensim.models.KeyedVectors.load('./models/model.wordvectors')
self.sentence_length = 10
def __len__(self):
return len(self.messages_frame)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
# Tokenize the message and create appropriate label
message = []
message_words = word_tokenize(self.messages_frame.iloc[idx, 0])
for word in message_words:
if word not in stop_words and word in self.embedding_model.wv.vocab:
message.append(self.embedding_model.wv.vocab[word].index)
label = self.messages_frame.iloc[idx, 1]
if len(message) < self.sentence_length:
pad_zeroes = self.sentence_length - len(message)
for i in range(pad_zeroes):
message.append(0)
elif len(message) > self.sentence_length:
message = message[:10]
sample = {'message': message, 'label': [int(label) for l in range(len(message))]}
return sample
I’m having issues using the DataLoader to do minibatches. I’m following a tutorial online, but I keep having issues understanding the proper way to pass batched data into the neural network for training.
Thank you for your help in advance!