Trying to make a model that reads in a simple vector containing a review of a product and outputs a classification (favorable or unfavorable). The code for my classifier and training loop is below:
Model class definition:
class Classifier(nn.Module):
def __init__(self, initial_n_channels, n_classes, network_n_channels):
super(Classifier, self).__init__()
self.network = nn.Sequential(
nn.Conv1d(in_channels=initial_n_channels,
out_channels=network_n_channels,
kernel_size=args["kernel_size"]),
nn.ReLU(),
nn.Conv1d(in_channels=network_n_channels,
out_channels=network_n_channels,
kernel_size=args["kernel_size"],
stride=args["stride"]),
nn.ReLU(),
nn.Conv1d(in_channels=network_n_channels,
out_channels=network_n_channels,
kernel_size=args["kernel_size"],
stride=args["stride"]),
nn.ReLU(),
nn.Conv1d(in_channels=network_n_channels,
out_channels=network_n_channels,
kernel_size=args["kernel_size"],
stride=args["stride"]),
nn.ReLU()
)
self.fc = nn.Linear(network_n_channels, n_classes)
def forward(self, x_in, apply_sigmoid=False):
# diagnostics
print("classifier diagnostics", "\n",
"---------------------------------", "\n")
print("classifier x_in size: ", x_in.size())
print("classifier weight size: ", self.fc.weight.size())
features = self.network(x_in)
prediction_vector = self.fc(features)
if apply_sigmoid:
prediction_vector = F.sigmoid(prediction_vector, dim=1)
return prediction_vector.double()
Instantiation:
# dataset and vectorizer
dataset = ReviewDataset.load_and_vectorize(args["review_csv"])
vectorizer = dataset.get_vectorizer()
# model
classifier = Classifier(initial_n_channels=len(vectorizer.review_vocab),
n_classes=len(vectorizer.rating_vocab),
network_n_channels=args["num_channels"]).double()
# loss and optimizer
loss_func = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args["learning_rate"])
Training loop:
for epoch_index in range(args["num_epochs"]):
train_state["epoch_index"] = epoch_index
# set up batch generator, initialize loss and
# accuracy each outer loop, set train mode on
dataset.set_split("train")
dataloader = DataLoader(dataset=dataset,
batch_size=args["batch_size"],
drop_last=args["drop_last"])
running_loss = 0.0
running_acc = 0.0
classifier.train()
for batch_index, batch_dict in enumerate(dataloader):
# five-step training routine
# diagnostic stats
print("\n", "training loop diagnostics", "\n",
"---------------------------------", "\n")
print("batch tensor dimensions: ", batch_dict["x_data"].shape)
print("labels: ", batch_dict["y_target"])
# i. zero the gradients
optimizer.zero_grad()
# ii. compute the output
y_pred = classifier.forward(x_in=batch_dict["x_data"].unsqueeze(dim=2))
# iii. compute the loss
loss = loss_func(y_pred, batch_dict["y_target"].float())
loss_batch = loss.item()
running_loss += (loss_batch - running_loss) / (batch_index + 1)
# iv. use loss to produce gradients
loss.backward()
# v. use optimizer to take gradient step
optimizer.step()
# -----------------------------------
# compute accuracy score
acc_batch = compute_accuracy(y_pred, batch_dict["y_target"])
running_acc += (acc_batch - running_acc) / (batch_index + 1)
train_state["train_loss"].append(running_loss)
train_state["train_acc"].append(running_acc)
# iterate over validation dataset
# set up batch generator, set loss and acc to
# zero, and set eval mode on
dataset.set_split("val")
dataloader = DataLoader(dataset=dataset, batch_size=args.batch_size)
running_loss = 0.0
running_acc = 0.0
classifier.eval()
for batch_index, batch_dict in enumerate(dataloader):
# i. compute output
y_pred = classifier.forward(x_in=batch_dict["x_data"].unsqueeze(dim=2))
# ii. compute loss
loss = loss_func(y_pred, batch_dict["y_target"].float())
loss_batch = loss.item()
running_loss += (loss_batch - running_loss) / (batch_index + 1)
# iii. compute accuracy
acc_batch = compute_accuracy(y_pred, batch_dict["y_target"])
running_acc += (acc_batch - running_acc) / (batch_index + 1)
train_state["val_loss"].append(running_loss)
train_state["val_acc"].append(running_acc)
Output:
training loop diagnostics
---------------------------------
batch tensor dimensions: torch.Size([128, 7882])
labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0])
classifier diagnostics
---------------------------------
classifier x_in size: torch.Size([128, 7882, 1])
classifier weight size: torch.Size([2, 128])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-213-2327bb74133d> in <module>
26
27 # ii. compute the output
---> 28 y_pred = classifier.forward(x_in=batch_dict["x_data"].unsqueeze(dim=2))
29
30 # iii. compute the loss
<ipython-input-209-c8b508905fa0> in forward(self, x_in, apply_sigmoid)
45 print("classifier weight size: ", self.fc.weight.size())
46 features = self.network(x_in)
---> 47 prediction_vector = self.fc(features)
48 if apply_sigmoid:
49 prediction_vector = F.sigmoid(prediction_vector, dim=1)
... blah blah ...
RuntimeError: size mismatch, m1: [16384 x 1], m2: [128 x 2] at ../aten/src/TH/generic/THTensorMath.cpp:752
Selected parameters/hyperparameters:
args = {
... blah blah ...
# Model Hyperparameters
"num_channels": 128,
"kernel_size": 1,
"stride": 1,
# Training Hyperparameters
"batch_size": 128,
"early_stopping_criteria": 5,
"learning_rate": 0.001,
"num_epochs": 100,
"drop_last": True
}
How can I fix the dimensions of my batch tensors so that they’re of the proper size?