I’m getting dimension out of range (expected to be in range of [-1, 0], but got 1) for the following code snippet. The size of the output is [20,133] and that of the target is [20].
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
"""returns trained model"""
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf
global total_train_loss
global total_valid_loss
if use_cuda:
model.cuda()
for epoch in range(1, n_epochs+1):
# initialize variables to monitor training and validation loss
train_loss = 0.0
valid_loss = 0.0
###################
# train the model #
###################
model.train()
for batch_idx, (data, target) in enumerate(loaders['train']):
# move to GPU
if use_cuda:
data = data.cuda()
target = target.cuda()
## find the loss and update the model parameters accordingly
## record the average training loss, using something like
## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
optimizer.zero_grad()
output = model(data)
print(output[0].shape,target.shape)
loss = criterion(output[0],target)
optimizer.step()
train_loss += ((1/(batch_idx+1))*(loss.data-train_loss))
######################
# validate the model #
######################
model.eval()
for batch_idx, (data, target) in enumerate(loaders['valid']):
# move to GPU
if use_cuda:
data, target = data.cuda(), target.cuda()
## update the average validation loss
output = model(data)
loss = criterion(output[0],target)
valid_loss += ((1/(batch_idx+1))*(loss.data-valid_loss))
# print training/validation statistics
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
epoch,
train_loss,
valid_loss
))
## TODO: save the model if validation loss has decreased
total_train_loss.append(train_loss)
total_valid_loss.append(valid_loss)
if valid_loss <= valid_loss_min:
print("Saving the model.....")
torch.save(model.state_dict(),save_path)
valid_loss_min = valid_loss
# return trained model
return model
The code runs for few data points in the batch but then stops with this error message. Any help?
Edit:
I doubt there is some error with the data loader cause in the training loop the output[0].size is [20,133] while for validation loop is [133]. Following is the code for the data loader.
import os
from torchvision import datasets
from glob import glob
### TODO: Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes
data_dir = "/data/dog_images/"
dog_files_train = os.path.join(data_dir, "train/")
dog_files_test = os.path.join(data_dir , "test/")
dog_files_valid = os.path.join(data_dir ,"valid/")
data_transform = transforms.Compose([transforms.CenterCrop(299),
transforms.RandomHorizontalFlip(),transforms.RandomRotation(40),
transforms.ToTensor()])
data_transform_test = transforms.Compose([transforms.Resize(299),transforms.CenterCrop(299),
transforms.ToTensor()])
train_data = datasets.ImageFolder(dog_files_train, transform = data_transform)
test_data = datasets.ImageFolder(dog_files_train, transform = data_transform_test)
valid_data = datasets.ImageFolder(dog_files_valid, transform = data_transform)
batch_size = 20
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_data,batch_size = batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size = batch_size, shuffle = True)