My CNN is returning a final loss of nan and all of the predictions are also nan.
Could anyone take a look at my code and help me figure out where I’ve gone so hopelessly wrong?
Some notes:
- I gave the first conv layer 394 input channels
- I thought it should be 3 (for RGB), but I got an error message telling me to change it to 394 (see the shape check just after these notes)
- I was getting the error "Expected object of scalar type Double but got scalar type Float for argument #2 'weight'"
- so I call the model with prediction = model(inputs.float()), which made that error go away
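In case it helps with the channel question, here is the quick check I can run on the data (a sketch; X_train_norm is defined in the code below, and I'm assuming it holds the images as arrays):

import torch
x0 = torch.as_tensor(X_train_norm[0])
print(x0.shape, x0.dtype)
# If this prints torch.Size([394, 394, 3]), the channel axis is last (NHWC),
# while Conv2d expects [batch, channels, height, width]; that might be why
# PyTorch asked for 394 input channels.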
As you might guess, I'm a newbie to NNs and CNNs. I'm doing this for a school project and have attended some workshops, where I got the code below (which I've adapted).
Thanks in advance for the help.
## Making the data for the NN
import torch
import torch.nn.functional as F

# Using a small sample of the dataset
X_train_norm = X_train_norm[:100]
y_train = y_train_norm[:100]
X_test_norm = X_test_norm[:100]
y_test = y_test_norm[:100]
train_data = []
for i in range(len(X_train_norm)):
    train_data.append([X_train_norm[i], y_train[i]])

test_data = []
for i in range(len(X_test_norm)):
    test_data.append([X_test_norm[i], y_test[i]])
train_data, val_data = torch.utils.data.random_split(train_data, [int(.8 * len(train_data)), int(.2 * len(train_data))])  # 80/20 train/validation split
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=100)
val_loader = torch.utils.data.DataLoader(val_data, shuffle=True, batch_size=100)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=True, batch_size=100)
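In case the problem is in the data itself rather than the model, here's a quick nan check on the normalized arrays (just a sanity sketch, assuming X_train_norm and y_train are NumPy arrays or tensors):

print(torch.isnan(torch.as_tensor(X_train_norm)).any())     # True would mean the inputs already contain nans
print(torch.isnan(torch.as_tensor(y_train).float()).any())  # same check for the labels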
## My NN
class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Conv2d(in_channels, out_channels, kernel_size):
        #   in_channels  - number of channels coming in (e.g. the colour channels at the 1st layer)
        #   out_channels - number of different filters we use
        #   kernel_size  - height x width of each filter
        #   stride       - how many pixels we shift the kernel by each time
        self.conv_layers = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=394, out_channels=32, kernel_size=(5, 5), padding=2, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(5, 5), padding=2, stride=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=1, stride=1),
            torch.nn.ReLU(),
        )
        self.drop_out = torch.nn.Dropout()  # defined but not used in forward yet
        self.fc_layers = torch.nn.Sequential(
            torch.nn.Linear(25216, 1000),
            torch.nn.Linear(1000, 2),
        )

    def forward(self, x):
        x = self.conv_layers(x)     # pass through the conv layers
        x = x.view(x.shape[0], -1)  # flatten, ready for the fully connected layers
        x = self.fc_layers(x)       # pass through the fully connected layers
        x = F.softmax(x, dim=1)     # softmax activation on the outputs
        return x
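If it's useful, here's how the 25216 in the first Linear layer can be reproduced with a dummy batch (a sketch, assuming the inputs really are stored channels-last as [batch, 394, 394, 3], so the conv layers see 394 "channels" over a 394x3 image):

model = ConvNet()
dummy = torch.randn(1, 394, 394, 3)  # hypothetical channels-last batch of one image
out = model.conv_layers(dummy)
print(out.shape)                     # torch.Size([1, 128, 197, 1]); 128 * 197 * 1 = 25216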
## Global params
use_cuda = torch.cuda.is_available()  # check if a GPU is available
device = torch.device("cuda" if use_cuda else "cpu")
learning_rate = 0.0005  # set the learning rate
epochs = 5  # set the number of epochs

cnn = ConvNet().to(device)
criterion = torch.nn.CrossEntropyLoss()  # use the cross-entropy loss function
optimiser = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
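To rule out the loss setup itself, a quick sanity check of the criterion on random data (a sketch; the shapes just match the model's 2-class output):

dummy_logits = torch.randn(4, 2)
dummy_labels = torch.randint(0, 2, (4,))
print(criterion(dummy_logits, dummy_labels))  # should be a finite scalar, not nan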
## My training function
def train(model, epochs, verbose=True, tag='Loss/Train'):
for epoch in range(epochs):
for idx, (inputs, labels) in enumerate(train_loader, 0):
inputs, labels = inputs.to(device), labels.to(device)
# pass x through your model to get a prediction
prediction = model(inputs.float()) # pass the data forward through the model
loss = criterion(prediction, labels) # compute the cost
if verbose: print('Epoch:', epoch, '\tBatch:', idx, '\tLoss:', loss.item())
optimiser.zero_grad() # reset the gradients attribute of all of the model's params to zero
loss.backward() # backward pass to compute and store all of the model's param's gradients
optimiser.step() # update the model's parameters
print('Training Complete. Final loss =',loss.item())
return model(inputs.float()) # returns preds to make auc and roc curve
cnn_preds = train(cnn, epochs, verbose = False)
cnn_preds
Output:
Training Complete. Final loss = nan
Out[155]:
tensor([[nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan]], grad_fn=<SoftmaxBackward>)
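If it helps narrow things down, I can rerun training with PyTorch's anomaly detection switched on (a sketch; it makes the backward pass raise an error at the first operation that produces a nan gradient):

torch.autograd.set_detect_anomaly(True)
cnn_fresh = ConvNet().to(device)  # fresh model, so the nans from the run above don't carry over
optimiser = torch.optim.Adam(cnn_fresh.parameters(), lr=learning_rate)
train(cnn_fresh, 1, verbose=True)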