I am trying to convert a keras CNN model I created with the functional API to PyTorch. As a sanity check, I made sure the output of my model summary (torchsummary) matched the keras model summary. However, when feeding my PyTorch model images in batches for training purposes, the error “IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)” is thrown. The keras model as well as my (flawed) PyTorch implementation looks like this:
Keras:
from tensorflow.keras import Input, layers
from tensorflow.keras.models import Model
image_size = 200
input_tensor = Input(shape = (image_size, image_size, 3))
x = layers.Conv2D(32, kernel_size = (3,3), strides = (1,1),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(input_tensor)
y = layers.MaxPool2D(pool_size = (2,2), strides = (2,2))(x)
x = layers.Conv2D(32, kernel_size = (3,3), strides = (1,1),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(y)
x = layers.Conv2D(32, kernel_size = (3,3), strides = (1,1),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(x)
x = layers.MaxPool2D(pool_size = (2,2), strides = (2,2))(x)
residual = layers.Conv2D(32, kernel_size = (1,1), strides = (2,2),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(y)
y = layers.add([residual,x])
x = layers.Conv2D(64, kernel_size = (3,3), strides = (1,1),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(y)
x = layers.Conv2D(64, kernel_size = (3,3), strides = (1,1),
activation = 'relu',
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(x)
x = layers.MaxPool2D(pool_size = (2,2), strides = (2,2))(x)
residual = layers.Conv2D(64, kernel_size = (1,1), strides = (2,2),
input_shape = (image_size, image_size, 3),
data_format="channels_last",
padding = 'same')(y)
y = layers.add([residual,x])
x = layers.Flatten()(y)
x = layers.Dense(128, activation = 'relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation = 'relu')(x)
x = layers.Dropout(0.5)(x)
output_tensor = layers.Dense(4, activation = 'softmax')(x)
model = Model(input_tensor, output_tensor)
print(model.summary())
Model: "model_4"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) [(None, 200, 200, 3) 0
__________________________________________________________________________________________________
conv2d_28 (Conv2D) (None, 200, 200, 32) 896 input_5[0][0]
__________________________________________________________________________________________________
max_pooling2d_12 (MaxPooling2D) (None, 100, 100, 32) 0 conv2d_28[0][0]
__________________________________________________________________________________________________
conv2d_29 (Conv2D) (None, 100, 100, 32) 9248 max_pooling2d_12[0][0]
__________________________________________________________________________________________________
conv2d_30 (Conv2D) (None, 100, 100, 32) 9248 conv2d_29[0][0]
__________________________________________________________________________________________________
conv2d_31 (Conv2D) (None, 50, 50, 32) 1056 max_pooling2d_12[0][0]
__________________________________________________________________________________________________
max_pooling2d_13 (MaxPooling2D) (None, 50, 50, 32) 0 conv2d_30[0][0]
__________________________________________________________________________________________________
add_8 (Add) (None, 50, 50, 32) 0 conv2d_31[0][0]
max_pooling2d_13[0][0]
__________________________________________________________________________________________________
conv2d_32 (Conv2D) (None, 50, 50, 64) 18496 add_8[0][0]
__________________________________________________________________________________________________
conv2d_33 (Conv2D) (None, 50, 50, 64) 36928 conv2d_32[0][0]
__________________________________________________________________________________________________
conv2d_34 (Conv2D) (None, 25, 25, 64) 2112 add_8[0][0]
__________________________________________________________________________________________________
max_pooling2d_14 (MaxPooling2D) (None, 25, 25, 64) 0 conv2d_33[0][0]
__________________________________________________________________________________________________
add_9 (Add) (None, 25, 25, 64) 0 conv2d_34[0][0]
max_pooling2d_14[0][0]
__________________________________________________________________________________________________
flatten_4 (Flatten) (None, 40000) 0 add_9[0][0]
__________________________________________________________________________________________________
dense_12 (Dense) (None, 128) 5120128 flatten_4[0][0]
__________________________________________________________________________________________________
dropout_4 (Dropout) (None, 128) 0 dense_12[0][0]
__________________________________________________________________________________________________
dense_13 (Dense) (None, 64) 8256 dropout_4[0][0]
__________________________________________________________________________________________________
dropout_5 (Dropout) (None, 64) 0 dense_13[0][0]
__________________________________________________________________________________________________
dense_14 (Dense) (None, 4) 260 dropout_5[0][0]
==================================================================================================
Total params: 5,206,628
Trainable params: 5,206,628
Non-trainable params: 0
PyTorch:
import torch
import torch.nn as nn
from torchsummary import summary
class ResNet(nn.Module):
def __init__(self, num_classes=4):
super().__init__()
self.conv32_1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding='same',
bias=False)
self.conv32_2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding='same',
bias=False)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv64_1 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding='same',
bias=False)
self.conv64_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding='same',
bias=False)
self.conv32resid = nn.Conv2d(32, 32, kernel_size=1, stride=2, #padding='same',
bias=False)
self.conv64resid = nn.Conv2d(32, 64, kernel_size=1, stride=2, #padding='same',
bias=False)
self.avg = nn.AdaptiveAvgPool2d((1, 1))
self.fc1 = nn.Linear(40000 , 128)
self.fc2 = nn.Linear(128, 64)
self.out = nn.Linear(64, num_classes)
def forward(self, x):
x = self.conv32_1(x)
x = self.relu(x)
y = self.maxpool(x)
x = self.conv32_2(y)
x = self.relu(x)
x = self.conv32_2(x)
x = self.relu(x)
x = self.maxpool(x)
residual = self.conv32resid(y)
residual = self.relu(residual)
y = torch.cat((residual, x))
x = self.conv64_1(y)
x = self.relu(x)
x = self.conv64_2(x)
x = self.relu(x)
x = self.maxpool(x)
residual = self.conv64resid(y)
residual = self.relu(residual)
y = torch.cat((residual, x))
x = torch.flatten(y)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.out(x)
return x
model = ResNet()
summary(model.cuda(), (3, 200, 200))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 200, 200] 864
ReLU-2 [-1, 32, 200, 200] 0
MaxPool2d-3 [-1, 32, 100, 100] 0
Conv2d-4 [-1, 32, 100, 100] 9,216
ReLU-5 [-1, 32, 100, 100] 0
Conv2d-6 [-1, 32, 100, 100] 9,216
ReLU-7 [-1, 32, 100, 100] 0
MaxPool2d-8 [-1, 32, 50, 50] 0
Conv2d-9 [-1, 32, 50, 50] 1,024
ReLU-10 [-1, 32, 50, 50] 0
Conv2d-11 [-1, 64, 50, 50] 18,432
ReLU-12 [-1, 64, 50, 50] 0
Conv2d-13 [-1, 64, 50, 50] 36,864
ReLU-14 [-1, 64, 50, 50] 0
MaxPool2d-15 [-1, 64, 25, 25] 0
Conv2d-16 [-1, 64, 25, 25] 2,048
ReLU-17 [-1, 64, 25, 25] 0
Linear-18 [-1] 5,120,128
ReLU-19 [-1] 0
Linear-20 [-1] 8,256
ReLU-21 [-1] 0
Linear-22 [-1] 260
================================================================
Total params: 5,206,308
Trainable params: 5,206,308
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 39.37
Params size (MB): 19.86
Estimated Total Size (MB): 59.69
----------------------------------------------------------------
The code for training is the following:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import DataLoader
import numpy as np
image_size = 200
batch_size = 8
train_dataset = datasets.ImageFolder(train_dir, transform=transforms_train)
val_dataset = datasets.ImageFolder(val_dir, transform=transform_val)
test_dataset = datasets.ImageFolder(test_dir, transform=transform_val)
train_loader = DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
drop_last=True,
pin_memory=True)
val_loader = DataLoader(
val_dataset,
batch_size=batch_size)
test_loader = DataLoader(
test_dataset,
batch_size=batch_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
epochs = 20
min_valid_loss = np.inf
loss_values_train = []
loss_values_val = []
acc_values_train = []
acc_values_val = []
for e in range(epochs):
y_pred_train = []
y_true_train = []
train_loss = 0.0
model.train()
for data, labels in train_loader:
if torch.cuda.is_available():
data, labels = data.cuda(), labels.cuda()
optimizer.zero_grad()
target = model(data)
loss = criterion(target,labels)
loss.backward()
optimizer.step()
train_loss += loss.item() * data.size(0)
_, preds = torch.max(target, 1)
y_pred_train.append(preds.tolist())
y_true_train.append(labels.tolist())
y_pred_train = [item for sublist in y_pred_train for item in sublist]
y_true_train = [item for sublist in y_true_train for item in sublist]
train_acc = accuracy_score(y_true_train, y_pred_train)
acc_values_train.append(train_acc)
loss_values_train.append(train_loss / len(train_loader))
y_pred_val = []
y_true_val = []
valid_loss = 0.0
model.eval()
for data, labels in val_loader:
if torch.cuda.is_available():
data, labels = data.cuda(), labels.cuda()
target = model(data)
loss = criterion(target,labels)
valid_loss += loss.item() * data.size(0)
_, preds = torch.max(target, 1)
y_pred_val.append(preds.tolist())
y_true_val.append(labels.tolist())
y_pred_val = [item for sublist in y_pred_val for item in sublist]
y_true_val = [item for sublist in y_true_val for item in sublist]
val_acc = accuracy_score(y_true_val, y_pred_val)
acc_values_val.append(val_acc)
loss_values_val.append(valid_loss / len(val_loader))
print(f'Epoch {e+1}: \n Training Loss: {train_loss/len(train_loader)} \t Training Acc: {train_acc} \n Validation Loss: {valid_loss/len(val_loader)} \t Validation Acc: {val_acc}')
if min_valid_loss > valid_loss:
min_valid_loss = valid_loss
# Saving State Dict
torch.save(model.state_dict(), 'resnet_model.pth')
My guess is that the flatten operation at the end of my model is somehow causing problems. Any help would be greatly appreciated!