Conv3d Model generates a 1D tensor output instead of a batch

lezwon · February 2, 2019, 8:04am

Hello,
I am trying to classify a set of images using Conv3D. I’m pretty new to deep learning and not sure about this approach.
Here is my model:

class Model(nn.Module):
    """Conv3d classifier: three Conv3d/MaxPool3d stages followed by four Linear layers.

    The last Linear layer has 5 output features and ``forward`` applies
    ``log_softmax`` to its result.
    """
    
    def __init__(self):
        super(Model, self).__init__()
        # Convolutions use 3x3x3 kernels with stride 1 and padding 1, so they
        # change only the channel count (3 -> 64 -> 128 -> 64), not the volume size.
        self.conv1=nn.Conv3d(3,64,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        # Kernel/stride (1,2,2): halves the last two dims, leaves the first pooled dim as is.
        self.maxpool1=nn.MaxPool3d(kernel_size=(1,2,2),stride=(1,2,2))
        self.conv2=nn.Conv3d(64,128,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        # Kernel/stride (2,2,2): halves all three pooled dims.
        self.maxpool2=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
        self.conv3=nn.Conv3d(128, 64,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
        self.maxpool3=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))

        # Hard-coded input width of fc1.
        # NOTE(review): the name mentions "maxpool5" but only three pooling layers
        # exist. For a per-sample input of (3, 4, 224, 224) the last pooling layer
        # yields 64*1*28*28 = 50176 values; 401408 is 8x that - confirm the origin.
        self.num_out_maxpool5 = 401408
        self.fc1=nn.Linear(self.num_out_maxpool5,2048)#TBA
        self.fc2=nn.Linear(2048,1024)
        self.fc3=nn.Linear(1024,512)
        self.fc4=nn.Linear(512,5)
        
        # One Dropout module, reused after fc2 and after fc3 in forward().
        self.dropout=nn.Dropout(p=0.5)
        
    def forward(self, x):
        """Run the conv/pool stages, flatten, and apply the fully connected head."""
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = F.relu(self.conv3(x))
        x = self.maxpool3(x)
        # NOTE: reshape(-1) collapses EVERY dimension, the batch dimension
        # included, into a single 1-D vector, so the per-sample structure is
        # lost from here on.
        x = x.reshape(-1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)
        # NOTE(review): no `dim` is passed to log_softmax; relying on the
        # implicit dimension is deprecated in PyTorch - confirm the intended axis.
        return F.log_softmax(x)

Dataloader

# Pull one shuffled batch of 4 samples from the dataset to inspect its shapes.
dataloader = DataLoader(ds, batch_size=4, shuffle=True, num_workers=0)
batch = next(iter(dataloader))
images, labels = batch
# presumably (batch, channels, depth/frames, height, width), the layout Conv3d takes - TODO confirm
images.shape # torch.Size([4, 3, 4, 224, 224])
# one row of 5 values per sample; presumably one entry per class - verify against the dataset
labels.shape # torch.Size([4, 5])

Model

# Build the model on the GPU and run a single forward pass on the batch.
net = Model().cuda()
# NOTE(review): wrapping tensors in Variable is deprecated since PyTorch 0.4;
# also confirm `images` already lives on the GPU, since only the model is moved here.
inputs =  Variable(images)
output = net(inputs) #tensor([[-2.1499, -2.4374, -2.2974, -2.1973, -2.4695, -2.4120, -2.1355, -2.3000, -2.2593, -2.4376]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)

# Reported symptom: a single row comes back instead of one row per sample.
output.shape # torch.Size([1, 10])

The issue I’m facing is that whenever I pass a batch from the dataloader to the model, the output always has the incorrect shape torch.Size([1, 10]) instead of the expected torch.Size([4, 5]). What am I doing wrong here?

ezyang · February 4, 2019, 4:46pm

Your model implies a specific input size. So, if your training set images are a different size, you’ll have to preprocess them to the correct size (somehow) before you can feed them into your model. See https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

lezwon · February 5, 2019, 3:47pm

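So I figured out what the issue was. In my forward function I used x = x.reshape(-1), which flattened all the dimensions of the tensor, including the batch dimension, into a single 1-D vector. I replaced it with x = x.view(-1, self.__num_flat_features(x)), which keeps the batch dimension and flattens only the remaining dimensions of each sample, and it works fine now. Here is my model: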

class Model(nn.Module):
    """Conv3d classifier: three conv/pool stages feeding a four-layer linear head.

    ``forward`` returns the raw output of the last linear layer (5 values per
    sample); no softmax is applied inside the model.
    """

    def __init__(self):
        super(Model, self).__init__()

        # Feature extractor. Every convolution uses a 3x3x3 kernel with stride 1
        # and padding 1, so only the pooling layers shrink the volume.
        self.conv1 = nn.Conv3d(3, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv2 = nn.Conv3d(64, 128, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv3 = nn.Conv3d(128, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        # Width of the flattened per-sample feature map consumed by fc1.
        self.num_out_maxpool5 = 64 * 1 * 28 * 28

        # Classifier head: num_out_maxpool5 -> 2048 -> 1024 -> 512 -> 5.
        self.fc1 = nn.Linear(self.num_out_maxpool5, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 5)

        # Shared dropout module, applied after fc2 and after fc3.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Apply the conv/pool stages, flatten per sample, then the linear head."""
        stages = (
            (self.conv1, self.maxpool1),
            (self.conv2, self.maxpool2),
            (self.conv3, self.maxpool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # Keep the batch axis and merge every remaining axis into one.
        flat_width = self.__num_flat_features(x)
        x = x.view(-1, flat_width)

        x = F.relu(self.fc1(x))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        return self.fc4(x)

    def __num_flat_features(self, x):
        """Return the element count of one sample: the product of all dims after the first."""
        per_sample_shape = x.size()[1:]
        return per_sample_shape.numel()