Hello,
I am trying to classify a set of images using Conv3D. I’m pretty new to deep learning and not sure about this approach.
Here is my model:
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.conv1=nn.Conv3d(3,64,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
self.maxpool1=nn.MaxPool3d(kernel_size=(1,2,2),stride=(1,2,2))
self.conv2=nn.Conv3d(64,128,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
self.maxpool2=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
self.conv3=nn.Conv3d(128, 64,kernel_size=(3,3,3),stride=1,padding=(1,1,1))
self.maxpool3=nn.MaxPool3d(kernel_size=(2,2,2),stride=(2,2,2))
self.num_out_maxpool5 = 401408
self.fc1=nn.Linear(self.num_out_maxpool5,2048)#TBA
self.fc2=nn.Linear(2048,1024)
self.fc3=nn.Linear(1024,512)
self.fc4=nn.Linear(512,5)
self.dropout=nn.Dropout(p=0.5)
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.maxpool1(x)
x = F.relu(self.conv2(x))
x = self.maxpool2(x)
x = F.relu(self.conv3(x))
x = self.maxpool3(x)
x = x.reshape(-1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.dropout(x)
x = F.relu(self.fc3(x))
x = self.dropout(x)
x = self.fc4(x)
return F.log_softmax(x)
Dataloader
dataloader = DataLoader(ds, batch_size=4, shuffle=True, num_workers=0)
batch = next(iter(dataloader))
images, labels = batch
images.shape # torch.Size([4, 3, 4, 224, 224])
labels.shape # torch.Size([4, 5])
Model
net = Model().cuda()
inputs = Variable(images)
output = net(inputs) #tensor([[-2.1499, -2.4374, -2.2974, -2.1973, -2.4695, -2.4120, -2.1355, -2.3000, -2.2593, -2.4376]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
output.shape # torch.Size([1, 10])
The issue I’m facing is that whenever I attempt to train the model using a dataloader, and pass in a batch, I receive the output always in an incorrect shape torch.Size([1, 10]) instead of torch.Size([4, 5]). What am I doing wrong here?