Thanks!
I’ve made some changes to the model according to my input.
Image input is torch.Size([1, 4, 224, 224]), numeric input (48 landmarks with 2 coordinates each) torch.Size([1, 48, 2]) and a label with torch.Size([1]).
class MixedNetwork(nn.Module):
def __init__(self):
super(MixedNetwork, self).__init__()
image_modules = list(models.resnet18().children())[:-1]
self.image_features = nn.Sequential(*image_modules)
self.landmark_features = nn.Sequential(
nn.Linear(in_features=2,out_features=16,bias=False),
nn.ReLU(inplace=True),
nn.Dropout(p=0.25),
nn.Linear(in_features=16,out_features=2,bias=False),
nn.ReLU(inplace=True),
nn.Dropout(p=0.25))
self.combined_features = nn.Sequential(
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,2)
)
def forward(self, image, landmarks):
a = self.image_features(image)
b = self.landmark_features(landmarks)
x = torch.cat((a.view(a.size(0), -1), b.view(b.size(0), -1)), dim=1)
x = self.combined_features(x)
x = F.sigmoid(x)
return x
I keep getting the error:
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 1, 4, 224, 224]
Here I don’t understand why there’s an extra one in the size - the input tensor is [1, 4, 224, 224]. Secondly, the picture is colored, so 4 channels are okay for resnet18, right?