I’m using a pretrained ResNet-50. My input images have 4 channels and are of size 256×256. I added an initial convolutional layer that accepts 4 channels and outputs 3 channels, and after it I use torchvision transforms to resize the output to 224×224 so that it can be passed to the ResNet. However, during the transform, transforms.ToPILImage() raises ValueError: pic should be 2/3 dimensional. Got 4 dimensions. I suspect the fourth dimension is the batch dimension, since the tensor leaving the conv layer has shape (batch, 3, 256, 256).
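I can reproduce the same error outside the model with just the transform (a minimal sketch; the shape here is my assumption about what actually reaches it):

import torch
from torchvision import transforms

to_pil = transforms.ToPILImage()
batch = torch.rand(8, 3, 256, 256)  # batched (N, C, H, W) tensor: 4 dimensions
to_pil(batch)  # ValueError: pic should be 2/3 dimensional. Got 4 dimensions.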
Here’s the full model code:
import torch.nn as nn
import torchvision
from torchvision import transforms

class Model(nn.Module):
    def __init__(self, n_clarity_classes, n_mine_classes, n_binary_classes):
        super().__init__()
        # Initial conv layer: accepts the 4-channel input and gives out 3 channels
        self.input_layer = nn.Conv2d(in_channels=4, out_channels=3,
                                     kernel_size=3, padding=1, stride=1)
        # Resize the conv output to 224x224 via a PIL round-trip
        self.input_trans = transforms.Compose([transforms.ToPILImage(mode='CMYK'),
                                               transforms.Resize((224, 224)),
                                               transforms.ToTensor()])
        self.base_model = torchvision.models.resnet50(pretrained=True)
        self.lastlayer_out = self.base_model.fc.out_features

        # Freeze the pretrained backbone
        for child in self.base_model.children():
            for param in child.parameters():
                param.requires_grad = False

        # Creating separate classifiers for our labels
        self.clarity = nn.Sequential(nn.Dropout(p=0.3),
                                     nn.Linear(self.lastlayer_out, 512),
                                     nn.ReLU(),
                                     nn.Dropout(p=0.3),
                                     nn.Linear(512, n_clarity_classes),
                                     nn.LogSoftmax(dim=1))
        self.mining = nn.Sequential(nn.Dropout(p=0.3),
                                    nn.Linear(self.lastlayer_out, 512),
                                    nn.ReLU(),
                                    nn.Dropout(p=0.3),
                                    nn.Linear(512, n_mine_classes),
                                    nn.LogSoftmax(dim=1))
        self.binary = nn.Sequential(nn.Dropout(p=0.3),
                                    nn.Linear(self.lastlayer_out, 512),
                                    nn.ReLU(),
                                    nn.Dropout(p=0.3),
                                    nn.Linear(512, n_binary_classes),
                                    nn.Sigmoid())

    def forward(self, x):
        w = self.input_layer(x)  # Conv layer accepts 4 channels as input and gives 3 channels
        print(w.shape)           # torch.Size([batch, 3, 256, 256])
        y = self.input_trans(w)  # Resize to 224x224: this is the line that raises the ValueError
        z = self.base_model(y)   # Pass through the pretrained ResNet
        return {
            'clarity': self.clarity(z),
            'mining': self.mining(z),
            'binary': self.binary(z)
        }
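Since ToPILImage appears to operate on single images rather than batches (and the PIL round-trip would also detach the tensors from the autograd graph), I’m considering resizing the batched tensor directly with torch.nn.functional.interpolate instead. Would a forward pass along these lines be the right approach?

import torch.nn.functional as F

def forward(self, x):
    w = self.input_layer(x)  # (batch, 3, 256, 256)
    # Differentiable resize that works on batched tensors, no PIL round-trip
    y = F.interpolate(w, size=(224, 224), mode='bilinear', align_corners=False)
    z = self.base_model(y)
    return {
        'clarity': self.clarity(z),
        'mining': self.mining(z),
        'binary': self.binary(z)
    }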