Here is my custom VGG19 class
class EncoderCNN(nn.Module):
def init(self, cnn_output_size):
super(EncoderCNN, self).__init__()
vggnet = models.vgg19(pretrained=True)
self.vggmodel = vggnet.features
self.linear = nn.Linear(100352, cnn_output_size)
self.tanh = nn.Tanh()
self.dropout = nn.Dropout(0.5)
def forward(self, image_tensor):
#INPUT = Image as Tensor
features = self.vggmodel(image_tensor)
features = features.view(-1,512*14*14)
features = self.linear(features)
features = self.dropout(self.Tanh(features))
print(features.size())
return features
When I build the model in main(), this is the structure I get:
EncoderCNN(
(vggmodel): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace)
(16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): ReLU(inplace)
(18): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace)
(23): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(24): ReLU(inplace)
(25): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(26): ReLU(inplace)
(27): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): ReLU(inplace)
(32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(33): ReLU(inplace)
(34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): ReLU(inplace)
(36): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(linear): Linear(in_features=100352, out_features=512, bias=True)
(tanh): Tanh()
(dropout): Dropout(p=0.5)
)
train_loader = torch.utils.data.DataLoader(d, **batch_size= 4**,
sampler=SubsetRandomSampler(train_indices),
shuffle = False,
num_workers = 0,collate_fn = customBatchBuilder)
batch contains image, question, ques_len and ans
image tensor is of 3 x 448 x 448 tensor
Now, when I execute the below lines,
for i, batch in enumerate(train_loader):
image, question, ques_len, ans = batch
optimizer.zero_grad()
img_emb = image_model(image)
I get this error:
TypeError: conv2d(): argument ‘input’ (position 1) must be Tensor, not tuple
Please help!