Alright, so I have this hypothetical model that takes in 2 inputs called it and vim, which are models that both output a tensor from range [[0]] or [[1]]. When I try training this though, there is an error:
warnings.warn(
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-104-f92cb761d1cf> in <cell line: 1>()
----> 1 train_combined(done_pneumnist, train_pneumnist, loss_fn, optimizer_done)
12 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
123
124 def forward(self, input: Tensor) -> Tensor:
--> 125 return F.linear(input, self.weight, self.bias)
126
127 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x64 and 2x224)
import torch.nn.functional as F
# Create the combination of ViMamba and the ViT
class CombinedViTM(nn.Module):
def __init__(self, vit, vim):
super(CombinedViTM, self).__init__()
# Convolutional layers for efficient feature extraction.
self.conv1 = nn.Conv2d(3, 16, 3, padding = 1)
self.conv2 = nn.Conv2d(16, 16, 3, padding = 1)
self.conv3 = nn.Conv2d(16, 32, 3, padding = 1)
self.conv4 = nn.Conv2d(32, 3, 3, padding = 1)
# Establish our ViMamba and ViT INSIDE the combined model
self.vit=vit
self.vim=vim
self.hidden_layers=[]
for i in range(16):
self.hidden_layers.append(nn.Linear(2, 224))
self.hidden_layers.append(nn.GELU())
self.hidden_layers.append(nn.Linear(224, 64))
self.hidden_layers.append(nn.GELU())
self.hidden_layers.append(nn.Linear(64, 16))
self.hidden_layers.append(nn.GELU())
self.hidden_layers.append(nn.Linear(16, 4))
self.hidden_layers.append(nn.GELU())
self.hidden_layers.append(nn.Linear(4, 2))
self.hidden_layers.append(nn.GELU())
# Create our MLP Head
self.head=nn.Sequential(
*self.hidden_layers,
nn.Linear(2, 224),
nn.ReLU(),
nn.Linear(224, 64),
nn.ReLU(),
nn.Linear(64, 16),
nn.ReLU(),
nn.Linear(16, 2),
nn.ReLU(),
nn.Linear(2,1),
nn.Sigmoid(),
)
def forward(self, x):
# Run the input through the convolutional layers.
x=self.conv1(x)
x=F.relu(x)
x=self.conv2(x)
x=F.relu(x)
x=self.conv3(x)
x=F.relu(x)
x=self.conv4(x)
x=F.relu(x)
# Process the results from both the ViMamba and the ViT
vit_result=self.vit(x)
vim_result=self.vim(x)
# Add both inputs (one may correct the other)
new_result=torch.flatten(torch.cat((vit_result, vim_result), dim=0))
# Process through the MLP head
result=self.head(new_result)
return result
What is the problem?