I am trying to replicate the SimCLR model from a linked Google Colab notebook, using my own dataset and my own model. When I run it with my model, it raises the error below, and I cannot work out where the problem is.
RuntimeError Traceback (most recent call last)
<ipython-input-34-4b3055af976b> in <module>
4 temperature=0.07,
5 weight_decay=1e-4,
----> 6 max_epochs=100)
4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
1603 if len(error_msgs) > 0:
1604 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
-> 1605 self.__class__.__name__, "\n\t".join(error_msgs)))
1606 return _IncompatibleKeys(missing_keys, unexpected_keys)
1607
RuntimeError: Error(s) in loading state_dict for SimCLR:
Missing key(s) in state_dict: "convnet.conv1.bias".
Unexpected key(s) in state_dict: "convnet.bn1.weight", "convnet.bn1.bias", "convnet.bn1.running_mean", "convnet.bn1.running_var", "convnet.bn1.num_batches_tracked", "convnet.layer1.0.conv1.weight", "convnet.layer1.0.bn1.weight", "convnet.layer1.0.bn1.bias", "convnet.layer1.0.bn1.running_mean", "convnet.layer1.0.bn1.running_var", "convnet.layer1.0.bn1.num_batches_tracked", "convnet.layer1.0.conv2.weight", "convnet.layer1.0.bn2.weight", "convnet.layer1.0.bn2.bias", "convnet.layer1.0.bn2.running_mean", "convnet.layer1.0.bn2.running_var", "convnet.layer1.0.bn2.num_batches_tracked", "convnet.layer1.1.conv1.weight", "convnet.layer1.1.bn1.weight", "convnet.layer1.1.bn1.bias", "convnet.layer1.1.bn1.running_mean", "convnet.layer1.1.bn1.running_var", "convnet.layer1.1.bn1.num_batches_tracked", "convnet.layer1.1.conv2.weight", "convnet.layer1.1.bn2.weight", "convnet.layer1.1.bn2.bias", "convnet.layer1.1.bn2.running_mean", "convnet.layer1.1.bn2.running_var", "convnet.layer1.1.bn2.num_batches_tracked", "convnet.layer2.0.conv1.weight", "convnet.layer2.0.bn1.weight", "convnet.layer2.0.bn1.bias", "convnet.layer2.0.bn1.running_mean", "convnet.layer2.0.bn1.running_var", "convnet.layer2.0.bn1.num_batches_tracked", "convnet.layer2.0.conv2.weight", "convnet.layer2.0.bn2.weight", "convnet.layer2.0.bn2.bias", "convnet.layer2.0.bn2.running_mean", "convnet.layer2.0.bn2.running_var", "convnet.layer2.0.bn2.num_batches_tracked", "convnet.layer2.0.downsample.0.weight", "convnet.layer2.0.downsample.1....
size mismatch for convnet.conv1.weight: copying a param with shape torch.Size([64, 3, 7, 7]) from checkpoint, the shape in current model is torch.Size([3, 1, 1]).
size mismatch for convnet.fc.0.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([1, 31488]).
size mismatch for convnet.fc.0.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([1]).
size mismatch for convnet.fc.2.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([128, 256]).
My model is defined as follows:
class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear readout of the last time step.

    Args:
        input_dim: Size of each time step's feature vector (``nn.LSTM`` input size).
        hidden_dim: Size of the LSTM hidden state.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of output features of the readout layer ``fc``.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        # Stored because forward() needs them to size the initial states.
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True makes input/output tensors
        # (batch_dim, seq_dim, feature_dim).
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Kept as an attribute; forward() does not apply it (its use was
        # commented out in the original code).
        self.dropout = nn.Dropout(0.1)
        # Readout layer. Callers may replace this attribute with another
        # module; forward() applies whatever is stored here.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Encode a batch of sequences and read out the last time step.

        Args:
            x: Batch-first input; ``x.size(0)`` is used as the batch size
                and the last axis must hold ``input_dim`` features.

        Returns:
            The result of ``self.fc`` applied to the LSTM output at the
            final time step.
        """
        batch_size = x.size(0)
        # Fresh zero states for every call, created on x's device and with
        # x's dtype so the module also works on GPU or in float64. They are
        # new tensors without history, so no detach() is needed.
        h0 = x.new_zeros(self.layer_dim, batch_size, self.hidden_dim)
        c0 = x.new_zeros(self.layer_dim, batch_size, self.hidden_dim)
        seq_out, _ = self.lstm(x, (h0, c0))
        # The original forward() stopped after the LSTM call and returned
        # None; apply the readout to the last time step and return it.
        return self.fc(seq_out[:, -1, :])
# Hyperparameters passed to LSTMModel below.
input_dim = 16    # feature size of each time step (nn.LSTM input size)
hidden_dim = 100  # LSTM hidden-state size
layer_dim = 1     # number of stacked LSTM layers
output_dim = 1    # output features of the readout layer
model = LSTMModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    layer_dim=layer_dim,
    output_dim=output_dim,
)
Below is the SimCLR class in which the model is used:
class SimCLR(pl.LightningModule):
    """Lightning module wrapping an LSTM backbone with an MLP projection head.

    Args:
        hidden_dim: Used both as the LSTM hidden-state size of the backbone
            and as the output size of the projection head.
        lr: Stored as a hyperparameter; not used in this constructor.
        temperature: Must be strictly positive; stored as a hyperparameter.
        weight_decay: Stored as a hyperparameter; not used in this constructor.
        max_epochs: Stored as a hyperparameter; not used in this constructor.

    NOTE(review): the parameter names and shapes of this module differ from
    a ResNet-based SimCLR, so a checkpoint saved from a different backbone
    cannot be loaded into it with a strict ``load_state_dict`` -- confirm
    which checkpoint the training code tries to load.
    """

    def __init__(self, hidden_dim, lr, temperature, weight_decay, max_epochs=100):
        super().__init__()
        self.save_hyperparameters()
        assert self.hparams.temperature > 0.0, 'The temperature must be a positive float!'
        # Base model f(.). input_dim and layer_dim are read from module-level
        # globals. The readout width is 2*hidden_dim so that it matches the
        # input width of the last projection layer below; the original passed
        # the global output_dim (1), which made the head's layers incompatible.
        self.convnet = LSTMModel(input_dim, hidden_dim, layer_dim, 2 * hidden_dim)
        # Alternative backbones tried previously:
        # self.convnet = Net()
        # self.convnet = torchvision.models.resnet18(pretrained=False,
        #                                            num_classes=2*hidden_dim)
        # The MLP for g(.) consists of Linear->ReLU->Linear
        self.convnet.fc = nn.Sequential(
            self.convnet.fc,  # Linear(hidden_dim, 2*hidden_dim)
            nn.ReLU(inplace=True),
            nn.Linear(2 * hidden_dim, hidden_dim),
        )