I have the following model architecture:
import logging
from dataclasses import dataclass
from typing import Dict, Optional
import torch
import torch.distributed as dist
from torch import nn, Tensor
from transformers import AutoModel
from transformers.file_utils import ModelOutput
class ConvEnc(nn.Module):
    """Pretrained encoder followed by a small 2-D convolutional head.

    The encoder is loaded with ``AutoModel.from_pretrained(model_name)``.
    Its ``last_hidden_state`` is padded along the sequence axis up to 512
    positions, treated as a one-channel image, passed through four
    Conv2d -> ReLU -> BatchNorm2d stages and finally projected to
    ``emb_dim`` by a linear layer.

    Args:
        model_name: Name or path handed to ``AutoModel.from_pretrained``.
        emb_dim: Output size of the final linear projection.
        channels: Output channels of the first conv stage; every later
            stage doubles the channel count of the previous one.

    Note:
        Sub-module attribute names (``model``, ``conv2d_1`` ... ``bn_4``,
        ``fc``) and their registration order are kept stable so that
        ``state_dict`` keys do not change.
    """

    # Sequence length the hidden states are padded to before the conv stack.
    _PAD_LEN = 512

    # (kernel_size, stride) of each conv stage, in order.
    _CONV_SPECS = (
        ((1, 11), (5, 3)),
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
    )

    # Encoder width assumed when the encoder config exposes no
    # ``hidden_size``; with channels=8 this reproduces the historical
    # hard-coded 2304 input features of ``fc``.
    _FALLBACK_HIDDEN_SIZE = 1024

    def __init__(self, model_name, emb_dim=512, channels=8):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)

        # Register conv_i, bn_i, relu_i under their original names and in
        # their original order (keeps checkpoints and RNG-driven init
        # identical to the hand-unrolled version).
        in_channels = 1
        for idx, (kernel, stride) in enumerate(self._CONV_SPECS, start=1):
            out_channels = channels * 2 ** (idx - 1)
            setattr(
                self,
                f"conv2d_{idx}",
                torch.nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel,
                    stride=stride,
                ),
            )
            setattr(self, f"bn_{idx}", torch.nn.BatchNorm2d(out_channels))
            setattr(self, f"relu_{idx}", torch.nn.ReLU())
            in_channels = out_channels

        # Size the projection from the actual conv geometry instead of a
        # magic 2304, which was only right for channels=8 and a 1024-wide
        # encoder.
        config = getattr(self.model, "config", None)
        hidden_size = getattr(config, "hidden_size", self._FALLBACK_HIDDEN_SIZE)
        self.fc = torch.nn.Linear(
            self._flat_features(channels, hidden_size), emb_dim
        )

    @classmethod
    def _flat_features(cls, channels, hidden_size, seq_len=None):
        """Return the flattened feature count produced by the conv stack.

        Args:
            channels: Output channels of the first conv stage.
            hidden_size: Width of the encoder hidden states.
            seq_len: Height of the conv input; defaults to the padded
                sequence length (512).

        Returns:
            ``out_channels * out_height * out_width`` after the last stage.
        """
        height = cls._PAD_LEN if seq_len is None else seq_len
        width = hidden_size
        for (k_h, k_w), (s_h, s_w) in cls._CONV_SPECS:
            # Conv2d output size for padding=0, dilation=1.
            height = (height - k_h) // s_h + 1
            width = (width - k_w) // s_w + 1
        out_channels = channels * 2 ** (len(cls._CONV_SPECS) - 1)
        return out_channels * height * width

    def forward(self, features):
        """Encode ``features`` and return ``(cls_emb, y)``.

        Args:
            features: Mapping of keyword arguments forwarded to the encoder
                (assumed to be tokenizer output such as ``input_ids`` /
                ``attention_mask`` - confirm against the caller).

        Returns:
            Tuple ``(cls_emb, y)`` where ``cls_emb`` is the hidden state at
            sequence position 0 and ``y`` is the conv-head embedding of
            size ``emb_dim``.

        Note:
            Sequences longer than 512 positions are neither padded nor
            truncated here; ``fc`` is sized for a 512-row input, so
            sufficiently longer inputs fail with a shape mismatch.
        """
        prev = self.model(**features, return_dict=True)
        hidden_state = prev.last_hidden_state
        cls_emb = hidden_state[:, 0]

        # Zero-pad the sequence axis (second-to-last dim) up to _PAD_LEN.
        padding_needed = max(0, self._PAD_LEN - hidden_state.size(1))
        padded_tensor = torch.nn.functional.pad(
            hidden_state, (0, 0, 0, padding_needed)
        )

        # Insert a singleton channel axis so Conv2d sees a one-channel image.
        y1 = padded_tensor.unsqueeze(1)

        # Stage order is conv -> ReLU -> BatchNorm, as in the original model.
        for idx in range(1, len(self._CONV_SPECS) + 1):
            y1 = getattr(self, f"conv2d_{idx}")(y1)
            y1 = getattr(self, f"relu_{idx}")(y1)
            y1 = getattr(self, f"bn_{idx}")(y1)

        y = self.fc(y1.flatten(1))
        return cls_emb, y
When I save this model using torch.save(self.model.state_dict(), os.path.join(output_dir, "m.pth")),
the model is saved, but after loading the weights the layer names are prefixed with an extra `model.`.
For example:
Layer name in the model: model.embeddings.word_embeddings.weight
Layer name in the loaded weight dict: model.model.embeddings.word_embeddings.weight
What did I do wrong when saving the model?
Thanks.