I have this code for a Positional Encoder class that’s submodule of my Transformer class see full in repo here: Architecture directory
import torch
import torch.nn as nn
import numpy as np
class PositionalEncoder(nn.Module):
"""
This class represents a positional encoder for a transformer model.
It is used to give the model information about the position of words in a sentence.
Inherits from the PyTorch nn.Module.
Args:
dm (int): The dimensionality of the model.
maxlen (int): The maximum sequence length that the model can handle.
dropout (float, optional): The dropout rate for the dropout layer. Default is 0.1.
scale (bool, optional): Whether to scale the embeddings by dm. Default is True.
Attributes:
dm (int): The models dimensionality.
drop (torch.nn.Dropout): The dropout layer.
scale (bool): Indicator for scaling the embeddings.
pos_encodings (torch.Tensor): The positional encodings, with shape (1, maxlen, dm).
"""
def __init__(self, dm, maxlen, dropout=0.1, scale=True):
super().__init__()
self.dm = dm
self.drop = nn.Dropout(dropout)
self.scale = scale
self.qconfig = None
# shape: pos - (maxlen, 1) dim - (dm, )
pos = torch.arange(maxlen).float().unsqueeze(1)
dim = torch.arange(dm).float()
# apply pos / (10000^2*i / dm) -> use sin for even indices & cosine for odd indices
values = pos / torch.pow(1e4, 2 * torch.div(dim, 2, rounding_mode="floor") / dm)
encodings = torch.where(dim.long() % 2 == 0, torch.sin(values), torch.cos(values))
# reshape: encodings - (1, maxlen, dm)
encodings = encodings.unsqueeze(0)
# register encodings w/o grad
self.register_buffer("pos_encodings", encodings)
def forward(self, embeddings):
"""
This method applies the positional encodings to the input embeddings.
Args:
embeddings (torch.Tensor): The input embeddings, with shape (batch_size, seq_len, dm).
Returns:
torch.Tensor: Positionally encoded embeddings, with shape (batch_size, seq_len, dm).
"""
# inshape: embeddings - (batch_size, seq_len, dm)
# enlarge embeddings (if applicable)
if self.scale:
embeddings = embeddings * np.sqrt(self.dm)
# sum embeddings w/ respective positonal encodings | shape: embeddings - (batch_size, seq_len, dm)
seq_len = embeddings.size(1)
embeddings = embeddings + self.pos_encodings[:, :seq_len]
# drop neurons | out - (batch_size, seq_len, dm)
out = self.drop(embeddings)
return out
I’m trying to run this code to quantize the model from the recent release using torch 2.0.1
import torch
from torch.ao.quantization import get_default_qconfig_mapping
from torch.quantization.quantize_fx import prepare_fx, convert_fx
from model.transformer import Transformer
qconfig_mapping = get_default_qconfig_mapping()
# Or explicity specify the qengine
# qengine = 'x86'
# torch.backends.quantized.engine = qengine
# qconfig_mapping = get_default_qconfig_mapping(qengine)
model_fp32 = Transformer(1000, 1000, maxlen=256, pad_id=0).eval()
x = torch.randint(0, 1000, (64, 5)).float()
# Insert observers according to qconfig and backend config
prepared_model = prepare_fx(model_fp32, qconfig_mapping, example_inputs=x)
# Calibration code not shown
# Convert to quantized model
quantized_model = convert_fx(prepared_model)
I get this error when executing and it’s because when running prepare_fx, the embeddings.size(1) returns a Proxy(size); I assume to trace the model so it can quantize:
embeddings = embeddings + self.pos_encodings[:, :seq_len]
~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
TypeError: slice indices must be integers or None or have an __index__ method
I tried setting the qsize attribute to None for both the Embedding class and PositionalEncoder class, but I still get the error. Is there anyway I can get around this so I can quantize my Transformer model?