Hello, I’m getting an error while adding positional encoding. The goal is to give the model separate output branches, one per target, each with a different number of classes. However, the tensors don’t seem to align correctly.
The transformer model looks like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torchsummary import summary
class EncoderLayer(nn.Module):
    def __init__(self, d_model, nhead):
        super(EncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead)
        self.linear1 = nn.Linear(d_model, 256)
        self.dropout = nn.Dropout(0.1)
        self.linear2 = nn.Linear(256, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        attn_output, _ = self.self_attn(x, x, x)
        x = x + self.dropout(attn_output)
        x = self.norm1(x)
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        x = x + self.dropout(x)
        x = self.norm2(x)
        return x
class DecoderLayer(nn.Module):
    def __init__(self, d_model, nhead):
        super(DecoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead)
        self.linear1 = nn.Linear(d_model, 256)
        self.dropout = nn.Dropout(0.1)
        self.linear2 = nn.Linear(256, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        attn_output, _ = self.self_attn(x, x, x)
        x = x + self.dropout(attn_output)
        x = self.norm1(x)
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        x = x + self.dropout(x)
        x = self.norm2(x)
        return x
class TransformerModel(nn.Module):
    def __init__(self, input_size, output_sizes, d_model=128, nhead=8, num_encoder_layers=6, num_decoder_layers=6):
        super(TransformerModel, self).__init__()
        # Input embedding layer
        self.embedding = nn.Linear(input_size, d_model)
        # Shared layers for input processing
        self.shared_layers = nn.Sequential(
            nn.Linear(d_model, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU()
        )
        # Positional encoding for input
        self.input_positional_encoding = PositionalEncoding(d_model)
        # Encoder layers
        self.encoder_layers = nn.ModuleList([
            EncoderLayer(d_model, nhead) for _ in range(num_encoder_layers)
        ])
        # Output embedding layer for encoder
        self.output_embedding = nn.Linear(d_model, input_size)
        # Positional encoding for output of encoder
        self.output_positional_encoding = PositionalEncoding(d_model)
        # Decoder layers
        self.decoder_layers = nn.ModuleList([
            DecoderLayer(d_model, nhead) for _ in range(num_decoder_layers)
        ])
        # Linear layer for final prediction for each target
        self.linears = nn.ModuleDict({
            target: nn.Linear(d_model, output_size) for target, output_size in output_sizes.items()
        })
        # Softmax layer
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        print(f"Input shape: {x.shape}")
        # Input embedding
        x = self.embedding(x)
        print(f"Embedded shape: {x.shape}")
        # Shared layers for input processing
        x = self.shared_layers(x)
        print(f"Shared layers shape: {x.shape}")
        # Add positional encoding for input
        x = self.input_positional_encoding(x)
        print(f"PE input shape: {x.shape}")
        # Encoder layers
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x)
        # Output embedding for encoder
        x = self.output_embedding(x)
        print(f"Output embedding shape: {x.shape}")
        # Add positional encoding for output of encoder
        x = self.output_positional_encoding(x)
        print(f"PE output shape: {x.shape}")
        # Decoder layers
        for decoder_layer in self.decoder_layers:
            x = decoder_layer(x)
        # Linear layers for final prediction for each target
        outputs = {target: linear(x) for target, linear in self.linears.items()}
        # Apply softmax if needed
        outputs = {target: self.softmax(output) for target, output in outputs.items()}
        return outputs
# Positional Encoding module
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=512):
        super(PositionalEncoding, self).__init__()
        self.encoding = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        self.encoding[:, 0::2] = torch.sin(position * div_term)
        self.encoding[:, 1::2] = torch.cos(position * div_term)
        self.encoding = self.encoding.unsqueeze(0)

    def forward(self, x):
        return x + self.encoding[:, :x.size(1)].detach()
input_size = 6
output_sizes = {'key': 24, 'pri_deg': 21, 'sec_deg': 21, 'quality': 10, 'inversion': 4, 'boundary': 4}
model = TransformerModel(input_size, output_sizes)
print(model)
# Example usage
input_data = torch.randn(32, input_size)
# print(input_data)
output_data = model(input_data)
# Print the summary
# summary(model, input_size=(32, input_size))
I am getting the error:
RuntimeError: The size of tensor a (32) must match the size of tensor b (128) at non-singleton dimension 1
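If it helps narrow things down, I think the shapes going into PositionalEncoding are the problem. This stripped-down snippet (the variable names here are just mine for illustration, not part of the model) seems to reproduce the exact same RuntimeError:

import torch

batch, d_model, max_len = 32, 128, 512
x = torch.randn(batch, d_model)              # 2D activations: (batch, d_model), no sequence dimension
encoding = torch.zeros(1, max_len, d_model)  # positional encoding buffer: (1, max_len, d_model)
# x.size(1) is d_model (128) here, not a sequence length, so 128 positions get sliced,
# and broadcasting (32, 128) against (1, 128, 128) fails at dimension 1
out = x + encoding[:, :x.size(1)]

Does this mean my input needs an explicit sequence dimension, i.e. (batch, seq_len, features), before the positional encoding is added, or should the encoding be indexed differently?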