I get the error below when trying to trace the following code with `torch.jit.trace`.
frame #0: std::function<std::string ()>::operator()() const + 0x11 (0x7f3e4b04d441 in /bigdisk2/sunil/pytorchnightly/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x2a (0x7f3e4b04cd7a in /bigdisk2/sunil/pytorchnightly/lib/python3.6/site-packages/torch/lib/libc10.so)
This is the code I am trying to trace:
import torch
import torch.jit
import copy
from torch.nn import functional as F
from torch.nn import Module
from torch.nn import MultiheadAttention
from torch.nn import ModuleList
from torch.nn.init import xavier_uniform_
from torch.nn import Dropout
from torch.nn import Linear
from torch.nn import LayerNorm
class TransformerEncoderLayer(Module):
    r"""Transformer encoder layer, reduced to a minimal tracing repro.

    The layer follows the standard encoder layer from the paper "Attention Is
    All You Need" (Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
    Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. In
    Advances in Neural Information Processing Systems, pages 6000-6010).

    All sub-modules of the standard layer (self-attention, feedforward network,
    dropouts, layer norms) are still constructed, but in this reduced version
    ``forward`` only applies the two layer normalizations back to back; the
    self-attention and feedforward residual branches are commented out.

    Args:
        d_model: the number of expected features in the input (required).
        nhead: the number of heads in the multi-head attention module
            (required).
        dim_feedforward: the dimension of the feedforward network model
            (default=2048).
        dropout: the dropout value (default=0.1).

    Examples::
        >>> encoder_layer = TransformerEncoderLayer(d_model=512, nhead=8)
        >>> out = encoder_layer(torch.rand(10, 32, 512))
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        # Feedforward network: d_model -> dim_feedforward -> d_model.
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        # One normalization and one dropout per residual branch.
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        r"""Pass the input through the encoder layer.

        Args:
            src: the sequence to the encoder layer (required). Its last
                dimension must have size ``d_model`` for the layer norms.
            src_mask: the mask for the src sequence (optional). Currently
                ignored because the self-attention branch is disabled.
            src_key_padding_mask: the mask for the src keys per batch
                (optional). Currently ignored for the same reason.

        Returns:
            ``src`` after ``norm1`` followed by ``norm2``.
        """
        # NOTE: self-attention residual branch is disabled in this repro:
        # src2 = self.self_attn(src, src, src, attn_mask=src_mask,
        #                       key_padding_mask=src_key_padding_mask)[0]
        # src = src + self.dropout1(src2)
        src = self.norm1(src)

        # NOTE: feedforward residual branch is disabled in this repro:
        # src2 = self.linear2(self.dropout(F.relu(self.linear1(src))))
        # src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
# Minimal repro driver: build the layer on the GPU, run one eager forward pass,
# then attempt to trace it. The CUDA device is part of the reported setup.
layerencoder = TransformerEncoderLayer(512, 1).to('cuda')

# Named `example_input` rather than `input` so the builtin is not shadowed.
example_input = torch.rand(1, 1, 512).to('cuda')

# Eager forward pass; this runs before the tracing attempt below.
print(layerencoder(example_input).shape)

# Switch to eval mode before tracing, as in the original report.
layerencoder.eval()
traced_model = torch.jit.trace(layerencoder, example_input)
If I remove the second layer normalization (`self.norm2`), tracing succeeds. I am using a PyTorch nightly build.