I am working on tracing a BERT-based model for sentiment analysis so that I can compile it with Apache TVM. Here is the model:
```python
import torch
import torch.nn as nn


class BERTGRUSentiment(nn.Module):
    def __init__(self,
                 bert,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout):
        super().__init__()

        self.bert = bert

        embedding_dim = bert.config.to_dict()['hidden_size']

        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          batch_first=True,
                          dropout=0 if n_layers < 2 else dropout)

        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text = [batch size, sent len]
        with torch.no_grad():
            embedded = self.bert(text)[0]

        # embedded = [batch size, sent len, emb dim]
        _, hidden = self.rnn(embedded)

        # hidden = [n layers * n directions, batch size, hid dim]
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        else:
            hidden = self.dropout(hidden[-1, :, :])

        # hidden = [batch size, hid dim]
        output = self.out(hidden)

        # output = [batch size, out dim]
        return output
```
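For reference, `bert` is a pretrained HuggingFace transformers encoder, and the model is instantiated along these lines (the checkpoint name and hyperparameter values below are illustrative assumptions, not the exact values I trained with):

```python
from transformers import BertModel

# Assumed checkpoint; any BERT encoder exposing config.hidden_size works here.
bert = BertModel.from_pretrained('bert-base-uncased')

model = BERTGRUSentiment(bert,
                         hidden_dim=256,     # placeholder hyperparameters
                         output_dim=1,
                         n_layers=2,
                         bidirectional=True,
                         dropout=0.25)
```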
Here is how I try to compile it:
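Roughly, compile.py puts the model in eval mode, traces it with `torch.jit.trace` on a dummy batch of token ids, and hands the TorchScript module to TVM's Relay frontend. A minimal sketch of that flow (the input name `"text"`, the sequence length, the dtype, and the `llvm` target are assumptions for illustration):

```python
import torch
import tvm
from tvm import relay

model.eval()

# Dummy batch of token ids to drive the trace; shape and dtype are assumptions.
batch_size, seq_len = 1, 128
dummy_input = torch.randint(0, bert.config.vocab_size, (batch_size, seq_len))

# Trace the model into a TorchScript module.
traced = torch.jit.trace(model, dummy_input)

# Import the trace into Relay; the input name "text" is a placeholder.
shape_list = [("text", ((batch_size, seq_len), "int64"))]
mod, params = relay.frontend.from_pytorch(traced, shape_list)

# Build for a generic CPU target.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)
```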
However, when I run compile.py, I get the following error:

```
RuntimeError: The expanded size of the tensor (768) must match the existing size (512) at non-singleton dimension 1. Target sizes: [1, 768]. Tensor sizes: [1, 512]
```
Nevertheless, I am able to train the model successfully and run it on examples. What is going wrong?