Here is my model:
import torch
import torch.nn as nn
import torch.nn.functional as F


class LR_CNN(nn.Module):
    """
    Input shape:
        4D tensor with shape: (batch, K, pad_size, embed_dim)
    Output shape:
        3D tensor with shape: (batch, K, len(filter_sizes) * num_filters)
    """
    def __init__(self, config) -> None:
        super().__init__()
        filter_sizes = [int(i) for i in config.filter_sizes.split()]
        # One Conv3d per filter size; each kernel covers a single K slice,
        # `i` consecutive tokens, and the full embedding dimension.
        self.conv = nn.ModuleList([
            nn.Conv3d(1, config.num_filters, (1, i, config.embed))
            for i in filter_sizes
        ])
        self.relu = nn.ReLU()
    def conv_and_activate(self, x, conv):
        # Conv3d + squeeze: (batch, 1, K, pad_size, embed)
        #   -> (batch, num_filters, K, pad_size - i + 1)
        out = conv(x).squeeze(-1)
        # Max-pool over the token dimension: (batch, num_filters, K, 1)
        out = F.max_pool2d(out, (1, out.size(3)))
        out = self.relu(out)
        out = out.squeeze(-1)
        # (batch, num_filters, K) -> (batch, K, num_filters)
        out = out.permute(0, 2, 1)
        return out
    def forward(self, x):
        x = x.unsqueeze(1)  # add a channel dimension for Conv3d
        # Concatenate the pooled features of all filter sizes on the last axis.
        out = torch.cat(
            [self.conv_and_activate(x, conv) for conv in self.conv], dim=2)
        return out
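To sanity-check the shapes, here is a minimal smoke test. The SimpleNamespace below is a made-up stand-in for your config object (only the fields the modules actually read are set; adjust the values to match your real config):

from types import SimpleNamespace

# Hypothetical config, invented for this example only.
config = SimpleNamespace(
    filter_sizes="2 3 4",  # three kernel heights
    num_filters=16,
    embed=100,             # embedding dimension
    hidden_size=64,
    drop_out=0.1,
    num_layers=2,          # > 1 so the LSTM dropout actually applies
    K=5,
    cuda_id=0,
)

x = torch.randn(8, config.K, 30, config.embed)  # (batch, K, pad_size, embed)
print(LR_CNN(config)(x).shape)  # torch.Size([8, 5, 48]) = (batch, K, 3 * 16)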
class Mid_CNN(nn.Module):
    """
    Input shape:
        3D tensor with shape: (batch, pad_size, embed_dim)
    Output shape:
        3D tensor with shape: (batch, 1, len(filter_sizes) * num_filters)
        (forward() adds a length-1 sequence axis so the output can feed an LSTM)
    """
    def __init__(self, config) -> None:
        super().__init__()
        filter_sizes = [int(i) for i in config.filter_sizes.split()]
        # One Conv2d per filter size, spanning `i` tokens and the full embedding.
        self.conv = nn.ModuleList([
            nn.Conv2d(1, config.num_filters, (i, config.embed))
            for i in filter_sizes
        ])
        self.relu = nn.ReLU()
        self.fc = nn.Linear(
            len(filter_sizes) * config.num_filters,
            len(filter_sizes) * config.num_filters)
    def conv_and_activate(self, x, conv):
        # Conv2d + squeeze: (batch, 1, pad_size, embed)
        #   -> (batch, num_filters, pad_size - i + 1)
        out = conv(x).squeeze(-1)
        # Max-pool over the token dimension: (batch, num_filters, 1)
        out = F.max_pool1d(out, out.size(2))
        out = self.relu(out)
        return out.squeeze(-1)  # (batch, num_filters)
    def forward(self, x):
        out = torch.cat(
            [self.conv_and_activate(x.unsqueeze(1), conv) for conv in self.conv],
            dim=1)
        out = self.fc(out)
        out = self.relu(out)
        return out.unsqueeze(1)  # (batch, 1, len(filter_sizes) * num_filters)
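Reusing the same hypothetical config, Mid_CNN collapses one padded sequence into a single pooled feature vector (with the extra length-1 sequence axis noted in the docstring):

mid = torch.randn(8, 30, config.embed)  # (batch, pad_size, embed)
print(Mid_CNN(config)(mid).shape)       # torch.Size([8, 1, 48])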
class LSTM(nn.Module):
    """
    Input shape:
        3D tensor with shape: (batch, K, len(filter_sizes) * num_filters)
    Output shape:
        3D tensor with shape: (batch, K, num_directions * hidden_size)
    """
    def __init__(self, config) -> None:
        super().__init__()
        input_size = len(config.filter_sizes.split()) * config.num_filters
        # Note: nn.LSTM applies `dropout` only between stacked layers, so it
        # is a no-op (and PyTorch warns) when num_layers == 1.
        self.lstm = nn.LSTM(input_size,
                            config.hidden_size,
                            dropout=config.drop_out,
                            bidirectional=True,
                            batch_first=True,
                            num_layers=config.num_layers)
    def forward(self, x):
        if x.dim() == 2:
            x = x.unsqueeze(1)  # treat a 2D input as a length-1 sequence
        out, _ = self.lstm(x)
        return out
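Because the LSTM is bidirectional, the feature width doubles; with the same hypothetical config:

seq = torch.randn(8, config.K, 3 * config.num_filters)  # (batch, K, features)
print(LSTM(config)(seq).shape)  # torch.Size([8, 5, 128]) = (batch, K, 2 * hidden_size)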
class Attention(nn.Module):
    """
    Input shape:
        3D tensor with shape: (batch, K, features)
    Output shape:
        2D tensor with shape: (batch, features)
    """
    def __init__(self, config) -> None:
        super().__init__()
        # Register the parameters on the CPU; the hard-coded
        # .cuda(config.cuda_id) calls are unnecessary because moving the
        # whole model with .to(device) moves registered parameters too.
        self.w = nn.Parameter(torch.empty(config.hidden_size * 2))
        self.b = nn.Parameter(torch.empty(config.K))
        self.u = nn.Parameter(torch.empty(config.K, config.K))
        self._create_weight()

    def _create_weight(self, mean=0.0, std=0.05):
        self.w.data.normal_(mean, std)
        self.b.data.normal_(mean, std)
        self.u.data.normal_(mean, std)
    def forward(self, x):
        # Attention scores: one scalar per position, shape (batch, K)
        uit = torch.matmul(x, self.w) + self.b
        uit = torch.tanh(torch.matmul(uit, self.u))
        # The original exp / sum / div sequence is exactly a softmax over K.
        ait = F.softmax(uit, dim=1).unsqueeze(2)  # (batch, K, 1)
        # Attention-weighted sum of the inputs: (batch, features)
        return torch.sum(x * ait, dim=1)
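This is an additive-style attention: scores tanh((x @ w + b) @ u) are softmaxed over the K positions and used as weights for a sum over x. A quick check with the same hypothetical config:

h = torch.randn(8, config.K, 2 * config.hidden_size)
print(Attention(config)(h).shape)  # torch.Size([8, 128]) = (batch, 2 * hidden_size)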
class CBA(nn.Module):
    """
    Full model: CNN feature extractors and BiLSTMs for the left, right, and
    middle inputs, attention over the left/right contexts, then a two-class
    classifier head.
    """
    def __init__(self, config) -> None:
        super().__init__()
        self.lr_cnn = LR_CNN(config)
        self.mid_cnn = Mid_CNN(config)
        self.lr_lstm = LSTM(config)
        self.mid_lstm = LSTM(config)
        self.lr_attention = Attention(config)
        self.fc = nn.Linear(2 * config.hidden_size, 2 * config.hidden_size)
        self.fc1 = nn.Linear(6 * config.hidden_size, 2)
        self.relu = nn.ReLU()
    def forward(self, left, right, mid):
        # The left and right branches share the same LR_CNN, LSTM, and
        # Attention instances, so their weights are tied.
        l_out = self.lr_cnn(left)
        r_out = self.lr_cnn(right)
        mid_out = self.mid_cnn(mid)
        l_out = self.lr_lstm(l_out)
        r_out = self.lr_lstm(r_out)
        mid_out = self.mid_lstm(mid_out)
        mid_out = self.fc(mid_out)
        mid_out = self.relu(mid_out).squeeze(1)  # (batch, 2 * hidden_size)
        l_out = self.lr_attention(l_out)         # (batch, 2 * hidden_size)
        r_out = self.lr_attention(r_out)
        out = torch.cat((l_out, mid_out, r_out), dim=1)  # (batch, 6 * hidden_size)
        out = self.fc1(out)
        # Caution: if you train with nn.CrossEntropyLoss, return the raw
        # logits instead; that loss applies log-softmax itself, and stacking
        # softmax twice weakens the gradients.
        out = F.softmax(out, dim=-1)
        return out
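Finally, an end-to-end smoke test on random data, again with the hypothetical config (it runs on CPU now that Attention no longer hard-codes .cuda()):

model = CBA(config)
left = torch.randn(8, config.K, 30, config.embed)
right = torch.randn(8, config.K, 30, config.embed)
mid = torch.randn(8, 30, config.embed)
probs = model(left, right, mid)
print(probs.shape)        # torch.Size([8, 2])
print(probs.sum(dim=-1))  # each row sums to 1 because of the final softmax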