Hi.
I have a system-shutdown problem while training: after a few batches, the computer shuts itself down and reboots. I think the custom layer causes this, because training works fine without it. The custom layer applies self-attention to compute the weighted mean and standard deviation over the time dimension.
I trained with 4 GPUs using nn.DataParallel, and the PyTorch version is 1.5.0.
class AttnPooling(nn.Module):
    """Attentive statistics pooling over the time axis.

    Learns a per-time-step attention distribution and pools an input of
    shape (batch, features, time) into (batch, 2 * features) by
    concatenating the attention-weighted mean and the attention-weighted
    standard deviation of the features.
    """

    def __init__(self, din, dh=500, eps=1e-12):
        """
        Args:
            din: input feature dimension F.
            dh: hidden width of the attention MLP.
            eps: floor applied to the variance before sqrt. sqrt has an
                infinite derivative at 0, so an exact-zero variance
                produces inf/NaN gradients — the floor prevents that.
        """
        super(AttnPooling, self).__init__()
        self.eps = eps
        self.w1 = Parameter(torch.Tensor(dh, din))
        self.w2 = Parameter(torch.Tensor(1, dh))
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Initialize both projections the same way nn.Linear does."""
        init.kaiming_uniform_(self.w1, a=math.sqrt(5))
        init.kaiming_uniform_(self.w2, a=math.sqrt(5))

    def forward(self, x, dim=-1):
        """Pool over time.

        Args:
            x: input tensor of shape (batch, features, time).
            dim: axis to squeeze out of the pooled moments — the
                collapsed time axis, i.e. -1 or 2 (default -1; the
                original required it explicitly, so existing callers
                are unaffected).

        Returns:
            Tensor of shape (batch, 2 * features):
            [weighted mean, weighted stddev] along the feature axis.
        """
        # (B, F, T) -> (B, T, F) so the linear layers act on features.
        h = x.transpose(1, 2)
        attn = F.relu(F.linear(h, self.w1))               # (B, T, dh)
        # Softmax over the time axis: one weight per step, summing to 1.
        attn = F.softmax(F.linear(attn, self.w2), dim=1)  # (B, T, 1)

        # Weighted first and second moments under the attention
        # distribution. NOTE(fix): the original computed the variance
        # with a plain .mean(dim=-1) over time — i.e. UNWEIGHTED — which
        # contradicts the weighted mean. Attentive statistics pooling
        # uses Var[x] = E[x^2] - E[x]^2 with both expectations weighted.
        mean = torch.bmm(x, attn).squeeze(dim)            # (B, F)
        sq_mean = torch.bmm(x.pow(2), attn).squeeze(dim)  # (B, F)
        variance = sq_mean - mean.pow(2)

        # E[x^2] - E[x]^2 can come out slightly negative (or exactly 0)
        # numerically; clamp so sqrt never sees <= 0 and its gradient
        # stays finite. Replaces the original mask-multiply idiom.
        variance = variance.clamp(min=self.eps)
        stddev = variance.sqrt()

        # mean: (B, F), stddev: (B, F) -> (B, 2F)
        return torch.cat((mean, stddev), dim=-1)