Hello,
I’m trying to train an adversarial network using PyTorch’s BCELoss. The discriminator network, a training code snippet, and the error message are provided below. I’m training the network for only 5 epochs: the first 4 epochs complete without any error, but the fifth epoch fails with a runtime error. Any suggestions, please?
Discriminator Network
class Discriminator(nn.Module):
    """Convolutional discriminator that emits one probability per sample.

    Two unpadded ``(1, k)`` convolutions, each followed by a LeakyReLU, feed
    a fully connected head that ends in a sigmoid, so every output lies in
    ``[0, 1]``.

    The head expects 1536 (= 512 * 1 * 3) features per sample. With the
    (1, 5) and (1, 3) kernels used here, an input of shape
    ``(batch, 128, 1, 9)`` satisfies that, for example.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        self.Dconv1 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 5)),
            nn.LeakyReLU(negative_slope=0.2),
        )
        self.Dconv2 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 3)),
            nn.LeakyReLU(negative_slope=0.2),
        )
        self.Dfc1 = nn.Sequential(
            nn.Linear(in_features=1536, out_features=256),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Dropout(),
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid(),
        )
        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear submodule.

        Conv2d weights are drawn from N(0, sqrt(2 / n)) with
        n = kernel_h * kernel_w * out_channels; Linear weights from
        N(0, 0.01); BatchNorm2d weights are set to 1. All biases are zeroed.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2.0 / n))
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                nn.init.zeros_(m.bias)

    def forward(self, input):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for ``input``.

        Raises:
            RuntimeError: if the convolutional features of a sample do not
                flatten to the 1536 values the first Linear layer expects.
        """
        out = self.Dconv1(input)
        out = self.Dconv2(out)
        # Flatten per sample rather than reshape(-1, 1536): a wrongly sized
        # input then fails loudly in the Linear layer instead of silently
        # spilling extra spatial positions into the batch dimension.
        out = out.flatten(1)
        out = self.Dfc1(out)
        return out
Training Code Snippet
# Fragment of the training loop: computes two discriminator losses, one per
# source (S1, S2), each paired with a target term.
# The post says `loss` is BCELoss; `ones_target` / `zeros_target` are defined
# elsewhere and presumably build label tensors of the requested length (confirm).

# Number of entries in each batch, used to size the label tensors.
s1_length = len(s1_source)
s2_length = len(s2_source)
# NOTE(review): t_length is taken from s1_target but is also used to size the
# labels for s2_target below; confirm both always have the same length.
t_length= len(s1_target)
logging.warning("Iteration: %d, S1 length: %d, S2 length: %d, Target length: %d", i, s1_length, s2_length, t_length)
# S1 pair: source entries are scored against ones, target entries against zeros.
s1_error_fake = loss(s1_source, ones_target(s1_length))
s1_error_real = loss(s1_target, zeros_target(t_length))
s1_t_dis_loss = s1_error_fake + s1_error_real
# S2 pair: same scheme as S1.
s2_error_fake = loss(s2_source, ones_target(s2_length))
s2_error_real = loss(s2_target, zeros_target(t_length))
s2_t_dis_loss = s2_error_fake + s2_error_real
logging.warning("S1 Disc loss: %s, S2 Disc Loss: %s", s1_t_dis_loss.data, s2_t_dis_loss.data)
Error Message
146it [00:02, 53.81it/s]
146it [00:02, 56.52it/s]
146it [00:02, 57.55it/s]
42it [00:00, 58.23it/s]
RuntimeError Traceback (most recent call last)
in
63 logging.warning("Iteration: %d, S1 length: %d, S2 length: %d, Target length: %d", i, s1_length, s2_length, t_length)
64
---> 65 s1_error_fake = loss(s1_source, ones_target(s1_length))
66 s1_error_real = loss(s1_target, zeros_target(t_length))
67 s1_t_dis_loss = s1_error_fake + s1_error_real
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
→ 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
510 @weak_script_method
511 def forward(self, input, target):
→ 512 return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
513
514
~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2111
2112 return torch._C._nn.binary_cross_entropy(
→ 2113 input, target, weight, reduction_enum)
2114
2115
RuntimeError: reduce failed to synchronize: device-side assert triggered
1