# Casting to double() does not work for one of the batches in the iteration

I am facing some instability when training my model in a continual-learning setting. I have 20 tasks, and after task 12 the accuracy drops to 0.0007, so the model effectively stops learning. My hypothesis is that the problem may be numerical instability, so I would like to train in higher precision, i.e. float64. To do so, I am changing my code as follows:

``````def update_model(self, x, y, criterion, optimizer):
# Run one optimisation step on the batch (x, y): forward pass, loss,
# backward pass and optimizer.step(). Returns a 3-tuple of
# (loss value, number of top-k predictions equal to the label, batch size).
# NOTE: indentation was lost in this excerpt; the nesting has to be
# inferred from the if/else order.
# check the label type, output of the bayesian model

# cutmix is only applied when self.cutmix is set, and then on roughly half
# of the calls (uniform random draw below 0.5)
do_cutmix = self.cutmix and np.random.rand(1) < 0.5
if do_cutmix:
x, labels_a, labels_b, lam = cutmix_data(x=x, y=y, alpha=1.0)

# the input and both mixed label sets are cast to float64 in this branch only
# NOTE(review): labels become floating point here - confirm that criterion
# accepts float targets
x = x.double()
labels_a = labels_a.double()
labels_b = labels_b.double()

# take care of the output of the bayesian model and its probabilistic loss
if self.bayesian:
# the model is converted to double on every call
self.model.double()
logit_dict = self.model(x)

# cutmix loss: lam-weighted combination of the losses against both label sets
loss = lam * criterion(logit_dict, labels_a)['total_loss'] + (1 - lam) * criterion(
logit_dict, labels_b)['total_loss']
#loss = losses_dict['total_loss']
# the prediction is taken from the criterion output for labels_a and averaged
# over dim 2 (the sample dimension according to the shape comment further down)
logit = criterion(logit_dict, labels_a)['prediction']
logit = logit.mean(dim=2)
else:
self.model.double()
logit = self.model(x)
loss = lam * criterion(logit, labels_a) + (1 - lam) * criterion(
logit, labels_b
)
else:
# NOTE: unlike the cutmix branch, x is NOT cast to double anywhere in this
# branch; it relies on the caller having already passed a float64 tensor

if self.bayesian:
# measure forward pass time
#t_start = time.time()
self.model.double()
logit_dict = self.model(x)
#t_end = time.time() - t_start
# logger.info(f'forward pass time: {t_end:.2f} s')

# criterion is the probabilistic loss class
#t_s = time.time()
losses_dict = criterion(logit_dict, y)
#t_e = time.time() - t_s
#logger.info(f'loss time: {t_e:.2f} s')

loss = losses_dict['total_loss']
logit = losses_dict['prediction'] # Shape: torch.Size([10, 10, 64]) --> (batch_size, num_classes, samples)
# change the shape of the logit to be (batch_size, num_classes)
logit = logit.mean(dim=2)
else:
self.model.double()
logit = self.model(x)
loss = criterion(logit, y)

# calculate the number of correct predictions per batch for the bayesian model as well here
# preds holds the indices of the self.topk largest entries along dim 1
_, preds = logit.topk(self.topk, 1, True, True)

loss.backward()
''' ToDo: is it necessary to clip the gradient? it was done in mnvi code
Maybe they didn't need it but I'm not sure. For the Bayesian case, it is probably needed.
'''
# NOTE: the body of the following `if` is missing from this excerpt
# (presumably the gradient clipping mentioned in the ToDo above - unconfirmed)
if self.bayesian:

optimizer.step()
# y.unsqueeze(1) lets the labels be compared against each of the top-k columns of preds
return loss.item(), torch.sum(preds == y.unsqueeze(1)).item(), y.size(0)

# Excerpt of the training loop: the parameter list, the selection of
# data_iterator and the bodies of some branches are elided in this snippet,
# and the original indentation was lost.
def _train(
):

# running totals of loss, correct predictions and samples seen
total_loss, correct, num_data = 0.0, 0.0, 0.0

self.model.train()
# NOTE: the `if` that this `else` belongs to is not shown in the excerpt
else:
raise NotImplementedError("None of dataloder is valid")

for i, data in enumerate(data_iterator):
# a 2-element item is unpacked as (stream batch, memory batch) and the two
# are concatenated into a single batch; otherwise the item is used directly
if len(data) == 2:
stream_data, mem_data = data
x = torch.cat([stream_data["image"], mem_data["image"]])
y = torch.cat([stream_data["label"], mem_data["label"]])
else:
x = data["image"]
y = data["label"]
# set to double
# both inputs and labels are cast to float64 before being moved to the device
# NOTE(review): casting labels to double - confirm that criterion accepts float targets
x = x.double().to(self.device)
y = y.double().to(self.device)

# this is equivalent to the step code in the test repo
l, c, d = self.update_model(x, y, criterion, optimizer)
# Compute the moving averages - equivalent to MovingAverage in the test repo
total_loss += l
correct += c
num_data += d

# NOTE: the body of this trailing `else` is elided in the excerpt
else:

``````

but I get this error:

``````
outputs_mean = F.conv2d(
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.DoubleTensor) should be the same
``````

I did some debugging by printing the dtype of the inputs to the first layer. I see that the batch of iteration 5 is actually in float32!
Am I doing the casting correctly? I have already deactivated the cutmix augmentation and the error still persists, so cutmix cannot be the cause.
I would appreciate your help with both the stability issue and the casting to float64.

Hi Sam!

In your `if do_cutmix:` section of code, you cast `x` to `double()`, but
in the associated `else:` section, you don’t. Could the `else` branch be
causing the problem?

Best.

K. Frank