RuntimeError: Found dtype Double but expected Float during (loss.backward())

Hi Team, I’m performing object detection task. I’m getting below error,

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_8392/1841461244.py in <module>
      2     _n = len(train_loader)
      3     for ix, inputs in enumerate(train_loader):
----> 4         loss, losses = train_batch(inputs, model, optimizer)
      5         loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
      6             [losses[k] for k in ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']]

~\AppData\Local\Temp/ipykernel_8392/1903493105.py in train_batch(inputs, model, optimizer)
      8     losses = model(input_, targets)
      9     loss = sum(loss for loss in losses.values())
---> 10     loss.backward()
     11     optimizer.step()
     12     return loss, losses

~\miniconda3\envs\pytorch_gpu\lib\site-packages\torch\_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    253                 create_graph=create_graph,
    254                 inputs=inputs)
--> 255         torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
    256 
    257     def register_hook(self, hook):

~\miniconda3\envs\pytorch_gpu\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    147     Variable._execution_engine.run_backward(
    148         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 149         allow_unreachable=True, accumulate_grad=True)  # allow_unreachable flag
    150 
    151 

RuntimeError: Found dtype Double but expected Float

please find the code which I used,

from torch_snippets import *
from PIL import Image
import glob, numpy as np, cv2, warnings,random, albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
warnings.filterwarnings('ignore')

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

IMAGE_ROOT = 'images'
DF_RAW = pd.read_csv('train_labels.csv')
DF_RAW['image_id'] = DF_RAW['filename'].apply(lambda x: x.split('.')[0])
DF_RAW['labels'] = DF_RAW['class'].apply(lambda x: 1 if x=='car' else 0)

label2target = {l:t+1 for t,l in enumerate(DF_RAW['class'].unique())}
label2target['background'] = 0
target2label = {t:l for l,t in label2target.items()}
background_class = label2target['background']
num_classes = len(label2target)

def preprocess_image(img):
    img = torch.tensor(img).permute(2,0,1)
    return img.to(device).float()

class OpenDataset(torch.utils.data.Dataset):
    def __init__(self, df, image_folder=IMAGE_ROOT,transforms = None):
        self.root = image_folder
        self.df = df
        self.unique_images = df['image_id'].unique()
        self.transforms = transforms
    def __len__(self): return len(self.unique_images)
    def __getitem__(self, ix):
        image_id = self.unique_images[ix]
        #image_path = f'{self.root}/{image_id}.jpg'
        image = cv2.imread(f'{self.root}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image = np.array(image)/255
        df = self.df.copy()
        df = df[df['image_id'] == image_id]
        boxes = df[['xmin','ymin','xmax','ymax']].values
        classes = df['class'].values
        target = {}
        target["boxes"] = boxes
        #target["labels"] = torch.Tensor([label2target[i] for i in classes]).long()
        target["labels"] = torch.Tensor([label2target[i] for i in classes]).long()

        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': torch.Tensor([label2target[i] for i in classes]).long()
            }
            sample = self.transforms(**sample)

            target["boxes"] = torch.as_tensor(sample['bboxes'])
            #target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        image = preprocess_image(image)
        
        return image, target
    def collate_fn(self, batch):
        return tuple(zip(*batch)) 


def get_train_transform():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

from sklearn.model_selection import train_test_split
trn_ids, val_ids = train_test_split(DF_RAW['image_id'].unique(), test_size=0.1, random_state=99)
trn_df, val_df = DF_RAW[DF_RAW['image_id'].isin(trn_ids)], DF_RAW[DF_RAW['image_id'].isin(val_ids)]
print(len(trn_df), len(val_df))
 
train_ds = OpenDataset(trn_df,transforms = get_train_transform())
test_ds = OpenDataset(val_df,transforms = get_valid_transform())
 
train_loader = DataLoader(train_ds, batch_size=2, collate_fn=train_ds.collate_fn, drop_last=True,shuffle=True)
test_loader = DataLoader(test_ds, batch_size=2, collate_fn=test_ds.collate_fn, drop_last=True,shuffle=False)

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
def get_model():
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Defining training and validation functions for a single batch
def train_batch(inputs, model, optimizer):
    model.train()
    input_, targets = inputs
    input_ = list(image.to(device) for image in input_)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    optimizer.zero_grad()
    losses = model(input_, targets)
    loss = sum(loss for loss in losses.values())
    loss.backward()
    optimizer.step()
    return loss, losses
 
@torch.no_grad() # this will disable gradient computation in the function below
def validate_batch(inputs, model):
    model.train() # to obtain the losses, model needs to be in train mode only. # #Note that here we are not defining the model's forward method 
#and hence need to work per the way the model class is defined
    input_, targets = inputs
    input_ = list(image.to(device) for image in input_)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
 
    optimizer.zero_grad()
    losses = model(input_, targets)
    loss = sum(loss for loss in losses.values())
    return loss, losses


model = get_model().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
n_epochs = 5
log = Report(n_epochs)

for epoch in range(n_epochs):
    _n = len(train_loader)
    for ix, inputs in enumerate(train_loader):
        loss, losses = train_batch(inputs, model, optimizer)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']]
        pos = (epoch + (ix+1)/_n)
        log.record(pos, trn_loss=loss.item(), trn_loc_loss=loc_loss.item(), 
                   trn_regr_loss=regr_loss.item(), trn_objectness_loss=loss_objectness.item(),
                   trn_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')
 
    _n = len(test_loader)
    for ix,inputs in enumerate(test_loader):
        loss, losses = validate_batch(inputs, model)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
          [losses[k] for k in ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']]
        pos = (epoch + (ix+1)/_n)
        log.record(pos, val_loss=loss.item(), val_loc_loss=loc_loss.item(), 
                  val_regr_loss=regr_loss.item(), val_objectness_loss=loss_objectness.item(),
                  val_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')
    if (epoch+1)%(n_epochs//5)==0: log.report_avgs(epoch+1)

I’m trying to fix this, it would be help if anyone can help me here.

Thanks in advance.

Could you replace the data loading with some random tensors and check if the error would still be reproducible? If so, could you then post it so that we could debug? If not, I would guess that your data loading is creating some inputs in float64, which might be creating the issue so you could try to narrow down which of these tensors is could be.

I encounter a similar issue today but the weird part is that when using l1_loss everything works well but after I switch to mse_loss, the RuntimeError happens.

I figured out, where it is happening. When I print the “losses.values()” one of the loss values is torch.float64. But when I tried to change its dtype, its still throwing the same error.

@ptrblck could you please help me to sort this out?

tensor(1.0891, device='cuda:0', grad_fn=<NllLossBackward>)
torch.float32
tensor(0.0064, device='cuda:0', grad_fn=<DivBackward0>)
torch.float32
tensor(0.0779, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
torch.float64
tensor(0.0163, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
loss values: dict_values([tensor(1.0891, device='cuda:0', grad_fn=<NllLossBackward>), tensor(0.0064, device='cuda:0', grad_fn=<DivBackward0>), tensor(0.0779, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), tensor(0.0163, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)])

Another findings, it only happens when I implement the data augmentation. Please find the loss value below,

If I remove the data augmentation part, I can run the code with no error.

loss values: dict_values([tensor(1.0371, device='cuda:0', grad_fn=<NllLossBackward>), tensor(0.2726, device='cuda:0', grad_fn=<DivBackward0>), tensor(0.0177, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)])

can you try this:

 loss = sum(loss for loss.float() in losses.values())

I’m not sure, why the backward pass is raising the issue. If the dtype is wrong the forward should already complain. Could you still post an executable code snippet so that I could debug it, please?
Also, which PyTorch version are you using?

Hi @ptrblck, Please find the code,
torch version - “1.9.0”

I have two labels - ‘car’ and ‘person’ + ‘background’

from torch_snippets import *
from PIL import Image
import glob, numpy as np, cv2, warnings,random, albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
warnings.filterwarnings('ignore')

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

IMAGE_ROOT = 'images'
DF_RAW = pd.read_csv('train_labels.csv')
DF_RAW['image_id'] = DF_RAW['filename'].apply(lambda x: x.split('.')[0])
DF_RAW['labels'] = DF_RAW['class'].apply(lambda x: 1 if x=='car' else 0)

label2target = {l:t+1 for t,l in enumerate(DF_RAW['class'].unique())}
label2target['background'] = 0
target2label = {t:l for l,t in label2target.items()}
background_class = label2target['background']
num_classes = len(label2target)

def preprocess_image(img):
    img = torch.tensor(img).permute(2,0,1)
    return img.to(device).float()

class OpenDataset(torch.utils.data.Dataset):
    def __init__(self, df, image_folder=IMAGE_ROOT,transforms = None):
        self.root = image_folder
        self.df = df
        self.unique_images = df['image_id'].unique()
        self.transforms = transforms
    def __len__(self): return len(self.unique_images)
    def __getitem__(self, ix):
        image_id = self.unique_images[ix]
        #image_path = f'{self.root}/{image_id}.jpg'
        image = cv2.imread(f'{self.root}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image = np.array(image)/255
        df = self.df.copy()
        df = df[df['image_id'] == image_id]
        boxes = df[['xmin','ymin','xmax','ymax']].values
        classes = df['class'].values
        target = {}
        target["boxes"] = boxes
        #target["labels"] = torch.Tensor([label2target[i] for i in classes]).long()
        target["labels"] = torch.Tensor([label2target[i] for i in classes]).long()

        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': torch.Tensor([label2target[i] for i in classes]).long()
            }
            sample = self.transforms(**sample)

            target["boxes"] = torch.as_tensor(sample['bboxes'])
            #target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        image = preprocess_image(image)
        
        return image, target
    def collate_fn(self, batch):
        return tuple(zip(*batch)) 


def get_train_transform():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

from sklearn.model_selection import train_test_split
trn_ids, val_ids = train_test_split(DF_RAW['image_id'].unique(), test_size=0.1, random_state=99)
trn_df, val_df = DF_RAW[DF_RAW['image_id'].isin(trn_ids)], DF_RAW[DF_RAW['image_id'].isin(val_ids)]
print(len(trn_df), len(val_df))
 
train_ds = OpenDataset(trn_df,transforms = get_train_transform())
test_ds = OpenDataset(val_df,transforms = get_valid_transform())
 
train_loader = DataLoader(train_ds, batch_size=2, collate_fn=train_ds.collate_fn, drop_last=True,shuffle=True)
test_loader = DataLoader(test_ds, batch_size=2, collate_fn=test_ds.collate_fn, drop_last=True,shuffle=False)

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
def get_model():
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Defining training and validation functions for a single batch
def train_batch(inputs, model, optimizer):
    model.train()
    input_, targets = inputs
    input_ = list(image.to(device) for image in input_)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    optimizer.zero_grad()
    losses = model(input_, targets)
    loss = sum(loss for loss in losses.values())
    loss.backward()
    optimizer.step()
    return loss, losses
 
@torch.no_grad() # this will disable gradient computation in the function below
def validate_batch(inputs, model):
    model.train() # to obtain the losses, model needs to be in train mode only. # #Note that here we are not defining the model's forward method 
#and hence need to work per the way the model class is defined
    input_, targets = inputs
    input_ = list(image.to(device) for image in input_)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
 
    optimizer.zero_grad()
    losses = model(input_, targets)
    loss = sum(loss for loss in losses.values())
    return loss, losses


model = get_model().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
n_epochs = 5
log = Report(n_epochs)

for epoch in range(n_epochs):
    _n = len(train_loader)
    for ix, inputs in enumerate(train_loader):
        loss, losses = train_batch(inputs, model, optimizer)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']]
        pos = (epoch + (ix+1)/_n)
        log.record(pos, trn_loss=loss.item(), trn_loc_loss=loc_loss.item(), 
                   trn_regr_loss=regr_loss.item(), trn_objectness_loss=loss_objectness.item(),
                   trn_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')
 
    _n = len(test_loader)
    for ix,inputs in enumerate(test_loader):
        loss, losses = validate_batch(inputs, model)
        loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
          [losses[k] for k in ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']]
        pos = (epoch + (ix+1)/_n)
        log.record(pos, val_loss=loss.item(), val_loc_loss=loc_loss.item(), 
                  val_regr_loss=regr_loss.item(), val_objectness_loss=loss_objectness.item(),
                  val_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')
    if (epoch+1)%(n_epochs//5)==0: log.report_avgs(epoch+1)

What was the fix, a I’ve been trying to make the code snippet executable and didn’t have a chance yet to debug it?

Apologies @ptrblck , I couldn’t fix it. Need your support.

@ptrblck , did you get a chance to look into this issue?

No, sorry as I haven’t had time to figure out the shapes of all tensors etc. Could you post the initialization of random tensors, which would reproduce the issue using the model?