RuntimeError: Found dtype Long but expected Double

Anna_Inberg · September 6, 2022, 10:09am

Good afternoon,

I’m a newbie in PyTorch, building a binary classification model based on 2 inputs: images and numeric data.
Here’s the custom dataset code and the model as well:

class FaceLandmarksDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, data_frame, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 2])
        image = color.rgba2rgb(io.imread(img_name))
        landmarks = self.data_frame.iloc[idx, 3:]
        landmarks = np.array([landmarks]).astype('float').reshape(-1, 2)
        labels = self.data_frame.iloc[idx, 1].reshape(1)
        # labels = labels.squeeze()
        sample = {'image': image, 'landmarks': landmarks, 'labels': labels}

        if self.transform:
            sample = self.transform(sample)

        return sample



class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, landmarks, labels = sample['image'], sample['landmarks'], sample['labels']

        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))

        # h and w are swapped for landmarks because for images,
        # x and y axes are axis 1 and 0 respectively
        landmarks = landmarks * [new_w / w, new_h / h]
        return {'image': img, 'landmarks': landmarks, 'labels': labels}


class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, landmarks, labels = sample['image'], sample['landmarks'], sample['labels']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        img_torch = torch.from_numpy(image)
        landmarks_torch = torch.from_numpy(landmarks.flatten())
        labels_torch = torch.from_numpy(labels).type(torch.float64)

        return {'image': img_torch,
                'landmarks': landmarks_torch,
                'labels': labels_torch}

Here’s a printout of data size and dtypes:
Images - torch.Size([3, 224, 224]) torch.float64, numeric - torch.Size([96]) torch.float64 and labels - torch.Size([1]) torch.float64.

class MixedNetwork(nn.Module):
    def __init__(self):
        super(MixedNetwork, self).__init__()
        
        image_modules = list(models.resnet50().children())[:-1]
        self.image_features = nn.Sequential(*image_modules)

        self.landmark_features = nn.Sequential(
            nn.Linear(in_features=96, out_features=192,bias=False), 
            nn.ReLU(inplace=True), 
            nn.Dropout(p=0.25),
            nn.Linear(in_features=192,out_features=1000,bias=False), 
            nn.ReLU(inplace=True), 
            nn.Dropout(p=0.25))
        
        self.combined_features = nn.Sequential(
            nn.Linear(3048, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32,1))
        
    def forward(self, image, landmarks):
        a = self.image_features(image)
        print("shape of a", a.shape)
        b = self.landmark_features(landmarks)
        print("shape of b", b.shape)
        x = torch.cat((a.view(a.size(0), -1), b.view(b.size(0), -1)), dim=1)
        x = self.combined_features(x)
        x = F.sigmoid(x)
        return x

class Trainer():
    
    def __init__(self,criterion = None,optimizer = None,schedular = None):
        
        self.criterion = criterion
        self.optimizer = optimizer
        self.schedular = schedular
    
    def train_batch_loop(self,model,train_dataloader):
        
        train_loss = 0.0
        train_acc = 0.0
        
        for sample in train_dataloader:
            # move the data to CPU
            images = sample["image"].type(torch.cuda.DoubleTensor).to(device)
            landmarks = sample["landmarks"].type(torch.cuda.DoubleTensor).to(device)
            labels = sample["labels"].type(torch.cuda.DoubleTensor).to(device)
                    
            self.optimizer.zero_grad()
            logits = model(images, landmarks)
            labels=labels.to(torch.int64)
            loss = self.criterion(logits, labels)
            
            loss.backward()
            self.optimizer.step()
            
            train_loss += loss.item()
            train_acc += accuracy(logits, labels)
            
        return train_loss / len(train_dataloader), train_acc / len(train_dataloader) 

    
    def valid_batch_loop(self,model,val_dataloader):
        
        valid_loss = 0.0
        valid_acc = 0.0
        
        for sample in val_dataloader:
            # move the data to CPU
            images = sample["image"].type(torch.cuda.DoubleTensor).to(device)
            landmarks = sample["landmarks"].type(torch.cuda.DoubleTensor).to(device)
            labels = sample["labels"].type(torch.cuda.DoubleTensor).to(device)

            self.optimizer.zero_grad()
            logits = model(images, landmarks)
            loss = self.criterion(logits,labels)
            
            valid_loss += loss.item()
            valid_acc += accuracy(logits,labels)
            
        return valid_loss / len(val_dataloader), valid_acc / len(val_dataloader)
            
        
    def fit(self,model,trainloader,validloader,epochs):
        
        valid_min_loss = np.Inf 
        
        for i in range(epochs):
            
            model.train() # this turn on dropout
            avg_train_loss, avg_train_acc = self.train_batch_loop(model,trainloader) ###
            
            model.eval()  # this turns off the dropout lapyer and batch norm
            avg_valid_loss, avg_valid_acc = self.valid_batch_loop(model,validloader) ###
            
            if avg_valid_loss <= valid_min_loss :
                print("Valid_loss decreased {} --> {}".format(valid_min_loss,avg_valid_loss))
                torch.save(model.state_dict(),'ColabCatPainModel.pt')
                valid_min_loss = avg_valid_loss

                
            print("Epoch : {} Train Loss : {:.6f} Train Acc : {:.6f}".format(i+1, avg_train_loss, avg_train_acc))
            print("Epoch : {} Valid Loss : {:.6f} Valid Acc : {:.6f}".format(i+1, avg_valid_loss, avg_valid_acc))

model = MixedNetwork()
model.double()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters() , lr = 0.001)
epochs = 25
batch_size = 1

trainer = Trainer(criterion,optimizer)
trainer.fit(model, train_dataloader, val_dataloader,epochs = epochs)

Now, I keep getting the error message:

RuntimeError: Found dtype Long but expected Double

I don’t understand where this Long is coming from - I’ve converted everything to Double in Trainer class. Could you, please, help?

parthshah231 · September 6, 2022, 11:12am

Could you post the complete stack trace?

Anna_Inberg · September 6, 2022, 11:45am

shape of a torch.Size([1, 2048, 1, 1])
shape of b torch.Size([1, 1000])
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.
  warnings.warn("nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.")
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-56fcd5d5ed8c> in <module>
     11 
     12 trainer = Trainer(criterion,optimizer)
---> 13 trainer.fit(model, train_dataloader, val_dataloader,epochs = epochs)

4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   3081         weight = weight.expand(new_size)
   3082 
-> 3083     return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
   3084 
   3085 

RuntimeError: Found dtype Long but expected Double

parthshah231 · September 6, 2022, 12:05pm

Okay, could to try to add .float() after your torch.from_numpy() calls, for example in Tensor class
Mostly, that should solve it.

Anna_Inberg · September 6, 2022, 12:18pm

I added .float() but still, the mistake is the same

Here’s the modified ToTensor() function:

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, landmarks, labels = sample['image'], sample['landmarks'], sample['labels']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        img_torch = torch.from_numpy(image).float()
        landmarks_torch = torch.from_numpy(landmarks.flatten()).float()
        labels_torch = torch.from_numpy(labels).float()

        return {'image': img_torch,
                'landmarks': landmarks_torch,
                'labels': labels_torch}

parthshah231 · September 6, 2022, 12:38pm

Is the RuntimeError still with respect to Long or did it now find Float instead? because just calling it float would convert in to float32 by default but then your type for labels is .type(torch64)

Anna_Inberg · September 6, 2022, 1:03pm

Here’s the printout of input sizes and dtypes from the dataloader:

torch.Size([3, 224, 224]) torch.float32 torch.Size([96]) torch.float32 torch.Size([1]) torch.float32

Error message after model.fit:

shape of a torch.Size([1, 2048, 1, 1])
shape of b torch.Size([1, 1000])
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:1960: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.
  warnings.warn("nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.")
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-12-56fcd5d5ed8c> in <module>
     11 
     12 trainer = Trainer(criterion,optimizer)
---> 13 trainer.fit(model, train_dataloader, val_dataloader,epochs = epochs)

4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   3081         weight = weight.expand(new_size)
   3082 
-> 3083     return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
   3084 
   3085 

RuntimeError: Found dtype Long but expected Double

ptrblck · September 6, 2022, 4:42pm

Inside your training loop you are transforming the labels tensor to long while a FloatTensor is expected for nn.BCELoss:

labels=labels.to(torch.int64)

Remove this line of code and use the same dtype (float or double) as is used for the model parameters and its output.

Anna_Inberg · September 6, 2022, 5:22pm

Thanks! The error does not show up anymore.
Could you, please, help with one more thing?

I need to add some status printouts in the training loop. The only printout in fit function does not show anything. What’s the best way to do it?

ptrblck · September 6, 2022, 5:31pm

The print statements in the fit method look correct and I don’t know why they wouldn’t show up. Could you add more debug print statements e.g. at the beginning of the fit method and make sure it’s shown?

Anna_Inberg · September 6, 2022, 6:06pm

Done - added more prints and now I can see the training progress. Every epoch takes about 5 min.
@ parthshah231 and @ ptrblck - thanks to your help my model started to work!