RuntimeError: "div_cpu" not implemented for 'Bool'

Hello, I'm a beginner with PyTorch. Today I ran my code on the CIFAR-10 dataset, but this error happened.
However, when I run it on someone else's computer, it works fine, so I don't know whether it's a problem with my PyTorch environment.

Also, when I run "conda list" in the Anaconda Prompt, it shows the CUDA version as 10.0, but when I run "nvidia-smi" in cmd, it shows the CUDA version as 11.0.

I don't know why.

Here is my code:

import torch
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size=4

trainset = torchvision.datasets.CIFAR10(root='./data_cifar10', train=True,
                                        download=True, transform=tf)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data_cifar10', train=False,
                                       download=True, transform=tf)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')


class Net(nn.Module):  
    def __init__(self):
        super(Net, self).__init__()  
        self.conv1 = nn.Conv2d(3, 6, 5)  
        self.pool = nn.MaxPool2d(2, 2)  
        self.conv2 = nn.Conv2d(6, 16, 5)  
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):  
        x = self.pool(F.relu(self.conv1(x))) 
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5) 
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



net = Net()

criterion = nn.CrossEntropyLoss()    # cross-entropy loss function
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)   

def train(epoch):
    running_loss = 0.0 
    for i, data in enumerate(trainloader, 0): 
        inputs, labels = data  
        inputs, labels = Variable(inputs), Variable(labels)  
        optimizer.zero_grad()  


        outputs = net(inputs) 
        loss = criterion(outputs, labels)  
        loss.backward()  
        optimizer.step()  
        running_loss += loss.item()  
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000)) 
            running_loss = 0.0  

    print('Finished Training')


def test_total():
    correct = 0  
    total = 0  
    for data in testloader:  
        images, labels = data
        outputs = net(Variable(images)) 
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)  # update the number of test images; here labels.size(0) = images.size(0) = batch_size = 4
        correct += (predicted == labels).sum()  

    print('Accuracy of the network on the 10000 test images: %d %%' % (
            100 * correct / total)) 

def test_class():
    class_correct = list(0. for i in range(10))  # number of correct predictions per class, initialized to 0
    class_total = list(0. for i in range(10))  # total number of test samples per class, initialized to 0
    for data in testloader:  # loop over the test set one batch at a time
        images, labels = data
        outputs = net(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):  # each batch holds 4 images, so loop over them individually
            label = labels[i]  # accumulate the counts for each class separately
            class_correct[label] += c[i]
            class_total[label] += 1

    for i in range(10):
        print(classes[i])
        print(100 * class_correct[i] / class_total[i],'\n')


for epoch in range(3):
    train(epoch)
    test_total()
    test_class()

Could you post the complete error message, please?

If you’ve installed the PyTorch binaries, they will use the specified cudatoolkit and will not use the system CUDA installation.
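As a quick sanity check (a minimal sketch; torch.version.cuda reports the toolkit the binaries were built with, while nvidia-smi reports the highest CUDA version your driver supports), you can compare both from Python:

import torch

# CUDA toolkit version the PyTorch binaries ship with
# (should match the cudatoolkit entry shown by "conda list")
print(torch.version.cuda)

# confirms the driver and GPU are usable from this environment
print(torch.cuda.is_available())

A mismatch between these two numbers is expected and harmless, as long as the driver's CUDA version is at least as new as the one the binaries were built with.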

Thanks for your reply. Here is my error message. The code only works in the first training epoch.
If I delete the outputs in test_class, it trains well and tests well, but then I cannot see the per-class classification accuracy.

C:\Users\asus\anaconda3\envs\pytorch\python.exe C:/Users/asus/PycharmProjects/first_pytorch/CIFAR-10/qqqqqqq.py
Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.199
[1,  4000] loss: 1.851
[1,  6000] loss: 1.665
[1,  8000] loss: 1.550
[1, 10000] loss: 1.501
[1, 12000] loss: 1.445
Finished Training
Accuracy of the network on the 10000 test images: 48 %
Traceback (most recent call last):
  File "C:/Users/asus/PycharmProjects/first_pytorch/CIFAR-10/qqqqqqq.py", line 108, in <module>
    test_class()
  File "C:/Users/asus/PycharmProjects/first_pytorch/CIFAR-10/qqqqqqq.py", line 102, in test_class
    print(100 * class_correct[i] / class_total[i],'\n')
RuntimeError: "div_cpu" not implemented for 'Bool'
plane

Process finished with exit code 1

I cannot reproduce this error locally using bool tensors for class_correct or class_total.
However, based on your code I assume that class_correct is a list containing BoolTensors.
If that’s the case, you could cast these tensors to float() before dividing them by class_total[i].
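For reference, a minimal sketch of that cast inside test_class (assuming c comes from (predicted == labels).squeeze(), as in the code above):

        c = (predicted == labels).squeeze()
        c = c.float()  # cast the BoolTensor to float so the later division works
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i]
            class_total[label] += 1

Alternatively, you could accumulate plain Python numbers with class_correct[label] += c[i].item(), which avoids keeping tensors in the list altogether.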


Thanks a lot! I just added c = c.float() and now it runs well.