An error occurred when using the pre-training model inception_v3 in pytorch

import torch
from torchvision import models

##input image size:299*299

Traceback (most recent call last):
File “/home/lab326/songpeng/my_code/”, line 545, in
Loss, Accuracy=main()
File “/home/lab326/songpeng/my_code/”, line 490, in main
train(train_loader, model, criterion, optimizer, epoch)
File “/home/lab326/songpeng/my_code/”, line 165, in train
loss = criterion(y_pred, label)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/modules/”, line 477, in call
result = self.forward(*input, **kwargs)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/modules/”, line 862, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/”, line 1550, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/”, line 975, in log_softmax
return input.log_softmax(dim)
AttributeError: ‘tuple’ object has no attribute ‘log_softmax’

It says your y_pred variable is a tuple. Could you post your training code?

I don’t think it has anything to do with this y_pred, because I didn’t have a similar problem when I called resnet and vgg.

def main():

# 训练集图片读取
class TrainDataset(Dataset):
    def __init__(self, label_list, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        for index, row in label_list.iterrows():
            imgs.append((row['img_path'], row['labels']))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        filename, label = self.imgs[index]
        img = self.loader(filename)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

# 验证集图片读取
class ValDataset(Dataset):
    def __init__(self, label_list, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        for index, row in label_list.iterrows():
            imgs.append((row['img_path'], row['labels']))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        filename, label = self.imgs[index]
        img = self.loader(filename)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

# 测试集图片读取
class TestDataset(Dataset):
    def __init__(self, label_list, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        for index, row in label_list.iterrows():
            imgs.append((row['img_path'], row['labels']))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        filename, label = self.imgs[index]
        img = self.loader(filename)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

# 数据增强:在给定角度中随机进行旋转
class FixedRotation(object):
    def __init__(self, angles):
        self.angles = angles

    def __call__(self, img):
        return fixed_rotate(img, self.angles)

def fixed_rotate(img, angles):
    angles = list(angles)
    angles_num = len(angles)
    index = random.randint(0, angles_num - 1)
    return img.rotate(angles[index])

# 训练函数
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading
        data_time.update(time.time() - end)

        result = []
        for temp in target:
            temp1 = temp.replace("'", "")
            temp2 = temp1.replace(",", "")
            temp3 = temp2.replace(" ", "")
            temp3 = [int(m) for m in temp3]
            for j in range(len(temp3)):
                if temp3[j] == 1:
                    l = j
        result1 = np.array(result)
        result2 = torch.from_numpy(result1)
        # print(result2)
        image_var = torch.tensor(images).cuda(async=True)
        label = torch.tensor(result2).cuda(async=True)

        # compute y_pred
        y_pred = model(image_var)
        loss = criterion(y_pred, label)

        # measure accuracy and record loss
        prec, PRED_COUNT = accuracy(, result2, topk=(1, 1))
        losses.update(loss.item(), images.size(0))
        acc.update(prec, PRED_COUNT)

        # compute gradient and do SGD step

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, acc=acc))

# 验证函数
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode

    end = time.time()
    for i, (images, target) in enumerate(val_loader):
        result = []
        for temp in target:
            temp1 = temp.replace("'", "")
            temp2 = temp1.replace(",", "")
            temp3 = temp2.replace(" ", "")
            temp3 = [int(m) for m in temp3]
            for j in range(len(temp3)):
                if temp3[j] == 1:
                    l = j
        result1 = np.array(result)
        result2 = torch.from_numpy(result1)
        image_var = torch.tensor(images).cuda(async=True)
        target = torch.tensor(result2).cuda(async=True)

        # compute y_pred
        with torch.no_grad():
            y_pred = model(image_var)
            loss = criterion(y_pred, target)

        # measure accuracy and record loss
        prec, PRED_COUNT = accuracy(, result2, topk=(1, 1))
        losses.update(loss.item(), images.size(0))
        acc.update(prec, PRED_COUNT)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Val: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc))

    print(' * Accuray {acc.avg:.3f}'.format(acc=acc), '(Previous Best Acc: %.3f)' % best_precision,
          ' * Loss {loss.avg:.3f}'.format(loss=losses), 'Previous Lowest Loss: %.3f)' % lowest_loss)
    # with open('./result/%s.txt' % file_name, 'a') as file:
    #     file.write('VAIL:Precision: %.8f, Loss: %.8f\n' % (acc.avg, losses.avg))
    return acc.avg, losses.avg

# 测试函数
def test(test_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()
    for i, (images, target) in enumerate(test_loader):
        result = []
        for temp in target:
            temp1 = temp.replace("'", "")
            temp2 = temp1.replace(",", "")
            temp3 = temp2.replace(" ", "")
            temp3 = [int(m) for m in temp3]
            for j in range(len(temp3)):
                if temp3[j] == 1:
                    l = j
        result1 = np.array(result)
        result2 = torch.from_numpy(result1)
        image_var = torch.tensor(images).cuda(async=True)
        target = torch.tensor(result2).cuda(async=True)

        # compute y_pred
        with torch.no_grad():
            y_pred = model(image_var)
            loss = criterion(y_pred, target)

        # measure accuracy and record loss
        prec, PRED_COUNT = accuracy(, result2, topk=(1, 1))

        losses.update(loss.item(), images.size(0))
        acc.update(prec, PRED_COUNT)
        batch_time.update(time.time() - end)
        end = time.time()
        if i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                i, len(test_loader), batch_time=batch_time, loss=losses, acc=acc))
        loss_avg = losses.avg
        pre_avg = acc.avg
    print(' * Accuray {acc.avg:.3f}'.format(acc=acc),
          ' * Loss {loss.avg:.3f}'.format(loss=losses))
    with open('./result/%s.txt' % file_name, 'a') as file:
        file.write('TEST:Precision: %.8f, Loss: %.8f\n' % (pre_avg, loss_avg))

# 保存最新模型以及最优模型
def save_checkpoint(state, is_best, is_lowest_loss, filename='./model/%s/checkpoint.pth.tar' % file_name):, filename)
    if is_best:
        shutil.copyfile(filename, './model/%s/model_best.pth.tar' % file_name)
    if is_lowest_loss:
        shutil.copyfile(filename, './model/%s/lowest_loss.pth.tar' % file_name)

# 用于计算精度和时间的变化
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# 学习率衰减:lr = lr / lr_decay
def adjust_learning_rate():

    nonlocal lr
    lr = lr / lr_decay
    return optim.Adam(model.parameters(), lr, weight_decay=weight_decay, amsgrad=True)

# 计算top K准确率
def accuracy(y_pred, y_actual, topk=(1,)):

    """Computes the precision@k for the specified values of k"""
    final_acc = 0
    maxk = max(topk)
    # for prob_threshold in np.arange(0, 1, 0.01):
    PRED_COUNT = y_actual.size(0)
    prob, pred = y_pred.topk(maxk, 1, True, True)
    # prob = np.where(prob > prob_threshold, prob, 0)
    for j in range(pred.size(0)):
        if int(y_actual[j]) == int(pred[j]):
            PRED_CORRECT_COUNT += 1
    if PRED_COUNT == 0:
        final_acc = 0
        final_acc = PRED_CORRECT_COUNT / PRED_COUNT
    return final_acc * 100, PRED_COUNT

# 程序主体

# 设定GPU ID
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
# 小数据集上,batch size不易过大,如出现out of memory,应再调小batch size
batch_size = 64
# 进程数量,最好不要超过电脑最大进程数,尽量能被batch size整除,windows下报错可以改为workers=0
workers = 12

# epoch数量,分stage进行,跑完一个stage后降低学习率进入下一个stage
stage_epochs = [20, 10, 10]
# 初始学习率
lr = 1e-4
# 学习率衰减系数 (new_lr = lr / lr_decay)
lr_decay = 5
# 正则化系数
weight_decay = 1e-4

# 参数初始化
stage = 0
start_epoch = 0
total_epochs = sum(stage_epochs)
best_precision = 0
lowest_loss = 100

# 设定打印频率,即多少step打印一次,用于观察loss和acc的实时变化
# 打印结果中,括号前面为实时loss和acc,括号内部为epoch内平均loss和acc
print_freq = 1
# 验证集比例
val_ratio = 0.12
# 是否只验证,不训练
evaluate = False
# 是否从断点继续跑
resume = False
# 创建inception_v4模型
# model =
# model =
model = models.inception_v3(pretrained=True)
model.aux_logit = False

in_features = model.fc.in_features

model.fc = nn.Linear(in_features, 16)

# print(model)

#, './model.pkl')
# import pickle

# f = open('dict_word.pkl', 'rb')
# for line in f:
#     print(line)
# word = pickle.load(open("./model.pkl", 'rb'), encoding='utf-8')
#    train = pickle.load(open("./model.pkl", 'rb'),encoding='iso-8859-1')
#    print(train)
#    file=open("./model.txt",'r')
#    for temp in train:
#        file.write(temp)


# model = torch.nn.DataParallel(model).cuda()
# optionally resume from a checkpoint
if resume:
    checkpoint_path = './model/%s/checkpoint.pth.tar' % file_name
    if os.path.isfile(checkpoint_path):
        print("=> loading checkpoint '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        best_precision = checkpoint['best_precision']
        lowest_loss = checkpoint['lowest_loss']
        stage = checkpoint['stage']
        lr = checkpoint['lr']
        # 如果中断点恰好为转换stage的点,需要特殊处理
        if start_epoch in np.cumsum(stage_epochs)[:-1]:
            stage += 1

            optimizer = adjust_learning_rate()
            model.load_state_dict(torch.load('./model/%s/model_best.pth.tar' % file_name)['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        print("=> no checkpoint found at '{}'".format(resume))

# 读取训练图片列表
all_data = pd.read_csv('/home/lab326/songpeng/train_val/train.csv')
# 分离训练集和测试集,stratify参数用于分层抽样
train_data_list, val_data_list = train_test_split(all_data, test_size=0.1, stratify=all_data['labels'])
test_data_list = pd.read_csv('/home/lab326/songpeng/train_val/val.csv')
# 读取测试图片列表
# test_data_list = pd.read_csv('data/test.csv')

# 图片归一化,由于采用ImageNet预训练网络,因此这里直接采用ImageNet网络的参数
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# 训练集图片变换,输入网络的尺寸为384*384
train_data = TrainDataset(train_data_list,
                              transforms.Resize((299, 299)),
                              # transforms.ColorJitter(0.15, 0.15, 0.15, 0.075),
                              # transforms.RandomHorizontalFlip(),
                              # transforms.RandomGrayscale(),
                              # # transforms.RandomRotation(20),
                              # FixedRotation([0, 90, 180, 270]),
                              # transforms.RandomCrop(224),

# 验证集图片变换
val_data = ValDataset(val_data_list,
                          transforms.Resize((299, 299)),
                          # transforms.RandomHorizontalFlip(),
                          # transforms.RandomRotation(20),
                          # transforms.CenterCrop(224),

# 测试集图片变换
test_data = TestDataset(test_data_list,
                            transforms.Resize((299, 299)),
                            # transforms.RandomHorizontalFlip(),
                            # transforms.RandomRotation(20),
                            # transforms.CenterCrop(224),

# 生成图片迭代器
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=workers)
val_loader = DataLoader(val_data, batch_size=batch_size * 2, shuffle=False, pin_memory=False, num_workers=workers)
test_loader = DataLoader(test_data, batch_size=batch_size * 2, shuffle=False, pin_memory=False, num_workers=workers)

# 使用交叉熵损失函数
criterion = nn.CrossEntropyLoss().cuda()

# 优化器,使用带amsgrad的Adam
optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay, amsgrad=True)
Loss_list = []
Accuracy_list = []
if evaluate:
    validate(val_loader, model, criterion)
    # 开始训练
    for epoch in range(start_epoch, total_epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        precision, avg_loss = validate(val_loader, model, criterion)
        # precision, avg_loss = validate(val_loader, model, criterion)
        precision = precision / 100
        # 在日志文件中记录每个epoch的精度和loss
        with open('./result/%s.txt' % file_name, 'a') as acc_file:
            acc_file.write('Epoch: %2d, Precision: %.8f, Loss: %.8f\n' % (epoch, precision, avg_loss))

        # 记录最高精度与最低loss,保存最新模型与最佳模型
        is_best = precision > best_precision
        is_lowest_loss = avg_loss < lowest_loss
        best_precision = max(precision, best_precision)
        lowest_loss = min(avg_loss, lowest_loss)
        state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_precision': best_precision,
            'lowest_loss': lowest_loss,
            'stage': stage,
            'lr': lr,
        save_checkpoint(state, is_best, is_lowest_loss)

        # 判断是否进行下一个stage
        if (epoch + 1) in np.cumsum(stage_epochs)[:-1]:
            stage += 1
            lr = lr / lr_decay
            # optim.Adam(model.parameters(), lr, weight_decay=weight_decay, amsgrad=True)

            optimizer = adjust_learning_rate()
            model.load_state_dict(torch.load('./model/%s/model_best.pth.tar' % file_name)['state_dict'])
            print('Step into next stage')
            with open('./result/%s.txt' % file_name, 'a') as acc_file:
                acc_file.write('---------------Step into next stage----------------\n')

# 记录线下最佳分数
with open('./result/%s.txt' % file_name, 'a') as acc_file:
    acc_file.write('* best acc: %.8f  %s\n' % (best_precision, os.path.basename(__file__)))
with open('./result/best_acc.txt', 'a') as acc_file:
    acc_file.write('%s  * best acc: %.8f  %s\n' % (
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())), best_precision,

# 读取最佳模型,预测测试集
best_model = torch.load('./model/%s/lowest_loss.pth.tar' % file_name)
test(test_loader, model, criterion)

# 释放GPU缓存
return Loss_list, Accuracy_list

Im pretty sure the loss function is telling you its input, in this case y_pred, is a tuple. Could you call: print(type(y_pred) before computing the loss value?

By default the Inception model will return the auxiliary loss with the output, so y_pred will be a tuple as @Diego assumed.
Pass aux_logits=False to the constructor and you’ll only get the output.

it shows <class ‘tuple’> when I print(type(y_pred))before computing the loss value

Whether I add or not model.aux_logits=False, the same error will be displayed.

y_pred = model(image_var)
loss = criterion(y_pred, label)

<class ‘tuple’>
(tensor([[-0.0804, 0.5624, -0.0453, …, 0.3110, 0.1275, 0.4255],
[-0.1372, 0.1931, 0.0448, …, -0.0704, 0.4254, 0.5051],
[-0.4169, -0.2284, 0.0816, …, 0.2264, -0.0320, 0.1253],
[-0.0581, 0.4046, 0.1634, …, -0.1360, 0.3605, -0.3578],
[ 0.2408, 0.5761, -0.2318, …, -0.2415, 0.4273, 0.3533],
[-0.5235, -0.4139, 0.1069, …, -0.4818, -0.2902, -0.0036]],
device=‘cuda:0’, grad_fn=), tensor([[-0.8809, 0.3767, -3.1792, …, 0.7448, -0.0900, 0.0051],
[-0.4402, 0.5015, -2.4917, …, 0.9189, 1.2812, -0.9067],
[ 2.6763, -0.0138, 0.2158, …, 0.3181, 0.1110, -1.3704],
[-0.8310, -0.7097, 0.2146, …, -0.9113, -4.0065, -0.8130],
[ 0.3442, -0.1871, -1.6047, …, -1.9643, -0.5006, -1.1983],
[ 4.4484, 1.5294, -0.4082, …, 1.0616, 2.4863, 0.3728]],
device=‘cuda:0’, grad_fn=))

I was amazed when I print type(y_pred) to show tuple, when I print y_pred the specific value shows tensor

It doesn’t just show a tensor when you print the value, it shows a tuple of tensors. Although if you past aux_logits=False to the constructor as @ptrblck said you shouldnt encounter this problem anymore, but if you do, just try running this line instead

loss = criterion(y_pred[0], label)

Thanks for your help.
if I try running loss = criterion(y_pred[0], label),it will not occur error,but I am amazed why the y_pred has two tensor data.

train function does not occur error, but validate function occur error.

Traceback (most recent call last):
File “/home/lab326/songpeng/my_code/”, line 562, in
Loss, Accuracy=main()
File “/home/lab326/songpeng/my_code/”, line 508, in main
precision, avg_loss = validate(val_loader, model, criterion)
File “/home/lab326/songpeng/my_code/”, line 231, in validate
loss = criterion(y_pred[0], target)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/modules/”, line 477, in call
result = self.forward(*input, **kwargs)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/modules/”, line 862, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/”, line 1550, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File “/home/lab326/.local/lib/python3.5/site-packages/torch/nn/”, line 975, in log_softmax
return input.log_softmax(dim)
RuntimeError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

Print the shape of y_pred in the validation function:


And let me know what the output is.

when I try running loss = criterion(y_pred, label) in validate function,the error doesn’t occur.
but I do not understand why the y_pred’s tpye is not same in train function and validate fuction. Besides,why does the y_pred’s in train fuction have two tensor data? What is the difference between y_pred[0] and y_pred[1]?
These problems make me very confused

If you can answer me, I will thank you very much.

I think @ptrblck explained it in his answer above:

By default the Inception model will return the auxiliary loss with the output, so y_pred will be a tuple as @Diego assumed.
Pass aux_logits=False to the constructor and you’ll only get the output.

Did you check this explanation on GitHub issue. You might also want to have a look at source code.

model = models.inception_v3(pretrained=True)
model.aux_logit = False
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 16)

Whether I add or not model.aux_logits=False,y_pred’s result is same .

Pass the argument to the constructor:

model = models.inception_v3(pretrained=True, aux_logits=False)
I still have a question, I built a model, its accuracy and high, reaching 0.98, but the value of loss finally stabilized at around 0.78, not falling, it is reasonable to say that accuracy and loss should be inversely proportional, high accuracy If the loss should be low
The following figure, DerNet, is a model I built. Compared with VGG and Resnet, the other parameters are the same. want to know what is the factor that affects the loss.


The loss depends on the criterion you are using and a certain discrepancy between the loss value and accuracy might come from the difference in the logits.
Have a look at this dummy example:

x1 = torch.tensor([[-0.1, 0.1, -0.1, -0.1]])
x2 = torch.tensor([[-1., 1., -1., -1.]])
target = torch.tensor([1])
criterion = nn.CrossEntropyLoss()
loss1 = criterion(x1, target)
loss2 = criterion(x2, target)

While both tensors predict the target and achieve a 100% accuracy, the loss is different, since x2 passes a higher probability to class1 than x1.

In your case, however, I guess the effect is too strong and constant. Are you using any regularization, as it looks like you are adding e.g. weight decay to the loss value.