RuntimeError: invalid hash value

This error occurred while I was training DeepLab v3+. I have been unable to find a solution and would appreciate any help.

Could you post a code snippet to reproduce this issue by wrapping the code in three backticks ```? :slight_smile:
Is this error raised during the download of a model?

I am trying to reproduce the code from this repository: https://github.com/giovanniguidi/deeplabV3-PyTorch
After configuring the environment according to the requirements in the README, I modified the data-loading part as follows:
config.yml:
dataset:
base_path: “F:\pytorch_deeplabplus\deeplabV3-PyTorch-master\deeplabV3-PyTorch-master\datasets\VOC2007”
dataset_name: “VOC2007”
image:
out_stride: 16
base_size: 513
crop_size: 513
network:
backbone: “xception” #choices=[‘resnet’, ‘xception’, ‘drn’, ‘mobilenet’]
sync_bn: true
freeze_bn: true
use_cuda: true
num_classes: 21 #deepfashions 14
training:
workers: 2 #4
loss_type: “ce” #choices=[‘ce’, ‘focal’]
epochs: 100 #50
start_epoch: 0
batch_size: 2
use_balanced_weights: false
lr: 0.0005
lr_scheduler: “poly” #choices=[‘poly’, ‘step’, ‘cos’]
momentum: 0.9
weight_decay: 0.0005
nesterov: false
callbacks:
weights_initialization:
use_pretrained_weights: true
restore_from: “./experiments/checkpoint_last.pth.tar”
model_best_checkpoint:
enabled: true
out_file: “./experiments/checkpoint_best.pth.tar”
monitor: “val_loss”
model_last_checkpoint:
enabled: true
out_file: “./experiments/checkpoint_last.pth.tar”
train_on_subset:
enabled: true
dataset_fraction: 0.1
output:
output_weights: “./snapshots/checkpoint_best.pth.tar”
tensorboard:
enabled: false
log_dir: “./tensorboard/”
no_val: false
val_interval: 1
inference:
batch_size: 2
#gpu_ids: 0
#seed: 1
#ft: false
#no_val: false

data_generator.py
#from data_generators.datasets import cityscapes, coco, combine_dbs, pascal, sbd, deepfashion
from torch.utils.data import DataLoader
#from data_generators.deepfashion import DeepFashionSegmentation
#from data_generators.Mydataset import MydatasetSegmentation
from data_generators.pascal import VOCSegmentation

def initialize_data_loader(config):
    """Build the training and test data loaders for the configured dataset.

    Reads ``config['dataset']['dataset_name']`` to pick the dataset class and
    ``config['training']['batch_size']`` / ``config['training']['workers']``
    for the loader settings. Only ``'VOC2007'`` is supported.

    :param config: parsed configuration dictionary
    :return: tuple ``(train_loader, test_loader, num_classes)``
    :raises NotImplementedError: if the dataset name is not supported
    """
    if config['dataset']['dataset_name'] != 'VOC2007':
        raise NotImplementedError('dataset not implemented yet!')

    train_set = VOCSegmentation(config, split='train')
    test_set = VOCSegmentation(config, split='test')

    num_classes = train_set.num_classes

    # Settings shared by both loaders; only shuffling differs between them.
    loader_kwargs = dict(
        batch_size=config['training']['batch_size'],
        num_workers=config['training']['workers'],
        pin_memory=True,
    )
    train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)

    return train_loader, test_loader, num_classes

trainer.py
import argparse
import os
import numpy as np
from tqdm import tqdm

from data_generators.data_generator import initialize_data_loader
from models.sync_batchnorm.replicate import patch_replication_callback
from models.deeplab import DeepLab
from losses.loss import SegmentationLosses
from utils.calculate_weights import calculate_weigths_labels
from utils.lr_scheduler import LR_Scheduler
from utils.saver import Saver
from utils.summaries import TensorboardSummary
from utils.metrics import Evaluator
import torch
import yaml

class Trainer(object):
    """Drive training and validation of a DeepLab model from a config dict.

    The constructor builds the saver, TensorBoard writer, data loaders, model,
    optimizer, loss, evaluator and learning-rate scheduler, and optionally
    restores a checkpoint. ``training`` runs one training epoch and
    ``validation`` runs one evaluation pass.
    """

    def __init__(self, config):
        """Set up every training component described by ``config``.

        :param config: parsed configuration dictionary (sections ``dataset``,
            ``image``, ``network`` and ``training`` are read here)
        :raises RuntimeError: if pretrained weights are requested but the
            checkpoint file does not exist
        """
        self.config = config
        self.best_pred = 0.0
        use_cuda = self.config['network']['use_cuda']

        # Define Saver
        self.saver = Saver(config)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.config['training']['tensorboard']['log_dir'])
        self.writer = self.summary.create_summary()

        # initialize_data_loader returns (train_loader, test_loader, num_classes).
        # No separate validation loader is built, so validation uses the test loader.
        self.train_loader, self.test_loader, self.nclass = initialize_data_loader(config)
        self.val_loader = self.test_loader

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=self.config['network']['backbone'],
                        output_stride=self.config['image']['out_stride'],
                        sync_bn=self.config['network']['sync_bn'],
                        freeze_bn=self.config['network']['freeze_bn'])

        # Two parameter groups: the second one trains with a 10x learning rate.
        base_lr = self.config['training']['lr']
        train_params = [{'params': model.get_1x_lr_params(), 'lr': base_lr},
                        {'params': model.get_10x_lr_params(), 'lr': base_lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=self.config['training']['momentum'],
                                    weight_decay=self.config['training']['weight_decay'],
                                    nesterov=self.config['training']['nesterov'])

        # Define Criterion
        # whether to use class balanced weights
        if self.config['training']['use_balanced_weights']:
            dataset_name = self.config['dataset']['dataset_name']
            classes_weights_path = os.path.join(self.config['dataset']['base_path'],
                                                dataset_name + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(self.config, dataset_name, self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None

        self.criterion = SegmentationLosses(weight=weight, cuda=use_cuda).build_loss(
            mode=self.config['training']['loss_type'])
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(self.config['training']['lr_scheduler'],
                                      self.config['training']['lr'],
                                      self.config['training']['epochs'],
                                      len(self.train_loader))

        # Using cuda
        if use_cuda:
            self.model = torch.nn.DataParallel(self.model)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        weights_cfg = self.config['training']['weights_initialization']
        if weights_cfg['use_pretrained_weights']:
            restore_from = weights_cfg['restore_from']
            if not os.path.isfile(restore_from):
                raise RuntimeError("=> no checkpoint found at '{}'" .format(restore_from))

            if use_cuda:
                checkpoint = torch.load(restore_from)
            else:
                # Remap every stored tensor to the CPU, whichever GPU saved it.
                checkpoint = torch.load(restore_from, map_location='cpu')

            self.config['training']['start_epoch'] = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['state_dict'])

            # 'ft' (fine-tuning) is optional in the config; when it is absent or
            # false the optimizer state is restored together with the weights.
            if not self.config.get('ft', False):
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(restore_from, checkpoint['epoch']))

    def training(self, epoch):
        """Run one training epoch and save the "last" checkpoint.

        :param epoch: zero-based index of the epoch being trained
        """
        train_loss = 0.0
        num_images = 0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # Visualize about ten times per epoch; never let the interval reach
        # zero, otherwise the modulo below raises ZeroDivisionError when the
        # loader has fewer than ten batches.
        vis_interval = max(num_img_tr // 10, 1)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.config['network']['use_cuda']:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            num_images += image.data.shape[0]
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            if i % vis_interval == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.config['dataset']['dataset_name'],
                                             image, target, output, global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' % (epoch, num_images))
        print('Loss: %.3f' % train_loss)

        # Save last checkpoint. The state dict is taken from self.model itself
        # so that it matches what __init__ loads back into self.model.
        self.saver.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, is_best=False, filename='checkpoint_last.pth.tar')

        # If training on a subset, reshuffle the data.
        if self.config['training']['train_on_subset']['enabled']:
            self.train_loader.dataset.shuffle_dataset()

    def validation(self, epoch):
        """Evaluate the model and save a "best" checkpoint when mIoU improves.

        :param epoch: zero-based index of the epoch that was just trained
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        num_images = 0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.config['network']['use_cuda']:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            num_images += image.data.shape[0]
            tbar.set_description('Val loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' % (epoch, num_images))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            self.best_pred = new_pred
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best=True, filename='checkpoint_best.pth.tar')

pascal.py
from future import print_function, division
import os
from PIL import Image
#import json
import numpy as np
from torch.utils.data import Dataset

from torchvision import transforms
from preprocessing import custom_transforms as tr
import random

class VOCSegmentation(Dataset):
    """
    PascalVoc dataset
    """
    num_classes = 21

    def __init__(self,
                 config,
                 split='train',
                 ):
        """
        :param config: configuration dictionary; ``config['dataset']['base_path']``
            is the dataset root and ``config['image']`` holds ``base_size`` /
            ``crop_size`` for the transforms
        :param split: split name or list of split names (e.g. train/test)
        :raises FileNotFoundError: if a listed image or label file is missing
        """
        super().__init__()
        self.config = config
        self._base_dir = config['dataset']['base_path']
        self._image_dir = os.path.join(self._base_dir, 'train', 'JPEGImages')  # train images
        # SegmentationClass holds the semantic-segmentation labels
        # (SegmentationObject would be the instance-segmentation ones).
        self._cat_dir = os.path.join(self._base_dir, 'train', 'SegmentationClass')

        if isinstance(split, str):
            self.split = [split]
        else:
            # sorted() copies, so the caller's list is not reordered in place.
            self.split = sorted(split)

        _splits_dir = os.path.join(self._base_dir, 'train', 'ImageSets', 'Segmentation')

        self.im_ids = []
        self.images = []
        self.categories = []

        for splt in self.split:
            with open(os.path.join(_splits_dir, splt + '.txt'), "r") as f:
                lines = f.read().splitlines()

            for line in lines:
                _image = os.path.join(self._image_dir, line + ".jpg")
                _cat = os.path.join(self._cat_dir, line + ".png")
                # Explicit errors instead of assert: asserts vanish under -O.
                if not os.path.isfile(_image):
                    raise FileNotFoundError(_image)
                if not os.path.isfile(_cat):
                    raise FileNotFoundError(_cat)
                self.im_ids.append(line)
                self.images.append(_image)
                self.categories.append(_cat)

        assert (len(self.images) == len(self.categories))

        # Display stats
        print('Number of images in {}: {:d}'.format(split, len(self.images)))

    def __len__(self):
        """Return the number of image/label pairs."""
        return len(self.images)

    def __getitem__(self, index):
        """Load sample ``index`` and apply the transform matching the split.

        The first entry of ``self.split`` that names a known split decides
        which transform is used.

        :raises ValueError: if no entry of ``self.split`` is a known split
        """
        _img, _target = self._make_img_gt_point_pair(index)
        sample = {'image': _img, 'label': _target}

        for split in self.split:
            if split == "train":
                return self.transform_tr(sample)
            elif split in ('test', 'val'):
                return self.transform_val(sample)

        # Returning None here would only fail later, inside DataLoader collation.
        raise ValueError('no transform defined for split {}'.format(self.split))

    def _make_img_gt_point_pair(self, index):
        """Open the RGB image and its label image for ``index``."""
        _img = Image.open(self.images[index]).convert('RGB')
        _target = Image.open(self.categories[index])

        return _img, _target

    def transform_tr(self, sample):
        """Apply the training augmentation pipeline to ``sample``."""
        image_cfg = self.config['image']
        composed_transforms = transforms.Compose([
            tr.RandomHorizontalFlip(),
            tr.RandomScaleCrop(base_size=image_cfg['base_size'], crop_size=image_cfg['crop_size']),
            tr.RandomGaussianBlur(),
            tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            tr.ToTensor()])

        return composed_transforms(sample)

    def transform_val(self, sample):
        """Apply the deterministic evaluation pipeline to ``sample``."""
        composed_transforms = transforms.Compose([
            tr.FixScaleCrop(crop_size=self.config['image']['crop_size']),
            tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            tr.ToTensor()])

        return composed_transforms(sample)

    @staticmethod
    def preprocess(sample, crop_size=513):
        """Apply the evaluation pipeline with an explicit ``crop_size``.

        Declared static because it takes no ``self``; it can be called on the
        class or on an instance.
        """
        composed_transforms = transforms.Compose([
            tr.FixScaleCrop(crop_size=crop_size),
            tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            tr.ToTensor()])

        return composed_transforms(sample)

    def __str__(self):
        return 'VOC2007(split=' + str(self.split) + ')'

main.py
import argparse
import os
import numpy as np

import torch
import yaml

from trainers.trainer import Trainer
from predictors.predictor import Predictor

def train(args):
    """Train the model described by the configuration file ``args.conf``.

    Loads the YAML config, disables CUDA when it is unavailable, builds a
    ``Trainer`` and runs the epoch loop, validating every
    ``training.val_interval`` epochs unless ``training.no_val`` is set.

    :param args: parsed command-line arguments; only ``args.conf`` is read
    """
    config_path = args.conf

    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config['network']['use_cuda'] = config['network']['use_cuda'] and torch.cuda.is_available()
    config['checkname'] = 'deeplab-' + str(config['network']['backbone'])

    # 'seed' is optional: it is commented out in the posted config.yml, so an
    # unconditional config['seed'] lookup would raise KeyError.
    if config.get('seed') is not None:
        torch.manual_seed(config['seed'])

    trainer = Trainer(config)

    print('Starting Epoch:', trainer.config['training']['start_epoch'])
    print('Total Epoches:', trainer.config['training']['epochs'])

    val_interval = config['training']['val_interval']
    try:
        for epoch in range(trainer.config['training']['start_epoch'], trainer.config['training']['epochs']):
            trainer.training(epoch)
            if not trainer.config['training']['no_val'] and epoch % val_interval == (val_interval - 1):
                trainer.validation(epoch)
    finally:
        # Close the summary writer even if an epoch raises.
        trainer.writer.close()

def predict_on_test_set(args):
    """Run inference on the test set with the best checkpoint.

    :param args: parsed command-line arguments; only ``args.conf`` is read
    """
    print("predict on test")

    config_path = args.conf

    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config['network']['use_cuda'] = config['network']['use_cuda'] and torch.cuda.is_available()

    predictor = Predictor(config, checkpoint_path='./experiments/checkpoint_best.pth.tar')

    predictor.inference_on_test_set()

def predict(args):
    """Segment a single image file with the last checkpoint.

    :param args: parsed command-line arguments; ``args.conf`` is the config
        path and ``args.filename`` the image to segment
    :return: tuple ``(image, prediction)`` as returned by
        ``Predictor.segment_image``
    """
    print("predict")

    config_path = args.conf

    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    filename = args.filename

    print(filename)

    config['network']['use_cuda'] = config['network']['use_cuda'] and torch.cuda.is_available()

    predictor = Predictor(config, checkpoint_path='./experiments/checkpoint_last.pth.tar')

    image, prediction = predictor.segment_image(filename)

    # The trailing print(np.max(prediction)) was unreachable after the return
    # and has been dropped.
    return image, prediction

if __name__ == "__main__":
    # Command-line entry point: exactly one of --train, --predict_on_test_set
    # or --predict selects the action; -c/--conf is always required.
    parser = argparse.ArgumentParser(description='Seq2seq')
    parser.add_argument('-c', '--conf', help='path to configuration file', required=True)

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--train', action='store_true', help='Train')
    group.add_argument('--predict_on_test_set', action='store_true', help='Predict on test set')
    group.add_argument('--predict', action='store_true', help='Predict on single file')

    parser.add_argument('--filename', help='path to file')

    args = parser.parse_args()

    if args.predict_on_test_set:
        predict_on_test_set(args)

    elif args.predict:
        # Single-file prediction needs the image path.
        if args.filename is None:
            raise Exception('missing --filename FILENAME')
        else:
            predict(args)

    elif args.train:
        print('Starting training')
        train(args)
    else:
        raise Exception('Unknown args')

When I run `python main.py -c configs/config.yml --train`, the error above is raised.

Could you try to narrow down the line of code which triggers this error?
Based on the initial description it seems that the download fails.

It was a download error. I have fixed that problem, but a new one has appeared:
RuntimeError: Given groups=1, weight of size [256, 304, 3, 3], expected input[1, 2096, 129, 129] to have 304 channels, but got 2096 channels instead
I tried to find the cause of this error, but without success.