Cuda runtime error (30) after upgrading from 0.3.1 to ver 0.4.0

I am using Python 3.6.5 on Ubuntu 14.04.
I upgraded to PyTorch 0.4.0 via `pip uninstall torch torchvision; pip install torch torchvision`.

After the upgrade, any CUDA operation on a tensor or a model gives me CUDA runtime error (30).

The following is example code:

device = torch.device('cuda')
aaa = torch.Tensor(5).to(device)
aaa = torch.Tensor(5).cuda()

And here is the error message:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/kenkim/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py", line 161, in _lazy_init
RuntimeError: cuda runtime error (30) : unknown error at /pytorch/aten/src/THC/THCGeneral.cpp:844

Could you tell me any fix that I should try?

P.S. When I downgrade to 0.3.0.post4, aaa.cuda() works without any error.

P.S. Here is the nvidia-smi output:
| NVIDIA-SMI 384.111 Driver Version: 384.111 |
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| 0 TITAN X (Pascal) Off | 00000000:01:00.0 Off | N/A |
| 23% 39C P0 54W / 250W | 0MiB / 12189MiB | 0% Default |
| 1 TITAN X (Pascal) Off | 00000000:02:00.0 Off | N/A |
| 0% 34C P0 49W / 250W | 0MiB / 12189MiB | 0% Default |

| Processes: GPU Memory |
| GPU PID Type Process name Usage |
| No running processes found |

device = torch.device('cuda:0')

Thanks for the reply.

However, adding the GPU index gives me the same error.

Hello, I have encountered the same problem as you: RuntimeError: cuda runtime error (30) : unknown error at /pytorch/aten/src/THC/
What did you do in the end? My CUDA is 9.0, cuDNN is 7.0, and PyTorch is 0.4.0. Is this problem related to the GPU, CUDA, or the drivers, or is it related to my PyTorch installation? Here is my code; I look forward to your reply. Thanks very much!

This is my code:

# System libs

import os
import datetime
import argparse
from distutils.version import LooseVersion

# Numerical libs

import numpy as np
import torch
import torch.nn as nn
from import loadmat

# Our libs

from dataset import TestDataset
from models import ModelBuilder, SegmentationModule
from utils import colorEncode
from lib.nn import user_scattered_collate, async_copy_to
from lib.utils import as_numpy, mark_volatile
import as torchdata
import cv2

def visualize_result(data, preds, args):
# Color-encode the predictions and build a visualization from the input image
# and the colorized prediction.
#
# data:  unpacked below as an (img, info) pair. `info` is split on '/', so it
#        is presumably the image file path -- TODO confirm against the caller.
# preds: passed straight to colorEncode together with the color table.
# args:  not referenced by any line that survives in this paste.
#
# NOTE(review): this snippet was pasted without its indentation and parts of
# two statements are missing, so it does not parse as written.
colors = loadmat(‘data/color150.mat’)[‘colors’]
(img, info) = data

# prediction
pred_color = colorEncode(preds, colors)

# aggregate images and save
# NOTE(review): the np.concatenate call below is never closed; its remaining
# arguments appear to have been lost from the paste.
im_vis = np.concatenate((img, pred_color),

# Keep only the last path component of `info` as the file name.
img_name = info.split('/')[-1]
# NOTE(review): the start of this statement is missing. Only the tail survives:
# it swaps the '.jpg' extension for '.png' and passes im_vis along.
            img_name.replace('.jpg', '.png')), im_vis)

def test(segmentation_module, loader, args):
# Run inference over every batch produced by `loader`: accumulate the module's
# output over each entry of batch_data['img_data'], take the arg-max over
# dim 1, and convert the result with as_numpy.
#
# segmentation_module: callable invoked as module(feed_dict, segSize=segSize).
# loader:              iterable of batches; only element [0] of each is used.
# args:                reads num_class, gpu_id and imgSize.
#
# NOTE(review): this snippet was pasted without its function-level indentation
# and several statements are truncated, so it does not parse as written.

for i, batch_data in enumerate(loader):
    # process data
    batch_data = batch_data[0]
    # NOTE(review): this tuple is never closed; its second element (presumably
    # the other spatial dimension of 'img_ori') is missing from the paste.
    segSize = (batch_data['img_ori'].shape[0],

    img_resized_list = batch_data['img_data']

    # Inference only: gradient tracking is disabled for the whole block.
    with torch.no_grad():
        # NOTE(review): the accumulator is created with a leading dimension of
        # 0, i.e. it holds no elements. Confirm this is intended -- adding to a
        # zero-sized tensor is expected to leave the result empty.
        pred = torch.zeros(0, args.num_class, segSize[0], segSize[1])

        for img in img_resized_list:
            # Work on a shallow copy so the original batch keeps 'img_ori' and
            # 'info' for the visualization step further down.
            feed_dict = batch_data.copy()
            feed_dict['img_data'] = img
            del feed_dict['img_ori']
            del feed_dict['info']
            feed_dict = async_copy_to(feed_dict, args.gpu_id)

            # forward pass
            pred_tmp = segmentation_module(feed_dict, segSize=segSize)
            # Each contribution is scaled by 1 / len(args.imgSize); this is an
            # average only if img_resized_list has one entry per imgSize value
            # -- TODO confirm.
            pred = pred + pred_tmp.cpu() / len(args.imgSize)

        # Index of the maximum along dim 1 for every remaining position.
        _, preds = torch.max(pred, dim=1)
        preds = as_numpy(preds.squeeze(0))

    # visualization
    # NOTE(review): the line naming the called function is missing; only the
    # argument list survives (presumably a call to visualize_result).
        (batch_data['img_ori'], batch_data['info']),
        preds, args)

    # NOTE(review): as written, format() receives one argument for two
    # placeholders and `i` is passed to print instead; a timestamp expression
    # appears to have been lost in front of the strftime-style pattern.
    print('[{}] iter {}'
          .format("%Y-%m-%d %H:%M:%S"), i)

def main(args):
# Build the encoder/decoder networks, wrap them in a SegmentationModule, create
# a one-image test dataset and loader, and run test() on it.
#
# args: reads test_img and num_val here; it is also forwarded to TestDataset
#       and test().
#
# NOTE(review): this snippet was pasted without its indentation and three calls
# have lost their argument lists, so it does not parse as written.

# Network Builders
builder = ModelBuilder()
# NOTE(review): both builder calls below are cut off after the opening
# parenthesis; their arguments are missing from the paste.
net_encoder = builder.build_encoder(
net_decoder = builder.build_decoder(

# Negative log-likelihood loss; targets equal to 0 are ignored.
crit = nn.NLLLoss(ignore_index=0)

segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

# Dataset and Loader
# A single record pointing at the image given by --test_img.
list_test = [{'fpath_img': args.test_img}]
dataset_val = TestDataset(
    list_test, args, max_sample=args.num_val)
# NOTE(review): the DataLoader call is cut off after the opening parenthesis;
# its arguments are missing from the paste.
loader_val = torchdata.DataLoader(


# Main loop
test(segmentation_module, loader_val, args)

print('Inference done!')

if name == ‘main’:
assert LooseVersion(torch.version) >= LooseVersion(‘0.4.0’),
‘PyTorch>=0.4.0 is required’

parser = argparse.ArgumentParser()
# Path related arguments
#parser.add_argument('--test_img', default='./TEST_IMG/test_img/ADE_val_00001519.jpg',help='folder to test path')
parser.add_argument('--test_img', default='/home/dpl/zhaolp/semantic-segmentation-pytorch-master/semantic-segmentation-pytorch-master/TEST_IMG/test_img/ADE_val_00001519.jpg',help='folder to test path')

parser.add_argument(’–model_path’, default=’./MODEL_PATH/model_path’,

parser.add_argument('--model_path', default='/home/dpl/zhaolp/semantic-segmentation-pytorch-master/semantic-segmentation-pytorch-master/MODEL_PATH/model_path',
                    help='folder to model path')
parser.add_argument('--suffix', default='_epoch_20.pth',
                    help="which snapshot to load")

# Model related arguments
parser.add_argument('--arch_encoder', default='resnet50_dilated8',
                    help="architecture of net_encoder")
parser.add_argument('--arch_decoder', default='ppm_bilinear_deepsup',
                    help="architecture of net_decoder")
parser.add_argument('--fc_dim', default=2048, type=int,
                    help='number of features between encoder and decoder')

# Data related arguments
parser.add_argument('--num_val', default=-1, type=int,
                    help='number of images to evalutate')
parser.add_argument('--num_class', default=150, type=int,
                    help='number of classes')
parser.add_argument('--batch_size', default=1, type=int,
                    help='batchsize. current only supports 1')
parser.add_argument('--imgSize', default=[300, 400, 500, 600],
                    nargs='+', type=int,
                    help='list of input image sizes.'
                         'for multiscale testing, e.g. 300 400 500')
parser.add_argument('--imgMaxSize', default=1000, type=int,
                    help='maximum input image size of long edge')
parser.add_argument('--padding_constant', default=8, type=int,
                    help='maxmimum downsampling rate of the network')
parser.add_argument('--segm_downsampling_rate', default=8, type=int,
                    help='downsampling rate of the segmentation label')

# Misc arguments
parser.add_argument('--result', default='.',
                    help='folder to output visualization results')
parser.add_argument('--gpu_id', default=0, type=int,
                    help='gpu_id for evaluation')

args = parser.parse_args()

# absolute paths of model weights
args.weights_encoder = os.path.join(args.model_path,
                                    'encoder' + args.suffix)
args.weights_decoder = os.path.join(args.model_path,
                                    'decoder' + args.suffix)

# Both checkpoint files must be present before the networks are built, so the
# encoder and the decoder paths are each checked.
assert os.path.exists(args.weights_encoder) and \
    os.path.exists(args.weights_decoder), 'checkpoint does not exist!'

if not os.path.isdir(args.result):