'Device Side assert triggered at....' Error

Hello,

I’m trying to run a CNN on a GPU through the command prompt (instead of Jupyter). I keep getting the following error:

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
THCudaCheck FAIL file=..\aten\src\THC\THCCachingHostAllocator.cpp line=278 error=710 : device-side assert triggered
Traceback (most recent call last):
  File "inspection_model.py", line 544, in <module>
    train_roof_df, test_roof_df,train_losses, test_losses = roof_run()
  File "inspection_model.py", line 498, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 450, in forward
    cat_embedd = torch.cat(embeddings, 1)
RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCCachingHostAllocator.cpp:278

This does run on the CPU, but I have over 100,000 photos, so I want my GPU’s help.

Here is my model:

class Image_Model(nn.Module):
    """Classifier combining VGG19 image features with tabular inputs.

    The forward pass concatenates, along dim 1, the output of a pretrained
    VGG19, the numerical columns, and one embedding per categorical column,
    then feeds the result through a small fully connected head that ends in
    ``log_softmax`` (suitable for ``NLLLoss``).

    Args:
        embedding_size: iterable of ``(num_categories, embedding_dim)`` pairs;
            one ``nn.Embedding`` is created per pair, in order.
    """

    def __init__(self, embedding_size):
        super().__init__()
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        # NOTE(review): embedding_dropout is created but never applied in forward().
        self.embedding_dropout = nn.Dropout(p = 0.04)
        self.cnn = models.vgg19(pretrained=True)
        # Freeze the pretrained backbone: the flag has to be set on each
        # parameter object (attribute access), otherwise nothing is frozen.
        for param in self.cnn.parameters():
            param.requires_grad = False
        n_features = self.cnn.classifier[6].out_features
        # NOTE(review): fc2 accepts n_features inputs, but forward() passes it the
        # CNN output concatenated with the numerical and embedded columns --
        # confirm in_features matches that concatenated width.
        self.fc2 = nn.Sequential(nn.Linear(n_features, 1049))
        self.fc3 = nn.Sequential(nn.Linear(1049, 256))
        self.fc5 = nn.Dropout(p = 0.04)
        self.fc9 = nn.Sequential(nn.Linear(256, 2))

    def forward(self, image, numerical_columns, cat_columns):
        """Return log-probabilities over the two output classes.

        Args:
            image: batch of images passed straight to the VGG19 backbone.
            numerical_columns: tensor concatenated to the CNN output along dim 1.
            cat_columns: integer tensor; column ``i`` is looked up in the
                ``i``-th embedding table.

        Returns:
            Tensor of log-probabilities with the class axis at dim 1.
        """
        # Column i indexes embedding table i, so every value in that column must
        # lie in [0, num_embeddings_i). An out-of-range index is the usual cause
        # of the CUDA assert `srcIndex < srcSelectDimSize`; because CUDA kernels
        # run asynchronously the error may be reported at a later line.
        embeddings = [e(cat_columns[:, i]) for i, e in enumerate(self.all_embeddings)]

        cat_embedd = torch.cat(embeddings, 1)
        x = self.cnn(image)

        x = torch.cat((x, numerical_columns), dim = 1)
        x = torch.cat((x, cat_embedd), dim = 1)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.fc5(x)
        x = F.relu(self.fc9(x))
        # Explicit dim: the implicit choice is deprecated; dim=1 is the class axis
        # of the 2-D (batch, classes) activations.
        x = F.log_softmax(x, dim=1)
        return x

# Seed the RNG before the model is built so that run-to-run initialisation is repeatable.
torch.manual_seed(1010)
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# `embeddings` is expected to be the list of (num_categories, embedding_dim)
# pairs consumed by Image_Model.__init__ -- defined elsewhere in the script.
roof_model = Image_Model(embedding_size=embeddings).to(device)
# NLLLoss matches the log_softmax output produced by Image_Model.forward.
criterion = torch.nn.NLLLoss().to(device)
optimizer = torch.optim.Adam(roof_model.parameters(), lr=0.001)
# The scheduler is stepped with the mean test loss once per epoch in roof_run;
# 'min' mode means it reacts when that value stops decreasing.
scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2, verbose = True, min_lr = .00000001)

# Number of epochs roof_run iterates over (unless early stopping breaks out first).
epochs = 2
def roof_run():
    """Train and evaluate ``roof_model`` for up to ``epochs`` epochs.

    Relies on module-level objects: ``roof_model``, ``criterion``,
    ``optimizer``, ``scheduler``, ``device``, ``epochs``, the loaders
    ``train_loader_roof`` / ``test_loader_roof`` and the early-stopping
    helper ``es``.

    For every batch the first output's class-0 probability is recorded next to
    the batch's policy value; ``policy.item()`` requires a single-element
    ``policy`` tensor.

    Returns:
        ``(train_roof_df, test_roof_df, train_losses, test_losses)`` where the
        two loss arrays have length ``epochs``; entries for epochs skipped by
        early stopping stay at zero.
    """
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    t0 = datetime.now()
    train_roof_predictions = []
    test_roof_predictions = []
    for epoch in range(epochs):
        print(f"epoch, {epoch}")

        # Re-enable dropout for training; the evaluation pass below turns it off.
        roof_model.train()
        train_loss = []
        for image, label, policy, categorical_data, numerical_data in train_loader_roof:
            image = image.to(device)
            label = label.long().to(device)
            numerical_data = numerical_data.to(device)
            categorical_data = categorical_data.to(device)
            optimizer.zero_grad()

            outputs = roof_model(image, numerical_data, categorical_data)
            # Outputs are log-probabilities, so exp() recovers the probability.
            policy_tuple = (policy.cpu().detach().numpy().item(), np.exp(outputs[0][0].detach().cpu().numpy()))
            train_roof_predictions.append(policy_tuple)
            loss = criterion(outputs, label)
            train_loss.append(loss.item())

            loss.backward()
            optimizer.step()

        train_loss = np.mean(train_loss)

        test_loss = []
        print(f"testing for epoch {epoch}")
        # Evaluate with dropout disabled and without recording autograd history,
        # so the test loss is deterministic and no graph memory is retained.
        roof_model.eval()
        with torch.no_grad():
            for image, label, policy, categorical_data, numerical_data in test_loader_roof:
                # move data to GPU
                image = image.to(device)
                # NLLLoss needs integer class targets, same as in the training loop.
                label = label.long().to(device)
                categorical_data = categorical_data.to(device)
                numerical_data = numerical_data.to(device)

                outputs = roof_model(image, numerical_data, categorical_data)
                policy_test_tuple = (policy.cpu().detach().numpy().item(), np.exp(outputs[0][0].detach().cpu().numpy()))
                test_roof_predictions.append(policy_test_tuple)
                loss = criterion(outputs, label)
                test_loss.append(loss.item())

        test_loss = np.mean(test_loss)
        scheduler.step(test_loss)

        #save the losses
        train_losses[epoch] = train_loss
        test_losses[epoch] = test_loss

        if epoch % 2 == 0:
            print(f"train loss: {train_losses[epoch]}, test loss: {test_losses[epoch]}")
        if es.step(test_loss):
            break

    dt = datetime.now() - t0
    print('Duration:', dt)

    # NOTE(review): train_roof_df / test_roof_df are not assigned anywhere in this
    # function, so they must already exist at module level -- presumably they
    # should be built from train_roof_predictions / test_roof_predictions; confirm.
    return train_roof_df, test_roof_df,  train_losses, test_losses
    
# Run training/evaluation when the script executes and unpack the four returned values.
train_roof_df, test_roof_df,train_losses, test_losses = roof_run()

I’m not sure what to do from here. The error seems to hit at the following place in the model:
File "inspection_model.py", line 452, in forward cat_embedd = torch.cat(embeddings, 1)

I’m not sure what is wrong at that particular line. It will run on the GPU when I don’t use categorical embeddings but I need those for the predictive power.

Run the Python script as CUDA_LAUNCH_BLOCKING=1 python script.py. Because CUDA calls are asynchronous, this makes the Python stack trace point at the call that actually failed. Also, print the shapes of cat_embedd and embeddings.

It says it doesn’t recognize `CUDA_LAUNCH_BLOCKING=1` as a command.

Add it to the environment and see if it works: os.environ['CUDA_LAUNCH_BLOCKING'] = 1. Also, post the output shapes, and tell us which versions of PyTorch and the CUDA driver you are on.

‘os.environ[‘CUDA_LAUNCH_BLOCKING’]’ is not recognized as an internal or external command,
operable program or batch file.

It should be ="1". Strings only allowed.

Well, if you type the command in CMD, then it should be set CUDA_LAUNCH_BLOCKING=1.

This is what I put:
os.environ['CUDA_LAUNCH_BLOCKING']="1" python inspection_model.py

Hi @peterjc123. I did that and I got no error but the script didn’t run either.

set CUDA_LAUNCH_BLOCKING=1 python inspection_model.py

You type it in your Python script: import os, and then put that line at the start of your script. Also, it will not solve the problem, as it will just output a stack trace. What are the shapes of cat_embedd and embeddings? Print these.

embeddings:

[(3, 2), (21, 11), (4, 2), (6, 3), (15, 8), (7, 4), (3, 2), (10, 5), (13, 7), (33, 17), (2, 1)]

I don’t know cat_embedd as it errors out there.

Here is what I got with the CUDA_LAUNCH_BLOCKING=1

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
THCudaCheck FAIL file=..\aten\src\THC\THCGeneral.cpp line=313 error=710 : device-side assert triggered
Traceback (most recent call last):
  File "inspection_model.py", line 547, in <module>
    train_losses, test_losses = roof_run()
  File "inspection_model.py", line 501, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 453, in forward
    cat_embedd = torch.cat(embeddings, 1)
RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCGeneral.cpp:313

torch.cat works as

# Concatenating along dim 0 stacks the rows (4 + 10); the size of dim 1 must match.
a = torch.randn(4,3)
b = torch.randn(10,3)
c = torch.cat((a,b), 0)
c.shape # (14,3)

So you cannot use cat_embedd = torch.cat(embeddings,1). If I do embeddings = torch.tensor(embeddings) the output is of shape (11,2).

I’m not sure I understand. Should I do torch.cat(embeddings, 0)?

torch.cat expects a tuple of tensors. So you need to provide it tensors as (a,b,c) and then it will concatenate those like in the example I showed above. In your case embeddings is a python list, which will not work.

1 Like

Nevermind. I tried that and got the following:

    cat_embedd = torch.cat(embeddings, 0)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 1 and 2 in dimension 1 at C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src\THC/generic/THCTensorMath.cu:71

Also, what is the expected output you want by doing torch.cat(embeddings,1).

New error with `embeddings = torch.tensor(embeddings)`:

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
Traceback (most recent call last):
  File "inspection_model.py", line 550, in <module>
    train_losses, test_losses = roof_run()
  File "inspection_model.py", line 504, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 455, in forward
    embeddings = torch.tensor(embeddings)
ValueError: only one element tensors can be converted to Python scalars

What I want is to concatenate my categorical and numerical columns with the output from my VGG19 image model to give more lift.

Hey all,

Does anyone else have any ideas what I can do? Here is the full traceback I’m getting now:


  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 555, in <module>
    train_losses = roof_run()

  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 507, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)

  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)

  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 454, in forward
    cat_embedd = torch.cat(embeddings, 1)

RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCGeneral.cpp:313

Try to run your code on the CPU and see, if you get a proper error message.
If it’s running fine on the CPU, run the script on the GPU again via:

CUDA_LAUNCH_BLOCKING=1 python script.py args

which should give you a stacktrace pointing out the line of code, which raised the issue.