'Device Side assert triggered at....' Error

Jordan_Howell · May 22, 2020, 1:43pm

Hello,

I’m trying to run a CNN on a gpu through the command prompt (instead of jupyter). I keep getting the following error:

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
THCudaCheck FAIL file=..\aten\src\THC\THCCachingHostAllocator.cpp line=278 error=710 : device-side assert triggered
Traceback (most recent call last):
  File "inspection_model.py", line 544, in <module>
    train_roof_df, test_roof_df,train_losses, test_losses = roof_run()
  File "inspection_model.py", line 498, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 450, in forward
    cat_embedd = torch.cat(embeddings, 1)
RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCCachingHostAllocator.cpp:278

This does run on the CPU, but I have over 100,000 photos, so I want my GPU’s help.

Here is my model:

class Image_Model(nn.Module):
    """Two-class classifier combining VGG19 image features with tabular inputs.

    The forward pass concatenates, along dim 1, the output of a pretrained
    VGG19, the numerical columns, and one learned embedding per categorical
    column, then feeds the result through three linear layers and returns
    log-probabilities (suitable for ``NLLLoss``).

    Args:
        embedding_size: iterable of ``(num_categories, embedding_dim)`` pairs,
            one per categorical column, in column order.
    """

    def __init__(self, embedding_size):
        super().__init__()
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
        # NOTE(review): this dropout is created but never applied in forward().
        self.embedding_dropout = nn.Dropout(p = 0.04)
        self.cnn = models.vgg19(pretrained=True)
        # Freeze the pretrained backbone. The attribute must be set on the
        # parameter itself; assigning to a local named `param_requires_grad`
        # leaves every VGG19 weight trainable.
        for param in self.cnn.parameters():
            param.requires_grad = False
        n_features = self.cnn.classifier[6].out_features
        # NOTE(review): forward() concatenates the numerical columns and the
        # embeddings onto the CNN output before fc2, so fc2's input width
        # presumably needs to be n_features + n_numerical + sum(embedding dims)
        # -- confirm against the real data.
        self.fc2 = nn.Sequential(nn.Linear(n_features, 1049))
        self.fc3 = nn.Sequential(nn.Linear(1049, 256))
        self.fc5 = nn.Dropout(p = 0.04)
        self.fc9 = nn.Sequential(nn.Linear(256, 2))

    def forward(self, image, numerical_columns, cat_columns):
        """Return per-class log-probabilities for a batch.

        Args:
            image: batch passed unchanged to the CNN.
            numerical_columns: 2-D tensor concatenated to the CNN output on dim 1.
            cat_columns: 2-D integer tensor; column ``i`` is looked up in the
                ``i``-th embedding table.

        Returns:
            Tensor of log-softmax values taken over dim 1.
        """
        # Every value in cat_columns[:, i] must lie in [0, num_embeddings) of
        # the i-th table. NOTE(review): an out-of-range category code is the
        # likely cause of the CUDA `srcIndex < srcSelectDimSize` assert.
        embeddings = [e(cat_columns[:, i]) for i, e in enumerate(self.all_embeddings)]

        cat_embedd = torch.cat(embeddings, 1)
        x = self.cnn(image)

        x = torch.cat((x, numerical_columns), dim = 1)
        x = torch.cat((x, cat_embedd), dim = 1)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.fc5(x)
        x = F.relu(self.fc9(x))
        # Explicit class dimension; the implicit-dim form is deprecated.
        x = F.log_softmax(x, dim=1)
        return x

# Seed the RNG before the model is built so weight initialisation is repeatable.
torch.manual_seed(1010)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Model and loss both live on the selected device.
roof_model = Image_Model(embedding_size=embeddings)
roof_model = roof_model.to(device)
criterion = torch.nn.NLLLoss().to(device)

# Adam over all model parameters; the scheduler lowers the learning rate when
# the monitored value (stepped with the test loss in roof_run) stops decreasing.
optimizer = torch.optim.Adam(roof_model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=2,
    verbose=True,
    min_lr=1e-8,
)

# Number of passes over the training data made by roof_run().
epochs = 2
def roof_run():
    """Train ``roof_model`` for ``epochs`` epochs, evaluating after each one.

    Uses module-level objects: ``roof_model``, ``criterion``, ``optimizer``,
    ``scheduler``, ``device``, ``epochs``, ``train_loader_roof``,
    ``test_loader_roof`` and the early-stopping helper ``es``.

    Each epoch runs one optimisation pass over the training loader, then one
    evaluation pass over the test loader. The mean test loss drives both the
    LR scheduler and early stopping.

    Returns:
        tuple: ``(train_roof_df, test_roof_df, train_losses, test_losses)``.
        The two loss arrays have one entry per epoch; entries for epochs
        skipped by early stopping stay 0.
    """
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    t0 = datetime.now()
    train_roof_predictions = []
    test_roof_predictions = []
    for epoch in range(epochs):
        print(f"epoch, {epoch}")
        # Re-enable dropout for the optimisation pass (eval() is set below).
        roof_model.train()
        train_loss = []
        for image, label, policy, categorical_data, numerical_data in train_loader_roof:
            image = image.to(device)
            label = label.long().to(device)
            numerical_data = numerical_data.to(device)
            categorical_data = categorical_data.to(device)
            optimizer.zero_grad()

            outputs = roof_model(image, numerical_data, categorical_data)
            # (policy id, probability of class 0 for the first sample).
            # `.item()` on policy requires it to hold exactly one element.
            policy_tuple = (
                policy.detach().cpu().numpy().item(),
                np.exp(outputs[0][0].detach().cpu().numpy()),
            )
            train_roof_predictions.append(policy_tuple)
            loss = criterion(outputs, label)
            train_loss.append(loss.item())

            loss.backward()
            optimizer.step()

        train_loss = np.mean(train_loss)

        test_loss = []
        print(f"testing for epoch {epoch}")
        # Evaluate with dropout disabled and without building autograd graphs.
        roof_model.eval()
        with torch.no_grad():
            for image, label, policy, categorical_data, numerical_data in test_loader_roof:
                # move data to GPU
                image = image.to(device)
                # Cast to long exactly as in the training loop; NLLLoss needs
                # integer class indices.
                label = label.long().to(device)

                categorical_data = categorical_data.to(device)
                numerical_data = numerical_data.to(device)

                outputs = roof_model(image, numerical_data, categorical_data)
                # policy is only read on the host, so it is not sent to the device.
                policy_test_tuple = (
                    policy.detach().cpu().numpy().item(),
                    np.exp(outputs[0][0].detach().cpu().numpy()),
                )
                test_roof_predictions.append(policy_test_tuple)
                loss = criterion(outputs, label)
                test_loss.append(loss.item())

        test_loss = np.mean(test_loss)
        scheduler.step(test_loss)

        #save the losses
        train_losses[epoch] = train_loss
        test_losses[epoch] = test_loss

        if epoch % 2 == 0:
            print(f"train loss: {train_losses[epoch]}, test loss: {test_losses[epoch]}")
        if es.step(test_loss):
            break

    dt = datetime.now() - t0
    print('Duration:', dt)

    # NOTE(review): train_roof_df / test_roof_df are not assigned in this
    # function, so they must already exist at module level; the prediction
    # lists collected above are not returned -- confirm this is intended.
    return train_roof_df, test_roof_df,  train_losses, test_losses
    
# Run training/evaluation and unpack the four values returned by roof_run().
train_roof_df, test_roof_df,train_losses, test_losses = roof_run()

I’m not sure what to do from here. The error seems to hit at the following place in the model:
File "inspection_model.py", line 452, in forward cat_embedd = torch.cat(embeddings, 1)

I’m not sure what is wrong at that particular line. It will run on the GPU when I don’t use categorical embeddings but I need those for the predictive power.

Kushaj · May 22, 2020, 4:46pm

Run the Python script as `CUDA_LAUNCH_BLOCKING=1 python script.py`. Because CUDA calls are asynchronous, this makes the Python stack trace point at the call that actually failed. Also, print the shapes of cat_embedd and embeddings.

Jordan_Howell · May 22, 2020, 4:54pm

It says it doesn’t recognize `CUDA_LAUNCH_BLOCKING=1` as a command.

Kushaj · May 22, 2020, 5:02pm

Add it to the environment and see if it works: os.environ['CUDA_LAUNCH_BLOCKING'] = 1. Also, tell me the output shapes and which versions of PyTorch and the CUDA driver you are on.

Jordan_Howell · May 22, 2020, 5:06pm

‘os.environ[‘CUDA_LAUNCH_BLOCKING’]’ is not recognized as an internal or external command,
operable program or batch file.

Kushaj · May 22, 2020, 5:08pm

It should be ="1". Strings only allowed.

peterjc123 · May 22, 2020, 5:12pm

Well, if you type the command in CMD, then it should be set CUDA_LAUNCH_BLOCKING=1.

Jordan_Howell · May 22, 2020, 5:15pm

This is what I put:
os.environ['CUDA_LAUNCH_BLOCKING']="1" python inspection_model.py

Jordan_Howell · May 22, 2020, 5:16pm

Hi @peterjc123. I did that and I got no error but the script didn’t run either.

set CUDA_LAUNCH_BLOCKING=1 python inspection_model.py

Kushaj · May 22, 2020, 5:17pm

You type it in your Python script: import os, and then enter that line at the start of your script. Also, it will not solve the problem, as it will just output a stack trace. What are the shapes of cat_embedd and embeddings? Print these.

Jordan_Howell · May 22, 2020, 5:18pm

embeddings:

[(3, 2), (21, 11), (4, 2), (6, 3), (15, 8), (7, 4), (3, 2), (10, 5), (13, 7), (33, 17), (2, 1)]

I don’t know cat_embedd as it errors out there.

Jordan_Howell · May 22, 2020, 5:28pm

Here is what I got with the CUDA_LAUNCH_BLOCKING=1

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
THCudaCheck FAIL file=..\aten\src\THC\THCGeneral.cpp line=313 error=710 : device-side assert triggered
Traceback (most recent call last):
  File "inspection_model.py", line 547, in <module>
    train_losses, test_losses = roof_run()
  File "inspection_model.py", line 501, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 453, in forward
    cat_embedd = torch.cat(embeddings, 1)
RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCGeneral.cpp:313

Kushaj · May 22, 2020, 5:37pm

torch.cat works as

# Two tensors that differ only in their size along dim 0.
a = torch.randn(4,3)
b = torch.randn(10,3)
# Concatenate along dim 0; the sizes of the remaining dimension (3) match.
c = torch.cat((a,b), 0)
c.shape # torch.Size([14, 3]): 4 + 10 rows, 3 columns

So you cannot use cat_embedd = torch.cat(embeddings,1). If I do embeddings = torch.tensor(embeddings) the output is of shape (11,2).

Jordan_Howell · May 22, 2020, 5:43pm

I’m not sure I understand. Should I do torch.cat(embeddings, 0)?

Kushaj · May 22, 2020, 5:44pm

torch.cat expects a tuple of tensors. So you need to provide it tensors as (a,b,c) and then it will concatenate those like in the example I showed above. In your case embeddings is a python list, which will not work.

Jordan_Howell · May 22, 2020, 5:45pm

Nevermind. I tried that and got the following:

    cat_embedd = torch.cat(embeddings, 0)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 1 and 2 in dimension 1 at C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src\THC/generic/THCTensorMath.cu:71

Kushaj · May 22, 2020, 5:45pm

Also, what is the expected output you want by doing torch.cat(embeddings,1).

Jordan_Howell · May 22, 2020, 5:49pm

New error with `embeddings = torch.tensor(embeddings)`:

C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
C:/w/1/s/tmp_conda_3.7_100118/conda/conda-bld/pytorch_1579082551706/work/aten/src/THC/THCTensorIndex.cu:307: block: [0,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
Traceback (most recent call last):
  File "inspection_model.py", line 550, in <module>
    train_losses, test_losses = roof_run()
  File "inspection_model.py", line 504, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)
  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "inspection_model.py", line 455, in forward
    embeddings = torch.tensor(embeddings)
ValueError: only one element tensors can be converted to Python scalars

The output I want is to concatenate my categorical and numerical columns with the output from my VGG19 image model to give more lift.

Jordan_Howell · June 1, 2020, 1:46pm

Hey all,

Does anyone else have any ideas what I can do? Here is the full traceback I’m getting now:


  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 555, in <module>
    train_losses = roof_run()

  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 507, in roof_run
    outputs = roof_model(image, numerical_data, categorical_data)

  File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)

  File "C:\Users\JORDAN.HOWELL.GITDIR\Documents\GitHub\Inspection_Photo_Pytorch_Model\inspection_model.py", line 454, in forward
    cat_embedd = torch.cat(embeddings, 1)

RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCGeneral.cpp:313

ptrblck · June 2, 2020, 6:50am

Try to run your code on the CPU and see, if you get a proper error message.
If it’s running fine on the CPU, run the script on the GPU again via:

CUDA_LAUNCH_BLOCKING=1 python script.py args

which should give you a stacktrace pointing out the line of code, which raised the issue.