Tabular Data Model throwing NotImplementedError

So I’m trying to run a tabular-data-only model as a baseline for a CNN+data model. I’m getting a NotImplementedError somewhere in the forward pass, and I’m not sure where I’m messing up. Here is the model:

class Data_Only_Model(nn.Module):
    def __init__(self, embedding_size, num_numerical_cols, output_size, layers, p = 0.4):
        '''
        Build the embeddings, batch-norm and fully connected stack for a tabular model.

        Args
        ---------------------------
        embedding_size: Iterable of (ni, nf) pairs, one per categorical column; each pair
            is passed to nn.Embedding(ni, nf)
        num_numerical_cols: Stores the total number of numerical columns
        output_size: The size of the output layer or the number of possible outputs.
        layers: List which contains number of neurons for all the layers.
        p: Dropout probability (default 0.4), used for the embedding dropout and for
            the dropout after every hidden layer

        '''
        super().__init__()

        #ModuleList holding one nn.Embedding per categorical column
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])

        #dropout applied to the concatenated embeddings
        self.embedding_dropout = nn.Dropout(p)

        #a single 1-dimensional batch normalization layer over all numerical columns
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

        #the summed embedding widths and the number of numerical columns are added together and stored in input_size
        all_layers = []
        num_categorical_cols = sum((nf for ni, nf in embedding_size))
        input_size = num_categorical_cols + num_numerical_cols

        #loop iterates to add corresponding layers to all_layers list above
        #(each hidden layer is Linear -> ReLU -> BatchNorm1d -> Dropout)
        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i

        #append output layer to list of layers
        all_layers.append(nn.Linear(layers[-1], output_size))

        #pass all layers to the sequential class
        self.layers = nn.Sequential(*all_layers)


        #define the forward method
        # NOTE(review): this def is indented inside __init__, so it is only a local
        # function of __init__ and never becomes a method of the class. Calling the
        # model therefore reaches nn.Module.forward, which raises NotImplementedError.
        def forward(self, x_categorical, x_numerical):
            #this starts the embedding of categorical columns
            #(column i of x_categorical is looked up in the i-th embedding)
            embeddings = []
            for i,e in enumerate(self.all_embeddings):
                embeddings.append(e(x_categorical[:,i]))

            x = torch.cat(embeddings, 1)
            x = self.embedding_dropout(x)

            #normalizing numerical columns
            x_numerical = self.batch_norm_num(x_numerical)

            #concatenating numerical and categorical columns
            x = torch.cat([x, x_numerical], 1)
            x = self.layers(x)

            return x
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(d_o_model.parameters(), lr=0.001)

epochs = 300
# One plain Python float per epoch.
aggregated_losses = []

# Each epoch passes categorical_data and numerical_data to the model in a
# single call; the counter is 1-based so the progress message reads naturally.
for i in range(1, epochs + 1):
    y_pred = d_o_model(categorical_data, numerical_data)
    single_loss = loss_function(y_pred, target_columns)
    # Store the scalar value, not the tensor: appending the tensor itself would
    # keep every epoch's autograd graph referenced for the whole run.
    aggregated_losses.append(single_loss.item())

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

# Report the loss of the final epoch.
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

Here is a full traceback of the error:


NotImplementedError Traceback (most recent call last)
in
4 for i in range(epochs):
5 i += 1
----> 6 y_pred = d_o_model(categorical_data, numerical_data)
7 single_loss = loss_function(y_pred, target_columns)
8 aggregated_losses.append(single_loss)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in forward(self, *input)
95 registered hooks while the latter silently ignores them.
96 """
---> 97 raise NotImplementedError
98
99 def register_buffer(self, name, tensor):

NotImplementedError:

Looking at your code, I think you have an indentation problem: Your forward function is defined inside the __init__ and not in the class :wink:

Thanks. I’m getting a new error. If I can’t find the solution on here, do you want me to add to this or close it out and open another question?

Sure, you can add it here.

Ok. New error after I fixed the indentation:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-35-b80fee36e101> in <module>
      6     categorical_data = categorical_data.cuda()
      7     numerical_data = numerical_data.cuda()
----> 8     y_pred = d_o_model(categorical_data, numerical_data)
      9     single_loss = loss_function(y_pred, target_columns)
     10     aggregated_losses.append(single_loss)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-32-1d6027d92b85> in forward(self, x_categorical, x_numerical)
     49             embeddings.append(e(x_categorical[:,i]))
     50 
---> 51         x = torch.cat(embeddings, 1)
     52         x = self.embedding_dropout(x)
     53 

RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCCachingHostAllocator.cpp:278

Here is the training loop:

epochs = 300
# One plain Python float per epoch.
aggregated_losses = []

# Move the inputs to the GPU once, up front; repeating .cuda() on every epoch
# is redundant because the tensors do not change between epochs.
# NOTE(review): d_o_model and target_columns are not moved to the GPU in this
# snippet -- presumably that happens elsewhere; confirm.
categorical_data = categorical_data.cuda()
numerical_data = numerical_data.cuda()

for i in range(1, epochs + 1):
    y_pred = d_o_model(categorical_data, numerical_data)
    single_loss = loss_function(y_pred, target_columns)
    # Store the scalar value, not the tensor: appending the tensor itself would
    # keep every epoch's autograd graph referenced for the whole run.
    aggregated_losses.append(single_loss.item())

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

# Report the loss of the final epoch.
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

My tensor of labels is made up of 0s and 1s. The target.size() is torch.Size([64438]). Here is the model, to which I’ve added a sigmoid at the end.

class Data_Only_Model(nn.Module):
    '''Feed-forward network over embedded categorical columns plus numerical columns.'''

    def __init__(self, embedding_size, num_numerical_cols, output_size, layers, p = 0.4):
        '''
        Args
        ---------------------------
        embedding_size: Iterable of (ni, nf) pairs, one per categorical column; each pair
            is passed to nn.Embedding(ni, nf)
        num_numerical_cols: Stores the total number of numerical columns
        output_size: The size of the output layer or the number of possible outputs.
        layers: List which contains number of neurons for all the hidden layers.
            May be empty, in which case the inputs feed the output layer directly.
        p: Dropout probability (default 0.4), used for the embedding dropout and for
            the dropout after every hidden layer

        '''
        super().__init__()

        #embedding_size is walked twice below, so materialise it in case a
        #one-shot iterable (e.g. a generator) was passed
        embedding_size = list(embedding_size)

        #one nn.Embedding per categorical column
        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])

        #dropout applied to the concatenated embeddings
        self.embedding_dropout = nn.Dropout(p)

        #a single 1-dimensional batch normalization layer over all numerical columns
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)

        #width of the first Linear layer: summed embedding widths + numerical columns
        num_categorical_cols = sum(nf for _, nf in embedding_size)
        input_size = num_categorical_cols + num_numerical_cols

        #each hidden layer is Linear -> ReLU -> BatchNorm1d -> Dropout
        all_layers = []
        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i

        #output layer; input_size equals layers[-1] after the loop, but unlike
        #layers[-1] it is also valid when layers is empty
        all_layers.append(nn.Linear(input_size, output_size))

        #pass all layers to the sequential class
        self.layers = nn.Sequential(*all_layers)


    #define the forward method
    def forward(self, x_categorical, x_numerical):
        '''
        Args
        ---------------------------
        x_categorical: 2-d integer tensor; column i is looked up in the i-th embedding,
            so its values must be valid indices for that embedding
        x_numerical: 2-d tensor with num_numerical_cols columns

        Returns the sigmoid of the output layer, one row per input row.
        '''
        #embed every categorical column and join the results side by side
        embeddings = [e(x_categorical[:, i]) for i, e in enumerate(self.all_embeddings)]
        x = torch.cat(embeddings, 1)
        x = self.embedding_dropout(x)

        #normalizing numerical columns
        x_numerical = self.batch_norm_num(x_numerical)

        #concatenating numerical and categorical columns
        x = torch.cat([x, x_numerical], 1)
        x = self.layers(x)

        #torch.sigmoid replaces the deprecated F.sigmoid (same result)
        # NOTE(review): nn.CrossEntropyLoss expects raw, unnormalised scores; if that
        # is the loss in use, applying a sigmoid here is probably unintended -- confirm.
        return torch.sigmoid(x)

I think this may be some sort of memory issue. When I take out some of my categorical embeddings, I get a new error.

Could you post the new error with a reproducible code snippet, please?

It’s funny. When i send the model, categorical_data and numerical_data to cuda(), I get the same error as above. When i run it on cpu, I don’t.

Do embeddings take up a lot of space?

Ok, I’m starting to get confused. I got this to run on my GPU yesterday and today’s same notebook, with the same model gave me the below again:

RuntimeError                              Traceback (most recent call last)
<ipython-input-31-d7f3d5975bb8> in <module>
     13             break
     14 
---> 15         y_pred = d_o_model(num, cat)
     16         single_loss = criterion(y_pred, label)
     17         aggregated_losses.append(single_loss.cpu().data.numpy())

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-28-a1d9e4e91784> in forward(self, x_categorical, x_numerical)
     49             embeddings.append(e(x_categorical[:,i]))
     50 
---> 51         x = torch.cat(embeddings, 1)
     52         x = self.embedding_dropout(x)
     53 

RuntimeError: cuda runtime error (710) : device-side assert triggered at ..\aten\src\THC\THCCachingHostAllocator.cpp:278

I’m not finding anything on this device-side assert triggered on the forum.

Here is my training loop:

# One 0-d NumPy array per processed batch.
aggregated_losses = []
# The inner loop stops when the 1-based batch counter reaches this value, so at
# most max_batch - 1 batches are trained on per epoch.
max_batch = 6450

for i in range(epochs):

    # start=1 yields the 1-based batch counter directly.
    for b, (label, cat, num) in enumerate(train_loader, start=1):
        # Check the cap first so no GPU transfer is done for a batch that is
        # about to be skipped.
        if b == max_batch:
            break

        num = num.cuda()
        cat = cat.cuda()
        label = label.cuda()

        # NOTE(review): the model listing declares forward(x_categorical, x_numerical),
        # but the call passes (num, cat). Confirm the order train_loader really
        # yields its tensors in.
        y_pred = d_o_model(num, cat)
        single_loss = criterion(y_pred, label)
        # detach() drops the autograd graph before the value is copied to the
        # host; it replaces the legacy `.data` attribute and stores the same value.
        aggregated_losses.append(single_loss.detach().cpu().numpy())

        print(f'epoch: {i:3}, batch: {b:3}, loss: {single_loss.item():10.8f}')

        optimizer.zero_grad()
        single_loss.backward()
        optimizer.step()

    # The scheduler is stepped once per epoch with the last processed batch's loss.
    scheduler.step(single_loss)


print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

The model is the same as the original post.

OK. I found that I should run with CUDA_LAUNCH_BLOCKING=1, and got the following full traceback:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-42-b6fff162d821> in <module>
     15             break
     16 
---> 17         y_pred = d_o_model(num, cat)
     18         single_loss = criterion(y_pred, label)
     19 

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-39-a995c8522c32> in forward(self, x_categorical, x_numerical)
     57         #concatenating numerical and categorical columns
     58         x = torch.cat([x, x_numerical], 1)
---> 59         x = self.layers(x)
     60         #x = F.log_softmax(x)
     61         return x

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
     90     def forward(self, input):
     91         for module in self._modules.values():
---> 92             input = module(input)
     93         return input
     94 

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
     79             input, self.running_mean, self.running_var, self.weight, self.bias,
     80             self.training or not self.track_running_stats,
---> 81             exponential_average_factor, self.eps)
     82 
     83     def extra_repr(self):

C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   1668     return torch.batch_norm(
   1669         input, weight, bias, running_mean, running_var,
-> 1670         training, momentum, eps, torch.backends.cudnn.enabled
   1671     )
   1672 

RuntimeError: cuDNN error: CUDNN_STATUS_MAPPING_ERROR

I’m a little more confused now.

Was the code running before and suddenly you see this error again?
Could you provide an executable code snippet with all model arguments, data shapes etc., so that we could try to reproduce this issue?

Is it possible to send you the notebook?

Jordan

You could just post the code snippet here using some random data, so that I could run the script.

@ptrblck I apologize. I noticed I had 800 NA values in one of the columns. It’s running now. I apologize for wasting your time. I renamed a df and filled na’s on the old name.

I’m glad you’ve figured it out :slight_smile: