I got the following error RuntimeError: cuda runtime error (700) : an illegal memory access was encountered at /opt/conda/conda-bld/pytorch_1595629411241/work/aten/src/THC/THCCachingHostAllocator.cpp:278 while running my code with cuda().
So I decided to check the device number for the variables. I printed following variables from forward() function
input_ device no: 1
support device no: 1
weight device no: -1
Here is the code that caused the error.
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
class GNNLinear(Module):
def init(self, in_features, out_features, num_supports, act_fn, dropout=False, batch_norm=False):
¦ super(GNNLinear, self).init()
¦ self.num_supports = num_supports
¦ self.in_features = in_features
¦ self.out_features = out_features
¦ self.dropout = dropout
¦ if self.dropout:
¦ ¦ self.dropout_fn = nn.Dropout(p=0.5, inplace=True)
¦ self.batch_norm = batch_norm
¦ if self.batch_norm:
¦ ¦ self.batch_norm_fn = nn.BatchNorm1d(out_features)
¦ self.act_fn = act_fn
¦ self.weight = []
¦ for i in range(num_supports):
¦ ¦ self.weight.append(Parameter(torch.FloatTensor(in_features, out_features)))
¦ if bias:
¦ ¦ self.bias = Parameter(torch.FloatTensor(out_features))
¦ else:
¦ ¦ self.register_parameter('bias', None)
¦ self.reset_parameters()
def reset_parameters(self):
¦ for i in range(self.num_supports):
¦ ¦ ¦bound = 1. / math.sqrt(self.weight[i].size(1))
¦ ¦ ¦self.weight[i].data.uniform_(-bound, bound)
¦ if self.bias is not None:
¦ ¦ self.bias = Parameter(torch.zeros(self.bias.shape, dtype=torch.float32))
def forward(self, input_, support):
¦ if self.dropout:
¦ ¦ input_ = self.dropout_fn(input_)
¦ z = 0
¦ for i in range(self.num_supports):
¦ ¦ print('input_ device no: ', input_.get_device())
¦ ¦ print('support device no: ', support[i].get_device())
¦ ¦ print('weight device no: ', self.weight[i].get_device())
¦ ¦ h_i = torch.mm(input_, self.weight[i])
print(h_i)
¦ ¦ z_i = torch.sparse.mm(support[i], h_i)
¦ ¦ z += z_i
¦ if self.bias is not None:
¦ ¦ z = z + self.bias
¦ if self.act_fn:
¦ ¦ z = self.act_fn(z)
¦ if self.batch_norm:
¦ ¦ z = self.batch_norm_fn(z)
¦ return z
¦
def __repr__(self):
¦ return self.__class__.__name__ + ' (' \
¦ ¦ ¦ + str(self.in_features) + ' -> ' \
¦ ¦ ¦ + str(self.out_features) + ')'