Hi, I am trying to call the native functions directly. However, I get 'CUDA error: an illegal memory access was encountered' when I run the CUDA version, and a 'Segmentation fault' when I switch to the CPU version.
I tested the code on PyTorch 1.5/1.6/1.7 with CUDA 9.2; all PyTorch versions gave the same error.
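For reference, the versions in each setup can be confirmed with:

import torch
print(torch.__version__)          # 1.5 / 1.6 / 1.7 were all tried
print(torch.version.cuda)         # 9.2
print(torch.cuda.is_available())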
What I was trying to do:
First, wrap the native functions:
#include <torch/extension.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>

// Thin wrappers around the native layer norm kernels.
std::tuple<at::Tensor, at::Tensor, at::Tensor> layer_norm_forward_cpu(
        const at::Tensor & input,
        const at::Tensor & weight,
        const at::Tensor & bias,
        int64_t M, int64_t N, double eps) {
    return at::native::layer_norm_cpu(input, weight, bias, M, N, eps);
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> backward_layer_norm_cpu(
        const at::Tensor & grad_out,
        const at::Tensor & input,
        const at::Tensor & mean,
        const at::Tensor & rstd,
        const at::Tensor & weight,
        int64_t M, int64_t N, std::array<bool, 3> output_mask) {
    return at::native::layer_norm_backward_cpu(grad_out, input, mean, rstd, weight, M, N, output_mask);
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> layer_norm_forward_cuda(
        const at::Tensor & input,
        const at::Tensor & weight,
        const at::Tensor & bias,
        int64_t M, int64_t N, double eps) {
    return at::native::layer_norm_cuda(input, weight, bias, M, N, eps);
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> backward_layer_norm_cuda(
        const at::Tensor & grad_out,
        const at::Tensor & input,
        const at::Tensor & mean,
        const at::Tensor & rstd,
        const at::Tensor & weight,
        int64_t M, int64_t N, std::array<bool, 3> output_mask) {
    return at::native::layer_norm_backward_cuda(grad_out, input, mean, rstd, weight, M, N, output_mask);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("layer_norm_forward_cpu", &layer_norm_forward_cpu, "layer norm forward (cpu version)");
    m.def("layer_norm_backward_cpu", &backward_layer_norm_cpu, "layer norm backward (cpu version)");
    m.def("layer_norm_forward_cuda", &layer_norm_forward_cuda, "layer norm forward (cuda version)");
    m.def("layer_norm_backward_cuda", &backward_layer_norm_cuda, "layer norm backward (cuda version)");
}
and then call the functions from Python:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pdb
import native


class layer_norm(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, normalized_shape, weight, bias, eps, training):
        # N = number of elements per normalized slice, M = number of slices.
        N = 1
        if isinstance(normalized_shape, int):
            N = normalized_shape
        elif isinstance(normalized_shape, (list, tuple)):
            for i in normalized_shape:
                N *= i
        else:
            raise RuntimeError("unexpected type of normalized_shape: {}".format(type(normalized_shape)))
        M = x.nelement() // N
        if x.is_cuda:
            y, mean, rstd = native.layer_norm_forward_cuda(x, weight, bias, M, N, eps)
        else:
            y, mean, rstd = native.layer_norm_forward_cpu(x, weight, bias, M, N, eps)
        if training:
            ctx.layer_norm_input = x
            ctx.layer_norm_parameters = (mean, rstd, weight, M, N)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.layer_norm_input
        mean, rstd, weight, M, N = ctx.layer_norm_parameters
        output_mask = [True, True, True]
        if grad_output.is_cuda:
            grad_input, grad_weight, grad_bias = native.layer_norm_backward_cuda(
                grad_output, x, mean, rstd, weight, M, N, output_mask)
        else:
            grad_input, grad_weight, grad_bias = native.layer_norm_backward_cpu(
                grad_output, x, mean, rstd, weight, M, N, output_mask)
        ctx.layer_norm_input = None
        ctx.layer_norm_parameters = None
        # One grad per forward input; extra trailing Nones are dropped by autograd.
        return grad_input, None, grad_weight, grad_bias, None, None, None, None


class LayerNorm(nn.LayerNorm):
    def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True):
        nn.LayerNorm.__init__(self, normalized_shape, eps=eps, elementwise_affine=elementwise_affine)

    def forward(self, x):
        y = layer_norm.apply(x, self.normalized_shape, self.weight, self.bias, self.eps, self.training)
        return y


if __name__ == "__main__":
    seed = 2809
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True  # https://github.com/pytorch/pytorch/issues/8019

    model = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
        LayerNorm([64, 56, 56])
    )
    print(model)
    # model = model.cuda()
    model.train()

    iteration = 10
    for i in range(iteration):
        print("index: ", i)
        x = torch.rand(512, 64, 56, 56)
        x = x - 0.5
        # x = x.cuda()
        y = model(x)
        z = y.sum()
        z.backward()
I also uploaded all the code to GitHub (irving-qin/nativefunctions). Just run bash install.sh for testing.
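install.sh just builds and installs the extension with torch.utils.cpp_extension; it is roughly equivalent to a standard setup.py along these lines (the source file name native.cpp is an assumption here):

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name="native",
    ext_modules=[
        # native.cpp contains the wrappers shown above; the extension name
        # must match the "import native" in the test script.
        CUDAExtension(name="native", sources=["native.cpp"]),
    ],
    cmdclass={"build_ext": BuildExtension},
)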
The forward pass of the wrapped layer norm seems to work correctly; however, the errors are thrown in the backward function.
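A quick way to see that the forward path is fine is to compare it against the built-in implementation; a minimal sketch (CPU path, using the LayerNorm module defined above):

import torch
import torch.nn.functional as F

x = torch.rand(512, 64, 56, 56) - 0.5
ln = LayerNorm([64, 56, 56])                  # wrapper module defined above
y = ln(x)                                     # goes through the native forward
y_ref = F.layer_norm(x, [64, 56, 56], ln.weight, ln.bias, ln.eps)
print((y - y_ref).abs().max())                # ~0; the crash only appears once backward runs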
Thanks so much to anyone who can give me some tips.