I am trying to use the Triton kernel for this convolution; however, I get an error. Am I doing something wrong?
import torch
import torch._dynamo
from torch._inductor import config
@config.patch({"triton.convolution" : "triton"})
def run_conv():
    """Build a small Conv2d module, compile it with the inductor backend
    (forcing the Triton convolution implementation), run it once on a
    CUDA input, and print the result.
    """

    class ConvModule(torch.nn.Module):
        # Single 3x3 convolution: 4 -> 320 channels, stride 1,
        # no padding, dilation 1.
        def __init__(self, **kwargs):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(4, 320, (3, 3), (1, 1), (0, 0), (1, 1))

        def forward(self, x):
            return self.conv1(x)

    model = ConvModule().cuda()
    # Compile the module with TorchDynamo using the inductor backend.
    compiled = torch._dynamo.optimize("inductor")(model)
    sample = torch.randn([2, 4, 66, 66], dtype=torch.float32, device="cuda")
    print(compiled(sample))
run_conv()
…
File "/home/alexander/triton/python/triton/runtime/autotuner.py", line 95, in prune_configs
pruned_configs = self.early_config_prune(self.configs, self.nargs)
File "/home/alexander/pytorch/torch/_inductor/triton_ops/conv_perf_model.py", line 100, in early_config_prune
cc = _triton.runtime.cc(backend, device)
AttributeError: module 'triton._C.libtriton.triton.runtime' has no attribute 'cc'
In "torch/_inductor/triton_ops/conv.py",
I tried removing the 'return' statements in the @triton.jit functions
and bypassing the autotuner, and I also had to remove the 'tl.store' in _kernel_delta_x() or it would get stuck:
tl.store(y_ptrs, acc, mask=mask_y)
return
but I'm not sure what is wrong with the tl.store.