# 1D Kernel vs 1D Conv speed

I was curious why running a 1D kernel as `Conv3d` with kernel size `(k, 1, 1)` is a bit slower than running `Conv1d(k)` on a reshaped input.

For example, if I define `conv1`, `conv2`, and `conv3` as `nn.Conv1d` layers with kernel size `k`, the code below is around 5x faster:

```python
def forward(self, x):
    # Tensor is NxCxTxHxW
    b, c, t, h, w = x.size()
    # Temporal axis: move T last, flatten, convolve, restore
    conv0 = self.conv1(
        x.permute(0, 1, 3, 4, 2).reshape(b, c, h * w * t)).view(b, c, h, w, t).permute(0, 1, 4, 2, 3)
    # Height axis: move H last
    conv1 = self.conv2(
        x.permute(0, 1, 2, 4, 3).reshape(b, c, t * w * h)).view(b, c, t, w, h).permute(0, 1, 2, 4, 3)
    # Width axis: W is already the last dimension
    conv2 = self.conv3(
        x.reshape(b, c, t * h * w)).view(b, c, t, h, w)
    # Out
    out = self.relu(conv0) + self.relu(conv1) + self.relu(conv2)
    return out
```
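For context, the `forward` above assumes `Conv1d` layers defined roughly like this (the class name, signature, and `padding` choice are my assumptions, not the original code; `padding=k // 2` with odd `k` keeps the flattened length unchanged so the `view` back to 5D is valid):

```python
import torch.nn as nn

class SepConv1d(nn.Module):
    # Hypothetical sketch of the layer definitions assumed by the forward above
    def __init__(self, in_channels, out_channels, k, groups=1, bias=True):
        super().__init__()
        # padding=k//2 (odd k) preserves the flattened length L,
        # so .view(b, c, ...) after the conv works
        self.conv1 = nn.Conv1d(in_channels, out_channels, k,
                               padding=k // 2, groups=groups, bias=bias)  # temporal
        self.conv2 = nn.Conv1d(in_channels, out_channels, k,
                               padding=k // 2, groups=groups, bias=bias)  # height
        self.conv3 = nn.Conv1d(in_channels, out_channels, k,
                               padding=k // 2, groups=groups, bias=bias)  # width
        self.relu = nn.ReLU()
```

One caveat: with this padding, the `Conv1d` kernel slides across the boundaries between consecutive rows of the flattened axis, so outputs near those edges differ slightly from the `Conv3d` version.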

than when using:

```python
# Temporal
self.conv0 = nn.Conv3d(
    in_channels,
    out_channels,
    kernel_size=(kernel_size[0], 1, 1),
    stride=(1, 1, 1),
    groups=groups,
    bias=bias)

# Height
self.conv1 = nn.Conv3d(
    in_channels,
    out_channels,
    kernel_size=(1, kernel_size[1], 1),
    stride=(1, 1, 1),
    groups=groups,
    bias=bias)

# Width
self.conv2 = nn.Conv3d(
    in_channels,
    out_channels,
    kernel_size=(1, 1, kernel_size[2]),
    stride=(1, 1, 1),
    groups=groups,
    bias=bias)

def forward(self, x):
    t0, t1, t2 = self.conv0(x), self.conv1(x), self.conv2(x)
    out = self.relu(t0) + self.relu(t1) + self.relu(t2)
    return out
```

And as far as I'm aware, the two should compute the same operation, shouldn't they?
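One way to sanity-check the equivalence of a single branch (a sketch, with shapes and names of my choosing): copy the `Conv3d` weights into a `Conv1d` and fold H and W into the batch dimension, so each temporal line is convolved independently and there are no row-boundary effects.

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
b, c, t, h, w = 2, 3, 8, 4, 5
k, out = 3, 6

conv3d = nn.Conv3d(c, out, kernel_size=(k, 1, 1), bias=False)
conv1d = nn.Conv1d(c, out, kernel_size=k, bias=False)
# Reuse the same weights: (out, in, k, 1, 1) -> (out, in, k)
with torch.no_grad():
    conv1d.weight.copy_(conv3d.weight.view(out, c, k))

x = torch.randn(b, c, t, h, w)
ref = conv3d(x)  # (b, out, t-k+1, h, w)

# Fold H and W into the batch so the 1D conv sees one temporal line at a time
lines = x.permute(0, 3, 4, 1, 2).reshape(b * h * w, c, t)
y = conv1d(lines).view(b, h, w, out, t - k + 1).permute(0, 3, 4, 1, 2)

print(torch.allclose(ref, y, atol=1e-5))
```

Folding H and W into the batch sidesteps the edge mixing that a single big flatten introduces when padding is used, so the comparison is exact up to floating-point rounding.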

Did you call `torch.cuda.synchronize()` before starting and stopping the timer?
If so, could you disable cudnn via `torch.backends.cudnn.enabled = False` and profile the code again, please?
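Something along these lines (a minimal sketch; the helper name and iteration counts are placeholders): warm up first so one-off costs such as allocator setup and cuDNN autotuning aren't counted, then synchronize immediately before and after the timed region.

```python
import time
import torch

def time_forward(module, x, iters=50, warmup=5):
    """Average forward time per call, with warm-up and CUDA synchronization."""
    with torch.no_grad():
        for _ in range(warmup):
            module(x)  # warm-up iterations, not timed
        if x.is_cuda:
            torch.cuda.synchronize()  # drain queued kernels before starting the clock
        start = time.perf_counter()
        for _ in range(iters):
            module(x)
        if x.is_cuda:
            torch.cuda.synchronize()  # wait for the timed kernels to finish
    return (time.perf_counter() - start) / iters
```

Setting `torch.backends.cudnn.enabled = False` before timing rules out differences in cuDNN algorithm selection between the `Conv1d` and `Conv3d` paths.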