Surprising behavior from grid_sample

If I sample a square image with F.grid_sample(input, grid), where the grid has been created by F.affine_grid(theta, size) with a non-zero rotation part in theta, I get the expected result. If I do the same for a non-square image, the output is visibly skewed.

Is this the intended behavior? And if so, is there an easy way to get an unskewed result when sampling non-square images?

Note: in the minimal example here, theta only has a rotation part, no translation. In the real task where I want to use affine_grid and grid_sample, I use both rotation and translation.
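For context, here is a quick check (my own illustration, separate from the repro below) showing that affine_grid produces coordinates in normalized [-1, 1] units on both axes regardless of the image's aspect ratio, which I suspect is the source of the skew:

    import torch
    import torch.nn.functional as F

    # identity theta: affine_grid returns the plain (untransformed) sampling grid
    theta = torch.eye(2, 3).unsqueeze(0)
    grid = F.affine_grid(theta, (1, 1, 300, 1000), align_corners=False)
    print(grid[..., 0].min().item(), grid[..., 0].max().item())  # x spans roughly [-1, 1]
    print(grid[..., 1].min().item(), grid[..., 1].max().item())  # y spans roughly [-1, 1]
    # Both axes cover the same normalized range even though the image is 1000 px
    # wide and only 300 px tall, so a rotation in this space mixes axes with
    # different physical scales and shears a non-square image.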

Code to produce these results:

import torch
import torch.nn.functional as F
import imageio


def fill_center(img, s):
    # paint a centered s-by-s square of ones onto the image
    n, rows, cols = img.shape
    s //= 2
    c_row = rows // 2
    c_col = cols // 2
    img[:, c_row - s:c_row + s, c_col - s:c_col + s] = 1
    return img


def rot_mat(angles):
    # build a batch of 2x2 rotation matrices, one per angle
    coss = torch.cos(angles).unsqueeze(1)
    sins = torch.sin(angles).unsqueeze(1)
    rotations = torch.stack((torch.cat((coss, -sins), dim=1),
                             torch.cat((sins, coss), dim=1)), dim=2)  # n_images, 2, 2
    return rotations


shapes = [[300, 300], [300, 1000]]  # (rows, cols): one square and one wide image
names = ["square", "wide"]
n_images = 2
parameters = torch.zeros((n_images, 3))  # per image: angle, then translation (x, y)
parameters[1, 0] = 3.1415 / 4  # rotate the second image by roughly 45 degrees
for shape, name in zip(shapes, names):

    orig_image = torch.zeros((1, *shape)) + 0.1  # dark background
    orig_image = fill_center(orig_image, 100)  # bright square in the middle

    orig_image = orig_image.expand(n_images, *orig_image.shape)  # (n, 1, rows, cols)

    theta = torch.cat((rot_mat(parameters[:, 0]), parameters[:, 1:, None]), dim=-1)  # (n, 2, 3)

    scale = 1
    n, c, rows, cols = orig_image.shape
    sample_grid = F.affine_grid(theta, (n_images, 1, int(rows * scale), int(cols * scale)),
                                align_corners=False).to(orig_image.device)

    images = F.grid_sample(orig_image, sample_grid, align_corners=False)

    images *= 255
    images = images.permute(0, 2, 3, 1).to("cpu", torch.uint8)  # NCHW -> NHWC for saving

    for i in range(n_images):
        imageio.imwrite(f"{name}_{i:0>3}.jpg", images[i, ...])

In case anybody is interested: the skew comes from the fact that affine_grid and grid_sample work in normalized coordinates, where both axes span [-1, 1] regardless of the image's aspect ratio, so a rotation in that space is only a true rotation for square images. I managed to obtain the desired results by scaling theta into pixel units according to the image size and scaling the resulting grid back:

    theta = torch.cat((rot_mat(parameters[:, 0]), parameters[:, 1:, None]), dim=-1)
    # scale theta's input coordinates from normalized [-1, 1] units to pixels,
    # so the rotation is applied to the image's true geometry
    theta[:, :, 0] *= shape[1] / 2  # x column: half the width
    theta[:, :, 1] *= shape[0] / 2  # y column: half the height

    scale = 1
    n, c, rows, cols = orig_image.shape
    sample_grid = F.affine_grid(theta, (n_images, 1, int(rows * scale), int(cols * scale)),
                                align_corners=False).to(orig_image.device)
    sample_grid[..., 0] /= shape[1] / 2  # scale the sampled coordinates back to
    sample_grid[..., 1] /= shape[0] / 2  # the normalized units grid_sample expects

    images = F.grid_sample(orig_image, sample_grid, align_corners=False)

giving the following result:

[image: wide_001 — the wide image rotated by 45 degrees, no longer skewed]
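An equivalent formulation (a sketch of my own, reusing rot_mat, parameters, shape, and orig_image from the code above) folds the same correction directly into theta by conjugating the rotation with the scaling S = diag(W/2, H/2), so the grid needs no post-processing:

    w, h = shape[1], shape[0]
    S = torch.tensor([[w / 2, 0.0], [0.0, h / 2]])          # normalized -> pixel units
    S_inv = torch.tensor([[2.0 / w, 0.0], [0.0, 2.0 / h]])  # pixel -> normalized units
    # map normalized coords to pixels, rotate there, then map back
    rot_part = S_inv @ rot_mat(parameters[:, 0]) @ S        # (n_images, 2, 2)
    theta = torch.cat((rot_part, parameters[:, 1:, None]), dim=-1)
    sample_grid = F.affine_grid(theta, orig_image.shape, align_corners=False)
    images = F.grid_sample(orig_image, sample_grid, align_corners=False)

Note that here the translation column stays in normalized units, whereas the grid-rescaling version above effectively expresses the translation in pixel units; with zero translation, as in this example, the two give identical results.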