Torchvision.transforms.functional.to_tensor: TypeError: __array__() takes 1 positional argument but 2 were given

DavidWRomero · July 2, 2021, 9:44am

Hi,

I am getting a very strange error when transforms.ToTensor() is applied during the data loading of a dataset.

My current torch / numpy versions are:

pytorch == 1.9.0
torchvision == 0.10.0
numpy == 1.21.0

In particular, I am loading the rotatedMNIST dataset following this template:

class RotatedMNISTBase(VisionDataset):
    """Rotated MNIST datasets.
    Download the datasets from https://sites.google.com/a/lisa.iro.umontreal.ca/public_static_twiki/variations-on-the-mnist-digits
    and preprocess it as in (Cohen and Welling) https://github.com/tscohen/gconv_experiments/blob/master/gconv_experiments/MNIST_ROT/mnist_rot.py

    On first use the raw ``.amat`` tables are converted into two torch files
    (``training.pt`` and ``test.pt``) stored under
    ``<root>/<ClassName>/processed``; later instantiations load those files
    directly. Items are returned as ``(image, target)`` pairs where the image
    is a PIL image in mode ``"L"`` (before ``transform`` is applied) and the
    target is a Python ``int``.
    """

    # (url, md5) pairs of the archives fetched by ``download``.
    resources = [
        (
            "http://www.iro.umontreal.ca/~lisa/icml2007data/mnist_rotation_new.zip",
            "0f9a947ff3d30e95cd685462cbf3b847",
        ),
    ]

    # File names of the processed splits inside ``processed_folder``.
    training_file = "training.pt"
    test_file = "test.pt"
    classes = [
        "0 - zero",
        "1 - one",
        "2 - two",
        "3 - three",
        "4 - four",
        "5 - five",
        "6 - six",
        "7 - seven",
        "8 - eight",
        "9 - nine",
    ]

    def __init__(
        self, root, train=True, transform=None, target_transform=None, download=False
    ):
        """Load the requested split, optionally downloading it first.

        Args:
            root: Directory under which the raw and processed folders live.
            train: If true load ``training.pt``, otherwise ``test.pt``.
            transform: Optional callable applied to every PIL image.
            target_transform: Optional callable applied to every target.
            download: If true, download and preprocess the data when the
                processed files are not present yet.

        Raises:
            RuntimeError: If the processed files do not exist (and were not
                created by ``download``).
        """
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.train = train  # training set or test set

        if download:
            self.download()

        if not self._check_exists():
            raise RuntimeError(
                "Dataset not found. You can use download=True to download it"
            )

        data_file = self.training_file if self.train else self.test_file
        self.data, self.targets = torch.load(
            os.path.join(self.processed_folder, data_file)
        )

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, target = self.data[index], int(self.targets[index])

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img.numpy(), mode="L")

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """Return the number of samples in the loaded split."""
        return len(self.data)

    @property
    def raw_folder(self):
        """Folder that receives the downloaded and extracted archive."""
        return os.path.join(self.root, self.__class__.__name__, "raw")

    @property
    def processed_folder(self):
        """Folder that holds the processed ``.pt`` files."""
        return os.path.join(self.root, self.__class__.__name__, "processed")

    @property
    def class_to_idx(self):
        """Mapping from class name (see ``classes``) to its integer index."""
        return {_class: i for i, _class in enumerate(self.classes)}

    def _check_exists(self):
        """Return True only if both processed split files are present."""
        return os.path.exists(
            os.path.join(self.processed_folder, self.training_file)
        ) and os.path.exists(os.path.join(self.processed_folder, self.test_file))

    @staticmethod
    def _split_images_and_labels(table):
        """Split a loaded ``.amat`` table into uint8 images and uint8 labels.

        Every row is treated as 28 * 28 pixel values followed by one label
        column.
        """
        images = table[:, :-1].reshape(-1, 28, 28)
        # Scale to 8-bit. A pixel value of exactly 1.0 maps to 256, which is
        # out of range for uint8 and does not saturate on conversion, so clamp
        # before the cast to keep the brightest pixels at 255.
        images = (images * 256).round().clamp(0, 255).type(torch.uint8)
        labels = table[:, -1].type(torch.uint8)
        return images, labels

    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""

        if self._check_exists():
            return

        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        # download files
        for url, md5 in self.resources:
            filename = url.rpartition("/")[2]
            download_and_extract_archive(
                url, download_root=self.raw_folder, filename=filename, md5=md5
            )

        # process and save as torch files
        print("Processing...")

        train_filename = os.path.join(
            self.raw_folder, "mnist_all_rotation_normalized_float_train_valid.amat"
        )
        test_filename = os.path.join(
            self.raw_folder, "mnist_all_rotation_normalized_float_test.amat"
        )

        train_val = torch.from_numpy(np.loadtxt(train_filename))
        test = torch.from_numpy(np.loadtxt(test_filename))

        train_val_data, train_val_labels = self._split_images_and_labels(train_val)
        # we ignore the validation set: only the first 10000 samples are kept.
        # The slices are cloned so that torch.save serializes just these
        # samples instead of the whole storage backing the view.
        training_set = (
            train_val_data[:10000].clone(),
            train_val_labels[:10000].clone(),
        )

        test_set = self._split_images_and_labels(test)

        with open(os.path.join(self.processed_folder, self.training_file), "wb") as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self.test_file), "wb") as f:
            torch.save(test_set, f)

        print("Done!")

    def extra_repr(self):
        """Return the split description included in the dataset's repr."""
        return "Split: {}".format("Train" if self.train is True else "Test")


class RotatedMNIST(RotatedMNISTBase):
    """Rotated MNIST split chosen by name, with a fixed ToTensor + Normalize transform.

    The data directory is ``<utils.get_original_cwd()>/data`` and the dataset
    is always constructed with ``download=True``.
    """

    def __init__(
        self,
        partition,
        augment,
    ):
        """Build the dataset for the given partition.

        Args:
            partition: ``"train"`` selects the training split; any other
                value selects the test split.
            augment: Only the string ``"None"`` is supported.

        Raises:
            NotImplementedError: If ``augment`` is anything but ``"None"``.
        """
        data_root = os.path.join(utils.get_original_cwd(), "data")

        is_train = bool(partition == "train")

        # Guard clause: no augmentation pipeline other than "None" exists.
        if augment != "None":
            raise NotImplementedError()

        # Per-channel normalization constants passed to Normalize.
        mean = (0.1307,)
        stddev = (0.3081,)
        pipeline = torchvision.transforms.Compose(
            [
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(mean, stddev),
            ]
        )

        super().__init__(
            root=data_root,
            train=is_train,
            transform=pipeline,
            download=True,
        )

When I iterate through the data in the training routine, I get the following error:

Original Traceback (most recent call last):
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/homes/romeroguzman/PycharmProjects/pSym/dr/datasets/rotMNIST.py", line 107, in __getitem__
    img = self.transform(img)
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torchvision/transforms/transforms.py", line 60, in __call__
    img = t(img)
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torchvision/transforms/transforms.py", line 97, in __call__
    return F.to_tensor(pic)
  File "/homes/romeroguzman/anaconda3/envs/psym/lib/python3.9/site-packages/torchvision/transforms/functional.py", line 129, in to_tensor
    np.array(pic, dtype=mode_to_nptype.get(pic.mode, np.uint8), copy=True)
TypeError: __array__() takes 1 positional argument but 2 were given

I have the impression that this is a compatibility issue between numpy and torchvision.

At first sight, it seemed to me that np.array simply no longer supported passing the dtype argument positionally. To check whether that was the case, I replaced the line in the torchvision.transforms.functional file with

np.array(pic, dtype=mode_to_nptype.get(pic.mode, np.uint8), copy=True)

Nevertheless, an equivalent error was raised.

It thus seems that np.array no longer supports the dtype argument at all (which seems quite weird to me). The problem was solved by doing

img = torch.from_numpy(
        np.array(pic, copy=True).astype(mode_to_nptype.get(pic.mode, np.uint8))
    )

I am not sure if this is a problem of torchvision itself or it is a problem of the data I am using.

Any comments / insights / suggestions are much appreciated

David

ptrblck · July 2, 2021, 7:15pm

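This seems to be the PIL/Pillow issue as described here (the linked issue: Pillow 8.3.0's `Image.__array__` does not accept the `dtype` argument that `np.array` passes to it, which raises this TypeError; it was fixed in Pillow 8.3.1).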

DavidWRomero · July 6, 2021, 1:03pm

Hi @ptrblck, problem solved. Thank you!

Torchvision.transforms.functional.to_tensor: TypeError: __array__() takes 1 positional argument but 2 were given error:

Torchvision.transforms.functional.to_tensor: TypeError: __array__() takes 1 positional argument but 2 were given