What is the recommended format to save data in pytorch?

Seems that the ToTensor op gives problems. Most likely the best way to save data is in numpy format so that transforms do not complain.

Error:

TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>

sample code that gave error:


# saving torch tensors

import torch
import torch.nn as nn
import torchvision

from pathlib import Path
from collections import OrderedDict

path = Path('~/data/tmp/').expanduser()
path.mkdir(parents=True, exist_ok=True)

tensor_a = torch.rand(2,3)
tensor_b = torch.rand(1,3)

db = {'a': tensor_a, 'b': tensor_b}

torch.save(db, path/'torch_db')
loaded = torch.load(path/'torch_db')
print( loaded['a'] == tensor_a )
print( loaded['b'] == tensor_b )

# testing if ToTensor() screws things up
lb, ub = -1, 1
N, Din, Dout = 3, 1, 1
x = torch.distributions.Uniform(low=lb, high=ub).sample((N, Din))
print(x)

f = nn.Sequential(OrderedDict([
    ('f1', nn.Linear(Din,Dout)),
    ('out', nn.SELU())
]))
y = f(x)

transform = torchvision.transforms.transforms.ToTensor()
y_proc = transform(y)
print(y_proc)

reference for saving data in numpy:


# saving data in numpy

import numpy as np
import pickle
from pathlib import Path

path = Path('~/data/tmp/').expanduser()
path.mkdir(parents=True, exist_ok=True)

lb,ub = -1,1
num_samples = 5
x = np.random.uniform(low=lb,high=ub,size=(1,num_samples))
y = x**2 + x + 2

# using save (to npy), savez (to npz)
np.save(path/'x', x)
np.save(path/'y', y)
np.savez(path/'db', x=x, y=y)
with open(path/'db.pkl', 'wb') as db_file:
    pickle.dump(obj={'x':x, 'y':y}, file=db_file)

## using loading npy, npz files
x_loaded = np.load(path/'x.npy')
y_load = np.load(path/'y.npy')
db = np.load(path/'db.npz')
with open(path/'db.pkl', 'rb') as db_file:
    db_pkl = pickle.load(db_file)

print(x is x_loaded)
print(x == x_loaded)
print(x == db['x'])
print(x == db_pkl['x'])
print('done')

reference: https://stackoverflow.com/a/62883249/1601580