I am using an implementation of the VAE-GAN from the original paper of ABL Larsen et al. in PyTorch.
I have not the used dataset on hand but trying to use the same implementation with my data, returns me the error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-10-b7345b0afa68> in <module>()
42 datav = Variable(data).cuda()
43 datav = datav.float()
---> 44 mean, logvar, rec_enc = G(datav)
45 #print ("The size of rec_enc:", rec_enc.size())
46
8 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
-> 1370 ret = torch.addmm(bias, input, weight.t())
1371 else:
1372 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [4 x 262144], m2: [16384 x 2048] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:290
I am not really sure which size is mismatching though and I couldn’t find something relevant in the documentation. The only thing I 've changed is the channel=1
as it is grayscale images and
#data, _ = next(iter(train_loader))
data = np.squeeze(next(iter(train_loader)))
instead. Still I can’t find the original type and shape of the original dataset so it’s like looking in the dark. Thanks
For your reference here is my dataloader:
class NiftyDataset(Dataset):
'''
Class that loads nii files, resizes them to 96x96 and feeds them
'''
def __init__(self, root_dir, transform):
'''
root_dir - string - path towards the folder containg the data
'''
# Save the root_dir as a class variable
self.root_dir = root_dir
# Save the filenames in the root_dir as a class variable
self.filenames = listdir(self.root_dir)
self.transform = transform
def __len__(self):
return len(self.filenames)
def __getitem__(self,idx):
# Fetch file filename
img_name = self.filenames[idx]
# Load the nifty image
img = nib.load(os.path.join(self.root_dir,img_name))
# Get the voxel values as a numpy array
img = np.array(img.get_fdata())
print(f"Inside __getitem__ {img.shape} is the image shape")
# Expanding the array with 1 new dimension as feature channel
img = np.expand_dims(img, 0)
print(f"Inside __getitem__ {img.shape} with extra channel")
return img
batch_size = 4
num_workers = 4
image_size = 64
T = transforms.Compose([transforms.Resize(image_size),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])
# Loading the data
dataset = NiftyDataset(
root_dir=join("/content/ImagesHands/"), transform = T
)
# Create the required DataLoaders for training and testing
dataset_loader = DataLoader(
dataset,
shuffle=True,
batch_size=batch_size,
num_workers=num_workers,
drop_last=True
)
# Show a random image from training
plt.imshow(np.squeeze(next(iter(dataset))), cmap="gray")
plt.axis('off')
plt.show()
Which gives me:
Inside __getitem__ (256, 256) is the image shape
Inside __getitem__ (1, 256, 256) with extra channel