AP_M
(AP)
August 9, 2022, 10:56am
1
I am trying to implement iterations over a dataloader where I get the error “ValueError: not enough values to unpack (expected 2, got 1)”. The dataset class and the dataloader iteration are coded below.
# Original (buggy) dataset as first posted by the asker.  As the reply below
# points out, __getitem__ references bare names (X_tr, x1, x2, Y_tr) instead
# of the self.* attributes created in __init__, never uses `idx`, and calls
# self.transform even though it defaults to False (not a callable).
class SwelltrainDataset(T.utils.data.Dataset):
    def __init__(self, Swelltrain, transform=False):
        # NOTE(review): scales the module-level X_train, not the
        # `Swelltrain` argument — presumably unintended; verify.
        sc = StandardScaler()
        X_tr = sc.fit_transform(X_train)
        Y_tr = y_train
        self.transform = transform
        self.X_tr = torch.tensor(X_tr, dtype = torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype = torch.float32)

    def __len__(self):
        # Dataset length = number of labels.
        return len(self.Y_tr)

    def __getitem__(self, idx):
        # BUG: X_tr, x1, x2 and Y_tr are undefined here (missing `self.`),
        # and `idx` is ignored, so no per-sample indexing happens.
        self.x1 = self.transform(torch.from_numpy(X_tr))
        self.x2 = self.transform(torch.from_numpy(X_tr))
        return torch.stack([x1,x2]), Y_tr
# Training script as first posted.  The reported ValueError is raised on the
# `for j, (X_tr, Y_tr) in enumerate(...)` line because the Dataset above
# does not yield well-formed (sample, label) pairs.
batch_size = 256
train_ds = SwelltrainDataset(Swelltrain)
train_loader = T.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)

for i in range(1,epochs+1):
    # Running loss / sample counters for the epoch average printed below.
    total_loss = torch.tensor(0.).to(device)
    total_num = 0
    for j, (X_tr,Y_tr) in enumerate(tqdm(train_loader,desc=f'training epoch: {i}')):
        # Fold the two augmented views into the batch dimension; this
        # assumes X_tr is 5-D [batch, 2, C, H, W] — TODO confirm, the
        # dataset shown does not obviously produce that shape.
        X_tr = X_tr.view(X_tr.shape[0]*2,X_tr.shape[2],X_tr.shape[3],X_tr.shape[4])
        out = model(X_tr.to(device))
        optimizer.zero_grad()
        loss = loss_func(out)
        total_num +=X_tr.size(0)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size for a per-sample average.
        total_loss += loss.detach().item() * X_tr.size(0)
    print(f"Epoch {i} training loss: {total_loss/total_num}")
The error comes at line:
for j, (X_tr,Y_tr) in enumerate(tqdm(train_loader,desc=f'training epoch: {i}')):
I don’t think your code would work, as you are using undefined variables in the __getitem__
method.
I guess you want to use self.X_tr
and would either want to return self.x1
or more likely index the tensor with the passed idx
and return only a single sample?
Could you post a minimal, executable code snippet with these fixes which would reproduce the issue, please?
AP_M
(AP)
August 12, 2022, 6:25am
4
I made the changes in the code as shown below, but the same error still occurs.
# Revised dataset after the suggested fixes: __getitem__ now indexes the
# stored tensors with `idx` and returns a single (sample, label) pair.
class SwelltrainDataset(T.utils.data.Dataset):
    def __init__(self, Swelltrain):
        # Standardize the features.  NOTE(review): this still reads the
        # module-level X_train / y_train rather than the `Swelltrain`
        # argument — verify that is intentional.
        sc = StandardScaler()
        X_tr = sc.fit_transform(X_train)
        Y_tr = y_train
        self.X_tr = torch.tensor(X_tr, dtype = torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype = torch.float32)

    def __len__(self):
        # Dataset length = number of labels.
        return len(self.Y_tr)

    def __getitem__(self, idx):
        # One (features, label) pair per index.
        return self.X_tr[idx], self.Y_tr[idx]
I am trying to implement SIMCLR using the link: " simclr/main.py at main · larsh0103/simclr · GitHub " using my dataset but getting the mentioned error.
# SimCLR training loop (adapted from the linked larsh0103/simclr main.py).
model = SimCLR()
# LR scaled linearly with batch size relative to a base batch of 256.
optimizer = torch.optim.SGD(model.parameters(),lr=0.3* (batch_size/256), momentum=0.9)
epochs: int = 1
model.train()
device = model.device
for i in range(1,epochs+1):
    # Running loss / sample counters for the epoch average logged below.
    total_loss = torch.tensor(0.).to(device)
    total_num = 0
    for j, (X_tr,Y_tr) in enumerate(tqdm(train_loader,desc=f'training epoch: {i}')):
        # Merge the two SimCLR views into the batch dimension; assumes
        # X_tr is 5-D [batch, 2, C, H, W] — TODO confirm: the revised
        # dataset above returns flat 2-D feature rows, which would not
        # satisfy this view().
        X_tr = X_tr.view(X_tr.shape[0]*2,X_tr.shape[2],X_tr.shape[3],X_tr.shape[4])
        out = model(X_tr.to(device))
        optimizer.zero_grad()
        loss = loss_func(out)
        total_num +=X_tr.size(0)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size for a per-sample average.
        total_loss += loss.detach().item() * X_tr.size(0)
    print(f"Epoch {i} training loss: {total_loss/total_num}")
    writer.add_scalar("train_loss", total_loss/total_num, global_step=i)
Your Dataset
definition works fine for me using random numpy arrays:
# Minimal reproduction from the reply: the same Dataset pattern works with
# random numpy data, so the unpack error must originate elsewhere in the
# asker's pipeline.
class SwelltrainDataset(Dataset):
    def __init__(self):
        # Random stand-ins for the asker's data: 100 samples x 10 features.
        sc = sklearn.preprocessing.StandardScaler()
        X_train = np.random.randn(100, 10)
        X_tr = sc.fit_transform(X_train)
        Y_tr = np.random.randn(100, 1)
        self.X_tr = torch.tensor(X_tr, dtype = torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype = torch.float32)

    def __len__(self):
        # Dataset length = number of labels.
        return len(self.Y_tr)

    def __getitem__(self, idx):
        # One (features, label) pair per index.
        return self.X_tr[idx], self.Y_tr[idx]

# Driver: iterating the loader unpacks two values per batch without error.
dataset = SwelltrainDataset()
train_loader = DataLoader(dataset, batch_size=2)
for j, (X_tr,Y_tr) in enumerate(train_loader):
    print(j, X_tr.shape, Y_tr.shape)