ValueError: not enough values to unpack (expected 2, got 1)

I am trying to iterate over a dataloader, but I get the error “ValueError: not enough values to unpack (expected 2, got 1)”. The dataset class and the dataloader iteration are shown below.
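From searching around, I understand this error usually means the loader yields a single object where the loop expects a pair. For example, this toy snippet (unrelated to my real data) raises the same message, because the Dataset returns only one value per sample:

import torch
from torch.utils.data import Dataset, DataLoader

class OneValueDataset(Dataset):
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return torch.zeros(3)  # a single value per sample, not an (x, y) pair

loader = DataLoader(OneValueDataset(), batch_size=1)

for j, (X, Y) in enumerate(loader):
    # ValueError: not enough values to unpack (expected 2, got 1)
    pass

My actual dataset class and training loop follow.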

class SwelltrainDataset(T.utils.data.Dataset):

    def __init__(self, Swelltrain, transform=False):
        sc = StandardScaler()
        X_tr = sc.fit_transform(X_train)
        Y_tr = y_train
        self.transform = transform
        self.X_tr = torch.tensor(X_tr, dtype=torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype=torch.float32)

    def __len__(self):
        return len(self.Y_tr)

    def __getitem__(self, idx):
        self.x1 = self.transform(torch.from_numpy(X_tr))
        self.x2 = self.transform(torch.from_numpy(X_tr))
        return torch.stack([x1, x2]), Y_tr

batch_size = 256
train_ds = SwelltrainDataset(Swelltrain)
train_loader = T.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
for i in range(1, epochs + 1):
    total_loss = torch.tensor(0.).to(device)
    total_num = 0

    for j, (X_tr, Y_tr) in enumerate(tqdm(train_loader, desc=f'training epoch: {i}')):
        X_tr = X_tr.view(X_tr.shape[0]*2, X_tr.shape[2], X_tr.shape[3], X_tr.shape[4])

        out = model(X_tr.to(device))

        optimizer.zero_grad()
        loss = loss_func(out)
        total_num += X_tr.size(0)
        loss.backward()
        optimizer.step()

        total_loss += loss.detach().item() * X_tr.size(0)
        print(f"Epoch {i} training loss: {total_loss/total_num}")

The error occurs at this line:

for j, (X_tr,Y_tr) in enumerate(tqdm(train_loader,desc=f'training epoch: {i}')):

I don’t think your code would work, as you are using undefined variables in the __getitem__ method.
I guess you want to use self.X_tr, and would either return self.x1 or, more likely, index the tensor with the passed idx and return only a single sample?
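Something like this sketch (assuming self.X_tr and self.Y_tr hold the tensors created in __init__):

def __getitem__(self, idx):
    x = self.X_tr[idx]  # pick a single sample via the passed idx
    y = self.Y_tr[idx]
    if self.transform:
        x = self.transform(x)
    return x, y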
Could you post a minimal, executable code snippet with these fixes which would reproduce the issue, please?

I made the changes shown below, but the same error still occurs:

class SwelltrainDataset(T.utils.data.Dataset):

    def __init__(self, Swelltrain):
        sc = StandardScaler()
        X_tr = sc.fit_transform(X_train)
        Y_tr = y_train
        self.X_tr = torch.tensor(X_tr, dtype=torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype=torch.float32)

    def __len__(self):
        return len(self.Y_tr)

    def __getitem__(self, idx):
        return self.X_tr[idx], self.Y_tr[idx]

I am trying to implement SimCLR following simclr/main.py at main · larsh0103/simclr · GitHub with my own dataset, but I get the mentioned error.
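For context, the dataset in that repo returns two independently augmented views stacked per sample, which is why the training loop below reshapes the batch with X_tr.shape[0]*2. Roughly this pattern (my simplified sketch; self.transform stands in for the augmentation pipeline, not the repo's exact code):

def __getitem__(self, idx):
    x = self.X_tr[idx]
    x1 = self.transform(x)  # first augmented view
    x2 = self.transform(x)  # second, independently augmented view
    return torch.stack([x1, x2]), self.Y_tr[idx]  # shape (2, *x.shape)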

model = SimCLR()

optimizer = torch.optim.SGD(model.parameters(), lr=0.3 * (batch_size / 256), momentum=0.9)
epochs: int = 1
model.train()
device = model.device

for i in range(1, epochs + 1):
    total_loss = torch.tensor(0.).to(device)
    total_num = 0

    for j, (X_tr, Y_tr) in enumerate(tqdm(train_loader, desc=f'training epoch: {i}')):
        X_tr = X_tr.view(X_tr.shape[0]*2, X_tr.shape[2], X_tr.shape[3], X_tr.shape[4])

        out = model(X_tr.to(device))

        optimizer.zero_grad()
        loss = loss_func(out)
        total_num += X_tr.size(0)
        loss.backward()
        optimizer.step()

        total_loss += loss.detach().item() * X_tr.size(0)
        print(f"Epoch {i} training loss: {total_loss/total_num}")
        writer.add_scalar("train_loss", total_loss/total_num, global_step=i)

Your Dataset definition works fine for me using random numpy arrays:

import numpy as np
import sklearn.preprocessing
import torch
from torch.utils.data import Dataset, DataLoader


class SwelltrainDataset(Dataset):
    def __init__(self):
        sc = sklearn.preprocessing.StandardScaler()
        X_train = np.random.randn(100, 10)
        X_tr = sc.fit_transform(X_train)
        Y_tr = np.random.randn(100, 1)

        self.X_tr = torch.tensor(X_tr, dtype=torch.float32)
        self.Y_tr = torch.tensor(Y_tr, dtype=torch.float32)

    def __len__(self):
        return len(self.Y_tr)

    def __getitem__(self, idx):
        return self.X_tr[idx], self.Y_tr[idx]


dataset = SwelltrainDataset()
train_loader = DataLoader(dataset, batch_size=2)

for j, (X_tr, Y_tr) in enumerate(train_loader):
    print(j, X_tr.shape, Y_tr.shape)
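If the error still occurs with your real data, I would check what a single batch actually contains right before the training loop (assuming train_loader is built from the dataset you posted):

batch = next(iter(train_loader))
print(type(batch), len(batch))         # expect a list of length 2
print(batch[0].shape, batch[1].shape)  # data and target shapes

If this prints a single tensor instead of a data/target pair, the loader is still being built from a Dataset that returns one value per sample.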