ValueError: Stratified CV requires explicitely passing a suitable y

omarabdelaziz · June 2, 2019, 7:37pm

I’m trying to use skorch for cross-validation after converting my Y into a one-hot encoded vector, but I get this error.
This is the dataset class:


class AmericanSignLanguage(Dataset):
    """Dataset of features, one-hot labels and sequence lengths loaded from .npy files.

    The three arrays are read from fixed paths, converted to tensors and
    wrapped in a ``torch.utils.data.TensorDataset``; indexing this object
    delegates to that wrapped dataset.
    """

    def __init__(self, batch_size):
        """Load the arrays from disk and build the wrapped ``TensorDataset``.

        Args:
            batch_size: Accepted for interface compatibility; it is not used
                anywhere in this class.
        """
        # Load the features and remember their shape as stored on disk.
        self.x = np.load('drive/My Drive/x_features_v2_t.npy')
        self.shape = self.x.shape
        print(self.shape)
        # NOTE(review): np.reshape reinterprets the flat buffer, it does not
        # move axes. If the intent was to permute (d0, d1, d2) -> (d1, d2, d0),
        # np.transpose would be needed instead -- confirm.
        self.x = np.reshape(
            self.x, (self.shape[1], self.shape[2], self.shape[0])
        )

        # Load the labels (targets).
        self.y = np.load('drive/My Drive/y_labels_v2_t.npy')

        # Load the per-sample sequence lengths.
        self.seq_lengths = np.load('drive/My Drive/seq_lengths_t.npy')

        # Convert the labels to a one-hot encoding.
        self.encoder = LabelBinarizer()
        self.one_hot_labels = self.encoder.fit_transform(self.y)

        # Settings intended for a train/validation split; they are only
        # stored here and not consumed by this class.
        self.num_classes = len(np.unique(self.y))
        self.validation_split = .2
        self.shuffle_dataset = True
        self.random_seed = 42

        # Build tensors for the features, one-hot targets and sequence lengths.
        self.featuresTrain = torch.tensor(self.x)
        self.targetsTrain = torch.tensor(self.one_hot_labels)
        self.seq_lengths = torch.tensor(self.seq_lengths)

        # Bundle the three tensors so that one index yields one
        # (features, target, seq_length) triple.
        self.dataset = torch.utils.data.TensorDataset(
            self.featuresTrain, self.targetsTrain, self.seq_lengths
        )

        # Indices available for building training/validation splits.
        self.dataset_size = len(self.dataset)
        self.indices = list(range(self.dataset_size))

        # Trigger a garbage-collection pass after the large loads above.
        gc.collect()

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        # Must go through ``self``: a bare ``dataset_size`` is an undefined
        # name in this scope and raised NameError on every ``len()`` call.
        return self.dataset_size

    def __getitem__(self, idx):
        """Return the item at ``idx`` from the wrapped dataset."""
        return self.dataset[idx]

    def getDataset(self):
        """Return the wrapped ``TensorDataset``."""
        return self.dataset

    def getX(self):
        """Return the reshaped feature array (NumPy, not the tensor)."""
        return self.x

    def getY(self):
        """Return the one-hot encoded labels produced by the encoder."""
        return self.one_hot_labels

    def getShape(self):
        """Return the feature array's shape as loaded, before the reshape."""
        return self.shape

    def getNumClasses(self):
        """Return the number of distinct values found in the raw labels."""
        return self.num_classes

And this is the skorch part:

# Wrap the model in a skorch classifier that trains on the CUDA device
# (drop ``device='cuda'`` to fall back to skorch's default device).
# NOTE(review): skorch's default validation split is stratified; if ``y`` is
# a 2-D one-hot matrix it presumably cannot stratify on it -- confirm whether
# 1-D integer class labels should be passed instead.
net = NeuralNetClassifier(model, max_epochs=20, lr=0.01, device='cuda')
net.fit(X, y)

aatrey · August 17, 2020, 3:14pm

I have a similar problem. Did you get around to solving it?