Hi. I got an IndexError after using ConcatDataset. I want to make a bigger dataset and then undersample it, because the original dataset is imbalanced.
Here is my code
# Two augmentation pipelines: one with horizontal flips, one with random crops,
# both normalized with the usual ImageNet channel statistics.
transform_train2 = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transform_train3 = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Two views of the CIFAR-100 train split with different augmentations;
# concatenated they form a 100,000-sample dataset
# (indices 0..49999 -> flip pipeline, 50000..99999 -> crop pipeline).
cifar_train2 = dset.CIFAR100("./", train=True, transform=transform_train2,
                             target_transform=None, download=True)
cifar_train3 = dset.CIFAR100("./", train=True, transform=transform_train3,
                             target_transform=None, download=True)
train_loader2 = torch.utils.data.DataLoader(
    torch.utils.data.ConcatDataset([cifar_train2, cifar_train3]),
    batch_size=100000, shuffle=False, num_workers=2, drop_last=False)

NUM_SAMPLES = 100000  # size of the concatenated (augmented) dataset

# Pre-allocated GPU buffers for the MLP inputs/targets.
# NOTE(review): mlp_train_inputs is never filled in this snippet — presumably
# it is populated from train_loader2 elsewhere; confirm before training.
mlp_train_inputs = torch.cuda.FloatTensor(NUM_SAMPLES, 1, 32, 32)
mlp_train_targets = torch.cuda.LongTensor(NUM_SAMPLES)

# BUG FIX: the original converted only the first 35668 lines of the index file
# to int, leaving any further entries as strings such as "12345\n".  A string
# index reaching TensorDataset.__getitem__ raises exactly the reported
# "IndexError: ... must be LongTensors or convertible to LongTensors".
# Convert EVERY line (skipping blanks), and close the file handle, which the
# original leaked.
with open("list_v3.txt", "r") as f_list:
    list_sampler_indices = [int(line) for line in f_list if line.strip()]

# Read one integer target per sample; `with` guarantees the file is closed.
with open("20171220_small_MLP_train_targets.txt", "r") as f_small_train:
    for i in range(NUM_SAMPLES):
        mlp_train_targets[i] = int(f_small_train.readline())

mlp_train_dataset = torch.utils.data.TensorDataset(mlp_train_inputs, mlp_train_targets)
# Undersampling: draw only the indices listed in list_v3.txt, in random order.
sampler = torch.utils.data.sampler.SubsetRandomSampler(list_sampler_indices)
mlp_train_loader = torch.utils.data.DataLoader(
    mlp_train_dataset, batch_size=args.bs, sampler=sampler, drop_last=False)
def mlp_train(epoch):
    """Run one training epoch of net2 over mlp_train_loader.

    Uses the legacy (pre-0.4) PyTorch API: inputs are wrapped in Variable
    and the scalar loss is read via loss.data[0].
    """
    print('\nEpoch: %d' % epoch)
    net2.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    n_batches = len(mlp_train_loader)  # loop-invariant; hoisted
    for step, (batch_x, batch_y) in enumerate(mlp_train_loader):
        if use_cuda:
            batch_x = batch_x.cuda()
            batch_y = batch_y.cuda()
        optimizer2.zero_grad()
        batch_x = Variable(batch_x)
        batch_y = Variable(batch_y)
        logits = net2(batch_x)
        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer2.step()
        # Accumulate running statistics for the progress display.
        running_loss += loss.data[0]
        _, preds = torch.max(logits.data, 1)
        n_seen += batch_y.size(0)
        n_correct += preds.eq(batch_y.data).cpu().sum()
        progress_bar(step, n_batches, 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (running_loss / (step + 1), 100. * n_correct / n_seen,
                        n_correct, n_seen))
And here is the error message I got:
Traceback (most recent call last):140 …] Step: 40ms | Tot: 3s211ms | Loss: 0.745 | Acc: 50.000% (9728/
File “new_main_mlp_v6.py”, line 305, in
mlp_train(epoch)
File “new_main_mlp_v6.py”, line 204, in mlp_train
for batch_idx, (inputs, targets) in enumerate(mlp_train_loader):
File “/home/mhha/.conda/envs/pytorchmh/lib/python3.5/site-packages/torch/utils/data/dataloader.py”, line 178, in next
batch = self.collate_fn([self.dataset[i] for i in indices])
File “/home/mhha/.conda/envs/pytorchmh/lib/python3.5/site-packages/torch/utils/data/dataloader.py”, line 178, in
batch = self.collate_fn([self.dataset[i] for i in indices])
File “/home/mhha/.conda/envs/pytorchmh/lib/python3.5/site-packages/torch/utils/data/dataset.py”, line 36, in getitem
return self.data_tensor[index], self.target_tensor[index]
IndexError: When performing advanced indexing the indexing objects must be LongTensors or convertible to LongTensors
What is the problem? I cannot understand why I got an IndexError.
Before doing data augmentation — that is, when just using the 50,000 original training samples — there was no error like this.