'tuple' object is not callable

AIBOI · March 25, 2022, 9:57am

This was the error:

----> 1 model_conv = train_model(model_conv,train_dl, val_dl, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=25)

4 frames
<ipython-input-43-2490278d3bb0> in train_model(model, train_dl, val_dl, criterion, optimizer, scheduler, num_epochs)
     13         print('-' * 10)
     14 
---> 15         for images,labels in train_dl:
     16           with torch.set_grad_enabled(True):
     17 

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
   1201             else:
   1202                 del self._task_info[idx]
-> 1203                 return self._process_data(data)
   1204 
   1205     def _try_put_index(self):

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1227         self._try_put_index()
   1228         if isinstance(data, ExceptionWrapper):
-> 1229             data.reraise()
   1230         return data
   1231 

/usr/local/lib/python3.7/dist-packages/torch/_utils.py in reraise(self)
    432             # instantiate since we don't know how to
    433             raise RuntimeError(msg) from None
--> 434         raise exception
    435 
    436 

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torchvision/datasets/folder.py", line 234, in __getitem__
    sample = self.transform(sample)
TypeError: 'tuple' object is not callable

The error was directed at the for loop

-> for train in train_dl:
     with torch.set_grad_enabled:
        input,labels = train
        input,labels = input.to(device), labels.to(device)

I assumed it was an error in the train_dl which is the training dataloader, however, I could not find anything wrong with it. I have listed the process of how the dataloaders are created, I would appreciate it if anybody were to point out an error. Thanks

How I created the dataloaders:

data = download_url("https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz",".")

with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    tar.extractall(path='./data')




if os.path.exists("/content/data/cifar10/validate") is False:

  os.makedirs("/content/data/cifar10/validate")

  os.makedirs("/content/data/cifar10/validate/airplane")

  os.makedirs("/content/data/cifar10/validate/automobile")

  os.makedirs("/content/data/cifar10/validate/bird")

  os.makedirs("/content/data/cifar10/validate/cat")

  os.makedirs("/content/data/cifar10/validate/deer")

  os.makedirs("/content/data/cifar10/validate/dog")

  os.makedirs("/content/data/cifar10/validate/frog")

  os.makedirs("/content/data/cifar10/validate/horse")

  os.makedirs("/content/data/cifar10/validate/ship")

  os.makedirs("/content/data/cifar10/validate/truck") 

for i in sample(glob.glob("/content/data/cifar10/train/airplane/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/airplane")

for i in sample(glob.glob("/content/data/cifar10/train/automobile/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/automobile")

for i in sample(glob.glob("/content/data/cifar10/train/bird/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/bird")

for i in sample(glob.glob("/content/data/cifar10/train/cat/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/cat")

for i in sample(glob.glob("/content/data/cifar10/train/deer/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/deer")

for i in sample(glob.glob("/content/data/cifar10/train/dog/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/dog")

for i in sample(glob.glob("/content/data/cifar10/train/frog/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/frog")

for i in sample(glob.glob("/content/data/cifar10/train/horse/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/horse")

for i in sample(glob.glob("/content/data/cifar10/train/ship/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/ship")

for i in sample(glob.glob("/content/data/cifar10/train/truck/*.png"),500):
  shutil.move(i,"/content/data/cifar10/validate/truck")

   

from torch.utils.data.dataset import random_split
mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.25, 0.25, 0.25])


transform =  transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),


train_ds = ImageFolder("/content/data/cifar10/train",transform)
val_ds = ImageFolder("/content/data/cifar10/validate", transform)

train_dl = DataLoader(train_ds, batch_size = 32, shuffle=True, num_workers=2, pin_memory=True)
val_dl  =  DataLoader(val_ds,batch_size = 32,shuffle = False, num_workers=2, pin_memory=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, train_dl , val_dl, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    running_training_loss = 0.0
    running_validation_loss = 0.0
    running_corrects_training = 0
    running_corrects_validation = 0


    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        #training loop
        for images,labels in train_dl:
          with torch.set_grad_enabled(True):
           
           images,labels = images.to(device), labels.to(device)

           with model.train():
              outputs = model(images)
              _, preds_train = torch.max(outputs, 1)
              loss_train = criterion(outputs, labels)

              optimizer.zero_grad()
              loss_train.backward()
              optimizer.step()


          #stats
          running_training_loss += loss_train.item()
          running_corrects_training += torch.sum(preds_train == labels.data)


        #scheduler step      
        scheduler.step()

        #stats
        epoch_loss_training = running_training_loss / len(train_dl)
        epoch_acc_training = running_corrects_training.double() / len(train_dl)
        
        #validation loop
        for image,label in val_dl:
           with torch.set_grad_enabled(False):
            image,label = image.to(device), label.to(device)

            with model.eval():
              outputs = model(image)
              _, preds_val = torch.max(outputs, 1)
              loss_val = criterion(outputs, label)


           #stats
           running_validation_loss += loss_val.item()
           running_corrects_validation += torch.sum(preds_val == labels.data)
          

        epoch_loss_validation = running_validation_loss / len(val_dl)
        epoch_acc_validation = running_corrects_validation.double() / len(val_dl)

        print("epoch {}, epoch training loss {}, epoch training acc {}, epoch_loss_validation{}, epoch_acc_validation" .format(epoch, epoch_loss_training, epoch_acc_training,
                                                                                                                           epoch_loss_validation, epoch_acc_validation))
        

        #finding best accuracy
        if running_corrects_validation > best_acc:
          best_acc = running_corrects_validation
          best_model_wts = copy.deepcopy(model.state_dict())
        print()


        # total time
        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
           time_elapsed // 60, time_elapsed % 60))
        print('Best val Acc: {:4f}'.format(best_acc))

        

        # load best model weights
        model.load_state_dict(best_model_wts)
        return model


#downloading pretrained model/finetune
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

#defining parameters

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 10)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)


model_conv = train_model(model_conv,train_dl, val_dl, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=25)

Matias_Vasquez · March 25, 2022, 10:30am

torch.set_grad_enabled requires a flag

with torch.set_grad_enabled(True):
    pass

or

with torch.set_grad_enabled(False):
    pass

I do not think that this is the problem, but maybe you could post more of your code with the entire error message to see which line is causing this problem

AIBOI · March 25, 2022, 11:47am

Hi there I have uploaded the whole source code, I hope this helps

Matias_Vasquez · March 25, 2022, 11:51am

If you try to print a single image from your Dataset, you see that the problem actually starts before your DataLoader.

Here you see that the error is when trying to apply the transform to each image before returning it.

So the actual error is when you define your transform = transforms.Compose([...

Here you have a coma at the end.

transform =  transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),

should actually be

transform =  transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

no comma at the end.

When this works, you will then encounter the other error I mentioned, torch.set_grad_enabled(True)

Hope this helps

AIBOI · March 25, 2022, 11:55am

Hi there I actually just updated the code again, there was some error in the training loop

AIBOI · March 25, 2022, 11:56am

Ah I see, I will try it out thanks

AIBOI · March 25, 2022, 11:59am

Unfortunately the problem still persists, it is the same error

Matias_Vasquez · March 25, 2022, 12:17pm

After deleting the comma at the end of your transform definition I do not get that error (I do get OTHER errors, but not the same).

Can you confirm if its the same error in the same line?

AIBOI · March 25, 2022, 12:28pm

I have changed it to this

transform =  transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

The output after executing the entire code is this

TypeError                                 Traceback (most recent call last)
<ipython-input-49-9faf2ff48bcb> in <module>()
----> 1 model_conv = train_model(model_conv,train_dl, val_dl, criterion, optimizer_conv,exp_lr_scheduler, num_epochs=25)

4 frames
<ipython-input-45-be6674e5d7ab> in train_model(model, train_dl, val_dl, criterion, optimizer, scheduler, num_epochs)
     15 
     16         #training loop
---> 17         for images,labels in train_dl:
     18           with torch.set_grad_enabled(True):
     19 

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
   1201             else:
   1202                 del self._task_info[idx]
-> 1203                 return self._process_data(data)
   1204 
   1205     def _try_put_index(self):

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1227         self._try_put_index()
   1228         if isinstance(data, ExceptionWrapper):
-> 1229             data.reraise()
   1230         return data
   1231 

/usr/local/lib/python3.7/dist-packages/torch/_utils.py in reraise(self)
    432             # instantiate since we don't know how to
    433             raise RuntimeError(msg) from None
--> 434         raise exception
    435 
    436 

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torchvision/datasets/folder.py", line 234, in __getitem__
    sample = self.transform(sample)
TypeError: 'tuple' object is not callable

Matias_Vasquez · March 25, 2022, 12:32pm

Can you post what happens when you do this

print(train_ds[0])

AIBOI · March 25, 2022, 12:33pm

I restarted the IDE, now it is fine, there is another problem

__enter__

Thanks anyway, its always the pesky punctuations

Matias_Vasquez · March 25, 2022, 12:35pm

Here you have to do

model.eval()

model.train()

not

with model.eval():
   pass

AIBOI · March 25, 2022, 12:38pm

Why can’t I use the with statement in front of model.eval() / model.train()?

Matias_Vasquez · March 25, 2022, 1:48pm

My guess is that it is not supported.

In order to be supported you need __enter__ and __exit__ methods implemented.

Here is a goog explanation of how the with statement works:

AIBOI · March 25, 2022, 4:03pm

Hey man i have faced another issue

RuntimeError                              Traceback (most recent call last)
<ipython-input-14-b4c5dff1539c> in <module>()
      1 model_conv = train_model(model_conv,train_dl,val_dl, criterion, optimizer_conv,
----> 2                          exp_lr_scheduler, num_epochs=25)

<ipython-input-13-f484d720bf26> in train_model(model, train_dl, val_dl, criterion, optimizer, scheduler, num_epochs)
     57            #stats
     58            running_validation_loss += loss_val.item()* image.size(0)
---> 59            running_corrects_validation += torch.sum(preds_val == labels)
     60 
     61 

RuntimeError: The size of tensor a (32) must match the size of tensor b (8) at non-singleton dimension 0

I assumed it must have been due to issues with the dimension of my tensor, so I ran some tests
for images,labels in train_dl:

`for images,labels in train_dl:`
  output = model_conv(images)

  loss = F.cross_entropy(output,labels)

  _,preds = torch.max(output,1)

  accuracy = torch.sum(preds==labels)

  print(output.size())

  print(labels.size())

  print(preds.size())

  print(accuracy)

  break

Here is the output of the test

torch.Size([32, 10])
torch.Size([32])
torch.Size([32])
tensor(3)

Do you have any clue what the error is talking about? I have read that it may have to do with the loss function, however I am still uncertain.

Matias_Vasquez · March 25, 2022, 4:12pm

There it says your issue is on line 59.

Can you post what the shape of preds_val and labels are?

AIBOI · March 26, 2022, 5:45am

Here is a sample of test which I did to determine the sizes of the tensors in my training.
Code:

for images,labels in train_dl:
  output = model_conv(images)
  loss = F.cross_entropy(output,labels)
  _,preds_vals = torch.max(output,1)
  accuracy = torch.sum(preds_vals==labels)

  print(f"Output: {output.size()}" )
  print(f"labels: {labels.size()}")
  print(f"preds_vals: {preds_vals.size()}" )
  print(f"accuracy: {accuracy}")
  break

Outputs:

Output: torch.Size([32, 10])
labels: torch.Size([32])
preds_vals: torch.Size([32])
accuracy: 1

The preds_vals and label sizes are depicted below

Matias_Vasquez · March 26, 2022, 9:11am

I think the problem is not on the training loop, but on the validation loop.

Also, I think it would be best if you create a new thread. That way when someone with a similar issue is looking for it, it will be easier for them to find it. You can @ me if you want and I will gladly help you