Onv2d(): argument 'input' (position 1) must be Tensor, not tuple

Ashish_Gupta1 · February 22, 2019, 12:24pm

I am making an encoder decoder model but this error occur in my encoder.

<ipython-input-2-2b8ee621aeff> in forward(self, x)
     29 
     30   def forward(self,x):
---> 31     output=self.encoder(x)
     32     ok=self.decoder(output)
     33 

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

<ipython-input-3-b65547714ad9> in forward(self, x)
     61 
     62 
---> 63         conv2a = self.conv2a(pool1)
     64         rect2a = self.leaky_relu(conv2a)
     65         conv2b = self.conv2b(rect2a)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/conv.py in forward(self, input)
    318     def forward(self, input):
    319         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 320                         self.padding, self.dilation, self.groups)
    321 
    322 

TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not tuple

My CODE

class GRUNet(nn.Module):
  def __init__(self):
        print('\nGruInitializing')
        super(GRUNet,self).__init__()
     
        
        self.batch_size, self.img_w, self.img_h=1,128,128
        
        self.input_shape = (self.batch_size, 3, self.img_w, self.img_h)
        
        #number of filters for each convolution layer in the encoder
        self.n_convfilter = [96, 128, 256, 256, 256, 256]
        
        #the dimension of the fully connected layer
        self.n_fc_filters = [1024]
        
        #number of filters for each 2d convolution layer in the decoder
        self.n_deconvfilter = [128, 128, 128, 64, 32, 2]
        
        self.conv2d_filter_shape = (self.n_deconvfilter[0], self.n_deconvfilter[0], 3, 3, 3)
        self.encoder=encoder(self.input_shape,self.n_convfilter,\
                            self.n_fc_filters,self.conv2d_filter_shape)
        self.decoder=decoder(self.n_deconvfilter)
    
  def forward(self,x):
    output=self.encoder(x)
    ok=self.decoder(output)

And the encoder structure:

class encoder(nn.Module):
  def __init__(self,input_shape,n_convfilter,\
               n_fc_filters, conv2d_filter_shape):
        print("\ninitalizing \"encoder\"")
        super(encoder,self).__init__()
        #conv1
        self.conv1a = Conv2d(input_shape[1], n_convfilter[0], 7, padding=3)
        self.conv1b = Conv2d(n_convfilter[0], n_convfilter[0], 3, padding=1)
        
        #conv2
        self.conv2a = Conv2d(n_convfilter[0], n_convfilter[1], 3, padding=1)
        self.conv2b = Conv2d(n_convfilter[1], n_convfilter[1], 3, padding=1)
        self.conv2c = Conv2d(n_convfilter[0], n_convfilter[1], 1)
def forward(self, x):
       
        #x is the input and the size of x is (batch_size, channels, heights, widths).
        
        conv1a = self.conv1a(x)
        rect1a = self.leaky_relu(conv1a)
        conv1b = self.conv1b(rect1a)
        rect1 = self.leaky_relu(conv1b)
        pool1 = self.pool(rect1)
        
        
        conv2a = self.conv2a(pool1)
        rect2a = self.leaky_relu(conv2a)
        conv2b = self.conv2b(rect2a)
        rect2 = self.leaky_relu(conv2b)
        conv2c = self.conv2c(pool1)
        res2 = conv2c + rect2
        pool2 = self.pool(res2)

I am Testing it on an Tensor(randomly generated array):

arr=np.random.randn(1,3,128,128)
arr=torch.tensor(arr)
model(arr)

Ashish_Gupta1 · February 27, 2019, 5:28pm

@ptrblck Please help if possible.

ptrblck · February 27, 2019, 5:52pm

self.pool seems to be missing in your encoder.
Could you check, if that’s the complete code?
Currently your encoder only contains conv layers.

PS: I’ve formatted your code, as it was a bit hard to read. You can add code snippets using three backticks ```

Ashish_Gupta1 · February 27, 2019, 6:06pm

Thanks for replying.
I have added pool after every Conv Layer.

class encoder(nn.Module):
  def __init__(self,input_shape,n_convfilter,\
               n_fc_filters, conv2d_filter_shape):
        print("\ninitalizing \"encoder\"")
        super(encoder,self).__init__()
        #conv1
        self.conv1a = Conv2d(input_shape[1], n_convfilter[0], 7, padding=3)
        self.conv1b = Conv2d(n_convfilter[0], n_convfilter[0], 3, padding=1)
        
        #conv2
        self.conv2a = Conv2d(n_convfilter[0], n_convfilter[1], 3, padding=1)
        self.conv2b = Conv2d(n_convfilter[1], n_convfilter[1], 3, padding=1)
        self.conv2c = Conv2d(n_convfilter[0], n_convfilter[1], 1)
        
        #conv3
        self.conv3a = Conv2d(n_convfilter[1], n_convfilter[2], 3, padding=1)
        self.conv3b = Conv2d(n_convfilter[2], n_convfilter[2], 3, padding=1)
        self.conv3c = Conv2d(n_convfilter[1], n_convfilter[2], 1)
        
        #conv4
        self.conv4a = Conv2d(n_convfilter[2], n_convfilter[3], 3, padding=1)
        self.conv4b = Conv2d(n_convfilter[3], n_convfilter[3], 3, padding=1)
        
        #conv5
        self.conv5a = Conv2d(n_convfilter[3], n_convfilter[4], 3, padding=1)
        self.conv5b = Conv2d(n_convfilter[4], n_convfilter[4], 3, padding=1)
        self.conv5c = Conv2d(n_convfilter[3], n_convfilter[4], 1)
        
        #conv6
        self.conv6a = Conv2d(n_convfilter[4], n_convfilter[5], 3, padding=1)
        self.conv6b = Conv2d(n_convfilter[5], n_convfilter[5], 3, padding=1)
        
        
        #pooling layer
        self.pool = MaxPool2d(kernel_size= 2, padding= 1,return_indices=True)
        
        
        #nonlinearities of the network
        self.leaky_relu = LeakyReLU(negative_slope= 0.01)
        self.sigmoid = Sigmoid()
        self.tanh = Tanh()
        
        
        #find the input feature map size of the fully connected layer
        fc7_feat_w, fc7_feat_h = self.fc_in_featmap_size(input_shape, num_pooling=6)
        #define the fully connected layer
        self.fc7 = Linear(int(n_convfilter[5] * fc7_feat_w * fc7_feat_h), n_fc_filters[0])
        self.fc8=Linear(n_fc_filters[0],1024)#1024 by mine side
        self.fc9=Linear(1024,1024)
            
        
            
  def forward(self, x):
        i=1
        #x is the input and the size of x is (batch_size, channels, heights, widths).
        
        conv1a = self.conv1a(x)
        rect1a = self.leaky_relu(conv1a)
        conv1b = self.conv1b(rect1a)
        rect1 = self.leaky_relu(conv1b)
        pool1 = self.pool(rect1)
        
        conv2a = self.conv2a(pool1)
        rect2a = self.leaky_relu(conv2a)
        conv2b = self.conv2b(rect2a)
        rect2 = self.leaky_relu(conv2b)
        conv2c = self.conv2c(pool1)
        res2 = conv2c + rect2
        pool2 = self.pool(res2)
        
        
        conv3a = self.conv3a(pool2)
        rect3a = self.leaky_relu(conv3a)
        conv3b = self.conv3b(rect3a)
        rect3 = self.leaky_relu(conv3b)
        conv3c = self.conv3c(pool2)
        res3 = conv3c + rect3
        pool3 = self.pool(res3)
        
        conv4a = self.conv4a(pool3)
        rect4a = self.leaky_relu(conv4a)
        conv4b = self.conv4b(rect4a)
        rect4 = self.leaky_relu(conv4b)
        pool4 = self.pool(rect4)
        
        
        conv5a = self.conv5a(pool4)
        rect5a = self.leaky_relu(conv5a)
        conv5b = self.conv5b(rect5a)
        rect5 = self.leaky_relu(conv5b)
        conv5c = self.conv5c(pool4)
        res5 = conv5c + rect5
        pool5 = self.pool(res5)
        
        
        conv6a = self.conv6a(pool5)
        rect6a = self.leaky_relu(conv6a)
        conv6b = self.conv6b(rect6a)
        rect6 = self.leaky_relu(conv6b)
        res6 = pool5 + rect6
        pool6 = self.pool(res6)
        
        
        pool6 = pool6.view(pool6.size(0), -1)
        
        
        fc7 = self.fc7(pool6)
        rect7 = self.leaky_relu(fc7)
        
        fc8 = self.fc8(rect7)
        rect8 = self.leaky_relu(fc8)
        
        fc9 = self.fc9(rect8)
        rect7 = self.leaky_relu(fc9)
        
        return rect7
    #infer the input feature map size, (height, width) of the fully connected layer
    
  def fc_in_featmap_size(self, input_shape, num_pooling):
        #fully connected layer
        img_w = input_shape[2]
        img_h = input_shape[3]
        #infer the size of the input feature map of the fully connected layer
        fc7_feat_w = img_w
        fc7_feat_h = img_h
        for i in range(num_pooling):
            #image downsampled by pooling layers
            #w_out= np.floor((w_in+ 2*padding[0]- dilation[0]*(kernel_size[0]- 1)- 1)/stride[0]+ 1)
            fc7_feat_w = np.floor((fc7_feat_w + 2 * 1 - 1 * (2 - 1) - 1) / 2 + 1)
            fc7_feat_h = np.floor((fc7_feat_h + 2 * 1 - 1 * (2 - 1) - 1) / 2 + 1)
            return fc7_feat_w, fc7_feat_h
          
          ```

ptrblck · February 27, 2019, 6:09pm

Thanks for the code!
Your pooling layer is created using return_indices=True, i.e. the result will be a tuple containing the pooled activation map and the indices.
I’m not sure where and how you would like to use the indices, but this should fix the error:

...
pool1, idx1 = self.pool(rect1)
conv2a = self.conv2a(pool1)
...

Ashish_Gupta1 · February 27, 2019, 6:17pm

It works. Thank you.
I have to use these indices further in 2dUnpoolLayer.
So while reading Documentation i read to use return_indices.
Also guide me how to use idx1(indices) in 2dUnpool.

ptrblck · February 27, 2019, 6:21pm

You would have to pass the indices as the second argument to nn.MaxUnpool2d. The docs have an example using it.

Hoda_Atef · March 27, 2019, 3:26pm

i want to help me please @ptrblck
i have error in training
Traceback (most recent call last):
File “D:\deep-ranking\model9\ranktest.py”, line 219, in
train_imgpath[j:j+10] = (query_name.data).cpu().numpy()
AttributeError: ‘tuple’ object has no attribute ‘data’

code

TESTING PHASE GENERATING TRAIN EMBEDDINGS AND SAVING IN CSV FILE
train_embeddings = np.zeros([100000,4096])
train_labels = np.zeros([100000,1])
train_imgpath = np.zeros([100000,1])
j=0
net.eval()
with torch.no_grad(): #doesn’t calculcate gradient
for data in trainloader:
imgid, query_name, query, inclass, outclass = data
query = F.interpolate(query,scale_factor = 3.5)
query = query.to(device)
query_out = net(query)
train_embeddings[j:j+10] = (query_out.data).cpu().numpy()
train_labels[j:j+10] = (imgid.data).cpu().numpy()
train_imgpath[j:j+10] = (query_name.data).cpu().numpy()

    j+=4
    print(j)
    #print('yallla {}'.format(j))

np.savetxt(‘train_embeddings.csv’,train_embeddings,delimiter = ‘,’)
np.savetxt(‘train_labels.csv’,train_labels,delimiter = ‘,’)
np.savetxt(‘train_imgpath.csv’,train_imgpath,delimiter = ‘,’)

ptrblck · March 28, 2019, 7:20pm

query_name seems to be a tuple, not a Tensor.
Could you check your Dataset what exactly will be returned in __getitem__?

rashidbaloch · June 14, 2020, 11:26am

Hi I am also facing this problem and I have tried alot to fix it but unable to figure out.
I am using a Pascal dataset for Image classification.
Here is my code snippet.

class Unit(nn.Module):
    def __init__(self,in_channels,out_channels):
        super(Unit,self).__init__()
        

        self.conv = nn.Conv2d(in_channels=in_channels,kernel_size=3,out_channels=out_channels,stride=1,padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self,input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)

        return output

class CNN(nn.Module):
    def __init__(self,num_classes=20):
        super(CNN,self).__init__()

        #Create 14 layers of the unit with max pooling in between
        self.unit1 = Unit(in_channels=3,out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)
        
        #Add all the units into the Sequential layer in exact order
        self.net = nn.Sequential(self.unit1, self.unit2, self.unit3, self.pool1, self.unit4, self.unit5, self.unit6
                                 ,self.unit7, self.pool2, self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
                                 self.unit12, self.unit13, self.unit14, self.avgpool)

        self.fc = nn.Linear(in_features=128*7*7,out_features=num_classes)

    def forward(self, input):
        output = self.net(input)
        output = output.view(-1,128*7*7)
        output = self.fc(output)
        return output

optimizer = Adam(model.parameters(), lr=0.001,weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss()

#Create a learning rate adjustment function that divides the learning rate by 10 every 30 epochs
def adjust_learning_rate(epoch):

    lr = 0.001

    if epoch > 180:
        lr = lr / 1000000
    elif epoch > 150:
        lr = lr / 100000
    elif epoch > 120:
        lr = lr / 10000
    elif epoch > 90:
        lr = lr / 1000
    elif epoch > 60:
        lr = lr / 100
    elif epoch > 30:
        lr = lr / 10

    for param_group in optimizer.param_groups:
        param_group["lr"] = lr




def save_models(epoch):
    torch.save(model.state_dict(), "pascalmodel_{}.model".format(epoch))
    print("Checkpoint saved")

def test():
    model.eval()
    test_acc = 0.0
    for i, (images, labels) in enumerate(test_loader):
      
        if cuda_avail:
                images = Variable(images.cuda())
                labels = Variable(labels.cuda())

        #Predict classes using images from the test set
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        prediction = prediction.cpu().numpy()
        test_acc += torch.sum(prediction == labels.data)
        


    #Compute the average acc and loss over all 10000 test images
    test_acc = test_acc / 200

    return test_acc

def train(num_epochs):
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        train_acc = 0.0
        train_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            #Move images and labels to gpu if available
            if cuda_avail:
                images = Variable(images.cuda())
                labels = Variable(labels.cuda())

            #Clear all accumulated gradients
            optimizer.zero_grad()
            #Predict classes using images from the test set
            outputs = model(images)
            #Compute the loss based on the predictions and actual labels
            loss = loss_fn(outputs,labels)
            #Backpropagate the loss
            loss.backward()

            #Adjust parameters according to the computed gradients
            optimizer.step()

            train_loss += loss.cpu().data * images.size(0) #train_loss += loss.cpu().data[0] * images.size(0)
            _, prediction = torch.max(outputs.data, 1) #_, prediction = torch.max(outputs.data, 1)
            
            train_acc += torch.sum(prediction == labels.data)

        #Call the learning rate adjustment function
        adjust_learning_rate(epoch)

        #Compute the average acc and loss over all 50000 training images
        train_acc = train_acc / 800
        train_loss = train_loss / 800

        #Evaluate on the test set
        test_acc = test()

        # Save the model if the test acc is greater than our current best
        if test_acc > best_acc:
            save_models(epoch)
            best_acc = test_acc


        # Print the metrics
        print("Epoch {}, Train Accuracy: {} , TrainLoss: {} , Test Accuracy: {}".format(epoch, train_acc, train_loss, test_acc))


if __name__ == "__main__":
    train(200) # 200 is the number of epochs

I am getting this error TypeError: sum(): argument ‘input’ (position 1) must be Tensor, not bool
on test_acc and train_acc

ptrblck · June 15, 2020, 2:02am

I guess prediction == labels.data might return a bool instead of a tensor.
Could you remove the .data usage and rerun the code, please?
Generally the usage of .data is not recommended, as it might yield unwanted side effects.

Also, Variables are deprecated since PyTorch 0.4, so you can use tensors in newer versions.

Let me know, if that helps solving the issue.

rashidbaloch · June 15, 2020, 2:44pm

Hi ptrblck,
Thank you for your response. As per your instructions I have removed .data but still receive the same error message and I have also changed variables to torch.as_tensor.
A little background regarding my dataset.
I have a dataset of Images containing total 1000 images each with different resolution.
I have splitted the dataset into train=800 ,test=200 set respectively.
I am replicating CNN based on CIFAR10 dataset on my custom Pascal sentence dataset.
Here is my Complete code:

#Import needed packages
import os
import sys
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.nn.functional as F
#from torchvision.datasets import Dataset
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
import torch.utils.data as data
#from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import numpy as np
#use_cuda = True


class Unit(nn.Module):
    def __init__(self,in_channels,out_channels):
        super(Unit,self).__init__()
        

        self.conv = nn.Conv2d(in_channels=in_channels,kernel_size=3,out_channels=out_channels,stride=1,padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self,input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)

        return output

class CNN(nn.Module):
    def __init__(self,num_classes=20):
        super(CNN,self).__init__()

        #Create 14 layers of the unit with max pooling in between
        self.unit1 = Unit(in_channels=3,out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)
        
        #Add all the units into the Sequential layer in exact order
        self.net = nn.Sequential(self.unit1, self.unit2, self.unit3, self.pool1, self.unit4, self.unit5, self.unit6
                                 ,self.unit7, self.pool2, self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
                                 self.unit12, self.unit13, self.unit14, self.avgpool)

        self.fc = nn.Linear(in_features=128*7*7,out_features=num_classes)

    def forward(self, input):
        output = self.net(input)
        output = output.view(-1,128*7*7)
        output = self.fc(output)
        return output

TRAIN_DATA_PATH = '/dataset/split_data_withLabels/images/train_set/' # 800 samples 
TEST_DATA_PATH = '/dataset/split_data_withLabels/images/test_set/'   # 200 samples

#Define transformations for the training set, flip the images randomly, crop out and apply mean and std normalization
train_transformations = transforms.Compose([                                           
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  # Image net standards
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  # Imagenet standards])
])

BATCH_SIZE = 16

train_set = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=train_transformations)
train_loader = data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

#Define transformations for the test set
test_transformations = transforms.Compose([                                            
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


test_set = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=test_transformations)
test_loader  = data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4) 

#Check if gpu support is available
cuda_avail = torch.cuda.is_available()

#Create model, optimizer and loss function
model = CNN(num_classes=20)

if cuda_avail:
    model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001,weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss()

#Create a learning rate adjustment function that divides the learning rate by 10 every 30 epochs
def adjust_learning_rate(epoch):

    lr = 0.001

    if epoch > 180:
        lr = lr / 1000000
    elif epoch > 150:
        lr = lr / 100000
    elif epoch > 120:
        lr = lr / 10000
    elif epoch > 90:
        lr = lr / 1000
    elif epoch > 60:
        lr = lr / 100
    elif epoch > 30:
        lr = lr / 10

    for param_group in optimizer.param_groups:
        param_group["lr"] = lr




def save_models(epoch):
    torch.save(model.state_dict(), "pascalmodel_{}.model".format(epoch))
    print("Checkpoint saved")

def test():
    model.eval()
    test_acc = 0.0
    for i, (images, labels) in enumerate(test_loader):
      
        if cuda_avail:
                images = torch.as_tensor(images.cuda())
                labels = torch.as_tensor(labels.cuda())

        #Predict classes using images from the test set
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        prediction = prediction.cpu().numpy()
        test_acc += torch.sum(prediction == labels)
        


    #Compute the average acc and loss over all 200 test images
    test_acc = test_acc/len(test_loader.dataset)  #test_acc = test_acc / 200

    return test_acc

def train(num_epochs):
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        train_acc = 0.0
        train_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            #Move images and labels to gpu if available
            if cuda_avail:
                images = torch.as_tensor(images.cuda())
                labels = torch.as_tensor(labels.cuda())

            #Clear all accumulated gradients
            optimizer.zero_grad()
            #Predict classes using images from the test set
            outputs = model(images)
            #Compute the loss based on the predictions and actual labels
            loss = loss_fn(outputs,labels)
            #Backpropagate the loss
            loss.backward()

            #Adjust parameters according to the computed gradients
            optimizer.step()

            train_loss += loss.cpu().data * images.size(0) #train_loss += loss.cpu().data[0] * images.size(0)
            _, prediction = torch.max(outputs.data, 1) #_, prediction = torch.max(outputs.data, 1)
            
            train_acc += torch.sum(prediction == labels)

        #Call the learning rate adjustment function
        adjust_learning_rate(epoch)

        #Compute the average acc and loss over all 800 training images
        train_acc = train_acc/len(train_loader.dataset) #train_acc = train_acc / 800  # train_loss = train_loss/len(train_loader.dataset)
        train_loss = train_loss/len(train_loader.dataset) #train_loss = train_loss / 800  

        #Evaluate on the test set
        test_acc = test()

        # Save the model if the test acc is greater than our current best
        if test_acc > best_acc:
            save_models(epoch)
            best_acc = test_acc


        # Print the metrics
        print("Epoch {}, Train Accuracy: {} , TrainLoss: {} , Test Accuracy: {}".format(epoch, train_acc, train_loss, test_acc))


if __name__ == "__main__":
    train(200) # 200 is the number of epochs
    print("Number of train samples: ", len(train_loader))
    #print("Number of test samples: ", len(test_loader))
    #print("Detected Classes are: ", train_set.class_to_idx) # Classes are detected by following structure

rashidbaloch · June 15, 2020, 4:27pm

The problem occurs at this part of the code.

def test():
    model.eval()
    test_acc = 0.0

    for i, (images, labels) in enumerate(test_loader):
      
        if cuda_avail:
                images = torch.as_tensor(images.cuda())
                labels = torch.as_tensor(labels.cuda())

        #Predict classes using images from the test set
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        prediction = prediction.cpu().numpy() 
        
        test_acc += torch.sum(prediction == labels)

TypeError: sum(): argument 'input' (position 1) must be Tensor, not bool
prediction is (numpy.ndarray) type where is somehow returned as Bool and argument expects a Tensor

ptrblck · June 15, 2020, 11:06pm

Thanks for the follow-up.
Try to use tensors for the comparison and the sum operation by removing the numpy() call in:

prediction = prediction.cpu().numpy()

Currently you are trying to compare a numpy array to a tensor, which will yield the issue, as it won’t compare elementwise:

x = torch.randn(10, 10)
pred = x.argmax(1)
target = torch.randint(0, 10, (10,))
print(pred.numpy() == target)
> False # Bool !!! 
print(pred == target)
> tensor([False, False, False, False, False, False,  True, False, False, False])

111408 · November 11, 2020, 1:47pm

I tried your method,but I recieved the error as follows:
UNPOOL_1 = self.unpool1(POOL_2 , indices1)
TypeError: ‘tuple’ object is not callable
it seems that I can’t use the tuple directly

ptrblck · November 12, 2020, 12:47am

Based on the error message I guess indices1 might contain the activation and the pooling indices returned by a previously used pooling layer?
If so, just pass the indices as the second argument to the unpooling layer.

111408 · November 12, 2020, 1:11am

I print indices1 like this
tensor([[[[ 0, 2, 4, …, 22, 24, 26],
[ 56, 58, 60, …, 78, 80, 82],
[112, 114, 116, …, 134, 136, 138],
…,
[616, 618, 620, …, 638, 640, 642],
[672, 674, 676, …, 694, 696, 698],
[728, 730, 732, …, 750, 752, 754]],

     [[  0,   2,   4,  ...,  22,  24,  26],
      [ 56,  58,  60,  ...,  78,  80,  82],
      [112, 114, 116,  ..., 134, 136, 138],
      ...,
      [616, 618, 620,  ..., 638, 640, 642],
      [672, 674, 676,  ..., 694, 696, 698],
      [728, 730, 732,  ..., 750, 752, 754]],

     [[  0,   2,   4,  ...,  22,  24,  26],
      [ 56,  58,  60,  ...,  78,  80,  82],
      [112, 114, 116,  ..., 134, 136, 138],
      ...,
      [616, 618, 620,  ..., 638, 640, 642],
      [672, 674, 676,  ..., 694, 696, 698],
      [728, 730, 732,  ..., 750, 752, 754]],

     ...,

     [[  0,   2,   4,  ...,  22,  24,  26],
      [ 56,  58,  60,  ...,  78,  80,  82],
      [112, 114, 116,  ..., 134, 136, 138],
      ...,
      [616, 618, 620,  ..., 638, 640, 642],
      [672, 674, 676,  ..., 694, 696, 698],
      [728, 730, 732,  ..., 750, 752, 754]],

     [[  0,   2,   4,  ...,  22,  24,  26],
      [ 56,  58,  60,  ...,  78,  80,  82],
      [112, 114, 116,  ..., 134, 136, 138],
      ...,
      [616, 618, 620,  ..., 638, 640, 642],
      [672, 674, 676,  ..., 694, 696, 698],
      [728, 730, 732,  ..., 750, 752, 754]],

     [[  0,   2,   4,  ...,  22,  24,  26],
      [ 56,  58,  60,  ...,  78,  80,  82],
      [112, 114, 116,  ..., 134, 136, 138],
      ...,
      [616, 618, 620,  ..., 638, 640, 642],
      [672, 674, 676,  ..., 694, 696, 698],
      [728, 730, 732,  ..., 750, 752, 754]]],

111408 · November 12, 2020, 1:12am

it is my model code

class AutoEncoder(nn.Module):

def __init__(self):
    super(AutoEncoder, self).__init__()

    #encoder
    self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
    self.relu1 = nn.ReLU()
    self.pool1 = nn.MaxPool2d(2, return_indices=True)
    self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)
    self.relu2 = nn.ReLU()
    self.pool2 = nn.MaxPool2d(2, return_indices=True)
    #decoder
    self.unpool1 = nn.MaxUnpool2d(2),
    self.convT1 = nn.ConvTranspose2d(32, 16, 5, 1, 2),
    self.relu3 = nn.ReLU(),
    self.unpool2 = nn.MaxUnpool2d(2),
    self.convT2 = nn.ConvTranspose2d(16, 1, 5, 1, 2),
    self.relu3 = nn.ReLU()



def forward(self, x):
    CONV_1 = self.conv1(x)
    RELU_1 = self.relu1(CONV_1)
    POOL_1, INDICES_1 = self.pool1(RELU_1)
    CONV_2 = self.conv2(POOL_1)
    RELU_2 = self.relu2(CONV_2)
    POOL_2, INDICES_2 = self.pool2(RELU_2)
    encoded = POOL_2
    print(INDICES_1)
    UNPOOL_1 = self.unpool1(POOL_2 , INDICES_1)
    CONVT_1 = self.convT1(UNPOOL_1)
    RELU_3 = self.ReLU(CONVT_1)
    UNPOOL_2 = self.unpool2(RELU_3, INDICES_2)
    CONVT_2 = self.convT2(UNPOOL_2)
    RELU_4 = self.ReLU(CONVT_2)
    decoded = RELU_4

    return encoded, decoded

ptrblck · November 12, 2020, 6:00am

Thanks for the code.
The error is raised, since you are assigning a tuple to self.unpool1 (and the following layers) by adding a comma after nn.MaxUnpool2d(2),.
If you remove it, you would get a shape mismatch, since INCIDES_2 are expected in self.unpool1.
Also, self.ReLU is undefined, so I assume you would like to use self.relu3 instead. You are currently creating self.relu3 two times, so I guess the last one should be self.relu4.

Here is the model with the mentioned fixes:


class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
    
        #encoder
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, return_indices=True)
        self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, return_indices=True)
        #decoder
        self.unpool1 = nn.MaxUnpool2d(2)
        self.convT1 = nn.ConvTranspose2d(32, 16, 5, 1, 2)
        self.relu3 = nn.ReLU()
        self.unpool2 = nn.MaxUnpool2d(2)
        self.convT2 = nn.ConvTranspose2d(16, 1, 5, 1, 2)
        self.relu4 = nn.ReLU()
    
    def forward(self, x):
        CONV_1 = self.conv1(x)
        RELU_1 = self.relu1(CONV_1)
        POOL_1, INDICES_1 = self.pool1(RELU_1)
        CONV_2 = self.conv2(POOL_1)
        RELU_2 = self.relu2(CONV_2)
        POOL_2, INDICES_2 = self.pool2(RELU_2)
        encoded = POOL_2
        UNPOOL_1 = self.unpool1(POOL_2 , INDICES_2)
        CONVT_1 = self.convT1(UNPOOL_1)
        RELU_3 = self.relu3(CONVT_1)
        UNPOOL_2 = self.unpool2(RELU_3, INDICES_1)
        CONVT_2 = self.convT2(UNPOOL_2)
        RELU_4 = self.relu4(CONVT_2)
        decoded = RELU_4
        return encoded, decoded

model = AutoEncoder()
x = torch.randn(1, 1, 224, 224)
out = model(x)

111408 · November 13, 2020, 2:46am

That works,thank you and have a nice day