Problem with training FCN for segmentation of multi-channel data

Hello dear programmers,
I am very new to PyTorch, with basic programming skills. My task consists of performing segmentation of seismic data. My training data consists of 9 channels and the labels are of one channel. I have managed to build a network. However, it cannot train, and it shows the following error:
“TypeError: conv2d(): argument ‘input’ (position 1) must be Tensor, not int”.

My codes are given below

Please, any help would be highly appreciated

# NOTE(review): np.load() is called without a file path — these are
# placeholders; the real .npy paths must be supplied.
old_data = np.load()

old_label = np.load()

# Target container shaped (N, 3, H, W) so the samples fit a stock ResNet stem.
data = np.zeros((old_data.shape[0],3,old_data.shape[2],old_data.shape[3]))

for i in range(old_data.shape[0]):

    a = old_data[i]

    # NOTE(review): np.resize flattens the (9, H, W) sample and truncates it
    # to fill (3, H, W), silently discarding most of the 9 input channels.
    # Keeping all 9 channels and widening the network's first conv layer
    # (as suggested later in this thread) preserves the information instead.
    a = np.resize(a, (3,old_data.shape[2],old_data.shape[3]))

    data[i,:,:] = a

from torch.utils.data import TensorDataset

# Wrap the numpy arrays: float32 inputs for conv layers, int64 targets
# as required by NLL-style losses.
X = torch.from_numpy(np.array(data)).float()
Y = torch.from_numpy(np.array(old_label)).long()
print(X.size(), Y.size())

CT_dataset = TensorDataset(X, Y)

# Worker processes and pinned memory only pay off when a GPU is present.
if torch.cuda.is_available():
    kwargs = {'num_workers': 4, 'pin_memory': True}
else:
    kwargs = {}

train_loader = torch.utils.data.DataLoader(CT_dataset,
                                           batch_size=1,
                                           shuffle=True,
                                           **kwargs)

# NOTE(review): pretrained=False means the backbone starts from random
# weights despite the variable name — set pretrained=True to actually load
# ImageNet weights (as suggested later in this thread).
pretrained_net = models.resnet34(pretrained=False)

num_classes = 2 #len(classes)

class fcn(nn.Module):
    """FCN-style segmentation head on a ResNet-34 backbone.

    Stages 1-3 reuse slices of the module-level ``pretrained_net``; 1x1
    score convs map each stage to ``num_classes`` channels, the maps are
    fused across scales and upsampled back toward the input resolution.
    """

    def __init__(self, num_classes):
        super(fcn, self).__init__()
        # Backbone split: stem+layer1+layer2, then layer3, then layer4.
        self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
        self.stage2 = list(pretrained_net.children())[-4]
        self.stage3 = list(pretrained_net.children())[-3]
        # 1x1 convs producing per-class score maps at each scale.
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(256, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)
        # Learned upsampling back to the input resolution.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)

    def forward(self, x):
        x = self.stage1(x)
        s1 = x  # 1/8
        x = self.stage2(x)
        s2 = x  # 1/16
        x = self.stage3(x)
        s3 = x  # 1/32
        s3 = self.scores1(s3)
        s3 = self.upsample_2x(s3)
        s2 = self.scores2(s2)
        s2 = s2 + s3
        s1 = self.scores3(s1)
        s2 = self.upsample_4x(s2)
        s = s1 + s2
        # BUG FIX: upsample the fused map `s`, not `s2` — the original
        # passed s2 here, discarding the s1 skip connection entirely.
        s = self.upsample_8x(s)
        return s

model = fcn(num_classes)

# NOTE(review): NLLLoss2d is deprecated — nn.NLLLoss handles the 4D
# (N, C, H, W) input / (N, H, W) target case directly in current PyTorch.
criterion = nn.NLLLoss2d()

basic_optim = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-4)

optimizer = basic_optim

for e in range(50):

    # NOTE(review): with range(50) this condition never fires (e < 50), and
    # PyTorch optimizers have no set_learning_rate / learning_rate API —
    # adjust optimizer.param_groups[...]['lr'] instead.
    if e > 0 and e % 50 == 0:

        optimizer.set_learning_rate(optimizer.learning_rate * 0.1)

    train_loss = 0

    

    model= model.train()

    from tqdm import tqdm

    # BUG: enumerate yields (index, batch) tuples, so `data` below is that
    # tuple and `x, y_true = data` binds x to the int index — this is the
    # source of the reported "conv2d(): argument 'input' ... not int" error.
    for  data in tqdm(enumerate(train_loader)):

        optimizer.zero_grad()

        x, y_true = data

    

        if torch.cuda.is_available():

            x, y_true = x.cuda(), y_true.cuda()

           

        # forward

        out = model(x)

        out = F.log_softmax(out, dim=1)  # (b, n, h, w)

        loss = criterion(out, y_true )

        # backward

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        # NOTE(review): loss.data[0] is the pre-0.4 idiom — use loss.item().
        train_loss += loss.data[0]

        label_pred = out.max(dim=1)[1].data.cpu().numpy()

        label_true = y_true .data.cpu().numpy()

        acc=get_accuracy(label_true,label_pred)

        print(acc)

There were a few minor issues in your code:

  • if you are dealing with 9 input channels, the first conv layer should be changed to accept these 9 channels
  • you are currently training from scratch, so you might want to set pretrained=True as the model name suggests
  • in your DataLoader loop you are using enumerate so the first returned variable will be the index
  • don’t use .data anymore, as this might have unwanted side effects. To accumulate the loss, use loss.item().

This code should work for your use case:

class fcn(nn.Module):
    """FCN-style segmentation head on a ResNet-34 backbone, adapted to
    9-channel inputs by replacing the backbone's first conv layer."""

    def __init__(self, num_classes):
        super(fcn, self).__init__()
        self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
        # change input channels to 9
        self.stage1[0] = nn.Conv2d(9, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.stage2 = list(pretrained_net.children())[-4]
        self.stage3 = list(pretrained_net.children())[-3]
        # 1x1 convs producing per-class score maps at each scale.
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(256, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)
        # Learned upsampling back to the input resolution.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)

    def forward(self, x):
        x = self.stage1(x)
        s1 = x  # 1/8
        x = self.stage2(x)
        s2 = x  # 1/16
        x = self.stage3(x)
        s3 = x  # 1/32
        s3 = self.scores1(s3)
        s3 = self.upsample_2x(s3)
        s2 = self.scores2(s2)
        s2 = s2 + s3
        s1 = self.scores3(s1)
        s2 = self.upsample_4x(s2)
        s = s1 + s2
        # BUG FIX: upsample the fused map `s`, not `s2` — passing s2 here
        # discards the s1 skip connection computed on the line above.
        s = self.upsample_8x(s)
        return s


num_classes = 2 #len(classes)
# Synthetic stand-in data: 9-channel inputs, per-pixel integer class targets.
X = torch.randn(2, 9, 224, 224)
Y = torch.randint(0, num_classes, (2, 224, 224))

CT_dataset = TensorDataset(X,Y)
kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}
train_loader = torch.utils.data.DataLoader(
    CT_dataset,
    batch_size=1, shuffle=True, **kwargs)

# Load ImageNet weights so training starts from a pretrained backbone.
pretrained_net = models.resnet34(pretrained=True)

# BUG FIX: only move the model to the GPU when one is available, matching
# the cuda() guard used for the batches below.
model = fcn(num_classes)
if torch.cuda.is_available():
    model = model.cuda()
criterion = nn.NLLLoss()
basic_optim = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-4)
optimizer = basic_optim

for e in range(50):
    if e > 0 and e % 50 == 0:
        # BUG FIX: PyTorch optimizers have no set_learning_rate(); scale the
        # lr of each param group instead. (Note: with range(50) this branch
        # never fires — raise the epoch count or lower the modulus to decay.)
        for param_group in optimizer.param_groups:
            param_group['lr'] *= 0.1
    train_loss = 0
    model= model.train()

    for idx, data in tqdm(enumerate(train_loader)):
        optimizer.zero_grad()
        x, y_true = data
        if torch.cuda.is_available():
            x, y_true = x.cuda(), y_true.cuda()

        # forward
        out = model(x)
        out = F.log_softmax(out, dim=1)  # (b, n, h, w)
        loss = criterion(out, y_true)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

PS: I’ve edited your code and added code formatting.
You can post code directly by wrapping it into three backticks ``` :wink:

Dear sir,
I am very grateful for your reply.

I have tried to resize my data as follows so that it could meet the input requirements stated in the tutorial. Please any suggestions on that?

Are you seeing any error using my modified code and your input shapes?
If so, please post the shape of data and the error message.

X = torch.from_numpy(np.array(old_data)).float()

Y = torch.from_numpy(np.array(old_label)).long()

print(X.size(),Y.size())

CT_dataset = TensorDataset(X,Y)

# BUG FIX: straight quotes restored — the curly quotes from the forum paste
# are not valid Python syntax.
kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}

train_loader = torch.utils.data.DataLoader(
    CT_dataset,
    batch_size=1, shuffle=True, **kwargs)

pretrained_net = models.resnet34(pretrained=True)

num_classes = 2 #len(classes)

class fcn(nn.Module):
    """FCN-style segmentation head on a ResNet-34 backbone with a
    9-channel first conv layer. Indentation restored from the forum paste."""

    def __init__(self, num_classes):
        super(fcn, self).__init__()
        self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
        # change input channels to 9
        self.stage1[0] = nn.Conv2d(9, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.stage2 = list(pretrained_net.children())[-4]
        self.stage3 = list(pretrained_net.children())[-3]
        # 1x1 convs producing per-class score maps at each scale.
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(256, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)
        # Learned upsampling back to the input resolution.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
        self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)

    def forward(self, x):
        x = self.stage1(x)
        s1 = x  # 1/8
        x = self.stage2(x)
        s2 = x  # 1/16
        x = self.stage3(x)
        s3 = x  # 1/32
        s3 = self.scores1(s3)
        s3 = self.upsample_2x(s3)
        s2 = self.scores2(s2)
        s2 = s2 + s3
        s1 = self.scores3(s1)
        s2 = self.upsample_4x(s2)
        s = s1 + s2
        # BUG FIX: upsample the fused map `s`, not `s2` — passing s2 here
        # discards the s1 skip connection computed on the line above.
        s = self.upsample_8x(s)
        return s

num_classes = 2 #len(classes)

X = torch.randn(2, 9, 224, 224)

Y = torch.randint(0, num_classes, (2, 224, 224))

CT_dataset = TensorDataset(X,Y)

# BUG FIX: straight quotes restored — the curly quotes from the forum paste
# are not valid Python syntax.
kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}

train_loader = torch.utils.data.DataLoader(
    CT_dataset,
    batch_size=1, shuffle=True, **kwargs)

pretrained_net = models.resnet34(pretrained=True)

#model = fcn(num_classes).cuda()

model = fcn(num_classes)

criterion = nn.NLLLoss()

basic_optim = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-4)

optimizer = basic_optim

for e in range(50):
    # Indentation restored from the forum paste. BUG FIX: optimizers have no
    # set_learning_rate(); scale each param group's lr instead (this branch
    # never fires with range(50) anyway — adjust the modulus to use it).
    if e > 0 and e % 50 == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= 0.1

    train_loss = 0
    model= model.train()

    for idx, data in tqdm(enumerate(train_loader)):
        optimizer.zero_grad()
        x, y_true = data
        if torch.cuda.is_available():
            x, y_true = x.cuda(), y_true.cuda()

        # forward
        out = model(x)
        out = F.log_softmax(out, dim=1)  # (b, n, h, w)
        loss = criterion(out, y_true)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        # NOTE(review): get_accuracy (defined later in this thread) uses
        # torch.max/torch.sum, so passing .numpy() arrays raises the
        # TypeError quoted below — pass the tensors themselves instead.
        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = y_true.data.cpu().numpy()
        acc = get_accuracy(label_true, label_pred)
        print(acc)

Sorry, I forgot to mention that my original samples are of sizes (9,64,64) and (1,64,64) for data and labels, respectively. After making some modifications just as you suggested, I got the following error:

RuntimeError: invalid argument 3: only batches of spatial targets supported (3D tensors) but got targets of dimension: 4 at C:\w\1\s\windows\pytorch\aten\src\THNN/generic/SpatialClassNLLCriterion.c:61

torch.Size([1104, 9, 64, 64]) torch.Size([1104, 1, 64, 64])

this is the data shape and the error is as follows:
RuntimeError: invalid argument 3: only batches of spatial targets supported (3D tensors) but got targets of dimension: 4 at C:\w\1\s\windows\pytorch\aten\src\THNN/generic/SpatialClassNLLCriterion.c:61

The target tensor should be a LongTensor containing the class indices with the shape [batch_size, height, width].
Based on your shape description it looks like you are using an additional channel dimension.
Try to squeeze it via target = target.squeeze(1).

Thank you very much for your prompt reply sir. I have defined the following function for estimating the accuracy of the designed model

def get_accuracy(SR, GT, threshold=0.5):
    """Pixel accuracy between a score map and its ground truth.

    SR: 4D torch tensor of scores/probabilities, binarized at ``threshold``.
    GT: 4D torch tensor; foreground is taken as the pixels equal to GT's max.
    Returns the fraction of matching pixels as a Python float.

    Note: both arguments must be torch tensors — numpy arrays fail on
    torch.max / torch.sum (the TypeError reported in this thread).
    Indentation restored from the forum paste.
    """
    SR = SR > threshold
    GT = GT == torch.max(GT)
    corr = torch.sum(SR == GT)
    # Total number of pixels across the 4D tensor.
    tensor_size = SR.size(0) * SR.size(1) * SR.size(2) * SR.size(3)
    acc = float(corr) / float(tensor_size)
    return acc

I have added the following lines to my codes
out = model(x)

    # (excerpt of the inner training loop, indentation as posted)
    out = F.log_softmax(out, dim=1)  # (b, n, h, w)

    loss = criterion(out, y_true)

    # backward

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    train_loss += loss.item()

    label_pred = out.max(dim=1)[1].data.cpu().numpy()

    label_true = y_true .data.cpu().numpy()

    # NOTE(review): get_accuracy uses torch.max/torch.sum internally, so
    # passing the .numpy() arrays above raises the TypeError quoted below —
    # pass the tensors themselves instead.
    acc= get_accuracy(label_true,label_pred)

    print(acc)

I got the following error after running
TypeError: max(): argument ‘input’ (position 1) must be Tensor, not numpy.ndarray

Try to remove the numpy call for label_pred and label_true and pass the tensors to get_accuracy.

I also had to unsqueeze the labels and it worked
label_true= y_true.unsqueeze(1)

It is now running and the performance is very poor.

Please, would it be necessary to perform some processing operations such as normalization, data augmentation, etc.? By the way, the whole training data might be around 20,000 samples. The small sample set used currently is just for setting up a demo.
Any suggestions would be highly appreciated, given my limited experience with programming and segmentation tasks.