PyTorch Implementation of a ResNet-Based UNet

I want to implement a ResNet-based UNet for segmentation (without pre-training). I have referred to an existing Keras implementation, but my project is in PyTorch and I am not sure whether I have translated it correctly.

Keras based implementation
U-net with simple Resnet Blocks

My PyTorch implementation (I am not sure if it is correct …). Any suggestions would be highly appreciated.

import torch
import torch.nn as nn

# Two conv -> bn -> relu stages
def double_conv(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )

# Same as double_conv but without the final ReLU (applied after the residual add)
def root_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
    )

# Define the UNet architecture
class ResNetUNet(nn.Module):

    def __init__(self, n_class):
        super().__init__()

        # Encoder blocks (double_conv) and their residual branches (root_block)
        self.dconv_down1 = double_conv(3, 64)
        self.dconv_down11 = root_block(64, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down21 = root_block(128, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down31 = root_block(256, 256)
        self.dconv_down4 = double_conv(256, 512)
        self.dconv_down41 = root_block(512, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.relu = nn.ReLU(inplace=True)

        # Decoder blocks; input channels account for the concatenated skip connection
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up31 = root_block(256, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up21 = root_block(128, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)
        self.dconv_up11 = root_block(64, 64)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, x):
        # Encoder: double_conv -> root_block with residual add -> relu -> pool
        conv1 = self.dconv_down1(x)
        x = self.dconv_down11(conv1)
        x += conv1
        x = self.relu(x)
        x = self.maxpool(x)

        conv2 = self.dconv_down2(x)
        x = self.dconv_down21(conv2)
        x += conv2
        x = self.relu(x)
        x = self.maxpool(x)

        conv3 = self.dconv_down3(x)
        x = self.dconv_down31(conv3)
        x += conv3
        x = self.relu(x)
        x = self.maxpool(x)

        conv4 = self.dconv_down4(x)
        x = self.dconv_down41(conv4)
        x += conv4
        x = self.relu(x)

        # Decoder: upsample -> concat skip -> double_conv -> root_block with residual add
        deconv3 = nn.functional.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        deconv3 = torch.cat([deconv3, conv3], dim=1)
        uconv3 = self.dconv_up3(deconv3)
        x = self.dconv_up31(uconv3)
        x += uconv3
        x = self.relu(x)

        deconv2 = nn.functional.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        deconv2 = torch.cat([deconv2, conv2], dim=1)
        uconv2 = self.dconv_up2(deconv2)
        x = self.dconv_up21(uconv2)
        x += uconv2
        x = self.relu(x)

        deconv1 = nn.functional.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        deconv1 = torch.cat([deconv1, conv1], dim=1)
        uconv1 = self.dconv_up1(deconv1)
        x = self.dconv_up11(uconv1)
        x += uconv1
        x = self.relu(x)

        out = self.conv_last(x)

        return out
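
For reference, a quick shape check along these lines should confirm the model runs (my own test snippet, assuming a 3-channel input whose height and width are divisible by 8 so the three pool/upsample stages line up):

# Quick shape check (test snippet, not part of the model itself)
model = ResNetUNet(n_class=2)
dummy = torch.randn(1, 3, 256, 256)   # N x C x H x W; H and W divisible by 8
with torch.no_grad():
    out = model(dummy)
print(out.shape)   # expected: torch.Size([1, 2, 256, 256])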

The Keras-based implementation is extracted below:

def convolution_block(x, filters, size, strides=(1,1), padding='same', activation=True):
    x = Conv2D(filters, size, strides=strides, padding=padding)(x)
    x = BatchNormalization()(x)
    if activation == True:
        x = Activation('relu')(x)
    return x

def residual_block(blockInput, num_filters=16):
    x = Activation('relu')(blockInput)
    x = BatchNormalization()(x)
    x = convolution_block(x, num_filters, (3,3))
    x = convolution_block(x, num_filters, (3,3), activation=False)
    x = Add()([x, blockInput])
    return x

# Build model
def build_model(input_layer, start_neurons, DropoutRatio=0.5):
    # 101 -> 50
    conv1 = Conv2D(start_neurons * 1, (3, 3), activation=None, padding="same")(input_layer)
    conv1 = residual_block(conv1, start_neurons * 1)
    conv1 = residual_block(conv1, start_neurons * 1)
    conv1 = Activation('relu')(conv1)
    pool1 = MaxPooling2D((2, 2))(conv1)
    pool1 = Dropout(DropoutRatio / 2)(pool1)

    # 50 -> 25
    conv2 = Conv2D(start_neurons * 2, (3, 3), activation=None, padding="same")(pool1)
    conv2 = residual_block(conv2, start_neurons * 2)
    conv2 = residual_block(conv2, start_neurons * 2)
    conv2 = Activation('relu')(conv2)
    pool2 = MaxPooling2D((2, 2))(conv2)
    pool2 = Dropout(DropoutRatio)(pool2)

    # 25 -> 12
    conv3 = Conv2D(start_neurons * 4, (3, 3), activation=None, padding="same")(pool2)
    conv3 = residual_block(conv3, start_neurons * 4)
    conv3 = residual_block(conv3, start_neurons * 4)
    conv3 = Activation('relu')(conv3)
    pool3 = MaxPooling2D((2, 2))(conv3)
    pool3 = Dropout(DropoutRatio)(pool3)

    # 12 -> 6
    conv4 = Conv2D(start_neurons * 8, (3, 3), activation=None, padding="same")(pool3)
    conv4 = residual_block(conv4, start_neurons * 8)
    conv4 = residual_block(conv4, start_neurons * 8)
    conv4 = Activation('relu')(conv4)
    pool4 = MaxPooling2D((2, 2))(conv4)
    pool4 = Dropout(DropoutRatio)(pool4)

    # Middle
    convm = Conv2D(start_neurons * 16, (3, 3), activation=None, padding="same")(pool4)
    convm = residual_block(convm, start_neurons * 16)
    convm = residual_block(convm, start_neurons * 16)
    convm = Activation('relu')(convm)

    # 6 -> 12
    deconv4 = Conv2DTranspose(start_neurons * 8, (3, 3), strides=(2, 2), padding="same")(convm)
    uconv4 = concatenate([deconv4, conv4])
    uconv4 = Dropout(DropoutRatio)(uconv4)

    uconv4 = Conv2D(start_neurons * 8, (3, 3), activation=None, padding="same")(uconv4)
    uconv4 = residual_block(uconv4, start_neurons * 8)
    uconv4 = residual_block(uconv4, start_neurons * 8)
    uconv4 = Activation('relu')(uconv4)

    # 12 -> 25
    #deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="same")(uconv4)
    deconv3 = Conv2DTranspose(start_neurons * 4, (3, 3), strides=(2, 2), padding="valid")(uconv4)
    uconv3 = concatenate([deconv3, conv3])
    uconv3 = Dropout(DropoutRatio)(uconv3)

    uconv3 = Conv2D(start_neurons * 4, (3, 3), activation=None, padding="same")(uconv3)
    uconv3 = residual_block(uconv3, start_neurons * 4)
    uconv3 = residual_block(uconv3, start_neurons * 4)
    uconv3 = Activation('relu')(uconv3)

    # 25 -> 50
    deconv2 = Conv2DTranspose(start_neurons * 2, (3, 3), strides=(2, 2), padding="same")(uconv3)
    uconv2 = concatenate([deconv2, conv2])

    uconv2 = Dropout(DropoutRatio)(uconv2)
    uconv2 = Conv2D(start_neurons * 2, (3, 3), activation=None, padding="same")(uconv2)
    uconv2 = residual_block(uconv2, start_neurons * 2)
    uconv2 = residual_block(uconv2, start_neurons * 2)
    uconv2 = Activation('relu')(uconv2)

    # 50 -> 101
    #deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="same")(uconv2)
    deconv1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="valid")(uconv2)
    uconv1 = concatenate([deconv1, conv1])

    uconv1 = Dropout(DropoutRatio)(uconv1)
    uconv1 = Conv2D(start_neurons * 1, (3, 3), activation=None, padding="same")(uconv1)
    uconv1 = residual_block(uconv1, start_neurons * 1)
    uconv1 = residual_block(uconv1, start_neurons * 1)
    uconv1 = Activation('relu')(uconv1)

    uconv1 = Dropout(DropoutRatio / 2)(uconv1)
    output_layer = Conv2D(1, (1, 1), padding="same", activation="sigmoid")(uconv1)

    return output_layer
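
For context, I believe the kernel builds the model roughly like this (my reconstruction, not from the original code; the size comments above, e.g. "101 -> 50", suggest a single-channel 101x101 input):

# Hypothetical usage sketch; the Conv2D/Dropout/etc. imports are assumed from the kernel
from keras.layers import Input
from keras.models import Model

input_layer = Input((101, 101, 1))
output_layer = build_model(input_layer, start_neurons=16, DropoutRatio=0.5)
model = Model(input_layer, output_layer)
model.summary()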

How did you define double_conv?

Thanks for your reply. Please see the edit above, which now includes double_conv.

Thanks for the code update.

The first few operations in your PyTorch model:

    conv1 = self.dconv_down1(x)
    x = self.dconv_down11(conv1)
    x += conv1
    x = self.relu(x)
    x = self.maxpool(x)

would yield the following execution order:

x -> conv -> bn -> relu -> conv -> bn -> relu -> out -> conv -> bn -> relu -> conv -> bn -> x
x += out

While in Keras the execution order would be:

x -> conv -> out -> relu -> bn -> conv -> bn -> relu -> conv -> bn -> x
x += out

E.g. one difference is that the Keras model uses the output of the first conv layer as the residual connection in residual_block, while your PyTorch model uses the output of the first double_conv block as the residual connection, if I'm not mistaken.
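
If you want to stay closer to the Keras logic, something like this might work as a direct translation of residual_block (just a quick sketch, not tested):

import torch
import torch.nn as nn

# Sketch of a PyTorch module mirroring the Keras residual_block above (not tested):
# pre-activation (relu -> bn), one conv block with relu, one without, then add the input back.
class ResidualBlock(nn.Module):
    def __init__(self, num_filters):
        super().__init__()
        self.bn0 = nn.BatchNorm2d(num_filters)
        self.conv1 = nn.Conv2d(num_filters, num_filters, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_filters)
        self.conv2 = nn.Conv2d(num_filters, num_filters, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_filters)

    def forward(self, block_input):
        x = torch.relu(block_input)                # Activation('relu')(blockInput)
        x = self.bn0(x)                            # BatchNormalization()
        x = torch.relu(self.bn1(self.conv1(x)))    # convolution_block(..., activation=True)
        x = self.bn2(self.conv2(x))                # convolution_block(..., activation=False)
        return x + block_input                     # Add()([x, blockInput])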

@ptrblck Thanks for your analysis.

1. I think the Keras code calls residual_block twice, so it should be something like:

x -> conv -> out -> relu -> bn -> conv -> bn -> relu -> conv -> bn -> conv -> bn -> relu -> conv -> bn -> x
x += out

2. For my PyTorch implementation, I actually couldn't get my head around how to do the x = x + residual (which you denoted as 'out' here), because unlike in the original paper, x and out here have different sizes. So the correct thing to do should be something like:

x -> conv -> out -> conv -> bn -> relu -> conv -> bn -> relu -> conv -> bn -> relu -> conv -> bn -> x
x += out

The first conv is just there to make sure out can be added to the final x, followed by three conv -> bn -> relu groups and a fourth conv -> bn without the relu. Am I right?

3. If my understanding is correct, I am still not sure how to implement the first part x -> conv -> out. Should I run out = self.dconv_down11(conv1) first? (A rough sketch of what I mean is below.)
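
Here is roughly what I mean for one encoder stage, written out as a module (just my attempt under the understanding above; hypothetical naming, not tested):

# Rough sketch of one encoder stage (hypothetical EncoderStage name, for illustration only):
# the first conv only changes the channel count so the residual add works,
# then conv -> bn -> relu three times and a final conv -> bn before the add.
import torch
import torch.nn as nn

class EncoderStage(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv_in = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.body = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),    # fourth conv -> bn, no relu before the add
        )

    def forward(self, x):
        out = self.conv_in(x)    # x -> conv -> out
        x = self.body(out)       # conv -> bn -> relu (x3) -> conv -> bn
        x = x + out              # x += out
        return torch.relu(x)     # relu after the residual add, as in my code above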

Any suggestions would be really appreciated.