RuntimeError:Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same after model.to("cuda")

Here is my D-Unet model. After doing model.to(device), the system still raise this RuntimeError.

DEVICE = “cuda” if torch.cuda.is_available() else “cpu”

x = torch.rand(16, 4, 192, 192).to(DEVICE)

model = D_Unet().to(DEVICE)

pred = model(x)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

import torch.nn as nn

import torch

class D_SE_Add(nn.Module):

def __init__(self, in3d_depth=2, in3d_Cs=64, out_Cs=64):

    super().__init__()

    self.conv3d_1 = nn.Conv3d(in_channels=in3d_Cs, out_channels=1, kernel_size=1, padding="same")

    self.conv2d_1 = nn.Conv2d(in_channels=in3d_depth, out_channels=out_Cs, kernel_size=3, padding="same")

    self.conv2d_2 = nn.Conv2d(in_channels=out_Cs*2, out_channels=out_Cs, kernel_size=3, padding="same")

    self.relu = nn.ReLU()

def forward(self, input3d, input2d, se_block_out_Cs=64):

    avgpool_ksize = input3d.size()[-1]

    x = self.conv3d_1(input3d)

    x = torch.squeeze(x, 1)

    x = self.conv2d_1(x)

    x = self.relu(x)

    x = Squeeze_Excite_Block(avgpool_ksize, filters=se_block_out_Cs, ratio=16)(x)

    input2d = Squeeze_Excite_Block(avgpool_ksize, filters=se_block_out_Cs, ratio=16)(input2d)

    x = torch.cat((x, input2d), dim=1)

    x = self.conv2d_2(x)

    x = self.relu(x)

    return x

class Squeeze_Excite_Block(nn.Module):

def __init__(self, avgpool_ksize, filters=64, ratio=16) -> None:

    super().__init__()

    self.avgpool2d =  nn.AvgPool2d(avgpool_ksize)

    self.relu = nn.ReLU()

    self.linear_1 = nn.Linear(filters, filters//ratio, bias=False)

    self.linear_2 = nn.Linear(filters//ratio, filters, bias=False)

def forward(self, x):  

    bs, filters = x.size()[:2]

    se_shape = (bs, 1, 1, filters)

    se = self.avgpool2d(x)  

    se = torch.reshape(se, se_shape)

    se = self.linear_1(se)

    se = self.relu(se)

    se = self.linear_2(se)

    se = torch.sigmoid(se)

    se = se.permute(0, 3, 1, 2)

    out = torch.mul(x, se)

    return out

class Bn_Block(nn.Module):

def __init__(self, in_filters, out_filters) -> None:

    super().__init__()

    self.conv2d_1 = nn.Conv2d(in_channels=in_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn2d_1 = nn.BatchNorm2d(num_features=out_filters, eps=1e-03, momentum=0.99)#.cuda()

    self.relu = nn.ReLU()#.cuda()

    self.conv2d_2 = nn.Conv2d(in_channels=out_filters, out_channels=out_filters, kernel_size=3, padding='same')#.cuda()

    self.bn2d_2 = nn.BatchNorm2d(num_features=out_filters, eps=1e-03, momentum=0.99)

def forward(self, x):

    x = self.conv2d_1(x)

    x = self.bn2d_1(x)

    x = self.relu(x)

    x = self.conv2d_2(x)

    x = self.bn2d_2(x)

    self.relu(x)

    return x

class Bn_Block3d(nn.Module):

def __init__(self, in_filters, out_filters) -> None:

    super().__init__()

    self.conv3d_1 = nn.Conv3d(in_channels=in_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn3d_1 = nn.BatchNorm3d(num_features=out_filters, eps=1e-03, momentum=0.99)

    self.relu = nn.ReLU()

    self.conv3d_2 = nn.Conv3d(in_channels=out_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn3d_2 = nn.BatchNorm3d(num_features=out_filters, eps=1e-03, momentum=0.99)

def forward(self, x):

    x = self.conv3d_1(x)

    x = self.bn3d_1(x)

    x = self.relu(x)

    x = self.conv3d_2(x)

    x = self.bn3d_2(x)

    x = self.relu(x)

    return x

class D_Unet(nn.Module):

def __init__(self) -> None:

    super().__init__()

    self.conv2d_1 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_3 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_5 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1, padding="same", padding_mode='zeros')

    self.maxpool3d = nn.MaxPool3d(kernel_size=2)

    self.maxpool2d = nn.MaxPool2d(kernel_size=2)

    self.relu = nn.ReLU()

    self.dropout = nn.Dropout(p=0.3)

    self.upsampling2d = nn.Upsample(scale_factor=2)

def forward(self, x):

    # Encoder

    # 3D branch

    input3d = torch.unsqueeze(x, 1) # (B, 1, 4, 192, 192)

    conv3d1 = Bn_Block3d(in_filters=1, out_filters=32)(input3d)# (B, 32, 4, 192, 192)

    pool3d1 = self.maxpool3d(conv3d1) # (B, 32, 2, 96, 96)

    conv3d2 = Bn_Block3d(in_filters=32, out_filters=64)(pool3d1)# (B, 64, 2, 96, 96)

    pool3d2 = self.maxpool3d(conv3d2)

    conv3d3 = Bn_Block3d(in_filters=64, out_filters=128)(pool3d2)

    # 2D branch

    conv1 = Bn_Block(in_filters=4, out_filters=32)(x)

    pool1 = self.maxpool2d(conv1)

    conv2 = Bn_Block(in_filters=32, out_filters=64)(pool1)

    # D_SE_Add Block

    conv2 = D_SE_Add(2, 64, 64)(conv3d2, conv2, se_block_out_Cs=64)

    pool2 = self.maxpool2d(conv2)

    conv3 = Bn_Block(in_filters=64, out_filters=128)(pool2)

    conv3 = D_SE_Add(1, 128, 128)(conv3d3, conv3, se_block_out_Cs=128)

    pool3 = self.maxpool2d(conv3)

    conv4 = Bn_Block(in_filters=128, out_filters=256)(pool3)

    conv4 = self.dropout(conv4)

    pool4 = self.maxpool2d(conv4)

    conv5 = Bn_Block(in_filters=256, out_filters=512)(pool4)

    conv5 = self.dropout(conv5) # (B, 512, 12, 12)

    # Decoder

    up6 =  self.conv2d_1(self.upsampling2d(conv5)) # (B, 256, 24, 24)

    merge6 = torch.cat((conv4, up6), dim=1) # (B, 512, 24, 24)

    conv6 = Bn_Block(in_filters=512, out_filters=256)(merge6) # (B, 256, 24, 24)

    up7 = self.conv2d_2(self.upsampling2d(conv6)) # (B, 128, 48, 48)

    merge7 = torch.cat((conv3, up7), dim=1) # (B, 256, 48, 48)

    conv7 = Bn_Block(in_filters=256, out_filters=128)(merge7) # (B, 128, 48, 48)

    up8 = self.conv2d_3(self.upsampling2d(conv7)) # (B, 64, 96, 96)

    merge8 = torch.cat((conv2, up8), dim=1) # (B, 128, 96, 96)

    conv8 = Bn_Block(in_filters=128, out_filters=64)(merge8) # (B, 64, 96, 96)

    up9 = self.conv2d_4(self.upsampling2d(conv8)) # (B, 32, 128, 128)

    merge9 = torch.cat((conv1, up9), dim=1) # (B, 64, 128, 128)

    conv9 = Bn_Block(in_filters=64, out_filters=32)(merge9)

    conv10 = self.conv2d_5(conv9)  

    # out = torch.sigmoid(conv10)

    return conv10

I have also checked if the model and input are both on GPU

for p in model.parameters():
print('model param is on device: ', p.device)

model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0

x.device
device(type=‘cuda’, index=0)

Thanks in advanced!! Any help would be greatly appreciated!

You are creating new modules in the forward method of D_Unet e.g. here:

conv3d1 = Bn_Block3d(in_filters=1, out_filters=32)(input3d)

which won’t be pushed to the device in the model.to() call.
I’m not familiar with your approach, but the standard approach is to initialize the layers in the __init__ method and use them in the forward. Also, your current approach would create parameters in these layers, which will be randomly initialized and never trained.

Thank you! I’ll try to fix it based on the method you provided!

@ptrblck
But how can the case work properly?!?!

class Block(Module):

def __init__(self, inChannels, outChannels):

    super().__init__()

    # store the convolution and RELU layers

    self.conv1 = Conv2d(inChannels, outChannels, 3)

    self.conv2 = Conv2d(outChannels, outChannels, 3)

    # self.conv3 = Conv2d(outChannels, outChannels, 3)

    self.relu = ReLU()

def forward(self, x):

    # apply CONV => RELU => CONV block to the inputs and return it

    return self.relu(self.conv2(self.relu(self.conv1(x))))

    # return self.relu(self.conv3(self.relu(self.conv2(self.relu(self.conv1(x))))))

class Encoder(Module):

def __init__(self, channels=(config.NUM_CHANNELS, 16, 32, 64)):

    super().__init__()

    # store the encoder blocks and maxpooling layer

    self.encBlocks = ModuleList(

        [Block(channels[i], channels[i + 1])

            for i in range(len(channels) - 1)])

    self.pool = MaxPool2d(2)

def forward(self, x):

    # initialize an empty list to store the intermediate outputs

    blockOutputs = []

    # loop through the encoder blocks

    for block in self.encBlocks:

        # pass the inputs through the current encoder block, store

        # the outputs, and then apply maxpooling on the output

        x = block(x)

        blockOutputs.append(x)

        x = self.pool(x)

    # return the list containing the intermediate outputs

    return blockOutputs

class Decoder(Module):

def __init__(self, channels=(64, 32, 16)):

    super().__init__()

    # initialize the number of channels, upsampler blocks, and

    # decoder blocks

    self.channels = channels

    self.upconvs = ModuleList(

        [ConvTranspose2d(channels[i], channels[i + 1], 2, 2)

            for i in range(len(channels) - 1)])

    self.dec_blocks = ModuleList(

        [Block(channels[i], channels[i + 1])

            for i in range(len(channels) - 1)])

def forward(self, x, encFeatures):

    # loop through the number of channels

    for i in range(len(self.channels) - 1):

        # pass the inputs through the upsampler blocks

        x = self.upconvs[i](x)

        # crop the current features from the encoder blocks,

        # concatenate them with the current upsampled features,

        # and pass the concatenated output through the current

        # decoder block

        encFeat = self.crop(encFeatures[i], x)

        x = torch.cat([x, encFeat], dim=1)

        x = self.dec_blocks[i](x)

    # return the final decoder output

    return x

def crop(self, encFeatures, x):

    # grab the dimensions of the inputs, and crop the encoder

    # features to match the dimensions

    (_, _, H, W) = x.shape

    encFeatures = CenterCrop([H, W])(encFeatures)

    # return the cropped features

    return encFeatures

class UNet(Module):

def __init__(self, encChannels=(config.NUM_CHANNELS, 16, 32, 64),

     decChannels=(64, 32, 16),

     nbClasses=1, retainDim=True,

     outSize=(config.INPUT_IMAGE_HEIGHT,  config.INPUT_IMAGE_WIDTH)):

    super().__init__()

    # initialize the encoder and decoder

    self.encoder = Encoder(encChannels)

    self.decoder = Decoder(decChannels)

    # initialize the regression head and store the class variables

    self.head = Conv2d(decChannels[-1], nbClasses, 1)

    self.retainDim = retainDim

    self.outSize = outSize

def forward(self, x):

    # grab the features from the encoder

    encFeatures = self.encoder(x)

    # pass the encoder features through decoder making sure that

    # their dimensions are suited for concatenation

    decFeatures = self.decoder(encFeatures[::-1][0],

        encFeatures[::-1][1:])

    # pass the decoder features through the regression head to

    # obtain the segmentation mask

    map = self.head(decFeatures)

    # check to see if we are retaining the original output

    # dimensions and if so, then resize the output to match them

    if self.retainDim:

        map = F.interpolate(map, self.outSize)

    # return the segmentation map

    return map

Here is a small example:

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        # initialize modules
        self.linear = nn.Linear(1, 1)
        
    def forward(self, x):
        # use layers
        x = self.linear(x)
        return x
    
# create model object and initialize internal layers defined in __init__
model = MyModel()
# push all internal layers and parameters to the device
model.to('cuda')

x = torch.randn(1, 1, device='cuda')
out = model(x)

Got it! Thank you very much again!