RuntimeError:Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same after model.to("cuda")

thedrewyang · March 15, 2022, 7:56am

Here is my D-Unet model. After doing model.to(device), the system still raise this RuntimeError.

DEVICE = “cuda” if torch.cuda.is_available() else “cpu”

x = torch.rand(16, 4, 192, 192).to(DEVICE)

model = D_Unet().to(DEVICE)

pred = model(x)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

import torch.nn as nn

import torch

class D_SE_Add(nn.Module):

def __init__(self, in3d_depth=2, in3d_Cs=64, out_Cs=64):

    super().__init__()

    self.conv3d_1 = nn.Conv3d(in_channels=in3d_Cs, out_channels=1, kernel_size=1, padding="same")

    self.conv2d_1 = nn.Conv2d(in_channels=in3d_depth, out_channels=out_Cs, kernel_size=3, padding="same")

    self.conv2d_2 = nn.Conv2d(in_channels=out_Cs*2, out_channels=out_Cs, kernel_size=3, padding="same")

    self.relu = nn.ReLU()

def forward(self, input3d, input2d, se_block_out_Cs=64):

    avgpool_ksize = input3d.size()[-1]

    x = self.conv3d_1(input3d)

    x = torch.squeeze(x, 1)

    x = self.conv2d_1(x)

    x = self.relu(x)

    x = Squeeze_Excite_Block(avgpool_ksize, filters=se_block_out_Cs, ratio=16)(x)

    input2d = Squeeze_Excite_Block(avgpool_ksize, filters=se_block_out_Cs, ratio=16)(input2d)

    x = torch.cat((x, input2d), dim=1)

    x = self.conv2d_2(x)

    x = self.relu(x)

    return x

class Squeeze_Excite_Block(nn.Module):

def __init__(self, avgpool_ksize, filters=64, ratio=16) -> None:

    super().__init__()

    self.avgpool2d =  nn.AvgPool2d(avgpool_ksize)

    self.relu = nn.ReLU()

    self.linear_1 = nn.Linear(filters, filters//ratio, bias=False)

    self.linear_2 = nn.Linear(filters//ratio, filters, bias=False)

def forward(self, x):  

    bs, filters = x.size()[:2]

    se_shape = (bs, 1, 1, filters)

    se = self.avgpool2d(x)  

    se = torch.reshape(se, se_shape)

    se = self.linear_1(se)

    se = self.relu(se)

    se = self.linear_2(se)

    se = torch.sigmoid(se)

    se = se.permute(0, 3, 1, 2)

    out = torch.mul(x, se)

    return out

class Bn_Block(nn.Module):

def __init__(self, in_filters, out_filters) -> None:

    super().__init__()

    self.conv2d_1 = nn.Conv2d(in_channels=in_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn2d_1 = nn.BatchNorm2d(num_features=out_filters, eps=1e-03, momentum=0.99)#.cuda()

    self.relu = nn.ReLU()#.cuda()

    self.conv2d_2 = nn.Conv2d(in_channels=out_filters, out_channels=out_filters, kernel_size=3, padding='same')#.cuda()

    self.bn2d_2 = nn.BatchNorm2d(num_features=out_filters, eps=1e-03, momentum=0.99)

def forward(self, x):

    x = self.conv2d_1(x)

    x = self.bn2d_1(x)

    x = self.relu(x)

    x = self.conv2d_2(x)

    x = self.bn2d_2(x)

    self.relu(x)

    return x

class Bn_Block3d(nn.Module):

def __init__(self, in_filters, out_filters) -> None:

    super().__init__()

    self.conv3d_1 = nn.Conv3d(in_channels=in_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn3d_1 = nn.BatchNorm3d(num_features=out_filters, eps=1e-03, momentum=0.99)

    self.relu = nn.ReLU()

    self.conv3d_2 = nn.Conv3d(in_channels=out_filters, out_channels=out_filters, kernel_size=3, padding='same')

    self.bn3d_2 = nn.BatchNorm3d(num_features=out_filters, eps=1e-03, momentum=0.99)

def forward(self, x):

    x = self.conv3d_1(x)

    x = self.bn3d_1(x)

    x = self.relu(x)

    x = self.conv3d_2(x)

    x = self.bn3d_2(x)

    x = self.relu(x)

    return x

class D_Unet(nn.Module):

def __init__(self) -> None:

    super().__init__()

    self.conv2d_1 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_3 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, padding="same", padding_mode='zeros')

    self.conv2d_5 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1, padding="same", padding_mode='zeros')

    self.maxpool3d = nn.MaxPool3d(kernel_size=2)

    self.maxpool2d = nn.MaxPool2d(kernel_size=2)

    self.relu = nn.ReLU()

    self.dropout = nn.Dropout(p=0.3)

    self.upsampling2d = nn.Upsample(scale_factor=2)

def forward(self, x):

    # Encoder

    # 3D branch

    input3d = torch.unsqueeze(x, 1) # (B, 1, 4, 192, 192)

    conv3d1 = Bn_Block3d(in_filters=1, out_filters=32)(input3d)# (B, 32, 4, 192, 192)

    pool3d1 = self.maxpool3d(conv3d1) # (B, 32, 2, 96, 96)

    conv3d2 = Bn_Block3d(in_filters=32, out_filters=64)(pool3d1)# (B, 64, 2, 96, 96)

    pool3d2 = self.maxpool3d(conv3d2)

    conv3d3 = Bn_Block3d(in_filters=64, out_filters=128)(pool3d2)

    # 2D branch

    conv1 = Bn_Block(in_filters=4, out_filters=32)(x)

    pool1 = self.maxpool2d(conv1)

    conv2 = Bn_Block(in_filters=32, out_filters=64)(pool1)

    # D_SE_Add Block

    conv2 = D_SE_Add(2, 64, 64)(conv3d2, conv2, se_block_out_Cs=64)

    pool2 = self.maxpool2d(conv2)

    conv3 = Bn_Block(in_filters=64, out_filters=128)(pool2)

    conv3 = D_SE_Add(1, 128, 128)(conv3d3, conv3, se_block_out_Cs=128)

    pool3 = self.maxpool2d(conv3)

    conv4 = Bn_Block(in_filters=128, out_filters=256)(pool3)

    conv4 = self.dropout(conv4)

    pool4 = self.maxpool2d(conv4)

    conv5 = Bn_Block(in_filters=256, out_filters=512)(pool4)

    conv5 = self.dropout(conv5) # (B, 512, 12, 12)

    # Decoder

    up6 =  self.conv2d_1(self.upsampling2d(conv5)) # (B, 256, 24, 24)

    merge6 = torch.cat((conv4, up6), dim=1) # (B, 512, 24, 24)

    conv6 = Bn_Block(in_filters=512, out_filters=256)(merge6) # (B, 256, 24, 24)

    up7 = self.conv2d_2(self.upsampling2d(conv6)) # (B, 128, 48, 48)

    merge7 = torch.cat((conv3, up7), dim=1) # (B, 256, 48, 48)

    conv7 = Bn_Block(in_filters=256, out_filters=128)(merge7) # (B, 128, 48, 48)

    up8 = self.conv2d_3(self.upsampling2d(conv7)) # (B, 64, 96, 96)

    merge8 = torch.cat((conv2, up8), dim=1) # (B, 128, 96, 96)

    conv8 = Bn_Block(in_filters=128, out_filters=64)(merge8) # (B, 64, 96, 96)

    up9 = self.conv2d_4(self.upsampling2d(conv8)) # (B, 32, 128, 128)

    merge9 = torch.cat((conv1, up9), dim=1) # (B, 64, 128, 128)

    conv9 = Bn_Block(in_filters=64, out_filters=32)(merge9)

    conv10 = self.conv2d_5(conv9)  

    # out = torch.sigmoid(conv10)

    return conv10

I have also checked if the model and input are both on GPU

for p in model.parameters():
print('model param is on device: ', p.device)

model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0
model param is on device: cuda:0

x.device
device(type=‘cuda’, index=0)

Thanks in advanced!! Any help would be greatly appreciated!

ptrblck · March 15, 2022, 8:04am

You are creating new modules in the forward method of D_Unet e.g. here:

conv3d1 = Bn_Block3d(in_filters=1, out_filters=32)(input3d)

which won’t be pushed to the device in the model.to() call.
I’m not familiar with your approach, but the standard approach is to initialize the layers in the __init__ method and use them in the forward. Also, your current approach would create parameters in these layers, which will be randomly initialized and never trained.

thedrewyang · March 15, 2022, 8:08am

Thank you! I’ll try to fix it based on the method you provided!

thedrewyang · March 15, 2022, 8:12am

@ptrblck
But how can the case work properly?!?!

class Block(Module):

def __init__(self, inChannels, outChannels):

    super().__init__()

    # store the convolution and RELU layers

    self.conv1 = Conv2d(inChannels, outChannels, 3)

    self.conv2 = Conv2d(outChannels, outChannels, 3)

    # self.conv3 = Conv2d(outChannels, outChannels, 3)

    self.relu = ReLU()

def forward(self, x):

    # apply CONV => RELU => CONV block to the inputs and return it

    return self.relu(self.conv2(self.relu(self.conv1(x))))

    # return self.relu(self.conv3(self.relu(self.conv2(self.relu(self.conv1(x))))))

class Encoder(Module):

def __init__(self, channels=(config.NUM_CHANNELS, 16, 32, 64)):

    super().__init__()

    # store the encoder blocks and maxpooling layer

    self.encBlocks = ModuleList(

        [Block(channels[i], channels[i + 1])

            for i in range(len(channels) - 1)])

    self.pool = MaxPool2d(2)

def forward(self, x):

    # initialize an empty list to store the intermediate outputs

    blockOutputs = []

    # loop through the encoder blocks

    for block in self.encBlocks:

        # pass the inputs through the current encoder block, store

        # the outputs, and then apply maxpooling on the output

        x = block(x)

        blockOutputs.append(x)

        x = self.pool(x)

    # return the list containing the intermediate outputs

    return blockOutputs

class Decoder(Module):

def __init__(self, channels=(64, 32, 16)):

    super().__init__()

    # initialize the number of channels, upsampler blocks, and

    # decoder blocks

    self.channels = channels

    self.upconvs = ModuleList(

        [ConvTranspose2d(channels[i], channels[i + 1], 2, 2)

            for i in range(len(channels) - 1)])

    self.dec_blocks = ModuleList(

        [Block(channels[i], channels[i + 1])

            for i in range(len(channels) - 1)])

def forward(self, x, encFeatures):

    # loop through the number of channels

    for i in range(len(self.channels) - 1):

        # pass the inputs through the upsampler blocks

        x = self.upconvs[i](x)

        # crop the current features from the encoder blocks,

        # concatenate them with the current upsampled features,

        # and pass the concatenated output through the current

        # decoder block

        encFeat = self.crop(encFeatures[i], x)

        x = torch.cat([x, encFeat], dim=1)

        x = self.dec_blocks[i](x)

    # return the final decoder output

    return x

def crop(self, encFeatures, x):

    # grab the dimensions of the inputs, and crop the encoder

    # features to match the dimensions

    (_, _, H, W) = x.shape

    encFeatures = CenterCrop([H, W])(encFeatures)

    # return the cropped features

    return encFeatures

class UNet(Module):

def __init__(self, encChannels=(config.NUM_CHANNELS, 16, 32, 64),

     decChannels=(64, 32, 16),

     nbClasses=1, retainDim=True,

     outSize=(config.INPUT_IMAGE_HEIGHT,  config.INPUT_IMAGE_WIDTH)):

    super().__init__()

    # initialize the encoder and decoder

    self.encoder = Encoder(encChannels)

    self.decoder = Decoder(decChannels)

    # initialize the regression head and store the class variables

    self.head = Conv2d(decChannels[-1], nbClasses, 1)

    self.retainDim = retainDim

    self.outSize = outSize

def forward(self, x):

    # grab the features from the encoder

    encFeatures = self.encoder(x)

    # pass the encoder features through decoder making sure that

    # their dimensions are suited for concatenation

    decFeatures = self.decoder(encFeatures[::-1][0],

        encFeatures[::-1][1:])

    # pass the decoder features through the regression head to

    # obtain the segmentation mask

    map = self.head(decFeatures)

    # check to see if we are retaining the original output

    # dimensions and if so, then resize the output to match them

    if self.retainDim:

        map = F.interpolate(map, self.outSize)

    # return the segmentation map

    return map

ptrblck · March 15, 2022, 8:16am

Here is a small example:

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        # initialize modules
        self.linear = nn.Linear(1, 1)
        
    def forward(self, x):
        # use layers
        x = self.linear(x)
        return x
    
# create model object and initialize internal layers defined in __init__
model = MyModel()
# push all internal layers and parameters to the device
model.to('cuda')

x = torch.randn(1, 1, device='cuda')
out = model(x)

thedrewyang · March 15, 2022, 8:17am

Got it! Thank you very much again!