Unable to add deconv layers - kernel dies

Hi, I'm trying to append a transposed-convolution layer to ResNet-50, but the kernel dies every time I build the model:
class UnetBlock(nn.Module):
    def __init__(self, up_in, x_in, n_out):
        super().__init__()

        x_out = 128
        up_out = 128
        self.x_conv  = nn.Conv2d(x_in, x_out, 1)                       # 1x1 conv on the skip connection
        self.tr_conv = nn.ConvTranspose2d(up_in, up_out, 2, stride=2)  # upsample by 2
        self.bn = nn.BatchNorm2d(x_out + up_out)

    def forward(self, up_p, x_p):
        up_p = self.tr_conv(up_p)
        x_p = self.x_conv(x_p)
        cat_p = torch.cat([up_p, x_p], dim=1)  # concatenate along the channel dimension
        return self.bn(F.relu(cat_p))

class Resnet4Channel(nn.Module):
    def __init__(self, encoder_depth=34, pretrained=True, num_classes=28):
        super().__init__()

        encoder = resnet50(pretrained=True)

        # adapt the first conv to take 4 input channels instead of 3,
        # reusing the pretrained weights
        w = encoder.conv1.weight
        self.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.conv1.weight = torch.nn.Parameter(
            torch.cat((w, 0.5 * (w[:, :1, :, :] + w[:, 2:, :, :])), dim=1))

        self.bn1 = encoder.bn1
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = encoder.layer1
        self.layer2 = encoder.layer2
        self.layer3 = encoder.layer3
        self.layer4 = encoder.layer4

        self.avgpool = encoder.avgpool

        #self.x_unet = UnetBlock(512, 256, 256) #unet
        #self.l = nn.Linear(256, 512)

        self.fc = nn.Linear(512 * (1 if encoder_depth == 34 else 4), num_classes)
    
    class SaveFeatures():  # note: defined inside Resnet4Channel here
        features = None
        def __init__(self, m):
            self.hook = m.register_forward_hook(self.hook_fn)
        def hook_fn(self, module, input, output):
            self.features = output
        def remove(self): self.hook.remove()

    def forward(self, x):
        self.new_classifier = nn.Sequential(*list(encoder.children())[:-1]) #check
        self.sfs = [SaveFeatures(self.new_classifier[4])] #check

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)

        x = self.x_unet(x, self.sfs[0].features) #check
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

Please let me know where I am going wrong…

I chose index 4 because, when I print ResNet-50, the layer at index 4 gives an output of 256 activations, which, concatenated with the deconv output, gives a final output of 512 — and that is what goes into the classification layer…

I guess the problem occurs at the point of extracting the features.
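To double-check which module sits at index 4, one can enumerate the encoder's top-level children (a quick diagnostic sketch; for torchvision's resnet50, index 4 is layer1, whose output has 256 channels at 56x56 spatial size for a 224x224 input):

import torchvision

encoder = torchvision.models.resnet50(pretrained=False)
for i, child in enumerate(encoder.children()):
    print(i, type(child).__name__)
# 0 Conv2d, 1 BatchNorm2d, 2 ReLU, 3 MaxPool2d,
# 4 Sequential (layer1), 5 Sequential (layer2), 6 Sequential (layer3),
# 7 Sequential (layer4), 8 AvgPool2d, 9 Linear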

I tried to debug your code and had to fix some issues first:

  • move SaveFeatures out of the class definition
  • set encoder as a class attribute (self.encoder), since it’s used in forward
  • uncomment the self.x_unet, since it’s used in forward

After these fixes, the current code produces a spatial size mismatch in your UnetBlock (2 vs. 56).
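For reference, a quick way to locate such a mismatch is to print both shapes just before the concatenation in UnetBlock.forward (a debugging sketch, not a fix):

def forward(self, up_p, x_p):
    up_p = self.tr_conv(up_p)
    x_p = self.x_conv(x_p)
    # with the current wiring this prints something like
    # torch.Size([1, 128, 2, 2]) torch.Size([1, 128, 56, 56])
    print(up_p.shape, x_p.shape)
    cat_p = torch.cat([up_p, x_p], dim=1)
    return self.bn(F.relu(cat_p))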

Would this info help you debugging this issue?
Are you using Jupyter notebooks or another IDE?

Hi, thanks for the reply.
I use Jupyter.
I moved SaveFeatures out; encoder was already a class attribute.
Here is the complete code I pasted in:

class SaveFeatures():
    features = None
    def __init__(self, m):
        self.hook = m.register_forward_hook(self.hook_fn)
    def hook_fn(self, module, input, output):
        self.features = output
    def remove(self): self.hook.remove()

class Resnet4Channel(nn.Module):
    def __init__(self, encoder_depth=34, pretrained=True, num_classes=28):
        super().__init__()

        encoder = RESNET_ENCODERS[encoder_depth](pretrained=pretrained)

        # we initialize this conv to take in 4 channels instead of 3
        # we keep the corresponding weights and initialize the new weights with zeros
        # this trick taken from https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb
        w = encoder.conv1.weight
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)

        self.bn1 = encoder.bn1
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = encoder.layer1
        self.layer2 = encoder.layer2
        self.layer3 = encoder.layer3
        self.layer4 = encoder.layer4

        self.avgpool = encoder.avgpool

        self.x_unet = UnetBlock(512, 256, 256) #if I comment out x_unet, how do I append it to the resnet?

        self.fc = nn.Linear(512 * (1 if encoder_depth == 34 else 4), num_classes)

    def forward(self, x):
        self.new_classifier = nn.Sequential(*list(encoder.children())[:-1])
        self.sfs = [SaveFeatures(self.new_classifier[4])]

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)

        x = self.x_unet(x, self.sfs[0].features) # features and x input get concatenated to produce 512
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

I still get a similar error, unfortunately.
Since RESNET_ENCODERS is not defined in your code, I swapped it for torchvision.models.resnet50.
In the code you’ve posted, encoder is still used in forward without being defined. Maybe you’ve defined it somewhere outside your class? I had to store the encoder as a class attribute again, since otherwise it wasn’t found.
Once this was fixed, I got the following error:

RuntimeError: Given transposed=1, weight of size [512, 128, 2, 2], expected input[1, 2048, 1, 1] to have 512 channels, but got 2048 channels instead

It looks like the UnetBlock takes the wrong number of channels.
If I define it as

self.x_unet  =  UnetBlock(2048,256,256)

the error vanishes, but the first one comes back:

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 2 and 56 in dimension 2 at /opt/conda/conda-bld/pytorch-nightly_1546596893918/work/aten/src/TH/generic/THTensorMoreMath.cpp:1291

This still points to a wrong spatial size in torch.cat.
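The constraint is easy to reproduce in isolation (a minimal sketch with stand-in tensors matching the shapes from the error):

import torch

a = torch.zeros(1, 128, 2, 2)    # like up_p after the transposed conv
b = torch.zeros(1, 128, 56, 56)  # like x_p from the hooked layer
# raises: sizes of tensors must match except in dimension 1
torch.cat([a, b], dim=1)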

Should setting the output of the concat to 2048 fix the issue? The classification layer takes 512*4 as input.

Unfortunately not, since torch.cat concatenates the tensors along the specified dimension, while all other dimensions must match.
Currently x_p is [batch_size, 128, 56, 56], while up_p is [batch_size, 128, 2, 2].
The error most likely comes from the wrong SaveFeatures tensor.
What is your intention of using self.new_classifier[4] for it? Is it defined as such in a paper or did you just pick this layer?
A quick fix might be to just pool the bigger activation and go with it, but I’m not sure if that’s what you want.
Here are the changes:

# in UnetBlock:
...
x_p = F.adaptive_max_pool2d(x_p, 2)
cat_p = torch.cat([up_p,x_p], dim=1)

# in Resnet4Channel:
self.fc = nn.Linear(1024, num_classes)
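With that pooling, both branches end up at 2x2, so the concatenation gives 256 channels on a 2x2 map, i.e. 1024 features after flattening, which matches the new fc layer (a quick shape check using the 128-channel UnetBlock above):

import torch
import torch.nn.functional as F

x_p = torch.randn(1, 128, 56, 56)
x_p = F.adaptive_max_pool2d(x_p, 2)    # -> torch.Size([1, 128, 2, 2])
up_p = torch.randn(1, 128, 2, 2)
cat_p = torch.cat([up_p, x_p], dim=1)  # -> torch.Size([1, 256, 2, 2])
print(cat_p.view(1, -1).shape)         # -> torch.Size([1, 1024])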

My intent was to append the UNet layer just before the classification layer, feed it the output of the last layer before the classification layer, and have the UNet layer produce the desired number of output channels. I mistook 512*4 as 512, so I wanted to give 256 inputs to the conv layer and get 256 from the deconv layer; that is why I put the wrong number of output channels for the UNet layers.
But the overall objective was:
ResNet-50 minus classification (512*4, num_classes) -> UNet -> classification (512*4, num_classes)

So I have to set the dimensions such that the in-channels for the deconv are fixed, which I believe will be 2048; the out-channels can be 1024. Similarly, the output from the conv layer can be 1024, and together both should total 2048 for the last FC layer.
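The deconv side of that arithmetic can be sanity-checked in isolation (a shape sketch, assuming the avgpool output of ResNet-50 as input):

import torch
import torch.nn as nn

up_p = torch.randn(1, 2048, 1, 1)  # avgpool output of ResNet-50
tr_conv = nn.ConvTranspose2d(2048, 1024, 2, stride=2)
print(tr_conv(up_p).shape)         # torch.Size([1, 1024, 2, 2])
# 1024 deconv channels + 1024 conv channels = 2048 into the final fc layer, as planned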

Hi,
I was able to resolve the dimension-related issues with this code. However, now when running fit
I get a load_state_dict error with missing keys:

Missing key(s) in state_dict: “conv1.weight”, “bn1.weight”, “bn1.bias”, “bn1.running_mean”, “bn1.running_var”, “layer1.0.conv1.weight”, “layer1.0.bn1.weight”, …
…at the end of the error; in between, all the layers are printed…
Unexpected key(s) in state_dict: “0.0.weight”, “0.1.weight”, “0.1.bias”, “0.1.running_mean”, “0.1.running_var”

class SaveFeatures():
    features = None

    def __init__(self, m):
        self.hook = m.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.features = output

    def remove(self): self.hook.remove()

def get_base(m):
    #layers = cut_model(f(True), cut)
    return nn.Sequential(*list(m.children()))

class UnetBlock(nn.Module):
    def __init__(self, up_in, x_in, n_out, m):
        super().__init__()
        #up_out = x_out = n_out//2
        x_out = 1024
        #n_out//2
        up_out = 1024
        self.x_conv = nn.Conv2d(1024, x_out, 1)
        self.tr_conv = nn.ConvTranspose2d(up_in, up_out, 2, stride=2)
        self.bn = nn.BatchNorm2d(x_out + up_out)
        self.sfs = [SaveFeatures(get_base(m)[5])]

    def forward(self, up_p):
        up_p = self.tr_conv(up_p)
        x_p = self.x_conv(self.sfs[0].features)
        cat_p = torch.cat([up_p, x_p], dim=1)
        return self.bn(F.relu(cat_p))

RESNET_ENCODERS = {
    34: torchvision.models.resnet34,
    50: torchvision.models.resnet50,
    101: torchvision.models.resnet101,
    152: torchvision.models.resnet152,
}

class Resnet4Channel1(nn.Module):
    def __init__(self, encoder_depth=50, pretrained=True, num_classes=28):
        super().__init__()

        encoder = RESNET_ENCODERS[encoder_depth](pretrained=pretrained)

        # we initialize this conv to take in 4 channels instead of 3
        # we keep the corresponding weights and initialize the new weights with zeros
        # this trick taken from https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb
        w = encoder.conv1.weight
        self.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.conv1.weight = torch.nn.Parameter(torch.cat((w, w[:, :1, :, :]), dim=1))
        #nn.Parameter(torch.cat((w, torch.zeros(64, 1, 7, 7)), dim=1))

        self.bn1 = encoder.bn1
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = encoder.layer1
        self.layer2 = encoder.layer2
        self.layer3 = encoder.layer3
        self.layer4 = encoder.layer4

        self.avgpool = encoder.avgpool
        self.x_unet = UnetBlock(2048, 256, 256, self.layer3)
        #self.x_unet = UnetBlock(2048, 256, 256)

        self.fc = nn.Linear(512 * (1 if encoder_depth == 34 else 4), num_classes)

        #self.sfs = [SaveFeatures(get_base()[4])]
        #self.sfs = get_base(self.layer3)[5]

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.x_unet(x)

        x = x.view(x.size(0), -1)

        x = self.fc(x)

        return x

    def close(self):
        for sf in self.sfs:
            sf.remove()
            gc.collect()

Model summary, in the flat numbered format. The end of the model:

(124): AvgPool2d(kernel_size=7, stride=1, padding=0)
(125): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
(126): ConvTranspose2d(2048, 1024, kernel_size=(2, 2), stride=(2, 2))
(127): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(128): Linear(in_features=2048, out_features=28, bias=True)

Layerwise:

  (2): Bottleneck(
    (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
  )
)
(avgpool): AvgPool2d(kernel_size=7, stride=1, padding=0)
(x_unet): UnetBlock(
  (x_conv): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
  (tr_conv): ConvTranspose2d(2048, 1024, kernel_size=(2, 2), stride=(2, 2))
  (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(fc): Linear(in_features=2048, out_features=28, bias=True)

  1. Head of the model:

(conv1): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )…

Not sure why I'm getting an error while loading the weights…
I did initialize the head of the model with weights for all 4 channels.
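One possible reading of the key mismatch (an observation, not a confirmed diagnosis): keys like 0.0.weight come from a model whose modules are wrapped in an nn.Sequential, where children are keyed by index, while conv1.weight comes from named attributes; the checkpoint being loaded was probably saved from a differently structured module. A minimal illustration of how the same layers yield different state_dict keys:

import torch.nn as nn

# named attributes -> named keys
named = nn.Module()
named.conv1 = nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False)
named.bn1 = nn.BatchNorm2d(64)
print(list(named.state_dict().keys()))
# ['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', ...]

# nested nn.Sequential -> numeric keys
flat = nn.Sequential(nn.Sequential(
    nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False),
    nn.BatchNorm2d(64),
))
print(list(flat.state_dict().keys()))
# ['0.0.weight', '0.1.weight', '0.1.bias', '0.1.running_mean', '0.1.running_var', ...]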