While upsampling I don't get the needed size

Hello, first of all, this is my code:

# -*- coding: utf-8 -*-

"""fork-of-siamese-network-family-relationship (1).ipynb

Automatically generated by Colaboratory.

Original file is located at
Google Colab
"""

from google.colab import drive
drive.mount('/content/drive')

!mkdir input
! cp /content/drive/MyDrive/recognizing-faces-in-the-wild.zip /content/input

!pip install validators matplotlib
!pip install timm==0.5.4

# Commented out IPython magic to ensure Python compatibility.

import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

!unzip /content/input/recognizing-faces-in-the-wild.zip -d /content/input

!mkdir train
!unzip /content/input/train.zip -d ./input/train

import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from glob import glob
from collections import defaultdict

"""## Test"""

import torchvision
from random import choice
from torch.utils.data import Dataset
from PIL import Image
class KinDataset(Dataset):
    def __init__(self, relations, person_to_images_map, transform=None):
        self.relations = relations  # all possible parent-child relations collected from the csv file
        self.transform = transform
        self.person_to_images_map = person_to_images_map  # data of the form (family id : person's images)
        self.ppl = list(person_to_images_map.keys())  # list of people

    def __len__(self):
        return len(self.relations)*2

    def __getitem__(self, idx):

        if (idx%2==0): #Positive samples
            p1, p2 = self.relations[idx//2]
            label = 1
        else:          #Negative samples
            while True:
                p1 = choice(self.ppl)  # pick one person from the list
                p2 = choice(self.ppl)  # pick one person from the list
                if p1 != p2 and (p1, p2) not in self.relations and (p2, p1) not in self.relations:
                    # if the two are not the same person and are not related
                    break
            label = 0

        # load the images from the chosen file paths
        path1, path2 = choice(self.person_to_images_map[p1]), choice(self.person_to_images_map[p2])
        img1, img2 = Image.open(path1), Image.open(path2)

        if self.transform:
            img1, img2 = self.transform(img1), self.transform(img2)
        imgs = torch.cat((img1, img2))

        return imgs, label

print("Prepare data...")
train_file_path = "/content/input/train_relationships.csv"
train_folders_path = "./input/train/"
val_famillies = "F09"

all_images = glob(train_folders_path + "*/*/*.jpg")

train_images = [x for x in all_images if val_famillies not in x]
val_images = [x for x in all_images if val_famillies in x]

train_person_to_images_map = defaultdict(list)

ppl = [x.split("/")[-3] + "/" + x.split("/")[-2] for x in all_images]

for x in train_images:
    train_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)

val_person_to_images_map = defaultdict(list)

for x in val_images:
    val_person_to_images_map[x.split("/")[-3] + "/" + x.split("/")[-2]].append(x)

relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))
relationships = [x for x in relationships if x[0] in ppl and x[1] in ppl]

train_relations = [x for x in relationships if val_famillies not in x[0]]
val_relations = [x for x in relationships if val_famillies in x[0]]

from torch.utils.data import DataLoader
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize(160),
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])
val_transform = transforms.Compose([
    transforms.Resize(160),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

trainset = KinDataset(train_relations, train_person_to_images_map, train_transform)
valset = KinDataset(val_relations, val_person_to_images_map, val_transform)

trainloader = DataLoader(trainset, batch_size=200, shuffle=True)
valloader = DataLoader(valset, batch_size=200, shuffle=False)

"""# Model part

:desktop_computer: SiameseNet

"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models

class BasicConv2d(nn.Module):
    """
    basic convolution module
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=kernel_size, stride=stride,
            padding=padding, bias=False
        )  # verify bias false
        self.bn = nn.BatchNorm2d(
            out_planes,
            eps=0.001,  # value found in tensorflow
            momentum=0.1,  # default pytorch value
            affine=True
        )
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

class res_unit(nn.Module):
    """
    this is the attention module before the residual structure
    """
    def __init__(self, channel, up_size=None):
        """
        :param channel: channels of input feature map
        :param up_size: upsample size
        """
        super(res_unit, self).__init__()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv = nn.Conv2d(channel, channel, 3, padding=1)
        if up_size is None:
            self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        else:
            self.upsample = nn.Upsample(size=(up_size, up_size), mode='bilinear', align_corners=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        identity = x
        print("Intial Size", x.size(), identity.size())
        x = self.pool(x)
        print("Size After pooling", x.size(), identity.size())
        x = self.conv(x)
        print("Size After Convolution", x.size(), identity.size())
        x = self.upsample(x)
        print("Size after Upsampling", x.size(), identity.size())
        x = self.sigmoid(x)
        x = torch.mul(identity, x)
        return x

class basenet(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = _attenNet()
        self.fea = nn.Sequential(
            nn.Dropout(),
            nn.Linear(128 * 9 * 9, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 128),
        )

    def forward(self, x):
        x = self.base(x)
        x = x.view(-1, 9 * 9 * 128)
        x = self.fea(x)
        return x

class _atten(nn.Module):
    """
    the attention module
    """
    def __init__(self):
        super(_atten, self).__init__()
        self.conv1 = nn.Conv2d(6, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.at1 = res_unit(32)
        self.at2 = res_unit(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        # self.fc1 = nn.Linear((9*9*128), 512)
        # self.dp = nn.Dropout()
        # self.fc2 = nn.Linear(512, 2)

    def forward(self, x):
        """
        :param x: 6x64x64
        :return:
        """
        x = self.conv1(x)
        identity1 = x
        x = self.at1(x)
        x = identity1 + x
        x = self.bn1(x)
        x = self.pool(F.relu(x))

        x = self.conv2(x)
        identity2 = x
        x = self.at2(x)
        x = identity2 + x
        x = self.bn2(x)
        x = self.pool(F.relu(x))

        # x = x.view(-1, 9*9*128)
        # x = F.relu(self.fc1(x))
        # x = self.dp(x)
        # x = self.fc2(x)
        return x

class _attenNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.base = _atten()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128, up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

    def forward(self, x):
        x = self.base(x)
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        return x

class res_addNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128, up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

    def forward(self, x):
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        return x

class each_brach(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.at3 = res_unit(128, up_size=9)
        self.bn3 = nn.BatchNorm2d(128)

        self.fc = nn.Sequential(
            nn.Linear((9 * 9 * 128), 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.conv3(x)
        identity3 = x
        x = self.at3(x)
        x = identity3 + x
        x = self.bn3(x)
        x = F.relu(x)
        x = x.view(-1, 9*9*128)
        x = self.fc(x)
        return x

class My_Network(nn.Module):
    """
    concatenate 4x2 output + add loss layer
    """
    def __init__(self):
        super().__init__()
        self.base = _atten()

        self.fd_fc = each_brach()

    def forward(self, x):
        x = self.base(x)
        x = self.fd_fc(x)
        print("Done")
        return x

def train():
    net.train()  # start training (switch to training mode)
    train_loss = 0.0
    running_loss = 0.0
    running_corrects = 0

    for i, batch in enumerate(trainloader):
        # take batches from the loader one at a time
        optimizer.zero_grad()

        img, label = batch
        img, label = img.to(device), label.float().view(-1, 1).to(device)
        output = net(img)
        preds = output > 0.5
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == (label > 0.5))

        step = 100
        if i % step == step-1:
            print(' [{} - {:.2f}%],\ttrain loss: {:.5}'.format(epoch+1, 100*(i+1)/len(trainloader), running_loss/step/200))
            running_loss = 0

    train_loss /= len(trainset)
    running_corrects = running_corrects.item()/len(trainset)
    print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, running_corrects))
    return train_loss, running_corrects

def validate():
    net.eval()
    val_loss = 0.0
    running_corrects = 0

    for batch in valloader:
        img, label = batch
        img, label = img.to(device), label.float().view(-1, 1).to(device)
        with torch.no_grad():
            output = net(img)
            preds = output > 0.5
            loss = criterion(output, label)

        val_loss += loss.item()
        running_corrects += torch.sum(preds == (label > 0.5))

    val_loss /= len(valset)
    running_corrects = running_corrects.item()/len(valset)
    print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, running_corrects))

    return val_loss, running_corrects

"""## :grinning: Train"""

print("Initialize network...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = My_Network().to(device)
lr = 1e-3

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, patience=10)

print("Start training...")
num_epoch = 100

best_val_loss = 1000
best_epoch = 0

history = []
accuracy = []
for epoch in range(num_epoch):
    train_loss, train_acc = train()
    val_loss, val_acc = validate()
    history.append((train_loss, val_loss))
    accuracy.append((train_acc, val_acc))
    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(net.state_dict(), 'net_checkpoint.pth')

torch.save(net.state_dict(), 'net_full_training.pth')

I added a few print statements for debugging, from which I noticed that the upsampling is causing the error. Here is the result of the debugging:
Start training…
Intial Size torch.Size([200, 32, 156, 156]) torch.Size([200, 32, 156, 156])
Size After pooling torch.Size([200, 32, 78, 78]) torch.Size([200, 32, 156, 156])
Size After Convolution torch.Size([200, 32, 78, 78]) torch.Size([200, 32, 156, 156])
Size after Upsampling torch.Size([200, 32, 156, 156]) torch.Size([200, 32, 156, 156])
Intial Size torch.Size([200, 64, 74, 74]) torch.Size([200, 64, 74, 74])
Size After pooling torch.Size([200, 64, 37, 37]) torch.Size([200, 64, 74, 74])
Size After Convolution torch.Size([200, 64, 37, 37]) torch.Size([200, 64, 74, 74])
Size after Upsampling torch.Size([200, 64, 74, 74]) torch.Size([200, 64, 74, 74])
Intial Size torch.Size([200, 128, 33, 33]) torch.Size([200, 128, 33, 33])
Size After pooling torch.Size([200, 128, 16, 16]) torch.Size([200, 128, 33, 33])
Size After Convolution torch.Size([200, 128, 16, 16]) torch.Size([200, 128, 33, 33])
Size after Upsampling torch.Size([200, 128, 9, 9]) torch.Size([200, 128, 33, 33])

RuntimeError                              Traceback (most recent call last)
in <cell line: 9>()
      8 accuracy = []
      9 for epoch in range(num_epoch):
---> 10     train_loss, train_acc = train()
     11     val_loss, val_acc = validate()
     12     history.append((train_loss, val_loss))

9 frames
in forward(self, x)
     61         print("Size after Upsampling", x.size(), identity.size())
     62         x = self.sigmoid(x)
---> 63         x = torch.mul(identity, x)
     64         return x
     65

RuntimeError: The size of tensor a (33) must match the size of tensor b (9) at non-singleton dimension 3

Hello,
I haven't checked exactly what your code is doing, but the prints you have seem to be explainable by the following lines.

You have these two:

  self.at1 = res_unit(32)
  self.at2 = res_unit(64)

If I'm not wrong, they produce your first two print blocks. You don't define an up_size there, so your code takes the branch you wrote for when up_size is None, where the scale factor is 2.
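
To illustrate (a rough standalone sketch, not your exact code): with up_size left as None, the upsampling by a factor of 2 simply undoes the 2x2 max pooling, which is why those two blocks come back to the identity's spatial size:

import torch
import torch.nn as nn

x = torch.randn(1, 32, 156, 156)   # same spatial size as your first print block
pooled = nn.MaxPool2d(2, 2)(x)     # -> (1, 32, 78, 78)
up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)(pooled)
print(up.shape)                    # -> (1, 32, 156, 156), matching the identity again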

Further on, you call res_unit with a dedicated size:

  self.at3 = res_unit(128, up_size=9)

That seems to fix your upsampling output size at 9, which is what triggers the error.
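
Note that the forward docstring of _atten says the expected input is 6x64x64, while your transforms resize the images to 160. With 64x64 inputs the feature map entering at3 would be 9x9, so the hard-coded up_size=9 would match; with 160x160 inputs it is 33x33 (as your prints show), while the upsample still produces 9x9, so torch.mul(identity, x) fails. One option (a rough sketch of an alternative res_unit, not your original code) is to upsample back to whatever size the block's input had, so nothing is hard-coded:

import torch
import torch.nn as nn
import torch.nn.functional as F

class res_unit(nn.Module):
    def __init__(self, channel):
        super().__init__()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv = nn.Conv2d(channel, channel, 3, padding=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        identity = x
        x = self.pool(x)
        x = self.conv(x)
        # resize back to the identity's height/width so the element-wise
        # multiplication below always has matching shapes
        x = F.interpolate(x, size=identity.shape[-2:], mode='bilinear', align_corners=False)
        x = self.sigmoid(x)
        return torch.mul(identity, x)

Keep in mind that the fully connected part of each_brach still assumes a 9x9x128 feature map (x.view(-1, 9*9*128)), so either resize your inputs to 64 as the docstring suggests, or adjust up_size and the Linear dimensions together for 160x160 inputs.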