Fixing RuntimeError: size mismatch, m1: [2 x 21504], m2: [25088 x 512]

Hi, I'm not sure what happened, but when I try to run my code this error keeps popping up:

```
RuntimeError: size mismatch, m1: [2 x 21504], m2: [25088 x 512] at C:\Users\builder\AppData\Local\Temp\pip-req-build-r5jc5joa\aten\src\TH/generic/THTensorMath.cpp:41
```

Here is my code. Can anyone check what went wrong?
```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn import Parameter


class Bottleneck(nn.Module):
    def __init__(self, inp, oup, stride, expansion):
        super(Bottleneck, self).__init__()
        self.connect = stride == 1 and inp == oup

        self.conv = nn.Sequential(
            # pw
            nn.Conv2d(inp, inp * expansion, 1, 1, 0, bias=False),
            nn.BatchNorm2d(inp * expansion),
            nn.PReLU(inp * expansion),
            # nn.ReLU(inplace=True),

            # dw
            nn.Conv2d(inp * expansion, inp * expansion, 3, stride, 1, groups=inp * expansion, bias=False),
            nn.BatchNorm2d(inp * expansion),
            nn.PReLU(inp * expansion),
            # nn.ReLU(inplace=True),

            # pw-linear
            nn.Conv2d(inp * expansion, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        )

    def forward(self, x):
        if self.connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class ConvBlock(nn.Module):
    def __init__(self, inp, oup, k, s, p, dw=False, linear=False):
        super(ConvBlock, self).__init__()
        self.linear = linear
        if dw:
            self.conv = nn.Conv2d(inp, oup, k, s, p, groups=inp, bias=False)
        else:
            self.conv = nn.Conv2d(inp, oup, k, s, p, bias=False)
        self.bn = nn.BatchNorm2d(oup)
        if not linear:
            self.prelu = nn.PReLU(oup)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.linear:
            return x
        else:
            return self.prelu(x)


MobiFace_bottleneck_setting = [
    # t, c, n, s
    [2, 64, 1, 2],
    [2, 64, 2, 1],
    [4, 128, 1, 2],
    [2, 128, 3, 1],
    [4, 256, 1, 2],
    [2, 256, 6, 1]
]


class MobiFace(nn.Module):
    def __init__(self, bottleneck_setting=MobiFace_bottleneck_setting, final_linear=False):
        super(MobiFace, self).__init__()
        self.final_linear = final_linear

        self.conv1 = ConvBlock(3, 64, 3, 2, 1)

        self.dw_conv1 = ConvBlock(64, 64, 3, 1, 1, dw=True)

        self.inplanes = 64
        block = Bottleneck
        self.blocks = self._make_layer(block, bottleneck_setting)

        self.conv2 = ConvBlock(256, 512, 1, 1, 0, linear=True)

        self.linear1 = nn.Linear(7*7*512, 512)

        self.prelu1 = nn.PReLU()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, setting):
        layers = []
        for t, c, n, s in setting:
            for i in range(n):
                if i == 0:
                    layers.append(block(self.inplanes, c, s, t))
                else:
                    layers.append(block(self.inplanes, c, 1, t))
                self.inplanes = c

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.dw_conv1(x)
        x = self.blocks(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.linear1(x)
        if self.final_linear is False:
            x = self.prelu1(x)

        return x


class ArcMarginProduct(nn.Module):
    def __init__(self, in_features=128, out_features=200, s=32.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        # init.kaiming_uniform_()
        # self.weight.data.normal_(std=0.001)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # make the function cos(theta+m) monotonic decreasing while theta in [0°,180°]
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, x, label):
        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


if __name__ == "__main__":
    input = Variable(torch.FloatTensor(2, 3, 112, 96))
    net = MobiFace()
    print(net)
    x = net(input)
    print(x.shape)
```

I assume the error might be raised in this line of code:

```python
x = self.linear1(x)
```

If that's the case, the number of features in the flattened activation (21504 according to your error message) doesn't match the `in_features` specified for the `self.linear1` layer (`7*7*512 = 25088`), and you would have to adapt the layer.
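For reference, with the `2 x 3 x 112 x 96` input from your script, the spatial size after the four stride-2 stages is 7×6, so the flattened activation has `512 * 7 * 6 = 21504` features rather than the `7*7*512 = 25088` that `self.linear1` currently expects. Here is a minimal sketch of how you could infer the right value with a dummy forward pass instead of computing it by hand (assuming the input resolution stays fixed at 112×96):

```python
import torch
import torch.nn as nn

net = MobiFace()

# Run a dummy input through the convolutional part only; linear1 is not used here,
# so this does not raise the size-mismatch error.
with torch.no_grad():
    dummy = torch.zeros(1, 3, 112, 96)
    feats = net.conv2(net.blocks(net.dw_conv1(net.conv1(dummy))))

in_features = feats.view(1, -1).size(1)    # 512 * 7 * 6 = 21504 for a 112x96 input
net.linear1 = nn.Linear(in_features, 512)  # replace the mis-sized layer
```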

PS: you can post code snippets by wrapping them in three backticks ```, which makes debugging easier :wink:

Can you explain what you mean by "specified"?

When creating the nn.Linear layer, you have to set specific numbers for `in_features` and `out_features`. I guess the number for `in_features` is wrong and has to be changed to the number of features expected from the incoming activation.

To get this number, you could add a print statement inside the forward method and print the shape of the activation (e.g. via `print(x.shape)`) before passing it to the layer that raises this error.
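For example (just a sketch of this debugging step, using the forward method from your model):

```python
def forward(self, x):
    x = self.conv1(x)
    x = self.dw_conv1(x)
    x = self.blocks(x)
    x = self.conv2(x)
    x = x.view(x.size(0), -1)
    print(x.shape)       # prints torch.Size([2, 21504]) for your 2x3x112x96 input
    x = self.linear1(x)  # in_features of linear1 must match the printed feature count
    if self.final_linear is False:
        x = self.prelu1(x)

    return x
```

Once you see the printed number of features, use it as the `in_features` argument when creating `self.linear1`.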