@ptrblck I tried to change all the BatchNorm layers to GroupNorm layers, and the replacement itself worked, but while training the model with GroupNorm I am getting this error:
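For reference, I did the swap roughly like this (a minimal sketch, not my exact code; the helper name convert_bn_to_gn is mine, but the printout below confirms every GroupNorm was created with a single group):

import torch
import torch.nn as nn

def convert_bn_to_gn(module, num_groups=1):
    # Recursively replace every BatchNorm2d with a GroupNorm over the same channel count.
    for name, child in module.named_children():
        if isinstance(child, nn.BatchNorm2d):
            setattr(module, name, nn.GroupNorm(num_groups, child.num_features))
        else:
            convert_bn_to_gn(child, num_groups)
    return module

# usage sketch: the new GroupNorm layers are created on the CPU,
# so the model still has to be moved to the GPU after the swap
# model = enetv2(...)
# convert_bn_to_gn(model.enet)
# model = model.to(torch.device('cuda'))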
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-22-cf57c28a75f1> in train_epoch(loader, optimizer)
9 loss_func = criterion
10 optimizer.zero_grad()
---> 11 logits = model(data)
12 loss = loss_func(logits, target)
13 loss.backward()
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-10-902386208771> in forward(self, x)
26
27 def forward(self, x):
---> 28 x = self.extract(x)
29 x = self.myfc(x)
30 return x
<ipython-input-10-902386208771> in extract(self, x)
23
24 def extract(self, x):
---> 25 return self.enet(x)
26
27 def forward(self, x):
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/kaggle/input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/efficientnet_pytorch/model.py in forward(self, inputs)
176
177 # Convolution layers
--> 178 x = self.extract_features(inputs)
179
180 # Pooling and final linear layer
/kaggle/input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/efficientnet_pytorch/model.py in extract_features(self, inputs)
158
159 # Stem
--> 160 x = relu_fn(self._bn0(self._conv_stem(inputs)))
161
162 # Blocks
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/normalization.py in forward(self, input)
223 def forward(self, input):
224 return F.group_norm(
--> 225 input, self.num_groups, self.weight, self.bias, self.eps)
226
227 def extra_repr(self):
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in group_norm(input, num_groups, weight, bias, eps)
1971 + list(input.size()[2:]))
1972 return torch.group_norm(input, num_groups, weight, bias, eps,
-> 1973 torch.backends.cudnn.enabled)
1974
1975
RuntimeError: expected device cpu but got device cuda:0
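The training step that triggers it is basically the snippet below (reconstructed from the traceback; model, criterion, optimizer, and device are defined earlier in my notebook, and moving data/target to the GPU is my assumption, since the error shows the input is already on cuda:0):

def train_epoch(loader, optimizer):
    model.train()
    for data, target in loader:
        data, target = data.to(device), target.to(device)  # assumed: batch lives on the GPU
        loss_func = criterion
        optimizer.zero_grad()
        logits = model(data)              # fails inside the stem's GroupNorm
        loss = loss_func(logits, target)
        loss.backward()
        optimizer.step()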
Here is my full model:
enetv2(
(enet): EfficientNet(
(_conv_stem): Conv2dStaticSamePadding(
3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
(static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
)
(_bn0): GroupNorm(1, 32, eps=1e-05, affine=True)
(_blocks): ModuleList(
(0): MBConvBlock(
(_depthwise_conv): Conv2dStaticSamePadding(
32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
(static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
)
(_bn1): GroupNorm(1, 32, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
32, 8, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
8, 32, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 16, eps=1e-05, affine=True)
)
(1): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 96, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
96, 96, kernel_size=(3, 3), stride=[2, 2], groups=96, bias=False
(static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
)
(_bn1): GroupNorm(1, 96, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
96, 4, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
4, 96, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 24, eps=1e-05, affine=True)
)
(2): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 144, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
144, 144, kernel_size=(3, 3), stride=(1, 1), groups=144, bias=False
(static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
)
(_bn1): GroupNorm(1, 144, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
144, 6, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
6, 144, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 24, eps=1e-05, affine=True)
)
(3): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 144, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
144, 144, kernel_size=(5, 5), stride=[2, 2], groups=144, bias=False
(static_padding): ZeroPad2d(padding=(1, 2, 1, 2), value=0.0)
)
(_bn1): GroupNorm(1, 144, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
144, 6, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
6, 144, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 40, eps=1e-05, affine=True)
)
(4): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 240, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
240, 240, kernel_size=(5, 5), stride=(1, 1), groups=240, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 240, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
240, 10, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
10, 240, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 40, eps=1e-05, affine=True)
)
(5): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 240, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
240, 240, kernel_size=(3, 3), stride=[2, 2], groups=240, bias=False
(static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
)
(_bn1): GroupNorm(1, 240, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
240, 10, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
10, 240, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 80, eps=1e-05, affine=True)
)
(6): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 480, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
480, 480, kernel_size=(3, 3), stride=(1, 1), groups=480, bias=False
(static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
)
(_bn1): GroupNorm(1, 480, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
480, 20, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
20, 480, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 80, eps=1e-05, affine=True)
)
(7): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 480, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
480, 480, kernel_size=(3, 3), stride=(1, 1), groups=480, bias=False
(static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
)
(_bn1): GroupNorm(1, 480, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
480, 20, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
20, 480, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 80, eps=1e-05, affine=True)
)
(8): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 480, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
480, 480, kernel_size=(5, 5), stride=[1, 1], groups=480, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 480, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
480, 20, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
20, 480, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 112, eps=1e-05, affine=True)
)
(9): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 672, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
672, 672, kernel_size=(5, 5), stride=(1, 1), groups=672, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 672, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
672, 28, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
28, 672, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 112, eps=1e-05, affine=True)
)
(10): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 672, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
672, 672, kernel_size=(5, 5), stride=(1, 1), groups=672, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 672, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
672, 28, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
28, 672, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 112, eps=1e-05, affine=True)
)
(11): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 672, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
672, 672, kernel_size=(5, 5), stride=[2, 2], groups=672, bias=False
(static_padding): ZeroPad2d(padding=(1, 2, 1, 2), value=0.0)
)
(_bn1): GroupNorm(1, 672, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
672, 28, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
28, 672, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 192, eps=1e-05, affine=True)
)
(12): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(5, 5), stride=(1, 1), groups=1152, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 192, eps=1e-05, affine=True)
)
(13): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(5, 5), stride=(1, 1), groups=1152, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 192, eps=1e-05, affine=True)
)
(14): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(5, 5), stride=(1, 1), groups=1152, bias=False
(static_padding): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
)
(_bn1): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 192, eps=1e-05, affine=True)
)
(15): MBConvBlock(
(_expand_conv): Conv2dStaticSamePadding(
192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn0): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_depthwise_conv): Conv2dStaticSamePadding(
1152, 1152, kernel_size=(3, 3), stride=[1, 1], groups=1152, bias=False
(static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
)
(_bn1): GroupNorm(1, 1152, eps=1e-05, affine=True)
(_se_reduce): Conv2dStaticSamePadding(
1152, 48, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_se_expand): Conv2dStaticSamePadding(
48, 1152, kernel_size=(1, 1), stride=(1, 1)
(static_padding): Identity()
)
(_project_conv): Conv2dStaticSamePadding(
1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn2): GroupNorm(1, 320, eps=1e-05, affine=True)
)
)
(_conv_head): Conv2dStaticSamePadding(
320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False
(static_padding): Identity()
)
(_bn1): GroupNorm(1, 1280, eps=1e-05, affine=True)
(_fc): Identity()
)
(myfc): Linear(in_features=1280, out_features=5, bias=True)
(avg_pool): GeM(p=3.0000, eps=1e-06)
)