How to modify the last layer of SEResNext50_32x4d

I was wondering how to convert the last linear layer of the above architecture for classification task(in my case 6 classes) as there is no fc like in resnext ,any help is appreciated .
This is the entire architecture
ResNet(
(net): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
(4): Sequential(
(0): ResNetBlock(
(net): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(256, 16, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(16, 256, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResNetBlock(
(net): Sequential(
(0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(256, 16, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(16, 256, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(2): ResNetBlock(
(net): Sequential(
(0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(256, 16, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(16, 256, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
)
(5): Sequential(
(0): ResNetBlock(
(net): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(32, 512, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResNetBlock(
(net): Sequential(
(0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(32, 512, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(2): ResNetBlock(
(net): Sequential(
(0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(32, 512, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(3): ResNetBlock(
(net): Sequential(
(0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(32, 512, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
)
(6): Sequential(
(0): ResNetBlock(
(net): Sequential(
(0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(2): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(3): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(4): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(5): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
)
(7): Sequential(
(0): ResNetBlock(
(net): Sequential(
(0): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(2048, 128, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(128, 2048, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): ResNetBlock(
(net): Sequential(
(0): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(2048, 128, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(128, 2048, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
(2): ResNetBlock(
(net): Sequential(
(0): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(4): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(7): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): SEBlock(
(net): Sequential(
(0): AdaptiveAvgPool2d(output_size=1)
(1): Conv2d(2048, 128, kernel_size=(1, 1), stride=(1, 1))
(2): ReLU(inplace=True)
(3): Conv2d(128, 2048, kernel_size=(1, 1), stride=(1, 1))
(4): Sigmoid()
)
)
)
(relu): ReLU(inplace=True)
)
)
(8): AdaptiveAvgPool2d(output_size=(1, 1))
(9): Dropout(p=0.0, inplace=False)
(10): Flatten()
(11): Linear(in_features=2048, out_features=1000, bias=True)
)
)