import torch
import torch.nn as nn


class MSFBCNN(nn.Module):
    def __init__(self, input_dim, output_dim, FT=10):
        super(MSFBCNN, self).__init__()
        self.T = input_dim[1]          # number of time samples
        self.FT = FT                   # temporal filters per branch
        self.D = 1
        self.FS = self.FT * self.D     # spatial filters
        self.C = input_dim[0]          # number of EEG channels
        self.output_dim = output_dim

        # Parallel temporal convolutions with different kernel lengths
        self.conv1a = nn.Conv2d(1, self.FT, (1, 65), padding=(0, 32), bias=False)
        self.conv1b = nn.Conv2d(1, self.FT, (1, 41), padding=(0, 20), bias=False)
        self.conv1c = nn.Conv2d(1, self.FT, (1, 27), padding=(0, 13), bias=False)
        self.conv1d = nn.Conv2d(1, self.FT, (1, 17), padding=(0, 8), bias=False)
        # Note: the second positional argument of BatchNorm2d is eps, so False sets eps=0
        # (affine=False may have been intended).
        self.batchnorm1 = nn.BatchNorm2d(4 * self.FT, False)
        self.pooling1 = nn.AvgPool2d(kernel_size=(1, 16), stride=(1, 2), padding=(0, 0))

        # Spatial convolution across all channels
        self.conv2 = nn.Conv2d(4 * self.FT, self.FS, (self.C, 1), padding=(0, 0), groups=1, bias=False)
        self.batchnorm2 = nn.BatchNorm2d(self.FS, False)

        # Temporal average pooling
        self.pooling2 = nn.AvgPool2d(kernel_size=(1, 32), stride=(1, 4), padding=(0, 0))
        self.drop = nn.Dropout(0.44)

        # LSTM over the pooled feature maps; input_size must equal the pooled temporal length
        self.lstm = nn.LSTM(input_size=131, hidden_size=512, num_layers=2, batch_first=True)

        # Classification
        self.fc1 = nn.Linear(512, self.output_dim)
    def forward(self, x):
        # Layer 1: four parallel temporal convolutions, concatenated along the channel dim
        x1 = self.conv1a(x)
        x2 = self.conv1b(x)
        x3 = self.conv1c(x)
        x4 = self.conv1d(x)
        x = torch.cat([x1, x2, x3, x4], dim=1)
        x = self.batchnorm1(x)
        x = self.pooling1(x)
        x = self.drop(x)

        # Layer 2: spatial convolution, squared activation, temporal pooling
        x = torch.pow(self.batchnorm2(self.conv2(x)), 2)
        x = self.pooling2(x)
        # x = torch.log(x)
        x = self.drop(x)

        # LSTM: collapse the singleton spatial dim, (N, FS, 1, L) -> (N, FS, L),
        # so the FS feature maps form the sequence and L is the per-step feature size
        x = x.view(x.size(0), x.size(1), x.size(3))
        h0 = torch.zeros(2, x.size(0), 512, device=x.device)
        c0 = torch.zeros(2, x.size(0), 512, device=x.device)
        x, _ = self.lstm(x, (h0, c0))

        # FC layer on the last LSTM step
        x = x[:, -1, :]
        x = self.fc1(x)
        return x
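For reference, a minimal sketch of how the model can be instantiated and called; the channel count (22), window length (1120 samples), batch size, and class count are example values I picked so that the pooled temporal length comes out to 131 and matches the LSTM's input_size:

    # Example usage (shapes are assumptions): input is (batch, 1, channels, time)
    model = MSFBCNN(input_dim=[22, 1120], output_dim=4)
    dummy = torch.randn(8, 1, 22, 1120)
    logits = model(dummy)
    print(logits.shape)  # torch.Size([8, 4])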
Can anybody tell me how to reduce overfitting? I have already tried changing every hyperparameter. @ptrblck
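To make the question concrete, here is a minimal sketch of one kind of regularization I could add on top of the dropout already in the model (weight decay in the optimizer plus early stopping on a validation split); the Adam settings, the dummy tensors, and the patience value are placeholder assumptions, not my actual training code:

    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset

    # Dummy data just so the sketch runs end to end; replace with the real dataset.
    train_ds = TensorDataset(torch.randn(64, 1, 22, 1120), torch.randint(0, 4, (64,)))
    val_ds = TensorDataset(torch.randn(32, 1, 22, 1120), torch.randint(0, 4, (32,)))
    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=8)

    model = MSFBCNN(input_dim=[22, 1120], output_dim=4)
    criterion = nn.CrossEntropyLoss()
    # weight_decay adds L2 regularization; the value is an assumption to tune.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

    best_val, patience, bad_epochs = float("inf"), 10, 0
    for epoch in range(100):
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

        # Validation pass for early stopping
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                val_loss += criterion(model(xb), yb).item()

        if val_loss < best_val:
            best_val, bad_epochs = val_loss, 0
            torch.save(model.state_dict(), "best_msfbcnn.pt")  # keep the best checkpoint
        else:
            bad_epochs += 1
            if bad_epochs >= patience:
                break  # stop once validation loss has not improved for `patience` epochs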