Here are my train loss and val loss; the val loss looks a bit strange.
It is definitely overfitting, however. My dataset only contains 1500 30×30 grayscale images.
I wonder, judging from the val loss, whether there is any bad data in my dataset?
Here are my train loss and val loss; the val loss looks a bit strange.
It is definitely overfitting, however. My dataset only contains 1500 30×30 grayscale images.
I wonder, judging from the val loss, whether there is any bad data in my dataset?
The network seems to be overfitting. Also, if any garbage data is present, that could show up in your train loss.
class PReNet_LSTM(nn.Module):
    """Progressive recurrent network with a convolutional LSTM state (PReNet-LSTM).

    Each recurrent iteration concatenates the original input with the current
    estimate, updates a ConvLSTM hidden/cell state, refines the features
    through five residual conv blocks, and projects back to a single channel.
    The single-channel output of one iteration is fed back as the estimate for
    the next.

    Args:
        recurrent_iter: number of recurrent refinement iterations (default 6).
        use_GPU: kept for backward compatibility with existing callers; the
            hidden/cell states are now created on ``input.device`` directly,
            which also makes CPU-only runs work without editing the class.

    Input:  tensor of shape (batch, 1, H, W) — conv0 takes 2 channels, the
            1-channel input concatenated with the 1-channel estimate.
    Output: tensor of shape (batch, 1, H, W).
    """

    def __init__(self, recurrent_iter=6, use_GPU=True):
        super(PReNet_LSTM, self).__init__()
        self.iteration = recurrent_iter
        self.use_GPU = use_GPU  # retained for interface compatibility

        # Fuse the 1-channel input with the 1-channel current estimate.
        self.conv0 = nn.Sequential(
            nn.Conv2d(2, 32, 3, 1, 1),
            nn.ReLU()
        )

        # Five identical residual blocks; built by a helper instead of five
        # copy-pasted Sequentials. Attribute names are unchanged so existing
        # checkpoints (state_dict keys) still load.
        def _res_block():
            return nn.Sequential(
                nn.Conv2d(32, 32, 3, 1, 1),
                nn.ReLU(),
                nn.Conv2d(32, 32, 3, 1, 1),
                nn.ReLU()
            )

        self.res_conv1 = _res_block()
        self.res_conv2 = _res_block()
        self.res_conv3 = _res_block()
        self.res_conv4 = _res_block()
        self.res_conv5 = _res_block()

        # ConvLSTM gates: each sees [features (32) ; hidden state (32)].
        # i/f/o use sigmoid; the candidate g uses tanh.
        self.conv_i = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid()
        )
        self.conv_f = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid()
        )
        self.conv_g = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Tanh()
        )
        self.conv_o = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid()
        )

        # Project 32 feature channels back to the single-channel estimate.
        self.conv = nn.Sequential(
            nn.Conv2d(32, 1, 3, 1, 1),
        )

    def forward(self, input):
        batch_size, row, col = input.size(0), input.size(2), input.size(3)
        x = input

        # ConvLSTM state, created on the same device as the input.
        # (torch.autograd.Variable is deprecated since PyTorch 0.4; plain
        # tensors are Variables now, and input.device replaces the manual
        # use_GPU/.cuda() branch.)
        h = torch.zeros(batch_size, 32, row, col, device=input.device)
        c = torch.zeros(batch_size, 32, row, col, device=input.device)

        for _ in range(self.iteration):
            x = torch.cat((input, x), 1)
            x = self.conv0(x)
            x = torch.cat((x, h), 1)

            # LSTM gate activations. Bug fix: the input gate was previously
            # bound to the name ``i``, shadowing the loop index.
            i_gate = self.conv_i(x)
            f_gate = self.conv_f(x)
            g_gate = self.conv_g(x)
            o_gate = self.conv_o(x)
            c = f_gate * c + i_gate * g_gate
            h = o_gate * torch.tanh(c)
            x = h

            # Five residual refinement blocks: x <- relu(block(x) + x).
            for res_block in (self.res_conv1, self.res_conv2, self.res_conv3,
                              self.res_conv4, self.res_conv5):
                x = F.relu(res_block(x) + x)

            # New single-channel estimate, fed back on the next iteration.
            x = self.conv(x)

        return x
Above is the model architecture I applied. Where should I insert a dropout layer?
You seem to have only convolutions in your network, without any max-pooling. I would suggest applying strided convolutions or max-pooling. As for dropout, apply it after the convolutions — just before a dense layer, and in between dense layers.