A strange kind of overfitting

Here are my training loss and validation loss; the validation loss looks a bit strange.
However, the model is definitely overfitting. My dataset contains only 1,500 grayscale images of size 30×30.

Judging by the validation loss, I wonder whether there is some bad data in my dataset?

The network seems to be overfitting. Also if any garbage data is present, that could show up in your train loss.

  1. You can apply more augmentations to the training data.
  2. Dropouts can help.
  3. Like you said, you can check your val data distribution and also look for any errors in it.
class PReNet_LSTM(nn.Module):
    """Recurrent convolutional network with a convolutional-LSTM state.

    On every recurrent iteration the original input is concatenated with the
    current estimate, passed through a feature convolution, a convolutional
    LSTM cell and five residual blocks, and projected back to one channel.

    NOTE(review): in the snippet this class was taken from, every
    ``nn.Sequential(`` was left unclosed and the lines between/after the
    convolutions were missing. The activation layers below (ReLU in the
    feature/residual branches, sigmoid/tanh on the LSTM gates) are restored
    from the standard LSTM gate formulation that ``forward`` implements
    (``c = f * c + i * g``, ``h = o * tanh(c)``) -- confirm against the
    original training code before loading existing checkpoints.

    Args:
        recurrent_iter: Number of recurrent refinement iterations.
        use_GPU: Kept for backward compatibility and stored on the instance.
            The recurrent state is now created on the same device as the
            input tensor, so this flag no longer forces a ``.cuda()`` call.
    """

    def __init__(self, recurrent_iter=6, use_GPU=True):
        super(PReNet_LSTM, self).__init__()
        self.iteration = recurrent_iter
        self.use_GPU = use_GPU

        # Feature extraction over cat(input, current estimate): 1 + 1 channels.
        self.conv0 = nn.Sequential(
            nn.Conv2d(2, 32, 3, 1, 1),
            nn.ReLU(),
        )

        # Residual branches; the skip connection is added in forward().
        self.res_conv1 = nn.Sequential(
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
        )
        self.res_conv2 = nn.Sequential(
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
        )
        self.res_conv3 = nn.Sequential(
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
        )
        self.res_conv4 = nn.Sequential(
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
        )
        self.res_conv5 = nn.Sequential(
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, 1, 1),
            nn.ReLU(),
        )

        # LSTM gates over cat(features, hidden state): 32 + 32 channels.
        # Input/forget/output gates are squashed to (0, 1); the candidate
        # cell update is squashed to (-1, 1).
        self.conv_i = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid(),
        )
        self.conv_f = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid(),
        )
        self.conv_g = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Tanh(),
        )
        self.conv_o = nn.Sequential(
            nn.Conv2d(32 + 32, 32, 3, 1, 1),
            nn.Sigmoid(),
        )

        # Projection from the 32 feature channels back to a 1-channel image.
        self.conv = nn.Sequential(
            nn.Conv2d(32, 1, 3, 1, 1),
        )

    def forward(self, input):
        """Run ``self.iteration`` recurrent refinement steps.

        Args:
            input: 4-D tensor indexed as (batch, channel, row, col). The
                first convolution expects 2 channels after concatenating
                ``input`` with the 1-channel estimate, so ``input`` must
                have exactly 1 channel.

        Returns:
            The estimate produced by the last iteration, or ``input`` itself
            when ``self.iteration`` is 0.
        """
        batch_size, row, col = input.size(0), input.size(2), input.size(3)

        x = input
        # Allocate the recurrent state on the input's device/dtype so the
        # module works on CPU and GPU alike without an explicit .cuda().
        h = input.new_zeros(batch_size, 32, row, col)
        c = input.new_zeros(batch_size, 32, row, col)

        residual_blocks = (
            self.res_conv1,
            self.res_conv2,
            self.res_conv3,
            self.res_conv4,
            self.res_conv5,
        )

        for _ in range(self.iteration):
            x = torch.cat((input, x), 1)
            x = self.conv0(x)

            # Convolutional LSTM cell update.
            x = torch.cat((x, h), 1)
            gate_i = self.conv_i(x)
            gate_f = self.conv_f(x)
            gate_g = self.conv_g(x)
            gate_o = self.conv_o(x)
            c = gate_f * c + gate_i * gate_g
            h = gate_o * torch.tanh(c)

            x = h
            for block in residual_blocks:
                x = F.relu(block(x) + x)
            x = self.conv(x)

        return x

Above is the model architecture I used. Where should I insert a dropout layer?

Your network seems to contain only convolutions, without any max pooling. I would suggest adding strided convolutions or max pooling. As for dropout, apply it after the convolutions and just before a dense layer, and in between some of the dense layers.