Spatiotemporal Classification by GRU or RNN

Hi everyone,

I am trying to write code for motion classification on motion capture data.
The 1-D CNN works very well, reaching more than 90% accuracy, but when I try a GRU the accuracy improves by only 1 or 2% and then plateaus at around 25% when classifying among 8 classes. The RNN works a little better but plateaus at 30%.

These are my RNN and CNN models:

class RNN_ClassifierModel(nn.Module):
    """GRU sequence classifier with a linear head on the final hidden state.

    The GRU is built with the default ``batch_first=False``, so ``forward``
    permutes its input with ``(2, 0, 1)`` before feeding it to the GRU.
    The number of output classes is ``len(config.training_sel_motions)``.
    """

    def __init__(self, config):
        """Build the GRU and the linear classification head.

        Args:
            config: Object exposing ``training_sel_motions``; its length
                sets the number of output logits.
        """
        super().__init__()
        self.config = config
        self.hidden_channels = 100
        self.num_layers = 1
        self.input_channels = 69

        self.gru = nn.GRU(
            input_size=self.input_channels,
            hidden_size=self.hidden_channels,
            num_layers=self.num_layers,
        )
        self.linear = nn.Linear(
            self.hidden_channels,
            len(self.config.training_sel_motions),
        )

    def forward(self, xb):
        """Return class logits for a batch of sequences.

        Args:
            xb: 3-D tensor; assumed to be (batch, channels, time) with
                ``channels == self.input_channels`` -- TODO confirm against
                the data loader.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # Move the last axis to the front: the GRU expects the sequence axis
        # first because it was created without ``batch_first=True``.
        xb = xb.permute(2, 0, 1)
        _, hidden = self.gru(xb)
        # ``hidden`` is (num_layers, batch, hidden_channels). Index the top
        # layer instead of ``squeeze(0)``: the squeeze only drops the layer
        # axis when num_layers == 1 and would otherwise leak it into the
        # logits.
        out = self.linear(hidden[-1])
        return out


class CNN_ClassifierModel(nn.Module):
    """1-D convolutional classifier.

    Three strided convolutions, each followed by ReLU and max pooling,
    double the channel count at every stage. The result is averaged over
    the remaining length and mapped to ``len(config.training_sel_motions)``
    logits by a linear layer.
    """

    def __init__(self, config):
        """Create the convolution stack, pooling layers and linear head.

        Args:
            config: Object exposing ``training_sel_motions``; its length
                sets the number of output logits.
        """
        super().__init__()
        self.config = config
        self.input_channels = 69

        # Channel widths entering/leaving each conv stage: 1x, 2x, 4x, 8x.
        base = self.input_channels
        widths = (base, base * 2, base * 4, base * 8)
        num_classes = len(self.config.training_sel_motions)

        self.conv1 = nn.Conv1d(
            in_channels=widths[0],
            out_channels=widths[1],
            kernel_size=4,
            stride=2,
            padding=1,
        )
        self.conv2 = nn.Conv1d(
            in_channels=widths[1],
            out_channels=widths[2],
            kernel_size=4,
            stride=2,
            padding=1,
        )
        self.conv3 = nn.Conv1d(
            in_channels=widths[2],
            out_channels=widths[3],
            kernel_size=4,
            stride=2,
            padding=1,
        )
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.linear = nn.Linear(in_features=widths[3], out_features=num_classes)
        self.adaptive_pool = nn.AdaptiveAvgPool1d(output_size=1)

    def forward(self, xb):
        """Return class logits for a batch of sequences.

        Args:
            xb: 3-D tensor whose second axis has ``self.input_channels``
                entries; presumably (batch, channels, time) -- verify
                against the caller.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        features = xb
        # Each stage: convolution -> ReLU -> max pool (shared pooling layer).
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(F.relu(conv(features)))

        # Average whatever length is left down to 1, then drop that axis.
        pooled = self.adaptive_pool(features).squeeze(2)
        return self.linear(pooled)

Everything else is the same for both models. I am using nn.CrossEntropyLoss() for loss.

Thanks