CNN-GRU-CTC model does not train

I am using a CNN-GRU-CTC model to perform OCR on images of codes printed on a black background. The code is based on this repository. During training, after a few batches, the model starts outputting only the blank character. Similar questions suggest this can happen when the blank character appears in the training labels, but that is not the case here, so I suspect an error in my model instead. Could someone help me find the cause?

The data is made up of codes that are combinations of capital letters, digits, and the ':' character as a separator.
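For illustration, here is a made-up code (not an actual sample from my data) together with the encoding my dataloader produces for it:

code = "A1:B2"  # hypothetical example; real codes use the same alphabet
# '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, ':' -> 36
# "A1:B2" -> [10, 1, 36, 11, 2]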

My dataloader:

import torch
from skimage import io
from torch.utils.data import Dataset, DataLoader

class CodesDataset(Dataset):
    def __init__(self, ann_path, code_path, transform=None):
        self.code_path = code_path
        self.ann_path = ann_path
        self.transform = transform

    def __len__(self):
        return 1000  # the dataset consists of 1000 image/annotation pairs

    def __getitem__(self, idx):
        img_name = self.code_path + "/" + str(idx) + ".png"
        image = io.imread(img_name)

        annotation_name = self.ann_path + "/" + str(idx) + ".txt"
        with open(annotation_name, "r") as f:
            annotation = f.read().strip()  # strip the trailing newline so it is not encoded

        # Encode each character: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, ':' -> 36
        annotation_numbers = []
        for c in annotation:
            if c.isdigit():
                annotation_numbers.append(int(c))
            elif c == ":":
                annotation_numbers.append(36)
            else:
                annotation_numbers.append(ord(c) - 55)  # ord('A') == 65, so 'A' -> 10

        if self.transform:
            image = self.transform(image)

        return image, torch.IntTensor(annotation_numbers)
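For reference, a quick check along these lines confirms that the blank index (37, as defined in the training script below) never appears in the encoded targets:

# Sanity check: the encoding above only produces values in [0, 36],
# so the CTC blank (index 37) should never appear in any target.
dataset = CodesDataset("data/annotations", "data/codes")
for i in range(len(dataset)):
    _, target = dataset[i]
    assert 0 <= target.min() and target.max() <= 36, f"bad label in sample {i}"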

My model and training loop:

import sys
from itertools import groupby

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
import torchvision.transforms
from colorama import Fore
from torchvision import datasets, transforms
from tqdm import tqdm
from CodesDataset import CodesDataset

# ============================================= PREPARING DATASET ======================================================
epochs = 100
num_classes = 38          # 10 digits + 26 letters + ':' + CTC blank
blank_label = 37          # index of the CTC blank
image_height = 28
gru_hidden_size = 128
gru_num_layers = 2
cnn_output_height = 4     # expected height of the CNN feature map
cnn_output_width = 32     # expected width of the CNN feature map (CTC time steps)
digits_per_sequence = 8

transform = torchvision.transforms.Compose([transforms.ToTensor(), transforms.Resize((100, 100))])

seq_dataset = CodesDataset("data/annotations", "data/codes", transform)
train_set, val_set = torch.utils.data.random_split(seq_dataset,
                                                   [int(len(seq_dataset) * 0.8), int(len(seq_dataset) * 0.2)])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=True)


# ================================================= MODEL ==============================================================
class CRNN(nn.Module):

    def __init__(self):
        super(CRNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3, 3))
        self.norm1 = nn.InstanceNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3, 3), stride=2)
        self.norm2 = nn.InstanceNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.norm3 = nn.InstanceNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=2)
        self.norm4 = nn.InstanceNorm2d(64)
        self.gru_input_size = cnn_output_height * 64
        self.gru = nn.GRU(self.gru_input_size, gru_hidden_size, gru_num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(gru_hidden_size * 2, num_classes)

    def forward(self, x):
        batch_size = x.shape[0]
        out = self.conv1(x)
        out = self.norm1(out)
        out = F.leaky_relu(out)
        out = self.conv2(out)
        out = self.norm2(out)
        out = F.leaky_relu(out)
        out = self.conv3(out)
        out = self.norm3(out)
        out = F.leaky_relu(out)
        out = self.conv4(out)
        out = self.norm4(out)
        out = F.leaky_relu(out)
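        # Flatten the CNN feature map into a sequence for the GRU:
        # (N, C, H, W) -> (N, W, H, C), then reshape to (N, -1, gru_input_size)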
        out = out.permute(0, 3, 2, 1)
        out = out.reshape(batch_size, -1, self.gru_input_size)
        out, _ = self.gru(out)
        out = F.log_softmax(self.fc(out), dim=-1)  # Linear and log_softmax both act on the last dim of (N, T, C)
        return out


model = CRNN()
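# CTCLoss expects log-probabilities shaped (T, N, C) plus per-sample input
# and target lengths; the blank symbol is mapped to the last class index, 37.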
criterion = nn.CTCLoss(blank=blank_label, reduction='mean', zero_infinity=True)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ================================================ TRAINING MODEL ======================================================
for _ in range(epochs):
    # ============================================ TRAINING ============================================================
    train_correct = 0
    train_total = 0
    for x_train, y_train in tqdm(train_loader,
                                 position=0, leave=True,
                                 file=sys.stdout, bar_format="{l_bar}%s{bar}%s{r_bar}" % (Fore.GREEN, Fore.RESET)):
        batch_size = x_train.shape[0]
        optimizer.zero_grad()
        y_pred = model(x_train)
        y_pred = y_pred.permute(1, 0, 2)  # (N, T, C) -> (T, N, C), as CTCLoss expects
        input_lengths = torch.full((batch_size,), cnn_output_width, dtype=torch.int32)
        target_lengths = torch.IntTensor([len(t) for t in y_train])
        loss = criterion(y_pred, y_train, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()
        _, max_index = torch.max(y_pred, dim=2)  # max_index: (T, batch_size)
        for i in range(batch_size):
            raw_prediction = list(max_index[:, i].detach().cpu().numpy())  # class index per time step
            # Greedy CTC decoding: collapse repeated symbols, then drop blanks
            prediction = torch.IntTensor([c for c, _ in groupby(raw_prediction) if c != blank_label])
            if len(prediction) == len(y_train[i]) and torch.all(prediction.eq(y_train[i])):
                train_correct += 1
            train_total += 1
    print('TRAINING. Correct: ', train_correct, '/', train_total, '=', train_correct / train_total)
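For completeness, here is a quick shape check with a dummy batch (same 100x100 size as the training images) to compare the model's actual time dimension against the hard-coded cnn_output_width used for input_lengths:

# Dummy forward pass to inspect the sequence length the model really produces
with torch.no_grad():
    dummy = torch.zeros(1, 3, 100, 100)
    print(model(dummy).shape)  # (batch, T, num_classes); T should equal cnn_output_width

If the printed T differs from cnn_output_width, then the input_lengths passed to CTCLoss would not describe the actual output sequences.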

Thanks in advance.