Model Returning the Same Output Despite Random Inputs

I am using AlphaZero to build a Connect Four engine. No matter which board I feed in, the policy and value outputs are identical, and even completely random input tensors produce the same output. To debug this, I added print statements after the first convolutional layer and after the residual (ResNet) blocks, and those intermediate activations come out as varying, random-looking values. Feeding random data directly into the policy and value heads also produces random, varying outputs (a sketch of these checks follows the test code below). But when I run the model end to end as intended, the output never changes. Here is my model code:

import torch
from torch import nn

# Residual block: two 3x3 convolutions with a skip connection (batch norm currently commented out)
class Block(nn.Module):
    def __init__(self, num_channels):
        super(Block, self).__init__()
        self.conv = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        self.batch_norm = nn.BatchNorm2d(num_channels)
    def forward(self, x):
        i = x
        # print("Shape of x before the block", x.shape)
        #x = self.batch_norm(x)
        x = self.relu(x)
        x = self.conv(x)
        # print("Shape of x after the first conv", x.shape)
        #x = self.batch_norm(x)
        x = self.relu(x)
        x = self.conv2(x)
        # print("Shape of x after the second conv", x.shape)
        return x + i

# Policy head: 1x1 conv down to 2 planes, flatten, then a linear layer producing one logit per column
class PolicyHead(nn.Module):
    def __init__(self, num_channels, num_actions):
        super(PolicyHead, self).__init__()
        self.num_channels = num_channels
        self.conv = nn.Conv2d(num_channels, 2, kernel_size=1)
        self.relu = nn.ReLU()
        self.batch_norm = nn.BatchNorm2d(2)
        self.fc = nn.Linear(in_features=84, out_features=num_actions)
    def forward(self, x):
        x = x.reshape(-1, self.num_channels, 6, 7)
        x = self.conv(x)
        #x = self.batch_norm(x)
        x = self.relu(x)
        x = torch.flatten(x, 1)
        return self.fc(x)

# Value head: 1x1 conv down to 2 planes, two linear layers, tanh output in [-1, 1]
class ValueHead(nn.Module):
    def __init__(self, num_channels):
        super(ValueHead, self).__init__()
        self.num_channels = num_channels
        self.conv = nn.Conv2d(num_channels, 2, kernel_size=1)
        self.relu = nn.ReLU()
        self.batch_norm = nn.BatchNorm2d(2)
        self.fc = nn.Linear(84, num_channels)
        self.fc2 = nn.Linear(num_channels, 1)
    def forward(self, x):
        x = x.reshape(-1, self.num_channels, 6, 7)
        x = self.conv(x)
        #x = self.batch_norm(x)
        x = self.relu(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = torch.tanh(x)
        return x

# Full network: initial conv, stack of residual blocks, then the policy and value heads
class AlphaZero(nn.Module):
    def __init__(self, num_channels=128, num_actions=7, num_blocks=20):
        super(AlphaZero, self).__init__()
        self.num_channels = num_channels
        self.conv = nn.Conv2d(1, num_channels, kernel_size=3, padding=1)
        self.blocks = nn.Sequential(*[Block(num_channels) for _ in range(num_blocks)])
        self.policy_head = PolicyHead(num_channels, num_actions)
        self.value_head = ValueHead(num_channels)
    def forward(self, x):
        x = x.view(-1, 1, 6, 7)
        x = self.conv(x)
        x = self.blocks(x)
        # print("Shape of x before the view", x.shape)
        x = x.view(-1, self.num_channels)
        # print("Shape of x after the view", x.shape)
        return self.policy_head(x), self.value_head(x)

Here is some code I used to test it:

weights = torch.load("./model-880", weights_only=True)
model = AlphaZero(num_channels=32, num_actions=7, num_blocks=8).to("cuda")
model.load_state_dict(weights['model_state_dict'])
x = torch.randint(10, (1, 6, 7), dtype=torch.float).to("cuda")  # arbitrary random "board" just to test
print(x)
print(model(x))
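
For reference, here is roughly how I checked the intermediate activations and the heads in isolation (a sketch of the checks mentioned at the top; the exact tensors I printed may have differed, and the head input shape is inferred from the reshape calls in the model):

with torch.no_grad():
    # intermediate activations after the first conv and after the residual blocks:
    # these values do change when the input changes
    t = model.conv(x.view(-1, 1, 6, 7))
    print(t)
    t = model.blocks(t)
    print(t)
    # feeding random data straight into the heads: these outputs also change
    h = torch.randn(1, 32, 6, 7, device="cuda")
    print(model.policy_head(h))
    print(model.value_head(h))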

During training, the loss fluctuated.