Problem Reading .npy Arrays and Training a Model

I have a directory of .npy files from which I load data and train a model for binary classification. The two classes are balanced (equal numbers of samples). The loss does not decrease from epoch to epoch and the model keeps predicting a single class. I am new to PyTorch and have spent a lot of time troubleshooting this; I even tried a very small dataset (200 samples per class), but the same problem persists.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets


class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.fc1 = nn.Linear(in_features=128*6*6, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=100)
        self.out = nn.Linear(in_features=100, out_features=2)
    def forward(self, t):
        POOL_stride = 2
        # Conv block 1
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # Conv block 2
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # Conv block 3
        t = F.relu(self.conv3(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # Dense 1
        t = t.reshape(-1, 128*6*6)
        t = self.fc1(t)
        t = F.relu(t)
        # Dense 2
        t = self.fc2(t)
        t = F.relu(t)
        # Output layer: raw logits (CrossEntropyLoss applies log-softmax itself)
        t = self.out(t)
        return t
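
# Quick sanity check (a sketch, not from the original post): 128*6*6 after three
# conv(kernel=3) + max-pool(2) stages implies 2-channel 62x62 inputs.
_dummy = torch.randn(1, 2, 62, 62)
assert Network()(_dummy).shape == (1, 2)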

def npy_loader(path):
    # np.load often returns float64; cast to float32 so it matches the model weights
    sample = torch.from_numpy(np.load(path)).float()
    return sample

model = Network().to(device)   # create the model (and move it to the device) first
criterion = nn.CrossEntropyLoss()
# Bug in the original: the optimizer was built from `self.model.parameters()` before
# `model` existed; it must receive the parameters of the model being trained.
optimizer = optim.Adam(model.parameters(), lr=0.003)

trainset = datasets.DatasetFolder(
    root=train_dir,
    loader=npy_loader,
    extensions=['.npy'],
)
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)
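
# Sketch (not in the original post): inspect one batch before training to confirm
# the inputs are float 2-channel images and that both classes appear in the labels.
_inputs, _labels = next(iter(train_loader))
print(_inputs.shape, _inputs.dtype)             # expected: (batch_size, 2, H, W), torch.float32
print(torch.bincount(_labels, minlength=2))     # rough class balance within one batch
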
for epoch in range(epochs):
    running_loss = 0.0
    batches = 0
    for inputs, labels in train_loader:
        batches += 1
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(inputs)            # shape: (batch_size, 2)
        loss = criterion(output, labels)  # CrossEntropyLoss expects raw logits and class indices
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('Loss: {:.4f} Epoch [{}/{}]'.format(running_loss / batches, epoch + 1, epochs))
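
# Diagnostic sketch (not part of the original post): run this after training, or at
# the end of each epoch, to see how often each class is predicted on the training
# data and whether the model is collapsing to a single class.
model.eval()
pred_counts = torch.zeros(2)
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs).argmax(dim=1)
        pred_counts += torch.bincount(preds.cpu(), minlength=2).float()
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print('train accuracy {:.3f}, predictions per class {}'.format(correct / total, pred_counts.tolist()))
model.train()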

Hi,

Do you know the mean and std of your data? Unnormalized inputs could cause issues with training.
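
If the inputs are not normalized, a minimal sketch for computing per-channel statistics over the training set and normalizing in the loader could look like the following. It assumes each sample loads as a (2, H, W) array, and `normalized_npy_loader` is a hypothetical replacement for the `npy_loader` in the question.

sums, sq_sums, n_pixels = torch.zeros(2), torch.zeros(2), 0
for sample, _ in trainset:                      # trainset as defined in the question
    sample = sample.float()
    sums += sample.sum(dim=(1, 2))
    sq_sums += (sample ** 2).sum(dim=(1, 2))
    n_pixels += sample.shape[1] * sample.shape[2]
mean = sums / n_pixels
std = (sq_sums / n_pixels - mean ** 2).sqrt()

def normalized_npy_loader(path):                # hypothetical replacement for npy_loader
    sample = torch.from_numpy(np.load(path)).float()
    return (sample - mean[:, None, None]) / std[:, None, None]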