All predictions are 1

Architecture

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, in_channels=3, out_channels=2):
        super().__init__()
        # Five VGG-style blocks: two 3x3 convs followed by 2x2 max-pooling
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv9 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        # dropout
        self.dropout3 = nn.Dropout(p=0.3)
        self.dropout5 = nn.Dropout(p=0.5)
        # classifier head; 256*7*7 assumes 224x224 inputs (224 / 2**5 = 7)
        self.flatten = nn.Flatten()
        self.dense512 = nn.Linear(in_features=256 * 7 * 7, out_features=512)
        self.dense2 = nn.Linear(in_features=512, out_features=out_channels)
    def forward(self, x):
        x = F.relu(self.conv1(x))   # 3 -> 16
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))   # 16 -> 32
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        x = F.relu(self.conv5(x))   # 32 -> 64
        x = F.relu(self.conv6(x))
        x = self.pool(x)
        x = F.relu(self.conv7(x))   # 64 -> 128
        x = F.relu(self.conv8(x))
        x = self.pool(x)
        x = self.dropout3(x)
        x = F.relu(self.conv9(x))   # 128 -> 256
        x = F.relu(self.conv10(x))
        x = self.pool(x)
        x = self.dropout3(x)
        x = self.flatten(x)         # (N, 256, 7, 7) -> (N, 256*7*7)
        x = F.relu(self.dense512(x))
        x = self.dropout5(x)
        x = self.dense2(x)          # raw logits; CrossEntropyLoss applies softmax
        return x
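
A quick way to verify the hard-coded 256*7*7 is to push a dummy batch through the model: five 2x2 max-pools shrink a 224x224 input to 7x7, so any other resolution would break the first linear layer. A minimal sketch, assuming 224x224 RGB inputs:

# Five halvings: 224 -> 112 -> 56 -> 28 -> 14 -> 7,
# so the flatten layer should see 256 * 7 * 7 features.
net = CNN(in_channels=3, out_channels=2)
dummy = torch.randn(1, 3, 224, 224)  # assumed input resolution
print(net(dummy).shape)              # expected: torch.Size([1, 2])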

Training

import torch.optim as optim
from tqdm import tqdm

model = CNN(in_channels=3, out_channels=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(num_epochs):
    loop = tqdm(enumerate(train_batch), total=len(train_batch))
    for batch_idx, (data, targets) in loop:
        data = data.to(device=device)
        targets = targets.to(device=device)
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()
        
        # update the progress bar
        loop.set_description(f'Epoch [{epoch + 1}/{num_epochs}]')
        loop.set_postfix(loss=loss.item())
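
Since dense2 returns raw logits and CrossEntropyLoss applies log-softmax internally, the predicted class comes from an argmax over the two output units. A minimal sketch of how the "all predictions are 1" symptom (and the label balance of a batch) can be checked, reusing the train_batch loader above:

model.eval()
with torch.no_grad():
    data, targets = next(iter(train_batch))
    preds = model(data.to(device)).argmax(dim=1)
    print(torch.bincount(preds.cpu(), minlength=2))  # predictions per class
    print(torch.bincount(targets, minlength=2))      # labels per class in the batch
model.train()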

The loss is not decreasing, and the model predicts class 1 for every input. Can someone please help me out?