Architecture
class CNN(nn.Module):
    """VGG-style convolutional classifier for 3-channel images.

    Five conv stages (each: two 3x3 same-padding convs + 2x2 max-pool,
    channels 16 -> 32 -> 64 -> 128 -> 256), then a 512-unit dense layer and
    a linear classification head emitting raw logits (no softmax — pair
    with ``nn.CrossEntropyLoss``).

    NOTE: ``dense512`` is sized as ``256*7*7``, so the spatial input must be
    224x224 (224 / 2**5 = 7) — TODO confirm against the data pipeline.

    Args:
        in_channels: number of input image channels (default 3 = RGB).
        out_channels: number of output classes. BUG FIX: this was previously
            ignored and the head was hard-coded to 2 outputs; it now actually
            controls the classifier width.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super(CNN, self).__init__()
        # Stage 1: in -> 16
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # Shared 2x2 pool reused after every stage (stateless, so one instance suffices)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Stage 2: 16 -> 32
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # Stage 3: 32 -> 64
        self.conv5 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv6 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # Stage 4: 64 -> 128
        self.conv7 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv8 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # Stage 5: 128 -> 256
        self.conv9 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv10 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # Dropout: p=0.3 between deep conv stages, p=0.5 before the head
        self.dropout3 = nn.Dropout(p=0.3)
        self.dropout5 = nn.Dropout(p=0.5)
        self.flatten = nn.Flatten()
        # 256 channels * 7 * 7 spatial -> assumes 224x224 input
        self.dense512 = nn.Linear(in_features=256 * 7 * 7, out_features=512)
        # Classification head — now driven by out_channels instead of a
        # hard-coded 2 (the original silently ignored the parameter).
        self.dense2 = nn.Linear(in_features=512, out_features=out_channels)

    def forward(self, x):
        """Return raw class logits of shape (batch, out_channels)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))          # in -> 16
        x = self.pool(x)
        x = F.relu(self.conv3(x))          # 16 -> 32
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        x = F.relu(self.conv5(x))          # 32 -> 64
        x = F.relu(self.conv6(x))
        x = self.pool(x)
        x = F.relu(self.conv7(x))          # 64 -> 128
        x = F.relu(self.conv8(x))
        x = self.pool(x)
        x = self.dropout3(x)
        x = F.relu(self.conv9(x))          # 128 -> 256
        x = F.relu(self.conv10(x))
        x = self.pool(x)
        x = self.dropout3(x)
        x = self.flatten(x)
        x = F.relu(self.dense512(x))
        x = self.dropout5(x)
        x = self.dense2(x)                 # logits — no activation here
        return x
Training
model = CNN(in_channels=3, out_channels=2).to(device)
# CrossEntropyLoss expects RAW logits (no softmax in the model) and
# integer class-index targets of shape (batch,) — not one-hot vectors.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()  # the net uses dropout; make sure training mode is active
    loop = tqdm(enumerate(train_batch), total=len(train_batch))
    for batch_idx, (data, targets) in loop:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

        # Update progress bar.
        # BUG FIX: the original f-string computed epoch/num_epochs (a float
        # division, e.g. "Epoch [0.1]") instead of showing "current/total".
        loop.set_description(f'Epoch [{epoch + 1}/{num_epochs}]')
        loop.set_postfix(loss=loss.item())
The training loss is not decreasing, and the model predicts class 1 for every input. Can someone please help me figure out what is wrong?