Training Batch Gradient Descent w/

Orcun_Deniz · April 24, 2020, 12:31pm

Hey, I have been trying to train a net for the first time in PyTorch. I came up with my dataset object, wrapped it with a DataLoader, and managed to get batches of the desired size. However, when I pass a batch into my model, say a tensor with shape [64,3,100,200], I only get an output with shape [1]. Shouldn't it give an output shaped like [64]?

Here is my training cycle.

def train(device, epochs, batch_size, trainingLoader, validationLoader):
    """Train a ``driver`` network with Adam on a summed squared-error loss.

    Args:
        device: Device the model and every batch are moved to.
        epochs: Number of full passes over ``trainingLoader``.
        batch_size: Forwarded to the ``driver`` constructor.
        trainingLoader: Iterable yielding ``(img_batch, labels)`` pairs.
        validationLoader: Accepted for interface compatibility; this loop
            does not read it.

    Returns:
        The trained model, so the caller can evaluate or save it. Without
        this return the trained weights would be discarded when the
        function exits.
    """
    model = driver(batch_size).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-4)
    lossFunc = nn.MSELoss(reduction='sum')

    for _ in range(epochs):
        for img_batch, labels in trainingLoader:
            img_batch, labels = img_batch.to(device), labels.to(device)
            # Move axis 3 to position 1 before the conv layers.
            # assumes the loader yields channels-last (N, H, W, C) — TODO confirm
            img_batch = img_batch.permute(0, 3, 1, 2)
            outputs = model(img_batch)
            loss = lossFunc(outputs, labels)
            loss.backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch.
            optimizer.zero_grad()

    return model

And this is the model:

class driver(nn.Module):
    """CNN with five conv layers, dropout, and four linear layers ending in one output unit."""
    def __init__(self, batch_size):
        """Create the layers and size ``lin1`` from a dummy pass through the convs.

        Args:
            batch_size: Not read anywhere in this constructor.
        """
        super(driver, self).__init__()

        # Not referenced again in the code shown here.
        self.first_linear_size = 1

        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=24, kernel_size=(5,5), stride=(2,2))
        self.conv_2 = nn.Conv2d(in_channels=24, out_channels=36, kernel_size=(5,5), stride=(2,2))
        self.conv_3 = nn.Conv2d(in_channels=36, out_channels=48, kernel_size=(5,5), stride=(2,2))
        self.conv_4 = nn.Conv2d(in_channels=48, out_channels=64, kernel_size=(3, 3), stride=(1, 1))
        self.conv_5 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1))
        self.drop = nn.Dropout(p=0.5, inplace=False)

        # Dummy pass run only for its side effect: _convs stores
        # self.flattened_size, which lin1 needs as its input width.
        _ = self._convs(torch.randn((64,3,66,200)))

        self.lin1 = nn.Linear(self.flattened_size,100)
        self.lin2 = nn.Linear(100, 50)
        self.lin3 = nn.Linear(50, 10)
        self.lin4 = nn.Linear(10, 1)

    def _convs(self, image):
        """Rescale the input, apply the conv stack and dropout, and flatten.

        Side effect: stores the flattened element count in
        ``self.flattened_size``.
        """
        image = image / 127.5 - 1
        conv1 = F.elu(self.conv_1(image), alpha=0.3)
        conv2 = F.elu(self.conv_2(conv1), alpha=0.3)
        conv3 = F.elu(self.conv_3(conv2), alpha=0.3)
        conv4 = F.elu(self.conv_4(conv3), alpha=0.3)
        conv5 = F.elu(self.conv_5(conv4), alpha=0.3)
        drop = self.drop(conv5)
        # NOTE(review): without start_dim/end_dim, torch.flatten collapses
        # every dimension, the batch dimension included, into one 1-D tensor.
        # The whole batch therefore becomes a single input row for lin1.
        flat = torch.flatten(drop)

        # Element count of that 1-D tensor; for the dummy pass in __init__
        # this counts all 64 samples together, not one sample.
        self.flattened_size = flat.numel()

        return flat

    def forward(self, image):
        """Run the conv stack followed by the four linear layers.

        NOTE(review): because ``_convs`` returns a 1-D tensor, the result of
        ``lin4`` has shape ``[1]`` for the whole batch rather than one value
        per sample.
        """

        flat = self._convs(image)
        lin1 = F.elu(self.lin1(flat), alpha=0.3)
        lin2 = F.elu(self.lin2(lin1), alpha=0.3)
        lin3 = F.elu(self.lin3(lin2), alpha=0.3)
        lin4 = self.lin4(lin3)

        return lin4

Orcun_Deniz · April 24, 2020, 2:16pm

Solved this. I had been using flatten incorrectly: it was flattening across all dimensions, including the batch dimension. I changed the model's methods as follows:

    def _convs(self, image):
        image = image / 127.5 - 1
        conv1 = F.elu(self.conv_1(image), alpha=0.3)
        conv2 = F.elu(self.conv_2(conv1), alpha=0.3)
        conv3 = F.elu(self.conv_3(conv2), alpha=0.3)
        conv4 = F.elu(self.conv_4(conv3), alpha=0.3)
        conv5 = F.elu(self.conv_5(conv4), alpha=0.3)
        drop = self.drop(conv5)
        flat = torch.flatten(drop, start_dim=1, end_dim=3) ##Correction
        self.flattened_size = flat.shape[1] ##Correction

        return flat

    def forward(self, image):

        flat = self._convs(image)
        lin1 = F.elu(self.lin1(flat), alpha=0.3)
        lin2 = F.elu(self.lin2(lin1), alpha=0.3)
        lin3 = F.elu(self.lin3(lin2), alpha=0.3)
        lin4 = self.lin4(lin3)

        return lin4.squeeze() ## Minor Correction