Attach 2 LSTM layers to ConvNet

bibekx · July 22, 2021, 8:37pm

I am trying to implement the model given by NVIDIA https://developer.nvidia.com/blog/deep-learning-self-driving-cars/

The current version of the model is given below. However, I want the model to look at 4 consecutive images before making a decision. I am thinking of attaching 2 LSTM layers but do not know how to…

# Regressor
class Oracle(nn.Module):
    """Convolutional regressor that maps an image batch to one scalar per sample.

    Five ELU-activated convolutions feed a flattened feature vector through
    dropout and four fully connected layers (1152 -> 100 -> 50 -> 10 -> 1).

    NOTE(review): ``fc1`` expects exactly ``64 * 18`` flattened features, so
    the input resolution is fixed by the conv stack. A 3x66x200 input yields
    a 64x1x18 map after ``conv5`` -- presumably that is the intended frame
    size; confirm against the data pipeline.
    """

    def __init__(self):
        super().__init__()
        self.elu = nn.ELU(inplace=True)
        self.dropout1 = nn.Dropout(0.5)  # Not given by NVIDIA, arbitrary choice

        # Conv2d args: in_channels, out_channels, kernel_size, stride.
        # Padding is not passed, so it stays at the default of 0.
        self.conv1 = nn.Conv2d(3, 24, (5, 5), 2)
        self.conv2 = nn.Conv2d(24, 36, (5, 5), 2)
        self.conv3 = nn.Conv2d(36, 48, (5, 5), 2)
        self.conv4 = nn.Conv2d(48, 64, (3, 3))
        self.conv5 = nn.Conv2d(64, 64, (3, 3))

        # The original author notes that NVIDIA quotes 1164 here rather than
        # 1152 (= 64 * 18); 1152 is what this conv stack actually produces
        # for the assumed input size.
        self.fc1 = nn.Linear(64 * 18, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.fc4 = nn.Linear(10, 1)

    def forward(self, x):
        """Run the network on a 4-D image batch.

        Args:
            x: Tensor laid out as (batch, 3, height, width); the spatial size
                must make the ``conv5`` output flatten to 1152 features.

        Returns:
            Tensor of shape (batch, 1) holding the unbounded regression output.
        """
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = self.elu(conv(x))

        # Collapse everything but the batch dimension. ``flatten`` (unlike
        # ``view``) also copes with non-contiguous activations.
        x = x.flatten(1)
        x = self.dropout1(x)

        for fc in (self.fc1, self.fc2, self.fc3):
            x = self.elu(fc(x))

        # No activation on the output layer: ELU is bounded below by -alpha
        # (-1 by default), which would stop the regressor from ever predicting
        # values <= -1 and would flatten gradients for negative targets.
        return self.fc4(x)

Your help is appreciated. Thanks!!