The CNN class
from torch import nn
import torchvision.transforms as transforms
class ConvNet(nn.Module):
    """Six-stage CNN with a fully connected classifier head.

    Each stage is ``Conv2d(3x3, no padding) -> ReLU -> MaxPool2d(2) ->
    BatchNorm2d``; the first stage is preceded by a ``BatchNorm2d`` on the raw
    input. The last stage produces 256 channels, which are flattened and fed
    to the classifier head.

    Args:
        num_classes: Size of the final linear layer's output.
        flatten_size: Number of spatial positions left after the sixth stage;
            the first linear layer expects ``flatten_size * 256`` features.
            Only relevant when ``recurrent`` is false.
        inputs: Number of channels of the input images.
        recurrent: When true, ``forward`` returns the flattened convolutional
            features instead of class scores (for use as a feature extractor
            in front of a recurrent model).
    """

    def __init__(self, num_classes=20, flatten_size=2*5, inputs=3, recurrent=False):
        super(ConvNet, self).__init__()
        # Sub-module names and the index of every layer inside each
        # nn.Sequential are kept stable so existing state_dicts still load.
        self.layer1 = nn.Sequential(
            nn.BatchNorm2d(inputs),
            *self._conv_stage(inputs, 32)
        )
        self.layer2 = nn.Sequential(*self._conv_stage(32, 32))
        self.layer3 = nn.Sequential(*self._conv_stage(32, 64))
        self.layer4 = nn.Sequential(*self._conv_stage(64, 64))
        self.layer5 = nn.Sequential(*self._conv_stage(64, 128))
        self.layer6 = nn.Sequential(*self._conv_stage(128, 256))
        self.fc1Layer = nn.Sequential(
            nn.Linear(flatten_size * 256, 132), nn.ReLU(), nn.BatchNorm1d(132)
        )
        self.fc2Layer = nn.Sequential(
            nn.Dropout(p=0.5), nn.Linear(132, 132), nn.ReLU(), nn.BatchNorm1d(132)
        )
        # NOTE: fc3Layer is constructed but never applied in forward(); it is
        # kept so that checkpoints containing its weights remain loadable.
        self.fc3Layer = nn.Sequential(nn.Linear(132, 132), nn.ReLU())
        self.fc4Layer = nn.Sequential(nn.Dropout(p=0.2), nn.Linear(132, num_classes))
        self.recurrent = recurrent

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return the layers of one conv stage: conv -> ReLU -> pool -> batch norm."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), padding=0, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(out_channels),
        ]

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch whose first dimension is the batch size and whose
                channel count matches ``inputs``.

        Returns:
            The flattened output of the sixth conv stage, one row per sample,
            when ``recurrent`` is true; otherwise the output of the
            classifier head with ``num_classes`` columns.
        """
        features = x
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5, self.layer6):
            features = stage(features)
        # Collapse channels and spatial positions into one feature vector
        # per sample.
        features = features.reshape(features.size(0), -1)

        if self.recurrent:
            # Return before touching the classifier head: its result would be
            # discarded anyway, and running it would needlessly fail when
            # flatten_size does not match the actual feature-map size.
            return features

        out = self.fc1Layer(features)
        out = self.fc2Layer(out)
        out = self.fc4Layer(out)
        return out
# Evaluate the model on every batch of the 'validation' split.
for data, target in dataloader['validation']:
    # Move the batch to the device the model is evaluated on.
    data, target = data.to(device), target.to(device)
    output = model(data)
    # The loss compares the predictions with the ground-truth targets, so the
    # targets have to be passed alongside the model output.
    loss = criterion(output, target)
The model class combines the CNN and the LSTM.
class Mixnet(nn.Module):
    """Per-frame CNN feature extractor followed by an LSTM and a linear head.

    ``forward`` folds the time dimension into the batch dimension, runs every
    frame through ``cnn``, regroups the per-frame features into sequences,
    feeds them to the LSTM and maps the last time step's output through
    batch norm, dropout and a linear layer.

    Args:
        n_outputs: Size of the final linear layer's output.
        num_layers: Number of stacked LSTM layers.
        hidden_dim: LSTM hidden size.
        n_steps: Stored as ``self.n_steps``; not used by ``forward``, which
            reads the number of time steps from its input.
        cnn: Module applied to each frame. Its output, flattened per frame,
            must have ``n_inputs`` features.
        num_classes: Accepted for interface compatibility; unused.
        n_inputs: Feature size of each LSTM input step.
    """

    def __init__(self, n_outputs, num_layers, hidden_dim, n_steps, cnn, num_classes=20, n_inputs=78*256):
        super(Mixnet, self).__init__()
        self.cnn = cnn
        self.bn = nn.BatchNorm1d(hidden_dim)
        self.n_steps = n_steps
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # Inter-layer dropout is only applied between stacked LSTM layers;
        # with a single layer it has no effect and only triggers a warning.
        lstm_dropout = 0.5 if self.num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            self.n_inputs,
            self.hidden_dim,
            self.num_layers,
            batch_first=True,
            dropout=lstm_dropout,
        )
        self.FC = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(self.hidden_dim, self.n_outputs))

    def init_hidden(self, dim):
        """Return zero-filled ``[h0, c0]`` LSTM states for a batch of ``dim`` sequences.

        The tensors are created on the same device (and with the same dtype)
        as the model's own parameters rather than on a module-level global.
        """
        reference = self.bn.weight
        state_shape = (self.num_layers, dim, self.hidden_dim)
        return [reference.new_zeros(state_shape), reference.new_zeros(state_shape)]

    def forward(self, x):
        """Classify a batch of frame sequences.

        Args:
            x: Tensor with five dimensions, unpacked as
                ``(batch_size, timesteps, C, H, W)``.

        Returns:
            Tensor of shape ``(batch_size, n_outputs)``.
        """
        batch_size, timesteps, C, H, W = x.size()
        # self.cnn is a registered sub-module, so it already lives wherever
        # the model was moved to; only the input may need to follow.
        model_device = self.bn.weight.device
        # Tensor.to() returns a new tensor, so its result must be kept.
        # reshape (rather than view) also accepts non-contiguous input.
        frames = x.reshape(batch_size * timesteps, C, H, W).to(model_device)
        frame_features = self.cnn(frames)
        # Regroup per-frame features into one sequence per sample.
        sequences = frame_features.reshape(batch_size, timesteps, -1)
        # No initial state is passed, so the LSTM starts from zeros.
        lstm_out, _ = self.lstm(sequences)
        # Keep only the output of the final time step.
        last_step = lstm_out[:, -1, :]
        last_step = self.bn(last_step)
        out = self.FC(last_step)
        return out.view(batch_size, self.n_outputs)
The idea is to run each batch through the CNN and then feed the CNN's output to the LSTM.