I have coded up a basic ConvNet from scratch in PyTorch (1 conv layer, 1 pool layer, and 2 fully connected layers) and it is running far too slowly. I would like to verify that I am doing it right, and to find out what can be done to speed it up. The individual conv_forward and pool_forward functions work correctly in NumPy (the padding and conv_step helpers they call are not shown here).
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self, _filters, biases, dim_in_fc1, dim_out_fc1, dim_out_fc2):
        super(ConvNet, self).__init__()
        # _filters and biases are shape tuples, e.g. (1, 3, 3, 8) and (1, 1, 1, 8)
        self._filters = nn.Parameter(torch.randn(_filters))
        self.biases = nn.Parameter(torch.randn(biases))
        self.W1 = nn.Parameter(torch.rand(dim_in_fc1, dim_out_fc1, dtype=torch.float))
        self.b1 = nn.Parameter(torch.zeros(dim_out_fc1))
        self.W2 = nn.Parameter(torch.rand(dim_out_fc1, dim_out_fc2, dtype=torch.float))
        self.b2 = nn.Parameter(torch.zeros(dim_out_fc2))
    def _conv_forward(self, Activation_prev_layer, _filters, biases, stride_pad_dict):
        m, ch, a, b = Activation_prev_layer.size()
        ch, f, _, num = _filters.size()   # square f x f kernels, num output channels
        stride = stride_pad_dict["stride"]
        pad = stride_pad_dict["pad"]
        new_a = (a + 2*pad - f)//stride + 1
        new_b = (b + 2*pad - f)//stride + 1
        z = torch.zeros((m, num, new_a, new_b))
        Activation_prev_layer_pad = padding(Activation_prev_layer, pad)
        # one Python-level conv_step call per output element
        for i in range(m):
            Activation_prev_layer_pad_slice = Activation_prev_layer_pad[i]
            for x in range(new_a):
                x_start = stride*x
                x_end = x_start + f
                for y in range(new_b):
                    y_start = stride*y
                    y_end = y_start + f
                    for n in range(num):
                        z[i, n, x, y] = conv_step(
                            Activation_prev_layer_pad_slice[:, x_start:x_end, y_start:y_end],
                            _filters[:, :, :, n], biases[:, :, :, n])
        cache = (Activation_prev_layer, _filters, biases, stride_pad_dict)  # unused, left over from the NumPy version
        return z
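    # A vectorized alternative I am considering (my own sketch, untested):
    # a single F.conv2d call should replace all four loops above, assuming
    # conv_step computes the elementwise product summed up plus the bias.
    def _conv_forward_fast(self, Activation_prev_layer, _filters, biases, stride_pad_dict):
        weight = _filters.permute(3, 0, 1, 2).contiguous()  # (ch, f, f, num) -> (num, ch, f, f)
        bias = biases.reshape(-1)                           # (1, 1, 1, num) -> (num,)
        return F.conv2d(Activation_prev_layer, weight, bias,
                        stride=stride_pad_dict["stride"],
                        padding=stride_pad_dict["pad"])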
    def _pool_forward(self, Activation_prev_layer, stride_pool_dict, mode="max"):
        m, ch, a, b = Activation_prev_layer.size()
        stride = stride_pool_dict["stride"]
        pool = stride_pool_dict["pool"]
        new_a = (a - pool)//stride + 1
        new_b = (b - pool)//stride + 1
        z = torch.zeros((m, ch, new_a, new_b))
        for i in range(m):
            Activation_prev_layer_slice = Activation_prev_layer[i]
            for x in range(new_a):
                x_start = stride*x
                x_end = x_start + pool
                for y in range(new_b):
                    y_start = stride*y
                    y_end = y_start + pool
                    for c in range(ch):
                        Activation_prev_layer_slice_pool = Activation_prev_layer_slice[c, x_start:x_end, y_start:y_end]
                        if mode == "max":
                            z[i, c, x, y] = torch.max(Activation_prev_layer_slice_pool)
                        elif mode == "average":
                            z[i, c, x, y] = torch.mean(Activation_prev_layer_slice_pool)
        cache = (Activation_prev_layer, stride_pool_dict)  # unused, left over from the NumPy version
        return z
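    # Same idea for pooling (again a sketch): the built-in pooling ops should
    # match the loop version for both modes.
    def _pool_forward_fast(self, Activation_prev_layer, stride_pool_dict, mode="max"):
        pool = stride_pool_dict["pool"]
        stride = stride_pool_dict["stride"]
        if mode == "max":
            return F.max_pool2d(Activation_prev_layer, pool, stride)
        return F.avg_pool2d(Activation_prev_layer, pool, stride)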
    def fc(self, x, W, b):
        z = torch.add(torch.mm(x, W), b)
        # dim=1 avoids the implicit-dim deprecation warning; note this applies
        # log_softmax after both fc layers, not just the last one
        a = F.log_softmax(z, dim=1)
        return a
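    # As far as I can tell, fc is equivalent to the built-in F.linear, which
    # expects the transposed weight layout: with W stored as (in, out) it
    # would be F.log_softmax(F.linear(x, W.t(), b), dim=1).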
    def forward_feed(self, x, stride_pad_dict, stride_pool_dict):
        m = x.size()[0]
        x = F.relu(self._conv_forward(x, self._filters, self.biases, stride_pad_dict))
        x = self._pool_forward(x, stride_pool_dict)
        x = x.view(m, -1)
        x = self.fc(x, self.W1, self.b1)
        x = self.fc(x, self.W2, self.b2)
        return x
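For reference, this is roughly what I believe the same network looks like with the built-in layers (my own sketch, untested). I used relu on the hidden layer instead of the log_softmax my fc() applies, and 1800 = 8 * 15 * 15 comes from padding a 28x28 MNIST image to 32x32, convolving to 30x30, then pooling to 15x15:

class ConvNetBuiltin(nn.Module):
    def __init__(self):
        super(ConvNetBuiltin, self).__init__()
        self.conv = nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(1800, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x), dim=1)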
I am running the training loop like this:
model = ConvNet((1, 3, 3, 8), (1, 1, 1, 8), 1800, 50, 10)
stride_pad_dict = {"stride": 1, "pad": 2}
stride_pool_dict = {"stride": 2, "pool": 2}
epochs = 10
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
for i in range(epochs):
    for batch_idx, (image, target) in enumerate(mnist_train_loader):
        optimizer.zero_grad()
        pred_y = model.forward_feed(image, stride_pad_dict, stride_pool_dict)
        loss = F.nll_loss(pred_y, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            i, batch_idx * len(image), len(mnist_train_loader.dataset),
            100. * batch_idx / len(mnist_train_loader), loss.item()))
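Everything runs on the CPU at the moment. My understanding is that moving to the GPU only pays off once the per-element Python loops are replaced by vectorized ops like the sketches above, since _conv_forward and _pool_forward allocate z on the CPU regardless. The change would look roughly like:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet((1, 3, 3, 8), (1, 1, 1, 8), 1800, 50, 10).to(device)
# and inside the batch loop:
image, target = image.to(device), target.to(device)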