I’m trying to send 2 images through a siamese network. It looks like it’s as easy as writing a for-loop, calling forward
for each leg of the siamese net. Is this correct? I’ve written a baby siamese net below:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import pdb
class BabyNet(torch.nn.Module):
    """Tiny CNN intended to be used as one weight-shared leg of a siamese net.

    Pipeline: 7x7 stride-2 conv (3 -> 64 channels) -> batch norm -> ReLU ->
    1x1 conv (64 -> 4 channels) -> 112x112 average pool -> softmax over the
    channel dimension.  For a 224x224 input the first conv yields a 112x112
    map, so the pool collapses it to 1x1 and the output is ``(N, 4, 1, 1)``.

    Every intermediate activation gets a backward hook (built by the
    module-level ``save_grad`` helper) so that the gradients flowing through
    each leg can be inspected after ``backward()``.

    Note: the output is already a normalised probability distribution, not
    raw logits; ``nn.CrossEntropyLoss`` expects logits.
    """

    def __init__(self):
        super(BabyNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(64, 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.pool = nn.AvgPool2d((112, 112), stride=(112, 112))
        # Explicit dim: the implicit choice is deprecated and warns on every
        # call.  dim=1 (channels) is what the implicit rule selects for a
        # 4-D input, so results are unchanged.
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _tap(tensor, name):
        """Attach a gradient-saving hook to ``tensor`` when that is possible.

        ``register_hook`` raises on tensors that do not require grad (e.g.
        inside ``torch.no_grad()``), so the hook is skipped in that case and
        plain inference keeps working.
        """
        if tensor.requires_grad:
            tensor.register_hook(save_grad(name))
        return tensor

    def forward(self, x, leg=0):
        """Run one leg of the network.

        Args:
            x: image batch with 3 input channels.
            leg: identifier of the siamese leg; it is stringified and used as
                the suffix of the hook names (``'conv1_<leg>'``,
                ``'bn1_<leg>'``, ``'relu_<leg>'``, ``'conv2_<leg>'``,
                ``'pool_<leg>'``, ``'softmax_<leg>'``).

        Returns:
            The softmax output of the final stage.
        """
        leg = str(leg)
        stages = (
            ('conv1', self.conv1),
            ('bn1', self.bn1),
            ('relu', self.relu),
            ('conv2', self.conv2),
            ('pool', self.pool),
            ('softmax', self.softmax),
        )
        for stage_name, layer in stages:
            x = self._tap(layer(x), stage_name + '_' + leg)
        return x
# Gradients captured by backward hooks, keyed by the name passed to save_grad.
grads = {}


def save_grad(name):
    """Build a hook that records the gradient it receives under ``name``.

    The returned callable takes a single gradient argument, stores it in the
    module-level ``grads`` dict (overwriting any earlier entry for ``name``)
    and returns ``None``.
    """
    def _remember(incoming):
        grads.update({name: incoming})

    return _remember
# Parameter name -> gradient tensor, refreshed after every backward pass.
grad_of_param = {}

if __name__ == '__main__':
    # Use the GPU when there is one, otherwise fall back to the CPU instead
    # of crashing inside .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = BabyNet()
    model.to(device)
    lr = 1e-3

    # Loss and Optimizer
    # BabyNet already ends in a softmax, so its output is probabilities.
    # CrossEntropyLoss would apply a second (log-)softmax on top of that;
    # NLLLoss on the log of the probabilities is the matching loss.
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=lr)

    # Uniform weights over the 4 classes; constant, so built once.
    label_wts = torch.Tensor([1, 1, 1, 1])
    # Activation names hooked inside BabyNet.forward, in forward order.
    hooked_layers = ('conv1', 'bn1', 'relu', 'conv2', 'pool', 'softmax')

    for _ in range(100):
        out = []
        optimizer.zero_grad()

        # Two class indices drawn without replacement, one per leg.
        labels = torch.multinomial(label_wts, 2).long().to(device)

        # Siamese forward: the same model (shared weights) is applied to a
        # fresh random image for each leg.
        for i_leg in range(2):
            X = torch.empty(1, 3, 224, 224, device=device).uniform_(0, 255)
            Y = model(X, leg=i_leg)
            out.append(Y.view(1, -1))

        out[0].register_hook(save_grad('out_0'))
        out[1].register_hook(save_grad('out_1'))

        preds = torch.cat(out, dim=0)
        # Clamp before the log so a probability that underflowed to 0 cannot
        # turn the loss into inf.
        loss = criterion(torch.log(preds.clamp(min=1e-12)), labels)
        loss.backward()

        print('Gradient comparison...')
        for layer in hooked_layers:
            same = torch.equal(grads[layer + '_0'], grads[layer + '_1'])
            print('{}: {}'.format(layer, same))
        print(grads['out_0'], grads['out_1'])

        for name, parameter in model.named_parameters():
            grad_of_param[name] = parameter.grad

        optimizer.step()