My code works well in Keras, but my PyTorch port of it does not.
My Keras code:
combined = concatenate([left.output, right.output])
combined = Conv3D(128, (3, 3, 3), activation='relu', strides=1, kernel_initializer='he_uniform', padding='same')(combined)
combined = BatchNormalization()(combined)
combined = Conv3D(64, (3, 3, 3), activation='relu', strides=1, kernel_initializer='he_uniform', padding='same')(combined)
combined = BatchNormalization()(combined)
combined = MaxPooling3D(pool_size=(2, 2, 2))(combined)
combined = Flatten()(combined)
# apply a FC layer and then a regression prediction on the
# combined outputs
z = Dense(64, activation="relu")(combined)
z = BatchNormalization()(z)
z = Dropout(0.5)(z)
z = Dense(32, activation="relu")(z)
z = Dense(1, activation="linear")(z)
model = Model(inputs=[left.input, right.input], outputs=z)
My optimizer:
optimizer = SGD(momentum=0.9, nesterov=True)
self.model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics=metrics)
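For reference, the rough PyTorch counterpart of this compile step would be the following sketch (lr=0.01 is taken from the learning-rate note at the end, and model is the combined network defined below; unlike Keras's SGD, torch.optim.SGD needs an explicit lr):

import torch.nn as nn
import torch.optim as optim

# Sketch of the Keras compile step above; lr=0.01 per the note at the end.
criterion = nn.L1Loss()  # the PyTorch equivalent of 'mean_absolute_error'
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True)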
My PyTorch code:
def train_epoch(epoch, data_loader, model, criterion, criterion2, optimizer, opt,
                epoch_logger, batch_logger):
    print('train at epoch {}'.format(epoch))

    model.train()
    model = model.cuda()
    # model = nn.DataParallel(model, device_ids=None)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses2 = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end_time = time.time()
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        if not opt.no_cuda:
            targets = targets.cuda()
        # inputs = inputs[0]
        inputs = inputs.squeeze()
        # torch.reshape keeps everything a tensor (np.reshape would return an
        # ndarray, which breaks the .cuda() call below)
        inputs = torch.reshape(inputs, (inputs.shape[0], inputs.shape[1], inputs.shape[5],
                                        inputs.shape[2], inputs.shape[3], inputs.shape[4]))
        inputs = inputs.cuda()
        outputs = model(inputs[0].float(), inputs[1].float())

        targets = torch.transpose(targets, 0, 1)
        targets = targets.float()
        # targets = targets.unsqueeze(-1)
        # outputs = torch.reshape(outputs, (-1,))
        # targets = targets.to(torch.float64)
        # targets = targets.unsqueeze(0)
        # targets = torch.reshape(targets, (-1,))
        targets = targets.squeeze()
        outputs = outputs.squeeze()

        loss = criterion(outputs, targets)
        loss2 = criterion2(outputs, targets)
        # loss = loss * weight
        # loss = loss.sum()
        losses.update(loss.mean().data, inputs.size(0))
        losses2.update(loss2.data, inputs.size(0))
        # prec1, prec5 = calculate_accuracy(outputs.data, targets.data, topk=(1, 5))
        # top1.update(prec1, inputs.size(0))
        # top5.update(prec5, inputs.size(0))

        optimizer.zero_grad()
        loss.mean().backward()
        optimizer.step()
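A note on the reshape step above: if the intent is to move axis 5 in front of axes 2 to 4, reshape does not reorder axes; it only reinterprets the flat element order, so the voxels get scrambled. Axis reordering is done with permute. A minimal sketch of the difference (the shape is hypothetical):

import torch

x = torch.randn(2, 8, 16, 16, 16, 3)  # hypothetical (stream, batch, D, H, W, C) block
moved = x.permute(0, 1, 5, 2, 3, 4).contiguous()     # reorders axes; elements stay paired correctly
scrambled = torch.reshape(x, (2, 8, 3, 16, 16, 16))  # same shape, but element order is not preserved
print(torch.equal(moved, scrambled))                 # False in general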
And my model in PyTorch:
class combineNet(nn.Module):
    def __init__(self, modelA, modelB):
        super(combineNet, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        # conv
        self.conv1 = nn.Conv3d(2048, 128, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
        nn.init.xavier_uniform_(self.conv1.weight)  # gain=nn.init.calculate_gain('relu')
        nn.init.zeros_(self.conv1.bias)
        self.BN1 = nn.BatchNorm3d(128, eps=0.001).train()  # momentum=0.99
        self.conv2 = nn.Conv3d(128, 64, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
        nn.init.xavier_uniform_(self.conv2.weight)  # gain=nn.init.calculate_gain('relu')
        nn.init.zeros_(self.conv2.bias)
        self.BN2 = nn.BatchNorm3d(64, eps=0.001).train()  # momentum=0.99
        self.MP = nn.MaxPool3d(2)
        self.fc1 = nn.Linear(128, 64)  # bias=False; was 32768
        # nn.init.xavier_uniform_(self.fc1.weight)
        # nn.init.zeros_(self.fc1.bias)
        self.BN = nn.BatchNorm1d(64, eps=0.001).train()  # momentum=0.99
        self.act = nn.ReLU()
        self.dr = nn.Dropout(0.5)
        self.classifier1 = nn.Linear(64, 32)  # was 128; bias=False
        # nn.init.xavier_uniform_(self.classifier1.weight)
        # nn.init.zeros_(self.classifier1.bias)
        self.classifier2 = nn.Linear(32, 1)  # bias=False
        # nn.init.xavier_uniform_(self.classifier2.weight)
        # nn.init.zeros_(self.classifier2.bias)

    def forward(self, x1, x2):
        x1 = self.modelA(x1)
        x2 = self.modelB(x2)
        x = torch.cat((x1, x2), 1)  # dim=1
        x = x.view(x.size(0), 2048, 4, 2, 2)
        x = self.act(self.conv1(x))
        x = self.BN1(x)
        x = self.act(self.conv2(x))
        x = self.BN2(x)
        x = self.MP(x)
        x = x.view(x.size(0), -1)
        x = self.BN(self.act(self.fc1(x)))
        x = self.dr(x)
        x = self.act(self.classifier1(x))
        x = self.classifier2(x)
        return x
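One difference in framework defaults that I noticed while porting: the Keras model initializes the conv layers with he_uniform, while the PyTorch version uses xavier_uniform_; the PyTorch counterpart of he_uniform is kaiming_uniform_ with nonlinearity='relu'. Also, Keras's BatchNormalization momentum (default 0.99) is the decay of the running statistics, while PyTorch's momentum is the update fraction, so Keras momentum=0.99 corresponds to PyTorch momentum=0.01. A sketch of matching both (keras_style_init is a hypothetical helper name):

import torch.nn as nn

def keras_style_init(conv):
    # Keras he_uniform draws from uniform(-sqrt(6/fan_in), sqrt(6/fan_in)),
    # which is PyTorch's kaiming_uniform_ with nonlinearity='relu'.
    nn.init.kaiming_uniform_(conv.weight, nonlinearity='relu')
    nn.init.zeros_(conv.bias)

# Keras BatchNormalization(momentum=0.99) updates running stats as
# running = 0.99 * running + 0.01 * batch; PyTorch's momentum is the update
# fraction, so the matching value is momentum=0.01 (not PyTorch's 0.1 default).
bn1 = nn.BatchNorm3d(128, eps=0.001, momentum=0.01)

In combineNet.__init__ this would mean calling keras_style_init(self.conv1) and keras_style_init(self.conv2), and building BN1, BN2, and BN with momentum=0.01.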
My training loss in Keras reaches 0.2 (my objective), but in PyTorch it only goes down to 0.35.
The validation loss also stops decreasing at around 0.4.
Both learning rates are 0.01.
The datasets are the same.
To be precise: the training loss decreases in both Keras and PyTorch, but in the PyTorch code the validation loss drops to 0.35 and then stops decreasing.
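One thing I am not sure about is evaluation mode: Keras puts BatchNorm and Dropout into inference behavior automatically during evaluation, while PyTorch needs an explicit model.eval(). A minimal sketch of a validation loop under that assumption (val_loader is a placeholder name; criterion and AverageMeter are from the code above):

model.eval()  # inference mode for BatchNorm/Dropout (also overrides the .train() calls in __init__)
val_losses = AverageMeter()
with torch.no_grad():
    for inputs, targets in val_loader:
        inputs, targets = inputs.cuda(), targets.cuda()
        outputs = model(inputs[0].float(), inputs[1].float()).squeeze()
        loss = criterion(outputs, targets.float().squeeze())
        val_losses.update(loss.data, inputs.size(0))
model.train()  # back to training mode for the next epoch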
Any help?