It seems there is still a problem; my implementation is the following:
def train(**kwargs):
    # torch.manual_seed(100)  # 10, 100, 666, ...
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step 1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: load data
    train_data = STSDataset(opt.train_data_path)
    val_data = STSDataset(opt.train_data_path)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    torch.save(train_data.X, opt.train_features_path)
    torch.save(train_data.y, opt.train_targets_path)

    # step 3: set criterion and optimizer
    criterion = torch.nn.MSELoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=opt.weight_decay)
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # step 4: set meters
    loss_meter = meter.MSEMeter()
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            # train model on a batch of data
            input = Variable(data)
            target = Variable(torch.FloatTensor(label.numpy()))
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            # loss = criterion(score, target)  # use MSE loss function
            vx = score - torch.mean(score)
            vy = target - torch.mean(target)
            loss = torch.sum(vx * vy) / (torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2)))  # use Pearson correlation
            loss.backward()
            optimizer.step()

            # update meters and visualize
            loss_meter.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value())

                # enter debug mode
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # save model for each epoch
        # model.save()

        # validate and visualize
        val_mse, val_pearsonr = val(model, val_dataloader)
        vis.plot('val_mse', val_mse)
        vis.plot('pearson', val_pearsonr)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},val_mse:{val_mse},val_pearson:{val_pearson}".format(
            epoch=epoch,
            lr=lr,
            loss=loss_meter.value(),
            val_mse=str(val_mse),
            val_pearson=str(val_pearsonr)))

        # update learning rate
        if loss_meter.value() > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()
When I check the output, the MSE and Pearson correlation are all NaN.
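For reference, here is a minimal standalone sketch of where that NaN can come from (plain tensors, recent PyTorch; the pearson_loss helper, the eps guard, and the toy tensors are only illustrations, not part of the script above): the denominator torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2)) is zero whenever score or target is constant within a batch, so the division returns NaN, and once loss.backward() propagates a NaN into the weights, every later score, MSE, and correlation value is NaN as well.

import torch

def pearson_loss(score, target, eps=1e-8):
    # Hypothetical guarded variant, for illustration only (not in the script above):
    # eps keeps the denominator non-zero, and 1 - r is one common way to turn
    # the correlation into a quantity to be minimized.
    vx = score - torch.mean(score)
    vy = target - torch.mean(target)
    r = torch.sum(vx * vy) / (torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2)) + eps)
    return 1.0 - r

score = torch.zeros(8)   # constant predictions, e.g. from a saturated output layer
target = torch.rand(8)

vx = score - torch.mean(score)
vy = target - torch.mean(target)
raw = torch.sum(vx * vy) / (torch.sqrt(torch.sum(vx ** 2)) * torch.sqrt(torch.sum(vy ** 2)))
print(raw)                           # tensor(nan): 0 / 0
print(pearson_loss(score, target))   # tensor(1.): finite because of eps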