Hi all, I'm having problems connecting an MLP to a recurrent unit such as a GRU. These are my networks:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
class DeepMLPRegressor(nn.Module):
    def __init__(self, in_features):
        super(DeepMLPRegressor, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU())

    def forward(self, x):
        return self.model(x)
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        # note: dropout has no effect when n_layers=1
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        # nn.GRU takes (input, hidden) and returns (output, hidden)
        out, h = self.gru(x, h)
        # regress from the output of the last time step
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        # create the hidden state on the same device as the parameters
        weight = next(self.parameters()).data
        return weight.new_zeros(self.n_layers, batch_size, self.hidden_dim)
class final_model(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(final_model, self).__init__()
        self.mlp = DeepMLPRegressor(input_dim)
        self.GRU = GRUNet(input_dim=128, hidden_dim=32, output_dim=output_dim, n_layers=1)

    def forward(self, x):
        x1 = self.mlp(x)                      # (batch, 128)
        x1 = x1.unsqueeze(1)                  # (batch, 1, 128): the GRU expects (batch, seq, features)
        h = self.GRU.init_hidden(x1.size(0))  # the hidden state must be a tensor, not an int
        x2, _ = self.GRU(x1, h)               # (batch, output_dim)
        return x2
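For reference, a quick sanity check of the combined model on random data (a minimal sketch, not my real dataset):

import torch

model = final_model(15, 2)
x = torch.randn(16, 15)  # a batch of 16 samples with 15 features each
out = model(x)
print(out.shape)         # torch.Size([16, 2])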
This is my training code, which I call from main:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
import numpy as np
from dataset import Dataset
from net import *
from torch.utils.data import DataLoader
from torchnet.meter import AverageValueMeter
from torchnet.logger import VisdomPlotLogger, VisdomSaver
def train(model, train_loader, valid_loader, exp_name="MLP121", lr=0.00001, epochs=1000, wd=0.000001):
    criterionX = nn.MSELoss()
    criterionZ = nn.MSELoss()
    optimizer = Adam(params=model.parameters(), lr=lr, weight_decay=wd)
    scheduler = StepLR(optimizer, step_size=100, gamma=0.5)  # every 100 epochs the lr is halved
    # meters
    lossX_meter = AverageValueMeter()
    lossZ_meter = AverageValueMeter()
    lossT_meter = AverageValueMeter()
    # device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    loader = {"train": train_loader, "test": valid_loader}
    loss_X_logger = VisdomPlotLogger('line', env=exp_name, opts={'title': 'LossX', 'legend': ['train', 'test']})
    loss_Z_logger = VisdomPlotLogger('line', env=exp_name, opts={'title': 'LossZ', 'legend': ['train', 'test']})
    loss_T_logger = VisdomPlotLogger('line', env=exp_name, opts={'title': 'Total_Loss', 'legend': ['train', 'test']})
    visdom_saver = VisdomSaver(envs=[exp_name])
    last_best_loss = np.inf  # used to checkpoint the best model
    for e in range(epochs):
        for mode in ["train", "test"]:
            lossX_meter.reset()
            lossZ_meter.reset()
            lossT_meter.reset()
            model.train() if mode == "train" else model.eval()
            with torch.set_grad_enabled(mode == "train"):  # enable gradients only in training
                for i, batch in enumerate(loader[mode]):
                    x = batch["Array"].to(device)
                    dx = batch['Movement'][:, 0].float().to(device)
                    dz = batch['Movement'][:, 1].float().to(device)
                    output = model(x)
                    out1, out2 = output[:, 0], output[:, 1]
                    l1 = criterionX(out1, dx)
                    l2 = criterionZ(out2, dz)
                    loss = l1 + l2
                    if mode == "train":
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                    else:
                        if loss.item() < last_best_loss:
                            torch.save(model.state_dict(), 'Best_%s.pth' % exp_name)
                            last_best_loss = loss.item()
                    n = x.shape[0]  # number of samples in the batch
                    lossX_meter.add(l1.item() * n, n)  # update the meters for plotting
                    lossZ_meter.add(l2.item() * n, n)
                    lossT_meter.add(loss.item() * n, n)
                    if mode == "train":
                        loss_X_logger.log(e + (i + 1) / len(loader[mode]), lossX_meter.value()[0], name=mode)
                        loss_Z_logger.log(e + (i + 1) / len(loader[mode]), lossZ_meter.value()[0], name=mode)
                        loss_T_logger.log(e + (i + 1) / len(loader[mode]), lossT_meter.value()[0], name=mode)
            # log once at the end of each epoch for both modes
            loss_X_logger.log(e + (i + 1) / len(loader[mode]), lossX_meter.value()[0], name=mode)
            loss_Z_logger.log(e + (i + 1) / len(loader[mode]), lossZ_meter.value()[0], name=mode)
            loss_T_logger.log(e + (i + 1) / len(loader[mode]), lossT_meter.value()[0], name=mode)
        scheduler.step()
        # save the visdom environment
        visdom_saver.save()
        # keep only the latest model, overwriting old ones; the best model is saved separately above
        torch.save(model.state_dict(), '%s.pth' % exp_name)
    return model
def start_all():
    model = final_model(15, 2)
    # define the train/validation datasets and loaders
    train_dataset = Dataset('../Dataset/121/', '121_train_sequential.csv', 'raw/')
    valid_dataset = Dataset('../Dataset/121/', '121_validation_sequential.csv', 'raw/')
    train_loader = DataLoader(train_dataset, batch_size=16, num_workers=2)
    valid_loader = DataLoader(valid_dataset, batch_size=16, num_workers=2)
    model_trained = train(model, train_loader, valid_loader, exp_name="MLP_GRU", epochs=500)
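A note on the meter updates above: as I understand torchnet's AverageValueMeter, add(value, n) accumulates value into a running sum and n into a count, so adding l1.item() * n with weight n makes value()[0] the per-sample average loss over the epoch. A small sketch of that behaviour:

from torchnet.meter import AverageValueMeter

m = AverageValueMeter()
m.add(0.5 * 4, 4)    # a batch of 4 samples with mean loss 0.5
m.add(1.0 * 2, 2)    # a batch of 2 samples with mean loss 1.0
print(m.value()[0])  # (0.5*4 + 1.0*2) / 6 ≈ 0.667, the per-sample mean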
My dataset class takes three arrays of 5 elements each and passes them to the model as a single tensor.
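Roughly like this (illustrative names, not my exact dataset code):

import torch

def build_item(a, b, c):
    # a, b, c are the three 5-element arrays; concatenating them
    # should give the 15 features that final_model(15, 2) expects
    return torch.cat([torch.as_tensor(a, dtype=torch.float32),
                      torch.as_tensor(b, dtype=torch.float32),
                      torch.as_tensor(c, dtype=torch.float32)])  # shape: (15,)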
I get this error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x5 and 15x32)
and I don't understand why. Can you help me?
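The error is reproducible in isolation, so I suspect the batch reaching the first Linear layer has shape (16, 5), i.e. only one of the three 5-element arrays, instead of the (16, 15) that nn.Linear(15, 32) expects (a minimal sketch):

import torch

fc = torch.nn.Linear(15, 32)  # the first layer of DeepMLPRegressor
x = torch.randn(16, 5)        # a batch of 16 samples with only 5 features
fc(x)  # RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x5 and 15x32)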