My model parameters aren't being updated; I have checked, and they are identical after every training batch. The task of this model is multi-label classification.
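For reference, this is roughly how I checked (a simplified sketch; model, criterion, optimizer, data, targets, and feat are the same objects as in the training code below):

    # snapshot every parameter, run one training step, then compare
    before = {name: p.detach().clone() for name, p in model.named_parameters()}
    scores = model(data, feat)
    loss = criterion(scores, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    for name, p in model.named_parameters():
        same = torch.equal(before[name], p.detach())
        print(name, "UNCHANGED" if same else "changed")

Every parameter prints UNCHANGED.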
This is the whole code of my model:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphAttentionLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GraphAttentionLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, H, adj):
        HW = torch.matmul(H, self.W)
        # dense pairwise attention scores between all nodes
        attention = torch.relu(torch.mm(HW, HW.transpose(0, 1)))
        h_prime = self.relu(torch.matmul(attention, HW))
        # propagate through the adjacency matrix
        h_prime = self.relu(torch.matmul(adj, h_prime))
        return h_prime

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

class GAT(nn.Module):
    def __init__(self, nfeat, nhid, nheads):
        super(GAT, self).__init__()
        self.nheads = nheads
        self.attentions = [GraphAttentionLayer(nfeat, nhid) for _ in range(nheads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)

    def forward(self, H, adj):
        # average the outputs of all attention heads
        out = 0
        for att in self.attentions:
            out = out + att(H, adj)
        out = out / self.nheads
        return out

class MAGNET(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, adjacency, attention_heads, rnn='lstm'):
        super(MAGNET, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.gat1 = GAT(input_size, hidden_size * 2, attention_heads)
        self.gat2 = GAT(hidden_size * 2, hidden_size * 2, attention_heads)
        self.A = nn.Parameter(adjacency)  # learnable adjacency
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, x, feat):
        features, _ = self.lstm(x)
        features = features[:, -1, :]  # last time step of the BiLSTM
        adj = self.A
        att = self.gat1(feat, adj)
        att = self.relu(att)
        att = self.gat2(att, adj)
        att = att.transpose(0, 1)
        out = torch.matmul(features, att)
        return out
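For context, these are the tensor shapes I am assuming here (based on the MAGNET setup, with one feature row per label):

    # x:    (batch, seq_len, input_size)   - BiLSTM input
    # feat: (num_classes, input_size)      - label node features for the GAT
    # adj:  (num_classes, num_classes)     - label adjacency matrix
    # features (last LSTM step): (batch, hidden_size * 2)
    # att after gat2:            (num_classes, hidden_size * 2)
    # out = features @ att.T:    (batch, num_classes) -> logits for BCEWithLogitsLoss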
This is how I train the model:
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X_train, X_test, y_train, y_test, feat, adjacency = load_data()
feat = feat.to(device)
adjacency = adjacency.to(device)

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 250
num_classes = y_train.shape[1]
learning_rate = 0.001
batch_size = 250
num_epochs = 250
attention_heads = 4
datalen = X_train.shape[0]

# Initialize network
model = MAGNET(input_size, hidden_size, num_classes, adjacency, attention_heads).to(device)

# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

model.train()

# Train network
for epoch in range(num_epochs):
    loss_list = []
    hammingloss = []
    microf1_score = []
    for start, end in indexloader(datalen, batch_size):
        data = X_train[start:end].to(device)
        targets = y_train[start:end].to(device)

        # forward
        scores = model(data, feat)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=10)
        loss_list.append(loss.item())

        # gradient descent or adam step
        optimizer.step()
    print(epoch, np.mean(loss_list))
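(indexloader is a small helper I haven't pasted; it just yields (start, end) slice bounds over the dataset, roughly equivalent to this sketch:)

    def indexloader(n, batch_size):
        # yield (start, end) index pairs covering n samples in order
        for start in range(0, n, batch_size):
            yield start, min(start + batch_size, n)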
Result:
0 1.0576843857765197
1 1.0576843857765197
2 1.0576843857765197
3 1.0576843857765197
4 1.0576843857765197
5 1.0576843857765197
6 1.0576843857765197
7 1.0576843857765197
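To dig further, I can print the gradient norms right after loss.backward() (a minimal debugging sketch, nothing more):

    for name, p in model.named_parameters():
        if p.grad is None:
            print(name, "grad is None")        # parameter never entered the graph
        else:
            print(name, p.grad.norm().item())  # zero norm = no learning signal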
What am I doing wrong? I really need help.
Thank you all.