I am trying to extract the weights from a linear layer, but they do not change during training, even though the error is dropping monotonically. When I print the sum of the weights, nothing happens: it stays constant.
I tried to extract the weights in two ways, but neither works:
list(model.parameters())[0].data.numpy()
or
model.fc2.weight.data.numpy()
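As far as I understand, list(model.parameters())[0] refers to the first registered layer (fc1 in my model below), while the second line refers to fc2, so the two snippets actually look at different parameters. This is roughly how I check that (just a small sketch, assuming the Net class defined further down):
# Sketch: confirm which parameters the two extraction methods return.
# Assumes the Net class from this post (fc1 has no bias, so it is parameter 0).
model = Net()
first = list(model.parameters())[0]
print(first.size())                  # torch.Size([80, 100]): this is fc1.weight
print(first is model.fc1.weight)     # True: same Parameter object
print(model.fc2.weight.size())       # torch.Size([87, 80]): a different parameter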
Here are the other code snippets:
def train(epochs):
    model.train()
    for epoch in range(1, epochs + 1):
        # Train on train set
        print(np.sum(model.fc2.weight.data.numpy()))
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
and
# Define model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(100, 80, bias=False)
        init.normal(self.fc1.weight, mean=0, std=1)
        self.fc2 = nn.Linear(80, 87)
        self.fc3 = nn.Linear(87, 94)
        self.fc4 = nn.Linear(94, 100)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        return x
I can't reproduce your error; my guess is that the problem comes from your data set:
model = Net()
optimizer = optim.Adam(model.parameters(), 0.01)
criterion = nn.MSELoss()

def train(epochs):
    model.train()
    for epoch in range(1, epochs + 1):
        # I don't have your data set:
        data = Variable(torch.rand(1, 100))
        target = Variable(torch.ones(1, 100))
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        s = torch.sum(model.fc2.weight.data)
        print(s)

train(100)
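If the sum really stays constant for you, it might also be worth checking whether any gradient reaches fc2 at all after backward(). Something like this, just as a sketch using the names from your snippets:
# Quick gradient check (sketch): if this prints 0, no gradient reaches fc2
# and optimizer.step() cannot change its weights.
model = Net()
criterion = nn.MSELoss()
data = Variable(torch.rand(1, 100))
target = Variable(torch.ones(1, 100))
loss = criterion(model(data), target)
loss.backward()
print(torch.sum(torch.abs(model.fc2.weight.grad)))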
Thanks for your help.
I prepared a minimal working example of my code. Maybe there is something wrong with it, since I am new to PyTorch and may be missing something important. Can you see anything?
import numpy as np
import numpy.random as rnd
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
import torch.optim as optim
import torch.nn.init as init
#
# Return data
#
def sparse_data(N, k, num):
    # Build `num` samples of length N, each with k non-zero (half-normal) entries
    # placed at random positions; all other entries are zero.
    X = np.zeros((N, num))
    X[0:k, :] = np.abs(rnd.normal(0, 1, size=(k, num)))
    idx_1 = rnd.sample(X.shape).argsort(axis=0)
    idx_2 = np.tile(np.arange(X.shape[1]), (X.shape[0], 1))
    return np.transpose(X[idx_1, idx_2])
#
# Define model
#
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 80, bias=False)
        init.normal(self.fc1.weight, 0, 1)
        self.fc2 = nn.Linear(80, 90)
        self.fc3 = nn.Linear(90, 100)

    def forward(self, x):
        x = self.fc1(x)
        x = F.sigmoid(self.fc2(x))
        x = F.sigmoid(self.fc3(x))
        return x
#
# Prepare data
#
# <-- sample x -->
X_train = sparse_data(N=100, k=20, num=500)
train = data_utils.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(X_train).float())
train_loader = data_utils.DataLoader(train, batch_size=1)
model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss(size_average=False)
#
# Train
#
model.train()
init_weights = model.fc1.weight
for epoch in range(1, 3):
    for batch_idx, (dat, target) in enumerate(train_loader):
        data, target = Variable(dat), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
trained_weights = model.fc1.weight
#
# Output
#
print(torch.sum(trained_weights), torch.sum(init_weights), torch.sum(trained_weights-init_weights))
Output is:
Variable containing:
80.6124
[torch.FloatTensor of size 1]
Variable containing:
80.6124
[torch.FloatTensor of size 1]
Variable containing:
0
[torch.FloatTensor of size 1]
Why are you doing torch.sum(model.fc1.weight.data) on the weights? This adds up all the feature weights into a single number. The main purpose of extracting the weights is to identify which features are important for the task. Could you explain if I am interpreting this wrong?
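For example, something like this would give one number per input feature instead of a single scalar (just a sketch based on the fc1 layer above, not code from the post):
# Sketch: per-input-feature weight magnitude for fc1 (weight shape: 80 x 100).
# Column j holds the weights applied to input feature j, so summing the absolute
# values over the output dimension gives a rough importance score per feature.
feature_importance = torch.sum(torch.abs(model.fc1.weight.data), 0)
print(feature_importance)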