Apparent issue with accessing the weights of the model

Zeeyuu · August 26, 2021, 3:37pm

I used the following way to store the model weights before and after training:

before=[ ]
for param in net.parameters():
before.append(param)

for epoch in range(tot_epoch):
…

after=[ ]
for param in net.parameters():
after.append(param)

Then I evaluated ‘before == after’
and got ‘True’!

There should not be any issue with the training steps per se, as the accuracy on the test set went from 17% initially to 69%.

Here is the code:

class Net(nn.Module):
    """Fully connected classifier: four linear layers with ReLU in between.

    Consumes flattened inputs with ``28 * 28`` features and produces
    log-probabilities over 10 classes, the form expected by ``nn.NLLLoss``.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 512 * 30)
        self.fc2 = nn.Linear(512 * 30, 128 * 30)
        self.fc3 = nn.Linear(128 * 30, 64 * 30)
        self.fc4 = nn.Linear(64 * 30, 10)

    def forward(self, x):
        """Return per-class log-probabilities for ``x``.

        Args:
            x: Tensor whose last dimension matches the input size of ``fc1``.

        Returns:
            Tensor whose last dimension has the output size of ``fc4`` and
            holds log-probabilities (each row exponentiates to a sum of 1).
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # Pass ``dim`` explicitly: relying on the implicit dimension choice is
        # deprecated and emits a warning. The class scores produced by the
        # final linear layer live on the last dimension.
        return F.log_softmax(self.fc4(x), dim=-1)
 

net = Net()
net.to(device)


import torch.optim as optim

# NLLLoss expects log-probabilities, which is what Net.forward returns.
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)

tot_epoch = 20

loss_history = []

# Snapshot the initial weights as detached copies. Appending the parameters
# themselves only stores references to the live tensors, which the optimizer
# updates in place, so such a list would always show the current weights.
# Note that this duplicates every parameter in memory on the model's device.
# Compare snapshots element-wise, e.g.
# all(torch.equal(b, a) for b, a in zip(before, after)); `before == after` on
# lists of tensors is not a reliable value comparison.
before = [param.detach().clone() for param in net.parameters()]
 

for epoch in range(tot_epoch):  # loop over the dataset multiple times

    # Loss reporting is disabled in this loop, so this accumulator is reset
    # each epoch but never added to.
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):

        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        # Flatten every sample to one row while keeping the batch dimension,
        # so this works for any batch size (identical to view(1, -1) when the
        # loader yields one sample at a time).
        outputs = net(inputs.view(inputs.size(0), -1))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if i % 2000 == 1999:    # evaluate every 2000 mini-batches
            # Per-class tallies are rebuilt from scratch at every evaluation;
            # they are not printed or stored anywhere inside this loop.
            class_correct = [0.0] * 10
            class_total = [0.0] * 10
            acc = []  # not filled in by this loop
            datasize = 5000  # maximum number of test batches to evaluate
            with torch.no_grad():
                # Separate names for the evaluation loop so it does not
                # clobber the training loop's i / data / labels / outputs.
                for test_idx, test_data in enumerate(testloader, 0):
                    # `>=` stops after exactly `datasize` batches.
                    if test_idx >= datasize:
                        break
                    images, test_labels = test_data
                    test_outputs = net(
                        images.view(images.size(0), -1).to(device)
                    )
                    _, predicted = torch.max(test_outputs, 1)
                    hits = (predicted == test_labels.to(device)).tolist()

                    # Tally every sample in the batch, not only the first.
                    for label, hit in zip(test_labels.tolist(), hits):
                        class_correct[label] += hit
                        class_total[label] += 1
 
# Snapshot the trained weights as detached copies, so the list holds plain
# values rather than references to the live parameters (which would keep
# changing if training continued). To check whether training changed the
# weights, compare element-wise against an earlier copy, e.g.
# all(torch.equal(b, a) for b, a in zip(before, after)).
after = [param.detach().clone() for param in net.parameters()]

Zeeyuu · August 27, 2021, 2:04am

Okay, it seems that what I saved in the list were references to the parameter tensors (i.e. their memory locations) rather than copies of the weight values. If I clone the initial weights, the two lists are no longer the same.