Hi, everyone.
I get the following error when I try to retrain my network:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 10]], which is output 0 of AsStridedBackward0, is at version 3; expected version 2 instead.
I created and trained two simple networks on the MNIST dataset, and both trained successfully. The architecture is shown below; a simplified sketch of the training loop follows the class definition.
import imageio
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)
        self.r1 = nn.ReLU(inplace=False)
        self.r2 = nn.ReLU(inplace=False)

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = self.r1(self.fc1(x))
        x = self.r2(self.fc2(x))
        x = self.fc3(x)
        return x
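For context, the original training was just a standard MNIST loop, roughly like this (simplified sketch; the data path, batch size, learning rate and number of epochs here are placeholders, not necessarily what I actually used):

from torchvision import datasets, transforms

# Normalisation matches the (x - 0.5) / 0.5 used on the image in the re-train code below.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
train_set = datasets.MNIST('./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

net = Net1()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

net.train()
for epoch in range(5):
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(net(images), labels)
        loss.backward()
        optimizer.step()

torch.save(net.state_dict(), './mnist_fnn_l.pth')  # likewise for the small network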
But when I reload the two networks and try to retrain net2, it throws the error above. Below is my main code for the re-training.
class MyDataset(Dataset):
    def __init__(self, index, input_x, label):
        self.size = len(index)
        self.index = index
        self.input_list = input_x
        self.label_list = label

    def __len__(self):
        return self.size

    def __getitem__(self, item):
        image = self.input_list[self.index[item]]
        labels = self.label_list[self.index[item]]
        return image, labels
if __name__ == '__main__':
    small_path = './mnist_fnn_s.pth'
    large_path = './mnist_fnn_l.pth'
    temp_path = './mnist_temp.pth'
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    net1 = Net1()
    net1.load_state_dict(torch.load(large_path, map_location=torch.device('cpu')))
    net2 = Net1()
    net2.load_state_dict(torch.load(small_path, map_location=torch.device('cpu')))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net2.parameters(), lr=0.01)

    image_path = './14.jpg'
    IM = imageio.v2.imread(image_path)
    IM = IM[:, :, np.newaxis]
    IM = IM / 255
    IM = (IM - 0.5) / 0.5

    lb = -0.05
    ub = 0.05
    max_range = [[1.5, 1.], [1.5, 1.], [1.5, 1.], [1.5, 1.], [1.5, 1.],
                 [1.5, 1.], [1.5, 1.], [1.5, 1.], [1.5, 1.], [1.5, 1.]]
    max_range = np.array(max_range)

    # create original dataset
    n_dataset = 5000
    e = 0.0001
    for k in range(10):
        print("%d iteration" % k)
        input_list = []
        ref_list = []
        output_l_list = []
        output_s_list = []
        index = []
        for i in range(n_dataset):
            if (i + 1) % 1000 == 0:
                print("%d train sample created" % (i + 1))
            noise = lb + (ub - lb) * np.random.random()
            IM_sample = IM.copy()
            IM_sample[13:15, 13:15, :] += noise
            input_list.append(IM_sample.transpose(2, 0, 1))
            outputs1 = net1(torch.from_numpy(IM_sample.transpose(2, 0, 1)).unsqueeze_(0).to(torch.float32))
            outputs2 = net2(torch.from_numpy(IM_sample.transpose(2, 0, 1)).unsqueeze_(0).to(torch.float32))
            output_l_list.append(outputs1)
            output_s_list.append(outputs2)
            ref_list.append(outputs2.squeeze() + (outputs1.squeeze() - outputs2.squeeze() + e) / 2)
            for label in range(10):
                label_diff = outputs1[0, label] - outputs2[0, label]
                max_label = max(abs(max_range[label]))
                if (abs(label_diff) > max_label * 2 / 3) & (i not in index):
                    index.append(i)
        print(output_s_list[0:10])

        train_dataset = MyDataset(index, input_list, ref_list)
        train_dataloader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False, num_workers=0)

        print("Start re-train")
        net2.train()
        for epoch in range(20):
            if (epoch + 1) % 5 == 0:
                print("%d epoch re-train" % (epoch + 1))
            for i, data in enumerate(train_dataloader, 0):
                inputs, labels = data[0], data[1]
                outputs = net2(inputs.to(torch.float32))
                optimizer.zero_grad()
                loss = criterion(outputs, labels)
                # loss.backward(retain_graph=True)
                # loss.backward(inputs=list(net2.parameters()))
                loss.backward()
                optimizer.step()
        net2.eval()
        torch.save(net2.state_dict(), temp_path)
I know this kind of problem has been discussed many times in other topics, but none of the solutions I found works for my case, so I would really appreciate your help. Thank you!
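In case it makes the structure easier to see, here is a stripped-down sketch of what my code does, with random data instead of the perturbed MNIST image and hypothetical sizes. As in my real code, the targets are built from net2's own outputs before the re-training loop starts; I believe this minimal version hits the same kind of error on the second iteration:

import torch
import torch.nn as nn
import torch.optim as optim

net2 = nn.Sequential(nn.Flatten(), nn.Linear(784, 64), nn.ReLU(), nn.Linear(64, 10))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net2.parameters(), lr=0.01)

x = torch.randn(8, 1, 28, 28)
targets = net2(x) + 0.0001   # like ref_list: built from net2's outputs, so still attached to net2's graph

for step in range(2):
    outputs = net2(x)
    optimizer.zero_grad()
    loss = criterion(outputs, targets)
    loss.backward()          # backprop also runs through the graph of `targets`
    optimizer.step()         # in-place update of net2's parameters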