I have a model that takes a tensor representing the difference between two images and outputs coordinates used to make them more alike. I then calculate the loss as the MSE between the created image and the original image, but when I run a backward pass no weights seem to update and the loss remains constant (although not None) throughout all epochs.
Is this because the loss isn’t calculated directly on the model output?
When reading other posts, requires_grad was sometimes responsible for this kind of problem. I've tried with requires_grad = True, but I'm still a bit unsure whether I'm using it correctly.
class Net(nn.Module):
    """Fully connected network mapping a flattened 10x10 RGB image
    difference (10 * 10 * 3 = 300 values) to 7 outputs interpreted as
    [x1, y1, x2, y2, r, g, b].
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10 * 10 * 3, 240)
        self.fc2 = nn.Linear(240, 240)
        self.fc3 = nn.Linear(240, 240)
        self.fc4 = nn.Linear(240, 7)  # [x1, y1, x2, y2, r, g, b]

    def forward(self, x):
        """Apply three ReLU-activated hidden layers, then the linear head."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
        return self.fc4(x)
def array_to_flatten_tensor(array_to_reshape):
    """Convert an array-like to a flat float tensor that tracks gradients.

    NOTE(review): wrapping the input in a brand-new ``FloatTensor`` creates a
    fresh leaf tensor with no autograd history — gradients cannot flow back
    through this conversion to whatever produced ``array_to_reshape``. If the
    array came (indirectly) from a network's output, this is a likely cause of
    the "weights never update / loss constant" symptom.

    :param array_to_reshape: array-like (e.g. nested list or numpy array) of
        numeric image data.
    :return: 1-D ``torch.Tensor`` (float32) with ``requires_grad=True``.
    """
    prepared_image = torch.FloatTensor(array_to_reshape)
    prepared_image.requires_grad = True
    # Bug fix: the original set `prepared_image.retains_grad = True`, but
    # `retains_grad` is not a tensor attribute — the line silently created a
    # dead Python attribute. The real API is the retain_grad() method
    # (a no-op on leaf tensors, but now actually invoked).
    prepared_image.retain_grad()
    return torch.flatten(prepared_image)
# Training script: tries to teach `net_to_train` to reproduce a target image
# by repeatedly drawing lines into an initially empty image.
# NOTE(review): the autograd graph is broken in several places below (see the
# inline notes), which explains the constant loss / frozen weights.

# Load the target image and flatten it to a (10 * 10 * 3,) float tensor.
image = cv2.imread("flower.png")
image = cv2.resize(image, (10, 10))
image = torch.FloatTensor(image)
# NOTE(review): requires_grad on the *target* image is unnecessary — the loss
# target does not need gradients, only the prediction path does.
image.requires_grad = True
# NOTE(review): `retains_grad` is not a tensor attribute (the method is
# retain_grad()); this line silently creates a dead Python attribute.
image.retains_grad = True
image = torch.flatten(image)

for image_pass in range(1000):
    running_loss = 0
    # Create empty image (a plain array, outside the autograd graph)
    drawn_image = array_to_png_test.generate_empty_RGB_array(10, 10)
    for _ in range(10):
        optimizer.zero_grad()
        # Pass (image - net drawn) image to net
        difference_array = image - array_to_flatten_tensor(drawn_image)
        output = net_to_train(difference_array)
        # Converts the output to two points and colour.
        # NOTE(review): float(...) converts a tensor to a plain Python float,
        # detaching the value from the autograd graph — past this point no
        # gradient can reach net_to_train, so backward() never updates its
        # weights. (Also: float() on the 7-element `output` raises at runtime;
        # presumably output[0], output[1], ... were intended, and point_1 is a
        # 3-tuple while point_2 has 2 elements — verify against draw_line.)
        point_1 = (float(output), 0, float(output))
        point_2 = (float(output), float(output))
        colour = round(max(float(output), 0)), round(max(float(output), 0)), round(max(float(output), 0))
        point_1, point_2 = sorted([point_1, point_2])
        # Draws a line to the empty image.
        # NOTE(review): this runs on the plain array side — it is not
        # differentiable, another break in the gradient chain.
        drawn_image = array_to_png_test.draw_line(point_1, point_2, colour, drawn_image)
        # Calculate loss as mean square difference of image and drawn image.
        # NOTE(review): drawn_image_tensor is a brand-new leaf tensor with no
        # history, so the loss has no path back to net_to_train's parameters —
        # backward() only populates grads on the two freshly-made image leaves.
        drawn_image_tensor = array_to_flatten_tensor(drawn_image)
        current_loss = criterion(drawn_image_tensor, image)
        running_loss += float(current_loss)
        current_loss.backward()
        optimizer.step()
    # NOTE(review): the divisor 250 does not match the 10 inner iterations —
    # confirm the intended averaging denominator.
    print("epoch " + str(image_pass) + " : " + str(running_loss / 250))