1.) You should apply the same preprocessing as you did during training (but omit the data augmentation).
2.) You have to create an instance of your network and load its state_dict. A minimal example to do so is given below:
# Just a small dummy network
class Network(torch.nn.Module):
    """Minimal dummy network consisting of a single 2D convolution."""

    def __init__(self):
        """Create the single convolution layer."""
        super().__init__()
        # single convolution with 1 input and 1 output channel, a kernel of
        # 3x3 and padding=1 (with the default stride of 1 this keeps the
        # spatial size of the input)
        self.conv = torch.nn.Conv2d(1, 1, 3, padding=1)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result.

        ``x`` must have a single channel, e.g. a batch of shape (N, 1, H, W).
        """
        return self.conv(x)
# Round-trip demo: save a model's state_dict, load it into a fresh instance
# and verify that both instances produce the same predictions.

# create model instance
model = Network()

# let's just save our model here
# don't use ".pth" here since this extension is also used in python internals
# (path configuration files in site-packages)
torch.save(model.state_dict(), "checkpoint.pt")

# now we just load the model back
# create a new instance of the model
new_model = Network()
# load the previously saved state_dict
# NOTE: torch.load unpickles the file, so only load checkpoints you trust
# (recent PyTorch versions offer `weights_only=True` to restrict this).
new_model.load_state_dict(torch.load("checkpoint.pt"))

# check if predictions of models are equal
# generate random input of size (N,C,H,W)
input_tensor = torch.rand(1, 1, 28, 28)

# switch to eval mode for both models (eval() modifies the module in place)
model.eval()
new_model.eval()

# predict with both models; gradients are not needed for inference, so
# disable autograd to avoid building a graph
with torch.no_grad():
    pred_original = model(input_tensor)
    pred_loaded = new_model(input_tensor)

# check prediction difference to be only due to numerical differences
assert (pred_original - pred_loaded).abs().sum() <= 1e-6