I am getting

AssertionError: No inf checks were recorded for this optimizer.
in "/torch/cuda/amp/grad_scaler.py", line 291

when mixed precision is used in the weird example below. However, if mixed precision is not used, PyTorch doesn't complain (toggle USE_HALF_PRECISION to False).
I am using PyTorch 1.6.0 (Python 3.7, CUDA 10.2.89, cuDNN 7.6.5 – everything from conda binaries). Here is the MWE.
import torch
from torch import nn
from torch.cuda.amp import autocast, GradScaler
class Identity_with_weights(nn.Module):
    '''For example, a KNN algorithm which returns the closest entry from a database for x. Weights are
    needed for seamless inclusion of the KNN baseline into a set of baselines which do have some
    parameters. Otherwise you would need to change the code (remove the optimizer, backward pass, etc.)
    just for KNN, which is not neat.'''

    def __init__(self):
        super(Identity_with_weights, self).__init__()
        self.__hidden__ = torch.nn.Linear(1, 1, bias=False)

    def forward(self, x):
        # we need this to be able to call backward on the loss which uses x (the outputs).
        # Nothing will happen in this example as the gradient propagates to the input,
        # which is not used anywhere else
        x.requires_grad = True
        return x
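# Note: self.__hidden__ is never used in forward, so its weight never receives
# a gradient; it exists only to keep the training loop uniform across baselines.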
if __name__ == "__main__":
    # config
    USE_HALF_PRECISION = True
    device = torch.device('cuda:0')

    # define model
    model = Identity_with_weights()

    # define training things
    criterion = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters())

    # for amp
    scaler = GradScaler()

    # targets are exactly the same as the inputs, i.e. for reconstruction
    inputs = torch.rand(8, 1)
    targets = inputs.clone().detach()

    # send to device
    model = model.to(device)
    inputs = inputs.to(device)
    targets = targets.to(device)

    # we don't need it for the sake of this example, but let's have it here anyway
    optimizer.zero_grad()

    # outputs are going to be f16 while the targets would otherwise stay f32,
    # so criterion would output a non-zero loss; cast both to half
    if USE_HALF_PRECISION:
        targets = targets.half()
        inputs = inputs.half()

    # autocast ops inside the context manager
    with autocast(USE_HALF_PRECISION):
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        print(loss)

    # scale the loss if using half precision
    if USE_HALF_PRECISION:
        scaler.scale(loss).backward()
        scaler.step(optimizer)  ## ERROR HERE
        scaler.update()
    else:
        loss.backward()
        optimizer.step()
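If it helps, the dummy parameter indeed never receives a gradient, since __hidden__ is not used in forward. A quick check along the lines of the MWE (a sketch; I'd expect None here because the weight never enters the graph):

m = Identity_with_weights()
out = m(torch.rand(8, 1))
out.sum().backward()                  # gradient flows to the input only
print(m.__hidden__.weight.grad)       # expected: None, the weight is unused

(The fp32 branch presumably works because optimizer.step() skips parameters whose .grad is None.)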
I think I am doing something wrong here. What does it complain about?
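P.S. A possible workaround, assuming the assertion fires because none of the optimizer's parameters receives a gradient (I have not fully verified this): make the dummy weight participate in the graph with zero contribution, e.g.

def forward(self, x):  # replacement for Identity_with_weights.forward
    x.requires_grad = True
    # multiplying by 0 keeps the output identical to x, but the weight now
    # receives a (zero) gradient, so GradScaler has something to inf-check
    return x + 0.0 * self.__hidden__(x)

I would still like to understand what the assertion actually means, though.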