I want to get a segmentation mask from YOLOv5, so I made some changes to YOLOv5, but I get the following error:
Traceback (most recent call last):
File “train.py”, line 713, in
main(opt)
File “train.py”, line 610, in main
train(opt.hyp, opt, device, callbacks)
File “train.py”, line 411, in train
scaler_mask.step(optimizer_mask) # optimizer.step
File “/home/mopanzhong/.conda/envs/rope_detection/lib/python3.7/site-packages/torch/cuda/amp/grad_scaler.py”, line 318, in step
assert len(optimizer_state[“found_inf_per_device”]) > 0, “No inf checks were recorded for this optimizer.”
train.py:
# Clear any stale gradients on BOTH optimizers before the training loop.
optimizer.zero_grad()
optimizer_mask.zero_grad()
for …:
…
# Mixed-precision forward pass (AMP autocast).
with amp.autocast(enabled=cuda):
pred, mask_res = model(imgs, get_mask_res=True) # forward
loss, loss_mask, loss_items = compute_loss(pred, targets.to(device), mask_res=mask_res,img_masks=img_masks) # loss scaled by batch_size
# Backward
print("$"*20, loss, loss_mask) #$$$$$$$$$$$$$$$$$$$$ tensor([3.15543], device='cuda:0', grad_fn=<MulBackward0>) tensor(0.74285, device='cuda:0', grad_fn=<MeanBackward0>)
# NOTE(review): two independent backward passes through two GradScalers.
# This only populates grads for optimizer_mask's parameters if loss_mask is
# still attached to the model's autograd graph.  Here loss_mask's grad_fn is
# MeanBackward0 over a freshly-created leaf tensor (see compute_loss, which
# rebuilds the list with torch.tensor(..., requires_grad=True)), so this
# backward never reaches any model parameter — presumably the root cause of
# the "No inf checks were recorded" assertion; verify in compute_loss.
scaler.scale(loss).backward()
scaler_mask.scale(loss_mask).backward()
# Optimize
if ni - last_opt_step >= accumulate:
print("%"*20,optimizer)
print("%" * 20, optimizer_mask)
The printed output of the two optimizers is:
%%%%%%%%%%%%%%%%%%%% SGD (
Parameter Group 0
dampening: 0
initial_lr: 0.01
lr: 0.0
momentum: 0.8
nesterov: True
weight_decay: 0
Parameter Group 1
dampening: 0
initial_lr: 0.01
lr: 0.0
momentum: 0.8
nesterov: True
weight_decay: 0.0005
Parameter Group 2
dampening: 0
initial_lr: 0.01
lr: 0.1
momentum: 0.8
nesterov: True
weight_decay: 0
)
%%%%%%%%%%%%%%%%%%%% SGD (
Parameter Group 0
dampening: 0
initial_lr: 0.01
lr: 0.0
momentum: 0.8
nesterov: True
weight_decay: 0
Parameter Group 1
dampening: 0
initial_lr: 0.01
lr: 0.0
momentum: 0.8
nesterov: True
weight_decay: 0.0005
Parameter Group 2
dampening: 0
initial_lr: 0.01
lr: 0.1
momentum: 0.8
nesterov: True
weight_decay: 0
)
# Unscale grads and step the detection optimizer, then update its scale.
scaler.step(optimizer) # optimizer.step
scaler.update()
# NOTE(review): this is the line the traceback points at (train.py:411).
# GradScaler.step() asserts len(found_inf_per_device) > 0; that dict is only
# populated for parameters that actually have a .grad.  If loss_mask was
# detached from the model graph in compute_loss, every param here has
# grad=None and the assertion fires — confirm against compute_loss.
scaler_mask.step(optimizer_mask) # optimizer.step
scaler_mask.update()
optimizer.zero_grad()
optimizer_mask.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
.....
and compute_loss:
…
# mask_losses is a list, like [1,2,3,4,5]
mask_loss = torch.tensor(mask_losses, device=device, requires_grad=True)
mask_losses = torch.tensor(mask_losses, device=device).mean(0,keepdim=True)
print(mask_losses)
print(lbox, lobj, lcls)
#print(torch.cat((lbox, lobj, lcls)).detach())
return (lbox + lobj + lcls) * bs, mask_loss.mean(), torch.cat((lbox, lobj, lcls, mask_losses)).detach()