I’m training the model with DistributedDataParallel and made weight file
Then trying to load the pth file with model and eval
# multi gpu load
self.model = EfficientDet(num_classes=args.num_class,
network=args.network,
W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
D_class=EFFICIENTDET[args.network]['D_class']
)
if torch.cuda.is_available():
self.model = self.model.cuda()
if args.distributed:
print('args.distributed...FF')
self.model = self.model.to(args.rank)
torch.cuda.set_device(0)
self.model = torch.nn.parallel.DistributedDataParallel(self.model
,device_ids=[args.rank]
,output_device=[args.rank]
,find_unused_parameters=True)
self.model = self.model.module
#self.model = self.model.cuda()
if(self.weights is not None):
print('load state dic...',self.weights)
checkpoint = torch.load(
self.weights, map_location=lambda storage, loc: storage)
state_dict = checkpoint['state_dict']
self.model.load_state_dict(state_dict)
if torch.cuda.is_available():
self.model = self.model.cuda()
self.model.eval()
Then got the following error
Loaded pretrained weights for efficientnet-b0
args.distributed...FF
Traceback (most recent call last):
File "demokogas.py", line 174, in <module>
detect = Detect(weights=args.weight)
File "demokogas.py", line 88, in __init__
,find_unused_parameters=True)
File "/home/jake/venv/lib/python3.6/site-packages/torch/nn/parallel/distributed.py", line 305, in __init__
self.process_group = _get_default_group()
File "/home/jake/venv/lib/python3.6/site-packages/torch/distributed/distributed_c10d.py", line 285, in _get_default_group
raise RuntimeError("Default process group has not been initialized, "
RuntimeError: Default process group has not been initialized, please make sure to call init_process_group.