Hello, I’m trying to use a multi-GPU by referring to the tutorial.
However, when I create a child process and pass some type of instance, an error occurs.
This is a sample code.
import os
import sys
import tempfile
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from attrdict import AttrDict
import easydict
from torch.utils.tensorboard import SummaryWriter
def setup(rank, world_size):
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '12355'
dist.init_process_group("nccl", rank=rank, world_size=world_size)
def cleanup():
dist.destroy_process_group()
class ToyModel(nn.Module):
def __init__(self):
super(ToyModel, self).__init__()
self.net1 = nn.Linear(10, 10)
self.relu = nn.ReLU()
self.net2 = nn.Linear(10, 5)
def forward(self, x):
return self.net2(self.relu(self.net1(x)))
def demo_basic(rank, world_size, args):
print(f"Running basic DDP example on rank {rank}.")
setup(rank, world_size)
cleanup()
def run_demo(demo_fn, world_size):
easy_dict = easydict.EasyDict({'test':'test'})
tb_log = SummaryWriter(log_dir="./tensorboard")
args = ({'string':'ok',
'tensorboard': tb_log, # err
'easydict':easy_dict, # err
})
mp.spawn(demo_fn,
args=(world_size, args),
nprocs=world_size,
join=True)
n_gpus = torch.cuda.device_count()
assert n_gpus >= 2, f"Requires at least 2 GPUs to run, but got {n_gpus}"
world_size = n_gpus
run_demo(demo_basic, world_size)
This is the error message.
TypeError: cannot pickle '_thread.lock' object
[W CudaIPCTypes.cpp:21] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]