I have the following code, which performs two consecutive steps:
1. Iterate the original DataLoader and save some features via
`update_dataset_memory` (saved into a dict object).
2. Attach the saved features to the Dataset.
model.eval()
# Grab the underlying Dataset from the existing val loader so the
# feature memory can be attached to it.
val_dataset = val_loader.dataset
val_memory_loader = Memory_loader(cfg, 'val')
# Pass 1: iterate the original loader and cache per-sample features
# (see update_dataset_memory below).
val_memory_loader.update_dataset_memory(val_loader, model, debug=DEBUG)
val_dataset.set_memory_loader(val_memory_loader)
# Rebuild the loader around the now memory-augmented dataset.
val_loader = loader.construct_loader(cfg, "val", dataset=val_dataset)
# Pass 2: iterate the rebuilt loader — this is where the error is raised.
for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
...
Inside `update_dataset_memory`, iterating the original Dataset works fine:
@torch.no_grad()
def update_dataset_memory(self, dataloader, model, debug=False):
    """Extract features for every batch of ``dataloader`` and cache them.

    Args:
        dataloader: yields ``(inputs, labels, index, meta)`` batches; only
            ``inputs`` and ``meta`` are used here. ``meta`` is assumed to
            carry ``'boxes'`` and ``'metadata'`` tensors.
        model: network exposing ``feature_extract(inputs, meta)`` that
            returns a dict containing a ``'feature'`` tensor.
        debug (bool): currently unused; kept for interface compatibility.
    """
    for inputs, _, _, meta in dataloader:
        # Move inputs to GPU; ``inputs`` may be a list/tuple of tensors
        # (multi-pathway models) or a single tensor.
        if isinstance(inputs, (list, tuple)):
            inputs = [x.cuda(non_blocking=True) for x in inputs]
        else:
            inputs = inputs.cuda(non_blocking=True)
        feature = model.feature_extract(inputs, meta)['feature']
        # Store everything on CPU so the cache does not pin GPU memory.
        self.batch_set_memory(
            feature.cpu(), meta['boxes'].cpu(), meta['metadata'].cpu())
But when I iterate the updated Dataset with a map-style DataLoader, it raises
`ValueError('too many fds')`.
It seems that adding the extra dict to the Dataset and using it in this context
increases the number of open file descriptors noticeably (`num_workers=2` for
all DataLoaders).
So, how does this happen, and is this a proper practice? Here is the full traceback:
File "/home/nijingcheng/slowfast/tools/train_net_v2.py", line 179, in eval_epoch
for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
File "/home/nijingcheng/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 279, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "/home/nijingcheng/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 719, in __init__
w.start()
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/process.py", line 112, in start
self._popen = self._Popen(self)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/context.py", line 291, in _Popen
return Popen(process_obj)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/popen_forkserver.py", line 35, in __init__
super().__init__(process_obj)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/popen_fork.py", line 20, in __init__
self._launch(process_obj)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/popen_forkserver.py", line 51, in _launch
self.sentinel, w = forkserver.connect_to_new_process(self._fds)
File "/home/nijingcheng/anaconda3/lib/python3.7/multiprocessing/forkserver.py", line 66, in connect_to_new_process
raise ValueError('too many fds')
ValueError: too many fds