I’m using the torch.multiprocessing module to parallelize model inference. My understanding is that a CUDA tensor is placed in shared memory rather than copied into each process, so memory usage should not double when I spawn a new process with torch.multiprocessing. But when I run the following code, memory usage grows very quickly.
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.multiprocessing as mp

# Two separate contexts so the 'fork' and 'spawn' start methods can be
# compared side by side in the same run.
mp1 = mp.get_context('fork')
mp2 = mp.get_context('spawn')


class Net(nn.Module):
    """Small 3-layer MLP (10 -> 128 -> 128 -> 10) used to observe memory use."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        # Plain sequential pass; no activations, matching the original.
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


class Work1(mp1.Process):
    """Worker process created with the 'fork' start method.

    Holds a reference to the shared model in ``self.a`` and idles forever.
    """

    def __init__(self, a):
        # Fix: the original called mp.Process.__init__(self) — the
        # default-context base — instead of this class's actual base
        # (mp1.Process). super() initializes the correct base class.
        super().__init__()
        self.a = a

    def run(self):
        # Stay alive indefinitely.  Sleep instead of the original
        # ``while True: pass`` busy-wait so each worker does not burn a
        # full CPU core while memory usage is being observed.
        while True:
            time.sleep(1)


class Work2(mp2.Process):
    """Worker process created with the 'spawn' start method."""

    def __init__(self, a):
        super().__init__()
        self.a = a

    def run(self):
        # Stay alive indefinitely without busy-waiting.
        while True:
            time.sleep(1)


def main():
    """Spawn 10 fork-workers, then 10 spawn-workers, all sharing one model."""
    p = Net().cuda()
    # share_memory() moves CPU tensors into shared memory; for CUDA tensors
    # sharing goes through CUDA IPC handles instead.  NOTE(review): CUDA is
    # not supported with the 'fork' start method — the fork workers here rely
    # on copy-on-write of the parent's address space, not true sharing.
    p.share_memory()
    for i in range(10):
        w = Work1(p)
        w.start()
        print('start a work1')
    time.sleep(10)
    for i in range(10):
        w = Work2(p)
        w.start()
        print('start a work2')


if __name__ == '__main__':
    main()
I know why
fork does not increase memory usage immediately (copy-on-write), but could anyone tell me why
spawn doubles the memory I used? Or why does the
Net object occupy so much memory?