import random
import torch
from torch.multiprocessing import Process
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # clamp(min=0) acts as a ReLU
        h_relu = self.input_linear(x).clamp(min=0)
        # reuse the same middle layer five times
        for _ in range(5):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
# batch size, input dim, hidden dim, output dim
N, D_in, H, D_out = 64, 1000, 100, 10

def p1():
    # each worker builds its own input and model directly on the GPU
    x1 = torch.randn(N, D_in).cuda()
    model1 = DynamicNet(D_in, H, D_out).cuda()
    while True:
        y_pred1 = model1(x1)

def p2():
    x2 = torch.randn(N, D_in).cuda()
    model2 = DynamicNet(D_in, H, D_out).cuda()
    t = 0
    while True:
        y_pred2 = model2(x2)
        print("Step {}".format(t))
        t += 1
if __name__ == '__main__':
    proc1 = Process(target=p1, args=())
    proc2 = Process(target=p2, args=())
    proc1.start()
    proc2.start()
    proc1.join()
    proc2.join()
This allocates over 3 GB of RAM in main memory but only about 1 GB of VRAM. Why is this the case, especially when everything is pushed to the CUDA device?
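For reference, a minimal sketch of how the two kinds of usage could be compared per process (it assumes psutil is installed; report_memory is just a name I made up for the helper). torch.cuda.memory_allocated() only counts memory held by tensors in the current process, so comparing it against the process RSS shows how much of the 3 GB is something other than tensor data:

import os
import psutil  # assumed available; only used for the host-side RSS
import torch

def report_memory(tag):
    # resident set size of this process in main memory, in MiB
    rss_mib = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2
    # GPU memory currently occupied by tensors in this process, in MiB;
    # per-process CUDA context/driver overhead is not included here
    cuda_mib = torch.cuda.memory_allocated() / 1024 ** 2
    print("{}: host RSS ~{:.0f} MiB, CUDA tensors ~{:.0f} MiB".format(tag, rss_mib, cuda_mib))

Calling report_memory("p1") (or "p2") once inside each worker loop would print both numbers side by side for every process.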
EDIT: For the sake of citation, the DynamicNet class is modified from this: https://jhui.github.io/2018/02/09/PyTorch-neural-networks/