Out of Memory on Multi-GPU -- Model Parallelization

I am facing an "out of memory" error when trying to load 100 instances of MySmallModel onto "cuda:0".

I have two GPUs with 12 GB each.

RuntimeError: CUDA out of memory. Tried to allocate 28.00 MiB (GPU 0; 11.91 GiB total capacity; 11.18 GiB already allocated; 19.38 MiB free; 34.67 MiB cached)
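If my math is right, a single `nn.Linear(70000, 3000)` alone holds about 70,000 × 3,000 ≈ 210M parameters, i.e. roughly 840 MB in fp32, so 100 copies of fc1 on cuda:0 would need about 84 GB, which can never fit into 12 GB.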

Can anyone please help me solve this problem? How can I divide the `self.fc1 = nn.Linear(70000, 3000)` layer into multiple smaller layers and then do the computation?
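One idea I had is to split fc1 along its output dimension, with half of the output columns on each GPU, and concatenate the two partial results. Below is a minimal sketch of that idea (the `SplitLinear` name and the 50/50 split are my own, and I am not sure it actually helps, since the total parameter count stays the same):

```python
import torch
import torch.nn as nn

class SplitLinear(nn.Module):
    """Column-parallel split of nn.Linear(70000, 3000) across two GPUs."""
    def __init__(self, in_features=70000, out_features=3000):
        super(SplitLinear, self).__init__()
        # Each half produces out_features // 2 of the output columns.
        self.part0 = nn.Linear(in_features, out_features // 2).cuda(0)
        self.part1 = nn.Linear(in_features, out_features // 2).cuda(1)

    def forward(self, x):
        # Compute each half on its own device, then gather on cuda:1.
        y0 = self.part0(x.cuda(0))
        y1 = self.part1(x.cuda(1))
        return torch.cat([y0.cuda(1), y1], dim=1)
```

For reference, here is my current code: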

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class MySmallModel(nn.Module):
    def __init__(self):
        super(MySmallModel, self).__init__()
        # fc1 alone holds 70000 * 3000 weights (~840 MB in fp32).
        self.fc1 = nn.Linear(70000, 3000)
        self.fc3 = nn.Linear(3000, 1000)

        self.fc1.cuda(0)
        self.fc3.cuda(1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = x.cuda(1)  # move activations to the GPU holding fc3
        x = F.relu(self.fc3(x))
        return x

class Classifier(nn.Module):
    def __init__(self, input_nodes):
        super(Classifier, self).__init__()

        # 100 copies of MySmallModel -- this is where cuda:0 runs out of memory.
        self.networks = nn.ModuleList([MySmallModel() for i in range(100)])

        self.sharedlayer = nn.Sequential(
            nn.Linear(30000, 300),
            nn.ReLU(),
            nn.Linear(300, 100),
            nn.ReLU(),
        )
        self.sharedlayer.cuda(1)

    def forward(self, input_):
        # Run each sub-network on the input and collect the outputs.
        x_list = []
        for i in range(len(self.networks)):
            x_list.append(F.relu(self.networks[i](input_)))
        x = torch.cat(x_list, dim=1)

        x = x.cuda(1)
        h_shared = self.sharedlayer(x)
        return h_shared

```

And this is the training loop:

```python
criterion = nn.MSELoss()
model = Classifier(input_nodes)
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(n_epochs):
    running_loss = 0
    i = 0
    model.train()
```
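To see where the memory actually goes, I am printing the allocated bytes per device with `torch.cuda.memory_allocated` (a standard PyTorch call), like this:

```python
# Print how much memory is currently allocated on each GPU.
for d in range(torch.cuda.device_count()):
    gib = torch.cuda.memory_allocated(d) / 1024 ** 3
    print(f"cuda:{d}: {gib:.2f} GiB allocated")
```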