I am training a Conv network on CIFAR 10 with 2 GPUs. I am using DataParallel()
to parallelize the model.
The model is:
class CNNModel(torch.nn.Module):
    """Six-layer ConvNet for CIFAR-10.

    Input:  (batch, 3, 32, 32) RGB images.
    Output: (batch, 10) class logits.

    Two 2x2 max-pools halve the spatial size twice: 32 -> 16 -> 8,
    so the flattened feature vector is 128 channels * 8 * 8.
    """

    def __init__(self):
        super().__init__()
        # Conv 1: 3 -> 16 channels, 3x3, stride 1, padding 1 keeps 32x32.
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        # Shared activation reused after every conv layer.
        self.activation = nn.ELU()
        # Maxpool 1: 32x32 -> 16x16.
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # Conv 2: 16 -> 32 channels.
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Conv 3-6: deepen to 128 channels, spatial size unchanged (padding=1).
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.cnn4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.cnn5 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.cnn6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        # Maxpool 2: 16x16 -> 8x8.
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # 8 = (32 / 2) / 2 after the two pools; classifier maps to 10 classes.
        self.fc1 = nn.Linear(128 * 8 * 8, 10)

    def forward(self, x):
        """Return class logits for a batch of CIFAR images x of shape (B, 3, 32, 32)."""
        out = self.activation(self.cnn1(x))
        out = self.maxpool1(out)
        out = self.activation(self.cnn2(out))
        out = self.activation(self.cnn3(out))
        out = self.activation(self.cnn4(out))
        out = self.activation(self.cnn5(out))
        out = self.activation(self.cnn6(out))
        out = self.maxpool2(out)
        # Flatten (B, 128, 8, 8) -> (B, 128*8*8) for the linear classifier.
        out = out.view(out.shape[0], -1)
        out = self.fc1(out)
        return out
The weight constraints I am using are:
class weightConstraint(object):
    """Callable for Module.apply(): projects negative weight entries to zero.

    NOTE(review): hard-zeroing every negative weight after each step is a very
    aggressive projection — it can kill most of a layer's capacity and is a
    plausible cause of the NaN loss; consider a softer constraint (e.g. weight
    decay or clamping to a small negative floor) if training diverges.
    """

    def __init__(self):
        pass

    def __call__(self, module):
        # Only touch modules that actually carry a weight tensor; getattr
        # also handles modules where the attribute exists but is None.
        w = getattr(module, "weight", None)
        if w is not None:
            # Clamp in place under no_grad instead of round-tripping through
            # .data, which bypasses autograd's version tracking.
            with torch.no_grad():
                w.clamp_(min=0)
I tried two techniques to apply these constraints:
Technique 1:
model = CNNModel()
constraint = weightConstraint()
# Module.apply() already walks every submodule recursively, and the
# constraint's __call__ filters on the 'weight' attribute itself, so there
# is no need to iterate the private _modules dict by hand.
model.apply(constraint)
# Wrap for multi-GPU training AFTER constraining the plain model.
model = nn.DataParallel(model).to(device)
Technique 2:
model = CNNModel()
model = nn.DataParallel(model).to(device)
constraint = weightConstraint()
# DataParallel exposes the wrapped network as the public .module attribute;
# apply() recurses from there, so no private _modules access is needed.
model.module.apply(constraint)
In both cases, training gives a loss of NaN
(and an accuracy of 0.10, i.e. random guessing over the 10 classes) from the very first epoch.