I’m trying to train a neural network on three different data sources, and I’m running into a few problems. Should I be using a functional-style API like Keras offers, or is subclassing nn.Module fine? Here’s my current (working) code:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        filtsize = 25
        poolsize = 10
        global pwmsize
        print("pwmsize={}".format(pwmsize))
        # set zero padding to p = (f-1)//2 to keep input/output sizes equal
        # (integer division: Conv1d's padding must be an int)
        self.seq1 = nn.Sequential(
            nn.Conv1d(4, 1, kernel_size=pwmsize, padding=(pwmsize-1)//2),
            nn.ReLU())
        self.seq2 = nn.Sequential(
            nn.Conv1d(1, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.seq3 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.dnase1 = nn.Sequential(
            nn.Conv1d(1, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.dnase2 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.dnase3 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.combineconv = nn.Sequential(
            nn.Conv1d(50, 55, kernel_size=filtsize, padding=(filtsize-1)//2),
            nn.ReLU())
        self.pool = nn.AvgPool1d(poolsize)
        # *************************************
        # self.dnaseresidual = nn.Sequential(
        #     # bn-relu-conv-bn-relu-conv
        #     nn.BatchNorm1d(25), nn.ReLU(),
        #     nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize-1)//2),
        #     nn.BatchNorm1d(25), nn.ReLU(),
        #     nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize-1)//2))
        # self.postdnaseresidual = nn.Sequential(
        #     nn.BatchNorm1d(25), nn.ReLU())
        # *************************************
        # self.hidden = nn.Sequential(
        #     nn.Linear(2200, 100), nn.ReLU())
        # self.last = nn.Sequential(
        #     nn.Linear(1000, 1), nn.Sigmoid())
        self.seqdnasehidden = nn.Sequential(
            nn.Linear(5500, 100), nn.ReLU())
        # self.allhidden = nn.Sequential(
        #     nn.Linear(100, 100), nn.ReLU())
        self.allhidden = nn.Sequential(
            nn.Linear(107, 100), nn.ReLU())
        self.final = nn.Sequential(
            nn.Linear(100, 1), nn.Sigmoid())
    def forward(self, s, d, r):
        # sequence branch
        seqp = self.seq1(s)
        seqp = self.seq2(seqp)
        seqp = self.seq3(seqp)
        # DNase branch
        dnasep = self.dnase1(d)
        dnasep = self.dnase2(dnasep)
        dnasep = self.dnase3(dnasep)
        # residual
        # *************************************
        # nresid = 1
        # for i in range(nresid):
        #     orig = dnasep
        #     dnasep = self.dnaseresidual(dnasep)
        #     dnasep += orig
        # dnasep = self.postdnaseresidual(dnasep)  # apply bn + activation
        # *************************************
        # merge along dim 1, the channel dim; torch layout is (batch, channels, width)
        combine = torch.cat((seqp, dnasep), 1)
        combine = self.combineconv(combine)
        combine = self.pool(combine)  # width shrinks to prev width (prob. 1000) // poolsize, since stride = poolsize
        combine = combine.view(combine.size(0), -1)  # flatten: keep batch dim, -1 infers the rest
        seqdnasecombine = self.seqdnasehidden(combine)
        # merge with rna
        allcombine = torch.cat((seqdnasecombine, r), 1)
        allcombine = self.allhidden(allcombine)
        out = self.final(allcombine)
        return out
cnn = CNN()
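For reference, here’s a quick shape check I run on the model; the sizes are my assumptions (an odd pwmsize so the “same” padding works out, input width 1000, and 7 RNA features), picked only so the Linear(5500, 100) and Linear(107, 100) dimensions line up:

# Hypothetical smoke test: the sizes below are assumptions, not my real data.
# Width 1000 -> AvgPool1d(10) -> width 100; 55 channels * 100 = 5500 flattened;
# 100 hidden units + 7 RNA features = 107, matching the Linear layers above.
s = torch.randn(2, 4, 1000)   # batch of 2 one-hot-style sequences
d = torch.randn(2, 1, 1000)   # matching DNase tracks
r = torch.randn(2, 7)         # 7 RNA features per example
out = cnn(s, d, r)
print(out.shape)              # torch.Size([2, 1])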
I’m not quite clear on which weights get updated in each backprop step, or whether the inputs are actually being convolved the way I intend. For instance, if I define a random Sequential layer in __init__() but never call it in forward(), its weights still show up in list(cnn.parameters()) when I check later. How can I see which layers are actually connected to which?
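The closest I’ve gotten is registering a forward hook on each leaf module and printing when it fires, roughly like this (a sketch, reusing the dummy s, d, r inputs from the shape check above; named_modules and register_forward_hook are the real torch.nn APIs, the rest is illustrative):

# Sketch: print each leaf module's name when it actually runs in forward().
# Modules defined in __init__ but never called stay silent here, even though
# their weights still appear in list(cnn.parameters()).
handles = []
for name, module in cnn.named_modules():
    if len(list(module.children())) == 0:  # leaf modules only
        handles.append(module.register_forward_hook(
            lambda mod, inp, outp, name=name: print(name, tuple(outp.shape))))
out = cnn(s, d, r)
for h in handles:
    h.remove()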
I’m also initializing the first conv layer’s weights to an arbitrary tensor and freezing them:
m.weight = param                    # m is the first Conv1d, i.e. self.seq1[0]
nn.init.constant_(m.bias.data, 0)   # set bias = 0
m.weight.requires_grad = False      # freeze the layer
m.bias.requires_grad = False
where param is created as torch.nn.Parameter(torch.from_numpy(numpyarr).float()). However, once I do this, the gradients for several of the layers turn to 0. Does anyone know what might be causing this?
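In case it helps, this is roughly how I’m checking the gradients (a sketch: the target and squared-error loss are placeholders for diagnosis, not my real training objective):

# Placeholder backward pass: print every parameter's gradient norm so the
# zeroed layers are visible.
out = cnn(s, d, r)
loss = ((out - torch.ones_like(out)) ** 2).mean()
cnn.zero_grad()
loss.backward()
for name, p in cnn.named_parameters():
    print(name, "requires_grad =", p.requires_grad,
          "grad norm =", None if p.grad is None else p.grad.norm().item())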
Thanks so much!