Trouble with simple NN with multiple data sources

I’m trying to train a neural network on three different data sources, and I’m running into a few problems. Should I be using something like the Keras functional API for a multi-input model, or is subclassing nn.Module fine? Here’s my current code (working):

import torch
import torch.nn as nn

# pwmsize is set globally elsewhere in the script
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        filtsize = 25
        poolsize = 10
        global pwmsize
        print("pwmsize={}".format(pwmsize))
        # zero padding of (kernel_size - 1) // 2 keeps input and output lengths equal (for odd kernel sizes)
        self.seq1 = nn.Sequential(
            nn.Conv1d(4, 1, kernel_size=pwmsize, padding=(pwmsize - 1) // 2),
            nn.ReLU())
        self.seq2 = nn.Sequential(
            nn.Conv1d(1, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2),
            nn.ReLU())
        self.seq3 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2),
            nn.ReLU())
        self.dnase1 = nn.Sequential(
            nn.Conv1d(1, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2), nn.ReLU())
        self.dnase2 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2), nn.ReLU())
        self.dnase3 = nn.Sequential(
            nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2), nn.ReLU())
        self.combineconv = nn.Sequential(
            nn.Conv1d(50, 55, kernel_size=filtsize, padding=(filtsize - 1) // 2), nn.ReLU())
        self.pool = nn.AvgPool1d(poolsize)
        # *************************************
        # self.dnaseresidual = nn.Sequential(
        #     # bn-relu-conv-bn-relu-conv
        #     nn.BatchNorm1d(25), nn.ReLU(),
        #     nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2),
        #     nn.BatchNorm1d(25), nn.ReLU(),
        #     nn.Conv1d(25, 25, kernel_size=filtsize, padding=(filtsize - 1) // 2))
        # self.postdnaseresidual = nn.Sequential(
        #     nn.BatchNorm1d(25), nn.ReLU())
        # *************************************

        # self.hidden = nn.Sequential(
        #     # nn.Linear(1080, 1000), nn.ReLU())
        #     nn.Linear(2200, 100), nn.ReLU())
        # self.last = nn.Sequential(
        #     # nn.Linear(810, 1), nn.Sigmoid())
        #     nn.Linear(1000, 1), nn.Sigmoid())
        self.seqdnasehidden = nn.Sequential(
            # nn.Linear(1080, 1000), nn.ReLU())
            nn.Linear(5500, 100), nn.ReLU())
        # self.allhidden = nn.Sequential(
        #     nn.Linear(100, 100), nn.ReLU())
        self.allhidden = nn.Sequential(
            nn.Linear(107, 100), nn.ReLU())
        self.final = nn.Sequential(
            nn.Linear(100, 1), nn.Sigmoid())
  

    def forward(self, s, d, r):
        # embed()

        # sequence branch
        seqp = self.seq1(s)
        seqp = self.seq2(seqp)
        seqp = self.seq3(seqp)
        # DNase branch
        dnasep = self.dnase1(d)
        dnasep = self.dnase2(dnasep)
        dnasep = self.dnase3(dnasep)
        # residual
        # *************************************
        # nresid = 1
        # for i in range(nresid):
        #     orig = dnasep
        #     dnasep = self.dnaseresidual(dnasep)
        #     dnasep += orig
        # dnasep = self.postdnaseresidual(dnasep)  # apply bn-activation
        # *************************************

        # merge the sequence and DNase branches along the channel axis;
        # tensors are (batch, channels/filters, width)
        combine = torch.cat((seqp, dnasep), 1)
        combine = self.combineconv(combine)
        combine = self.pool(combine)  # width after pooling = previous width (prob. 1000) / poolsize, since stride = poolsize
        combine = combine.view(combine.size(0), -1)  # flatten: -1 infers the size, combine.size(0) keeps the batch dimension
        seqdnasecombine = self.seqdnasehidden(combine)
        # embed()
        # merge with the RNA features
        allcombine = torch.cat((seqdnasecombine, r), 1)
        allcombine = self.allhidden(allcombine)
        out = self.final(allcombine)
        return out

cnn = CNN()

I’m not quite clear on which weights get updated in each backprop step, or whether the inputs are actually being convolved the way I intend. For instance, if I define a random Sequential layer in __init__() but never use it in forward(), it still shows up in list(cnn.parameters()) when I check later. How can I see which layers are connected to which?
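Is something like this the right way to check? (Just a sketch; the dummy shapes are assumptions I back-calculated from the layer sizes: 55 channels × 100 pooled positions = 5500 for seqdnasehidden, and 100 + 7 = 107 for allhidden, so the inputs would be 1000 long and r would have 7 features. pwmsize also has to be odd for the (pwmsize - 1) // 2 padding to preserve the length.)

batch = 8
s = torch.randn(batch, 4, 1000)   # one-hot sequence, 4 channels
d = torch.randn(batch, 1, 1000)   # DNase signal, 1 channel
r = torch.randn(batch, 7)         # per-example RNA features

out = cnn(s, d, r)
out.sum().backward()

print(cnn)  # lists every registered submodule, whether or not forward() uses it
# A parameter that is registered in __init__() but never used in forward() keeps
# grad=None after backward(), so this loop shows which layers actually feed the output:
for name, p in cnn.named_parameters():
    print(name, "requires_grad:", p.requires_grad, "grad is None:", p.grad is None)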

I’m also initializing the first layer’s weights to an arbitrary tensor and freezing them:

m.weight = param
nn.init.constant_(m.bias.data, 0)  # set bias = 0
m.weight.requires_grad = False
m.bias.requires_grad = False

where param is created as torch.nn.Parameter(torch.from_numpy(numpyarr).float()). However, once I do this, I notice that the gradients for several of the layers become 0. Does anyone know what might be causing this?
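For context, here is roughly the full snippet (a sketch: m stands for the Conv1d inside self.seq1, and numpyarr is just a placeholder with the right shape; the real values come from my own precomputed array):

import numpy as np

m = cnn.seq1[0]  # the Conv1d(4, 1, kernel_size=pwmsize, ...) layer
numpyarr = np.random.randn(1, 4, pwmsize).astype(np.float32)  # placeholder; shape is (out_channels, in_channels, kernel_size)

param = torch.nn.Parameter(torch.from_numpy(numpyarr).float())
m.weight = param
nn.init.constant_(m.bias.data, 0)  # set bias = 0
m.weight.requires_grad = False     # freeze the first layer
m.bias.requires_grad = False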

Thanks so much!