Translation keras to pytorch - resahping unmatched tensor

I am trying to reproduce the code of keras to pytorch. However I am struggling to reproduce the result.

from typing import List
class DNA_CNN_test2(nn.Module): # deepcre model  
    def __init__(self,
                 seq_len: int =1000,
                 kernel_size: int = 8,
                 p = 0.25): # drop out value 
        super().__init__()
        self.seq_len = seq_len
        # CNN module
        self.conv_net = nn.Sequential()
        self.model = nn.Sequential(
            nn.Conv1d(4,64,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.Conv1d(64,64,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=8),
            nn.Dropout(p),
            nn.Conv1d(64,128,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.Conv1d(128,128,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=8),
            nn.Dropout(p),
            nn.Conv1d(128,64,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.Conv1d(64,64,kernel_size=kernel_size, padding='same'),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=8),
            nn.Dropout(p),
            nn.Flatten(),
        
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(p),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )#.to(device)
         
    def forward(self, xb: torch.Tensor):
        """Forward pass."""
        xb = xb.permute(0, 2, 1) 
        out = self.conv_net(xb)
        return out 

I am following all the order of the original code however the code give me an error, which I can locate. Here I am using 512 batch size. My input is one hot encoded DNA sequence (1000bp) and its corresponding transcript value (numeric). What have I missed? Meanwhile, I have used used to unsqueze() the matrix, but did not help till now.

The size of tensor a (4) must match the size of tensor b (512) at non-singleton dimension 1