CNN model in PyTorch giving 30% lower accuracy than the TensorFlow model

I am trying to perform a 3-class classification in PyTorch using a basic CNN (3 conv layers, 2 maxpool layers and 3 dense layers). The data is stored in .mat files, which I read with scipy.io.loadmat, and I have written a custom Dataset and DataLoader. The issue I am facing is that the same model in TensorFlow gives me 80% accuracy, while in PyTorch it gives me 48% accuracy. I checked the thread Pytorch result against Tensorflow and suboptimal convergence, but the accuracy difference there is only about 1%, while in my case it is more than 30%. It is a simple CNN model, so I am unable to understand why there is such a vast difference in accuracy. Is there an error in how I am creating the Dataset, or is the training of the model on the data samples being done incorrectly?
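
For reference, the Network class is not shown below; a minimal sketch of what a "3 conv layer, 2 maxpool, 3 dense" model for the 1x16x16 inputs might look like is given here. The channel counts, kernel sizes and hidden sizes are assumptions, not the actual model:

```
import torch.nn as nn

class Network(nn.Module):
    def __init__(self, n_classes=3):
        super().__init__()
        # Assumed layer sizes; only the overall structure (3 conv, 2 maxpool, 3 dense)
        # is taken from the description above.
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),   # 1x16x16 -> 16x16x16
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 16x8x8
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # -> 32x8x8
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> 32x4x4
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # -> 64x4x4
            nn.ReLU(),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, n_classes),  # raw logits; CrossEntropyLoss applies log-softmax
        )

    def forward(self, x):
        return self.classifier(self.features(x))
```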

Also, one thing that the TensorFlow model had was InverseTimeDecay, which I tried to reproduce with weight decay in SGD, but that somewhat reduced the accuracy, so in the end I used the Adam optimizer. I also used Xavier normal initialization for the weights, while the TF model used he_uniform().
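
Note that InverseTimeDecay in Keras is a learning-rate schedule rather than weight decay, so the closer PyTorch equivalent is an LR scheduler, not the weight_decay argument of SGD. Below is a minimal sketch of reproducing both TF settings, assuming the TF model used tf.keras.optimizers.schedules.InverseTimeDecay and he_uniform(); the decay values are placeholders, since the actual TF configuration is not shown:

```
import torch
import torch.nn as nn

model = Network()  # the CNN from the question

# Placeholder decay settings -- the real TF values are not shown above.
initial_lr, decay_rate, decay_steps = 1e-4, 0.5, 100

optimizer = torch.optim.SGD(model.parameters(), lr=initial_lr)

# Keras InverseTimeDecay: lr = initial_lr / (1 + decay_rate * step / decay_steps).
# LambdaLR multiplies the initial lr by whatever the lambda returns; call
# scheduler.step() once per optimizer step (or per epoch, matching decay_steps' units).
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda step: 1.0 / (1.0 + decay_rate * step / decay_steps))

# he_uniform() in Keras corresponds to Kaiming/He uniform initialization in PyTorch.
def init_he_uniform(m):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)

model.apply(init_he_uniform)
```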

import os
import numpy as np
import scipy.io
import torch
from torch.utils.data import Dataset

class PDataset(Dataset):
    def __init__(self, path):
        self.x = []
        self.y = []
        # Each class has its own sub-directory: H -> label 2, M -> 1, L -> 0.
        for sub_dir, label in (("H", 2), ("M", 1), ("L", 0)):
            class_path = os.path.join(path, sub_dir)
            for myFile in os.listdir(class_path):
                if not myFile.startswith('._'):
                    content = scipy.io.loadmat(os.path.join(class_path, myFile))
                    self.x.append(content['L'])
                    self.y.append(label)

        self.x = torch.from_numpy(np.array(self.x)).view(-1, 1, 16, 16)
        self.y = torch.from_numpy(np.array(self.y))
        self.n_samples = self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.n_samples

Here I load the dataset and create the train, test, and validation samplers:

import torch.nn as nn
from torch.utils.data import SubsetRandomSampler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = PDataset(path)
model = Network()
error = nn.CrossEntropyLoss()
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
batch_size = 32
N_EPOCHS = 1000
validation_split = .2
shuffle_dataset = True
random_seed = 58
model.to(device)
error.to(device)


dataset_size = len(dataset)
indices = list(range(dataset_size))
train_test_split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, test_indices = indices[train_test_split:], indices[:train_test_split]
train_val_split = int(np.floor(validation_split * len(train_indices)))
train_indices, val_indices = train_indices[train_val_split:], train_indices[:train_val_split]
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
val_sampler = SubsetRandomSampler(val_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, drop_last=True,
                                           sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, drop_last=True,
                                           sampler=val_sampler)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,sampler=test_sampler)
total_step = len(train_loader)
loss_list=[]
for epoch in range(N_EPOCHS):
    accuracy = 0
    avg_loss = 0
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        correct = 0
        total = 0
        train = images.float().to(device)
        labels = labels.long().to(device)

        # Forward propagation
        outputs = model(train)
        loss = error(outputs, labels)
        avg_loss += loss.item()

        # Running training accuracy (per batch)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        accuracy += 100 * (correct / total)

        # Backward propagation and weight update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for i, (val_images, val_labels) in enumerate(val_loader):
            val = val_images.float().to(device)
            val_labels = val_labels.long().to(device)
            val_outputs = model(val)
            _, val_predicted = torch.max(val_outputs, 1)
            val_total += val_labels.size(0)
            val_correct += (val_predicted == val_labels).sum().item()

    print('Epoch [{}/{}], Loss per epoch: {:.8f}, accuracy per epoch: {:.8f}, val_accuracy: {:.8f}'.format(
        epoch + 1, N_EPOCHS, avg_loss / total_step, accuracy / total_step, 100 * val_correct / val_total))
    loss_list.append(avg_loss / total_step)

Any help is appreciated

Did you also compare the TF model using Adam?
Before diving into the code, it would be good to have a valid baseline to compare against, and currently it seems that the scripts differ at least in the optimizer.
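
For a like-for-like baseline, one quick option is to run the TF model with Adam at the same learning rate as the PyTorch script. This is only a sketch: tf_model is a placeholder for the Keras model, and the compile settings assume a softmax output with integer labels. It is also worth knowing that the default Adam epsilon differs between the frameworks (1e-7 in Keras vs 1e-8 in PyTorch):

```
import tensorflow as tf
import torch

lr = 1e-4  # same learning rate as the PyTorch script above

# Keras side (hypothetical tf_model): swap SGD + InverseTimeDecay for plain Adam.
tf_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# PyTorch side: optionally match Keras' default Adam epsilon to remove one more difference.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, eps=1e-7)
```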