AUROC scores are not increasing

#libraries
import numpy as np
import networkx as nx
import torch
from torch import optim
import torch.nn as nn
from torch.utils.data import DataLoader
import itertools
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
import mpld3

#data: 10x10 adjacency matrix of the toy network (each row is one node's link vector)
arr1=np.array([[0, 1, 1, 0, 0, 0, 1, 1, 0, 0],
           [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
           [1, 1, 0, 0, 0, 0, 1, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 1, 0, 0],
           [0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
           [0, 1, 0, 0, 0, 0, 1, 1, 0, 0]])

#hyperparameters
epochs=10
learning_rate=0.001
log_interval=200
batch_size=1
alpha=0.6
train_test_split=1
input_shape=arr1.shape[0]
representation_size=128 

#pretraining data
train_len=int(arr1.shape[0]*train_test_split)
arr1=arr1[:train_len]

arr2=arr1
arr3=arr1

#sliding window over three consecutive rows
train_data=[]
for i in range(train_len-2):
    train_data.append([arr1[i],arr2[i+1],arr3[i+2]])
np.asarray(train_data).shape  # (train_len-2, 3, input_shape)

#dataloader
train_loader=DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#model class
class Framework(nn.Module):
    def __init__(self):
        super(Framework, self).__init__()
        self.fc1 = nn.Linear(input_shape, 512,bias=True)
        self.fc21 = nn.Linear(512, representation_size,bias=True)
        self.fc22 = nn.Linear(512, representation_size,bias=True)
        self.fc3 = nn.Linear(representation_size, 512,bias=True)
        self.fc4 = nn.Linear(512, input_shape,bias=True)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.elu=nn.ELU()
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
    
    def encoder(self, x):
        """Encode a batch of samples, and return posterior parameters for each point."""
        h1 = self.relu(self.fc1(x))
        return self.fc21(h1), self.elu(self.fc22(h1))+1  # mu, and a strictly positive output used as logvar
    
    def decoder(self, z):
        h3 = self.relu(self.fc3(z))
        return self.sigmoid(self.fc4(h3))
        
    def reparam(self, mu, logvar):
        """Reparameterisation trick to sample z values. 
        This is stochastic during training,  and returns the mode during evaluation."""
        
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return mu + eps * std
        else:
            return mu
        
    def get_z(self, x):
        """Encode a batch of data points, x, into their z representations."""
        
        mu, logvar = self.encoder(x.view(-1, input_shape))
        return self.reparam(mu, logvar)
    
    def forward(self, x):
        """Takes a batch of samples, encodes them, and then decodes them again to compare."""
        mu, logvar = self.encoder(x.view(-1, input_shape))
        z = self.reparam(mu, logvar)
        return self.decoder(z), mu, logvar

#Complete model
model1 = Framework().to(device)
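
#quick shape check on a random batch (a minimal sketch; x_check/out_check/mu_check are made-up names, not part of the training code)
x_check = torch.rand(4, input_shape, device=device)
out_check, mu_check, logvar_check = model1(x_check)
print(out_check.shape, mu_check.shape, logvar_check.shape)  # (4, input_shape), (4, representation_size), (4, representation_size)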

#define optimizer
f_params=model1.parameters()
optimizer = optim.RMSprop(f_params, lr=learning_rate)

#define loss function
def root(var):
    res=(var**0.5)
    return res

def reshape(tens):
    return torch.reshape(tens,(-1,))

def W2_dis(mean1, logvar1, mean2, logvar2):
    """Closed-form 2-Wasserstein distance between two diagonal Gaussians:
    W2^2 = ||mu1 - mu2||^2 + ||sigma1 - sigma2||^2, with sigma = exp(0.5 * logvar)."""
    mean1=reshape(mean1)
    mean2=reshape(mean2)
    std1=reshape(logvar1).mul(0.5).exp()
    std2=reshape(logvar2).mul(0.5).exp()
    agg_var=torch.sum((std1-std2)**2)
    agg_mean=torch.sum((mean1-mean2)**2)
    distance=(agg_mean+agg_var).pow(0.5)
    return distance

def l2_part1(p, q):
    #squared reconstruction error, weighted by the target p so only observed links contribute
    p=torch.reshape(p,(-1,))
    q=torch.reshape(q,(-1,))
    return torch.sum((p*(p - q))**2)
###
def wl1(m1, v1,m2, v2, m3, v3):
    ik=(-1*W2_dis(m1,v1,m3,v3)).exp()
    ij=W2_dis(m1,v1,m2,v2)
    r_loss=(ij**2+ik)
    return r_loss


def wl2(recon_x1,x1,recon_x2,x2,recon_x3,x3):
    loss1= l2_part1(x1,recon_x1)
    loss2= l2_part1(x2,recon_x2)
    loss3= l2_part1(x3,recon_x3)
    loss_final=(loss1+loss2+loss3)/3
    return loss_final

###
def wasserstein_loss(recon_x1, x1, mu1, var1,recon_x2, x2, mu2, var2,recon_x3, x3, mu3, var3):
    l1_loss=wl1(mu1, var1, mu2, var2, mu3, var3)
    l2_loss=wl2(recon_x1,x1,recon_x2,x2,recon_x3,x3)
    w_final=alpha*l2_loss+l1_loss
    return w_final
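
#Note on the objective as implemented above (a summary, no new code):
#    total loss = alpha * L2 + L1
#where
#    L1 = W2(input1, input2)^2 + exp(-W2(input1, input3))   (wl1, on the latent Gaussians)
#    L2 = mean of the weighted squared reconstruction errors of the three inputs (wl2)
#and W2 is the closed-form 2-Wasserstein distance between diagonal Gaussians,
#    W2^2 = ||mu1 - mu2||^2 + ||sigma1 - sigma2||^2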

def rocauc_score(y_true,y_pred):
    y_true=torch.reshape(y_true,(-1,))
    y_pred=torch.reshape(y_pred,(-1,))
    y_true=y_true.detach().cpu().numpy()
    y_pred=y_pred.detach().cpu().numpy()

    return roc_auc_score(y_true,y_pred)

def auroc_score(recon_batch1, data1,recon_batch2, data2,recon_batch3, data3):
    score1=rocauc_score(data1,recon_batch1)
    score2=rocauc_score(data2,recon_batch2)
    score3=rocauc_score(data3,recon_batch3)
    auroc=(score1+score2+score3)/3
    return auroc

#train Model
epoch_loss=[]
Auc_score=[]
def train(epoch):
    
    for param_group in optimizer.param_groups:
        print(param_group['lr'], "learning rate for Auto-Encoder.")
    model1.train()
    #model2.train()
    #model3.train()
    
    torch.autograd.set_detect_anomaly(True)
    train_loss,Auc=0,0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for batch_idx, data in enumerate(train_loader):
        #three consecutive rows of the adjacency matrix
        data1 = data[0].float().to(device)
        data2 = data[1].float().to(device)
        data3 = data[2].float().to(device)

        recon_batch1, mu1, logvar1 = model1(data1)
        recon_batch2, mu2, logvar2 = model1(data2)
        recon_batch3, mu3, logvar3 = model1(data3)
        
        optimizer.zero_grad()
        loss = wasserstein_loss(recon_batch1, data1, mu1, logvar1,recon_batch2, data2, mu2, logvar2,recon_batch3, data3, mu3, logvar3)

        auroc=auroc_score(recon_batch1, data1,recon_batch2, data2, recon_batch3, data3)
        auroc=100*auroc
        loss.backward()
        optimizer.step()
        
        train_loss+=loss.item()
        Auc+=auroc.item()
        
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6}\tauc: {:.6}'.format(
                epoch, batch_idx*batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item()/len(data), auroc))
            
    epochloss=(train_loss / (len(train_loader.dataset)))
    AUC=(Auc*batch_size/(len(train_loader.dataset)))
    epoch_loss.append(epochloss)
    Auc_score.append(AUC)
    print('====> Epoch: {} Average loss: {:.6}\t Auc: {:.6f}\tmax_auc: {:.6}\tmin_loss: {:.6}'.format(epoch,epochloss,AUC,max(Auc_score),min(epoch_loss)))
    
    
if __name__ == "__main__":
    for epoch in range(1, epochs + 1):
        train(epoch)

@ptrblck @albanD This is a network reconstruction problem.
As you suggested, I have put together a small sample model.
As you can see, the scores and the loss look fine on the 10x10 network above, but I need to train a 3000x3000 network with this model.
If possible, could you suggest something? I am getting an AUC score of about 0.70, but I expect it to be around 0.93.
This is an implementation of the following research paper:
https://www.kdd.org/kdd2018/accepted-papers/view/deep-variational-network-embedding-in-wasserstein-space
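
For completeness, here is a minimal sketch of how the reconstruction AUROC could be checked over the whole adjacency matrix after training (it reuses model1, arr1 and device from above; x_full, recon_full and full_auc are just illustrative names):

model1.eval()
with torch.no_grad():
    x_full = torch.from_numpy(arr1).float().to(device)
    recon_full, mu_full, logvar_full = model1(x_full)  # reparam returns mu in eval mode
    full_auc = roc_auc_score(arr1.reshape(-1), recon_full.cpu().numpy().reshape(-1))
print("full-matrix AUROC:", full_auc)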