My autoencoder is not learning to predict the target values

I am trying to build a variational autoencoder in Keras that takes an input X of shape (1, 32) and produces two outputs: a reconstruction of X with shape (1, 32) and a prediction Y with shape (1, 16).

The dataset consists of binary vectors. For each instance in my dataset, there is an input vector X and an output vector Y. The structure of these vectors is as follows:

X is a binary vector of length n (e.g., X(1,:) = [0/1, 0/1, ..., 0/1]). Y is a binary vector of length m, where m < n (e.g., Y(1,:) = [0/1, 0/1, ..., 0/1]).

For each X I want to predict Y. For example, one sample looks like this:

X = [1,0,1,1,1,0,1,0,1,0,1,1,0,1] and its Y=[0,1,1,1,1,0,1]

My objective is to develop a machine learning model M that can predict the vector Y from the vector X with an accuracy greater than 95%.

In other words, M(X) => Y: the model takes X and predicts Y.

I built the following model:

from keras.layers import Lambda, Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn import set_config

#Set the random seed for consistent results
import random
#clear session for each run

# Load the data.
# NOTE(review): the right-hand side reads X and Y, so both must already be
# defined before this line runs; the actual loading code is not shown here.
# Y_original is bound to the same object as Y (tuple assignment makes no copy).

X, Y_original =X, Y

# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    """Draw a latent sample using the reparameterization trick.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of backend tensors. Both are
            indexed as rank-2 here (first axis = batch, second axis = latent
            dimension).

    Returns:
        A tensor ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
        is drawn from a standard normal with shape ``(batch, dim)``.
    """
    z_mean, z_log_var = args
    # The batch size is read dynamically; the latent size is read statically.
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    # exp(0.5 * log_var) is the standard deviation.
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Define VAE model components
intermediate_dim = 32
latent_dim = 32

# Encoder network
# input_dim is also passed as a Dense unit count further down in this file,
# so it is treated as an integer feature count and wrapped in a tuple here.
# NOTE(review): input_dim is defined outside this section - confirm it is an int.
inputs_x = Input(shape=(input_dim,), name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
# Feed the dropout output into the first Dense layer. Previously this layer was
# applied to inputs_x directly, so the Dropout layer above was never part of
# the graph.
inputs_x_dropout = Dense(1024, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(512, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(224, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inter_x1 = Dense(128, activation='relu')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='relu')(inter_x1)

# Two linear heads parameterize the latent distribution; z is one sample from it.
z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')

# Decoder network for reconstruction.
# The hidden stack is: Dense(intermediate_dim) -> three Dense+BatchNorm pairs
# of growing width -> Dense(128). Both output heads below read from inter_y2.
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='relu')(latent_inputs)
for width in (224, 512, 1024):
    inter_y1 = Dense(width, activation='relu')(inter_y1)
    inter_y1 = BatchNormalization()(inter_y1)
inter_y2 = Dense(128, activation='relu')(inter_y1)

# Head 1: sigmoid reconstruction of the input vector.
outputs_reconstruction = Dense(input_dim, activation='sigmoid')(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')

# Head 2: sigmoid multilabel prediction, branching off the same inter_y2
# tensor (so every hidden layer above is shared with the decoder).
outputs_prediction = Dense(multilabel_size, activation='sigmoid')(inter_y2)
predictor = Model(latent_inputs, outputs_prediction, name='predictor')

# Build the end-to-end model: encoder input -> sampled z -> both heads.
# Output order is [reconstruction, prediction].
reconstruction_out = decoder(z)
prediction_out = predictor(z)
outputs_vae = [reconstruction_out, prediction_out]
vae = Model(inputs=inputs_x, outputs=outputs_vae, name='vae_mlp')
# The same binary cross-entropy loss is applied to each output.
# NOTE(review): no KL-divergence term is added in the visible code, so
# z_mean / z_log_var are only trained through these two losses - confirm
# this is intended.
vae.compile(loss='binary_crossentropy', optimizer='nadam')

# Train the model
val_size = 360  # 20% val size

from collections import defaultdict
# metrics maps a series name (e.g. 'trn_loss', 'val_predictor_acc') to a list
# with one value per outer training iteration.
metrics = defaultdict(list)

for epoch in range(600):
    # One call to fit per iteration (fit runs a single epoch by default), so
    # accuracy can be measured manually after every pass over the data.
    # Targets are [X, Y]: X for the reconstruction head, Y for the predictor.
    history = vae.fit(X, [X, Y], batch_size=32, shuffle=True)
    h = history.history
    # Store every reported training loss under a 'trn_' prefix so the plots
    # can read them. With outputs named 'decoder' and 'predictor' the keys are
    # expected to be 'loss', 'decoder_loss' and 'predictor_loss'.
    # NOTE(review): confirm the exact key names for your Keras version.
    for key, values in h.items():
        metrics['trn_' + key].append(values[-1])

    # Manually calculate exact-match accuracy for trn and val: a sample only
    # counts as correct when every bit of the thresholded output matches.
    # NOTE(review): XX / YY (validation data) are defined outside this section.
    for mode in ['trn', 'val']:
        XY = [X, Y] if mode == 'trn' else [XX, YY]
        n_samples = len(XY[0])
        soft_recon, soft_pred = vae.predict(XY[0])
        hard_recon = (soft_recon > 0.5).astype(int)
        hard_pred = (soft_pred > 0.5).astype(int)
        recon_acc = sum(
            np.array_equal(xhat, x) for xhat, x in zip(hard_recon, XY[0])
        ) / n_samples * 100
        pred_acc = sum(
            np.array_equal(yhat, y) for yhat, y in zip(hard_pred, XY[1])
        ) / n_samples * 100
        metrics[mode + '_decoder_acc'].append(recon_acc)
        metrics[mode + '_predictor_acc'].append(pred_acc)

# Loss curves are drawn on the current axes: (metrics key, colour, legend label).
loss_curves = [
    ('trn_loss', 'C3', 'loss'),
    ('trn_decoder_loss', 'C0', 'loss | decoder'),
    ('trn_predictor_loss', 'C1', 'loss | predictor'),
]
for key, color, label in loss_curves:
    plt.plot(metrics[key], color, lw=2, label=label)

# Accuracy curves share the x-axis but get their own y-axis on the right.
# Dotted lines are training accuracy, dashed lines are validation accuracy.
ax2 = plt.gca().twinx()
accuracy_curves = [
    ('trn_decoder_acc', 'C0', ':', 'trn acc | decoder'),
    ('trn_predictor_acc', 'C1', ':', 'trn acc | predictor'),
    ('val_decoder_acc', 'C0', '--', 'val acc | decoder'),
    ('val_predictor_acc', 'C1', '--', 'val acc | predictor'),
]
for key, color, line_style, label in accuracy_curves:
    ax2.plot(metrics[key], color, ls=line_style, label=label)
ax2.set_ylabel('accuracy (%)')

# A single figure-level legend collects the labels from both axes.
fig = plt.gcf()
fig.legend(bbox_to_anchor=(0.7, 1.1), ncol=2)
fig.set_size_inches(7, 4)

# Run the trained model once more on X; outputs are [reconstruction, prediction].
soft_recon, soft_pred = vae.predict(X)

# Convert soft predictions (probabilities) to hard binary 0/1
recon_binary = soft_recon > 0.5
pred_binary = soft_pred > 0.5

# Persist each sub-model and the combined model.
# NOTE(review): the call targets were lost in the original text; the mapping
# below is inferred from the file names (Enco/Deco/Pred/Aut) - confirm.
encoder.save("BrmEnco_Updated.h5", overwrite=True)
decoder.save("BrmDeco_Updated.h5", overwrite=True)
predictor.save("BrmPred_Updated.h5", overwrite=True)
vae.save("BrmAut_Updated.h5", overwrite=True)

However, the accuracy plateaus at about 50%, and my model cannot predict all 16 bits correctly (it always gets 5 or 6 bits wrong).

You might want to ask this question in the keras discussion board as you’ll find the experts there.