I am trying to build a variational autoencoder in Keras with an input X of shape (1, 32) and two outputs: a reconstruction of X with shape (1, 32) and a prediction Y with shape (1, 16). The dataset consists of binary vectors: for each instance there is an input vector X and an output vector Y, structured as follows:
X is a binary vector of length n (e.g., X(1,:) = [0/1, 0/1, ..., 0/1]).
Y is a binary vector of length m, where m < n (e.g., Y(1,:) = [0/1, 0/1, ..., 0/1]).
For each X I want to predict its Y. For example, one sample:
X = [1,0,1,1,1,0,1,0,1,0,1,1,0,1] and its Y = [0,1,1,1,1,0,1]
My objective is to develop a machine learning model M that predicts the vector Y from the vector X with an accuracy greater than 95%, i.e. M(X) => Y.
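For completeness, here is a placeholder for the data-loading step. My real X and Y come from my own dataset; the random binary arrays below only match its shapes (N = 1800, so the 360-sample validation split later is 20%) and exist purely so the script can run end to end:

import numpy as np

# Placeholder data only: random binary arrays with the same shapes as my
# real dataset. In the real data Y depends on X; the random labels here
# are just so the code below is runnable.
N = 1800
rng = np.random.default_rng(0)
X = rng.integers(0, 2, size=(N, 32)).astype('float32')
Y = rng.integers(0, 2, size=(N, 16)).astype('float32')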
I built this model:
from keras.layers import Lambda, Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import set_config
set_config(transform_output='default')
#Set the random seed for consistent results
import random
random.seed(0)
tf.random.set_seed(0)
np.random.seed(0)
#clear session for each run
K.clear_session()
#
#Load data: X and Y are binary arrays of shape (N, 32) and (N, 16)
#
multilabel_size = 16
input_dim = 32  # length of each input vector X
# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
# Define VAE model components
intermediate_dim = 32
latent_dim = 32
# Encoder network
inputs_x = Input(shape=(input_dim,), name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
inputs_x_dropout = Dense(1024, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(512, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(224, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inter_x1 = Dense(128, activation='relu')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='relu')(inter_x1)
z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')
# Decoder network for reconstruction
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='relu')(latent_inputs)
inter_y1 = Dense(224, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y1 = Dense(512, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y1 = Dense(1024, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y2 = Dense(128, activation='relu')(inter_y1)
outputs_reconstruction = Dense(input_dim, activation='sigmoid')(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')
# Separate network for multilabel indicator prediction from inter_y2
outputs_prediction = Dense(multilabel_size, activation='sigmoid')(inter_y2)
predictor = Model(latent_inputs, outputs_prediction, name='predictor')
# Instantiate VAE model with two outputs
outputs_vae = [decoder(z), predictor(z)]
vae = Model(inputs_x, outputs_vae, name='vae_mlp')
vae.compile(optimizer='nadam', loss='binary_crossentropy')
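# Note: as compiled above, the total loss is just the sum of the two binary
# cross-entropy terms; no KL divergence term is added, so z_mean/z_log_var
# are unconstrained and this trains more like a plain autoencoder than a
# VAE. For reference only, a standard VAE would add the KL term before
# compiling, e.g. (a sketch, not part of my model as posted):
#   kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
#   vae.add_loss(K.mean(kl_loss))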
# Train the model
val_size = 360  # 20% val size
# hold out the last val_size samples for validation (used as XX/YY below)
XX, YY = X[-val_size:], Y[-val_size:]
X, Y = X[:-val_size], Y[:-val_size]
from collections import defaultdict
metrics = defaultdict(list)
for epoch in range(600):
    history = vae.fit(X, [X, Y], batch_size=32, shuffle=True)
    print(epoch)
    h = history.history
    metrics['trn_predictor_loss'].extend(h['predictor_loss'])
    metrics['trn_decoder_loss'].extend(h['decoder_loss'])
    metrics['trn_loss'].extend(h['loss'])
    # Manually calculate exact-match accuracy for trn and val
    for mode in ['trn', 'val']:
        XY = [X, Y] if mode == 'trn' else [XX, YY]
        n_samples = len(XY[0])
        soft_recon, soft_pred = vae.predict(XY[0])
        hard_recon = (soft_recon > 0.5).astype(int)
        hard_pred = (soft_pred > 0.5).astype(int)
        recon_acc = sum(
            [np.array_equal(xhat, x) for xhat, x in zip(hard_recon, XY[0])]
        ) / n_samples * 100
        pred_acc = sum(
            [np.array_equal(yhat, y) for yhat, y in zip(hard_pred, XY[1])]
        ) / n_samples * 100
        metrics[mode + '_decoder_acc'].append(recon_acc)
        metrics[mode + '_predictor_acc'].append(pred_acc)
plt.plot(metrics['trn_loss'], 'C3', lw=2, label='loss')
plt.plot(metrics['trn_decoder_loss'], 'C0', lw=2, label='loss | decoder')
plt.plot(metrics['trn_predictor_loss'], 'C1', lw=2, label='loss | predictor')
plt.xlabel('epoch')
plt.ylabel('loss')
ax2 = plt.gca().twinx()
ax2.plot(metrics['trn_decoder_acc'], 'C0', ls=':', label='trn acc | decoder')
ax2.plot(metrics['trn_predictor_acc'], 'C1', ls=':', label='trn acc | predictor')
ax2.plot(metrics['val_decoder_acc'], 'C0', ls='--', label='val acc | decoder')
ax2.plot(metrics['val_predictor_acc'], 'C1', ls='--', label='val acc | predictor')
ax2.set_ylabel('accuracy (%)')
plt.gcf().legend(bbox_to_anchor=(0.7, 1.1), ncol=2)
plt.gcf().set_size_inches(7, 4)
soft_recon, soft_pred = vae.predict(X)
#Convert soft predictions (probabilities) to hard binary 0/1
recon_binary = soft_recon > 0.5
pred_binary = soft_pred > 0.5
encoder.save("BrmEnco_Updated.h5", overwrite=True)
decoder.save("BrmDeco_Updated.h5", overwrite=True)
predictor.save("BrmPred_Updated.h5", overwrite=True)
vae.save("BrmAut_Updated.h5", overwrite=True)
plt.show()
However, the accuracy plateaus at around 50%, and my model never predicts all 16 bits of Y correctly (it always gets 5 or 6 bits wrong).
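For clarity, the accuracy above is exact-match over all 16 bits: a sample only counts as correct if every bit of Y matches. A per-bit accuracy, as in the sketch below, is the complementary view (this assumes hard_pred and the matching ground-truth Y from the script above):

# Illustrative sketch: exact-match vs. per-bit accuracy for the predictor.
# Assumes hard_pred and Y are binary arrays of shape (n_samples, 16).
exact_match = np.mean([np.array_equal(yhat, y) for yhat, y in zip(hard_pred, Y)]) * 100
per_bit = np.mean(hard_pred == Y) * 100
print(f"exact match: {exact_match:.1f}%, per-bit: {per_bit:.1f}%")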