Hello
keras code is given below:
from tensorflow import keras
from tensorflow.keras import layers
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
# Normalization and Attention
x = layers.LayerNormalization(epsilon=1e-6)(inputs)
x = layers.MultiHeadAttention(
key_dim=head_size, num_heads=num_heads, dropout=dropout
)(x, x)
x = layers.Dropout(dropout)(x)
res = x + inputs
# Feed Forward Part
x = layers.LayerNormalization(epsilon=1e-6)(res)
x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
x = layers.Dropout(dropout)(x)
x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
return x + res
def build_model(
input_shape,
head_size,
num_heads,
ff_dim,
num_transformer_blocks,
mlp_units,
dropout=0,
mlp_dropout=0,
):
inputs = keras.Input(shape=input_shape)
x = inputs
for _ in range(num_transformer_blocks):
x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
for dim in mlp_units:
x = layers.Dense(dim, activation="relu")(x)
x = layers.Dropout(mlp_dropout)(x)
outputs = layers.Dense(n_classes, activation="softmax")(x)
return keras.Model(inputs, outputs)
input_shape = x_train.shape[1:]
model = build_model(
input_shape,
head_size=256,
num_heads=4,
ff_dim=4,
num_transformer_blocks=4,
mlp_units=[128],
mlp_dropout=0.4,
dropout=0.25,
)
model.compile(
loss=“sparse_categorical_crossentropy”,
optimizer=keras.optimizers.Adam(learning_rate=0.0001),
metrics=[“accuracy”],
)
checkpoint_filepath = “transformer/best_model1.hdf5”
print(os.listdir(“transformer”))
model.summary()
model.load_weights(checkpoint_filepath)
callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_filepath, monitor=‘loss’, verbose=1,save_best_only=True, mode=‘auto’, save_freq=‘epoch’,period=1)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=‘model.{epoch:02d}-{val_loss:.2f}.h5’)
model_checkpoint_tsboard = tf.keras.callbacks.TensorBoard(log_dir=‘./transformer’)
model.fit(
x_train,
y_train,
validation_split=0.2,
epochs=85,
batch_size=1,
callbacks=[callbacks,model_checkpoint_callback]
)
I have tried to convert into pytorch given below:
import torch
import torch.nn as nn
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
# Normalization and Attention
x = nn.LayerNorm(epsilon=1e-6)(inputs)
x = nn.MultiheadAttention(
key_dim=head_size, num_heads=num_heads, dropout=dropout
)(x, x)
x = nn.Dropout(dropout)(x)
res = x + inputs
# Feed Forward Part
x = nn.LayerNormalization(epsilon=1e-6)(res)
x = nn.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
x = nn.Dropout(dropout)(x)
x = nn.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
return x + res
def build_model(
input_shape,
head_size,
num_heads,
ff_dim,
num_transformer_blocks,
mlp_units,
dropout=0,
mlp_dropout=0,
):
inputs = torch.tensor(shape=input_shape)
x = inputs
for _ in range(num_transformer_blocks):
x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
x = nn.AvgPool1d(data_format="channels_first")(x)
for dim in mlp_units:
x = nn.linear(dim, activation="relu")(x)
x = nn.Dropout(mlp_dropout)(x)
outputs = nn.linear(n_classes, activation="softmax")(x)
return keras.Model(inputs, outputs)
Q1: what is should write at the place of return keras.Model(inputs, outputs)?
Q2: I want to pass data using dataloader pytorch as:
for batch_idx, (images_mls, labels_mls, labels_sec_mls) in enumerate(trainloader_mls):
will this conversion work for it?
Thank you