Hi @albanD, below is my TensorFlow and PyTorch code for an encoder-decoder network.
TENSORFLOW:
# ----- ENCODER -----
We1 = tf.Variable(tf.random_uniform((input_length, args.hidden_size),
                                    -1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length)))
We2 = tf.Variable(tf.random_uniform((args.hidden_size, args.code_size),
                                    -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size)))
be1 = tf.Variable(tf.zeros([args.hidden_size]))
be2 = tf.Variable(tf.zeros([args.code_size]))

hidden_1 = tf.nn.tanh(tf.matmul(encoder_inputs, We1) + be1)
code = tf.nn.tanh(tf.matmul(hidden_1, We2) + be2)

# ----- DECODER -----
if tied_weights:
    Wd1 = tf.transpose(We2)
    Wd2 = tf.transpose(We1)
else:
    Wd1 = tf.Variable(tf.random_uniform((args.code_size, args.hidden_size),
                                        -1.0 / math.sqrt(args.code_size), 1.0 / math.sqrt(args.code_size)))
    Wd2 = tf.Variable(tf.random_uniform((args.hidden_size, input_length),
                                        -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size)))
bd1 = tf.Variable(tf.zeros([args.hidden_size]))
bd2 = tf.Variable(tf.zeros([input_length]))

# decoder forward pass (mirrors the PyTorch decoder below)
if lin_dec:
    hidden_2 = tf.matmul(code, Wd1) + bd1
else:
    hidden_2 = tf.nn.tanh(tf.matmul(code, Wd1) + bd1)
dec_out = tf.matmul(hidden_2, Wd2) + bd2

reconstruct_loss = tf.losses.mean_squared_error(labels=dec_out, predictions=encoder_inputs)
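For context, the TF graph is trained with a standard TF1 session loop along the lines of the sketch below; the optimizer, learning rate, and feed names here are assumptions, not the exact script.

# Rough sketch of the TF1 training step (optimizer, learning rate and
# placeholder/feed names are assumptions, not the exact script).
train_op = tf.train.AdamOptimizer(1e-3).minimize(reconstruct_loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch in batches:  # `batches` stands in for the real data pipeline
        _, loss_val = sess.run([train_op, reconstruct_loss],
                               feed_dict={encoder_inputs: batch})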
PYTORCH:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.We1 = torch.nn.Parameter(torch.Tensor(input_length, args.hidden_size).uniform_(
            -1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length)))
        self.We2 = torch.nn.Parameter(torch.Tensor(args.hidden_size, args.code_size).uniform_(
            -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size)))
        self.be1 = torch.nn.Parameter(torch.zeros([args.hidden_size]))
        self.be2 = torch.nn.Parameter(torch.zeros([args.code_size]))

    def encoder(self, encoder_inputs):
        hidden_1 = torch.tanh(torch.matmul(encoder_inputs.float(), self.We1) + self.be1)
        code = torch.tanh(torch.matmul(hidden_1, self.We2) + self.be2)
        # print("CODE ENCODER SHAPE:", code.size())
        return code

    def decoder(self, code):
        # ----- DECODER -----
        if tied_weights:
            Wd1 = torch.transpose(self.We2, 0, 1)
            Wd2 = torch.transpose(self.We1, 0, 1)
        else:
            # NOTE: these parameters are created inside the forward call, so they are
            # re-initialized on every pass and are not registered in model.parameters().
            Wd1 = torch.nn.Parameter(torch.Tensor(args.code_size, args.hidden_size).uniform_(
                -1.0 / math.sqrt(args.code_size), 1.0 / math.sqrt(args.code_size)))
            Wd2 = torch.nn.Parameter(torch.Tensor(args.hidden_size, input_length).uniform_(
                -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size)))
        bd1 = torch.nn.Parameter(torch.zeros([args.hidden_size]))
        bd2 = torch.nn.Parameter(torch.zeros([input_length]))

        if lin_dec:
            hidden_2 = torch.matmul(code, Wd1) + bd1
        else:
            hidden_2 = torch.tanh(torch.matmul(code, Wd1) + bd1)

        dec_out = torch.matmul(hidden_2, Wd2) + bd2
        return dec_out
code_vs = model.encoder(encoder_inp)
dec_out_val = model.decoder(code_vs)
# print("DEC OUT VAL:", dec_out_val)
reconstruct_loss_val = torch.mean((dec_out_val - encoder_inp) ** 2)
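And this is roughly how the PyTorch side is driven; again a sketch, where the optimizer choice and learning rate are assumptions on my part.

# Rough sketch of the PyTorch training step (optimizer and learning rate are assumptions).
model = Model()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for encoder_inp in batches:  # `batches` stands in for the real data loader
    code_vs = model.encoder(encoder_inp)
    dec_out_val = model.decoder(code_vs)
    reconstruct_loss_val = torch.mean((dec_out_val - encoder_inp) ** 2)
    optimizer.zero_grad()
    reconstruct_loss_val.backward()
    optimizer.step()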
Do you see any significant difference between the original and the converted code? I am not getting a similar reconstruction loss with the same batches of data. Thank you!