Doubt on the number of trainable parameters of encoder model

Hi @albanD, the below is my TF and Pytorch code for an encoder network.

# ----- ENCODER -----
We1 = tf.Variable(
    tf.random_uniform((input_length, args.hidden_size), -1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length)))
We2 = tf.Variable(tf.random_uniform((args.hidden_size, args.code_size), -1.0 / math.sqrt(args.hidden_size),
                                    1.0 / math.sqrt(args.hidden_size)))

be1 = tf.Variable(tf.zeros([args.hidden_size]))
be2 = tf.Variable(tf.zeros([args.code_size]))


hidden_1 = tf.nn.tanh(tf.matmul(encoder_inputs, We1) + be1)
code = tf.nn.tanh(tf.matmul(hidden_1, We2) + be2)

# ----- DECODER -----
if tied_weights:
    Wd1 = tf.transpose(We2)
    Wd2 = tf.transpose(We1)
else:
    Wd1 = tf.Variable(tf.random_uniform((args.code_size, args.hidden_size), -1.0 / math.sqrt(args.code_size),
                                        1.0 / math.sqrt(args.code_size)))
    Wd2 = tf.Variable(tf.random_uniform((args.hidden_size, input_length), -1.0 / math.sqrt(args.hidden_size),
                                        1.0 / math.sqrt(args.hidden_size)))

bd1 = tf.Variable(tf.zeros([args.hidden_size]))
bd2 = tf.Variable(tf.zeros([input_length]))

reconstruct_loss = tf.losses.mean_squared_error(labels=dec_out, predictions=encoder_inputs)

PYTORCH:

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

        self.We1 = torch.nn.Parameter(torch.Tensor(input_length, args.hidden_size).uniform_(-1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length)))
        self.We2 = torch.nn.Parameter(torch.Tensor(args.hidden_size, args.code_size).uniform_(-1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size)))

        self.be1 = torch.nn.Parameter(torch.zeros([args.hidden_size]))
        self.be2 = torch.nn.Parameter(torch.zeros([args.code_size]))


    def encoder(self, encoder_inputs):
        hidden_1 = torch.tanh(torch.matmul(encoder_inputs.float(), self.We1) + self.be1)
        code = torch.tanh(torch.matmul(hidden_1, self.We2) + self.be2)
        #print ("CODE ENCODER SHAPE:", code.size())
        return code

def decoder(code):

    # ----- DECODER -----
    if tied_weights:

        Wd1 = torch.transpose(We2)
        Wd2 = torch.transpose(We1)

    else:

        Wd1 = torch.nn.Parameter(
            torch.Tensor(args.code_size, args.hidden_size).uniform_(-1.0 / math.sqrt(args.code_size),
                                                                       1.0 / math.sqrt(args.code_size)))
        Wd2 = torch.nn.Parameter(
            torch.Tensor(args.hidden_size, input_length).uniform_(-1.0 / math.sqrt(args.hidden_size),
                                                                         1.0 / math.sqrt(args.hidden_size)))

        bd1 = torch.nn.Parameter(torch.zeros([args.hidden_size]))
        bd2 = torch.nn.Parameter(torch.zeros([input_length]))

        if lin_dec:
            hidden_2 = torch.matmul(code, Wd1) + bd1
        else:
            hidden_2 = torch.tanh(torch.matmul(code, Wd1) + bd1)

        dec_out = torch.matmul(hidden_2, Wd2) + bd2

    return  dec_out

code_vs = model.encoder(encoder_inp)
dec_out_val = decoder(code_vs)
#print ("DEC OUT VAL:", dec_out_val)
reconstruct_loss_val = torch.mean((dec_out_val - encoder_inp) ** 2)

Do you think there is any significant difference in the converted code, as I am not getting similar reconstruction loss with the same batches of data. Thank you!

Maybe this question should have the autograd tag instead of distributed?

Hi, @ptrblck , can you please have a look at this. Thanks!

hi @albanD , can you kindly have a look at this. I do not see any difference b/w the two but the reconstruction error on the same batches are quite different. Thanks!