# Why does the same tensor data give different results after calculation?

I have built an LSTM network, but I find that when the model makes predictions, identical tensor values produce different outputs.

For example, when the output of the last hidden layer is

``````tensor([[0.0150],
[0.0150],
[0.0150],
[0.0150],
[0.0151],
[0.0151],
[0.0151],
[0.0151],
[0.0152],
[0.0152],
[0.0152],
[0.0152],
[0.0153],
[0.0153],
[0.0153],
[0.0153]])
``````

The predicted result is

``````tensor([[-0.1035],
[-0.1035],
[-0.1035],
[-0.1035],
[-0.1035],
[-0.1035],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036],
[-0.1036]])
``````

The sixth and seventh inputs are both 0.0151, but their outputs are different (-0.1035 and -0.1036).

I really don’t know what causes the different outputs.

The complete code is as follows.

``````import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
``````
``````timesteps = 10;
L = 1;

batch_size = 16;
Epoch = 1;
``````
``````timespaces=np.linspace(0,10,50000);

data=np.sin(timespaces * 10);
data=np.reshape(data, (-1, 1));

plt.plot(timespaces, data)
plt.show()
``````
``````train_len=(int)(len(data) * 0.6);

train_data=data[:train_len]; # (30000, 1)
test_data=data[train_len:]; # (20000, 1)

print(train_data.shape);
print(test_data.shape);

plt.plot(timespaces[:train_len],train_data);
plt.plot(timespaces[train_len:],test_data);
plt.show();
``````
``````def divide(data,timesteps,L):
X,Y=[],[];

for i in range(len(data) - timesteps):

x = data[i:i+timesteps];
y = data[i+timesteps+L-1];

X.append(x);
Y.append(y);

return X,Y;

def divide_batch(data, batch_size):
    """Split data into consecutive, non-overlapping batches of equal size.

    Args:
        data: Sliceable sequence (list, ndarray, tensor, ...).
        batch_size: Number of items per batch; must be positive.

    Returns:
        A list of slices data[k * batch_size:(k + 1) * batch_size], one per
        complete batch. Trailing items that do not fill a whole batch are
        dropped.

    Raises:
        ValueError: If batch_size is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Only complete batches are kept, so the count is a plain floor division.
    full_batches = len(data) // batch_size

    return [data[k * batch_size:(k + 1) * batch_size] for k in range(full_batches)]

def _build_batches(name, series):
    """Window a series, convert it to float32 tensors and group it into batches.

    Prints the tensor shapes before and after batching, labelled with name
    ("train" or "test"), and returns the batched (X, Y) pair.
    """
    X, Y = divide(series, timesteps, L)

    # Stack into one ndarray first: handing torch.tensor a Python list of
    # ndarrays makes it convert element by element, which is very slow.
    X = torch.tensor(np.array(X)).to(torch.float32)
    Y = torch.tensor(np.array(Y)).to(torch.float32)

    print("before:")
    print(name + "_X.shape:")
    print(X.shape)
    print(name + "_Y.shape:")
    print(Y.shape)

    # divide_batch returns equally sized tensor slices, so they can be stacked
    # directly along a new leading "batch index" dimension.
    X = torch.stack(divide_batch(X, batch_size))
    Y = torch.stack(divide_batch(Y, batch_size))

    print("after:")
    print(name + "_X.shape:")
    print(X.shape)
    print(name + "_Y.shape:")
    print(Y.shape)

    return X, Y


train_X, train_Y = _build_batches("train", train_data)
test_X, test_Y = _build_batches("test", test_data)
``````
``````class LSTM(object):
def __init__(self,timesteps,batch_size,input_size,hidden_size,output_size):
self.times = 0;

self.timesteps = timesteps;
self.batch_size = batch_size;

self.input_size = input_size;
self.hidden_size = hidden_size;
self.output_size = output_size;

self.Wfh = torch.tensor([[0.0107]]);
self.Wfx = torch.tensor([[-0.0069]]);
self.bf = torch.tensor([0.0126]);

self.Wih = torch.tensor([[-0.0045]]);
self.Wix = torch.tensor([[-0.0180]]);
self.bi = torch.tensor([1]);

self.Woh = torch.tensor([[-0.0065]]);
self.Wox = torch.tensor([[0.0002]]);
self.bo = torch.tensor([-0.0018]);

self.Wch = torch.tensor([[-0.0039]]);
self.Wcx = torch.tensor([[0.0162]]);
self.bc = torch.tensor([0.0202]);

self.Wp = torch.tensor([[-0.0563]]);
self.bp = torch.tensor([-0.1027]);

#         self.Wfh,self.Wfx,self.bf = self.Weight_bias(self.input_size,self.hidden_size);
#         self.Wih,self.Wix,self.bi = self.Weight_bias(self.input_size,self.hidden_size);
#         self.Woh,self.Wox,self.bo = self.Weight_bias(self.input_size,self.hidden_size);
#         self.Wch,self.Wcx,self.bc = self.Weight_bias(self.input_size,self.hidden_size);

#         self.Wp = torch.randn(self.hidden_size,self.output_size) * 0.01;
#         self.bp = torch.randn(self.output_size) * 0.01;

self.f = torch.zeros(self.batch_size,self.hidden_size);
self.i = torch.zeros(self.batch_size,self.hidden_size);
self.o = torch.zeros(self.batch_size,self.hidden_size);
self.ct = torch.zeros(self.batch_size,self.hidden_size);

self.h = torch.zeros(self.batch_size,self.hidden_size);
self.c = torch.zeros(self.batch_size,self.hidden_size);

self.fList = [];
self.iList = [];
self.oList = [];
self.ctList = [];

self.hList = [];
self.cList = [];

self.preList=[];

self.fList.append(self.f);
self.iList.append(self.i);
self.oList.append(self.o);
self.ctList.append(self.ct);

self.hList.append(self.h);
self.cList.append(self.c);

print("__init__:");

print("self.Wfh:");
print(self.Wfh.shape);

print("self.Wfx:");
print(self.Wfx.shape);

print("self.bf:");
print(self.bf.shape);

print("self.Wih:");
print(self.Wih.shape);

print("self.Wix:");
print(self.Wix.shape);

print("self.bi:");
print(self.bi.shape);

print("self.Woh:");
print(self.Woh.shape);

print("self.Wox:");
print(self.Wox.shape);

print("self.bo:");
print(self.bo.shape);

print("self.Wch:");
print(self.Wch.shape);

print("self.Wcx:");
print(self.Wcx.shape);

print("self.bc:");
print(self.bc.shape);

print("self.h:");
print(self.h.shape);

print("self.c:");
print(self.c.shape);

def Weight_bias(self,input_size,hidden_size):
return (torch.randn(hidden_size,hidden_size) * 0.01,
torch.randn(input_size,hidden_size) * 0.01,
torch.randn(hidden_size) * 0.01);

def forward(self,x):
for i in range(self.timesteps):
self.times += 1;

self.f = self.Sigmoid_forward(self.hList[-1] @ self.Wfh + x[i] @ self.Wfx + self.bf);
self.i = self.Sigmoid_forward(self.hList[-1] @ self.Wih + x[i] @ self.Wix + self.bi);
self.o = self.Sigmoid_forward(self.hList[-1] @ self.Woh + x[i] @ self.Wox + self.bo);
self.ct = self.Tanh_forward(self.hList[-1] @ self.Wch + x[i] @ self.Wcx + self.bc);

self.c = self.f * self.cList[-1] + self.i * self.ct;
self.h = self.o * self.Tanh_forward(self.c);

self.fList.append(self.f);
self.iList.append(self.i);
self.oList.append(self.o);
self.ctList.append(self.ct);

self.hList.append(self.h);
self.cList.append(self.c);

print("self.h:");
print(self.h);

return self.prediction();

def prediction(self):
pre = self.hList[-1] @ self.Wp + self.bp;
self.preList.append(pre);

return pre;

self.delta_Wfh,self.delta_Wfx,self.delta_bf = self.Weight_bias(self.input_size,self.hidden_size);
self.delta_Wih,self.delta_Wix,self.delta_bi = self.Weight_bias(self.input_size,self.hidden_size);
self.delta_Woh,self.delta_Wox,self.delta_bo = self.Weight_bias(self.input_size,self.hidden_size);
self.delta_Wch,self.delta_Wcx,self.delta_bc = self.Weight_bias(self.input_size,self.hidden_size);

self.delta_hList = self.init_delta();
self.delta_cList = self.init_delta();

self.delta_fList = self.init_delta();
self.delta_iList = self.init_delta();
self.delta_oList = self.init_delta();
self.delta_ctList = self.init_delta();

for k in range(self.times,0,-1):
self.compute_gate_backward(self,x,k);

self.compute_Weight_bias_backward(self,x);

def init_delta(self):
X = [];

for i in range(self.times + 1):
X.append(np.zeros(self.batch_size,self.hidden_size));

return X;

def compute_gate_backward(self,t,x):

f = self.fList[k];
i = self.iList[k];
o = self.oList[k];
ct = self.ctList[k];

h = self.hList[k];
c = self.cList[k];

c_pre = self.cList[k-1];
f_for = self.fList[k+1];

delta_hk = self.delta_hList[k];

if(k == self.times):
delta_ck = delta_hk * o * self.Tanh_backward(c);
else:
delta_ck = delta_hk * o * self.Tanh_backward(c) + self.delta_cList[k+1] * f_for;

delta_ctk = delta_ck * i;
delta_fk = delta_ck * c_pre;
delta_ik = delta_ck * ct;
delta_ok = delta_hk * Tanh_forward(c);

delta_hkpre = delta_fk * self.Sigmoid_backward(h @ self.Wfh + x[k-1] @ self.Wfx + self.bf) * self.Wfh + delta_ik * self.Sigmoid_backward(h @ self.Wih + x[k-1] @ self.Wix + self.bi) * self.Wih +delta_ok * self.Sigmoid_backward(h @ self.Woh + x[k-1] @ self.Wox + self.bo) * self.Woh +delta_ctk * self.Tanh_backward(h @ self.Wch + x[k-1] @ self.Wcx + self.bc) * self.Wch;

self.delta_hList[k-1] = delta_hkpre;
self.delta_cList[k] = delta_ck;

self.delta_fList[k] = delta_fk;
self.delta_iList[k] = delta_ik;
self.delta_oList[k] = delta_ok;
self.delta_ctList[k] = delta_ctk;

def compute_Weight_bias_backward(self,x):

for t in range (slef.times,0,-1):

delta_Wfh = self.delta_fList[t] * self.Sigmoid_backward(h @ self.Wfh + x[t-1] @ self.Wfx + self.bf) * self.hList[t-1];
delta_Wfx = self.delta_fList[t] * self.Sigmoid_backward(h @ self.Wfh + x[t-1] @ self.Wfx + self.bf) * x[t];
delta_bf = self.delta_fList[t] * self.Sigmoid_backward(h @ self.Wfh + x[t-1] @ self.Wfx + self.bf);

delta_Wih = self.delta_iList[t] * self.Sigmoid_backward(h @ self.Wih + x[t-1] @ self.Wix + self.bi) * self.hList[t-1];
delta_Wix = self.delta_iList[t] * self.Sigmoid_backward(h @ self.Wih + x[t-1] @ self.Wix + self.bi) * x[t];
delta_bi = self.delta_iList[t] * self.Sigmoid_backward(h @ self.Wih + x[t-1] @ self.Wix + self.bi);

delta_Wch = self.delta_ctList[t] * self.Tanh_backward(h @ self.Wch + x[t-1] @ self.Wcx + self.bc) * self.hList[t-1];
delta_Wcx = self.delta_ctList[t] * self.Tanh_backward(h @ self.Wch + x[t-1] @ self.Wcx + self.bc) * x[t];
delta_bc = self.delta_ctList[t] * self.Tanh_backward(h @ self.Wch + x[t-1] @ self.Wcx + self.bc);

delta_Woh = self.delta_oList[t] * self.Sigmoid_backward(h @ self.Woh + x[t-1] @ self.Wox + self.bo) * self.hList[t-1];
delta_Wox = self.delta_oList[t] * self.Sigmoid_backward(h @ self.Woh + x[t-1] @ self.Wox + self.bo) * x[t];
delta_bo = self.delta_oList[t] * self.Sigmoid_backward(h @ self.Woh + x[t-1] @ self.Wox + self.bo);

self.delta_Wfh += delta_Wfh;
self.delta_Wfx += delta_Wfx;
self.delta_bf += delta_bf;

self.delta_Wih += delta_Wih;
self.delta_Wix += delta_Wix;
self.delta_bi += delta_bi;

self.delta_Wch += delta_Wch;
self.delta_Wcx += delta_Wcx;
self.delta_bc += delta_bc;

self.delta_Woh += delta_Woh;
self.delta_Wox += delta_Wox;
self.delta_bo += delta_bo;

def update(self,lr):

self.Wfh -= self.delta_Wfh * lr;
self.Wfx -= self.delta_Wfx * lr;
self.bf -= self.delta_bf * lr;

self.Wih -= self.delta_Wih * lr;
self.Wix -= self.delta_Wix * lr;
self.bi -= self.delta_bi * lr;

self.Woh -= self.delta_Woh * lr;
self.Wox -= self.delta_Wox * lr;
self.bo -= self.delta_bo * lr;

self.Wch -= self.delta_Wch * lr;
self.Wcx -= self.delta_Wcx * lr;
self.bc -= self.delta_bc * lr;

def reset(self):
self.times = 0;

self.hList = [torch.zeros(self.batch_size,self.hidden_size)];
self.cList = [torch.zeros(self.batch_size,self.hidden_size)];

def Sigmoid_forward(self,x):
return 1.0 / (1.0 + torch.exp(-x));

def Sigmoid_backward(self,x):
return x * (1 - x);

def Tanh_forward(self,x):
return ((torch.exp(x) - torch.exp(-x)) / (torch.exp(x) + torch.exp(-x)));

def Tanh_backward(self,x):
return 1 - (self.Tanh_forward(x) * self.Tanh_forward(x));
``````
``````l = LSTM(timesteps,batch_size,1,1,1);

lr = 0.01;
lossList = [];
``````
``````for epoch in range(Epoch):

loss = 0;

for i in range(len(train_X)):

x = train_X[i].permute(1, 0, 2);

l.forward(x);

pre = l.prediction();

print("pre:");
print(pre);
``````

Hi Xinbai!

The values you think are the same are almost certainly different. By default,
pytorch prints out tensors with four digits, while a `float` has about seven or
eight digits of precision.

Try `torch.set_printoptions (precision = 8)` and then print out your
tensors.

Best.

K. Frank