Hi guys, i am tring to put the calculation in the GPU instead of CPU. And i have met following errors:
Error notification:
File “C:\Users\49996.spyder-py3\temp.py”, line 173, in train
test_p = torch.FloatTensor(test_p0)
TypeError: expected TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)) (got TensorOptions(dtype=float, device=cuda:0, layout=Strided, requires_grad=false (default), pinned_memory=false (default), memory_format=(nullopt)))
code here:
#training proccess
def train(rho_data, size, train_size, mine_net, optimizer, iteration, input_size, tau):
criterion = nn.BCEWithLogitsLoss()
diff_et = torch.tensor(0.0)#初始化
data, test_p0, test_q0, label, train_index, marg_index = recons_data(rho_data, size,
train_size)
for i in range(iteration):
batch_size = int(len(data)/4)
if input_size == 2:
test_p = torch.FloatTensor(test_p0[:,[0,2]])
test_q = torch.FloatTensor(test_q0[:,[0,2]])
else:
**test_p = torch.FloatTensor(test_p0)**ERROR
** test_q = torch.FloatTensor(test_q0)**
train_batch, index1, index2 = sample_batch(data, input_size,
batch_size = batch_size,
sample_mode = 'joint')
label_batch = label[index1]
train_batch = torch.autograd.Variable(torch.FloatTensor(train_batch), requires_grad=True)
label_batch = torch.FloatTensor(label_batch)
logit = mine_net(train_batch)[0]
loss = criterion(logit.reshape(-1), label_batch)
if i < iteration-1:
optimizer.zero_grad()
loss.backward()
optimizer.step()
else:
optimizer.zero_grad()
loss.backward(retain_graph = True)
train_batch.grad.zero_()
loss.backward()
optimizer.step()
grads = train_batch.grad
if i >= iteration-101:
prob_p = mine_net(test_p)[1]
rn_est_p = prob_p/(1-prob_p)
finp_p = torch.log(torch.abs(rn_est_p))
prob_q = mine_net(test_q)[1]
rn_est_q = prob_q/(1-prob_q)
a = torch.abs(rn_est_q)
clip = torch.max(torch.min(a,torch.exp(tau)), torch.exp(-tau))
diff_et_hat = diff_et+torch.max(torch.mean(finp_p)-torch.log(torch.mean(clip)), torch.tensor(0.0))
return (diff_et_hat/100).detach().cpu().numpy(), grads, index1, train_index, marg_index
#%%
def mi(rho_data, size, train_size, model, optimizer, repo, tau, input_size):
mi, grad, index, train_index, marg_index = train(rho_data, size, train_size,
model, optimizer, repo,
input_size, tau=tau)return mi, grad, index, train_index, marg_index
#%%
def ma(a, window_size=1):
return [np.mean(a[i:i+window_size]) for i in range(0,len(a)-window_size)]
Device configuration
device = torch.device(‘cuda’ if torch.cuda.is_available() else ‘cpu’)
#parameters
rho = 0.9
repi = int(200)
repo = int(200)
rep = int(1000)
alpha = 0.001
tau = torch.tensor(0.9)
quan=7
thresh_vec = np.linspace(-3,3,quan)#阈值的定义
size = 4000
train_size = 3000
test_size = size-train_size
realization = 10
print(“ite, realization”, realization)
total_te = np.zeros(shape=(realization,quan))
total_ite = np.zeros(shape=(realization,quan))
total_ste = np.zeros(shape=(realization,quan))
for i in range(realization):
condiMI=
ground_truth=
result_ITE=
for thresh in thresh_vec:
rho_data = torch.from_numpy(data_generation(rho, thresh, size)).float().to(device)
modelP = Class_Net(input_size = 3, hidden_size = 130, std = 0.08).to(device)
modelQ = Class_Net(input_size = 2, hidden_size = 100, std = 0.02).to(device)
#分别初始化P与Q的神经网络
optimizerP = torch.optim.Adam(modelP.parameters(), lr=1e-3)
optimizerQ = torch.optim.Adam(modelQ.parameters(), lr=1e-3)
# conditional mi
mi_p = torch.from_numpy(mi(rho_data, size, train_size, modelP, optimizerP, rep, tau, input_size=3)[0]).to(device)
mi_q = torch.from_numpy(mi(rho_data, size, train_size, modelQ, optimizerQ, rep, tau, input_size=2)[0]).to(device)
condi_mi = mi_p - mi_q
condiMI.append(condi_mi*1.4427)
# ground truth
p=scipy.stats.norm(0, 1).cdf(thresh)
ground_value=-(1-p)*0.5*np.log(1-rho*rho)*1.4427
ground_truth.append(ground_value)
print("Conditional TE", condiMI)
print("ground_truth", ground_truth)
# ite
vae_net=VAE()
diff_ite=[]
modelA = Class_Net(input_size=3, hidden_size=130, std=0.08).to(device)
modelB = Class_Net(input_size=2, hidden_size=100, std=0.02).to(device)
optimizerA = torch.optim.Adam(modelA.parameters(), lr=1e-3).to(device)
optimizerB = torch.optim.Adam(modelB.parameters(), lr=1e-3).to(device)
for j in range(1):
vae_data, Jacobian_joint=data_gen_zbar(rho_data, size, vae_net)
#jacobian matrix is equal to Jacobian_joint together with Jacobian_joint by mar_index
miA, gradsA, indexA, t_indexA, m_indexA =mi(vae_data, size, train_size,
modelA, optimizerA, repi,
tau,input_size=3).to(device)
J_reorderA = torch.index_select(Jacobian_joint, 1, torch.LongTensor(m_indexA))
J_Am1 = torch.index_select(J_reorderA, 1, torch.LongTensor(t_indexA))
J_Aj1 = torch.index_select(Jacobian_joint, 1, torch.LongTensor(t_indexA))
Jacobian_jm = torch.cat ((J_Aj1, J_Am1), 1)
Jacobian_A = torch.index_select(Jacobian_jm, 1, torch.LongTensor(indexA))
miB, gradsB, indexB, t_indexB, m_indexB = mi(vae_data, size, train_size,
modelB, optimizerB, repi,
tau, input_size=2).to(device)
J_reorderB = torch.index_select(Jacobian_joint, 1, torch.LongTensor(m_indexB))
J_Bm1 = torch.index_select(J_reorderB, 1, torch.LongTensor(t_indexB))
J_Bj1 = torch.index_select(Jacobian_joint, 1, torch.LongTensor(t_indexB))
Jacobian_jmB = torch.cat ((J_Bj1, J_Bm1), 1)
Jacobian_B = torch.index_select(Jacobian_jmB, 1, torch.LongTensor(indexB))
grads_j_A=gradsA[:,-1].reshape(-1,1)
grads_j_B=gradsB[:,-1].reshape(-1,1)
#calculate gradient
# calculate the gradient wrt the weights of network vae
grads_A=torch.mm(torch.t(grads_j_A),torch.t(Jacobian_A))
grads_B=torch.mm(torch.t(grads_j_B),torch.t(Jacobian_B))
diff_grads=grads_A-grads_B
with torch.no_grad():
vae_net.fc1.weight -=alpha*torch.t(diff_grads)
diff_ite.append(miA-miB)
print('current_epoch', j)
result_ITE.append(ma(diff_ite)[-1]*1.4427)
print("result_ITE", result_ITE)
total_te[i,:] = condiMI
total_ite[i,:] = result_ITE
final_result_STE=[a_i - b_i for a_i, b_i in zip(condiMI, result_ITE)]
total_ste[i,:]=final_result_STE