I'm hitting a runtime error during training that I can't diagnose (it only appears when `loss.backward()` is called). The model is a small CNN followed by covariance pooling and BiMap/ReEig/LogEig layers. Could you help me figure out what is going wrong? The full code is below.
```python
import torch
import torch.nn as nn
import os
import numpy as np

def cov(m, rowvar=False):
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    fact = 1.0 / (m.size(1) - 1)
    # subtract the mean from the features
    mean_m = torch.mean(m, dim=1, keepdim=True)
    m -= mean_m
    mt = m.t()  # if complex: mt = m.t().conj()
    m_squeeze = m.matmul(mt).squeeze()
    result = fact * m_squeeze
    return result
```
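For reference, this is the kind of sanity check I would expect `cov()` to pass (illustration only, not part of the model; it assumes the imports above, and `x` is a made-up tensor):

```python
x = torch.randn(10, 4)                       # 10 observations, 4 variables
expected = np.cov(x.numpy(), rowvar=False)   # (4, 4) sample covariance from NumPy
got = cov(x.clone())                         # clone() because cov() subtracts the mean in place
print(np.allclose(got.numpy(), expected, atol=1e-5))   # should print True
```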
```python
def feature_cov(feature):
    cov_arr = []
    for i in range(feature.shape[0]):
        our_c = cov(feature[i])
        cov_arr.append(our_c)
    cov_arr = torch.stack(cov_arr)
    return cov_arr

def trace(matrix):
    trace_val = 0
    for i in range(matrix.shape[0]):
        trace_val += matrix[i][i]
    trace_val2 = 0.0001 * trace_val
    result_trace_val = torch.mul(trace_val2, torch.eye(matrix.shape[0]).cuda())
    return result_trace_val

def normalize_cov(cov_matrix):
    normalized_cov = []
    for i in range(cov_matrix.shape[0]):
        trace_val = trace(cov_matrix[i])
        tmp = cov_matrix[i] + trace_val
        normalized_cov.append(tmp)
    normalized_cov = torch.stack(normalized_cov)
    return normalized_cov
```
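`normalize_cov()` adds `1e-4 * tr(C) * I` to each covariance matrix so that every matrix is strictly positive definite before the eigendecompositions further down. A small illustration (not part of the model; it needs a GPU because `trace()` builds its identity with `.cuda()`):

```python
C = torch.tensor([[2.0, 0.0], [0.0, 0.0]]).cuda()   # a singular 2x2 "covariance"
C_reg = C + trace(C)                                 # adds 1e-4 * tr(C) * I = 0.0002 * I
print(C_reg)    # [[2.0002, 0.0], [0.0, 0.0002]], no zero eigenvalues anymore
```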
```python
def tile(a, dim, n_tile):
    init_dim = a.size(dim)
    repeat_idx = [1] * a.dim()
    repeat_idx[dim] = n_tile
    a = a.repeat(*(repeat_idx))
    order_index = torch.cuda.LongTensor(
        np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)]))
    out = torch.index_select(a, dim, order_index)
    return out
```
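`tile()` repeats each slice of `a` consecutively along `dim` (the same behaviour as `torch.repeat_interleave` in newer PyTorch versions). For example (illustration only; a GPU is required because of `torch.cuda.LongTensor`):

```python
a = torch.tensor([[1, 2], [3, 4]]).cuda()
print(tile(a, 0, 3))
# tensor([[1, 2],
#         [1, 2],
#         [1, 2],
#         [3, 4],
#         [3, 4],
#         [3, 4]], device='cuda:0')
```

In the model it is only used to copy the single orthogonal weight matrix across the batch dimension.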
```python
# (3, 128, 128) shape covariance pooling
def cal_cov_pooling(feature):
    feature = feature.view(feature.shape[0], feature.shape[1], -1)
    cov_matrix = feature_cov(feature)
    cov_regularized = normalize_cov(cov_matrix)
    return cov_regularized

# computes weights for BiMap Layer
def variable_with_orth_weight_decay(shape):
    s1 = int(shape[2])
    s2 = int(shape[2] / 2)
    rand_val = torch.randn([s1, s2], device='cuda:0')
    w0_init, _ = torch.qr(rand_val)
    w0 = w0_init
    tmp1 = w0.view(1, s1, s2)
    tmp2 = w0.t().view(1, s2, s1)
    tmp1 = tile(tmp1, 0, shape[0])
    tmp2 = tile(tmp2, 0, shape[0])
    return tmp1, tmp2
```
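Because `torch.qr` returns a `Q` with orthonormal columns, the two weight tensors returned here should satisfy `weight2 @ weight1 = I` for every batch element. A quick check (illustration only; the shape `(4, 256, 256)` is just an example standing in for a batch of four 256x256 covariance matrices):

```python
w1, w2 = variable_with_orth_weight_decay((4, 256, 256))
print(w1.shape, w2.shape)    # torch.Size([4, 256, 128]) torch.Size([4, 128, 256])
print(torch.allclose(torch.bmm(w2, w1)[0],
                     torch.eye(128, device='cuda:0'), atol=1e-4))   # should print True
```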
```python
# ReEig Layer
def cal_rect_cov(features):
    weight1, weight2 = variable_with_orth_weight_decay(features.shape)
    features = torch.bmm(torch.bmm(weight2, features), weight1)
    # print(features.requires_grad, weight1.requires_grad, weight2.requires_grad)
    # print(features.device, weight1.device, weight2.device)
    # features = features.detach().cpu()
    result = []
    for i in range(features.shape[0]):
        s_f, v_f = torch.symeig(features[i], eigenvectors=True)
        s_f_clamp = torch.clamp(s_f, 0.0001, 10000)
        s_f_clamp2 = torch.diag(s_f_clamp)
        sv_m = torch.matmul(v_f, s_f_clamp2)
        features_t = torch.matmul(sv_m, v_f.t())
        result.append(features_t)
    result = torch.stack(result)
    return result

# LogEig Layer
def cal_log_cov(features):
    # features = features.detach().cpu()
    result = []
    for i in range(features.shape[0]):
        s_f, v_f = torch.symeig(features[i], eigenvectors=True)
        s_f_log = torch.log(s_f)
        s_f_log2 = torch.diag(s_f_log)
        sv_m = torch.matmul(v_f, s_f_log2)
        features_t = torch.matmul(sv_m, v_f.t())
        result.append(features_t)
    result = torch.stack(result)
    return result
```
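Both layers follow the same pattern: eigendecompose each matrix with `torch.symeig`, apply a function to the eigenvalues (a clamp for ReEig, a log for LogEig), and reassemble as `V diag(f(lambda)) V^T`. For a diagonal matrix this is easy to verify by hand (illustration only, runs on CPU):

```python
A = torch.tensor([[4.0, 0.0], [0.0, 9.0]])
s, v = torch.symeig(A, eigenvectors=True)
log_A = torch.matmul(v, torch.matmul(torch.diag(torch.log(s)), v.t()))
print(log_A)    # diag(log 4, log 9), i.e. the matrix logarithm of A
```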
```python
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # conv layers
        self.layer1 = nn.Sequential(
            # 1
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=0),
            # 2
            nn.Conv2d(64, 96, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=0),
            # 3
            nn.Conv2d(96, 128, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(128),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, padding=0),
            # 4
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, padding=0),
            # 5
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(64),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, padding=0)
            # 6
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
            # nn.BatchNorm2d(64),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, padding=0)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16384, 2000),   # 16384 = 128 * 128 (flattened log-covariance matrix)
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(2000, 128),
            nn.ReLU(),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(128, 7)
        )

    def forward(self, x):
        out = self.layer1(x)
        # covariance matrix (cov pooling)
        out = cal_cov_pooling(out)
        print("cov_pooling grad stage")
        check_grad(out)
        # bimap layer 1
        out = cal_rect_cov(out)
        print("rect cov pooling grad stage")
        check_grad(out)
        # bimap layer 2
        # out = cal_rect_cov(out)
        # print("rect cov pooling grad stage")
        # check_grad(out)
        out = cal_log_cov(out)
        print("log cov pooling grad stage")
        check_grad(out)
        out = out.view(out.shape[0], -1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out
```
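`check_grad` is a small debugging helper I keep in another notebook cell; it just prints whether a tensor is still attached to the autograd graph. Roughly like this (a sketch, not necessarily the exact code):

```python
def check_grad(t):
    # show whether the tensor tracks gradients and which op produced it
    print(t.requires_grad, t.grad_fn)
```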
The error raised by `loss.backward()` is as follows:

```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-d4ada5de2326> in <module>
     22     # Backward and optimize
     23     optimizer.zero_grad()
---> 24     loss.backward()
     25     optimizer.step()
     26     total += labels.size(0)

~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
    105                 products. Defaults to ``False``.
    106         """
--> 107         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    108
    109     def register_hook(self, hook):

~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
     91     Variable._execution_engine.run_backward(
     92         tensors, grad_tensors, retain_graph, create_graph,
---> 93         allow_unreachable=True)  # allow_unreachable flag
     94
     95

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [256, 256]], which is output 0 of AsStridedBackward, is at version 128; expected version 127 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
```
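Following the hint at the end of the error, I understand I can turn on anomaly detection before the training loop so that the failing forward-pass operation is reported as well. Something like this (a sketch; `model`, `criterion`, `optimizer`, and `train_loader` stand in for the objects in my notebook):

```python
torch.autograd.set_detect_anomaly(True)   # report the forward op that produced the failing gradient

for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    outputs = model(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()   # with anomaly detection on, the error also points at the offending forward line
    optimizer.step()
```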
Any pointers on what is causing this would be appreciated. Thank you!
--james