Hi I’m james.
my model source in here
import torch
import torch.nn as nn
import os
import numpy as np
def cov(m, rowvar=False):
if m.dim() > 2:
raise ValueError('m has more than 2 dimensions')
if m.dim() < 2:
m = m.view(1, -1)
if not rowvar and m.size(0) != 1:
m = m.t()
fact = 1.0 / (m.size(1) - 1)
#특징에서 평균 빼기
mean_m=torch.mean(m, dim=1, keepdim=True)
m =m- mean_m
mt = m.t() # if complex: mt = m.t().conj()
m_squeeze=m.matmul(mt).squeeze()
result=fact * m_squeeze
return result
def feature_cov(feature):
cov_arr=[]
for i in range(feature.shape[0]):
our_c=cov(feature[i])
cov_arr.append(our_c)
cov_arr=torch.stack(cov_arr)
return cov_arr
def trace(matrix):
trace_val=0
for i in range(matrix.shape[0]):
trace_val+=matrix[i][i]
trace_val2=0.0001*trace_val
result_trace_val=torch.mul(trace_val2,torch.eye(matrix.shape[0]).cuda())
return result_trace_val
def normalize_cov(cov_matrix):
normalized_cov=[]
for i in range(cov_matrix.shape[0]):
trace_val=trace(cov_matrix[i])
tmp=cov_matrix[i]+trace_val
normalized_cov.append(tmp)
normalized_cov=torch.stack(normalized_cov)
return normalized_cov
def tile(a, dim, n_tile):
init_dim = a.size(dim)
repeat_idx = [1] * a.dim()
repeat_idx[dim] = n_tile
a = a.repeat(*(repeat_idx))
order_index = torch.cuda.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)]))
out=torch.index_select(a, dim, order_index)
return out
#(3,128,128) shape covariance pooling
def cal_cov_pooling(feature):
feature=feature.view(feature.shape[0],feature.shape[1],-1)
cov_matrix=feature_cov(feature)
cov_regularized=normalize_cov(cov_matrix)
return cov_regularized
# computes weights for BiMap Layer
def variable_with_orth_weight_decay(shape):
s1=int(shape[2])
s2=int(shape[2]/2)
rand_val=torch.randn([s1, s2],device='cuda:0')
w0_init, _ = torch.qr(rand_val)
w0 = w0_init
tmp1 = w0.view(1, s1, s2)
tmp2 = w0.t().view(1, s2, s1)
tmp1 = tile(tmp1,0,shape[0])
tmp2 = tile(tmp2,0,shape[0])
return tmp1, tmp2
# ReEig Layer
def cal_rect_cov(features):
weight1, weight2 = variable_with_orth_weight_decay(features.shape)
features = torch.bmm(torch.bmm(weight2, features), weight1)
result=[]
for i in range(features.shape[0]):
s_f,v_f=torch.symeig(features[i], eigenvectors=True)
s_f_clamp=torch.clamp(s_f,0.0001,10000)
s_f_clamp2=torch.diag(s_f_clamp)
sv_m=torch.matmul(v_f,s_f_clamp2)
features_t=torch.matmul(sv_m,v_f.t())
result.append(features_t)
result = torch.stack(result)
return result
# LogEig Layer
def cal_log_cov(features):
# features=features.detach().cpu()
result=[]
for i in range(features.shape[0]):
s_f,v_f=torch.symeig(features[i], eigenvectors=True)
s_f_log = torch.log(s_f)
s_f_log2=torch.diag(s_f_log)
sv_m=torch.matmul(v_f,s_f_log2)
features_t=torch.matmul(sv_m,v_f.t())
result.append(features_t)
result = torch.stack(result)
return result
class Model(nn.Module):
def __init__(self):
super(Model,self).__init__()
#conv_layer
self.layer1=nn.Sequential(
#1
nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2,padding=0),
#2
nn.Conv2d(64,96,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(96),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2,padding=0),
#3
nn.Conv2d(96,128,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(128),
nn.ReLU(),
# nn.MaxPool2d(kernel_size=2,padding=0),
#4
nn.Conv2d(128,128,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2,padding=0),
#5
nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(64),
nn.ReLU(),
# nn.MaxPool2d(kernel_size=2,padding=0)
nn.Conv2d(256,256,kernel_size=3,stride=1,padding=1,bias=False),
# nn.BatchNorm2d(64),
nn.ReLU(),
# nn.MaxPool2d(kernel_size=2,padding=0)
)
self.fc1=nn.Sequential(
nn.Linear(16384,2000),
nn.ReLU(),
)
self.fc2=nn.Sequential(
nn.Linear(2000,128),
nn.ReLU(),
)
self.fc3=nn.Sequential(
nn.Linear(128,7)
)
def forward(self,x):
#cov matrix
out=self.layer1(x)
#공분산 행렬
out=cal_cov_pooling(out)
# print("cov_pooling grad stage")
# check_grad(out)
#bimap layer1
out=cal_rect_cov(out)
# print("rect cov pooling grad stage")
# check_grad(out)
#bimap layer2
# out=cal_rect_cov(out)
# print("rect cov pooling grad stage")
# check_grad(out)
out=cal_log_cov(out)
# print("log cov pooling grad stage")
# check_grad(out)
out=out.view(out.shape[0],-1)
out=self.fc1(out)
out=self.fc2(out)
out=self.fc3(out)
return out
and model load and optimize function here
model=Model().cuda()
# model=Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
my training code here
#train
# model.train()
# torch.autograd.set_detect_anomaly(True)
x=[]
y=[]
total_step = len(train_loader)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.3)
for epoch in range(args.num_epochs):
scheduler.step()
correct = 0
total = 0
for i, (images, labels) in enumerate(train_loader):
images=images.cuda()
labels=(labels).cuda()
outputs = model.forward(images)
# print("labels:{}".format(labels.shape))
# print("outputs:{}".format(outputs.shape))
_, predicted = torch.max(outputs.data, 1)
loss = criterion(outputs,labels)
print("current learning rate : {}, loss:{}".format(scheduler.get_lr(),loss))
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, args.num_epochs, i+1, total_step, loss.item()))
print('Test Accuracy of the model: {} %'.format(100 * correct / total))
if (epoch+1) % 10 == 0:
torch.save(model.state_dict(), 'training.ckpt')
x.append(epoch)
y.append(loss.item())
plt.plot(x,y)
plt.show()
torch.save(model.state_dict(), 'training.ckpt')
There is no change in the accuracy of epochs when my source is learning.
The same is true for loss.
I don’t know why it is.
It would be very appreciated if you could tell me why.
thank you.
-James