Training problem

Hi, I'm James.

My model source is here:

import torch
import torch.nn as nn
import os
import numpy as np


def cov(m, rowvar=False):
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    fact = 1.0 / (m.size(1) - 1)
    # subtract the mean from the features
    mean_m = torch.mean(m, dim=1, keepdim=True)
    m = m - mean_m
    mt = m.t()  # if complex: mt = m.t().conj()
    m_squeeze = m.matmul(mt).squeeze()
    result = fact * m_squeeze
    return result
def feature_cov(feature):
    cov_arr=[]
    for i in range(feature.shape[0]):
        our_c=cov(feature[i])
        
        cov_arr.append(our_c)
    cov_arr=torch.stack(cov_arr)
    return cov_arr
def trace(matrix):
    # returns 0.0001 * trace(matrix) * I, added to the covariance as a regularizer
    trace_val = 0
    for i in range(matrix.shape[0]):
        trace_val += matrix[i][i]
    trace_val2 = 0.0001 * trace_val
    result_trace_val = torch.mul(trace_val2, torch.eye(matrix.shape[0]).cuda())
    return result_trace_val

def normalize_cov(cov_matrix):
    normalized_cov=[]
    for i in range(cov_matrix.shape[0]):
        trace_val=trace(cov_matrix[i])
        tmp=cov_matrix[i]+trace_val
        normalized_cov.append(tmp)
    normalized_cov=torch.stack(normalized_cov)
    return normalized_cov

def tile(a, dim, n_tile):
    init_dim = a.size(dim)
    repeat_idx = [1] * a.dim()
    repeat_idx[dim] = n_tile
    a = a.repeat(*(repeat_idx))
    order_index = torch.cuda.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)]))
    out=torch.index_select(a, dim, order_index)
    return out

#(3,128,128) shape covariance pooling
def cal_cov_pooling(feature):
    feature=feature.view(feature.shape[0],feature.shape[1],-1)    
    cov_matrix=feature_cov(feature)
    cov_regularized=normalize_cov(cov_matrix)
    return cov_regularized

# computes weights for BiMap Layer
def variable_with_orth_weight_decay(shape):
    s1=int(shape[2])
    s2=int(shape[2]/2)
    rand_val=torch.randn([s1, s2],device='cuda:0')
    w0_init, _ = torch.qr(rand_val)
    w0 = w0_init
    tmp1 = w0.view(1, s1, s2)
    tmp2 = w0.t().view(1, s2, s1)
    tmp1 = tile(tmp1,0,shape[0])
    tmp2 = tile(tmp2,0,shape[0])
    return tmp1, tmp2
# ReEig Layer
def cal_rect_cov(features):
    weight1, weight2 = variable_with_orth_weight_decay(features.shape)
    features = torch.bmm(torch.bmm(weight2, features), weight1)
    result=[]
    for i in range(features.shape[0]):
        s_f,v_f=torch.symeig(features[i], eigenvectors=True)
        s_f_clamp=torch.clamp(s_f,0.0001,10000)
        s_f_clamp2=torch.diag(s_f_clamp)
        sv_m=torch.matmul(v_f,s_f_clamp2)
        features_t=torch.matmul(sv_m,v_f.t())
        result.append(features_t)
    result = torch.stack(result)
    return result

# LogEig Layer
def cal_log_cov(features):
#     features=features.detach().cpu()
    result=[]
    for i in range(features.shape[0]):
        s_f,v_f=torch.symeig(features[i], eigenvectors=True)
        s_f_log = torch.log(s_f)
        s_f_log2=torch.diag(s_f_log)
        sv_m=torch.matmul(v_f,s_f_log2)
        features_t=torch.matmul(sv_m,v_f.t())
        result.append(features_t)
    result = torch.stack(result)
    return result

class Model(nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        #conv_layer
        self.layer1=nn.Sequential(
            #1
            nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,padding=0),
            #2
            nn.Conv2d(64,96,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,padding=0),
            #3
            nn.Conv2d(96,128,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(128),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2,padding=0),
            #4
            nn.Conv2d(128,128,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,padding=0),
            #5
            nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(64),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2,padding=0)
            
            nn.Conv2d(256,256,kernel_size=3,stride=1,padding=1,bias=False),
#             nn.BatchNorm2d(64),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2,padding=0)
            )
        self.fc1=nn.Sequential(
            nn.Linear(16384,2000),
            nn.ReLU(),
        )
        self.fc2=nn.Sequential(
            nn.Linear(2000,128),
            nn.ReLU(),
        )
        self.fc3=nn.Sequential(
            nn.Linear(128,7)
        )
        
    def forward(self,x):
        # conv layers
        out=self.layer1(x)

        # covariance pooling (covariance matrix per sample)
        out=cal_cov_pooling(out)
#         print("cov_pooling grad stage")
#         check_grad(out)

        #bimap layer1
        out=cal_rect_cov(out)
        
#         print("rect cov pooling grad stage")
#         check_grad(out)

        #bimap layer2
#         out=cal_rect_cov(out)
#         print("rect cov pooling grad stage")
#         check_grad(out)

        out=cal_log_cov(out)
#         print("log cov pooling grad stage")
#         check_grad(out)

        out=out.view(out.shape[0],-1)
        out=self.fc1(out)
        out=self.fc2(out)
        out=self.fc3(out)
        return out

My model construction and optimizer setup are here:

model=Model().cuda()
# model=Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

My training code is here:

#train 
import matplotlib.pyplot as plt
# model.train()
# torch.autograd.set_detect_anomaly(True)
x=[]
y=[]
total_step = len(train_loader)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.3)
for epoch in range(args.num_epochs):
    scheduler.step()
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(train_loader):      
        images=images.cuda()
        labels=(labels).cuda()
        outputs = model.forward(images)
#         print("labels:{}".format(labels.shape))
#         print("outputs:{}".format(outputs.shape))
        
        _, predicted = torch.max(outputs.data, 1)
        loss = criterion(outputs,labels)
        print("current learning rate : {}, loss:{}".format(scheduler.get_lr(),loss))
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, args.num_epochs, i+1, total_step, loss.item()))
    print('Training accuracy of the model: {} %'.format(100 * correct / total))
    if (epoch+1) % 10 == 0:
        torch.save(model.state_dict(), 'training.ckpt')
    
    x.append(epoch)
    y.append(loss.item())
plt.plot(x,y)
plt.show()
torch.save(model.state_dict(), 'training.ckpt')

The accuracy does not change at all across epochs while my model is training.

The same is true for the loss.

I don't know why this is happening.

I would really appreciate it if you could tell me why.

Thank you.
-James

Hi, it's a lot of code, so I can't help you with everything. But you should replace model.forward(images) with model(images), so the call goes through the module's __call__ and any registered hooks are run.
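In your training loop that change is just:

outputs = model(images)  # instead of outputs = model.forward(images)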

Common advice is to try to overfit a small training set first, to figure out whether the code can actually learn anything.
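As a rough illustration, a minimal version of that overfitting check could look like the sketch below. It assumes a train_dataset object (whatever dataset sits behind your train_loader) and reuses your model, criterion and optimizer; the subset size, batch size and epoch count are arbitrary placeholders.

from torch.utils.data import DataLoader, Subset

# Fix a tiny subset of the training data (assumes train_dataset is the dataset behind train_loader)
small_set = Subset(train_dataset, list(range(32)))
small_loader = DataLoader(small_set, batch_size=8, shuffle=True)

model.train()
for epoch in range(200):
    for images, labels in small_loader:
        images, labels = images.cuda(), labels.cuda()
        outputs = model(images)  # call the module, not model.forward
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # If gradients flow through the whole model, the loss should drop close to zero
    # on 32 samples; if it stays flat, something in the forward pass is blocking learning.
    print('overfit check, epoch {}: loss {:.4f}'.format(epoch, loss.item()))

If even this tiny set cannot be overfit, the problem is likely in the model or the gradient flow rather than in the data or the hyperparameters.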