The code runs, but the training loss and training accuracy never change:
train_loss = 0.69, train_acc = 0.5
I think the model is not being trained, but I can't find my mistake.
I have tried every solution I could find — changing the learning rate, calling reset_parameters, normalizing the input, and so on.
Maybe the problem is in the preprocessing? The images are grayscale.
Using model.named_parameters() I see grad_requires: True for every parameter, but the weights and their gradients never change.
CODE:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import torch.optim as optim
import shutil
import time
from PIL import Image
import os
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
def gaussian1(x):
    """Gaussian-shaped activation: exp(-(x - mean(x))^2 / std(x)^2).

    Note: this is not a normalized Gaussian pdf — there is no 1/2 factor in
    the exponent and no normalizing constant; it maps x into (0, 1].
    mean/std are computed over the whole tensor x.
    """
    mean = torch.mean(x)
    std = torch.std(x)
    # Reuse the std computed above (the original called torch.std(x) a
    # second time inside the return expression).
    return torch.exp(-((x - mean) ** 2) / std ** 2)
def gaussian2(x):
    """Scaled Gaussian-shaped activation: 0.5 * exp(-(x - mean(x))^2 / std(x)^2).

    Same shape as gaussian1 but halved, mapping x into (0, 0.5].
    mean/std are computed over the whole tensor x.
    """
    mean = torch.mean(x)
    std = torch.std(x)
    # Reuse the std computed above (the original called torch.std(x) a
    # second time inside the return expression).
    return 0.5 * torch.exp(-((x - mean) ** 2) / std ** 2)
# Fixed 5x5 high-pass "KV" kernel applied as a non-trainable preprocessing
# filter in GNCNN.forward (presumably a residual filter for steganalysis —
# TODO confirm against the paper this model comes from).
KV = torch.tensor([[-1, 2, -2, 2, -1],
                   [2, -6, 8, -6, 2],
                   [-2, 8, -12, 8, -2],
                   [2, -6, 8, -6, 2],
                   [-1, 2, -2, 2, -1]]) / 12.
KV = KV.view(1, 1, 5, 5).to(device=device, dtype=torch.float)
# torch.autograd.Variable has been deprecated since PyTorch 0.4; a plain
# tensor (requires_grad defaults to False) already behaves as a constant.
KV.requires_grad_(False)
class GNCNN(nn.Module):
    """CNN with Gaussian-shaped activations and average pooling.

    Input: 1-channel images; the forward pass first convolves with the fixed
    module-level KV kernel, then runs five conv/activation/avg-pool stages
    and three fully connected layers, producing 2-class logits.
    Assumes the input spatial size makes the flattened features 16*4*4
    (e.g. 256x256 with the transforms used below — TODO confirm).
    """

    def __init__(self):
        super(GNCNN, self).__init__()
        self.gaussian1 = gaussian1
        self.gaussian2 = gaussian2
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0, bias=True)
        self.avg_pool1 = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
        self.avg_pool2 = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
        self.avg_pool3 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
        self.conv4 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
        self.avg_pool4 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
        self.conv5 = nn.Conv2d(16, 16, kernel_size=5, stride=1, padding=0, bias=True)
        self.avg_pool5 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 2)
        self.reset_parameters()

    def forward(self, x):
        # Non-trainable high-pass preprocessing with the fixed KV kernel.
        prep = F.conv2d(x, KV, padding=2)
        out = self.avg_pool1(self.gaussian1(self.conv1(prep)))
        out = self.avg_pool2(self.gaussian2(self.conv2(out)))
        out = self.avg_pool3(self.gaussian2(self.conv3(out)))
        out = self.avg_pool4(self.gaussian2(self.conv4(out)))
        out = self.avg_pool5(self.gaussian2(self.conv5(out)))
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)  # raw logits; CrossEntropyLoss applies softmax

    def reset_parameters(self):
        """Re-initialize all conv layers (Xavier) and linear layers (Kaiming).

        BUG FIX: the original called nn.init.xavier_uniform_(self.conv1.weight)
        inside the Conv2d branch, so conv1 was re-initialized five times and
        conv2..conv5 kept their default initialization.
        """
        for mod in self.modules():
            if isinstance(mod, nn.Conv2d):
                nn.init.xavier_uniform_(mod.weight)
            elif isinstance(mod, nn.Linear):
                # init functions run under no_grad, so .data is unnecessary
                nn.init.kaiming_normal_(mod.weight)
def accuracy(outputs, labels):
    """Return the fraction of rows in `outputs` whose argmax equals `labels`.

    `outputs` is a (batch, classes) score tensor; `labels` a (batch,) tensor
    of class indices.  Returns a 0-dim float tensor in [0, 1].
    """
    _, predicted = torch.max(outputs, 1)
    hits = (labels == predicted.squeeze()).float()
    return hits.mean()
def default_loader(path):
    """Open the image at `path` and return it converted to RGB.

    Best-effort: on failure, prints a message and returns None (preserving
    the original behavior).  NOTE(review): returning None makes the Dataset's
    __getitem__ fail later with a confusing error — consider re-raising.
    """
    try:
        img = Image.open(path)
        return img.convert('RGB')
    except Exception:
        # The original used a bare `except:`, which also swallows
        # KeyboardInterrupt and SystemExit; `Exception` keeps the
        # best-effort behavior without trapping interpreter exits.
        print("Cannot read image: {}".format(path))
class BOSSBaseDataset(Dataset):
    """Dataset backed by an index file of '<image-path>#<label>' lines."""

    def __init__(self, txt, transforms, loader=default_loader):
        """Parse index file `txt`.

        Each line is split on '#': field 0 is the image path, field 1 the
        integer class label.  `transforms` may be None; `loader` maps a
        path to an image object.
        """
        super(BOSSBaseDataset, self).__init__()
        imgs = []
        # `with` guarantees the index file is closed — the original opened
        # the handle and never closed it.
        with open(txt, 'r') as fh:
            for line in fh:
                # One strip('\n') suffices; the original also called
                # rstrip('\n') redundantly on the already-stripped line.
                words = line.strip('\n').split('#')
                imgs.append((words[0], int(words[1])))
        self.imgs = imgs
        self.transforms = transforms
        self.loader = loader

    def __getitem__(self, index):
        """Return (image, label) for row `index`, applying transforms if set."""
        fn, label = self.imgs[index]
        # NOTE(review): default_loader returns None on unreadable files,
        # which would make self.transforms(img) raise here.
        img = self.loader(fn)
        if self.transforms is not None:
            img = self.transforms(img)
        return img, label

    def __len__(self):
        return len(self.imgs)
def train_model(model, criterion, optimizer, num_epochs, batch_size, use_gpu):
    """Run the training loop and return the trained model.

    Relies on module-level globals: `train_loader` (DataLoader),
    `train_sizes` (dataset length) and `train_loss_history` (list that
    accumulates per-epoch average losses).

    BUG FIX: the original function returned None, so the caller's
    `model = train_model(...)` discarded the trained network.  Also removed
    the deprecated Variable wrappers, the deprecated `.data.item()` access,
    and a large set of dead locals (best_model_wts, val_*/test_* counters,
    running_loss, count_batch) that were never read.
    """
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        epoch_loss = 0.
        for inputs, labels in train_loader:
            if use_gpu:
                # Variable is deprecated since PyTorch 0.4; plain tensors
                # moved with .cuda() participate in autograd directly.
                inputs = inputs.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # loss.item() is the modern replacement for loss.data.item()
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
        # Average per-batch loss: train_sizes / batch_size ~= number of
        # batches (slightly off when the last batch is partial).
        epoch_loss /= (train_sizes / batch_size)
        train_loss_history.append(epoch_loss)
        print('\nTrain: Epoch [{}] Loss: {:.4f}'.format(epoch, epoch_loss))
    return model
if __name__ == '__main__':
    use_gpu = torch.cuda.is_available()
    batch_size = 180
    learning_rate = 0.001
    # Grayscale pipeline with single-channel normalization stats.
    # NOTE(review): default_loader converts every image to RGB and
    # Grayscale(1) converts it back — converting once would be cheaper.
    atransforms = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.Grayscale(1),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, ], [0.229, ])
    ])
    train_sets = BOSSBaseDataset(txt='/content/drive/My Drive/Dataset/train.txt',
                                 transforms=atransforms)
    train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True, num_workers=16)
    train_sizes = len(train_sets)
    model = GNCNN()
    model.reset_parameters()  # redundant (GNCNN.__init__ already calls it) but harmless
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        model = model.cuda()
        # BUG FIX: the original unconditionally ran
        # `criterion = nn.CrossEntropyLoss().cuda()`, which crashes on a
        # CPU-only machine even though `use_gpu` was checked for the model.
        criterion = criterion.cuda()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    train_acc_history = []
    train_loss_history = []
    model = train_model(model=model, criterion=criterion, optimizer=optimizer,
                        num_epochs=50, batch_size=batch_size, use_gpu=use_gpu)
SOME RESULT:
Train: Epoch [7] Loss: 0.6954
-->name: conv1.weight -->grad_requirs: True --weight tensor(0.0042, device='cuda:0') -->grad_value: tensor(-0.0005, device='cuda:0')
-->name: conv1.bias -->grad_requirs: True --weight tensor(-0.0086, device='cuda:0') -->grad_value: tensor(-6.9849e-10, device='cuda:0')
-->name: conv2.weight -->grad_requirs: True --weight tensor(-0.0001, device='cuda:0') -->grad_value: tensor(7.2851e-05, device='cuda:0')
-->name: conv2.bias -->grad_requirs: True --weight tensor(-0.0114, device='cuda:0') -->grad_value: tensor(1.1059e-09, device='cuda:0')
-->name: conv3.weight -->grad_requirs: True --weight tensor(0.0007, device='cuda:0') -->grad_value: tensor(9.7395e-06, device='cuda:0')
-->name: conv3.bias -->grad_requirs: True --weight tensor(-0.0105, device='cuda:0') -->grad_value: tensor(-1.3388e-09, device='cuda:0')
-->name: conv4.weight -->grad_requirs: True --weight tensor(-0.0002, device='cuda:0') -->grad_value: tensor(3.7657e-05, device='cuda:0')
-->name: conv4.bias -->grad_requirs: True --weight tensor(-0.0098, device='cuda:0') -->grad_value: tensor(2.4447e-09, device='cuda:0')
-->name: conv5.weight -->grad_requirs: True --weight tensor(-0.0001, device='cuda:0') -->grad_value: tensor(-2.4289e-06, device='cuda:0')
-->name: conv5.bias -->grad_requirs: True --weight tensor(-0.0004, device='cuda:0') -->grad_value: tensor(6.9849e-10, device='cuda:0')
-->name: fc1.weight -->grad_requirs: True --weight tensor(8.4385e-05, device='cuda:0') -->grad_value: tensor(6.8088e-05, device='cuda:0')
-->name: fc1.bias -->grad_requirs: True --weight tensor(-0.0016, device='cuda:0') -->grad_value: tensor(0.0002, device='cuda:0')
-->name: fc2.weight -->grad_requirs: True --weight tensor(-0.0012, device='cuda:0') -->grad_value: tensor(-2.2372e-05, device='cuda:0')
-->name: fc2.bias -->grad_requirs: True --weight tensor(0.0015, device='cuda:0') -->grad_value: tensor(-0.0001, device='cuda:0')
-->name: fc3.weight -->grad_requirs: True --weight tensor(-0.0153, device='cuda:0') -->grad_value: tensor(-4.8385e-10, device='cuda:0')
-->name: fc3.bias -->grad_requirs: True --weight tensor(0.0303, device='cuda:0') -->grad_value: tensor(-7.4506e-09, device='cuda:0')
Epoch 8/49
----------
Train: Epoch [8] Loss: 0.6957
-->name: conv1.weight -->grad_requirs: True --weight tensor(0.0042, device='cuda:0') -->grad_value: tensor(0.0004, device='cuda:0')
-->name: conv1.bias -->grad_requirs: True --weight tensor(-0.0086, device='cuda:0') -->grad_value: tensor(3.6380e-10, device='cuda:0')
-->name: conv2.weight -->grad_requirs: True --weight tensor(-0.0001, device='cuda:0') -->grad_value: tensor(0.0002, device='cuda:0')
-->name: conv2.bias -->grad_requirs: True --weight tensor(-0.0114, device='cuda:0') -->grad_value: tensor(-3.4197e-10, device='cuda:0')
-->name: conv3.weight -->grad_requirs: True --weight tensor(0.0007, device='cuda:0') -->grad_value: tensor(1.5599e-05, device='cuda:0')
-->name: conv3.bias -->grad_requirs: True --weight tensor(-0.0105, device='cuda:0') -->grad_value: tensor(-1.2224e-09, device='cuda:0')
-->name: conv4.weight -->grad_requirs: True --weight tensor(-0.0002, device='cuda:0') -->grad_value: tensor(3.0600e-06, device='cuda:0')
-->name: conv4.bias -->grad_requirs: True --weight tensor(-0.0098, device='cuda:0') -->grad_value: tensor(-2.3865e-09, device='cuda:0')
-->name: conv5.weight -->grad_requirs: True --weight tensor(-0.0001, device='cuda:0') -->grad_value: tensor(-3.1800e-06, device='cuda:0')
-->name: conv5.bias -->grad_requirs: True --weight tensor(-0.0004, device='cuda:0') -->grad_value: tensor(3.6089e-09, device='cuda:0')
-->name: fc1.weight -->grad_requirs: True --weight tensor(8.2432e-05, device='cuda:0') -->grad_value: tensor(-1.3619e-05, device='cuda:0')
-->name: fc1.bias -->grad_requirs: True --weight tensor(-0.0016, device='cuda:0') -->grad_value: tensor(-4.5320e-05, device='cuda:0')
-->name: fc2.weight -->grad_requirs: True --weight tensor(-0.0012, device='cuda:0') -->grad_value: tensor(2.6668e-05, device='cuda:0')
-->name: fc2.bias -->grad_requirs: True --weight tensor(0.0015, device='cuda:0') -->grad_value: tensor(0.0001, device='cuda:0')
-->name: fc3.weight -->grad_requirs: True --weight tensor(-0.0153, device='cuda:0') -->grad_value: tensor(-1.9463e-10, device='cuda:0')
-->name: fc3.bias -->grad_requirs: True --weight tensor(0.0303, device='cuda:0') -->grad_value: tensor(0., device='cuda:0')
Thanks a lot.