Dear All,
I am new to PyTorch and am training a model that combines a CNN with an attention mechanism. The model works fine when run on the CPU, but both the training and the validation accuracy are 0 when the code runs on the GPU. I have tried to solve the problem but have been unable to do so. Here is my code:
#------------------------------------------------->Import all the required libraries-------------------------------------->
import torch # Import Torch
import torch.nn as nn # Import NN module from Torch
from torchvision.datasets import CIFAR10 # Import CIFAR10 datset from torchvision
from torchvision.transforms import transforms# Import transform module from torchvision
from torch.utils.data import DataLoader # Import dataloader from torch
from torch.optim import Adam # import optimizer module from torch
from torch.autograd import Variable # Import autograd from torch
import numpy as np # Import numpy module
import torchvision.datasets as datasets #Import dataset from torch
from torchvision import models # import pretrained models from torch
from Attention import PAM_Module # import position attention module
from Attention import CAM_Module # import channel attention module
from torch import optim, cuda # import optimizer
import os
import random
import torch.nn.functional as F
import random
# Seed every random number generator the script relies on so that repeated
# runs start from the same state.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # seed every visible GPU, not only the current one
# Request deterministic cuDNN kernels and keep the auto-tuner off, since the
# auto-tuner may pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Use the GPU only when CUDA is actually usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Training_Path = "/home/mani/Desktop/Binary Attention Based ASC/DCASE 2018 Dataset/Training"  # path to the folder that contains the training images
Test_Path = "/home/mani/Desktop/Binary Attention Based ASC/DCASE 2018 Dataset/Test"  # path to the folder that contains the test images
#----------------------------------------------> Define Training Transformation --------------------------------------->
# Per-channel normalisation statistics shared by all three pipelines
# (presumably the usual ImageNet values -- confirm they suit this dataset).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _make_transform():
    """Return a new pipeline: convert the image to a tensor, then normalise it."""
    steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
    return transforms.Compose(steps)


# Training, test and validation images all receive the same preprocessing;
# no augmentation (random flip / random crop) is applied to any split.
train_transformations = _make_transform()
test_transformations = _make_transform()
Validation_transformations = _make_transform()
#----------------------------------------------> Define and Load Training and Test Set ---------------------------------->
# Load the training set.
train_set = datasets.ImageFolder(root=Training_Path, transform=train_transformations)
# NOTE(review): the validation set is read from the training folder, so the
# validation metrics are measured on the training images. No separate
# validation folder is defined in this script -- confirm this is intended.
validation_set = datasets.ImageFolder(root=Training_Path, transform=Validation_transformations)
#print(train_set) # prints details of the training set (number of images, transformations, ...)
# The test set must come from the test folder; it previously pointed at the
# training folder, which left Test_Path unused.
test_set = datasets.ImageFolder(root=Test_Path, transform=test_transformations)
#print(test_set) # prints details of the test set (number of images, transformations, ...)
BATCH_SIZE = 32  # number of images per batch
#----------------------------------------------> Use dataloader to create batches of data -------------------------------->
# Only the training data is shuffled; evaluation data is read in a fixed
# order so the reported metrics are computed over identical batches each time.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)  # loader for the training set
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)  # loader for the test set
validation_loader = DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)  # loader for the validation set
#print(len(train_loader)) # total number of iterations required to complete one training epoch
#----------------------------------------------> Check of GPU and get the pretrained model ----------------------------------------->
# Prefer the GPU when CUDA is usable, otherwise run everything on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
#model = models.resnet18(pretrained=True) # Import the resnet model
#print(model)
num_classes = 10  # output size of the final linear layer of the model
class BACNN(nn.Module):
    """ResNet-18 feature extractor followed by position and channel attention.

    The backbone output is passed through ``PAM_Module`` and ``CAM_Module``
    in parallel, the two results are summed, reduced by a 3x3 convolution to
    10 channels and finally fed through a linear layer.
    """

    def __init__(self):
        # The constructor must be named ``__init__``; with any other name
        # Python never calls it and none of the layers below are created.
        super(BACNN, self).__init__()
        # Keep every child module of ResNet-18 except the last two (in
        # torchvision's ResNet these are the pooling layer and the classifier).
        # NOTE(review): ``models.resnet18()`` is called without requesting
        # pretrained weights, yet the backbone is frozen below. If pretrained
        # features are intended, the weights must be requested explicitly
        # (compare the commented-out ``resnet18(pretrained=True)`` elsewhere
        # in this script) -- confirm.
        Pre_Trained_Layers = list(models.resnet18().children())[:-2]
        print("pre trained layers after removing the top layers---------------------->" + '\n')
        #print(Pre_Trained_Layers)
        #self.count=count
        self.features = nn.Sequential(*Pre_Trained_Layers)
        self.PAM = PAM_Module(512)
        self.CAM = CAM_Module(512)
        self.conv1 = nn.Conv2d(512, 10, 3, bias=True, padding=0)
        self.fc1 = nn.Linear(18, num_classes)
        # Freeze the backbone: only the attention modules, conv1 and fc1
        # receive gradient updates.
        for p in self.features.parameters():
            p.requires_grad = False
        #print (self.features)

    def forward(self, image):
        """Run a batch of images through the network.

        Args:
            image: batch tensor accepted by the ResNet-18 backbone.

        Returns:
            The network output flattened to two dimensions, ``(batch, -1)``.
        """
        x = self.features(image)
        x = F.relu(x)
        # Both attention branches see the same backbone features; their
        # outputs are fused by element-wise addition.
        x_1 = self.PAM(x)
        x_2 = self.CAM(x)
        x_3 = x_1 + x_2
        x = self.conv1(x_3)
        #print(a)
        x = F.relu(x)
        # NOTE(review): fc1 is applied *before* flattening, so nn.Linear acts
        # on the last dimension only (which must therefore have size 18), and
        # the flattened result has more than ``num_classes`` columns whenever
        # the remaining dimensions are not all 1. Confirm that this is the
        # intended classifier head for the input size in use.
        x = self.fc1(x)
        x = x.view(x.size(0), -1)
        #print(x.size)
        #count=count+1
        #print(count)
        #print("Completed")
        return x
# Build the network and move its parameters to the selected device.
model = BACNN().to(device)
#optimizer = optim.Adam(model.parameters())
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Define the cross entropy loss function
criterion = nn.CrossEntropyLoss()
def calculate_accuracy(fx, y):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        fx: tensor of model scores with one row per sample and the class
            scores along dimension 1.
        y: tensor of target class indices, one per sample.

    Returns:
        A zero-dimensional float tensor: correct predictions divided by the
        number of rows in ``fx``.
    """
    # ``max`` returns (values, indices); the indices are the predicted classes.
    preds = fx.max(1, keepdim=True)[1]
    # Reshape the labels to match ``preds`` before the element-wise comparison.
    correct = preds.eq(y.view_as(preds)).sum()
    acc = correct.float() / preds.shape[0]
    return acc
def train(model, device, iterator, optimizer, criterion):
    """Train ``model`` for one pass over ``iterator``.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device the input and label batches are moved to.
        iterator: iterable of ``(inputs, labels)`` batches that supports ``len()``.
        optimizer: optimiser that updates the model parameters.
        criterion: loss function called as ``criterion(predictions, labels)``.

    Returns:
        A ``(loss, accuracy)`` tuple, each the mean of the per-batch values
        (sum over batches divided by ``len(iterator)``).
    """
    print("Training Starts")
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for count, (x, y) in enumerate(iterator, start=1):
        x = x.to(device)
        y = y.to(device)
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        Predicted_Train_Label = model(x)
        loss = criterion(Predicted_Train_Label, y)
        acc = calculate_accuracy(Predicted_Train_Label, y)
        print("Training Iteration Number=", count)
        loss.backward()
        optimizer.step()
        # ``.item()`` converts the tensors to plain Python numbers.
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, device, iterator, criterion):
    """Measure loss and accuracy of ``model`` over ``iterator`` without training.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        device: device the input and label batches are moved to.
        iterator: iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: loss function called as ``criterion(predictions, labels)``.

    Returns:
        A ``(loss, accuracy)`` tuple, each the mean of the per-batch values
        (sum over batches divided by ``len(iterator)``).
    """
    print("Validation Starts")
    epoch_loss = 0
    epoch_acc = 0
    model.eval()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for count, (x, y) in enumerate(iterator, start=1):
            x = x.to(device)
            y = y.to(device)
            Predicted_Label = model(x)
            loss = criterion(Predicted_Label, y)
            acc = calculate_accuracy(Predicted_Label, y)
            print("Validation Iteration Number=", count)
            # ``.item()`` converts the tensors to plain Python numbers.
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
EPOCHS = 250  # number of passes over the training data
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join("/home/mani/Desktop/Binary Attention Based ASC/", 'BACNN.pt')
# Lowest validation loss seen so far; starts at infinity so that the first
# epoch always produces a checkpoint.
best_valid_loss = float('inf')
#if not os.path.isdir(f'{SAVE_DIR}'):
#os.makedirs(f'{SAVE_DIR}')
#print(model)
for epoch in range(EPOCHS):
    print("Start Training and Validation For Epoch Number=", epoch)
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, device, validation_loader, criterion)
    # Save the weights only when the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print("Epoch Number=", epoch, "Train Loss=", train_loss, "Training Accuracy=", train_acc, '\n')
    print("Epoch Number=", epoch, "Validation Loss=", valid_loss, "Validation Accuracy=", valid_acc, '\n')
--------------------------------------------------------------------------------------------------------------------------------
Please review the code and suggest modifications that will solve the problem.
Thanks
Achyut