Hello,
I have a very specific question about a model which I am trying to train but something goes wrong and I dont know why.
I am training a network to recognize hand gestures. I am using the data LeapGestRecog from kaggle. I have augmented the data by taking the mirrored versions of the images as well.
I will start by showing you the dataset.
This is how my data looks like. Each picture is 120 by 320 grayscale and I am using a batch size of 100 because otherwise I am running out of memory. The training data set contains 34000 images while the validation set only 3000.
The network looks as follows:
class Model(nn.Module):
def __init__(self,input_size=32, hidden_size=64,n_classes=10):
""" Define our model """
super(Model, self).__init__()
self.conv1 = nn.Conv2d(1,input_size, kernel_size=(3,3),stride=(1,1),padding=1)
self.relu1 = nn.ReLU()
self.maxp1 = nn.MaxPool2d(kernel_size=(2,2))
self.conv2 = nn.Conv2d(input_size,hidden_size,kernel_size=(3,3),padding=1)
self.relu2 = nn.ReLU()
self.maxp2 = nn.MaxPool2d(kernel_size=(2,2))
self.conv3 = nn.Conv2d(hidden_size,128,kernel_size=3,padding=1)
self.maxp3 = nn.MaxPool2d(kernel_size=(2,2))
self.l1 = nn.Linear(128 * 15 * 40,640)
self.relul = nn.ReLU()
self.l2 = nn.Linear(640,128)
self.l3 = nn.Linear(128,n_classes)
self.soft = nn.Softmax(1)
def forward(self, x):
""" The forward pass of our model """
x = self.conv1(x)
x = self.relu1(x)
x = self.maxp1(x)
x = self.conv2(x)
x = self.relu2(x)
x = self.maxp2(x)
x = self.conv3(x)
x = self.maxp3(x)
x = x.view(x.size(0),-1)
x = self.l1(x)
x = self.l2(x)
x = self.l3(x)
x = self.soft(x)
return x
The training function is a “standard procedure”
def train_model(model,train_data,valid_data,learning_rate,num_epochs,optimizer,criterion):
""" Training procedure of the model together with accuracy and loss for both data sets """
train_loss = np.zeros(num_epochs)
valid_loss = np.zeros(num_epochs)
train_accuracy = np.zeros(num_epochs)
valid_accuracy = np.zeros(num_epochs)
"""begin training"""
for epoch in range(num_epochs):
model.train()
train_losses = []
train_correct= 0
total_items = 0
valid_losses = []
valid_correct = 0
for images,labels in train_data:
images = images.float()
labels = labels.long()
optimizer.zero_grad()
"""add to GPU hopefully"""
images = images.to(device)
labels = labels.to(device)
"""Forward pass"""
outputs = model.forward(images)
loss = criterion(outputs,labels)
"""Backward pass"""
loss.backward()
optimizer.step()
"""staticstics"""
train_losses.append(loss.item())
_, predicted = torch.max(outputs.data,1)
train_correct += (predicted == labels).sum().item()
total_items += labels.size(0)
train_loss[epoch] = np.mean(train_losses)
train_accuracy[epoch] = (1 * train_correct/total_items)
with torch.no_grad():
correct_val = 0
total_val = 0
for images,labels in valid_data:
images = images.float()
labels = labels.long()
images = images.to(device)
labels = labels.to(device)
outputs = model.forward(images)
loss = criterion(outputs, labels)
valid_losses.append(loss.item())
_, predicted = torch.max(outputs.data, 1)
correct_val += (predicted == labels).sum().item()
total_val += labels.size(0)
valid_loss[epoch] = np.mean(valid_losses)
valid_accuracy[epoch] = (1 * correct_val/total_val)
print("Epoch: [{},{}], train accuracy: {:.4f}, valid accuracy: {:.4f}, train loss: {:.4f}, valid loss: {:.4f}"
.format(num_epochs,epoch+1,train_accuracy[epoch],valid_accuracy[epoch],train_loss[epoch],valid_accuracy[epoch]))
return model, train_accuracy, train_loss, valid_accuracy, valid_loss
This is how I call the train function:
network = Model()
network = network.to(device)
optimizer = torch.optim.SGD(network.parameters(),lr=0.01,momentum=0.9)
criterion = nn.CrossEntropyLoss()
model, train_accuracy, train_loss, valid_accuracy, valid_loss = train_model(model=network,train_data=train,valid_data=valid,learning_rate=0.001,num_epochs=100,optimizer=optimizer,criterion=criterion)
print("Ready")
I tried tweaking the model parameters such as learning rate but no matter what I do the accuracy and loss stays the same. Please help me, I dont know what is going wrong.