I am practicing binary classification and my model seems to be stuck with indifference… The parameters looks like they are not being updated… Below is my code…
It looks like a lot…
# define train function
import time
def train( epochs, model, loaders_list, optimiser, loss_fn, use_cuda, save_path, verbose=True ):
# initialize trackers
valid_loss_min = np.Inf
train_losses = { 'total':[], 'batch':[] }
valid_losses = { 'total':[], 'batch':[] }
accuracy = { 'train':[], 'valid':[] }
print( 'Training Started......' )
for epoch in range( 1, epochs+1 ):
# intiating time trackers
start_time = time.time()
# initialize variables to monitor training and validation losses
train_loss = valid_loss = correct = total = total_valid = 0.0
running_loss = running_loss_valid = correct_valid = 0.0
####################################################
################# Training Model ###################
####################################################
model.train()
#for batch_idx, ( data, label ) in enumerate( data_loaders['train'] ):
batch_idx = 0
for data, label in data_loaders['train']:
# move to GPU if available
if use_cuda:
data, label = data.cuda(), label.cuda()
# reshaping labels to match the shape of the model's output,
# and converting to Float
label = label.reshape( (label.shape[0], 1) )
label = label.float()
# clear accumulated gradients
optimiser.zero_grad()
# forward pass
output = model( data )
# calculate batch loss
loss = loss_fn( output, label )
# backward pass
loss.backward()
# parameter update
optimiser.step()
## track losses and accuracy
# compute avg train loss
train_loss = loss.item()
# convert output probabilities to predicted class
pred = output.data.max( 1, keepdim=True )[1]
# compare prediction to true label
correct += np.sum( np.squeeze(pred.eq(label.data.view_as(pred))).cpu().numpy() )
total += data.size( 0 )
#record batch losses
running_loss += loss.item()
if batch_idx % 10 == 0:
avg_loss = running_loss/10
train_losses['batch'].append( avg_loss )
running_loss = 0.0
#update batch_idx
batch_idx += 1
####################################################
################ Validating Model ##################
####################################################
model.eval()
#for batch_idx, ( data, label ) in enumerate( data_loaders['valid'] ):
batch_idx = 0
for data, label in data_loaders['valid']:
# move data to cuda if available
if use_cuda:
data, label = data.cuda(), label.cuda()
# reshaping labels to match the shape of the model's output,
# and converting to Float
label = label.reshape( (label.shape[0], 1) )
label = label.float()
# forward pass
output = model( data )
# compute loss
loss = loss_fn( output, label )
## track losses
# obtain avg validation loss over a batch
valid_loss = loss.item()
# convert output into predicted class
pred = output.data.max( 1, keepdim=True )[1]
# compare predictions to true label
correct_valid += np.sum( np.squeeze(pred.eq(label.data.view_as(pred))).cpu().numpy() )
total_valid += data.size( 0 )
# record batch loss statistics
running_loss_valid = loss.item()
if batch_idx % 2 == 0:
avg_loss = running_loss_valid/2
valid_losses['batch'].append( avg_loss )
running_loss_valid = 0.0
# update batch_idx
batch_idx += 1
#track ending time
end_time = time.time()
total_time = end_time - start_time
#Track total losses
train_acc = 100. * correct/total
valid_acc = 100. * correct_valid/total_valid
train_losses['total'].append( train_loss )
valid_losses['total'].append( valid_loss )
accuracy['train'].append( train_acc )
accuracy['valid'].append( valid_acc )
# verbose
if verbose:
print( f"==================== Epoch: {epoch}/{epochs} =========================" )
print( "Train loss: {:.6f} \t\tValid loss: {:.6f}".format( train_loss,
valid_loss ) )
print( "Train Acc: {:.6f} \t\tValid Acc: {:.6f}".format( train_acc,
valid_acc ) )
print(f"======>ETA: {total_time:.6f}seconds")
# Track best performining model
if valid_loss <= valid_loss_min:
if verbose:
print( f"Validation loss decreased ({valid_loss_min:.6f} ==> {valid_loss:.6f}) Saving model....")
torch.save( model, save_path )
valid_loss_min = valid_loss
# returned trained model
print( 'Training Complete!' )
return model, [train_losses, valid_losses, accuracy]type or paste code here
This is my output
Training Started......
==================== Epoch: 1/10 =========================
Train loss: 0.724163 Valid loss: 0.872123
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 132.691783seconds
Validation loss decreased (inf ==> 0.872123) Saving model....
==================== Epoch: 2/10 =========================
Train loss: 0.796593 Valid loss: 0.920781
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 134.288136seconds
==================== Epoch: 3/10 =========================
Train loss: 0.681651 Valid loss: 0.836445
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 133.916785seconds
Validation loss decreased (0.872123 ==> 0.836445) Saving model....
==================== Epoch: 4/10 =========================
Train loss: 0.699744 Valid loss: 0.705061
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 134.853135seconds
Validation loss decreased (0.836445 ==> 0.705061) Saving model....
==================== Epoch: 5/10 =========================
Train loss: 0.666447 Valid loss: 0.681207
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 134.621456seconds
Validation loss decreased (0.705061 ==> 0.681207) Saving model....
==================== Epoch: 6/10 =========================
Train loss: 0.704485 Valid loss: 0.887497
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 137.774845seconds
==================== Epoch: 7/10 =========================
Train loss: 0.689475 Valid loss: 0.479000
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 137.363145seconds
Validation loss decreased (0.681207 ==> 0.479000) Saving model....
==================== Epoch: 8/10 =========================
Train loss: 0.824759 Valid loss: 0.862540
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 139.869251seconds
==================== Epoch: 9/10 =========================
Train loss: 0.692937 Valid loss: 0.955699
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 135.745124seconds
==================== Epoch: 10/10 =========================
Train loss: 0.644844 Valid loss: 0.753045
Train Acc: 49.945000 Valid Acc: 50.220000
======>ETA: 134.871356seconds
Training Complete!type or paste code here
I do not know what exactly, I might be doing wrong…
I am using a resnet101 model