Training A Face Verification Model and error not decreasing

#model architecture

TODO: define the convolutional neural network architecture

import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.nn import Sequential, MaxPool2d, Conv2d, BatchNorm2d, ReLU, Softmax, Linear,Dropout, Identity, AdaptiveAvgPool2d
import torch.nn.functional as F
# can use the below import should you choose to initialize the weights of your Net
import torch.nn.init as I
from collections import OrderedDict

class Net(nn.Module):
    """Siamese-style face verification network.

    Takes a 6-channel input (two RGB face images stacked along the channel
    axis), runs each image through a shared SqueezeNet 1.1 backbone,
    concatenates the two 512-channel feature maps, and classifies the pair
    with a small 1x1-conv head into 2 classes (same / different).

    Input:  x of shape (N, 6, H, W) — assumes H = W = 224 so the backbone
            emits 13x13 feature maps (TODO confirm against the data loader).
    Output: logits of shape (N, 2), suitable for nn.CrossEntropyLoss.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Shared backbone for both images (weight sharing = Siamese setup).
        # NOTE(review): `models` must come from torchvision
        # (`from torchvision import models`) at the top of the file.
        self.feature2 = models.squeezenet1_1(pretrained=False)
        # Drop SqueezeNet's classifier; with Identity, its forward() returns
        # the flattened 512*13*13 feature vector per image.
        self.feature2.classifier = Identity()

        # Pair-classification head: 1x1 convs over the concatenated
        # (1024-channel) feature maps, then global average pooling.
        # FIX: no ReLU after the final conv — logits must be able to go
        # negative. A trailing ReLU clamps both class scores to >= 0, which
        # with softmax/cross-entropy pins the loss at ln(2) ~= 0.6931, the
        # exact plateau seen in the training log below.
        self.fc = Sequential(OrderedDict([
            ('drop', Dropout(p=0.1)),
            ('conc1', Conv2d(1024, 500, kernel_size=(1, 1))),
            ('reluc1', ReLU()),
            ('drop2', Dropout(0.2)),
            ('conc2', Conv2d(500, 2, kernel_size=(1, 1))),
            ('adp', AdaptiveAvgPool2d((1, 1)))
        ]))

    def forward(self, x):
        """Return (N, 2) classification logits for a batch of image pairs.

        Args:
            x: tensor of shape (N, 6, H, W); channels 0-2 are the first
               image, channels 3-5 the second.
        """
        # Split the stacked input into the two RGB images.
        # FIX: removed .detach() and the del statements — the input tensor
        # carries no grad, so detaching gains nothing, and `del` on local
        # names does not free the activations autograd retains for backward.
        x1 = x[:, :3, :, :]
        x2 = x[:, 3:, :, :]
        x1 = self.feature2(x1)
        x2 = self.feature2(x2)
        # SqueezeNet flattens its output; restore the spatial feature maps.
        x1 = x1.view((-1, 512, 13, 13))
        x2 = x2.view((-1, 512, 13, 13))
        # FIX: was `tch.cat` — a NameError; the module is imported as `torch`.
        x = torch.cat([x1, x2], dim=1)
        x = self.fc(x)
        # (N, 2, 1, 1) -> (N, 2)
        return x.squeeze(-1).squeeze(-1)


# Instantiate the verification network (weights randomly initialized,
# since the backbone is created with pretrained=False).
model= Net()

#training Outputs

Epoch: 3[Train], Batch: 1, Avg. Loss: 0.6931475400924683 acc: 0.336 err_rate: 0.664 F1 Score: nan
Epoch: 3[Train], Batch: 2, Avg. Loss: 0.6931475400924683 acc: 0.362 err_rate: 0.638 F1 Score: nan
Epoch: 3[Train], Batch: 3, Avg. Loss: 0.6931475400924683 acc: 0.3586666666666667 err_rate: 0.6413333333333333 F1 Score: nan
Epoch: 3[Train], Batch: 4, Avg. Loss: 0.6931475400924683 acc: 0.357 err_rate: 0.643 F1 Score: nan
Epoch: 3[Train], Batch: 5, Avg. Loss: 0.6931475400924683 acc: 0.36 err_rate: 0.64 F1 Score: nan
Epoch: 3[Train], Batch: 6, Avg. Loss: 0.6931475400924683 acc: 0.358 err_rate: 0.642 F1 Score: nan
Epoch: 3[Train], Batch: 7, Avg. Loss: 0.6931475400924683 acc: 0.3628571428571429 err_rate: 0.6371428571428571 F1 Score: nan
Epoch: 3[Train], Batch: 8, Avg. Loss: 0.6931475400924683 acc: 0.354 err_rate: 0.646 F1 Score: nan
Epoch: 3[Train], Batch: 9, Avg. Loss: 0.6931475400924683 acc: 0.3551111111111111 err_rate: 0.6448888888888888 F1 Score: nan
Epoch: 3[Train], Batch: 10, Avg. Loss: 0.6931475400924683 acc: 0.3552 err_rate: 0.6448 F1 Score: nan
Epoch: 3[Train], Batch: 11, Avg. Loss: 0.6931475400924683 acc: 0.356 err_rate: 0.644 F1 Score: nan
Epoch: 3[Train], Batch: 12, Avg. Loss: 0.6931475400924683 acc: 0.35533333333333333 err_rate: 0.6446666666666667 F1 Score: nan
Epoch: 3[Train], Batch: 13, Avg. Loss: 0.6931475400924683 acc: 0.3547692307692308 err_rate: 0.6452307692307693 F1 Score: nan
Epoch: 3[Train], Batch: 14, Avg. Loss: 0.6931475400924683 acc: 0.3545714285714286 err_rate: 0.6454285714285715 F1 Score: nan
Epoch: 3[Train], Batch: 15, Avg. Loss: 0.6931475400924683 acc: 0.3536 err_rate: 0.6464 F1 Score: nan
Epoch: 3[Train], Batch: 16, Avg. Loss: 0.6931475400924683 acc: 0.35475 err_rate: 0.64525 F1 Score: nan
Epoch: 3[Train], Batch: 17, Avg. Loss: 0.6931475400924683 acc: 0.35552941176470587 err_rate: 0.6444705882352941 F1 Score: nan
Epoch: 3[Train], Batch: 18, Avg. Loss: 0.6931475400924683 acc: 0.358 err_rate: 0.642 F1 Score: nan
Epoch: 3[Train], Batch: 19, Avg. Loss: 0.6931475400924683 acc: 0.35810526315789476 err_rate: 0.6418947368421053 F1 Score: nan
Epoch: 3[Train], Batch: 20, Avg. Loss: 0.6931475400924683 acc: 0.3558 err_rate: 0.6442 F1 Score: nan
Epoch: 3[Train], Batch: 21, Avg. Loss: 0.6931475400924683 acc: 0.3537142857142857 err_rate: 0.6462857142857142 F1 Score: nan
Epoch: 3[Train], Batch: 22, Avg. Loss: 0.6931475400924683 acc: 0.354 err_rate: 0.646 F1 Score: nan
Epoch: 3[Train], Batch: 23, Avg. Loss: 0.6931475400924683 acc: 0.3525217391304348 err_rate: 0.6474782608695652 F1 Score: nan
Epoch: 3[Train], Batch: 24, Avg. Loss: 0.6931475400924683 acc: 0.3511666666666667 err_rate: 0.6488333333333334 F1 Score: nan
Epoch: 3[Train], Batch: 25, Avg. Loss: 0.6931475400924683 acc: 0.35104 err_rate: 0.64896 F1 Score: nan
Epoch: 3[Train], Batch: 26, Avg. Loss: 0.6931475400924683 acc: 0.3527692307692308 err_rate: 0.6472307692307693 F1 Score: nan
Epoch: 3[Train], Batch: 27, Avg. Loss: 0.6931475400924683 acc: 0.3542222222222222 err_rate: 0.6457777777777778 F1 Score: nan
Epoch: 3[Train], Batch: 28, Avg. Loss: 0.6931475400924683 acc: 0.35328571428571426 err_rate: 0.6467142857142857 F1 Score: nan
Epoch: 3[Train], Batch: 29, Avg. Loss: 0.6931475400924683 acc: 0.3546206896551724 err_rate: 0.6453793103448275 F1 Score: nan
Epoch: 3[Train], Batch: 30, Avg. Loss: 0.6931475400924683 acc: 0.3548 err_rate: 0.6452 F1 Score: nan
Epoch: 3[Train], Batch: 31, Avg. Loss: 0.6931475400924683 acc: 0.3536774193548387 err_rate: 0.6463225806451612 F1 Score: nan
Epoch: 3[Train], Batch: 32, Avg. Loss: 0.6931475400924683 acc: 0.3525 err_rate: 0.6475 F1 Score: nan
Epoch: 3[Train], Batch: 33, Avg. Loss: 0.6931475400924683 acc: 0.35333333333333333 err_rate: 0.6466666666666666 F1 Score: nan
Epoch: 3[Train], Batch: 34, Avg. Loss: 0.6931475400924683 acc: 0.3528235294117647 err_rate: 0.6471764705882352 F1 Score: nan
Epoch: 3[Train], Batch: 35, Avg. Loss: 0.6931475400924683 acc: 0.3525714285714286 err_rate: 0.6474285714285715 F1 Score: nan
Epoch: 3[Train], Batch: 36, Avg. Loss: 0.6931475400924683 acc: 0.35333333333333333 err_rate: 0.6466666666666666 F1 Score: nan
Epoch: 3[Train], Batch: 37, Avg. Loss: 0.6931475400924683 acc: 0.3531891891891892 err_rate: 0.6468108108108108 F1 Score: nan
Epoch: 3[Train], Batch: 38, Avg. Loss: 0.6931475400924683 acc: 0.3542105263157895 err_rate: 0.6457894736842106 F1 Score: nan
Epoch: 3[Train], Batch: 39, Avg. Loss: 0.6931475400924683 acc: 0.35292307692307695 err_rate: 0.6470769230769231 F1 Score: nan
Epoch: 3[Train], Batch: 40, Avg. Loss: 0.6931475400924683 acc: 0.354 err_rate: 0.646 F1 Score: nan
Epoch: 3[Train], Batch: 41, Avg. Loss: 0.6931475400924683 acc: 0.3532682926829268 err_rate: 0.6467317073170732 F1 Score: nan
Epoch: 3[Train], Batch: 42, Avg. Loss: 0.6931475400924683 acc: 0.35295238095238096 err_rate: 0.6470476190476191 F1 Score: nan
Epoch: 3[Train], Batch: 43, Avg. Loss: 0.6931475400924683 acc: 0.353953488372093 err_rate: 0.6460465116279069 F1 Score: nan
Epoch: 3[Train], Batch: 44, Avg. Loss: 0.6931475400924683 acc: 0.3528181818181818 err_rate: 0.6471818181818182 F1 Score: nan
Epoch: 3[Train], Batch: 45, Avg. Loss: 0.6931475400924683 acc: 0.35315555555555556 err_rate: 0.6468444444444444 F1 Score: nan
Epoch: 3[Train], Batch: 46, Avg. Loss: 0.6931475400924683 acc: 0.35347826086956524 err_rate: 0.6465217391304348 F1 Score: nan
Epoch: 3[Train], Batch: 47, Avg. Loss: 0.6931475400924683 acc: 0.35421276595744683 err_rate: 0.6457872340425532 F1 Score: nan
Epoch: 3[Train], Batch: 48, Avg. Loss: 0.6931475400924683 acc: 0.3545 err_rate: 0.6455 F1 Score: nan
Epoch: 3[Train], Batch: 49, Avg. Loss: 0.6931475400924683 acc: 0.3536326530612245 err_rate: 0.6463673469387755 F1 Score: nan
Epoch: 3[Train], Batch: 50, Avg. Loss: 0.6931475400924683 acc: 0.3544 err_rate: 0.6456 F1 Score: nan
Epoch: 3[Train], Batch: 51, Avg. Loss: 0.6931475400924683 acc: 0.3551372549019608 err_rate: 0.6448627450980392 F1 Score: nan
Epoch: 3[Train], Batch: 52, Avg. Loss: 0.6931475400924683 acc: 0.35523076923076924 err_rate: 0.6447692307692308 F1 Score: nan
Epoch: 3[Train], Batch: 53, Avg. Loss: 0.6931475400924683 acc: 0.35562264150943396 err_rate: 0.644377358490566 F1 Score: nan
Epoch: 3[Train], Batch: 54, Avg. Loss: 0.6931475400924683 acc: 0.35488888888888886 err_rate: 0.6451111111111111 F1 Score: nan
Epoch: 3[Train], Batch: 55, Avg. Loss: 0.6931475400924683 acc: 0.35454545454545455 err_rate: 0.6454545454545455 F1 Score: nan
Epoch: 3[Train], Batch: 56, Avg. Loss: 0.6931475400924683 acc: 0.35378571428571426 err_rate: 0.6462142857142857 F1 Score: nan
Epoch: 3[Train], Batch: 57, Avg. Loss: 0.6931475400924683 acc: 0.3552982456140351 err_rate: 0.6447017543859649 F1 Score: nan
Epoch: 3[Train], Batch: 58, Avg. Loss: 0.6931475400924683 acc: 0.35537931034482756 err_rate: 0.6446206896551724 F1 Score: nan
Epoch: 3[Train], Batch: 59, Avg. Loss: 0.6931475400924683 acc: 0.3550508474576271 err_rate: 0.6449491525423728 F1 Score: nan
Epoch: 3[Train], Batch: 60, Avg. Loss: 0.6931475400924683 acc: 0.355 err_rate: 0.645 F1 Score: nan
Epoch: 3[Train], Batch: 61, Avg. Loss: 0.6931475400924683 acc: 0.3543606557377049 err_rate: 0.6456393442622951 F1 Score: nan
Epoch: 3[Train], Batch: 62, Avg. Loss: 0.6931475400924683 acc: 0.35529032258064513 err_rate: 0.6447096774193548 F1 Score: nan
Epoch: 3[Train], Batch: 63, Avg. Loss: 0.6931475400924683 acc: 0.35638095238095235 err_rate: 0.6436190476190476 F1 Score: nan
Epoch: 3[Train], Batch: 64, Avg. Loss: 0.6931475400924683 acc: 0.3564375 err_rate: 0.6435625 F1 Score: nan
Epoch: 3[Train], Batch: 65, Avg. Loss: 0.6931475400924683 acc: 0.3566153846153846 err_rate: 0.6433846153846153 F1 Score: nan
Epoch: 3[Train], Batch: 66, Avg. Loss: 0.6931475400924683 acc: 0.3572121212121212 err_rate: 0.6427878787878788 F1 Score: nan
Epoch: 3[Train], Batch: 67, Avg. Loss: 0.6931475400924683 acc: 0.35707462686567165 err_rate: 0.6429253731343284 F1 Score: nan
Epoch: 3[Train], Batch: 68, Avg. Loss: 0.6931475400924683 acc: 0.35670588235294115 err_rate: 0.6432941176470588 F1 Score: nan
Epoch: 3[Train], Batch: 69, Avg. Loss: 0.6931475400924683 acc: 0.35739130434782607 err_rate: 0.6426086956521739 F1 Score: nan
Epoch: 3[Train], Batch: 70, Avg. Loss: 0.6931475400924683 acc: 0.3578857142857143 err_rate: 0.6421142857142857 F1 Score: nan
Epoch: 3[Train], Batch: 71, Avg. Loss: 0.6931475400924683 acc: 0.35864788732394365 err_rate: 0.6413521126760563 F1 Score: nan
Epoch: 3[Train], Batch: 72, Avg. Loss: 0.6931475400924683 acc: 0.3581666666666667 err_rate: 0.6418333333333334 F1 Score: nan
Epoch: 3[Train], Batch: 73, Avg. Loss: 0.6931475400924683 acc: 0.35835616438356166 err_rate: 0.6416438356164383 F1 Score: nan
Epoch: 3[Train], Batch: 74, Avg. Loss: 0.6931475400924683 acc: 0.3578918918918919 err_rate: 0.6421081081081081 F1 Score: nan
Epoch: 3[Train], Batch: 75, Avg. Loss: 0.6931475400924683 acc: 0.35776 err_rate: 0.64224 F1 Score: nan
Epoch: 3[Train], Batch: 76, Avg. Loss: 0.6931475400924683 acc: 0.35805263157894734 err_rate: 0.6419473684210526 F1 Score: nan
Epoch: 3[Train], Batch: 77, Avg. Loss: 0.6931475400924683 acc: 0.3579220779220779 err_rate: 0.642077922077922 F1 Score: nan
Epoch: 3[Train], Batch: 78, Avg. Loss: 0.6931475400924683 acc: 0.3577435897435897 err_rate: 0.6422564102564102 F1 Score: nan
Epoch: 3[Train], Batch: 79, Avg. Loss: 0.6931475400924683 acc: 0.3575696202531646 err_rate: 0.6424303797468355 F1 Score: nan
Epoch: 3[Train], Batch: 80, Avg. Loss: 0.6931475400924683 acc: 0.35775 err_rate: 0.64225 F1 Score: nan
Epoch: 3[Train], Batch: 81, Avg. Loss: 0.6931475400924683 acc: 0.35762962962962963 err_rate: 0.6423703703703704 F1 Score: nan
Epoch: 3[Train], Batch: 82, Avg. Loss: 0.6931475400924683 acc: 0.35673170731707315 err_rate: 0.6432682926829268 F1 Score: nan
Epoch: 3[Train], Batch: 83, Avg. Loss: 0.6931475400924683 acc: 0.3563855421686747 err_rate: 0.6436144578313253 F1 Score: nan
Epoch: 3[Train], Batch: 84, Avg. Loss: 0.6931475400924683 acc: 0.35628571428571426 err_rate: 0.6437142857142857 F1 Score: nan
Epoch: 3[Train], Batch: 85, Avg. Loss: 0.6931475400924683 acc: 0.3563294117647059 err_rate: 0.6436705882352941 F1 Score: nan
Epoch: 3[Train], Batch: 86, Avg. Loss: 0.6931475400924683 acc: 0.35572093023255813 err_rate: 0.6442790697674419 F1 Score: nan
Epoch: 3[Train], Batch: 87, Avg. Loss: 0.6931475400924683 acc: 0.35604597701149426 err_rate: 0.6439540229885058 F1 Score: nan
Epoch: 3[Train], Batch: 88, Avg. Loss: 0.6931475400924683 acc: 0.35645454545454547 err_rate: 0.6435454545454545 F1 Score: nan
Epoch: 3[Train], Batch: 89, Avg. Loss: 0.6931475400924683 acc: 0.3561797752808989 err_rate: 0.6438202247191012 F1 Score: nan
Epoch: 3[Train], Batch: 90, Avg. Loss: 0.6931475400924683 acc: 0.3554222222222222 err_rate: 0.6445777777777778 F1 Score: nan
Epoch: 3[Train], Batch: 91, Avg. Loss: 0.6931475400924683 acc: 0.35520879120879123 err_rate: 0.6447912087912088 F1 Score: nan
Epoch: 3[Train], Batch: 92, Avg. Loss: 0.6931475400924683 acc: 0.35530434782608694 err_rate: 0.644695652173913 F1 Score: nan
Epoch: 3[Train], Batch: 93, Avg. Loss: 0.6931475400924683 acc: 0.3549247311827957 err_rate: 0.6450752688172043 F1 Score: nan
Epoch: 3[Train], Batch: 94, Avg. Loss: 0.6931475400924683 acc: 0.35540425531914893 err_rate: 0.644595744680851 F1 Score: nan
Epoch: 3[Train], Batch: 95, Avg. Loss: 0.6931475400924683 acc: 0.3549894736842105 err_rate: 0.6450105263157895 F1 Score: nan
Epoch: 3[Train], Batch: 96, Avg. Loss: 0.6931475400924683 acc: 0.3550833333333333 err_rate: 0.6449166666666667 F1 Score: nan
Epoch: 3[Train], Batch: 97, Avg. Loss: 0.6931475400924683 acc: 0.3544329896907216 err_rate: 0.6455670103092783 F1 Score: nan
Epoch: 3[Train], Batch: 98, Avg. Loss: 0.6931475400924683 acc: 0.35416326530612247 err_rate: 0.6458367346938776 F1 Score: nan
Epoch: 3[Train], Batch: 99, Avg. Loss: 0.6931475400924683 acc: 0.3538585858585859 err_rate: 0.6461414141414141 F1 Score: nan
Epoch: 3[Train], Batch: 100, Avg. Loss: 0.6931475400924683 acc: 0.35396 err_rate: 0.64604 F1 Score: nan
Epoch: 3[Train], Batch: 101, Avg. Loss: 0.6931475400924683 acc: 0.3537425742574257 err_rate: 0.6462574257425743 F1 Score: nan
Epoch: 3[Train], Batch: 102, Avg. Loss: 0.6931475400924683 acc: 0.3540392156862745 err_rate: 0.6459607843137255 F1 Score: nan
Epoch: 3[Train], Batch: 103, Avg. Loss: 0.6931475400924683 acc: 0.3533980582524272 err_rate: 0.6466019417475728 F1 Score: nan
Epoch: 3[Train], Batch: 104, Avg. Loss: 0.6931475400924683 acc: 0.35315384615384615 err_rate: 0.6468461538461538 F1 Score: nan
Epoch: 3[Train], Batch: 105, Avg. Loss: 0.6931475400924683 acc: 0.35291428571428574 err_rate: 0.6470857142857143 F1 Score: nan
Epoch: 3[Train], Batch: 106, Avg. Loss: 0.6931475400924683 acc: 0.3529056603773585 err_rate: 0.6470943396226415 F1 Score: nan
Epoch: 3[Train], Batch: 107, Avg. Loss: 0.6931475400924683 acc: 0.35282242990654206 err_rate: 0.647177570093458 F1 Score: nan
Epoch: 3[Train], Batch: 108, Avg. Loss: 0.6931475400924683 acc: 0.3534074074074074 err_rate: 0.6465925925925926 F1 Score: nan
Epoch: 3[Train], Batch: 109, Avg. Loss: 0.6931475400924683 acc: 0.3532110091743119 err_rate: 0.6467889908256881 F1 Score: nan
Epoch: 3[Train], Batch: 110, Avg. Loss: 0.6931475400924683 acc: 0.35316363636363635 err_rate: 0.6468363636363637 F1 Score: nan
Epoch: 3[Train], Batch: 111, Avg. Loss: 0.6931475400924683 acc: 0.3534054054054054 err_rate: 0.6465945945945946 F1 Score: nan
Epoch: 3[Train], Batch: 112, Avg. Loss: 0.6931475400924683 acc: 0.35367857142857145 err_rate: 0.6463214285714286 F1 Score: nan
Epoch: 3[Train], Batch: 113, Avg. Loss: 0.6931475400924683 acc: 0.3541946902654867 err_rate: 0.6458053097345132 F1 Score: nan
Epoch: 3[Train], Batch: 114, Avg. Loss: 0.6931475400924683 acc: 0.3544561403508772 err_rate: 0.6455438596491228 F1 Score: nan
Epoch: 3[Train], Batch: 115, Avg. Loss: 0.6931475400924683 acc: 0.3550608695652174 err_rate: 0.6449391304347826 F1 Score: nan
Epoch: 3[Train], Batch: 116, Avg. Loss: 0.6931475400924683 acc: 0.3553448275862069 err_rate: 0.6446551724137931 F1 Score: nan
Epoch: 3[Train], Batch: 117, Avg. Loss: 0.6931475400924683 acc: 0.35552136752136754 err_rate: 0.6444786324786325 F1 Score: nan
Epoch: 3[Train], Batch: 118, Avg. Loss: 0.6931475400924683 acc: 0.3554576271186441 err_rate: 0.644542372881356 F1 Score: nan
Epoch: 3[Train], Batch: 119, Avg. Loss: 0.6931475400924683 acc: 0.3554621848739496 err_rate: 0.6445378151260505 F1 Score: nan
Epoch: 3[Train], Batch: 120, Avg. Loss: 0.6931475400924683 acc: 0.35556666666666664 err_rate: 0.6444333333333333 F1 Score: nan
Epoch: 3[Train], Batch: 121, Avg. Loss: 0.6931475400924683 acc: 0.3554380165289256 err_rate: 0.6445619834710744 F1 Score: nan
Epoch: 3[Train], Batch: 122, Avg. Loss: 0.6931475400924683 acc: 0.35521311475409834 err_rate: 0.6447868852459017 F1 Score: nan
Epoch: 3[Train], Batch: 123, Avg. Loss: 0.6931475400924683 acc: 0.3555121951219512 err_rate: 0.6444878048780488 F1 Score: nan
Epoch: 3[Train], Batch: 124, Avg. Loss: 0.6931475400924683 acc: 0.35538709677419356 err_rate: 0.6446129032258064 F1 Score: nan
Epoch: 3[Train], Batch: 125, Avg. Loss: 0.6931475400924683 acc: 0.355232 err_rate: 0.644768 F1 Score: nan
Epoch: 3[Train], Batch: 126, Avg. Loss: 0.6931475400924683 acc: 0.35495238095238096 err_rate: 0.6450476190476191 F1 Score: nan
Epoch: 3[Train], Batch: 127, Avg. Loss: 0.6931475400924683 acc: 0.35464566929133856 err_rate: 0.6453543307086614 F1 Score: nan
Epoch: 3[Train], Batch: 128, Avg. Loss: 0.6931475400924683 acc: 0.354875 err_rate: 0.645125 F1 Score: nan
Epoch: 3[Train], Batch: 129, Avg. Loss: 0.6931475400924683 acc: 0.3546046511627907 err_rate: 0.6453953488372093 F1 Score: nan
Epoch: 3[Train], Batch: 130, Avg. Loss: 0.6931475400924683 acc: 0.35467692307692306 err_rate: 0.6453230769230769 F1 Score: nan
Epoch: 3[Train], Batch: 131, Avg. Loss: 0.6931475400924683 acc: 0.3544732824427481 err_rate: 0.6455267175572519 F1 Score: nan
Epoch: 3[Train], Batch: 132, Avg. Loss: 0.6931475400924683 acc: 0.3547272727272727 err_rate: 0.6452727272727272 F1 Score: nan
Epoch: 3[Train], Batch: 133, Avg. Loss: 0.6931475400924683 acc: 0.3549473684210526 err_rate: 0.6450526315789473 F1 Score: nan
Epoch: 3[Train], Batch: 134, Avg. Loss: 0.6931475400924683 acc: 0.35492537313432837 err_rate: 0.6450746268656716 F1 Score: nan
Epoch: 3[Train], Batch: 135, Avg. Loss: 0.6931475400924683 acc: 0.3551111111111111 err_rate: 0.6448888888888888 F1 Score: nan
Epoch: 3[Train], Batch: 136, Avg. Loss: 0.6931475400924683 acc: 0.3548823529411765 err_rate: 0.6451176470588236 F1 Score: nan
Epoch: 3[Train], Batch: 137, Avg. Loss: 0.6931475400924683 acc: 0.35512408759124087 err_rate: 0.6448759124087591 F1 Score: nan
Epoch: 3[Train], Batch: 138, Avg. Loss: 0.6931475400924683 acc: 0.35507246376811596 err_rate: 0.644927536231884 F1 Score: nan
Epoch: 3[Train], Batch: 139, Avg. Loss: 0.6931475400924683 acc: 0.35536690647482017 err_rate: 0.6446330935251798 F1 Score: nan
Epoch: 3[Train], Batch: 140, Avg. Loss: 0.6931475400924683 acc: 0.3554 err_rate: 0.6446 F1 Score: nan
Epoch: 3[Train], Batch: 141, Avg. Loss: 0.6931475400924683 acc: 0.35540425531914893 err_rate: 0.644595744680851 F1 Score: nan
Epoch: 3[Train], Batch: 142, Avg. Loss: 0.6931475400924683 acc: 0.35546478873239434 err_rate: 0.6445352112676056 F1 Score: nan
Epoch: 3[Train], Batch: 143, Avg. Loss: 0.6931475400924683 acc: 0.3554965034965035 err_rate: 0.6445034965034965 F1 Score: nan
Epoch: 3[Train], Batch: 144, Avg. Loss: 0.6931475400924683 acc: 0.35541666666666666 err_rate: 0.6445833333333333 F1 Score: nan
Epoch: 3[Train], Batch: 145, Avg. Loss: 0.6931475400924683 acc: 0.35533793103448275 err_rate: 0.6446620689655173 F1 Score: nan
Epoch: 3[Train], Batch: 146, Avg. Loss: 0.6931475400924683 acc: 0.3553698630136986 err_rate: 0.6446301369863013 F1 Score: nan
Epoch: 3[Train], Batch: 147, Avg. Loss: 0.6931475400924683 acc: 0.35537414965986397 err_rate: 0.6446258503401361 F1 Score: nan
Epoch: 3[Train], Batch: 148, Avg. Loss: 0.6931475400924683 acc: 0.3553243243243243 err_rate: 0.6446756756756756 F1 Score: nan
Epoch: 3[Train], Batch: 149, Avg. Loss: 0.6931475400924683 acc: 0.3555973154362416 err_rate: 0.6444026845637584 F1 Score: nan
Epoch: 3[Train], Batch: 150, Avg. Loss: 0.6931475400924683 acc: 0.35528 err_rate: 0.64472 F1 Score: nan
Epoch: 3[Train], Batch: 151, Avg. Loss: 0.6931475400924683 acc: 0.3550728476821192 err_rate: 0.6449271523178808 F1 Score: nan
Epoch: 3[Train], Batch: 152, Avg. Loss: 0.6931475400924683 acc: 0.35507894736842105 err_rate: 0.644921052631579 F1 Score: nan
Epoch: 3[Train], Batch: 153, Avg. Loss: 0.6931475400924683 acc: 0.35490196078431374 err_rate: 0.6450980392156863 F1 Score: nan
Epoch: 3[Train], Batch: 154, Avg. Loss: 0.6931475400924683 acc: 0.35485714285714287 err_rate: 0.6451428571428571 F1 Score: nan
Epoch: 3[Train], Batch: 155, Avg. Loss: 0.6931475400924683 acc: 0.3548903225806452 err_rate: 0.6451096774193549 F1 Score: nan
Epoch: 3[Train], Batch: 156, Avg. Loss: 0.6931475400924683 acc: 0.35494871794871796 err_rate: 0.645051282051282 F1 Score: nan
Epoch: 3[Train], Batch: 157, Avg. Loss: 0.6931475400924683 acc: 0.35477707006369424 err_rate: 0.6452229299363057 F1 Score: nan
Epoch: 3[Train], Batch: 158, Avg. Loss: 0.6931475400924683 acc: 0.3548860759493671 err_rate: 0.645113924050633 F1 Score: nan
Epoch: 3[Train], Batch: 159, Avg. Loss: 0.6931475400924683 acc: 0.35491823899371067 err_rate: 0.6450817610062893 F1 Score: nan
Epoch: 3[Train], Batch: 160, Avg. Loss: 0.6931475400924683 acc: 0.3549 err_rate: 0.6451 F1 Score: nan
Epoch: 3[Train], Batch: 161, Avg. Loss: 0.6931475400924683 acc: 0.3546086956521739 err_rate: 0.6453913043478261 F1 Score: nan
Epoch: 3[Train], Batch: 162, Avg. Loss: 0.6931475400924683 acc: 0.35476543209876543 err_rate: 0.6452345679012346 F1 Score: nan
Epoch: 3[Train], Batch: 163, Avg. Loss: 0.6931475400924683 acc: 0.35496932515337426 err_rate: 0.6450306748466258 F1 Score: nan
Epoch: 3[Train], Batch: 164, Avg. Loss: 0.6931475400924683 acc: 0.35460975609756096 err_rate: 0.645390243902439 F1 Score: nan
Epoch: 3[Train], Batch: 165, Avg. Loss: 0.6931475400924683 acc: 0.3544969696969697 err_rate: 0.6455030303030304 F1 Score: nan
Epoch: 3[Train], Batch: 166, Avg. Loss: 0.6931475400924683 acc: 0.35481927710843375 err_rate: 0.6451807228915662 F1 Score: nan
Epoch: 3[Train], Batch: 167, Avg. Loss: 0.6931475400924683 acc: 0.35487425149700597 err_rate: 0.645125748502994 F1 Score: nan
Epoch: 3[Train], Batch: 168, Avg. Loss: 0.6931475400924683 acc: 0.35469047619047617 err_rate: 0.6453095238095238 F1 Score: nan
Epoch: 3[Train], Batch: 169, Avg. Loss: 0.6931475400924683 acc: 0.354603550295858 err_rate: 0.645396449704142 F1 Score: nan
Epoch: 3[Train], Batch: 170, Avg. Loss: 0.6931475400924683 acc: 0.3548470588235294 err_rate: 0.6451529411764706 F1 Score: nan
Epoch: 3[Train], Batch: 171, Avg. Loss: 0.6931475400924683 acc: 0.35457309941520465 err_rate: 0.6454269005847953 F1 Score: nan
Epoch: 3[Train], Batch: 172, Avg. Loss: 0.6931475400924683 acc: 0.35451162790697677 err_rate: 0.6454883720930232 F1 Score: nan
Epoch: 3[Train], Batch: 173, Avg. Loss: 0.6931475400924683 acc: 0.35458959537572254 err_rate: 0.6454104046242775 F1 Score: nan
Epoch: 3[Train], Batch: 174, Avg. Loss: 0.6931475400924683 acc: 0.3544827586206897 err_rate: 0.6455172413793103 F1 Score: nan
Epoch: 3[Train], Batch: 175, Avg. Loss: 0.6931475400924683 acc: 0.35456 err_rate: 0.64544 F1 Score: nan
Epoch: 3[Train], Batch: 176, Avg. Loss: 0.6931475400924683 acc: 0.3543181818181818 err_rate: 0.6456818181818181 F1 Score: nan
Epoch: 3[Train], Batch: 177, Avg. Loss: 0.6931475400924683 acc: 0.3542598870056497 err_rate: 0.6457401129943503 F1 Score: nan
Epoch: 3[Train], Batch: 178, Avg. Loss: 0.6931475400924683 acc: 0.3543820224719101 err_rate: 0.6456179775280899 F1 Score: nan
Epoch: 3[Train], Batch: 179, Avg. Loss: 0.6931475400924683 acc: 0.35427932960893854 err_rate: 0.6457206703910614 F1 Score: nan
Epoch: 3[Train], Batch: 180, Avg. Loss: 0.6931475400924683 acc: 0.3543777777777778 err_rate: 0.6456222222222222 F1 Score: nan
Epoch: 3[Train], Batch: 181, Avg. Loss: 0.6931475400924683 acc: 0.35451933701657456 err_rate: 0.6454806629834254 F1 Score: nan
Epoch: 3[Train], Batch: 182, Avg. Loss: 0.6931475400924683 acc: 0.3547472527472528 err_rate: 0.6452527472527473 F1 Score: nan
Epoch: 3[Train], Batch: 183, Avg. Loss: 0.6931475400924683 acc: 0.35481967213114757 err_rate: 0.6451803278688525 F1 Score: nan
Epoch: 3[Train], Batch: 184, Avg. Loss: 0.6931475400924683 acc: 0.35491304347826086 err_rate: 0.6450869565217391 F1 Score: nan
Epoch: 3[Train], Batch: 185, Avg. Loss: 0.6931475400924683 acc: 0.35496216216216214 err_rate: 0.6450378378378379 F1 Score: nan
Epoch: 3[Train], Batch: 186, Avg. Loss: 0.6931475400924683 acc: 0.3546451612903226 err_rate: 0.6453548387096775 F1 Score: nan
Epoch: 3[Train], Batch: 187, Avg. Loss: 0.6931475400924683 acc: 0.3548235294117647 err_rate: 0.6451764705882352 F1 Score: nan
Epoch: 3[Train], Batch: 188, Avg. Loss: 0.6931475400924683 acc: 0.3550212765957447 err_rate: 0.6449787234042553 F1 Score: nan
Epoch: 3[Train], Batch: 189, Avg. Loss: 0.6931475400924683 acc: 0.3549206349206349 err_rate: 0.645079365079365 F1 Score: nan
Epoch: 3[Train], Batch: 190, Avg. Loss: 0.6931475400924683 acc: 0.3546947368421053 err_rate: 0.6453052631578947 F1 Score: nan
Epoch: 3[Train], Batch: 191, Avg. Loss: 0.6931475400924683 acc: 0.3547853403141361 err_rate: 0.6452146596858639 F1 Score: nan
Epoch: 3[Train], Batch: 192, Avg. Loss: 0.6931475400924683 acc: 0.35452083333333334 err_rate: 0.6454791666666667 F1 Score: nan
Epoch: 3[Train], Batch: 193, Avg. Loss: 0.6931475400924683 acc: 0.3546735751295337 err_rate: 0.6453264248704663 F1 Score: nan
Epoch: 3[Train], Batch: 194, Avg. Loss: 0.6931475400924683 acc: 0.3547835051546392 err_rate: 0.6452164948453608 F1 Score: nan
Epoch: 3[Train], Batch: 195, Avg. Loss: 0.6931475400924683 acc: 0.3545846153846154 err_rate: 0.6454153846153846 F1 Score: nan
Epoch: 3[Train], Batch: 196, Avg. Loss: 0.6931475400924683 acc: 0.3547142857142857 err_rate: 0.6452857142857142 F1 Score: nan
Epoch: 3[Train], Batch: 197, Avg. Loss: 0.6931475400924683 acc: 0.3546192893401015 err_rate: 0.6453807106598984 F1 Score: nan
Epoch: 3[Train], Batch: 198, Avg. Loss: 0.6931475400924683 acc: 0.35454545454545455 err_rate: 0.6454545454545455 F1 Score: nan
Epoch: 3[Train], Batch: 199, Avg. Loss: 0.6931475400924683 acc: 0.3544924623115578 err_rate: 0.6455075376884422 F1 Score: nan
Epoch: 3[Train], Batch: 200, Avg. Loss: 0.6931475400924683 acc: 0.35444 err_rate: 0.64556 F1 Score: nan
Epoch: 3[Train], Batch: 201, Avg. Loss: 0.6931475400924683 acc: 0.35448756218905475 err_rate: 0.6455124378109452 F1 Score: nan
Epoch: 3[Train], Batch: 202, Avg. Loss: 0.6931475400924683 acc: 0.3545148514851485 err_rate: 0.6454851485148515 F1 Score: nan
Epoch: 3[Train], Batch: 203, Avg. Loss: 0.6931475400924683 acc: 0.35420689655172416 err_rate: 0.6457931034482759 F1 Score: nan
Epoch: 3[Train], Batch: 204, Avg. Loss: 0.6931475400924683 acc: 0.3540392156862745 err_rate: 0.6459607843137255 F1 Score: nan
Epoch: 3[Train], Batch: 205, Avg. Loss: 0.6931475400924683 acc: 0.35385365853658535 err_rate: 0.6461463414634147 F1 Score: nan
Epoch: 3[Train], Batch: 206, Avg. Loss: 0.6931475400924683 acc: 0.35394174757281555 err_rate: 0.6460582524271845 F1 Score: nan
Epoch: 3[Val], Batch: 1, Avg. Loss: 0.6931471228599548 acc: 0.33 err_rate: 0.67 F1 Score: nan
Epoch: 3[Val], Batch: 2, Avg. Loss: 0.6931471228599548 acc: 0.355 err_rate: 0.645 F1 Score: nan
Epoch: 3[Val], Batch: 3, Avg. Loss: 0.6931471228599548 acc: 0.39666666666666667 err_rate: 0.6033333333333334 F1 Score: nan
Epoch: 3[Val], Batch: 4, Avg. Loss: 0.6931471228599548 acc: 0.3875 err_rate: 0.6125 F1 Score: nan
Epoch: 3[Val], Batch: 5, Avg. Loss: 0.6931471228599548 acc: 0.39 err_rate: 0.61 F1 Score: nan
Epoch: 3[Val], Batch: 6, Avg. Loss: 0.6931471228599548 acc: 0.38 err_rate: 0.62 F1 Score: 

Your training seems to get stuck after 3 epochs of training, so you might need to play around with some hyperparameters.
I verified that the loss decreases at the beginning using your code.
However, I’m not sure why you want to delete the input tensor, as well as the intermediate tensors, in your forward pass.

Yes, the loss decreased after the first epoch, but after the second epoch it got stuck, predicting either all ones or all zeros. As for deleting the tensors: it is for memory cost — I want to remove those values from memory so they don’t take up RAM while training is running.