CIFAR10 classification accuracy is not improved

I am training a VGG16 that I built myself on CIFAR10, but the loss value does not go down. What am I doing wrong?

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
                        nn.Conv2d(3, 64, kernel_size=(3,3),stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                        nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                        nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                        nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                        nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU6(inplace=True),
                        nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                        )
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
                        nn.Linear(512*7*7,4096),
                        nn.ReLU(inplace=True),
                        nn.Dropout(0.5,inplace=False),
                        nn.Linear(4096,4096),
                        nn.ReLU(inplace=True),
                        nn.Dropout(0.5,inplace=False),
                        nn.Linear(4096,10),
                        nn.LogSoftmax(dim = 0),
        )
    def forward(self,x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
def train(model,train_loader,device):
    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()
    # AdamW optimizer
    optimizer = optim.AdamW(model.parameters(), lr=0.004)
    loss_values = []
    for epoch in range(100):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            #print(i)
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            # Backpropagation
            loss.backward()
            optimizer.step()
            train_loss = loss.item()
            running_loss += loss.item()
            if i % 500 == 499:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
                loss_values.append(running_loss/500)
                running_loss = 0.0
    plt.plot(loss_values)
    plt.show()
    #print('Finished Training')
    return model
def val(model,val_dataloader,device):
    model = model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_dataloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            #print(outputs.data)
            #print((predicted == labels).sum())
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on images: %d %%' % (100 * correct / total))
if __name__ == '__main__':
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = VGG16().to(device)
    # ToTensor: convert the image to a tensor (scales RGB values from 0-255 to the 0-1 range); Normalize: standardize each channel with mean and std hard-coded to 0.5
    transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Download the training data
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)
# Download the test data
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    val_dataloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=True, num_workers=2)
    #train_loader = torch.load("D:\Imagetrain.pt")
    #train_dataloader = data.DataLoader(train_loader, batch_size = 100, shuffle = True)
    #val_loader = torch.load("D:\Imagevalid.pt")
    #val_dataloader = data.DataLoader(val_loader, batch_size = 100, shuffle = True)
    print("VGG16")
    model = train(model,train_loader,device)
    
    val(model,val_dataloader,device)

[1, 500] loss: 2.712
[2, 500] loss: 2.303
[3, 500] loss: 2.303
[4, 500] loss: 2.303
[5, 500] loss: 2.303
[6, 500] loss: 2.303
[7, 500] loss: 2.303
[8, 500] loss: 2.303
[9, 500] loss: 2.303
[10, 500] loss: 2.303

The last activation, nn.LogSoftmax(dim = 0), looks wrong: it computes the log probabilities over the batch dimension instead of the class dimension.
Also, you can remove this layer completely, since nn.CrossEntropyLoss expects raw logits.
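For reference, the loss being stuck at 2.303 ≈ ln(10) means the network is effectively predicting a uniform distribution over the 10 classes. Here is a small self-contained check (made-up tensor shapes, not your actual model) of why dim=0 is the wrong axis for an output of shape (batch, classes):

import torch
import torch.nn.functional as F

# Hypothetical batch of raw logits: 4 samples, 10 classes -> shape (4, 10).
logits = torch.randn(4, 10)

# dim=0 normalizes each class column across the batch,
# dim=1 normalizes each row across the 10 classes (what a classifier needs).
print(F.log_softmax(logits, dim=0).exp().sum(dim=0))  # 10 ones, one per column
print(F.log_softmax(logits, dim=1).exp().sum(dim=1))  # 4 ones, one per sample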

Should I just change nn.LogSoftmax(dim = 0) to nn.Softmax, or should I delete nn.LogSoftmax(dim = 0) entirely?

No, using softmax there would also be wrong, so remove it completely and pass the output of the last linear layer directly to the criterion.
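Here is a minimal sketch (toy shapes, not your actual model) of what passing raw logits to the criterion looks like: nn.CrossEntropyLoss already applies log-softmax over the class dimension followed by NLLLoss, which is why no softmax layer is needed in the model itself.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Toy logits and labels: batch of 4 samples, 10 classes.
logits = torch.randn(4, 10)
labels = torch.tensor([3, 1, 0, 7])

criterion = nn.CrossEntropyLoss()
loss = criterion(logits, labels)  # raw logits go straight into the criterion

# Equivalent to log-softmax over the class dim followed by NLLLoss.
loss_manual = F.nll_loss(F.log_softmax(logits, dim=1), labels)
print(torch.allclose(loss, loss_manual))  # True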

I removed nn.LogSoftmax(dim = 0) from the model, but the loss value still does not drop. What am I doing wrong?
[1, 500] loss: 5.94170756626129126232
[2, 500] loss: 2.30296494007110608138
[3, 500] loss: 2.30301657056808473101
[4, 500] loss: 2.30306150436401368964
[5, 500] loss: 2.30292595243453979137
[6, 500] loss: 2.30289011669158938389
[7, 500] loss: 2.30299579811096188919
[8, 500] loss: 2.30294379997253439996
[9, 500] loss: 2.30301538562774643637
self.classifier = nn.Sequential(
    nn.Linear(512, 512),        # 512 * 7 * 7, 4096
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(512, 32),         # 4096, 4096
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(32, num_classes), # 4096
)
Would self.classifier be more accurate if I did it this way?
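One thing to check first: with self.avgpool = nn.AdaptiveAvgPool2d((7, 7)), torch.flatten produces 512 * 7 * 7 = 25088 features, so a first layer of nn.Linear(512, 512) would raise a shape-mismatch error. Below is a minimal sketch of one way to make the smaller classifier fit, assuming you also pool down to 1x1; whether it is actually more accurate is a separate question you would have to test.

import torch
import torch.nn as nn

num_classes = 10  # CIFAR10

# Sketch only: pooling to 1x1 so the flattened feature size is 512,
# matching the smaller classifier proposed above.
avgpool = nn.AdaptiveAvgPool2d((1, 1))
classifier = nn.Sequential(
    nn.Linear(512, 512),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(512, 32),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(32, num_classes),
)

# Dry run with a dummy feature map of shape (batch, 512, H, W).
features = torch.randn(2, 512, 2, 2)
x = torch.flatten(avgpool(features), 1)   # -> (2, 512)
print(classifier(x).shape)                # torch.Size([2, 10])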

Ohhh, I found my error: my pca1.weight shape is [8, 10, 10].
I want to use eight convolutions to reduce the dimensionality of 100 matrices and end up with eight. Is that possible?

Where and how should I make changes?