DataLoader changes the dataset dimensions, and "CrossEntropyLoss" does not work properly

hhaoao · August 9, 2020, 3:37am

I found the problem:

I misunderstood the three lines of the Shape section in https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html?highlight=crossentropyloss#torch.nn.CrossEntropyLoss (the documentation is too terse).
The output of my network needs to have size 2 (one value per class), so the following code should be changed like this:

     def __init__(self):
         super(Net, self).__init__()
         self.fc1 = nn.Linear(2,3,True)
         # Original (incorrect) version: this last layer emits a single value,
         # whereas the corrected snippet that follows emits 2 values.
         self.fc2 = nn.Linear(3,1,True)

correct:

         self.linear1 = nn.Linear(2, 3)
         self.linear2 = nn.Linear(3, 2)

As for the dataset's output format: I was too rigid and always wanted to change it inside PyTorch.

It took me two weeks to finally figure it out, and I found a good tutorial: https://d2l.ai/chapter_linear-networks/softmax-regression-concise.html.

Final code:

import torch
import torch.nn as nn
from torch.utils.data import IterableDataset, DataLoader, TensorDataset

import math 

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss 

from ignite.contrib.handlers.visdom_logger import *

class Net(nn.Module):
    """Small feed-forward network: 2 inputs -> 3 hidden units -> 2 outputs.

    A ReLU is applied between the two linear layers; the final layer's
    output is returned as-is.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(2, 3)
        self.linear2 = nn.Linear(3, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``linear2(relu(linear1(x)))`` for the input tensor ``x``."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)

class XorDataset(IterableDataset):
    """Iterable dataset over the four rows of the XOR truth table.

    Iterating yields ``(x, y)`` pairs where ``x`` is a float tensor holding
    the two input bits and ``y`` is a scalar long tensor holding the XOR of
    those bits (0 or 1).
    """

    def __init__(self):
        # ``super(XorDataset).__init__()`` created an unbound super object and
        # initialised that proxy; the zero-argument form runs the parent
        # initialiser on this instance.
        super().__init__()
        self.x = [torch.tensor(i, dtype=torch.float) for i in [[0,0],[0,1],[1,0],[1,1]]]
        self.y = [torch.tensor(i, dtype=torch.long) for i in [0,1,1,0]]

    def __iter__(self):
        """Return an iterator over this process's share of the samples.

        When ``get_worker_info()`` returns ``None`` every sample is produced.
        Otherwise the samples are split into contiguous chunks of
        ``ceil(len / num_workers)`` items and only the chunk belonging to the
        current worker id is produced.
        """
        worker_info = torch.utils.data.get_worker_info()
        total = len(self.x)
        if worker_info is None:
            start, end = 0, total
        else:
            per_worker = int(math.ceil(total / float(worker_info.num_workers)))
            start = worker_info.id * per_worker
            # Clamp so the last worker does not run past the end of the data.
            end = min(start + per_worker, total)
        return zip(self.x[start:end], self.y[start:end])

def get_data_loaders():
    """Create the XOR data loader and return it for both training and testing.

    Returns:
        A ``(train_iter, test_iter)`` tuple. Both entries are the same
        ``DataLoader`` object, built over one ``XorDataset`` with a single
        worker.
    """
    loader = DataLoader(XorDataset(), num_workers=1)
    return loader, loader


if __name__ == "__main__":
    # Script entry point: build the data loaders, model, optimizer and loss,
    # then train with an Ignite supervised trainer.
    train_loader, val_loader = get_data_loaders()
    net = Net()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()

    trainer = create_supervised_trainer(net, optimizer, criterion)

    # The network emits one raw score per class, so predictions are passed to
    # Accuracy unchanged. The earlier output transform rounded those scores,
    # a recipe intended for single-probability binary outputs; rounding
    # per-class scores can collapse them into ties before the metric compares
    # them with the integer class labels.
    # NOTE(review): relies on Accuracy handling (N, C) scores vs (N,) labels
    # as multiclass -- confirm against the installed Ignite version.
    val_metrics = {
        "accuracy": Accuracy(),
        "nll": Loss(criterion)
    }

    evaluator = create_supervised_evaluator(net, metrics=val_metrics)

    # Print the training loss once every 5000 iterations.
    @trainer.on(Events.ITERATION_COMPLETED(every=5000))
    def log_training_loss(trainer):
        print("Epoch[{}] Loss: {:.2f}".format(trainer.state.epoch, trainer.state.output))

    # Optional per-epoch evaluation and Visdom logging, currently disabled.
    # @trainer.on(Events.EPOCH_COMPLETED)
    # def log_training_results(trainer):
    #     evaluator.run(train_loader)
        # metrics = evaluator.state.metrics
        # print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
        #   .format(trainer.state.epoch, metrics["accuracy"], metrics["nll"]))

    # @trainer.on(Events.EPOCH_COMPLETED)
    # def log_validation_results(trainer):
    #     evaluator.run(val_loader)
        # metrics = evaluator.state.metrics
        # print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
        #   .format(trainer.state.epoch, metrics["accuracy"], metrics["nll"]))

    # vd_logger = VisdomLogger()
    # vd_logger.attach_output_handler(
    #     evaluator,
    #     event_name=Events.EPOCH_COMPLETED,
    #     tag="training",
    #     metric_names=["nll", "accuracy"],
    #     global_step_transform=global_step_from_engine(trainer),
    # )

    trainer.run(train_loader, max_epochs=20000)