Multi-class classification neural network classifying everything as one class

I am creating an multi-class classifier to classify stars based on their effective temperatures and absolute magnitudes, but when my model is trained, it classifies all of the stars as one type. Any help or tips would be appreciated.

This is how I want the classifier to classify stars:

Here is my code:

import csv
import numpy
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader


def convertStarToInt(arr):
    output = []
    for item in arr:
        if ("Iab" in item[2]):
            output.append([item[0], item[1], 1])
        elif ("D" in item[2]):
            output.append([item[0], item[1], 8])
        elif ("Ia" in item[2]):
            output.append([item[0], item[1], 0])
        elif ("Ib" in item[2]):
            output.append([item[0], item[1], 2])
        elif ("III" in item[2]):
            output.append([item[0], item[1], 4])
        elif ("IV" in item[2]):
            output.append([item[0], item[1], 5])
        elif ("VII" in item[2]):
            output.append([item[0], item[1], 8])
        elif ("VI" in item[2]):
            output.append([item[0], item[1], 7])
        elif ("sd" in item[2]):
            output.append([item[0], item[1], 7])
        elif ("V" in item[2]):
            output.append([item[0], item[1], 6])
        elif ("II" in item[2]):
            output.append([item[0], item[1], 3])
    return output

def countStars(arr):
    a = 0
    ab = 0
    b = 0
    ii = 0
    wd = 0
    iii = 0
    iv = 0
    v = 0
    vi = 0
    for item in arr:
        if ("Iab" in item[2]):
            ab += 1
        elif ("D" in item[2]):
            wd += 1
        elif ("Ia" in item[2]):
            a += 1
        elif ("Ib" in item[2]):
            b += 1
        elif ("III" in item[2]):
            iii += 1
        elif ("IV" in item[2]):
            iv += 1
        elif ("VII" in item[2]):
            wd += 1
        elif ("VI" in item[2]):
            vi += 1
        elif ("sd" in item[2]):
            vi += 1
        elif ("V" in item[2]):
            v += 1
        elif ("II" in item[2]):
            ii += 1
    output = [a, ab, b, ii, iii, iv, v, vi, wd]
    return output




def starToInt(arr, dictionary):
    output = []
    for item in arr:
        output.append(dictionary.index(item))
    return output


with open("Train&TestData.csv") as OutData:
    TrainingTestData = list(csv.reader(OutData))

del TrainingTestData[0]
Data = convertStarToInt(TrainingTestData)
StarTypes = []
StarProperties = []
for row in Data:
    StarTypes.append(row[2])
    StarProperties.append([row[0], row[1]])

StarCount = countStars(TrainingTestData)
print(StarCount)

weights = []
for item in StarCount:
    weights.append(sum(StarCount)/(item))

weight = torch.FloatTensor(weights)

TrainData, TestData, TrainOutput, TestOutput = train_test_split(
    StarProperties, StarTypes, test_size=0.5, random_state=356)

TrainData = torch.from_numpy(numpy.array(TrainData).astype(dtype = "float32"))
TestData = torch.from_numpy(numpy.array(TestData).astype(dtype = "float32"))
TrainOutput = torch.from_numpy(numpy.array(TrainOutput).astype(dtype = "int64"))
TestOutput = (numpy.array(TestOutput).astype(dtype = "int64"))


class Data(Dataset):
    def __init__(self):
        self.x = (TrainData)
        self.y = (TrainOutput)
        self.len = self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.len


class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
        self.softmax = nn.Softmax(dim = 0)

    def forward(self, x):
        x = (self.linear1(x))
        x = (self.linear2(x))
        x = (self.linear3(x))
        x = (self.softmax(x))
        return x


input_dim = 2
hidden_layer1 = 25
hidden_layer2 = 20
output_classes = 9

model = Net(input_dim, hidden_layer1, hidden_layer2, output_classes)

trainloader = DataLoader(dataset = Data(), batch_size = 50)

CrossELoss = nn.CrossEntropyLoss()

learning_rate = 0.1
optimizer1 = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer2 = torch.optim.Adam(model.parameters(), lr=3e-4)

n_epochs = 25
loss_list = []

for epoch in range(n_epochs):
    for x, y in trainloader:
        optimizer1.zero_grad()
        z = model(x)
        loss = CrossELoss(z, y)
        loss.backward()
        optimizer1.step()
        loss_list.append(loss.data)
    
    print("Epoch is", epoch)


z = model(TestData)
maximum, prediction = (torch.max(z.data,1))
classes = z.data.tolist()
prediction = prediction.tolist()

a = 0
b = 0.0

for index, item in enumerate(prediction):
    if item == TestOutput[index]:
        a += 1
    b += 1

print("Accuracy is", (a / b))

The data I used is the HYG Database and the effective temperatures are calculated using the color index.

Hi Rishav!

I haven’t looked at your code in detail, but I have two comments:

First, be aware that because you do not have any intervening
non-linearity between your Linear layers, your three Linears
collapse, in effect, into a single linear layer equivalent to
Linear (D_in, D_out).

You should place non-linear so-called “activations,” such as
ReLU or Tanh between the layers, e.g.:

    def forward(self, x):
        x = (self.linear1(x))
        x = torch.nn.functional.relu (x)
        x = (self.linear2(x))
        x = torch.nn.functional.relu (x)
        x = (self.linear3(x))
        return x

Second, your loss criterion, CrossEntropyLoss, has, in effect,
softmax() built in. Therefore your model should not have a final
Softmax layer; just feed the output of your final Linear layer into
your loss function.

Best.

K. Frank

Thanks for the clarification, but I have another question.

One of my classes has 20,000+ entries, while the next one is 3,000 entries, and everything else is below 3,000 entries. Is weighting required and if so, how could I weight the data?

Regards,
Rishav

Hi Rishav!

It does sound like your training data is sufficiently unbalanced that
some weighting scheme is likely to beneficial and perhaps even
necessary.

There are two common approaches (which can even be combined
together):

You can sample your less-common classes more heavily when you
construct your training batches. A helper tool for doing this is pytorch’s
WeightedRandomSampler.

You can also weight the less-common classes more heavily in your
loss function. If you are using CrossEntropyLoss, you would do
this using its weight constructor argument.

I don’t have any evidence for this, but my belief is that the first approach
will be preferable unless your less-common classes are so rare that you
find yourself with many duplicates of the same sample in any given
batch you construct with weighted sampling.

Best.

K. Frank

I’ve weighted it, but I get the same problem. When I print the result of inputting the test data into the model, I get this:

tensor([[-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200],
        [-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200],
        [-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200],
        ...,
        [-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200],
        [-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200],
        [-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200]],

As you can see, the data for all of the rows is the same and it seems that this has been the root cause of all of the predictions of being the same class. Could you help me identify the source of this error?

Thanks

Could you find the cause of this error in my program?

Thanks,
Rishav

Hi Rishav!

I guess that the next step would be systematic debugging.

I imagine that you compute:

output_from_model = model (input_to_model)

that input_to_model has shape [nBatch, D_in], and that
output_from_model has shape [nBatch, D_out].

First check whether the rows of output_from_model are exactly
identical, or whether they differ a little bit, but agree up to the
five-digit precision that you print them out with.

Next check whether the rows of input_to_model are the same
as one another or different. If they are the same, that’s your problem.

If the rows of input_to_model differ from one another, then look at
the intermediate results within your model and see at what point the
rows of the intermediate results become the same. The layer that
first produces identical rows would likely be the source of the problem.

(Also, you didn’t say whether the rows of output_from_model are
identical before you start training your model, or only become
identical after after running some sort of training procedure.)

Can you reproduce this issue by constructing your model, but not
training it, and passing a single randomly generated input_to_model
through the model? If so, could you post a fully self-contained script
(with no numpy, sklearn, nor Dataloader) that reproduces this
simple version of your issue?

Good luck.

K. Frank

All of the output_from_model rows are identical when inputted with the training data. The input_to_model values are different on each row. When I run the model before training, the values in the output_from_model are different for each row, so I now that the training is the source of error.

This is my training portion of my code:

input_dim = 2
hidden_layer1 = 4
hidden_layer2 = 8
output_classes = 9

model = Net(input_dim, hidden_layer1, hidden_layer2, output_classes)

trainloader = DataLoader(dataset = Data(), batch_size = 256, sampler = weighted_sampler)

Loss = nn.CrossEntropyLoss()

learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.5)

n_epochs = 10
loss_list = []

for epoch in range(n_epochs):
    for x, y in trainloader:
        optimizer.zero_grad()
        z = model(x)
        loss = Loss(z, y)
        loss.backward()
        optimizer.step()
        loss_list.append(loss.data)

My neural network has two layers, a Relu and a Sigmoid:

class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)

    def forward(self, x):
        x = self.linear1(x)
        x = f.relu(x)
        x = self.linear2(x)
        x = torch.sigmoid(x)
        x = self.linear3(x)
        return x

Thanks,
Rishav

Hi Rishav!

You had mentioned above that your training set is highly unbalanced.

You didn’t post a complete row from your all-identical
output_from_model rows:

[-2.9972, -2.9940, -3.0171,  ...,  6.3018, -2.9989, -3.0200]

but if I imagine that the three elided entries have values all about
-3.0, then it looks like you are predicting the class that corresponds
to index 8 (starting from 0) with high certainty. Is the class that is
highly over-represented in your unbalanced data set class 8 (starting
from 0)?

In code posted above, you calculate weight, presumably to reweight
your unbalanced data, but you don’t show where (or even if) you
perform this reweighting.

Can you reproduce this issue with randomly-generated (balanced)
data? If so, please post a complete, runnable script (without any
extraneous stuff such as Dataloader) that illustrates your issue.

Can you reproduce this issue training on a perfectly-balanced subset
of your training data (without any reweighting)?

Best.

K. Frank

The output shown above is from when a WeightedRandomSampler is used. The use of the sampler did not seem to solve the problem so I commented it out.
Here is my code in the current state:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.nn import functional as f
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import io


def convertStarToInt(arr):
    output = []
    for item in arr:
        if ("Iab" in item[2]):
            output.append([item[0], item[1], 1])
        elif ("D" in item[2]):
            output.append([item[0], item[1], 8])
        elif ("Ia" in item[2]):
            output.append([item[0], item[1], 0])
        elif ("Ib" in item[2]):
            output.append([item[0], item[1], 2])
        elif ("III" in item[2]):
            output.append([item[0], item[1], 4])
        elif ("IV" in item[2]):
            output.append([item[0], item[1], 5])
        elif ("VII" in item[2]):
            output.append([item[0], item[1], 8])
        elif ("VI" in item[2]):
            output.append([item[0], item[1], 7])
        elif ("sd" in item[2]):
            output.append([item[0], item[1], 7])
        elif ("V" in item[2]):
            output.append([item[0], item[1], 6])
        elif ("II" in item[2]):
            output.append([item[0], item[1], 3])
    return output

def countStars(arr):
    a = 0
    ab = 0
    b = 0
    ii = 0
    wd = 0
    iii = 0
    iv = 0
    v = 0
    vi = 0
    for item in arr:
        if (item == 1):
            ab += 1
        elif (item == 8):
            wd += 1
        elif (item == 0):
            a += 1
        elif (item == 2):
            b += 1
        elif (item == 4):
            iii += 1
        elif (item == 5):
            iv += 1
        elif (item == 7):
            vi += 1
        elif (item == 6):
            v += 1
        elif (item == 3):
            ii += 1
    output = [a, ab, b, ii, iii, iv, v, vi, wd]
    return output




def starToInt(arr, dictionary):
    output = []
    for item in arr:
        output.append(dictionary.index(item))
    return output


TrainingTestData = (pd.read_csv(io.BytesIO(uploaded['TrainingTestData.csv']))).values.tolist()

del TrainingTestData[0]
Data = convertStarToInt(TrainingTestData)
StarTypes = []
StarProperties = []
for row in Data:
    StarTypes.append(row[2])
    StarProperties.append([row[0], row[1]])

TrainData, TestData, TrainOutput, TestOutput = train_test_split(
    StarProperties, StarTypes, test_size=0.4, random_state=372)

StarCount = countStars(TrainOutput)
weights = []
for starType in StarCount:
  weights.append(1.0/starType)

TrainData = torch.from_numpy(np.array(TrainData).astype(dtype = "float32"))
TestData = torch.from_numpy(np.array(TestData).astype(dtype = "float32"))
TrainOutput = torch.from_numpy(np.array(TrainOutput).astype(dtype = "int64"))
TestOutput = torch.from_numpy(np.array(TestOutput).astype(dtype = "int64"))


class Data(Dataset):
    def __init__(self):
        self.x = (TrainData)
        self.y = (TrainOutput)
        self.len = self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.len

"""samples_weight=torch.from_numpy(np.array([weights[t] for t in TrainOutput]))"""

"""samples_weight = weights"""
"""sampler = WeightedRandomSampler(samples_weight, len(samples_weight), replacement = True)

print(samples_weight.tolist())"""

class Net(nn.Module):
    def __init__(self, D_in, H1, H2, H3, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, H3)
        self.linear4 = nn.Linear(H3, D_out)

    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        x = torch.sigmoid(self.linear3(x))
        x = torch.sigmoid(self.linear4(x))
        return x


input_dim = 2
hidden_layer1 = 4
hidden_layer2 = 8
hidden_layer3 = 10
output_classes = 9

model = Net(input_dim, hidden_layer1, hidden_layer2, hidden_layer3, output_classes)
z = model(TestData)
print(z)
trainloader = DataLoader(dataset = Data(), batch_size = 500)



Loss = nn.CrossEntropyLoss()

learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9)

n_epochs = 3
loss_list = []

for epoch in range(n_epochs):
    for x, y in trainloader:
        optimizer.zero_grad()
        z = model(x)
        loss = Loss(z, y)
        loss.backward()
        optimizer.step()
        loss_list.append(loss.data)
    
    print("Epoch is", epoch)


z = model(TestData)
print(z)
maximum, prediction = (torch.max(z.data,1))

prediction = prediction.tolist()


a = 0
b = 0.0

for index, item in enumerate(prediction):
    if item == TestOutput[index]:
        a += 1
    b += 1

print("Accuracy is", (100 * a / b),"%.")

and this is the output:

tensor([[0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975],
        [0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975],
        [0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975],
        ...,
        [0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975],
        [0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975],
        [0.4654, 0.5800, 0.6331,  ..., 0.4935, 0.4560, 0.3975]],
       grad_fn=<SigmoidBackward0>)
Epoch is 0
Epoch is 1
Epoch is 2
tensor([[0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130],
        [0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130],
        [0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130],
        ...,
        [0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130],
        [0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130],
        [0.0085, 0.0085, 0.0094,  ..., 0.9987, 0.0078, 0.0130]],
       grad_fn=<SigmoidBackward0>)
Accuracy is 48.01211098426565 %.

Is there a known case or error that would result in all of the outputs being the same. Also, how many layers and activation functions would you recommend I use?

Thanks,
Rishav

The problem also persists with random data being inputted.