Hi
I have built a simple classifier with the breast cancer dataset. It would be great if someone could review my code. I know I have not split the data into training and test sets.
# coding: utf-8
# In[201]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
# In[202]:
features, labels = load_breast_cancer(return_X_y=True)
# In[203]:
# Building dataset
loader = DataLoader(dataset=TensorDataset(torch.from_numpy(features).float(), torch.from_numpy(labels).float()), shuffle=True)
# In[204]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.linear1 = nn.Linear(features.shape[1], 50)  # Input
        self.linear2 = nn.Linear(50, 25)                 # Hidden
        self.linear3 = nn.Linear(25, 2)                  # Output

    def forward(self, x):
        # Activation Functions
        relu = nn.ReLU()
        sigmoid = nn.Sigmoid()
        softmax = nn.Softmax(dim=1)
        out1 = relu(self.linear1(x))
        out2 = relu(self.linear2(out1))
        output = softmax(self.linear3(out2))
        return output  # Final Output between 0 and 1
# In[205]:
classifier = Classifier() # Loading Module
# In[206]:
# Loading parameters
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
epochs = 50
# In[207]:
features = Variable(torch.from_numpy(features)).float()
labels = Variable(torch.from_numpy(labels)).long()
# Training
for epoch in range(epochs):
    print("EPOCH #", epoch)
    y_pred = classifier(features)
    loss = loss_fn(y_pred, labels)
    print("The loss is:", loss.item())
    # Zero Gradients
    optimizer.zero_grad()
    loss.backward()   # Compute gradients
    optimizer.step()  # Update
# In[208]:
features
# In[209]:
pred = classifier(features)
# In[214]:
pred = np.argmax(pred.detach().numpy(), axis=1)
# In[215]:
accuracy_score(labels, pred)
There is a major problem with your code: you apply softmax in your last layer and you also use CrossEntropyLoss. CrossEntropyLoss applies (log-)softmax internally, so that is wrong (your network still works, but with a longer training time).
The second problem I see is that you instantiate relu and softmax in your forward pass; a cleaner way would have been to define them in your __init__.
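A minimal sketch of both suggestions combined (same layer sizes as your model; the in_features argument is only there so the sketch does not rely on the global features array):

    class Classifier(nn.Module):
        def __init__(self, in_features):
            super(Classifier, self).__init__()
            self.linear1 = nn.Linear(in_features, 50)
            self.linear2 = nn.Linear(50, 25)
            self.linear3 = nn.Linear(25, 2)
            self.relu = nn.ReLU()  # created once here, reused in forward

        def forward(self, x):
            out = self.relu(self.linear1(x))
            out = self.relu(self.linear2(out))
            return self.linear3(out)  # raw logits; CrossEntropyLoss applies log-softmax itself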
I do not understand; the whole point of softmax is to turn the output into a probability, but in the above case the values are exceeding 1?
Calculating the loss with cross-entropy gives a value of 7.8?
Yes, softmax is needed when we want to calculate a probability, but CrossEntropyLoss handles that internally (read the first line of its documentation).
If you really want to apply softmax yourself, then you have to use NLLLoss on the log of the softmax.
But now that I look at it, since you only have two classes you could use BCEWithLogitsLoss in the first place (your last linear layer would then have 25 inputs and 1 output, and there is no need to apply a sigmoid there).
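To make those alternatives concrete, here is a small sketch (the tensors are made up just for illustration):

    import torch
    from torch import nn

    logits = torch.randn(8, 2)            # raw scores from a Linear(25, 2) head
    targets = torch.randint(0, 2, (8,))   # class indices 0/1

    # Option 1: apply log-softmax yourself and use NLLLoss
    log_probs = nn.functional.log_softmax(logits, dim=1)
    loss_nll = nn.NLLLoss()(log_probs, targets)

    # CrossEntropyLoss does exactly the same thing in one step on the raw logits
    loss_ce = nn.CrossEntropyLoss()(logits, targets)
    assert torch.allclose(loss_nll, loss_ce)

    # Option 2: a single output unit plus BCEWithLogitsLoss (no sigmoid in the model)
    single_logit = torch.randn(8)         # what a Linear(25, 1) head would give after squeeze()
    loss_bce = nn.BCEWithLogitsLoss()(single_logit, targets.float())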
Yeah, they don't seem that imbalanced.
Hmm, can you train the net with CrossEntropyLoss or BCEWithLogitsLoss and then report the loss? (One epoch will suffice.)
Because the code seems right, and I don't see anything wrong with it.
# coding: utf-8
# In[22]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
# In[23]:
features, labels = load_breast_cancer(return_X_y=True)
# In[24]:
# Building dataset
loader = DataLoader(dataset=TensorDataset(torch.from_numpy(features).float(), torch.from_numpy(labels).float()), shuffle=True, batch_size=100)
# In[25]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.linear1 = nn.Linear(features.shape[1], 50)  # Input
        self.linear2 = nn.Linear(50, 25)                 # Hidden
        self.linear3 = nn.Linear(25, 2)                  # Output

    def forward(self, x):
        # Activation Functions
        relu = nn.ReLU()
        out1 = relu(self.linear1(x))
        out2 = relu(self.linear2(out1))
        output = self.linear3(out2)
        return output
# In[26]:
classifier = Classifier() # Loading Module
# In[27]:
# Loading parameters
optimizer = optim.Adam(classifier.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
epochs = 10
# In[28]:
features = Variable(torch.from_numpy(features)).float()
labels = Variable(torch.from_numpy(labels)).long()
# Training
for epoch in range(epochs):
    print("EPOCH #", epoch)
    y_pred = classifier(features)
    loss = loss_fn(y_pred, labels)
    print("The loss is:", loss.item())
    # Zero Gradients
    optimizer.zero_grad()
    loss.backward()   # Compute gradients
    optimizer.step()  # Update
# In[29]:
pred = classifier(features)
# In[30]:
pred
# In[21]:
labels
Output
EPOCH # 0
The loss is: 15.668041229248047
EPOCH # 1
The loss is: 13.88979434967041
EPOCH # 2
The loss is: 12.140748977661133
EPOCH # 3
The loss is: 10.405417442321777
EPOCH # 4
The loss is: 8.679145812988281
EPOCH # 5
The loss is: 6.958510875701904
EPOCH # 6
The loss is: 5.249380111694336
EPOCH # 7
The loss is: 3.595121383666992
EPOCH # 8
The loss is: 2.235161781311035
EPOCH # 9
The loss is: 1.7997623682022095
I ran your code and here is my result:
EPOCH # 0
The loss is: 1.2877309322357178
EPOCH # 1
The loss is: 2.0903804302215576
EPOCH # 2
The loss is: 1.9283970594406128
EPOCH # 3
The loss is: 0.9835712313652039
EPOCH # 4
The loss is: 0.45820337533950806
EPOCH # 5
The loss is: 1.0796217918395996
EPOCH # 6
The loss is: 1.0129979848861694
EPOCH # 7
The loss is: 0.5118906497955322
EPOCH # 8
The loss is: 0.4246406555175781
EPOCH # 9
The loss is: 0.6247745156288147
Which means that it's working. I think your main problem is solved; now you have to add a validation set and some regularization to decrease your loss.
The loss will decrease (even in my code the loss decreases if I increase the number of epochs), but what about the output predicted by the model? The model spits out values greater than 1, and those will be of no use to me?
For the prediction phase you can simply argmax the output (and if you want the probabilities, you can apply softmax to the output, but still no softmax while training).
No, you used softmax as a layer inside your network together with the wrong loss for that; you should not apply softmax with CrossEntropyLoss. But when you are just doing a forward pass and not training, you can apply softmax (the argmax of something is equal to the argmax of the softmax of that same thing).
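For example, a minimal prediction sketch, assuming the classifier, features, labels, and accuracy_score from your second listing above:

    with torch.no_grad():
        logits = classifier(features)           # raw scores, not probabilities
        probs = torch.softmax(logits, dim=1)    # probabilities, only for reporting
        pred = torch.argmax(logits, dim=1)      # same result as argmax of probs

    accuracy_score(labels.numpy(), pred.numpy())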
This is not good code either (I didn't split the data and it's overfitting; a minimal split sketch is after the listing).
Your learning rate was too high, so I reduced it, and I also added a small weight_decay to prevent overfitting, but you will have to tune both of these yourself.
It gets 92% accuracy, both in the binary cross-entropy and the multiclass cross-entropy modes (as expected).
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer

use_bce = True  # True: single-output head + BCEWithLogitsLoss, False: two outputs + CrossEntropyLoss

features, labels = load_breast_cancer(return_X_y=True)
features = Variable(torch.from_numpy(features)).float()
labels = Variable(torch.from_numpy(labels))
labels = labels.float() if use_bce else labels.long()

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(features.shape[1], 50), nn.ReLU(),
            nn.Linear(50, 25), nn.ReLU(),
            nn.Linear(25, 1 if use_bce else 2))

    def forward(self, x):
        return self.net(x).squeeze()  # squeeze so BCEWithLogitsLoss sees shape (N,)

classifier = Classifier()
optimizer = optim.Adam(classifier.parameters(), lr=0.0001, weight_decay=0.1)
loss_fn = nn.BCEWithLogitsLoss() if use_bce else nn.CrossEntropyLoss()
epochs = 1000

def get_acc():
    if use_bce:
        # the output is a logit, so logit > 0 corresponds to a sigmoid probability > 0.5
        return ((classifier(features) > 0.0) == labels.bool()).float().mean().item() * 100.0
    return (torch.argmax(classifier(features), dim=1) == labels).float().mean().item() * 100.0

for epoch in range(epochs):
    y_pred = classifier(features)
    loss = loss_fn(y_pred, labels)
    print('epoch: {}, loss: {}, acc: {}'.format(epoch, loss.item(), get_acc()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
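Since neither listing splits the data, here is a minimal sketch of how a train/test split could be added in front of the training loop, using sklearn's train_test_split (the 80/20 ratio and random_state are arbitrary choices):

    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    import torch

    features, labels = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, stratify=labels, random_state=0)

    X_train = torch.from_numpy(X_train).float()
    X_test = torch.from_numpy(X_test).float()
    y_train = torch.from_numpy(y_train).float()   # use .long() instead for the CrossEntropyLoss mode
    y_test = torch.from_numpy(y_test).float()

    # train on (X_train, y_train) exactly as in the loop above, and report the
    # loss/accuracy on (X_test, y_test) after each epoch to watch for overfitting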