In my code, i am taking a random array as a dataset. Each row of array has 4 values, and each row is one data. So if total no. of rows is suppose, 10000, then i have 10,000 data.
The task is to feed one row at a time to the model:
input layer- has 4 nodes for the 4 values in each row.
no. of hidden layers- 2 (for now)
output layer has 3 nodes for 3 classes.
Class labels are 0,1,2.
Upon training, each output contains 3 probabilities but all 3 of them have values in same range, i.e 0.3… . I dont understand why?
Also, in validation testing, the output of all data values(each row) is same.
I have tried many variations, but now i dont understand why am i getting same output each time.
i have pasted the code here:
import torch
import torch.nn as nn
#import torch.nn.functional as f
import torch.optim as optim
#from torch.autograd import Variable
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import numpy as np
import time
#_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _DEFINING DATA_ _ _ __ _ _ _ _ _ __ _ __
arr1=np.random.rand(22500,4) #random array with 22500 rows of values; (demo dataset)
lbl2=np.ones(7500) #matrix of 0s for class 1
lbl1=np.zeros(7500) #matrix of 1s for class 2
lbl3=np.full(7500,2) #matrix of 2s for class 3
lbl=np.hstack((lbl1,lbl2,lbl3))
label_encode=LabelEncoder()
int_encode=label_encode.fit_transform(lbl)
onehot_encoder=OneHotEncoder(categories='auto',sparse=False)
lbl=lbl.reshape(len(lbl),1)
lbl=onehot_encoder.fit_transform(lbl)
print(lbl)
#divide the dataset arr1 into 3 parts: 20% for test set, 20% of remaining for validation set and rest is training set
percent=0.2
data1,label1=shuffle(arr1,lbl) #shuffle the data values before partition
no=int(round(percent*len(lbl))) #no stores 20% of data1
#print("no.of elements after first cut: ",no)
#test_input - stores 20% of data1 as test data1
#test_label - stores corresponding labels of data points in test_input
#t_input - stores remaining 80% of data points
#t_label - stores remaining corresponding 80% class labels
#valid_input - stores 20% of remaining data as validation dataset
# valid_label - stores corresponding validation set labels
#train_input - stores training dataset
#train_label - stores training labels
test_input,t_input=data1[:no],data1[no:]
test_label,t_label=label1[:no],label1[no:]
noele=int(round(percent*len(t_input))) #stores 20% of remaining data
#print("no.of elements after second cut: ",noele)
valid_input,train_input=t_input[:noele],t_input[noele:]
valid_label,train_label=t_label[:noele],t_label[noele:]
print("size of training set: {} \n size of test set: {} \n size of validation set: {}".format(len(train_input),len(test_input),
len(valid_input)))
#converting all datasets into torch tensors
trdata=[torch.tensor([line],dtype=torch.float) for line in train_input]
trlabel=[torch.tensor([line],dtype=torch.float) for line in train_label]
vdata=[torch.tensor([line],dtype=torch.float) for line in valid_input]
vlabel=[torch.tensor([line],dtype=torch.float) for line in valid_label]
tedata=[torch.tensor([line],dtype=torch.float) for line in test_input]
telabel=[torch.tensor([line],dtype=torch.float) for line in test_label]
#print("type of label is: ",vlabel[0].dtype)
#print("type of training label is: ",trlabel[0].dtype)
#print("type of validation dataset is: " ,type(vlabel))
#print("type of training dataset is: ", type(trdata))
#_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _DEFINING THE NETWORK_ _ _ _ _ _ _ ___ __ _ _ _ _ _ _ _ _ _
#Two layers network class
class Network(nn.Module):
def __init__(self):
super(Network,self).__init__()
self.block1=nn.Sequential(nn.Linear(4,20),
#nn.BatchNorm1d(20),
nn.Dropout(p=0.25),
nn.ReLU(),
nn.Linear(20,20),
#nn.BatchNorm1d(20),
nn.Dropout(p=0.25),
nn.ReLU())
self.block2=nn.Sequential( nn.Linear(20,20),
#nn.BatchNorm1d(20),
nn.Dropout(p=0.25),
nn.ReLU(),
nn.Linear(20,3),
nn.Softmax(dim=1))
#forward pass
def forward(self,x):
x=self.block1(x)
output=self.block2(x)
#return f.softmax(x,dim=1)
return output
net=Network()
#state CrossEntropyLoss as loss function; stochastic gradient descent as optimizer,
#learning rate, momentum and weight decay for regularization
criterion=nn.BCEWithLogitsLoss()
opt=optim.SGD(net.parameters(),lr=0.001,momentum=0.9,weight_decay=0.1)
#_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _TRAINING DATA_ _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _ _
#train() computes loss over a single data passed to it.
def train(input,target):
output=net(input)
print("training data is : ",input)
print("training output is : ",output[0])
print("\n")
loss=criterion(output,target)
#print("loss is : ",loss)
opt.zero_grad()
loss.backward()
opt.step()
return loss.item()
def trainer(epoch,data,target):
net.train()
#batch_loss stores sum of loss values computed for each data in the entire
#data set in 1 epoch.
batch_loss=0.0
for iter in range(totalbatches):
#creating batches of size batch_size from the data.
data1=data[iter*batch_size:(iter+1)*batch_size]
target1=target[iter*batch_size:(iter+1)*batch_size]
#print("in trainer : ",target1[0].dtype)
#losses stores loss of each data from a batch; returned by train()
losses=[]
for i in range(len(data1)):
losses.append(train(data1[i],target1[i]))
#print("loss for data {} :".format(i+1),loss)
batch_loss+=sum(losses)
#print('[%d/%d %4d] train loss: %.5f'%(epoch+1,epochs,(iter+1)*batch_size,sum(loss)/batch_size))
print("batch %d of epoch %d successful"%(iter+1,epoch+1))
return(batch_loss/len(data)) #return avg error over entire dataset in 1 epoch
#_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _VALIDATION TESTING_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __
def validation(epoch,vdata,vtarget):
net.eval()
cor=0 #no. of correct predictions
incor=0 #no. of incorrect predictions
valid_loss=0.0 #stores sum of losses over entire dataset
op=[] #stores predicted class of each data
tar=[] #stores target class of each data
#print(("before getting inside loop, type is: ",vtarget[0].dtype))
for iter in range(vbatches):
vdata1=vdata[iter*batch_size:(iter+1)*batch_size]
vtarget1=vtarget[iter*batch_size:(iter+1)*batch_size]
#print(("inside loop, type is: ",vtarget1[0].dtype))
losses=[] #stores loss of every data in 1 batch
for i in range(len(vtarget1)):
output=net(vdata1[i])
print("validset data \t:",vdata1[i])
print("validset output \t:",output)
print("\n")
op.append(torch.argmax(output).item())
tar.append(torch.argmax(vtarget1[i]).item())
loss=criterion(output,vtarget1[i])
losses.append(loss)
valid_loss+=sum(losses)
#print('[%d %5d] valid loss: %.5f'%(epoch+1,(iter+1)*batch_size,(valid_loss/len(vdata)))
#compute total number of correct/incorrect predictions
for i in range(len(op)):
if op[i]==tar[i]:
cor+=1
else:
incor+=1
valid_acc=(cor/(cor+incor))*100
#return avg loss in validation data and validation accuracy over 1 epoch
return ((valid_loss.item())/len(vdata)),valid_acc
#_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ TESTING DATA_ _ _ _ _ _ _ _ _ _ __ _ __ _ _ _ __ _ __
def test(tdata,tlabel):
net.eval()
cor=0
incor=0
test_op=[] #list to store output predictions
test_tar=[] #list to store target values
error=[] #stores error in class prediction of each data
#print("label is: ")
#print(tlabel)
for batch in range(tbatches):
tdata1=tdata[batch*batch_size:(batch+1)*batch_size]
tlabel1=tlabel[batch*batch_size:(batch+1)*batch_size]
for i in range(batch_size):
output=net(tdata1[i])
#print("test data is: ", tdata1[i])
#print("test output is :", output[0])
#print("\n")
test_op.append(torch.argmax(output[0]).item())
test_tar.append(torch.argmax(tlabel1[i]).item())
error.append(criterion(output,tlabel1[i]))
print("Batch {} testing done ".format(batch+1))
avg_error=(sum(error)/len(tdata)) #avg error computed for test data
#compute total correct/incorrect predictions.
for i in range(len(test_op)):
if(test_op[i]==test_tar[i]):
cor+=1
else:
incor+=1
results=confusion_matrix(test_tar,test_op)
print("confusion matrix:")
print(results)
print("acccuracy score: ")
print(accuracy_score(test_tar,test_op,normalize=False))
print("report :")
print(classification_report(test_tar,test_op))
print("average error in test set is: {}".format(avg_error))
print("test accuracy is : {} %".format((cor/(cor+incor))*100))
# _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _DRIVER CODE_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ __ _
epochs=6
batch_size=800
trainlen=len(trdata) #size of training data
validlen=len(vdata) #size of validation data
testlen=len(tedata) #size of test data
totalbatches=int(trainlen/batch_size) #total training batches
vbatches=int(validlen/batch_size) #total validation batches
tbatches=int(testlen/batch_size) #total test batches
t=time.time()
#train_loss stores avg loss computed over training set in every epoch
#valid_loss stores avg loss computed over validation set in every epoch
train_loss=[]
valid_loss=[]
for epoch in range(epochs):
tloss=0.0 #stores avg training loss returned in 1 epoch
vloss=0.0 #stores avg validation loss returned in 1 epoch
#indices=torch.randperm(trainlen)
#print(indices)
#data,label=data[indices],label[indices]
trdata,trlabel=shuffle(trdata,trlabel,random_state=0)
#print(len(trdata))
vdata,vlabel=shuffle(vdata,vlabel,random_state=0)
t0=time.time()
tloss=trainer(epoch,trdata,trlabel)
vloss,valid_acc=validation(epoch,vdata,vlabel)
#print("epoch {} successful : {} seconds ".format(epoch+1,round((time.time()-t0),3)), end=' ')
#print("train loss: {}".format(round(tloss,5)))
#print("validation loss: {}".format(round(vloss,5)))
print("epoch %d/%d took %.5f seconds" %((epoch+1),epochs,round((time.time()-t0),3)))
print("training loss: ",round(tloss,5))
print("validation loss :",round(vloss,5))
print("validation accuracy is: {}%".format(valid_acc))
train_loss.append(tloss)
valid_loss.append(vloss)
print("total training time is {} minutes : ".format(round((time.time()-t)/60,3)))
#print("train loss \t validation loss: ")
#for i in range(len(train_loss)):
# print(round(train_loss[i],5),'\t', round(valid_loss[i],5))
#print(vlabel)
#testing data
#test(tedata,telabel)
Here is the validation result:
validset data : tensor([[0.4604, 0.5948, 0.1612, 0.2271]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.7745, 0.0985, 0.1097, 0.4409]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.2225, 0.8885, 0.1667, 0.9272]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.4683, 0.1766, 0.2229, 0.3912]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.7469, 0.3891, 0.7210, 0.6673]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.5424, 0.6587, 0.4722, 0.3075]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.3186, 0.5349, 0.9189, 0.7124]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.7580, 0.7256, 0.3102, 0.7087]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.1943, 0.7328, 0.1706, 0.8615]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
validset data : tensor([[0.1238, 0.6982, 0.0876, 0.9440]])
validset output : tensor([[0.3381, 0.3278, 0.3341]], grad_fn=<SoftmaxBackward>)
Also, i changed the loss function to CrossEntropyLoss() and used class labels as 0,1,2. But the result was same.
PLz help.