I cannot change the architecture or the loss function for the NN below, so I can only make small improvements here and there, and I would appreciate any help.
The NN is a general-purpose network designed for binary classification.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):  # could also be nn.Sequential, but then forward() changes since all modules run sequentially
    def __init__(self, n_x, n_h, n_y):
        super(Net, self).__init__()
        # input is a feature vector of dimension n_x (1024 here)
        self.fc1 = nn.Linear(n_x, n_h)  # fully connected layer mapping n_x inputs to n_h hidden units
        self.fc2 = nn.Linear(n_h, n_y)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):  # the forward propagation function, called on every forward pass
        # x is the input batch we feed to the network, shape (batch, n_x)
        x = self.fc1(x)       # pass the input through the first fully connected layer
        x = F.relu(x)         # ReLU activation on the output of the first fc layer
        x = self.dropout(x)   # dropout was defined in __init__ but never applied; it only regularizes if called here
        x = self.fc2(x)
        x = torch.sigmoid(x)  # sigmoid to get probabilities for BCELoss
        x = torch.transpose(x, 0, 1)  # output shape (n_y, batch) to match the label layout used below
        return x
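A quick sanity check of the forward pass (a sketch only; the sizes 1024/64/1 and the batch of 8 are my assumptions, not fixed by the code above):

net = Net(1024, 64, 1)        # n_x=1024, n_h=64, n_y=1 are assumed values
net.eval()                    # disable dropout for the check
dummy = torch.randn(8, 1024)  # a fake batch of 8 samples
out = net(dummy)
print(out.shape)              # torch.Size([1, 8]) because of the transpose
print(out.min().item() >= 0.0, out.max().item() <= 1.0)  # sigmoid keeps outputs in [0, 1]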
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

def train_net(epochs, batch_size, train_x, train_y, model_size, lr):
    # train_x has shape (n_samples, n_x); train_y has shape (n_y, n_samples)
    n_x, n_h, n_y = model_size
    model = Net(n_x, n_h, n_y)
    optim = torch.optim.ASGD(model.parameters(), lr=lr, weight_decay=0.01)  # use the lr argument instead of a hard-coded 0.005
    loss_function = nn.BCELoss()
    train_losses = []
    for epoch in range(epochs):
        # train_y is stored as (n_y, n_samples), so transpose it to shuffle its
        # columns in step with the rows of train_x, then transpose back
        train_y = train_y.T
        train_x, train_y = shuffle(train_x, train_y)
        train_y = train_y.T
        count = 0
        model.train()
        train_loss = []
        for idx in range(0, train_x.shape[0], batch_size):
            batch_x = torch.from_numpy(train_x[idx : idx + batch_size]).float()
            batch_y = torch.from_numpy(train_y[:, idx : idx + batch_size]).float()
            model_output = model(batch_x)
            loss = loss_function(model_output, batch_y)
            train_loss.append(loss.item())
            preds = model_output > 0.5
            count += (preds == batch_y).sum().item()
            optim.zero_grad()
            loss.backward()
            # a scheduler made it worse:
            # scheduler.step(loss.item())
            optim.step()
        train_losses.append(np.mean(train_loss))  # record the mean loss every epoch, not only every 100th
        if epoch % 100 == 1:
            print("Epoch: {}, Training loss: {}, Accuracy %: {}".format(
                epoch, np.mean(train_loss), (count / train_x.shape[0]) * 100))
    plt.plot(train_losses)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.title("Learning rate = " + str(lr))
    plt.show()
    return model
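Side note on the shuffle: the transpose/shuffle/transpose dance in the epoch loop can be written as a single index permutation applied to both arrays, e.g. (an equivalent sketch, same shapes as in train_net):

perm = np.random.permutation(train_x.shape[0])  # one random ordering of the sample indices
train_x = train_x[perm]       # samples are rows of train_x
train_y = train_y[:, perm]    # samples are columns of train_y, so permute along axis 1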
I am shuffling the dataset with each epoch, but the problem is that my model is clearly overfitting despite using early stopping, shuffling, and dropout. I honestly don't know what else to do or look for; any suggestions are appreciated.
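For context, by early stopping I mean something along these lines, run outside train_net (a rough sketch only; val_x, val_y, and the patience value are placeholders, with val_y laid out as (1, n_val) like train_y):

best_val, patience, bad_epochs = float('inf'), 20, 0
for epoch in range(epochs):
    # ... one training epoch exactly as in train_net ...
    model.eval()
    with torch.no_grad():
        val_out = model(torch.from_numpy(val_x).float())
        val_loss = loss_function(val_out, torch.from_numpy(val_y).float()).item()
    if val_loss < best_val:
        best_val, bad_epochs = val_loss, 0
        torch.save(model.state_dict(), 'best.pt')  # keep the best weights so far
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            break  # stop once validation loss has not improved for `patience` epochs
model.load_state_dict(torch.load('best.pt'))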