Hello, I am new to deep learning and I am trying to build a neural-network model on my own data.
I have 4 inputs and 1 output:
2 of the inputs and the output share a similar range (-0.05 ~ 3); float
another input has a range of (-0.2 ~ 1); float
and the last input has a range of (0 ~ 8); integer
The total data length is 7640.
The problem is that the final model prediction (pred) shows a negative correlation with the test targets (y).
Is there something wrong with my code?
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import MinMaxScaler
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
# read the data: 5 comma-separated columns per line (4 inputs, 1 output)
with open('data.txt', 'r') as f:
    lines = f.readlines()
inputs = np.zeros((len(lines), 4))
output = np.zeros(len(lines))
for l, line in enumerate(lines):
    temp = line.strip().split(',')
    inputs[l, 0] = float(temp[0])
    inputs[l, 1] = float(temp[1])
    inputs[l, 2] = float(temp[2])
    inputs[l, 3] = float(temp[3])
    output[l] = float(temp[4])
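(The same parsing can be done in one call, assuming the file really is plain comma-separated numbers with 5 columns:)

# equivalent one-liner for the loop above
data = np.loadtxt('data.txt', delimiter=',')
inputs, output = data[:, :4], data[:, 4]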
# scale inputs and output to [0, 1]
scaler_in = MinMaxScaler()
inputs = scaler_in.fit_transform(inputs)
scaler_out = MinMaxScaler()
output = scaler_out.fit_transform(output.reshape(-1, 1))
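One caveat I am aware of: fitting the scalers on the full dataset before splitting leaks validation/test statistics into training. A sketch of an alternative to the block above that fits on training rows only (the permutation and the 0.7 fraction here are assumptions matching my split below):

# hypothetical: scale with statistics from the training rows only
rng = np.random.default_rng(0)
idx = rng.permutation(len(inputs))
train_idx = idx[:int(0.7 * len(inputs))]
scaler_in = MinMaxScaler().fit(inputs[train_idx])
scaler_out = MinMaxScaler().fit(output[train_idx].reshape(-1, 1))
inputs = scaler_in.transform(inputs)
output = scaler_out.transform(output.reshape(-1, 1))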
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = torch.FloatTensor(self.features[idx])
        y = torch.FloatTensor(self.labels[idx])
        return x, y
dataset = CustomDataset(inputs, output)
n = len(dataset)  # 7640
validation_size = int(n * 0.2)
test_size = int(n * 0.1)
train_size = n - validation_size - test_size  # remainder, so the sizes always sum to n
train_dataset, validation_dataset, test_dataset = random_split(dataset, [train_size, validation_size, test_size])
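For reproducibility while debugging, the same split can be seeded (a sketch; the seed value 42 is an arbitrary choice):

train_dataset, validation_dataset, test_dataset = random_split(
    dataset, [train_size, validation_size, test_size],
    generator=torch.Generator().manual_seed(42))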
class NeuralNetwork(nn.Module):
    def __init__(self, l1, l2):
        super().__init__()
        # 4 inputs -> l1 -> l2 -> 1 output
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(4, l1),
            nn.ReLU(),
            nn.Linear(l1, l2),
            nn.ReLU(),
            nn.Linear(l2, 1)
        )

    def forward(self, x):
        # input is already (batch, 4), so no flatten is needed
        return self.linear_relu_stack(x)
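A quick shape sanity check I run on the model (the layer sizes 12 and 6 are arbitrary, just for the test):

m = NeuralNetwork(12, 6)
dummy = torch.randn(8, 4)        # batch of 8 samples, 4 features each
assert m(dummy).shape == (8, 1)  # one prediction per sample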
device = 'cpu'
def train(config):
    model = NeuralNetwork(config['l1'], config['l2']).to(device)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=config['lr'])
    train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True, drop_last=True)
    validation_dataloader = DataLoader(validation_dataset, batch_size=config["batch_size"], shuffle=True, drop_last=True)
    for epoch in range(10):
        # training pass
        model.train()
        running_loss = 0.
        for X, y in train_dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # validation pass
        model.eval()
        val_loss = 0.
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for X, y in validation_dataloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                total += y.size(0)
                # count predictions inside the band [y*0.85 + 0.05, y*1.15 + 0.05]
                correct += ((pred <= y * 1.15 + 0.05) & (pred >= y * 0.85 + 0.05))[:, 0].sum().item()
                val_loss += loss_fn(pred, y).cpu().numpy()
                # val_loss += CustomLoss(pred, y).cpu().numpy()
                val_steps += 1
        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
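Note that nothing in train() saves the trained weights, so they cannot be restored after tuning. A sketch of per-epoch checkpointing, assuming the Ray 1.x tune.checkpoint_dir API (which matches the tune.report style used here); it references model, optimizer, and epoch from the function above:

import os

# inside the epoch loop, just before tune.report:
with tune.checkpoint_dir(epoch) as checkpoint_dir:
    path = os.path.join(checkpoint_dir, "checkpoint")
    torch.save((model.state_dict(), optimizer.state_dict()), path)
tune.report(loss=(val_loss / val_steps), accuracy=correct / total)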
config = {
    "l1": tune.choice([2, 6, 12, 24]),
    "l2": tune.choice([2, 6, 12, 24]),
    "lr": tune.choice([1e-5, 5e-5, 1e-4]),
    "batch_size": tune.choice([16, 32, 64, 128])
}
scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    grace_period=1,
    reduction_factor=2)
reporter = CLIReporter(
    # parameter_columns=["l1", "l2", "lr", "batch_size"],
    metric_columns=["loss", "accuracy", "training_iteration"])
result = tune.run(
    train,  # no extra arguments, so functools.partial is not needed
    resources_per_trial={"cpu": 4, "gpu": 0},
    config=config,
    num_samples=20,
    scheduler=scheduler,
    progress_reporter=reporter)
best_trial = result.get_best_trial("loss", "min", "last")
print("Best trial config: {}".format(best_trial.config))
print("Best trial final validation loss: {}".format(
    best_trial.last_result["loss"]))
print("Best trial final validation accuracy: {}".format(
    best_trial.last_result["accuracy"]))
best_trained_model = NeuralNetwork(best_trial.config["l1"], best_trial.config["l2"])
best_trained_model.to(device)
# note: at this point the model has freshly initialized (random) weights;
# train() never saved its parameters, so nothing trained is loaded here
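With the checkpointing sketch above in place, the best trial's weights could be restored like this (again assuming the Ray 1.x checkpoint API):

import os

best_checkpoint_dir = best_trial.checkpoint.value
model_state, optimizer_state = torch.load(
    os.path.join(best_checkpoint_dir, "checkpoint"))
best_trained_model.load_state_dict(model_state)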
correct = 0
total = 0
test_dataloader = DataLoader(test_dataset, batch_size=test_size, shuffle=True, drop_last=True)
best_trained_model.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        pred = best_trained_model(X)
        total += y.size(0)
        # same tolerance band as in validation
        correct += ((pred <= y * 1.15 + 0.05) & (pred >= y * 0.85 + 0.05))[:, 0].sum().item()
# batch_size == test_size, so the loop runs once and pred/y cover the whole test set
pred = pred.numpy()
y = y.numpy()
# undo the [0, 1] scaling to compare in the original units
pred = scaler_out.inverse_transform(pred)
y = scaler_out.inverse_transform(y)
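For reference, this is one way to quantify the pred/y correlation I mentioned at the top (a sketch using np.corrcoef on the flattened arrays):

# Pearson correlation between predictions and targets; negative is the problem I see
r = np.corrcoef(pred[:, 0], y[:, 0])[0, 1]
print("pred/y correlation: {:.3f}".format(r))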