Hi Guys,
I am completely new to PyTorch and ANN’s itself. To start my journey I want to make a simple NN which multiplies 3 values and outputs the correct result.
So what I have done?
Made a Script called “ANN_Engine” - here I actually train the model
Made a Script called “DataSetDefinition” - here I setup my DataSet
Made a Script called “NeuralNetwork” - here I made the model definition
ANN Engine looks like that
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_ids = [0, 1]
torch.backends.cudnn.benchmark = True
if __name__ == "__main__":
# define model
model = Model()
model = nn.DataParallel(model)
# loss function definition
#criterion = nn.L1Loss(reduction='sum') # expecting outliers L1 otherwise MSE (both for Linear Regression???)
criterion = nn.MSELoss(reduction='sum')
criterion.to(device)
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-15, betas=[0.9, 0.999], eps=1e-8)
# optimizer = optim.SGD(model.parameters(), lr=0.1e-15)
# DataLoader definition
def _worker_init_fn_():
torch_seed = torch.initial_seed()
np_seed = torch_seed // 2 ** 32 - 1
random.seed(torch_seed)
np.random.seed(np_seed)
transform = transforms.Compose([CustomDataset])
dataset = CustomDataset('venv/TrainingData/Training2.csv')
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
shuffle=True, num_workers=1, worker_init_fn=_worker_init_fn_())
# training procedure
model.to(device)
num_epochs = 500
for epoch in range(num_epochs):
running_loss = 0.0
torch.cuda.empty_cache()
for i, data in enumerate(dataloader, 0):
inputs, outputs = data
inputs, outputs = inputs.to(device), outputs.to(device)
optimizer.zero_grad()
pred = model(inputs)
loss = criterion(pred, outputs)
loss.backward()
optimizer.step()
running_loss += loss.item()
if i % 19 == 18:
model.eval()
with torch.no_grad():
#####some testing#####
print("[epoch {}][{}/{}] loss {:.4f}".format(epoch+1, i+1, len(dataloader), running_loss/19))
# the end of one epoch
model.eval()
checkpoint = dict(state_dict=model.module.state_dict(), opt_state_dict=optimizer.state_dict(), epoch=epoch)
torch.save(checkpoint, PATH)
with torch.no_grad():
"""some testing
# print("xxxxxxx".format(xxxxxxx))
logging (e.g. tensorboard) """
DataSet Definition looks like this:
I import a CSV containing 4 cols - 0,1,2 = input - 3 =result
class CustomDataset(Dataset):
def __init__(self, csv_path):
"""
Args:
csvPath = Path to the CSV to import
"""
# Transforms
#self.to_tensor = transforms.ToTensor()
# Read the csv file
self.data_info = pd.read_csv(csv_path, header=None)
self.csv_input = pd.read_csv(csv_path, usecols=[0, 1, 2], encoding="UTF-8")
self.csv_output = pd.read_csv(csv_path, usecols=[3], encoding="UTF-8")
# Calculate len
self.data_len = len(self.data_info.index)
def __getitem__(self, index):
input_as_tensor = torch.tensor(self.csv_input.values[index]).float()
output_as_tensor = torch.tensor(self.csv_output.values[index]).float()
return input_as_tensor, output_as_tensor
def __len__(self):
return self.data_len-1 # how many examples you have
In the Network itself I defined the model with some hidden layers:
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.inputLayer = nn.Linear(3, 9)
self.activationFunction1 = nn.ReLU()
self.hiddenLayer1 = nn.Linear(9, 27)
self.activationFunction2 = nn.ReLU()
self.hiddenLayer2 = nn.Linear(27, 1)
def forward(self, inp):
x = self.inputLayer(inp)
x = self.activationFunction1(x)
x = self.hiddenLayer1(x)
x = self.activationFunction2(x)
x = self.hiddenLayer2(x)
return x
So now to my problem:
I am sitting here since two days and don’t figure out why my loss is not decreasing - it looks always like:
Blockquote
[epoch 85][19/19] loss 16894.8960
[epoch 86][19/19] loss 16894.8960
[epoch 87][19/19] loss 16894.8960
[epoch 88][19/19] loss 16894.8960
[epoch 89][19/19] loss 16894.8960
So it looks like my weights and bias are not updated.
When I started a week ago I made pretty much the same script except using CUDA and DataSets
Here my defintion was like (just an example…I don’t have the original one so I copied from another post)
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
[9.779], [6.182], [7.59], [2.167], [7.042],
[10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
[3.366], [2.596], [2.53], [1.221], [2.827],
[3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
Here it worked smooth.
I am sorry to say and normally I don’t like to ask because I want to figure out my problems on my own but does anyone can let me know where my problem is?
Thanks in advance