Hi Ptrblck
Thanks for replying.
I have included the code I am using below to assist in identifying the issue. This code is generic to all 4 models I am running (CNN, RNN, LSTM and GRU). The problem with the CPU only occurs on the RNN and LSTM models.
PS I am using the UNSW-NB15 dataset.
Thanks in advance for your help.
Gerry
#######################################################################
###Code Listing#####################
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torchvision import datasets,transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import TensorDataset
import torch.optim as optim
from timeit import default_timer as timer
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
import torch.nn.functional as F
import dataModels as dm
import utilities as ut
import os
from datetime import datetime
from collections import OrderedDict
from collections import namedtuple
from itertools import product
class RunBuilder():
    """Expand a grid of hyperparameter options into individual run settings."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` namedtuple per combination of option values.

        Args:
            params: Mapping of option name -> iterable of candidate values.
                The key order fixes the field order of ``Run``.

        Returns:
            A list of ``Run`` tuples covering the Cartesian product of the
            value lists, in ``itertools.product`` order.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combination) for combination in product(*params.values())]
#load training data
def loadTrainingData(dataSet,batch_size):
    """Load the training CSV and wrap it in a shuffling ``DataLoader``.

    Args:
        dataSet: Dataset selector. Only ``1`` (the Train15All.csv file) is
            wired up; any other value is reported as an error.
        batch_size: Batch size handed to the ``DataLoader``.

    Returns:
        A ``DataLoader`` yielding ``(features, labels)`` batches, where
        features are float tensors built from columns 0-40 and labels are
        long tensors built from column 42. Returns ``None`` (after printing
        the error) if anything goes wrong while loading.
    """
    try:
        # Process-wide NumPy setting: print arrays in full when debugging.
        np.set_printoptions(threshold=np.inf)
        if dataSet != 1:
            # Previously this fell through to an unbound ``fileName`` and
            # surfaced as a confusing NameError message.
            raise ValueError("Unsupported dataSet id: " + str(dataSet))
        # Raw string: "\d" and "\T" are not valid escape sequences.
        fileName = r"c:\data\Train15All.csv"
        dataImported = pd.read_csv(fileName)
        # Cap the number of rows used (slicing is a no-op for shorter files).
        x = dataImported.to_numpy()[:82000]
        # L2-normalise every row to unit norm: the squares of the elements
        # of each row sum to 1.
        # https://kawahara.ca/how-to-normalize-vectors-to-unit-norm-in-python/
        # NOTE(review): the norm is taken over ALL columns, including the
        # label column, so the label influences the feature scaling - confirm
        # this is intended.
        b = preprocessing.normalize(x, norm='l2')
        # Features: the first 41 columns (indices 0-40).
        # NOTE(review): column 41 is skipped entirely (an older comment said
        # "first 42 columns") - confirm against the CSV layout.
        d = b[:, 0:41]
        # Labels: column 42. After normalisation the value lies in [-1, 1],
        # so ceil() maps any positive label back to class 1 and the rest to 0.
        l = np.ceil(b[:, [42]])
        allDataTensor = torch.from_numpy(d).float()
        allLabelsTensor = torch.from_numpy(l).long().squeeze(1)
        combinedDataLabelTensor = TensorDataset(allDataTensor, allLabelsTensor)
        return DataLoader(combinedDataLabelTensor, shuffle=True, batch_size=batch_size)
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("Error Generating Data Sets - generateDataSets module\n\n" + str(e))
        return None
#run model
def runModel(model,train_loader,num_epochs,lr2,optim,momentum2,modelNumber,batch_size):
    """Train ``model`` on ``train_loader`` and print per-epoch statistics.

    Args:
        model: Network to train. It must already be on its target device;
            batches are moved to the device of the model's parameters.
        train_loader: Iterable of ``(data, labels)`` batches.
        num_epochs: Number of passes over ``train_loader``.
        lr2: Learning rate for the optimiser.
        optim: ``1`` selects Adam; any other value selects SGD. (The name
            shadows any module imported as ``optim``, so ``torch.optim`` is
            always referenced in full below.)
        momentum2: Momentum, used only by SGD.
        modelNumber: ``3`` marks a model that takes and returns a hidden
            state and exposes ``init_hidden(batch_size)``; every other value
            is called as ``model(data)``.
        batch_size: Batch size used to create the initial hidden state.

    Returns:
        None. The epoch number, cumulative sample count, last-batch loss,
        epoch accuracy and epoch sample count are printed once per epoch,
        followed by the total training time.
    """
    criterion = nn.CrossEntropyLoss()
    if optim == 1:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr2)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr2, momentum=momentum2)

    # Take the device from the model instead of relying on a module-level
    # ``device`` global that only exists once the main script has run.
    device = next(model.parameters()).device
    uses_hidden_state = modelNumber == 3

    r_total = 0
    start = timer()
    for epoch in range(num_epochs):
        total = 0
        correct = 0
        loss = None
        if uses_hidden_state:
            h = model.init_hidden(batch_size).to(device)
        for data, labels in train_loader:
            # Inputs do not need gradients; only the parameters are trained.
            data = data.to(device)
            labels = labels.to(device)
            # Clear gradients w.r.t. parameters
            optimizer.zero_grad()
            if uses_hidden_state:
                # assumes the hidden state is (layers, batch, hidden) - TODO
                # confirm. Rebuild it when a (typically final) batch is
                # smaller than ``batch_size``, otherwise the shapes clash.
                if h.size(1) != data.size(0):
                    h = model.init_hidden(data.size(0)).to(device)
                # Detach so gradients do not flow back into earlier batches.
                outputs, h = model(data, h.detach())
            else:
                outputs = model(data)
            # CrossEntropyLoss applies log-softmax to the logits internally.
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Predicted class = index of the largest logit.
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            # predicted and labels share a device, so compare them in place.
            correct += (predicted == labels).sum().item()
        # Guard against an empty loader (no batches => no loss, no samples).
        accuracy = 100 * correct / total if total else 0.0
        loss_value = loss.item() if loss is not None else float('nan')
        r_total += total
        print('Epoch\t%d\tNIP:\t%d\tLoss:\t%.3f\tAccuracy:\t%.2f\t%d\tsamples' % (epoch+1,r_total,loss_value, accuracy,total))
    end = timer()
    print('\n\nModel Training Time (secs): %d' % (end - start))
#define model and assign to gpu
def defineModel(modelNo):
    """Build the network selected by ``modelNo``.

    ``1`` -> CNN, ``2`` -> LSTM, ``3`` -> GRU, anything else -> RNN.
    The recurrent models share the dimensions fixed below.
    """
    # Shared model dimensions.
    input_dim, hidden_dim, output_dim, layer_dim = 41, 100, 2, 2

    if modelNo == 1:
        return dm.CNNModel()
    if modelNo == 2:
        return dm.LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
    if modelNo == 3:
        # The GRU constructor takes output_dim before the layer count.
        return dm.GRUModel(input_dim, hidden_dim, output_dim, layer_dim)
    return dm.RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
def setDevice():
    """Return the first CUDA device when CUDA is available, else the CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")
# Hyperparameter grid: one training run is generated per combination of values.
params = OrderedDict([
    ('ep', [40]),                    # number of epochs
    ('ml', [2]),                     # model number: 1=CNN 2=LSTM 3=GRU 4=RNN
    ('lr', [0.00146]),               # learning rate
    ('mm', [0.2, 0.3, 0.4, 0.5]),    # momentum
])
#lr=0.00146 #0.005 and 0.00146 and 0.001 https://medium.com/octavian-ai/which-optimizer-and-learning-rate-should-i-use-for-deep-learning-5acb418f9b2
#main
# Script body: train one model per hyperparameter combination in ``params``.
# Any failure is reported by the single handler at the bottom.
writing_started = False
try:
    # Let cuDNN benchmark and pick algorithms for the observed input sizes.
    torch.backends.cudnn.benchmark = True
    # Path to save model train/validate/test results to (raw string: "\P" and
    # "\T" are not valid escape sequences).
    filename = os.path.join(os.environ['USERPROFILE'], r"Desktop\PytorchModel\TestResults-%s.txt" % (datetime.now().strftime("%Y%m%d-%H%M%S")))
    classes = ('OK', 'Intrusion')
    # Model hyperparameters
    batch_size = 1000
    optimizer = 1        # 1=Adam 2=SGD
    trainingDataset = 1  # 1=10000 2=20000 3=30000 4=40000 5=82000
    momentum = 0.3
    # Build run parameters
    runs = RunBuilder.get_runs(params)
    # Load dataset
    train_loader = loadTrainingData(trainingDataset, batch_size)
    # Start writing to file (presumably captures the printed output in
    # ``filename`` - confirm against the utilities module).
    ut.startWritingFile(filename)
    writing_started = True
    todaysDate = datetime.now().strftime("%b %d %Y %H:%M:%S")
    model_names = {1: "CNN", 2: "LSTM", 3: "GRU"}
    for run in runs:
        # Model setup. ``device`` is deliberately a module-level name.
        model = defineModel(run.ml)
        device = setDevice()
        model.to(device)
        print()
        # NOTE(review): the banner always says "Adam", even when
        # ``optimizer`` selects SGD.
        print(model_names.get(run.ml, "RNN") + " Model - Adam: " + str(run) + " " + str(todaysDate))
        print()
        # Run this model
        runModel(model, train_loader, run.ep, run.lr, optimizer, run.mm, run.ml, batch_size)
        print()
        print()
except Exception as e:
    print("Error Training Network - Main Module code\n\n" + str(e))
finally:
    # Always stop writing once it was started, including when training failed.
    if writing_started:
        ut.stopWritingFile()
###################################################################
####MODELS#####################################
#training template model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
class CNNModel(nn.Module):
    """Two 1x1 convolutions followed by a linear read-out over flat feature rows.

    Args:
        input_dim: Number of features per sample (default 41).
        output_dim: Number of output classes (default 2).
    """

    def __init__(self, input_dim=41, output_dim=2):
        # Must be ``__init__``: a method named ``init`` is never called by
        # Python, which would leave the module without any layers.
        super(CNNModel, self).__init__()
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=1, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        # Max pool 1 (defined but not applied in forward)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=1, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        # Max pool 2 (defined but not applied in forward)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # Read-out: 32 channels x input_dim positions (1312 for 41 features).
        self.fc1 = nn.Linear(32 * input_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)`` logits."""
        # (batch, features) -> (batch, 1, 1, features) for Conv2d.
        x = x.unsqueeze(1).unsqueeze(1)
        out = self.relu1(self.cnn1(x))
        out = self.relu2(self.cnn2(out))
        # Flatten to (batch, 32 * features) for the linear read-out.
        out = out.view(out.size(0), -1)
        return self.fc1(out)
class RNNModel(nn.Module):
    """Tanh RNN applied to each sample as a one-step sequence, plus a linear read-out.

    Args:
        input_dim: Number of features per sample.
        hidden_dim: Size of the hidden state.
        layer_dim: Number of stacked RNN layers.
        output_dim: Number of output classes.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        # Must be ``__init__``: a method named ``init`` is never called by
        # Python, which would leave the module without any layers.
        super(RNNModel, self).__init__()
        # Kept for backward compatibility only; forward() no longer uses it
        # and follows the input's device instead.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True => tensors are (batch, seq, feature).
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)`` logits."""
        # (batch, features) -> (batch, 1, features): a single time step.
        x = x.unsqueeze(1)
        # With no initial state given, nn.RNN starts from zeros on the
        # input's device. The old explicit h0 was pinned to cuda:0 whenever
        # CUDA existed, even for a model living on the CPU.
        out, _ = self.rnn(x)
        # Read out from the last time step.
        return self.fc(out[:, -1, :])
class LSTMModel(nn.Module):
    """LSTM applied to each sample as a one-step sequence, plus a linear read-out.

    Args:
        input_dim: Number of features per sample.
        hidden_dim: Size of the hidden and cell states.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of output classes.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        # Must be ``__init__``: a method named ``init`` is never called by
        # Python, which would leave the module without any layers.
        super(LSTMModel, self).__init__()
        # Kept for backward compatibility only; forward() no longer uses it
        # and follows the input's device instead.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.input_dim = input_dim
        # batch_first=True => tensors are (batch, seq, feature).
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.layer_dim, batch_first=True)
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, ...)`` tensor to ``(batch, output_dim)`` logits."""
        # Flatten each sample into a single time step: (batch, 1, features).
        x = x.reshape(x.size(0), 1, -1)
        # With no initial state given, nn.LSTM starts both the hidden and the
        # cell state from zeros on the input's device. The old explicit
        # tensors were created on the CPU, flagged as requiring grad, copied
        # to a pinned device and then detached again on every call.
        out, _ = self.lstm(x)
        # Read out from the last time step.
        return self.fc(out[:, -1, :])
class GRUModel(nn.Module):
    """GRU applied to each sample as a one-step sequence, with a ReLU + linear read-out.

    Args:
        input_dim: Number of features per sample.
        hidden_dim: Size of the hidden state.
        output_dim: Number of output classes.
        n_layers: Number of stacked GRU layers.
        drop_prob: Dropout probability passed to ``nn.GRU``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        # Must be ``__init__``: a method named ``init`` is never called by
        # Python, which would leave the module without any layers.
        super(GRUModel, self).__init__()
        # Kept for backward compatibility only; init_hidden() follows the
        # device of the parameters instead.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        """Run one step and return ``(logits, new_hidden_state)``.

        Args:
            x: Input tensor whose first dimension is the batch.
            h: Hidden state of shape ``(n_layers, batch, hidden_dim)``.
        """
        # Flatten each sample into a single time step: (batch, 1, features).
        x = x.reshape(x.size(0), 1, -1)
        out, h = self.gru(x, h)
        # Read out from the last time step.
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape ``(n_layers, batch_size, hidden_dim)``."""
        # Match the dtype and device of the model's own parameters, so the
        # state is valid wherever the model currently lives.
        weight = next(self.parameters())
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim,
                           dtype=weight.dtype, device=weight.device)