# Imports
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
import matplotlib.pyplot as plt # for plotting
import time
import pandas as pd
"""Load Data"""
def get_data(path):
    """Load one headerless CSV file as a flat float32 vector.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype float32 containing every cell of
        the file, flattened in row-major order.
    """
    table = pd.read_csv(path, header=None)
    # Cast first, then drop down to a plain array and collapse it to 1-D.
    return table.astype(np.float32).to_numpy().flatten()
def get_data_loader(batch_size,
                    path_of_data="C:\\Users\\Stephanie\\Documents\\ProcessedData\\",
                    num_workers=1):
    """Build the training and validation loaders over the CSV dataset.

    The folder is read with ``datasets.DatasetFolder`` using ``get_data`` as
    the per-file loader, restricted to ``.csv`` files. The sample indices are
    shuffled with a fixed NumPy seed and split 80% / 20% into training and
    validation subsets; both loaders draw from the same dataset object
    through a ``SubsetRandomSampler``.

    Args:
        batch_size: Number of samples per batch for both loaders.
        path_of_data: Root folder handed to ``DatasetFolder``. The default is
            the same Windows path the original code hard-coded; it is now
            spelled with doubled backslashes because ``\\S``, ``\\D`` and
            ``\\P`` are not valid string escapes.
        num_workers: Worker processes per loader. Pass 0 to load in the main
            process. NOTE(review): with a value above 0 on Windows the
            calling script presumably needs its setup code under
            ``if __name__ == '__main__':`` -- confirm against the PyTorch
            DataLoader notes.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    dataset = datasets.DatasetFolder(path_of_data,
                                     loader=get_data, extensions='.csv')

    # Shuffle every index once with a fixed seed so the split is reproducible.
    indices = list(range(len(dataset)))
    np.random.seed(1)
    np.random.shuffle(indices)

    # First 80% of the shuffled indices train, the remainder validates.
    split = int(len(indices) * 0.8)
    train_indices, val_indices = indices[:split], indices[split:]

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, num_workers=num_workers,
        sampler=SubsetRandomSampler(train_indices))
    val_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, num_workers=num_workers,
        sampler=SubsetRandomSampler(val_indices))
    return train_loader, val_loader
def get_model_name(name, batch_size, learning_rate, epoch):
    """Compose a checkpoint name that encodes the run's hyperparameters.

    Args:
        name: Identifier of the model.
        batch_size: Batch size used for the run.
        learning_rate: Learning rate used for the run.
        epoch: Epoch index the checkpoint belongs to.

    Returns:
        A string of the form
        ``model_<name>_bs<batch_size>_lr<learning_rate>_epoch<epoch>``.
    """
    return f"model_{name}_bs{batch_size}_lr{learning_rate}_epoch{epoch}"
def get_accuracy2(model, train=False):
    """Return the classification accuracy of ``model`` on one data split.

    Each batch is passed through the module-level ``encoder`` and then through
    ``model``; the predicted class is the index of the largest output score.
    Relies on the module-level names ``train_loader``, ``val_loader``,
    ``encoder`` and ``use_cuda``.

    Args:
        model: Classifier applied to the encoder output.
        train: If true, evaluate on ``train_loader``; otherwise on
            ``val_loader``.

    Returns:
        The fraction of correctly classified samples as a float, or 0.0 when
        the selected loader yields no samples.
    """
    data_loader = train_loader if train else val_loader
    on_gpu = use_cuda and torch.cuda.is_available()

    correct = 0
    total = 0
    # Nothing here calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for eeg, labels in data_loader:
            if on_gpu:
                eeg = eeg.cuda()
                labels = labels.cuda()
            output = model(encoder(eeg))
            # Index of the maximum score along dim 1 is the predicted class.
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(labels.view_as(pred)).sum().item()
            total += eeg.shape[0]

    if total == 0:
        # An empty split would otherwise raise ZeroDivisionError.
        return 0.0
    return correct / total
def train_net3(net, split, batch_size=64, learning_rate=0.01, num_epochs=7,
               save_dir="/content/gdrive/My Drive"):
    """Train ``net`` on the encoder output and checkpoint it every epoch.

    Relies on the module-level names ``train_loader``, ``encoder`` and
    ``use_cuda`` and on ``get_accuracy2`` / ``get_model_name``. Only the
    parameters of ``net`` are handed to the optimizer.

    Args:
        net: Classifier to train; must expose a ``name`` attribute, which is
            used in the checkpoint file name.
        split: Weight given to the training accuracy when the combined
            accuracy is reported (the validation accuracy gets
            ``1 - split``).
        batch_size: Only used to build the checkpoint file name.
        learning_rate: Learning rate for SGD (momentum 0.9).
        num_epochs: Number of passes over ``train_loader``.
        save_dir: Directory that receives the checkpoints and accuracy CSVs.
            The default is the path the original code hard-coded; pass a
            local directory when that path does not exist.

    Returns:
        None. Side effects: prints progress, saves ``net.state_dict()`` after
        every epoch, and writes ``<checkpoint>_train_acc.csv`` and
        ``<checkpoint>_val_acc.csv`` for the last checkpoint.
    """
    torch.manual_seed(1)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    on_gpu = use_cuda and torch.cuda.is_available()

    train_acc, val_acc = [], []
    model_path = None
    start_time = time.time()
    for epoch in range(num_epochs):
        for eeg, labels in train_loader:
            if on_gpu:
                eeg = eeg.cuda()
                labels = labels.cuda()
            # The encoder is not optimized here, so its forward pass needs
            # no autograd graph.
            with torch.no_grad():
                transfer_out = encoder(eeg)
            out = net(transfer_out)
            loss = criterion(out, labels.long())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # One accuracy sample per iteration. Each call walks a whole
            # loader, so this dominates the run time.
            val_acc.append(get_accuracy2(net, train=False))
            train_acc.append(get_accuracy2(net, train=True))

        print ("Epoch %d Finished. " % epoch ,"Time per Epoch: % 6.2f s "% ((time.time()-start_time) / (epoch +1)))
        if train_acc:
            print("Training accuracy:", train_acc[-1], "Validation accuracy:", val_acc[-1])

        # Save the current model (checkpoint) to a file.
        model_name = get_model_name(net.name, batch_size, learning_rate, epoch)
        model_path = f"{save_dir}/{model_name}"
        torch.save(net.state_dict(), model_path)
    end_time = time.time()

    if train_acc:
        # The weights have not changed since the last recorded evaluation, so
        # reuse it; appending another value would leave the two accuracy
        # curves with different lengths.
        final_train_acc, final_val_acc = train_acc[-1], val_acc[-1]
    else:
        final_train_acc = get_accuracy2(net, train=True)
        final_val_acc = get_accuracy2(net, train=False)

    print("Final Training Accuracy: {}".format(final_train_acc))
    print("Final Validation Accuracy: {}".format(final_val_acc))
    elapsed = end_time - start_time
    # max(..., 1) avoids a ZeroDivisionError when num_epochs is 0.
    print ("Total time: % 6.2f s Time per Epoch: % 6.2f s " % ( elapsed, (elapsed / max(num_epochs, 1)) ))
    total_acc = final_train_acc * split + final_val_acc * (1 - split)
    print("Accuracy of entire set: {}".format(total_acc))

    # Write the per-iteration accuracy curves to CSV for plotting later.
    if model_path is not None:
        np.savetxt("{}_train_acc.csv".format(model_path), train_acc)
        np.savetxt("{}_val_acc.csv".format(model_path), val_acc)
class Encoder(nn.Module):
    """Autoencoder module that is used as a feature extractor.

    ``forward`` returns only the ReLU-activated 15-dimensional code produced
    by the ``encoder`` sub-module from a 4400-dimensional input.
    """

    def __init__(self):
        super().__init__()
        self.name = 'Encoder'
        self.encoder = nn.Sequential(
            nn.Linear(4400, 15)
        )
        # Not used by forward(); presumably kept so that a state dict saved
        # from the full autoencoder still matches this module's keys.
        self.decoder = nn.Sequential(
            nn.Linear(15, 4400),
            nn.Sigmoid()  # get to the range (0, 1)
        )

    def forward(self, x):
        """Encode ``x`` and return the ReLU of the 15-dimensional code.

        The reconstruction is not computed: its result was never returned, so
        running the decoder only cost time.
        """
        return F.relu(self.encoder(x))
class SmallNet2(nn.Module):
    """Two-layer fully connected classifier: 15 -> 12 -> 2 with a ReLU between."""

    def __init__(self):
        super().__init__()
        self.name = "Small"
        self.layer1 = nn.Linear(15, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, transfer_out):
        """Return the two output scores for each input row.

        Args:
            transfer_out: Tensor whose last dimension equals
                ``layer1.in_features`` (15). It is cast to float32 before the
                first layer.

        Returns:
            Tensor of raw (un-normalized) scores with last dimension 2.

        Raises:
            ValueError: If the last dimension of ``transfer_out`` is not the
                width ``layer1`` expects. Without this check a wrong width
                fell through to an unrelated error further down.
        """
        expected = self.layer1.in_features
        if transfer_out.shape[-1] != expected:
            raise ValueError(
                "SmallNet2 expects inputs with %d features, got shape %s"
                % (expected, tuple(transfer_out.shape)))
        hidden = F.relu(self.layer1(transfer_out.float()))
        return self.layer2(hidden)
# Run configuration. These are plain constants, so they are cheap to evaluate
# whenever the module is imported.
use_cuda = True
## best for all = batch size = 48, lr = 0.005, num epoch = 45
batch_size = 48
lr = 0.005
num_epochs = 45

if __name__ == '__main__':
    # All setup with real cost (dataset scan, model construction, checkpoint
    # load, CUDA transfer) lives under the main guard. When DataLoader workers
    # are started with the "spawn" method (the Windows default), each worker
    # re-imports this file; anything left at module level would be repeated in
    # every worker, every time a loader is iterated.
    torch.manual_seed(1)
    train_loader, val_loader = get_data_loader(batch_size)
    small_net = SmallNet2()
    encoder = Encoder()
    # Raw string: "\S", "\P", "\c" and "\A" are not valid escape sequences.
    path = r"C:\Users\Stephanie\PycharmProjects\capstone\Autoencoder_model2"
    # Load onto the CPU first so the checkpoint also opens on a machine
    # without CUDA; the model is moved to the GPU below when one is available.
    encoder.load_state_dict(torch.load(path, map_location="cpu"))
    encoder.eval()
    if use_cuda and torch.cuda.is_available():
        print("CUDA is available")
        small_net = small_net.cuda()
        encoder = encoder.cuda()
    else:
        print("CUDA is not available")
    train_net3(small_net, 0.8, batch_size, lr, num_epochs)
If num_workers is greater than 0, I get an error unless the training call is wrapped in `if __name__ == '__main__':`. With that guard in place the script runs, but it does not behave as intended: training takes about 10x longer. Why can't I set num_workers to a value other than 0? I copied this code from my Google Colab notebook, where it worked fine, but it does not work when I run it from PyCharm.
Please help.