I have been trying to train a lstm model with lyrics data I found on Kaggle. When I first started with this architecture, I had glove embeddings which worked fine with the model and how I shaped it. I switched totokenization and then z scoring my input parameters and all of a sudden my program hangs and then closes out without any error, I only found out about the error when I checked my Event viewer and found this:
Faulting application name: python.exe, version: 3.12.2150.1013, time stamp: 0x65c2a4c7
Faulting module name: c10.dll, version: 0.0.0.0, time stamp: 0x65a00f1b
Exception code: 0xc0000005
Fault offset: 0x0000000000060bb5
Faulting process id: 0x0x35EC
Faulting application start time: 0x0x1DA5E8859C9145E
Faulting application path: C:\Users\rohun\AppData\Local\Programs\Python\Python312\python.exe
Faulting module path: C:\Users\rohun\final_train\env\Lib\site-packages\torch\lib\c10.dll
Report Id: 8d624ff4-59a7-448f-b320-6e7aeed0ecd4
Faulting package full name:
Faulting package-relative application ID:
I’ve tried a lot of different thigns to fix like changing model architecture, data preprocessing, hardware limitations, and such, but I haven’t found a single thing that could be casuing me this problem. ANy help much appreciated.
For reference this is my code:
import pandas as pd
import string
import torch
import torchtext
import random
import tracemalloc
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from operator import itemgetter
from memory_profiler import profile
if torch.cuda.is_available():
device = torch.device(“cuda”)
print(“CUDA is available.”)
else:
device = torch.device(“cpu”)
print(“CUDA is not available. Running on CPU.”)
class EarlyStopping:
def init(self, patience=5, min_delta=0.0):
self.patience = patience
self.min_delta = min_delta
self.counter = 0
self.best_loss = float(‘inf’)
def __call__(self, val_loss):
if val_loss < self.best_loss - self.min_delta:
self.best_loss = val_loss
self.counter = 0
else:
self.counter += 1
if self.counter >= self.patience:
return True # Stop training
return False
def reset(self):
self.counter = 0
self.best_loss = float('inf')
Grid Search
batch_sizes = [32] # try with 64 and 128
hidden_lstm_sizes = [25] # if it does better wiht the 512 lets use 1024 and a higher value as well, should give us 4 concrete valuesto work with.
hidden_linear_sizes = [124] # 512
epochs = 8 # max number of epochs before overfitting
drop_percentages = [0] # better values with increased dropout percentage, 0.25 working well so lets keep it
learning_rates = [.01]
weight_decays = [0] # 0, 0.10, 0.25
class LSTM(nn.Module):
def __init__(self, embedding_dim, hidden_dim, hidden1dim, output_dim, dropout_dim,sequence_len):
super(LSTM, self).__init__()
self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
self.hidden = nn.Linear(hidden_dim, hidden1dim)
self.output = nn.Linear(hidden_dim, output_dim)
self.drop = nn.Dropout(dropout_dim)
def forward(self, data):
lstm_out, _ = self.lstm(data)
#hidden_out = self.hidden(lstm_out)
#drop_out = self.drop(hidden_out)
logits = self.output(lstm_out)
return logits
class data_set(Dataset):
def init(self,X_data,Y_data):
self.X_data = X_data
self.Y_data = Y_data
def __len__(self):
return len(self.X_data)
def __getitem__(self, index):
return self.X_data[index], self.Y_data[index]
early_stopper = EarlyStopping(patience=3, min_delta=0.0025)
loss_fn = torch.nn.CrossEntropyLoss()
def loop(training_data_loader, validation_data_loader, testing_data_loader,optimizer,epochs,model):
epoch_losses = {}
early_stopper.reset()
earlyStopping = False
print(len(training_data_loader))
#c_s = len(training_data_loader) // 10
for epoch in range(epochs):
if earlyStopping:
break
epoch_losses[epoch] = []
print("Epoch: " + str(epoch))
total_loss = 0
running_loss = 0
for i,batch in enumerate(training_data_loader):
x_i, y_i = batch[0],batch[1]
logits = model(x_i)
loss = loss_fn(logits, y_i)
optimizer.zero_grad()
loss.backward()
optimizer.step()
model.eval()
total_loss = 0
running_loss = 0
for i,batch in enumerate(validation_data_loader):
x_i, y_i = batch[0],batch[1]
# forward pass
with torch.no_grad():
logits = model(x_i)
loss = loss_fn(logits, y_i)
total_loss += loss
running_loss += loss
# check early stopping here?
print("Total Validation Average Loss: " + str((total_loss/len(validation_data_loader)).item()))
epoch_losses[epoch].append(total_loss/len(validation_data_loader))
scheduler.step(total_loss/len(validation_data_loader))
if early_stopper(total_loss/len(validation_data_loader)):
print(f"Early stopping at epoch {epoch}")
earlyStopping = True
total_loss = 0
running_loss = 0
for i,batch in enumerate(testing_data_loader):
x_i, y_i = batch[0],batch[1]
# forward pass
with torch.no_grad():
logits = model(x_i)
loss = loss_fn(logits, y_i)
total_loss += loss
running_loss += loss
# check early stopping here?
print("Total Testing Average Loss: " + str((total_loss/len(testing_data_loader)).item()))
epoch_losses[epoch].append(total_loss/len(testing_data_loader))
return epoch_losses
read data, pre process data
df = pd.read_csv(‘spotify_millsongdata.csv’)
#df = df.loc[(df[‘artist’] == ‘ABBA’) | (df[‘artist’] == ‘Donna Summer’) | (df[‘artist’] == ‘Bob Dylan’)]
df = df.sample(n=1)
df = df[‘text’]
print("Size of dataset: " + str(len(df)))
table = str.maketrans(‘’, ‘’, string.punctuation)
words_set = set()
number_to_words_dictionary = {}
words_to_numbers_dictionary = {}
def preProcess(x):
x = x.strip()
x = x.lower()
x = x.splitlines()
for i in range(len(x)):
x[i] = x[i].lower()
x[i] = x[i].translate(table)
x[i] = x[i].split()
x[i].insert(0,"<BOS>")
x[i].append("<EOS>")
words_set.update(x[i])
return x
print(“Preprocessing staring…”)
data =
for index, row in df.items():
data.extend(preProcess(row))
words_set = list(sorted(words_set))
for i,word in enumerate(words_set):
number_to_words_dictionary[i] = word
words_to_numbers_dictionary[word] = i
max_len = max(len(x) for x in data) - 1
Load GloVe embeddings
#glove = torchtext.vocab.GloVe(name=“6B”, dim=50)
#convert data into X and y data, with X being embeddings and y as numbers
X =
Y =
convert into word embeddings
torch size to be (max_size, 50)
window_size = 3
print(“token embeddings starting …”)
for i in range(len(data)):
update = torch.tensor()
for j in range(len(data[i]) - window_size):
keys = data[i][:j+window_size]
X.append(torch.tensor(itemgetter(*keys)(words_to_numbers_dictionary)))
Y.append(torch.tensor(words_to_numbers_dictionary[data[i][j+window_size]]))
print(“token embeddings completed…”)
Pad the sequences
print(“padding sequences starting…”)
padded_X = pad_sequence(X, batch_first=True).float()
Z score
mean = padded_X.mean(dim=None)
std = padded_X.std(dim=None)
Z-score normalization
padded_X = (padded_X - mean) / std
print(padded_X.size())
Y = torch.tensor(Y).long()
print(“padding sequences ending…”)
split training data into training, validation and testing
creates dataset
dataset = data_set(padded_X,Y)
train_size = int(0.7 * len(dataset))
test_size = int(0.2 * len(dataset))
val_size = len(dataset) - train_size - test_size
Split the dataset
train_dataset, test_dataset, val_dataset = random_split(dataset, [train_size, test_size, val_size])
print("Preprocessing Completed … ")
print(“GRID SEARCH STARTING …”)
classes_size = len(number_to_words_dictionary)
#accuracy = Accuracy(task=“multiclass”, num_classes=classes_size)
starts the whole loop
sequence_length = padded_X.size()[1]
input_dim = 1
#tracemalloc.start()
with open(“readme.txt”, “w”) as f:
for batch in batch_sizes:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch, shuffle=True)
for hidden_lstm in hidden_lstm_sizes:
for hidden_linear in hidden_linear_sizes:
for drop_p in drop_percentages:
# creates model
model = LSTM(input_dim, hidden_lstm, hidden_linear, classes_size, drop_p,sequence_len=sequence_length)
model.to(device)
for lr in learning_rates:
for wd in weight_decays:
# runs training loop
optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=wd)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
name = f"batch: {batch}, hidden_lstm: {hidden_lstm}, hidden_linear: {hidden_linear}, drop_percentage: {drop_p}, learning_rate: {lr}, weight_decay: {wd}"
print(name)
print("----------------------------------------------------------------------------------------------------------------------------------------------")
val= loop(train_loader,val_loader,test_loader,optimizer,epochs=epochs,model=model)
f.write(name + "\n")
for key,value in val.items():
f.write(f"| At epoch: {key} training_loss = {value[0]} validation_loss = {value[1]} testing_loss = {value[2]}" + "\n")
print(f"| At epoch: {key} | training_loss = {value[0]} | validation_loss = {value[1]} | testing_loss = {value[2]} |")
f.write("\n")
val = None
del val
#print(tracemalloc.get_traced_memory())
del optimizer
del scheduler
del model
train_loader = None
val_loader= None
test_loader = None
del train_loader
del val_loader
del test_loader
#tracemalloc.stop()
print(“GRID SEARCH COMPLETED”)