Here is the full script:
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, accuracy_score
from transformers import BertTokenizer, BertModel
import random, torch
import numpy as np
from torchtext import data
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id
max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']
def tokenize_and_cut(sentence):
    tokens = tokenizer.tokenize(sentence)
    # leave room for the [CLS] and [SEP] tokens added by the field below
    tokens = tokens[:max_input_length - 2]
    return tokens
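# Optional sanity check (illustrative input, not part of the pipeline):
# a very long review should be cut so BERT never sees more than
# max_input_length ids once [CLS] and [SEP] are added, e.g.
# assert len(tokenize_and_cut('great movie ' * 400)) <= max_input_length - 2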
TEXT = data.Field(batch_first=True,
                  use_vocab=False,
                  tokenize=tokenize_and_cut,
                  preprocessing=tokenizer.convert_tokens_to_ids,
                  init_token=init_token_idx,
                  eos_token=eos_token_idx,
                  pad_token=pad_token_idx,
                  unk_token=unk_token_idx)
LABEL = data.LabelField(dtype=torch.float)
fields = [('text', TEXT), ('label', LABEL)]
train_data, test_data = data.TabularDataset('IMDB Dataset.csv', format='csv', skip_header=True, fields=fields).split()
LABEL.build_vocab(train_data)  # TEXT uses the tokenizer's ids (use_vocab=False), so only LABEL needs a vocab
BATCH_SIZE = 255
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_iterator, valid_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=False,
    device=device)
bert = BertModel.from_pretrained('bert-base-uncased')
class BERTGRUSentiment(nn.Module):
    def __init__(self, bert, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()
        self.bert = bert
        embedding_dim = bert.config.hidden_size
        self.rnn = nn.GRU(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional,
                          batch_first=True, dropout=0 if n_layers < 2 else dropout)
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # BERT is frozen below, so skip building a graph for the embedding pass
        with torch.no_grad():
            embedded = self.bert(text)[0]
        _, hidden = self.rnn(embedded)
        if self.rnn.bidirectional:
            # concatenate the last forward and backward hidden states
            hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        else:
            hidden = self.dropout(hidden[-1, :, :])
        output = self.out(hidden)
        return output
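# Shape sketch for one forward pass (batch size B, sequence length L), assuming
# bert-base (hidden size 768): text [B, L] ids -> embedded [B, L, 768] ->
# GRU hidden stack [n_layers * num_directions, B, HIDDEN_DIM] -> concatenated
# final states [B, HIDDEN_DIM * 2] -> logits [B, OUTPUT_DIM].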
HIDDEN_DIM = 26
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
model = BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
for name, param in model.named_parameters():
    if name.startswith('bert'):
        param.requires_grad = False
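# Optional sanity check: after freezing, only the GRU and the linear head should
# report trainable parameters (uncomment to verify):
# print(sum(p.numel() for p in model.parameters() if p.requires_grad), 'trainable parameters')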
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
model = model.to(device)
criterion = criterion.to(device)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1)
def big_training_loop(epochs, optim, clf, lossf, train_loader, val_loader, accuracy_score):
    # variables for early stopping
    n_epochs_stop = 10
    epochs_no_improve = 0
    min_val_loss = float('inf')
    # running epochs
    for epoch in range(1, epochs + 1):
        # per-epoch performance monitoring
        loss_train_list = []
        loss_val_list = []
        correct_train_list_fscore = []
        correct_train_list = []
        correct_val_list_fscore = []
        correct_val_list = []
        # training on batches (train and validation batches are consumed in lockstep)
        for (text, labels), (val_text, val_labels) in zip(train_loader, val_loader):
            # - training section - #
            clf.train()
            output = clf(text).squeeze(1)  # [B, 1] -> [B] to match the label shape
            # compute the loss
            loss = lossf(output, labels)
            # record the loss for monitoring
            loss_train_list.append(loss.item())
            # back-propagate, update weights, then clear accumulated gradients
            loss.backward()
            optim.step()
            optim.zero_grad()
            # move tensors to numpy for the sklearn metrics; the logits need a
            # sigmoid before rounding because the loss is BCEWithLogitsLoss
            labels = labels.detach().cpu().numpy()
            preds = torch.round(torch.sigmoid(output)).detach().cpu().numpy()
            # sklearn expects (y_true, y_pred)
            correct_train_list_fscore.append(f1_score(labels, preds, average='macro'))
            correct_train_list.append(accuracy_score(labels, preds))
            # - evaluation section - #
            clf.eval()
            with torch.no_grad():
                output = clf(val_text).squeeze(1)
                # compute and record the validation loss
                val_loss = lossf(output, val_labels)
                loss_val_list.append(val_loss.item())
                # same conversion as above for the metrics
                val_labels = val_labels.detach().cpu().numpy()
                val_preds = torch.round(torch.sigmoid(output)).detach().cpu().numpy()
                correct_val_list.append(accuracy_score(val_labels, val_preds))
                correct_val_list_fscore.append(f1_score(val_labels, val_preds, average='macro'))
        # epoch-level averages; lower the lr if the validation loss plateaus
        loss = torch.mean(torch.FloatTensor(loss_train_list))
        val_loss = torch.mean(torch.FloatTensor(loss_val_list))
        acc = torch.mean(torch.FloatTensor(correct_train_list)) * 100
        fscore = torch.mean(torch.FloatTensor(correct_train_list_fscore)) * 100
        val_acc = torch.mean(torch.FloatTensor(correct_val_list)) * 100
        val_fscore = torch.mean(torch.FloatTensor(correct_val_list_fscore)) * 100
        scheduler.step(val_loss)
        # track the best epoch so far by validation loss
        if val_loss < min_val_loss:
            best_epoch = epoch
            best_loss = loss
            best_val_loss = val_loss
            best_acc = acc
            best_fscore = fscore
            best_val_acc = val_acc
            best_val_fscore = val_fscore
            # reset the early-stopping counter
            epochs_no_improve = 0
            min_val_loss = best_val_loss
            # report the current epoch as the best epoch
            print(
                f'BEST EPOCH: Epoch({epoch}) -> Train: (Accuracy: {best_acc:.1f}, f-score: {best_fscore:.1f}, Loss: {best_loss:.4f}) | Val: (Accuracy: {best_val_acc:.1f}, f-score: {best_val_fscore:.1f}, Loss: {best_val_loss:.4f})')
        else:
            # report the current epoch as a normal epoch
            print(
                f'Epoch({epoch}) -> Train: (Accuracy: {acc:.1f}, f-score: {fscore:.1f}, Loss: {loss:.4f}) | Val: (Accuracy: {val_acc:.1f}, f-score: {val_fscore:.1f}, Loss: {val_loss:.4f})')
            # count epochs without improvement toward the early-stopping limit
            epochs_no_improve += 1
            # stop early once the limit is reached, saving the best model
            if epoch > 5 and epochs_no_improve == n_epochs_stop:
                torch.save(clf, f'clf_val_loss_{best_val_loss:.4f}_f-score_{best_val_fscore:.1f}_val_acc_{best_val_acc:.1f}.pt')
                print('Early stopping!')
                print()
                print(
                    f'BEST EPOCH: Epoch({best_epoch}) -> Train: (Accuracy: {best_acc:.1f}, f-score: {best_fscore:.1f}, Loss: {best_loss:.4f}) | Val: (Accuracy: {best_val_acc:.1f}, f-score: {best_val_fscore:.1f}, Loss: {best_val_loss:.4f})')
                break
print('training:')
big_training_loop(555, optimizer, model, criterion, train_iterator, valid_iterator, accuracy_score=accuracy_score)
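# To reuse the checkpoint written by the early-stopping branch, something like the
# following should work (the filename is illustrative; it follows the format string
# passed to torch.save above):
# clf = torch.load('clf_val_loss_0.2500_f-score_90.0_val_acc_90.0.pt', map_location=device)
# clf.eval()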