Hi I am trying to write a Classification Bert which is trained on multiple datasets.
Because the number of classes of different datasets are different, during training I need to use different final linear layer for different datasets. Besides I don’t want to overwrite the previous final linear layers because I want to test the performance drop after fine-tuning on other datasets.
Currently I can’t make it work.
Here is the implementation of my Classification Bert.
class ClassificationBert(nn.Module):
def __init__(self):
super().__init__()
self.bert = BertModel.from_pretrained('bert-base-uncased')
self.linear = nn.Sequential(nn.Linear(768, 128),
nn.Tanh())
self.classifier = None
self.datasets = []
self.classifiers = nn.ModuleList()
def forward(self, x):
all_hidden, _ = self.bert(x)
pooled_output = torch.mean(all_hidden, 1)
features = self.linear(pooled_output)
predict = self.classifier(features)
return predict
def add_dataset(self, dataset, num_outputs):
if dataset not in self.datasets:
self.datasets.append(dataset)
self.classifiers.append(nn.Linear(128, num_outputs))
def set_dataset(self, dataset):
assert dataset in self.datasets
self.classifier = self.classifiers[self.datasets.index(dataset)]
And below is where I used add_dataset
and set_dataset
.
datasets = ['ag_news_csv', 'yahoo_answers_csv']
model = ClassificationBert()
tasks = []
for dataset in datasets:
num_outputs = 0
if dataset == 'ag_news_csv':
num_outputs = 4
elif dataset == 'yahoo_answers_csv':
num_outputs = 10
else:
num_outputs = 14
model.add_dataset(dataset, num_outputs)
model.set_dataset(dataset)
model = model.cuda()
dataset_path = os.path.join(args.data_path, dataset) + '/'
train(dataset_path, model)
tasks.append(dataset)
for task in tasks:
model.set_dataset(task)
dataset_path = os.path.join(args.data_path, dataset) + '/'
train_labeled_set, val_set, test_set, n_labels = get_data(
dataset_path, args.n_labeled)
test_loader = Data.DataLoader(
dataset=test_set, batch_size=512, shuffle=False)
criterion = nn.CrossEntropyLoss()
test_loss, test_acc = validate(
test_loader, model, criterion, 'Test Stats')
print("Task : {}, acc : {}".format(
dataset, test_acc))
You can find I use two datasets and after fine-tuning on each dataset, I want to test the performance drop of every dataset that this model has been trained on before.
But I ran into an error:
Traceback (most recent call last):
File "drive/My Drive/MixText/code1/train.py", line 190, in <module>
main()
File "drive/My Drive/MixText/code1/train.py", line 98, in main
test_loader, model, criterion, 'Test Stats')
File "drive/My Drive/MixText/code1/train.py", line 119, in validate
correct += (np.array(predicted.cpu()) ==
RuntimeError: CUDA error: device-side assert triggered
The corresponding code is below:
def validate(valloader, model, criterion, mode):
model.eval()
with torch.no_grad():
loss_total = 0
total_sample = 0
acc_total = 0
correct = 0
for batch_idx, (inputs, targets, length) in enumerate(valloader):
inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
outputs = model(inputs)
loss = criterion(outputs, targets)
_, predicted = torch.max(outputs.data, 1)
print(predicted.shape)
print(targets.shape)
correct += (np.array(predicted.cpu()) ==
np.array(targets.cpu())).sum()
loss_total += loss.item() * inputs.shape[0]
total_sample += inputs.shape[0]
acc_total = correct/total_sample
loss_total = loss_total/total_sample
return loss_total, acc_total
I checked the shape of two tensors and didn’t find mismatch.
Would someone love to help me figure out the mistake or what’s the correct way to dynamically replace the last linear layer?