Here me again ! I’m trying to implement a Bert Classifier to discriminate between 2 sequences classes (BINARY CLASSIFICATION class 0 and class 1), with AX hyperparameters tuning.
This is all my code implemented anticipated by a sample of my datasets ( I have 3 csv, train-test-val).
Now I’m trying to use a BCEwithLogitLoss
df_train=pd.read_csv('CLASSIFIER_train',sep=',',header=None)
df_train
0 1
M A T T D R P T P D G T D A I D L T T R V R R... 1
M K K L F Q T E P L L E L F N C N E L R I I G... 0
M L V A A A V C P H P P L L I P E L A A G A A... 1
M I V A W G N S G S G L L I L I L S L A V S A... 0
M V E E G R R L A A L H P N I V V K L P T T E... 1
M G S K V S K N A L V F N V L Q A L R E G L T... 1
M P S K E T S P A E R M A R D E Y Y M R L A M... 1
M V K E Y A L E W I D G Y R E R L V K V S D A... 1
M G T A A S Q D R A A M A E A A Q R V G D S F... 0
def create_data_loader(df, tokenizer, max_len, batch_size):
ds = SequenceDataset(
sequences=df[0].to_numpy(),
targets=df[1].to_numpy(),
tokenizer=tokenizer,
max_len=max_len
)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=2,
shuffle=True
)
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
def net_train(net, train_data_loader, parameters, dtype, device):
net.to(dtype=dtype, device=device)
# Define loss and optimizer
#criterion = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), # or any optimizer you prefer
lr=parameters.get("lr", 0.001), # 0.001 is used if no lr is specified
momentum=parameters.get("momentum", 0.9)
)
scheduler = optim.lr_scheduler.StepLR(
optimizer,
step_size=int(parameters.get("step_size", 30)),
gamma=parameters.get("gamma", 1.0), # default is no learning rate decay
)
num_epochs = parameters.get("num_epochs", 3) # Play around with epoch number
# Train Network
current_loss = 0.0
# Train Network
for _ in range(num_epochs):
# Your dataloader returns a dictionary
# so access it as such
for batch in train_data_loader:
# move data to proper dtype and device
labels = batch['targets'].to(device=device)
attention_mask = batch['attention_mask'].to(device=device)
input_ids = batch['input_ids'].to(device=device)
labels = labels \
.type(torch.FloatTensor) \
.reshape((labels.shape[0], 1))
#labels = labels.long()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs,x= net(input_ids.long(), attention_mask,return_dict=False)
#outputs,x= net(input_ids,atten_mask)
loss = criterion(outputs, labels.unsqueeze(1))
loss.backward()
optimizer.step()
scheduler.step()
return net
#from transformers.models.bert.modeling_bert import BertForSequenceClassification,AutoModel
def init_net(parameterization):
model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME,return_dict=True) #pretrained ResNet50
# The depth of unfreezing is also a hyperparameter
for param in model.parameters():
param.requires_grad = False # Freeze feature extractor
Hs = 512 # Hidden layer size; you can optimize this as well
model.fc = nn.Sequential(nn.Linear(1024, 512), # attach trainable classifier
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(512, 1))
#nn.Sigmoid())
#nn.Sigmoid())
return model # return untrained model
def train_evaluate(parameterization):
# constructing a new training data loader allows us to tune the batch size
train_data_loader=create_data_loader(df_train, tokenizer, MAX_LEN, batch_size=parameterization.get("batchsize", 32))
# Get neural net
untrained_net = init_net(parameterization)
# train
trained_net = net_train(net=untrained_net, train_data_loader=train_data_loader,
parameters=parameterization, dtype=dtype, device=device)
# return the accuracy of the model as it was trained in this run
return evaluate(
net=trained_net,
data_loader=test_data_loader,
dtype=dtype,
device=device,
)
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_parameters, values, experiment, model = optimize(
parameters=[
{"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
{"name": "batchsize", "type": "range", "bounds": [16, 128]},
{"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
#{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
#{"name": "stepsize", "type": "range", "bounds": [20, 40]},
],
evaluation_function=train_evaluate,
objective_name='accuracy',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
ValueError: Target size (torch.Size([68, 1, 1])) must be the same as input size (torch.Size([68, 450, 1024]))
I now that this loss required a specific input format, but I tried everything. Probably I am not understanding the theory correctly. Thank you very much ! I really don’t now what to do