I was using PyTorch/XLA (torch_xla), and I tried to debug my program manually like this:
def fit(model, train_dataset, val_dataset, epochs=1, batch_size=32, warmup_prop=0, lr=5e-5):
    """
    Set up training on the current XLA device.

    The visible portion moves the model to the device and builds the
    distributed sampler for the training set, printing a progress marker
    from the master process before each step.

    Arguments:
        model -- Module to train; moved to the XLA device.
        train_dataset -- Dataset wrapped by the distributed sampler.
        val_dataset -- Not used in the visible portion.
        epochs, batch_size, warmup_prop, lr -- Not used in the visible portion.
    """
    xm.master_print('1')
    device = xm.xla_device()

    xm.master_print('2')
    model = model.to(device)

    xm.master_print('loading train loader')
    # Each replica reads its own shard: one shard per process, indexed by ordinal.
    world_size = xm.xrt_world_size()
    ordinal = xm.get_ordinal()
    train_sampler = DistributedSampler(
        train_dataset,
        num_replicas=world_size,
        rank=ordinal,
        shuffle=True,
    )
def fit_multiprocessing(rank, flags):
    """
    Per-process entry point handed to the multiprocessing spawner.

    Arguments:
        rank -- Process index supplied by the spawner (unused here).
        flags -- Extra arguments supplied by the spawner (unused here).

    The model, datasets and hyper-parameters are read from the enclosing
    scope rather than from the arguments.
    """
    fit(
        model,
        train_dataset,
        val_dataset,
        epochs=epochs,
        batch_size=batch_size,
        warmup_prop=warmup_prop,
        lr=lr,
    )
# Start 8 forked worker processes; FLAGS is forwarded to
# fit_multiprocessing as its extra argument.
FLAGS = {}
xmp.spawn(
    fit_multiprocessing,
    args=(FLAGS,),
    nprocs=8,
    start_method='fork',
)
After printing 1 and 2, the training process hangs forever, which means the program never finishes executing this line of code: model = model.to(device)
Here is the code used to create the model for TPU training:
# Model key -> directory holding that model's files (config.json is read from here).
MODEL_PATHS = {
    'xlmr': '../input/xlm-roberta-base/',
}
# Model key -> (model class, tokenizer class, config class).
TRANSFORMERS = {
    'xlmr': (XLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig),
}
import torch
from transformers import XLMRobertaConfig,XLMRobertaModel
# Load a pretrained XLM-RoBERTa encoder from the local input directory.
# NOTE(review): the Transformer class in this file builds its own encoder
# from the config and does not reference this instance -- confirm it is needed.
xlmr_path = "/kaggle/input/xlm-roberta-base/"
xlmr = XLMRobertaModel.from_pretrained(xlmr_path)
# xlmr.eval()  # disable dropout (or leave in train mode to finetune)
class Transformer(nn.Module):
    """XLM-RoBERTa encoder followed by a tanh pooler and a linear classification head."""

    def __init__(self, model, num_classes=1):
        """
        Constructor

        Arguments:
            model {string} -- Key into MODEL_PATHS / TRANSFORMERS (e.g. "xlmr").
            num_classes {int} -- Number of classes (default: {1})

        Raises:
            KeyError -- If `model` is not a registered key.
        """
        super().__init__()
        self.name = model

        # Reject unknown keys up front, before touching the filesystem.
        if model not in TRANSFORMERS:
            raise KeyError(model)

        bert_config = XLMRobertaConfig.from_json_file(MODEL_PATHS[model] + 'config.json')
        # forward() reads the per-layer hidden states, so the encoder must return them.
        bert_config.output_hidden_states = True

        # NOTE(review): this builds the architecture from the config only; no
        # pretrained weights are loaded here -- confirm they are loaded elsewhere.
        self.transformer = XLMRobertaModel(bert_config)

        self.nb_features = self.transformer.pooler.dense.out_features

        self.pooler = nn.Sequential(
            nn.Linear(self.nb_features, self.nb_features),
            nn.Tanh(),
        )

        self.logit = nn.Linear(self.nb_features, num_classes)

    def forward(self, tokens):
        """
        Usual torch forward function

        Arguments:
            tokens {torch tensor} -- Sentence tokens, padded with the
                encoder's configured pad token id

        Returns:
            torch tensor -- Class logits
        """
        # Mask on the encoder's own pad id. A `tokens > 0` test assumes pad id 0
        # (BERT); in XLM-RoBERTa id 0 is `<s>` and padding is id 1, so that test
        # hides the first token and attends to the padding.
        pad_token_id = self.transformer.config.pad_token_id
        attention_mask = (tokens != pad_token_id).long()

        outputs = self.transformer(tokens, attention_mask=attention_mask)

        # Index instead of tuple-unpacking: indexing works for both a plain tuple
        # and a ModelOutput, whereas unpacking a ModelOutput iterates its keys.
        hidden_states = outputs[2]

        # Use the representation of the first token of the last layer
        first_token_state = hidden_states[-1][:, 0]

        ft = self.pooler(first_token_state)

        return self.logit(ft)
# Build the classifier from the 'xlmr' entries of MODEL_PATHS / TRANSFORMERS.
model = Transformer('xlmr')
Sorry for not changing the variable names; the initial setup was for the bert-base-uncased model, and I just changed the paths and weights to use XLM-RoBERTa instead. I think the Transformer class is where I am missing something. Please help me solve this issue, thanks.