Thanks for the suggestion. However, I am still facing some issues. I use TransformerEncoderLayer in my model, which throws an error if I train using Mixed Precision, but it works fine when using float32 precision.
Error:
Traceback (most recent call last): File "main.py", line 91, in <module> trainer.fit(train_module, train_dataloader, valid_dataloader) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 697, in fit self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _call_and_handle_interrupt return trainer_fn(*args, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 735, in _fit_impl results = self._run(model, ckpt_path=self.ckpt_path) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1166, in _run results = self._run_stage() File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1252, in _run_stage return self._run_train() File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1274, in _run_train self._run_sanity_check() File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1343, in _run_sanity_check val_loop.run() File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/loops/loop.py", line 200, in run self.advance(*args, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py", line 155, in advance dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/loops/loop.py", line 200, in run self.advance(*args, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 143, in advance output = self._evaluation_step(**kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py", line 240, in _evaluation_step output = self.trainer._call_strategy_hook(hook_name, *kwargs.values()) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1704, in _call_strategy_hook output = fn(*args, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/pytorch_lightning/strategies/strategy.py", line 370, in validation_step return self.model.validation_step(*args, **kwargs) File "/nlsasfs/home/nltm-st/vipular/AFP2/src/train/trainer.py", line 96, in validation_step return self.step(batch, mode="valid") File "/nlsasfs/home/nltm-st/vipular/AFP2/src/train/trainer.py", line 50, in step cts_anc_emb, cts_pos_emb, dis_anc_emb, dis_pos_emb = self(anc, pos) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl return forward_call(*input, **kwargs) File "/nlsasfs/home/nltm-st/vipular/AFP2/src/train/trainer.py", line 41, in forward cts_anc_emb = self.encoder(anc) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl return forward_call(*input, **kwargs) File "/nlsasfs/home/nltm-st/vipular/AFP2/src/models/encoder.py", line 91, in forward context_emb = self.encoder(pos_enc) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl return forward_call(*input, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/transformer.py", line 238, in forward output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl return forward_call(*input, **kwargs) File "/nlsasfs/home/nltm-st/vipular/anaconda3/envs/dum/lib/python3.7/site-packages/torch/nn/modules/transformer.py", line 456, in forward src_mask if src_mask is not None else src_key_padding_mask, # TODO: split into two args RuntimeError: expected scalar type Half but found Float
class Encoder(nn.Module):
def __init__(self, inp_dims,patch_size, nhead, dim_feedforward, num_layers, concat_position=False):
super().__init__()
self.encoder_layer = nn.TransformerEncoderLayer(d_model=inp_dims, nhead=nhead, dim_feedforward=dim_feedforward, dropout=0.1, layer_norm_eps=1e-05, batch_first=True)
self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
self.patch_embedding_layer = PatchEmbedding_Layer(patch_size, inp_dims, concat_position)
def forward(self,x):
pos_enc = self.patch_embedding_layer(x)
context_emb = self.encoder(pos_enc)
return context_emb
Compute Environment:
-
Cuda compilation tools, release 11.3, V11.3.58
Build cuda_11.3.r11.3/compiler.29745058_0
-
Torch version: 1.12.1