How to run on multiple GPUs

I have a question about running on multiple GPUs. I launch the script with the command `CUDA_VISIBLE_DEVICES=0,1 python train.py`, and inside the script I set `os.environ["CUDA_VISIBLE_DEVICES"] = '2'` and call `model = model.cuda(2)`. Is this correct? Will it run on multiple GPUs? Thanks, best wishes.

I run the command as follows: `CUDA_VISIBLE_DEVICES=0,1 python train.py`

The train.py is as follows:" # -*- coding: utf-8 -*-
import os
os.environ[“CUDA_VISIBLE_DEVICES”]= ‘2’
from tqdm import tqdm
import os
import random
import torch
import torch.nn as nn

from transformers import RobertaTokenizer
from ERC_dataset import MELD_loader, Emory_loader, IEMOCAP_loader, DD_loader
from model import ERC_model

from ERCcombined import ERC_model

from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup
import pdb
import argparse, logging
from sklearn.metrics import precision_recall_fscore_support

from utils import make_batch_roberta, make_batch_bert, make_batch_gpt

def CELoss(pred_outs, labels):
    """Return the cross-entropy loss between logits and class labels.

    Args:
        pred_outs: logits of shape [batch, clsNum].
        labels: target class indices of shape [batch].

    Returns:
        The loss produced by ``nn.CrossEntropyLoss`` with its default
        settings.
    """
    loss = nn.CrossEntropyLoss()
    loss_val = loss(pred_outs, labels)
    return loss_val

# finetune RoBERTa-large

def main():
    """Fine-tune an ERC model and evaluate it on the dev and test splits.

    Configuration is read from the module-level ``args`` namespace, and log
    messages go through the module-level ``logger`` and ``streamHandler``
    (all three are created in the ``__main__`` guard).  After every epoch the
    dev split is scored; whenever the dev score improves, the test split is
    scored as well and the model weights are saved under ``save_path``.

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
    """
    # ---- Dataset loading ----
    batch_size = args.batch
    dataset = args.dataset
    dataclass = args.cls
    sample = args.sample
    model_type = args.pretrained
    freeze = args.freeze
    initial = args.initial

    if dataset == 'MELD':
        dataType = 'dyadic' if args.dyadic else 'multi'
        data_path = './dataset/MELD/'+dataType+'/'
        DATA_loader = MELD_loader
    elif dataset == 'EMORY':
        data_path = './dataset/EMORY/'
        DATA_loader = Emory_loader
    elif dataset == 'iemocap':
        data_path = './dataset/iemocap/'
        DATA_loader = IEMOCAP_loader
    elif dataset == 'dailydialog':
        data_path = './dataset/dailydialog/'
        DATA_loader = DD_loader
    else:
        # Fail early with a clear message instead of an unbound-name error
        # further down.
        raise ValueError('Unsupported dataset: {}'.format(dataset))

    if 'roberta' in model_type:
        make_batch = make_batch_roberta
    elif model_type == 'bert-large-uncased':
        make_batch = make_batch_bert
    else:
        make_batch = make_batch_gpt

    if freeze:
        freeze_type = 'freeze'
    else:
        freeze_type = 'no_freeze'

    train_path = data_path + dataset+'_train.txt'
    dev_path = data_path + dataset+'_dev.txt'
    test_path = data_path + dataset+'_test.txt'

    train_dataset = DATA_loader(train_path, dataclass)
    # Shuffling is switched off when only a fraction of the training batches
    # is used (presumably so every epoch sees the same leading subset).
    shuffle_train = not (sample < 1.0)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle_train, num_workers=4, collate_fn=make_batch)
    train_sample_num = int(len(train_dataloader)*sample)

    dev_dataset = DATA_loader(dev_path, dataclass)
    dev_dataloader = DataLoader(dev_dataset, batch_size=1, shuffle=False, num_workers=4, collate_fn=make_batch)

    test_dataset = DATA_loader(test_path, dataclass)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4, collate_fn=make_batch)

    # ---- Logging and path ----
    save_path = os.path.join(dataset+'_models', model_type, initial, freeze_type, dataclass, str(sample))

    print("###Save Path### ", save_path)
    log_path = os.path.join(save_path, 'train.log')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    fileHandler = logging.FileHandler(log_path)

    logger.addHandler(streamHandler)
    logger.addHandler(fileHandler)
    logger.setLevel(level=logging.DEBUG)

    # ---- Model loading ----
    if 'gpt2' in model_type:
        last = True
    else:
        last = False

    print('DataClass: ', dataclass, '!!!') # emotion
    clsNum = len(train_dataset.labelList)
    model = ERC_model(model_type, clsNum, last, freeze, initial)
    # Device indices are relative to CUDA_VISIBLE_DEVICES, so the default
    # device (cuda:0) is the first visible GPU.  Using the default keeps the
    # model on the same device as the batches, which are moved with a plain
    # `.cuda()` below; a hard-coded ordinal such as `.cuda(2)` fails when
    # fewer than three GPUs are visible.
    model = model.cuda()
    model.train()

    # ---- Training setting ----
    training_epochs = args.epoch
    max_grad_norm = args.norm
    lr = args.lr
    num_training_steps = len(train_dataset)*training_epochs
    num_warmup_steps = len(train_dataset)
    optimizer = torch.optim.AdamW(model.train_params, lr=lr) # , eps=1e-06, weight_decay=0.01
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)

    # ---- Input & label setting ----
    best_dev_fscore = 0
    best_dev_fscore_macro, best_dev_fscore_micro = 0, 0
    best_epoch = 0
    # Test metrics stay None until a dev improvement triggers a test run, so
    # the final log line never hits an unbound name.
    test_acc = None
    test_fbeta = None
    test_fbeta_macro, test_fbeta_micro = None, None
    for epoch in tqdm(range(training_epochs)):
        model.train()
        for i_batch, data in enumerate(train_dataloader):
            if i_batch > train_sample_num:
                print(i_batch, train_sample_num)
                break

            # Prediction
            batch_input_tokens, batch_labels, batch_speaker_tokens = data
            batch_input_tokens, batch_labels = batch_input_tokens.cuda(), batch_labels.cuda()

            pred_logits = model(batch_input_tokens, batch_speaker_tokens)

            # Loss calculation & training
            loss_val = CELoss(pred_logits, batch_labels)

            loss_val.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        # ---- Dev & test evaluation ----
        model.eval()
        if dataset == 'dailydialog': # micro & macro
            dev_acc, dev_pred_list, dev_label_list = _CalACC(model, dev_dataloader)
            dev_pre_macro, dev_rec_macro, dev_fbeta_macro, _ = precision_recall_fscore_support(dev_label_list, dev_pred_list, average='macro')
            dev_pre_micro, dev_rec_micro, dev_fbeta_micro, _ = precision_recall_fscore_support(dev_label_list, dev_pred_list, labels=[0,1,2,3,5,6], average='micro') # neutral x

            dev_fscore = dev_fbeta_macro+dev_fbeta_micro

            # Best score & model save
            if dev_fscore > best_dev_fscore_macro + best_dev_fscore_micro:
                best_dev_fscore_macro = dev_fbeta_macro
                best_dev_fscore_micro = dev_fbeta_micro

                test_acc, test_pred_list, test_label_list = _CalACC(model, test_dataloader)
                test_pre_macro, test_rec_macro, test_fbeta_macro, _ = precision_recall_fscore_support(test_label_list, test_pred_list, average='macro')
                test_pre_micro, test_rec_micro, test_fbeta_micro, _ = precision_recall_fscore_support(test_label_list, test_pred_list, labels=[0,1,2,3,5,6], average='micro') # neutral x

                best_epoch = epoch
                _SaveModel(model, save_path)
        else: # weight
            dev_acc, dev_pred_list, dev_label_list = _CalACC(model, dev_dataloader)
            dev_pre, dev_rec, dev_fbeta, _ = precision_recall_fscore_support(dev_label_list, dev_pred_list, average='weighted')

            # Best score & model save
            if dev_fbeta > best_dev_fscore:
                best_dev_fscore = dev_fbeta

                test_acc, test_pred_list, test_label_list = _CalACC(model, test_dataloader)
                test_pre, test_rec, test_fbeta, _ = precision_recall_fscore_support(test_label_list, test_pred_list, average='weighted')

                best_epoch = epoch
                _SaveModel(model, save_path)

        if epoch % 5 == 0:
            logger.info('Epoch: {}'.format(epoch))
            if dataset == 'dailydialog': # micro & macro
                logger.info('Devleopment ## accuracy: {}, macro-fscore: {}, micro-fscore: {}'.format(dev_acc, dev_fbeta_macro, dev_fbeta_micro))
                logger.info('')
            else:
                logger.info('Devleopment ## accuracy: {}, precision: {}, recall: {}, fscore: {}'.format(dev_acc, dev_pre, dev_rec, dev_fbeta))
                logger.info('')

    if dataset == 'dailydialog': # micro & macro
        logger.info('Final Fscore ## test-accuracy: {}, test-macro: {}, test-micro: {}, test_epoch: {}'.format(test_acc, test_fbeta_macro, test_fbeta_micro, best_epoch))
    else:
        logger.info('Final Fscore ## test-accuracy: {}, test-fscore: {}, test_epoch: {}'.format(test_acc, test_fbeta, best_epoch))

def _CalACC(model, dataloader):
model.eval()
correct = 0
label_list = []
pred_list = []

# label arragne
with torch.no_grad():
    for i_batch, data in enumerate(dataloader):            
        """Prediction"""
        batch_input_tokens, batch_labels, batch_speaker_tokens = data
        batch_input_tokens, batch_labels = batch_input_tokens.cuda(), batch_labels.cuda()
        
        pred_logits = model(batch_input_tokens, batch_speaker_tokens) # (1, clsNum)
        
        """Calculation"""    
        pred_label = pred_logits.argmax(1).item()
        true_label = batch_labels.item()
        
        pred_list.append(pred_label)
        label_list.append(true_label)
        if pred_label == true_label:
            correct += 1
    acc = correct/len(dataloader)
return acc, pred_list, label_list

def _SaveModel(model, path):
if not os.path.exists(path):
os.makedirs(path)
torch.save(model.state_dict(), os.path.join(path, ‘model.bin’))

# Script entry point: parse the command-line options, set up the module-level
# logger objects that main() reads, then start training.
if __name__ == '__main__':
    torch.cuda.empty_cache()

    # Parameters
    parser  = argparse.ArgumentParser(description = "Emotion Classifier" )
    parser.add_argument( "--batch", type=int, help = "batch_size", default = 1)

    parser.add_argument( "--epoch", type=int, help = 'training epohcs', default = 10) # 12 for iemocap
    parser.add_argument( "--norm", type=int, help = "max_grad_norm", default = 10)
    parser.add_argument( "--lr", type=float, help = "learning rate", default = 1e-6) # 1e-5
    parser.add_argument( "--sample", type=float, help = "sampling trainign dataset", default = 1.0) #

    parser.add_argument( "--dataset", help = 'MELD or EMORY or iemocap or dailydialog', default = 'MELD')

    parser.add_argument( "--pretrained", help = 'roberta-large or bert-large-uncased or gpt2 or gpt2-large or gpt2-medium', default = 'roberta-large')
    parser.add_argument( "--initial", help = 'pretrained or scratch', default = 'pretrained')
    parser.add_argument('-dya', '--dyadic', action='store_true', help='dyadic conversation')
    parser.add_argument('-fr', '--freeze', action='store_true', help='freezing PM')
    parser.add_argument( "--cls", help = 'emotion or sentiment', default = 'emotion')

    # args, logger and streamHandler are module-level names used by main().
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    streamHandler = logging.StreamHandler()

    main()
"

No, this should not work for a few reasons:

  • Calling `os.environ["CUDA_VISIBLE_DEVICES"] = '2'` inside the script will override the previously specified environment variable, so what’s the reason to set it in the terminal?
  • model.cuda(2) will fail, since CUDA_VISIBLE_DEVICES maps the GPUs to ids starting with 0. E.g. CUDA_VISIBLE_DEVICES=2 python script.py will allow script.py to use GPU2 mapped to cuda:0 inside the script.

Many thanks for your detailed advice. I will try to solve it.