How do I use exec in a PyTorch Module and train with multiple GPUs?

I want to create a model that builds a network automatically: I just enter the name of each layer and its necessary parameters, and the network is created. The model I wrote is shown below.
If I don't wrap the model with model=nn.DataParallel(model) and train on a single GPU, the code runs fine.
However, when I train with multiple GPUs, I get this error:

RuntimeError                              Traceback (most recent call last)
Cell In[32], line 1
----> 1 model,predicty,testy,r,p,heatmap,weights=Auto_CNN_pytorch(vwave,vx,[['cov',[16,3,3]],['batchnormalization'],['activation','relu'],['cov',[64,3,3]],['batchnormalization'],['activation','relu'],['maxpooling',[2,2]],['batchnormalization'],['activation','relu'],['cov',[128,3,3]],['batchnormalization'],['activation','relu'],['cov',[256,3,3]],['batchnormalization'],['activation','relu'],['maxpooling',[2,2]],['batchnormalization'],['activation','relu'],['flatten'],['fc',512],['batchnormalization'],['activation','relu'],['fc',256],['batchnormalization'],['activation','relu'],['fc',1],['batchnormalization'],['activation','sigmoid']],test_size=0.25,task_mode='binary_classify',if_best_mode='no',modelpath=None,ifrandom_split='yes',cov_padding='same',cov_strides=1,pooling_strides=2,if_print_model='yes',loss_function='default',optimizer='Adam',metrics='default',learning_rate=0.01,epochs=500,batch_size=20,ifheatmap='yes',ifweight='oob',ifmute='no',ifsave='no',savepath=None,device='gpu')

Cell In[31], line 416, in Auto_CNN_pytorch(vy, vx, model_list, test_size, task_mode, if_best_mode, modelpath, ifrandom_split, cov_padding, cov_strides, pooling_strides, if_print_model, loss_function, optimizer, metrics, learning_rate, epochs, batch_size, ifheatmap, ifweight, ifmute, ifsave, savepath, device)
    414     train_metric = metric(train_output, trainy_tensor[j*batch_size:].to(devices))
    415 elif j != int(trainx.shape[0]/batch_size):
--> 416     train_output = model(trainx_tensor[j*batch_size:(j+1)*batch_size].to(devices))
    417     train_loss = loss(train_output, trainy_tensor[j*batch_size:(j+1)*batch_size].to(devices))
    418     train_metric = metric(train_output, trainy_tensor[j*batch_size:(j+1)*batch_size].to(devices))

File D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\parallel\data_parallel.py:171, in DataParallel.forward(self, *inputs, **kwargs)
    169     return self.module(*inputs[0], **kwargs[0])
    170 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 171 outputs = self.parallel_apply(replicas, inputs, kwargs)
    172 return self.gather(outputs, self.output_device)

File D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\parallel\data_parallel.py:181, in DataParallel.parallel_apply(self, replicas, inputs, kwargs)
    180 def parallel_apply(self, replicas, inputs, kwargs):
--> 181     return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])

File D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\parallel\parallel_apply.py:89, in parallel_apply(modules, inputs, kwargs_tup, devices)
     87     output = results[i]
     88     if isinstance(output, ExceptionWrapper):
---> 89         output.reraise()
     90     outputs.append(output)
     91 return outputs

File D:\anaconda\envs\pytorch\lib\site-packages\torch\_utils.py:644, in ExceptionWrapper.reraise(self)
    640 except TypeError:
    641     # If the exception takes multiple arguments, don't try to
    642     # instantiate since we don't know how to
    643     raise RuntimeError(msg) from None
--> 644 raise exception

RuntimeError: Caught RuntimeError in replica 1 on device 1.
Original Traceback (most recent call last):
  File "D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\parallel\parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\Users\Administrator\AppData\Local\Temp\ipykernel_23176\3450847531.py", line 235, in forward
    exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(x)', globals(), self.__dict__)
  File "<string>", line 1, in <module>
  File "D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "D:\anaconda\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__cudnn_convolution)

This error seems to indicate that the parameters of my model's convolutional layers end up on different devices. But I called .to(device) on all the data and on the model.
Here is the main function that calls the model:

import numpy as np
vx=np.random.randint(1, high=100, size=(3080,51,161,7))
vwave=np.random.randint(0, high=2, size=(3080))  # high=2 so that both 0 and 1 labels occur (high=1 would yield only zeros)
vx=(vx-np.nanmean(vx,axis=0))/np.nanstd(vx,axis=0)
model,predicty,testy,r,p,heatmap,weights=Auto_CNN_pytorch(vwave,vx,[['cov',[16,3,3]],['batchnormalization'],['activation','relu'],['cov',[64,3,3]],['batchnormalization'],['activation','relu'],['maxpooling',[2,2]],['batchnormalization'],['activation','relu'],['cov',[128,3,3]],['batchnormalization'],['activation','relu'],['cov',[256,3,3]],['batchnormalization'],['activation','relu'],['maxpooling',[2,2]],['batchnormalization'],['activation','relu'],['flatten'],['fc',512],['batchnormalization'],['activation','relu'],['fc',256],['batchnormalization'],['activation','relu'],['fc',1],['batchnormalization'],['activation','sigmoid']],test_size=0.25,task_mode='binary_classify',if_best_mode='no',modelpath=None,ifrandom_split='yes',cov_padding='same',cov_strides=1,pooling_strides=2,if_print_model='yes',loss_function='default',optimizer='Adam',metrics='default',learning_rate=0.01,epochs=500,batch_size=20,ifheatmap='no',ifweight='no',ifmute='no',ifsave='no',savepath=None,device='gpu')

As you can see, I use model_list to specify the network architecture, so a network can be built by just entering the name of each layer and its necessary parameters (e.g. the convolutional kernel size).
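
For example, a shorter model_list of the same form looks like this (the values are just an illustration):

model_list = [
    ['cov', [16, 3, 3]],        # Conv2d: 16 output channels, 3x3 kernel
    ['batchnormalization'],     # BatchNorm2d over the current channel count
    ['activation', 'relu'],     # ReLU activation
    ['maxpooling', [2, 2]],     # MaxPool2d with a 2x2 window
    ['flatten'],                # flatten before the fully connected layers
    ['fc', 1],                  # Linear layer with one output
    ['activation', 'sigmoid'],  # sigmoid output for binary classification
]
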
The model above has been verified to work correctly on a single GPU. I'm guessing the error is caused by the use of the exec function inside my Module, but without exec I don't see how to call self.convn (where n is the index of the nth convolutional layer) the way my model does. Is there a better solution, either for building the network automatically from a model_list like I do, or for training with multiple GPUs? (I don't want to use nn.ModuleList or nn.Sequential, because other networks may be non-sequential in the future; e.g. a ResNet with its skip connections cannot be built with nn.Sequential alone.) I would be grateful if someone could fix this!
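
For reference, here is a minimal sketch of the exec-free alternative I have been considering: it registers numbered layers with setattr and looks them up with getattr (the class name, default channel count, and reduced layer subset here are hypothetical, and I have not verified this with nn.DataParallel):

from torch import nn

class DynamicNet(nn.Module):
    # sketch: build numbered layers without exec; setattr goes through
    # nn.Module.__setattr__, so every layer is registered as a submodule
    # and should be replicated by nn.DataParallel
    def __init__(self, model_list, in_channels=7):
        super().__init__()
        self.model_list = model_list
        for i, spec in enumerate(model_list):
            if spec[0] == 'cov':
                setattr(self, 'conv' + str(i + 1),
                        nn.Conv2d(in_channels, spec[1][0],
                                  (spec[1][1], spec[1][2]), padding='same'))
                in_channels = spec[1][0]
            elif spec[0] == 'activation' and spec[1] == 'relu':
                setattr(self, 'act' + str(i + 1), nn.ReLU())

    def forward(self, x):
        # getattr replaces the exec('model_convN=self.convN(...)') pattern;
        # intermediates stay in a local variable instead of self.__dict__,
        # which also avoids mutating module state during forward
        out = x
        for i, spec in enumerate(self.model_list):
            if spec[0] == 'cov':
                out = getattr(self, 'conv' + str(i + 1))(out)
            elif spec[0] == 'activation' and spec[1] == 'relu':
                out = getattr(self, 'act' + str(i + 1))(out)
        return out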

This is the first part of my model:

# Convolutional neural network, PyTorch version
def Auto_CNN_pytorch(vy,vx,model_list,test_size=0.25,task_mode='regression',if_best_mode='no',modelpath=None,ifrandom_split='yes',cov_padding='same',cov_strides=1,pooling_strides=2,if_print_model='yes',loss_function='default',optimizer='SGD',metrics='default',learning_rate=0.01,epochs=2000,batch_size=20,ifheatmap='yes',ifweight='yes',ifmute='no',ifsave='no',savepath=None,device='cpu'):
    from torch.nn import Module,BatchNorm1d,BatchNorm2d,LayerNorm,Conv2d,MaxPool2d,AvgPool2d,Dropout,LeakyReLU,ReLU,PReLU,Sigmoid,Tanh,ELU,Softmax,Linear,Flatten
    from torch.optim import Adam,SGD,NAdam
    import torch
    from torch import nn
    import torchmetrics
    import math
    from sklearn.model_selection import train_test_split
    import numpy as np
    from scipy.stats import pearsonr
    import os
    from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
    import sklearn
    import copy
    import shap
    import datetime
    
    if device=='gpu':
        devices=torch.device('cuda:0')
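        # note: nn.DataParallel gathers its outputs on this first device (cuda:0)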
    else:
        devices=torch.device('cpu')
    if task_mode=='regression':
        if loss_function=='default' or loss_function=='MSELoss':
            loss=torch.nn.MSELoss()
        elif loss_function=='L1Loss':
            loss=torch.nn.L1Loss()
        elif loss_function=='PoissonNLLLoss':
            loss=torch.nn.PoissonNLLLoss()
        elif loss_function=='GaussianNLLLoss':
            loss=torch.nn.GaussianNLLLoss()
        elif loss_function=='KLDivLoss':
            loss=torch.nn.KLDivLoss()
        elif loss_function=='HuberLoss':
            loss=torch.nn.HuberLoss()
        elif loss_function=='SmoothL1Loss':
            loss=torch.nn.SmoothL1Loss()
        elif loss_function=='Pearsonr':
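            # custom loss: (1 - Pearson correlation)**1.5, computed column-wise along dim 0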
            class loss_pearsonr(nn.Module):
                def __init__(self):
                    super().__init__()

                def forward(self, y, x):
                    y_true_mean=torch.nanmean(y,dim=0,keepdim=True)
                    y_pred_mean=torch.nanmean(x,dim=0,keepdim=True)
                    cov=torch.nansum((y-y_true_mean)*(x-y_pred_mean),dim=0,keepdim=True)
                    y_true_v=torch.nansum(torch.square((y-y_true_mean)),dim=0,keepdim=True)
                    y_pred_v=torch.nansum(torch.square((x-y_pred_mean)),dim=0,keepdim=True)
                    y_true_v=torch.sqrt(y_true_v)
                    y_pred_v=torch.sqrt(y_pred_v)
                    pearson=cov/(y_true_v*y_pred_v)
                    return (1-pearson)**1.5
            loss=loss_pearsonr()
        if metrics=='default' or metrics=='MSELoss':
            metric=torch.nn.MSELoss()
        elif metrics=='L1Loss':
            metric=torch.nn.L1Loss()
        elif metrics=='PoissonNLLLoss':
            metric=torch.nn.PoissonNLLLoss()
        elif metrics=='GaussianNLLLoss':
            metric=torch.nn.GaussianNLLLoss()
        elif metrics=='KLDivLoss':
            metric=torch.nn.KLDivLoss()
        elif metrics=='HuberLoss':
            metric=torch.nn.HuberLoss()
        elif metrics=='SmoothL1Loss':
            metric=torch.nn.SmoothL1Loss()
        elif metrics=='Pearsonr':
            class metric_pearsonr(nn.Module):
                def __init__(self):
                    super().__init__()

                def forward(self, y, x):
                    y_true_mean=torch.nanmean(y,dim=0,keepdim=True)
                    y_pred_mean=torch.nanmean(x,dim=0,keepdim=True)
                    cov=torch.nansum((y-y_true_mean)*(x-y_pred_mean),dim=0,keepdim=True)
                    y_true_v=torch.nansum(torch.square((y-y_true_mean)),dim=0,keepdim=True)
                    y_pred_v=torch.nansum(torch.square((x-y_pred_mean)),dim=0,keepdim=True)
                    y_true_v=torch.sqrt(y_true_v)
                    y_pred_v=torch.sqrt(y_pred_v)
                    pearson=cov/(y_true_v*y_pred_v)
                    return (1-pearson)**1.5
            metric=metric_pearsonr()
    elif task_mode=='binary_classify':
        if loss_function=='default' or loss_function=='BCELoss':
            loss=torch.nn.BCELoss()
        elif loss_function=='BCEWithLogitsLoss':
            loss=torch.nn.BCEWithLogitsLoss()
        elif loss_function=='SoftMarginLoss':
            loss=torch.nn.SoftMarginLoss()
        elif loss_function=='MultiLabelSoftMarginLoss':
            loss=torch.nn.MultiLabelSoftMarginLoss()
        if metrics=='default' or metrics=='f1':
            metric=torchmetrics.F1Score(task="binary").to(devices)
        elif metrics=='accuracy':
            metric=torchmetrics.Accuracy(task="binary").to(devices)
        elif metrics=='precision':
            metric=torchmetrics.Precision(task="binary").to(devices)
        elif metrics=='recall':
            metric=torchmetrics.Recall(task="binary").to(devices)
        elif metrics=='BCELoss':
            metric=torch.nn.BCELoss()
        elif metrics=='BCEWithLogitsLoss':
            metric=torch.nn.BCEWithLogitsLoss()
        elif metrics=='SoftMarginLoss':
            metric=torch.nn.SoftMarginLoss()
        elif metrics=='MultiLabelSoftMarginLoss':
            metric=torch.nn.MultiLabelSoftMarginLoss()
    elif task_mode=='multi_classify':
        if loss_function=='default' or loss_function=='CrossEntropyLoss':
            loss=torch.nn.CrossEntropyLoss()
        elif loss_function=='NLLLoss':
            loss=torch.nn.NLLLoss()
        elif loss_function=='TripletMarginLoss':
            loss=torch.nn.TripletMarginLoss()
        elif loss_function=='KLDivergence':
            loss=torch.nn.KLDivLoss()
        elif loss_function=='HingeEmbeddingLoss':
            loss=torch.nn.HingeEmbeddingLoss()
        elif loss_function=='MultiLabelMarginLoss':
            loss=torch.nn.MultiLabelMarginLoss()
        elif loss_function=='TripletMarginWithDistanceLoss':
            loss=torch.nn.TripletMarginWithDistanceLoss()
        if metrics=='default' or metrics=='accuracy':
            metric=torchmetrics.Accuracy(task="multiclass").to(devices)
        elif metrics=='CrossEntropyLoss':
            metric=torch.nn.CrossEntropyLoss()
        elif metrics=='NLLLoss':
            metric=torch.nn.NLLLoss()
        elif metrics=='TripletMarginLoss':
            metric=torch.nn.TripletMarginLoss()
        elif metrics=='KLDivergence':
            metric=torch.nn.KLDivLoss()
        elif metrics=='HingeEmbeddingLoss':
            metric=torch.nn.HingeEmbeddingLoss()
        elif metrics=='MultiLabelMarginLoss':
            metric=torch.nn.MultiLabelMarginLoss()
        elif metrics=='TripletMarginWithDistanceLoss':
            metric=torch.nn.TripletMarginWithDistanceLoss()
    heatmap=0
    weights=0
    model=0
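    # reorder inputs from NHWC to NCHW, the memory layout torch Conv2d expects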
    vx=vx.transpose(0,3,1,2)
    if vy.ndim==1:
        vy=vy.reshape(vy.shape[0],1)
    if ifrandom_split=='yes':
        trainy,testy,trainx,testx = train_test_split(vy,vx,test_size=test_size,random_state=25)
    else:
        index=int((1-test_size)*vy.shape[0])
        trainy=vy[:index]
        testy=vy[index:]
        trainx=vx[:index,:,:,:]
        testx=vx[index:,:,:,:]
    if if_best_mode=='no':
        class Model(nn.Module):
            def __init__(self,model_list,trainx,cov_strides,cov_padding,pooling_strides):
                super(Model,self).__init__()
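                # the exec calls below write imported names into self.__dict__ so that
                # later exec/eval strings can resolve nn, Conv2d, np, etc. by name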
                exec('from torch import nn', globals(), self.__dict__)
                exec('from torch.nn import Module,BatchNorm1d,BatchNorm2d,LayerNorm,Conv2d,MaxPool2d,AvgPool2d,Dropout,LeakyReLU,ReLU,PReLU,Sigmoid,Tanh,ELU,Softmax,Linear,Flatten', globals(), self.__dict__)
                exec('import numpy as np', globals(), self.__dict__)
                self.cov_strides=cov_strides
                self.cov_padding=cov_padding
                self.pooling_strides=pooling_strides
                self.trainx=trainx
                self.model_list=model_list
                self.hight=self.trainx.shape[2]
                self.weight=self.trainx.shape[3]
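                # store a reference to this module in its own __dict__ so that
                # exec/eval strings that mention 'self' can resolve the name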
                self.__dict__['self']=self
                self.flattened=False
                
                for i in range(len(self.model_list)):
                    self.__dict__['i']=i
                    if self.model_list[i][0] == 'cov':
                        if self.cov_padding=='valid':
                            self.hight=1+math.floor((self.hight-self.model_list[i][1][1])/self.cov_strides)
                            self.weight=1+math.floor((self.weight-self.model_list[i][1][2])/self.cov_strides)
                            if self.hight < 1 or self.weight < 1:
                                print('too many convolutional layers')
                                break
                        if i==0:
                            exec('self.conv'+str(i+1)+'=Conv2d(self.trainx.shape[1],self.model_list[i][1][0],(self.model_list[i][1][1],self.model_list[i][1][2]),stride=self.cov_strides,padding=self.cov_padding)', globals(), self.__dict__)
                            exec('in_channels=self.model_list[i][1][0]', globals(), self.__dict__)
                        else:
                            exec('self.conv'+str(i+1)+'=Conv2d(in_channels,self.model_list[i][1][0],(self.model_list[i][1][1],self.model_list[i][1][2]),stride=self.cov_strides,padding=self.cov_padding)', globals(), self.__dict__)
                            exec('in_channels=self.model_list[i][1][0]', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'maxpooling':
                        self.hight=1+math.floor((self.hight-self.model_list[i][1][0])/self.pooling_strides)
                        self.weight=1+math.floor((self.weight-self.model_list[i][1][1])/self.pooling_strides)
                        if self.hight < 1 or self.weight < 1:
                            print('too many pooling layers')
                            break
                        exec('self.pool'+str(i+1)+'=MaxPool2d((self.model_list[i][1][0],self.model_list[i][1][1]),stride=self.pooling_strides)', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'avepooling':
                        self.hight=1+math.floor((self.hight-self.model_list[i][1][0])/self.pooling_strides)
                        self.weight=1+math.floor((self.weight-self.model_list[i][1][1])/self.pooling_strides)
                        if self.hight < 1 or self.weight < 1:
                            print('too many pooling layers')
                            break
                        exec('self.pool'+str(i+1)+'=AvgPool2d((self.model_list[i][1][0],self.model_list[i][1][1]),stride=self.pooling_strides)', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'batchnormalization' and not self.flattened:
                        exec('self.norm'+str(i+1)+'=BatchNorm2d(in_channels)', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'batchnormalization' and self.flattened:
                        exec('self.norm'+str(i+1)+'=BatchNorm1d(in_channels)', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'layernormalization':
                        exec('self.norm'+str(i+1)+'=LayerNorm(in_channels)', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'activation':
                        if self.model_list[i][1]=='elu':
                            exec('self.act'+str(i+1)+'=ELU()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='leakyrelu':
                            exec('self.act'+str(i+1)+'=LeakyReLU()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='prelu':
                            exec('self.act'+str(i+1)+'=PReLU()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='relu':
                            exec('self.act'+str(i+1)+'=ReLU()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='sigmoid':
                            exec('self.act'+str(i+1)+'=Sigmoid()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='tanh':
                            exec('self.act'+str(i+1)+'=Tanh()', globals(), self.__dict__)
                        elif self.model_list[i][1]=='softmax':
                            exec('self.act'+str(i+1)+'=Softmax()', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'flatten':
                        exec('self.fla'+str(i+1)+'=Flatten()', globals(), self.__dict__)
                        exec('in_channels=self.hight*self.weight*in_channels', globals(), self.__dict__)
                        self.flattened=True
                    elif self.model_list[i][0] =='fc':
                        exec('self.fc'+str(i+1)+'=Linear(in_channels,self.model_list[i][1])', globals(), self.__dict__)
                        exec('in_channels=self.model_list[i][1]', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'dropout':
                        exec('self.drop'+str(i+1)+'=Dropout(self.model_list[i][1])', globals(), self.__dict__)
            def forward(self, x):
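                # the exec/eval strings below read and write the input and all
                # intermediate results (x, model_conv1, model_act2, ...) in self.__dict__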
                self.__dict__['x']=x
                for i in range(len(self.model_list)):
                    if self.model_list[i][0] == 'cov':
                        if i==0:
                            exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(x)', globals(), self.__dict__)
                        else:
                            if self.model_list[i-1][0]=='cov':
                                exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation' :
                                exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                exec('model_conv'+str(i+1)+'=self.conv'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'maxpooling' or self.model_list[i][0] == 'avepooling':
                        if self.model_list[i-1][0]=='cov' :
                            exec('model_pool'+str(i+1)+'=self.pool'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                            exec('model_pool'+str(i+1)+'=self.pool'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                            exec('model_pool'+str(i+1)+'=self.pool'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='activation' :
                            exec('model_pool'+str(i+1)+'=self.pool'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='dropout' :
                            exec('model_pool'+str(i+1)+'=self.pool'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'batchnormalization' or self.model_list[i][0] == 'layernormalization':
                        if i==len(self.model_list)-1:
                            if self.model_list[i-1][0]=='cov' :
                                outputs=eval('self.norm'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                outputs=eval('self.norm'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                outputs=eval('self.norm'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                outputs=eval('self.norm'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                outputs=eval('self.norm'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                outputs=eval('self.norm'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                outputs=eval('self.norm'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                        else:
                            if self.model_list[i-1][0]=='cov' :
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                exec('model_norm'+str(i+1)+'=self.norm'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'activation':
                        if i==len(self.model_list)-1:
                            if self.model_list[i-1][0]=='cov' :
                                outputs=eval('self.act'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                outputs=eval('self.act'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                outputs=eval('self.act'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                outputs=eval('self.act'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                outputs=eval('self.act'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                outputs=eval('self.act'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                outputs=eval('self.act'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                        else:
                            if self.model_list[i-1][0]=='cov':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                exec('model_act'+str(i+1)+'=self.act'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'flatten':
                        if self.model_list[i-1][0]=='cov' :
                            exec('model_fla'+str(i+1)+'=self.fla'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                            exec('model_fla'+str(i+1)+'=self.fla'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                            exec('model_fla'+str(i+1)+'=self.fla'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='activation':
                            exec('model_fla'+str(i+1)+'=self.fla'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                        elif self.model_list[i-1][0]=='dropout':
                            exec('model_fla'+str(i+1)+'=self.fla'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] =='fc':
                        if i==len(self.model_list)-1:
                            if self.model_list[i-1][0]=='batchnormalization' :
                                outputs=eval('self.fc'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                outputs=eval('self.fc'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout':
                                outputs=eval('self.fc'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                outputs=eval('self.fc'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                outputs=eval('self.fc'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                        else:
                            if self.model_list[i-1][0]=='batchnormalization' :
                                exec('model_fc'+str(i+1)+'=self.fc'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                exec('model_fc'+str(i+1)+'=self.fc'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout':
                                exec('model_fc'+str(i+1)+'=self.fc'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                exec('model_fc'+str(i+1)+'=self.fc'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                exec('model_fc'+str(i+1)+'=self.fc'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                    elif self.model_list[i][0] == 'dropout':
                        if i==len(self.model_list)-1:
                            if self.model_list[i-1][0]=='cov' :
                                outputs=eval('self.drop'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                outputs=eval('self.drop'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                outputs=eval('self.drop'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                outputs=eval('self.drop'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                outputs=eval('self.drop'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                outputs=eval('self.drop'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                outputs=eval('self.drop'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                        else:
                            if self.model_list[i-1][0]=='cov':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_conv'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='maxpooling' or self.model_list[i-1][0]=='avepooling':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_pool'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='batchnormalization' or self.model_list[i-1][0]=='layernormalization':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_norm'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='activation':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_act'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='dropout' :
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_drop'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='fc':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_fc'+str(i)+')', globals(), self.__dict__)
                            elif self.model_list[i-1][0]=='flatten':
                                exec('model_drop'+str(i+1)+'=self.drop'+str(i+1)+'(model_fla'+str(i)+')', globals(), self.__dict__)
                return outputs

This is the second part of my model:

        model=Model(model_list,trainx,cov_strides,cov_padding,pooling_strides)
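        # DataParallel replicates the model on every visible GPU and splits each batch along dim 0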
        model=nn.DataParallel(model)
        model.to(devices)
        if optimizer == 'SGD':
            opt = SGD(model.parameters(), lr=learning_rate)
        elif optimizer == 'Adam':
            opt = Adam(model.parameters(), lr=learning_rate)
        elif optimizer == 'Nadam':
            opt = NAdam(model.parameters(), lr=learning_rate)
    elif if_best_mode=='yes' or if_best_mode=='load':
        model = torch.load(modelpath)
        model=nn.DataParallel(model)
        model.to(devices)
    trainx=np.nan_to_num(trainx,nan=0)
    testx=np.nan_to_num(testx,nan=0)
    trainx_tensor=torch.tensor(trainx,dtype=torch.float32)
    trainy_tensor=torch.tensor(trainy,dtype=torch.float32)
    testx_tensor=torch.tensor(testx,dtype=torch.float32)
    testy_tensor=torch.tensor(testy,dtype=torch.float32)
    if if_print_model=='yes':
        print(model)
    if epochs!=0:
        for i in range(epochs):
            start = datetime.datetime.now()
            for j in range(int(trainx.shape[0]/batch_size)+1):
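                # the last index handles the remainder batch when the sample count is not a multiple of batch_size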
                if j == int(trainx.shape[0]/batch_size) and int(trainx.shape[0]%batch_size)!=0:
                    train_output = model(trainx_tensor[j*batch_size:].to(devices))
                    train_loss = loss(train_output, trainy_tensor[j*batch_size:].to(devices))
                    train_metric = metric(train_output, trainy_tensor[j*batch_size:].to(devices))
                elif j != int(trainx.shape[0]/batch_size):
                    train_output = model(trainx_tensor[j*batch_size:(j+1)*batch_size].to(devices))
                    train_loss = loss(train_output, trainy_tensor[j*batch_size:(j+1)*batch_size].to(devices))
                    train_metric = metric(train_output, trainy_tensor[j*batch_size:(j+1)*batch_size].to(devices))
                else:
                    continue  # sample count divides evenly into batches; skip the empty last index
                opt.zero_grad()
                train_loss.backward()
                opt.step() 
            with torch.no_grad():
                test_output = model(testx_tensor.to(devices))
                test_loss = loss(test_output,testy_tensor.to(devices))
                test_metric = metric(test_output,testy_tensor.to(devices))
            end = datetime.datetime.now()
            print('Epoch',i+1,'train loss:',train_loss.item(),', metric:',train_metric.item(),'  test loss:',test_loss.item(),', metric:',test_metric.item(),', epoch time:',end - start)
    predicty = model(testx_tensor.to(devices))
    predicty=predicty.cpu().detach().numpy()
    predicty = np.nan_to_num(predicty,nan=0)
    if task_mode=='regression':
        r=np.zeros((testy.shape[1]))
        p=np.zeros((testy.shape[1]))
        for i in range(testy.shape[1]):
            r[i],p[i] = pearsonr(predicty[:,i],testy[:,i])
            r=np.nan_to_num(r,nan=0)
    elif task_mode=='binary_classify':
        accuracy=np.zeros((testy.shape[1]))
        recall=np.zeros((testy.shape[1]))
        precision=np.zeros((testy.shape[1]))
        f1=np.zeros((testy.shape[1]))
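        # round the sigmoid outputs to 0/1, i.e. threshold the predictions at 0.5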
        for i in range(predicty.shape[1]):
            predicty[:,i]=[int(round(predicty[j,i],0)) for j in range(predicty.shape[0])]
        r=np.zeros((testy.shape[1]))
        for i in range(testy.shape[1]):
            if metrics=='Recall':
                r[i]=recall_score(testy[:,i], predicty[:,i])
            elif metrics=='Precision':
                r[i]=precision_score(testy[:,i], predicty[:,i])
            else:
                r[i]=accuracy_score(testy[:,i], predicty[:,i])
            recall[i]=recall_score(testy[:,i], predicty[:,i])
            precision[i]=precision_score(testy[:,i], predicty[:,i])
            accuracy[i]=accuracy_score(testy[:,i], predicty[:,i])
            f1[i]=f1_score(testy[:,i], predicty[:,i])
        p=0
    elif task_mode=='multi_classify':
        for i in range(predicty.shape[1]):
            predicty[:,i]=[int(round(predicty[j,i],0)) for j in range(predicty.shape[0])]
        r=np.zeros((testy.shape[1]))
        for i in range(testy.shape[1]):
            r[i]=accuracy_score(testy[:,i], predicty[:,i])
        p=0
    if ifmute == 'no':
        if task_mode=='regression':
            print('correlation coefficient',np.nanmean(r))
        elif task_mode=='binary_classify':
            print('F1',np.nanmean(f1),'accuracy',np.nanmean(accuracy),'recall',np.nanmean(recall),'precision',np.nanmean(precision))
        elif task_mode=='multi_classify':
            print('accuracy',np.nanmean(r))
    if ifheatmap=='yes':
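        # SHAP heatmap: average absolute DeepExplainer attributions over at most 100 random test samples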
        heatmap=np.zeros((testx.shape[1],testx.shape[2],testx.shape[3],testy.shape[1]))
        if testx.shape[0]>=100:
            index=np.random.randint(0,testx.shape[0],size=100)
            explainer = shap.DeepExplainer(model,testx_tensor[index,:,:,:].to(devices))
            heatmap_more = np.abs(explainer.shap_values(testx_tensor[index,:,:,:].to(devices)))
        else:
            explainer = shap.DeepExplainer(model,testx_tensor.to(devices))
            heatmap_more = np.abs(explainer.shap_values(testx_tensor.to(devices)))
        heatmap=np.nanmean(heatmap_more,axis=0)
        heatmap=np.array(heatmap).transpose(3,1,2,0)
    if ifweight=='yes' or ifweight=='shap':
        weights=np.zeros((testy.shape[1],testx.shape[1]))
        if testx.shape[0]>=100:
            index=np.random.randint(0,testx.shape[0],size=100)
            explainer = shap.DeepExplainer(model,testx_tensor[index,:,:,:].to(devices))
            weights_more = np.abs(explainer.shap_values(testx_tensor[index,:,:,:].to(devices)))
        else:
            explainer = shap.DeepExplainer(model,testx_tensor.to(devices))
            weights_more = np.abs(explainer.shap_values(testx_tensor.to(devices)))
        weight=np.nanmean(weights_more,axis=(0,2,3))
        for i in range(testy.shape[1]):
            for j in range(testx.shape[1]):
                weights[i,j]=(weight[j,i]/np.nansum(weight[:,i]))*100
                print('Contribution of predictor',j+1,'to target',i+1,':',np.array(weights[i,j]),'%')
            print('\n')
    elif ifweight=='oob':
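        # permutation importance: shuffle one input channel at a time and measure how much the test loss increases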
        weights=np.zeros((testy.shape[1],testx.shape[1]))
        weight_more=np.zeros((testy.shape[1],testx.shape[1]))
        for i in range(testy.shape[1]):
            for j in range(testx.shape[1]):
                testx_new=copy.deepcopy(testx)
                weight=[]
                for k in range(10):
                    per=np.random.permutation(testx.shape[0])
                    testx_shuffle=testx[per,j,:,:]
                    testx_new[:,j,:,:]=testx_shuffle
                    predicty_new=model(torch.tensor(testx_new,dtype=torch.float32,device=devices)).cpu().detach().numpy()[:,i]
                    if task_mode=='regression':
                        weight.append(sklearn.metrics.mean_squared_error(testy[:,i],predicty_new)-sklearn.metrics.mean_squared_error(testy[:,i],predicty[:,i]))
                    else:
                        weight.append(sklearn.metrics.log_loss(testy[:,i],predicty_new)-sklearn.metrics.log_loss(testy[:,i],predicty[:,i]))
                weight_more[i,j]=np.nanmean(weight)
        for i in range(testy.shape[1]):
            for j in range(testx.shape[1]):
                weights[i,j]=(weight_more[i,j]/np.nansum(weight_more[i,:]))*100
                print('Contribution of predictor',j+1,'to target',i+1,':',np.array(weights[i,j]),'%')
            print('\n')
    if ifsave=='yes':
        torch.save(model,savepath)
        
    return model,predicty,testy,r,p,heatmap,weights