C++: specify the GPU to run predictions on

1. The model comes from Python. When I converted (traced) it with GPU1 selected, the C++ code can only run forward on GPU1.
2. When I load the GPU1 model on GPU0, I get the error: RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!
3. When I load the GPU0 model on GPU1, I get: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!
4. I use module.to(device) and tensor.to(device) to specify the device, as sketched below.
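
In short, the pattern I use looks like this (trimmed to the device-related lines; the commented lines at the end are an untested variant, since torch::jit::load also seems to accept a device argument, similar to map_location in Python):

torch::Device device(torch::kCUDA, 1);                           // target GPU1
auto module_ = torch::jit::load("best_v2.torchscript_640_0.pt");
module_.to(device);                                              // move the weights to GPU1
auto input = torch::zeros({1, 3, 640, 640}).to(device);          // dummy input on GPU1
auto output = module_.forward({input});
// Untested variant: load directly onto the target device instead of calling .to() afterwards
// auto module2 = torch::jit::load("best_v2.torchscript_640_0.pt", device);

Would passing the device to torch::jit::load like in the commented lines make a difference for a traced model, or is the device baked in at trace time?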

Thanks for your help.

Could you post a minimal, executable code snippet we could use to reproduce this issue, please?

C++

#include <iostream>
#include <torch/script.h>
#include <torch/torch.h>
#include <opencv2/opencv.hpp>

// Letterbox: resize keeping the aspect ratio and pad to out_size; returns {left pad, top pad, scale}.
std::vector<float> LetterboxImage(const cv::Mat &src, cv::Mat &dst, const cv::Size &out_size) {
    auto in_h = static_cast<float>(src.rows);
    auto in_w = static_cast<float>(src.cols);
    float out_h = out_size.height;
    float out_w = out_size.width;

    float scale = std::min(out_w / in_w, out_h / in_h);

    int mid_h = static_cast<int>(in_h * scale);
    int mid_w = static_cast<int>(in_w * scale);

    cv::resize(src, dst, cv::Size(mid_w, mid_h));

    int top = (static_cast<int>(out_h) - mid_h) / 2;
    int down = (static_cast<int>(out_h) - mid_h + 1) / 2;
    int left = (static_cast<int>(out_w) - mid_w) / 2;
    int right = (static_cast<int>(out_w) - mid_w + 1) / 2;

    cv::copyMakeBorder(dst, dst, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));

    std::vector<float> pad_info{static_cast<float>(left), static_cast<float>(top), scale};
    return pad_info;
}

int main() {
    torch::DeviceType device_type;
    torch::jit::script::Module module_;
    if (torch::cuda::is_available()) {
        device_type = torch::kCUDA;
    } else {
        device_type = torch::kCPU;
    }
    torch::Device device(device_type, 1);  // device index 1 -> GPU1
    try {
        module_ = torch::jit::load("best_v2.torchscript_640_0.pt");
    } catch (const c10::Error &e) {
        std::cout << e.what() << std::endl;
        std::exit(EXIT_FAILURE);
    }
    module_.to(device);
    module_.eval();

    torch::NoGradGuard no_grad;  // disable gradient tracking for inference
    // preprocessing
    cv::Mat img_input = cv::imread("1.jpg");
    std::vector<float> pad_info = LetterboxImage(img_input, img_input, cv::Size(640, 640));
    cv::cvtColor(img_input, img_input, cv::COLOR_BGR2RGB);  // BGR -> RGB
    img_input.convertTo(img_input, CV_32FC3, 1.0f / 255.0f);  // normalization 1/255
    at::Tensor tensor_img = torch::from_blob(img_input.data,
                                             {1, img_input.rows, img_input.cols, img_input.channels()}).to(device);
    tensor_img = tensor_img.permute({0, 3, 1, 2}).contiguous();  // BHWC -> BCHW (Batch, Channel, Height, Width)
    std::vector<torch::jit::IValue> inputs;
    inputs.emplace_back(tensor_img);
    torch::jit::IValue output;
    // inference
    try {
        output = module_.forward(inputs);
    } catch (const c10::Error &e) {
        std::cout << e.what() << std::endl;
        std::exit(EXIT_FAILURE);
    }
    auto detections = output.toTuple()->elements()[0].toTensor();  // first tuple element holds the detections
}

Python

"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats

Usage:
    $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""

import argparse

from models.common import *
from utils import google_utils

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='/media/lmc/C25B-9CCF/lmc/main/weight/yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    img = torch.zeros((opt.batch_size, 3, *opt.img_size)).to(device='cuda')  # dummy input on the default CUDA device

    # Load PyTorch model
    google_utils.attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cuda'))['model'].float()  # load the checkpoint onto the default CUDA device

    model.eval()
    # model.model[-1].export = True  # set Detect() layer export=True
    model.model[-1].export = False  # export=False keeps Detect() post-processing, so the traced model returns a tuple
    y = model(img)  # dry run

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

I am using YOLOv5 weights.