C++/pytorch How to convert tensor to image array?

I would like to convert a tensor to image array and use tensor.data() method. But it doesn’t work.

My function is showed below:

#include <torch/script.h> // One-stop header.

#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "itkImage.h"
#include "itkImageFileReader.h"
#include "itkImageFileWriter.h"
#include "itkImageRegionIterator.h"

//////////////////////////////////////////////////////
//Goal: load jit script model and segment myocardium
//Step: 1. load jit script model
//      2. load input image
//      3. predict by model
//      4. save the result to file
//////////////////////////////////////////////////////
typedef short                                 				PixelType;
const unsigned int Dimension = 3;
typedef itk::Image<PixelType, Dimension>      				ImageType;
typedef itk::ImageFileReader<ImageType>       				ReaderType;
typedef itk::ImageRegionIterator<ImageType> 			    IteratorType;

bool itk2tensor(ImageType::Pointer itk_img, torch::Tensor &tensor_img) {
	
	typename ImageType::RegionType region = itk_img->GetLargestPossibleRegion();
	const typename ImageType::SizeType size = region.GetSize();
	std::cout << "Input size: " << size[0] << ", " << size[1]<< ", " << size[2] << std::endl;

	int len = size[0] * size[1] * size[2];
	short rowdata[len];
	int count = 0;
	IteratorType iter(itk_img, itk_img->GetRequestedRegion());
	
	// convert itk to array
	for (iter.GoToBegin(); !iter.IsAtEnd(); ++iter) {
		rowdata[count] = iter.Get();
		count++;
	}
	std::cout << "Convert itk to array DONE!" << std::endl;

	// convert array to tensor
	tensor_img = torch::from_blob(rowdata, {1, 1, (int)size[0], (int)size[1], (int)size[2]}, torch::kShort).clone();
	tensor_img = tensor_img.toType(torch::kFloat);
	tensor_img = tensor_img.to(torch::kCUDA);
	tensor_img.set_requires_grad(0);

	return true;
}


bool tensor2itk(torch::Tensor &t, ImageType::Pointer itk_img) {

	std::cout << "tensor dtype = " << t.dtype() << std::endl;
	std::cout << "tensor size = " << t.sizes() << std::endl;
	t = t.toType(torch::kShort);
	short * array = t.data<short>();

	ImageType::IndexType start;
	start[0] = 0;  // first index on X
	start[1] = 0;  // first index on Y
	start[2] = 0;  // first index on Z

	ImageType::SizeType  size;
	size[0] = t.size(2);
	size[1] = t.size(3);
	size[2] = t.size(4);

	ImageType::RegionType region;
	region.SetSize( size );
	region.SetIndex( start );

	itk_img->SetRegions( region );
	itk_img->Allocate();

	int len = size[0] * size[1] * size[2];

	IteratorType iter(itk_img, itk_img->GetRequestedRegion());
	int count = 0;
	// convert array to itk
	std::cout << "start!" << std::endl;
	for (iter.GoToBegin(); !iter.IsAtEnd(); ++iter) {
		short temp = *array++;    //  ERROR!
		std::cout << temp << " ";
		iter.Set(temp);
		count++;
	}
	std::cout << "end!" << std::endl;

	return true;
}


int main(int argc, const char* argv[]) {
	int a, b, c;
	if (argc != 4) {
		std::cerr << "usage: automyo input jitmodel output\n";
		return -1;
	}

	std::cout << "=========  jit start  =========\n";
	// 1. load jit script model
	std::cout << "Load script module: " << argv[2] << std::endl;
	std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(argv[2]);
	module->to(at::kCUDA);

	// assert(module != nullptr);
	std::cout << "Load script module DONE" << std::endl;

	// 2. load input image
	const char* img_path = argv[1];
	std::cout << "Load image: " << img_path << std::endl;

	ReaderType::Pointer reader = ReaderType::New();

	if (!img_path) {
		std::cout << "Load input file error!" << std::endl;
		return false;
	}

	reader->SetFileName(img_path);
	reader->Update();

	std::cout << "Load image DONE!" << std::endl;

	ImageType::Pointer itk_img = reader->GetOutput();

	torch::Tensor tensor_img;
	if (!itk2tensor(itk_img, tensor_img)) {
		std::cerr << "itk2tensor ERROR!" << std::endl;
	}
	else {
		std::cout << "Convert array to tensor DONE!" << std::endl;
	}

	std::vector<torch::jit::IValue> inputs;
	inputs.push_back(tensor_img);

	// 3. predict by model
	torch::Tensor y = module->forward(inputs).toTensor();
	std::cout << "Inference DONE!" << std::endl;

	// 4. save the result to file
	torch::Tensor seg = y.gt(0.5);
	// std::cout << seg << std::endl;

	ImageType::Pointer out_itk_img = ImageType::New();
	if (!tensor2itk(seg, out_itk_img)) {
		std::cerr << "tensor2itk ERROR!" << std::endl;
	}
	else {
		std::cout << "Convert tensor to itk DONE!" << std::endl;
	}

	std::cout << out_itk_img << std::endl;

	return true;
}

The runtime log is showed below:

Load script module: model_myo_jit.pt
Load script module DONE
Load image: patch_6.nii.gz
Load image DONE!
Input size: 128, 128, 128
Convert itk to array DONE!
Convert array to tensor DONE!
Inference DONE!
tensor dtype = unsigned char
tensor size = [1, 1, 96, 96, 96]
start!
Segmentation fault (core dumped)

Why and how to convert?

I have found the solution. When I convert the y to kCPU, it works. Because it in CUDA before.