How can I transform weights from CPU type to GPU type?

I wrote a network with libtorch and can run it on the CPU, but how can I use CUDA?
When I move the input to the CUDA device, I get an error saying the weights need to be CUDA type as well. How can I use libtorch to train my network from scratch? That is, I do not want to use weights trained by PyTorch.
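
For concreteness, the failing pattern looks roughly like this (a minimal sketch; the input shape is my guess for this network):

auto model = std::make_shared<Classifier>();
auto input = torch::randn({1, 1, 224, 224}).to(torch::kCUDA);  // input moved to the GPU
auto output = model->forward(input);  // error: the conv weights are still CPUFloatType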

My code:
Classifier.h
#include <torch/torch.h>
#include <vector>
#include <string>
#include <iostream>

using namespace std;

class Classifier : public torch::nn::Module {
public:
    Classifier();
    torch::Tensor forward(torch::Tensor x);
    void create_modules();
    void init_weights();
    void load_weights(const string& s);
private:
    torch::Device* _device;
    vector<torch::nn::Sequential> module_list;
};

// Each row is {in_channels, out_channels, stride} for a conv block;
// a single value is the avg-pool kernel size, a pair is the linear layer's in/out features.
static const vector<vector<int64_t>> vec = {
    {1, 32, 2},
    {32, 64, 1},
    {64, 128, 2},
    {128, 128, 1},
    {128, 256, 2},
    {256, 256, 1},
    {256, 512, 2},
    {512, 512, 1},
    {512, 512, 1},
    {512, 512, 1},
    {512, 512, 1},
    {512, 512, 1},
    {512, 1024, 2},
    {1024, 1024, 1},
    {7},
    {1024, 10}
};
Classifier.cpp
#include "Classifier.h"

torch::nn::Conv2dOptions conv_options(int64_t in_channels, int64_t out_channels, int64_t kernel_size,
                                      int64_t stride, int64_t padding=0, int64_t groups=1, bool with_bias=false)
{
    torch::nn::Conv2dOptions conv_options = torch::nn::Conv2dOptions(in_channels, out_channels, kernel_size);
    conv_options.stride_ = stride;
    conv_options.padding_ = padding;
    conv_options.groups_ = groups;
    conv_options.with_bias_ = with_bias;
    return conv_options;
}

torch::nn::BatchNormOptions bn_options(int64_t features)
{
    torch::nn::BatchNormOptions bn_options = torch::nn::BatchNormOptions(features);
    bn_options.affine_ = true;
    bn_options.stateful_ = true;
    return bn_options;
}

torch::nn::LinearOptions ln_options(int64_t in_channels, int64_t out_channels)
{
    return torch::nn::LinearOptions(in_channels, out_channels);
}

Classifier::Classifier(/*torch::Device device*/)
{
    create_modules();
}

struct Conv_bn : torch::nn::Module {
    torch::nn::Conv2d conv;
    torch::nn::BatchNorm bn;

    Conv_bn(int64_t in_channels, int64_t out_channels, int64_t stride):
        conv(conv_options(in_channels, out_channels, 3, stride, 1)),
        bn(bn_options(out_channels)) {}

    torch::Tensor forward(torch::Tensor x)
    {
        return torch::relu(bn(conv(x)));
    }
};

struct Conv_dw : torch::nn::Module {
    torch::nn::Conv2d conv1, conv2;
    torch::nn::BatchNorm bn1, bn2;

    Conv_dw(int64_t in_channels, int64_t out_channels, int64_t stride):
        conv1(conv_options(in_channels, in_channels, 3, stride, 1, in_channels)),  // depthwise (groups = in_channels)
        bn1(bn_options(in_channels)),
        conv2(conv_options(in_channels, out_channels, 1, 1)),                      // pointwise 1x1
        bn2(bn_options(out_channels)) {}

    torch::Tensor forward(torch::Tensor x)
    {
        x = torch::relu(bn1(conv1(x)));
        return torch::relu(bn2(conv2(x)));
    }
};

struct Avgpooling : torch::nn::Module {
    int64_t kernel_size;
    Avgpooling(int64_t n): kernel_size(n) {}
    torch::Tensor forward(torch::Tensor x)
    {
        return torch::avg_pool2d(x, kernel_size);
    }
};

void Classifier::create_modules()
{
    for(size_t i = 0; i < vec.size(); ++i)
    {
        torch::nn::Sequential module;
        if(i < 1)
        {
            Conv_bn bn_layer(vec[i][0], vec[i][1], vec[i][2]);
            module->push_back(bn_layer);
        }
        else
        {
            if(vec[i].size() == 1)
            {
                Avgpooling avgpooling(vec[i][0]);
                module->push_back(avgpooling);
            }
            else if(vec[i].size() == 2)
            {
                torch::nn::Linear ln_layer(ln_options(vec[i][0], vec[i][1]));
                module->push_back(ln_layer);
            }
            else
            {
                Conv_dw dw_layer(vec[i][0], vec[i][1], vec[i][2]);
                module->push_back(dw_layer);
            }
        }
        module_list.push_back(module);
    }
}

void Classifier::init_weights()
{

}

torch::Tensor Classifier::forward(torch::Tensor x)
{
    for(size_t i = 0; i < vec.size(); ++i)
    {
        if(vec[i].size() == 2)
            x = x.view({x.size(0), -1});  // flatten before the linear layer
        x = module_list[i]->forward(x);
    }
    return x;
}

Could you try model->to(torch::kCUDA) to convert your model to CUDA?

I did not use JIT; the network was written directly with the libtorch C++ API.

For libtorch models, we also need to use model->to(torch::kCUDA) to convert the model to CUDA. See https://github.com/pytorch/pytorch/blob/505fa83b2f4b02b55784c94ba52d4ab41d351623/test/cpp/api/modules.cpp#L295-L310.
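
Putting that together with the code above, a minimal end-to-end sketch might look like this (the optimizer, batch size, and 224x224 input shape are illustrative assumptions, not part of the original code):

#include <torch/torch.h>
#include "Classifier.h"

int main() {
    auto model = std::make_shared<Classifier>();

    // Pick the GPU when available; to() moves every *registered* parameter and buffer.
    torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU);
    model->to(device);

    torch::optim::SGD optimizer(model->parameters(), /*lr=*/0.01);

    // One illustrative training-from-scratch step; real data loading is omitted.
    auto input  = torch::randn({8, 1, 224, 224}, device);  // batch of 8, 1 channel
    auto target = torch::randint(0, 10, {8}, torch::TensorOptions(device).dtype(torch::kLong));

    optimizer.zero_grad();
    auto output = model->forward(input);  // {8, 10} logits
    auto loss = torch::nll_loss(torch::log_softmax(output, /*dim=*/1), target);
    loss.backward();
    optimizer.step();
    return 0;
}

The key point is that both the model and the input must end up on the same device before forward() is called.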

I am facing a similar problem.
I tried using
model->to(torch::kCUDA)
and
model->to(torch::Device(torch::kCUDA, 0))
but neither of them works.

I get the error message

Input type (CUDAFloatType) and weight type (CPUFloatType) should be the same (_convolution at ../../aten/src/ATen/native/Convolution.cpp:599)

Any idea how to fix this?
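
One thing worth checking in the Classifier code above (an observation, not a confirmed diagnosis of your model): torch::nn::Module::to() only moves parameters it can actually see, and it only sees submodules added through register_module(). In the code above, conv and bn are plain members of Conv_bn/Conv_dw, and the Sequential blocks sit in an unregistered std::vector, so model->to(torch::kCUDA) finds nothing to move and the weights stay CPUFloatType. A sketch of the registration pattern, keeping the helper functions from above:

struct Conv_bn : torch::nn::Module {
    torch::nn::Conv2d conv{nullptr};
    torch::nn::BatchNorm bn{nullptr};

    Conv_bn(int64_t in_channels, int64_t out_channels, int64_t stride) {
        // register_module makes the parameters visible to to(), parameters(), etc.
        conv = register_module("conv", torch::nn::Conv2d(conv_options(in_channels, out_channels, 3, stride, 1)));
        bn = register_module("bn", torch::nn::BatchNorm(bn_options(out_channels)));
    }

    torch::Tensor forward(torch::Tensor x)
    {
        return torch::relu(bn(conv(x)));
    }
};

// ... the same idea applies to Conv_dw ...

void Classifier::create_modules()
{
    for(size_t i = 0; i < vec.size(); ++i)
    {
        torch::nn::Sequential module;
        // ... build the block exactly as before ...
        // Register each block so the parent Classifier owns its parameters too.
        register_module("layer_" + std::to_string(i), module);
        module_list.push_back(module);
    }
}

After this, model->to(torch::kCUDA) should move the conv and batch-norm weights along with the rest of the module.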