I am profiling different parts of an application on the GPU, and I need to profile just a single layer (a ConvTranspose2d) on its own. Basically, I just want to see which CUDA kernels get launched for ConvTranspose2d, without batch norm or ReLU running in that region.
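The kind of measurement I have in mind looks roughly like this (just a sketch using torch.autograd.profiler, with the first ConvTranspose2d from the model below pulled out on its own; I'm not sure the exact arguments are right for every torch version):

import torch
import torch.nn as nn

cuda = torch.device('cuda')

# Only the deconvolution layer, moved to the GPU.
deconv = nn.ConvTranspose2d(in_channels=10, out_channels=32,
                            kernel_size=(4, 4), stride=2, padding=0).to(cuda)
x = torch.rand(1, 10, 1, 1, device=cuda)

# Record the CUDA kernels launched by a single forward pass of this layer.
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    deconv(x)

print(prof.key_averages().table(sort_by="cuda_time_total"))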
However, when I try to run the following code, I get a "RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)".
import torch
import torch.nn as nn
import torch.nn.functional as F
import pdb

cuda = torch.device('cuda')


def main():
    net = Generator()
    input = torch.rand(10, 1, 1).unsqueeze(0)
    image = net(input.to(device=cuda))
    print(image.reshape(28, 28))


class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.z_dim = 10
        self.x_dim = 784
        self.name = 'mnist/dcgan/g_net'
        # Only the first ConvTranspose2d is explicitly moved to the GPU here.
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=10, out_channels=32, kernel_size=(4, 4), stride=2, padding=0).to(device=cuda),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=(6, 6), stride=2, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=(6, 6), stride=2, padding=0),
            nn.Sigmoid()
        )
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                nn.init.normal_(m.weight).to(device=cuda)

    def forward(self, x):
        out1 = self.layer1(x)
        pdb.set_trace()  # breakpoint so I can inspect each layer's output separately
        out2 = self.layer2(out1)
        pdb.set_trace()
        out3 = self.layer3(out2)
        return out3


if __name__ == '__main__':
    main()
Full traceback:
rabbit@nano-dev:~/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow$ python example.py
Traceback (most recent call last):
  File "example.py", line 68, in <module>
    main()
  File "example.py", line 11, in main
    image = net(input.to(device=cuda))
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "example.py", line 58, in forward
    out1 = self.layer1(x)
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/modules/container.py", line 100, in forward
    input = module(input)
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/modules/batchnorm.py", line 107, in forward
    exponential_average_factor, self.eps)
  File "/home/rabbit/Documents/PRL/FPGA_DeconvAcc/mnist/tensorflow/venv/tensorflow/lib/python3.6/site-packages/torch/nn/functional.py", line 1670, in batch_norm
    training, momentum, eps, torch.backends.cudnn.enabled
RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)