Using summary()

Hi, there,

from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),

# Download and load the training data
trainset = datasets.MNIST('/.',download=True, train=True, transform=transform)
valset = datasets.MNIST('/.',download=True, train=False, transform=transform)

# Sampling the dataset
trainset =, np.arange(0, 1024))
valset =, np.arange(0, 1024))

# set it into minibatches
trainloader =, batch_size=64, shuffle=True)
valloader =, batch_size=64, shuffle=True)

I created a custom layer:

# Customization of a layer
class MyLayer(torch.nn.Module):
    def __init__(self, size_in, size_out):
        self.size_in, self.size_out = size_in, size_out
        self.A1 = torch.nn.Parameter(torch.Tensor(self.size_in, self.size_out)) # same thing as in tensorflow to set it as learnable
        self.B1 = torch.nn.Parameter(torch.Tensor(self.A1.shape)) # same as the above line
        # check 1 see if initialization helps to speed up the experiment
        torch.nn.init.kaiming_uniform_(self.A1, a=math.sqrt(5)) # weight init
        torch.nn.init.kaiming_uniform_(self.B1, a=math.sqrt(5))

        # initialization of A1 and B1
        # torch.nn.init.normal_(self.A1, mean=0.0, std=1e-2) 
        # torch.nn.init.constant_(self.B1, 1.0)

    def forward(self, x, omega):
        self.W = (self.A1 + omega*self.B1)
        w_times_x =, self.W)
        return w_times_x

Used the layer in my model:

# The exact implementation of our model. I think pytorch is really neat in that perspective
class BasicModel(torch.nn.Module):
    def __init__(self, omega1, omega2, omega3, omega4, indim, outdim, hiddim):
        self.omega1, self.omega2, self.omega3, self.omega4 = omega1, omega2, omega3, omega4 # adding in the omega, our noise
        self.indim, self.outdim, self.hiddim = indim, outdim, hiddim
        self.linear1 = MyLayer(self.indim, self.hiddim[0]) # input x output h0
        self.linear2 = MyLayer(self.hiddim[0], self.hiddim[1]) # input h0 output h1
        self.linear3 = MyLayer(self.hiddim[1], self.hiddim[2])# input h1 output h2
        self.linear4 = MyLayer(self.hiddim[2], self.outdim)# input h2 output outdim

    def forward(self, x):
        # set our activation function non-linearization of each layer
        x = torch.sigmoid(self.linear1(x, self.omega1))
        x = torch.sigmoid(self.linear2(x, self.omega2))
        x = torch.sigmoid(self.linear3(x, self.omega3))
        x = self.linear4(x, self.omega4)
        x = torch.nn.LogSoftmax(dim=1)(x)
        return x

set the input shape and number of hidden nodes:

# set our shape
input = 28*28
output = 10
hidden = [128, 64, 32] # we are building an MLP assign the number of hidden units in each layer

# the omega, the noise added to the weight with a mean of 0 and a covariance of identity matrix
o1, o2, o3, o4 = torch.normal(0, 1,size=(input,hidden[0])),torch.normal(0, 1,size=(hidden[0],hidden[1])),torch.normal(0, 1,size=(hidden[1],hidden[2])),torch.normal(0, 1,size=(hidden[2],output))

Finally use the summary():

# hopefully this works
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
# Our model 
model = BasicModel(o1,o2,o3,o4, input, output, hidden).to(device_name)

I got this error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and CPU!

Please help, thanks

Try sending o* tensors to(device) first:

o1 =
o2 =
o3 =
o4 =

You’ll find that it raises another error :slight_smile: but at least this one will be fixed.

RuntimeError: tensors must be 2-D
1 Like

Haha, thanks. Loops of fixing errors and bugs :smiling_face_with_tear: