How do I decide the batch dimension for batch training with a GCN?

Here is my GCN definition:

import math

import torch
import numpy as np
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        if bias:
            # self.bias = Parameter(torch.FloatTensor(out_features))
            self.bias = Parameter(torch.FloatTensor(out_features, 1))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.out_features)
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        #support = torch.mm(input, self.weight)
        #output = torch.spmm(adj, support)

        support = torch.mm(self.weight, input)  # [out, in] @ [in, N] -> [out, N]
        output = torch.mm(support, adj.T)       # [out, N] @ [N, N] -> [out, N]
        if self.bias is not None:
            return output + self.bias           # bias [out, 1] broadcasts over the N columns
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
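
For context, the shape flow this layer implements for a single state is [out, in] @ [in, N] @ [N, N] -> [out, N], so one unbatched state goes through fine. A quick sanity check with the same sizes as my test below:

layer = GraphConvolution(in_features=1, out_features=9)
state = torch.rand(1, 9)  # one state: [in_features, N] = [1, 9]
adj = torch.eye(9)        # [N, N]
out = layer(state, adj)   # [9, 1] @ [1, 9] -> [9, 9], then @ adj.T -> [9, 9]
print(out.shape)          # torch.Size([9, 9]) = [out_features, N]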

my buffer definition:

class ReplayBuffer():
    def __init__(self, max_size, input_shape):
        self.mem_size = max_size
        self.mem_cntr = 0
        # each stored state is a flat vector of length input_shape
        self.state_memory = np.zeros((self.mem_size, input_shape))

    def store_transition(self, state):
        index = self.mem_cntr % self.mem_size  # overwrite the oldest entry once full
        self.state_memory[index] = state
        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size)  # indices sampled with replacement
        states = self.state_memory[batch]              # shape [batch_size, input_shape]
        return states
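
Note that each stored state is a flat vector of length input_shape, so sample_buffer returns a [batch_size, input_shape] array. A quick check:

buf = ReplayBuffer(max_size=10, input_shape=9)
for _ in range(9):
    buf.store_transition(np.random.rand(9))
print(buf.sample_buffer(3).shape)  # (3, 9): three flat states stacked along dim 0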

NN definition:

class NN(Module):
    def __init__(self, in_dim, out_dim):
        super(NN, self).__init__()
        
        # [out, in] @ [in, N] @ [N, N] -> [out, N]
        self.gc1 = GraphConvolution(in_dim, out_dim)
        
    def forward(self, state, adj):
        x = self.gc1(state, adj)
        return x

and my test part:

net = NN(1, 9)  # in_dim=1 feature per node, out_dim=9
adj = torch.tensor(np.eye(9), dtype=torch.float32)  # identity adjacency, [N, N] with N=9

buffer = ReplayBuffer(max_size=100000, input_shape=9)
for i in range(9):
    state = torch.tensor(np.random.rand(9), dtype=torch.float32).unsqueeze(0)  # [1, 9]
    buffer.store_transition(state)  # stored as a flat row of length 9

batch_size = 3
state_batch = torch.tensor(buffer.sample_buffer(batch_size), dtype=torch.float32)  # [3, 9]

batch_output = net(state_batch, adj)  # fails here
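
Tracing the shapes right before the failing call:

print(state_batch.shape)     # torch.Size([3, 9])  -> [batch_size, N]
print(net.gc1.weight.shape)  # torch.Size([9, 1])  -> [out_features, in_features]
# so the layer attempts torch.mm([9, 1], [3, 9]), which cannot work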

and the error is:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-54-5941a970f106> in <module>
----> 1 batch_output = net(state_batch, adj)

~/anaconda3/envs/Circuits/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-20-39181b909740> in forward(self, state, adj)
      7 
      8     def forward(self, state, adj):
----> 9         x = self.gc1(state, adj)
     10         return x

~/anaconda3/envs/Circuits/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-12-5d6f496f5bd3> in forward(self, input, adj)
     35         #output = torch.spmm(adj, support)
     36 
---> 37         support = torch.mm(self.weight, input)
     38         output = torch.mm(support, adj.T)
     39         if self.bias is not None:

RuntimeError: size mismatch, m1: [9 x 1], m2: [3 x 9] at ../aten/src/TH/generic/THTensorMath.cpp:41

How can I implement batch training, or more specifically, how should I define the dimension of the data stored in the buffer?
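
For concreteness, here is a rough sketch of the kind of batched forward I think I need, assuming each state is first reshaped to [batch, in_features, N] so that torch.matmul can broadcast over the batch dimension (I am not sure this is the right layout for the buffer):

batched = state_batch.view(batch_size, 1, 9)     # [B, in_features, N]
support = torch.matmul(net.gc1.weight, batched)  # [out, in] @ [B, in, N] -> [B, out, N]
output = torch.matmul(support, adj.T)            # [B, out, N] @ [N, N] -> [B, out, N]
output = output + net.gc1.bias                   # bias [out, 1] broadcasts over batch and N
print(output.shape)                              # torch.Size([3, 9, 9])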