Not able to load quantized model on Android

I am able to load models on Android, but when I quantize the same model with PyTorch, I am not able to load it on Android. Below is the stack trace:

java.lang.RuntimeException: Unable to start activity ComponentInfo{com.fitnest.testprediction/com.fitnest.testprediction.MainActivity}: java.lang.RuntimeException: weight.qscheme() == kPerTensorAffine CHECK FAILED at ../aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp
    The above operation failed in interpreter, with the following stack trace:
    at code/__torch__/torch/nn/intrinsic/quantized/modules/conv_relu.py:70:10
        _22 = self.stride
        _23 = self.padding
        _24 = self.dilation
        _25 = self.groups
        _26, _27, = _22
        _28 = [_26, _27]
        _29, _30, = _23
        _31 = [_29, _30]
        _32, _33, = _24
        _34 = ops.quantized.conv_prepack(_20, _21, _28, _31, [_32, _33], _25)
              ~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
        self._packed_params = _34
        self.scale = (state)[12]
        self.zero_point = (state)[13]
        self.training = (state)[14]
        return None
      def _weight_bias(self: __torch__.torch.nn.intrinsic.quantized.modules.conv_relu.ConvReLU2d) -> Tuple[Tensor, Optional[Tensor]]:
        _35, _36 = ops.quantized.conv_unpack(self._packed_params)
        return (_35, _36)
      def set_weight_bias(self: __torch__.torch.nn.intrinsic.quantized.modules.conv_relu.ConvReLU2d,
    Compiled from code at /home/un270/anaconda2/envs/py37/lib/python3.7/site-packages/torch/nn/quantized/modules/conv.py:110:30
        def set_weight_bias(self, w, b):
            # type: (torch.Tensor, Optional[torch.Tensor]) -> None
            self._packed_params = torch.ops.quantized.conv_prepack(
                                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
                w, b, self.stride, self.padding, self.dilation, self.groups)
    
        at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:3447)
        at android.app.ActivityThread.handleLaunchActivity(ActivityThread.java:3594)
        at android.app.servertransaction.LaunchActivityItem.execute(LaunchActivityItem.java:83)
        at android.app.servertransaction.TransactionExecutor.executeCallbacks(TransactionExecutor.java:135)
        at android.app.servertransaction.TransactionExecutor.execute(TransactionExecutor.java:95)
        at android.app.ActivityThread$H.handleMessage(ActivityThread.java:2146)
        at android.os.Handler.dispatchMessage(Handler.java:107)
        at android.os.Looper.loop(Looper.java:237)
        at android.app.ActivityThread.main(ActivityThread.java:7777)
        at java.lang.reflect.Method.invoke(Native Method)
        at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:493)
        at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:1047)
     Caused by: java.lang.RuntimeException: weight.qscheme() == kPerTensorAffine CHECK FAILED at ../aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp
    The above operation failed in interpreter, with the following stack trace:
    at code/__torch__/torch/nn/intrinsic/quantized/modules/conv_relu.py:70:10
        _22 = self.stride
        _23 = self.padding
        _24 = self.dilation
        _25 = self.groups
        _26, _27, = _22
        _28 = [_26, _27]
        _29, _30, = _23
        _31 = [_29, _30]
        _32, _33, = _24
        _34 = ops.quantized.conv_prepack(_20, _21, _28, _31, [_32, _33], _25)
              ~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
        self._packed_params = _34
        self.scale = (state)[12]
        self.zero_point = (state)[13]
        self.training = (state)[14]
        return None
      def _weight_bias(self: __torch__.torch.nn.intrinsic.quantized.modules.conv_relu.ConvReLU2d) -> Tuple[Tensor, Optional[Tensor]]:
        _35, _36 = ops.quantized.conv_unpack(self._packed_params)
        return (_35, _36)
      def set_weight_bias(self: __torch__.torch.nn.intrinsic.quantized.modules.conv_relu.ConvReLU2d,
    Compiled from code at /home/un270/anaconda2/envs/py37/lib/python3.7/site-packages/torch/nn/quantized/modules/conv.py:110:30
        def set_weight_bias(self, w, b):
            # type: (torch.Tensor, Optional[torch.Tensor]) -> None
            self._packed_params = torch.ops.quantized.conv_prepack(
                                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
02-16 18:41:53.781 21251-21251/com.fitnest.testprediction E/AndroidRuntime:             w, b, self.stride, self.padding, self.dilation, self.groups)
    
        at org.pytorch.Module$NativePeer.initHybrid(Native Method)
        at org.pytorch.Module$NativePeer.<init>(Module.java:70)
        at org.pytorch.Module.<init>(Module.java:25)
        at org.pytorch.Module.load(Module.java:21)
        at com.fitnest.testprediction.MainActivity.onCreate(MainActivity.java:101)
        at android.app.Activity.performCreate(Activity.java:7981)
        at android.app.Activity.performCreate(Activity.java:7970)
        at android.app.Instrumentation.callActivityOnCreate(Instrumentation.java:1307)
        at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:3422)

Looks like you are passing a per-channel quantized weight to set_weight_bias, which is not supported here: the failed check `weight.qscheme() == kPerTensorAffine` means conv_prepack requires the weight to be per-tensor affine quantized, so a per-channel (kPerChannelAffine) weight is rejected. Re-quantize the model with a per-tensor weight observer (e.g. a qconfig using default_weight_observer instead of a per-channel one) before exporting for mobile.