PyTorch SparseAdam: how do I run it?

import torch
from torch import nn

class TrainNet(torch.nn.Module):
    """Toy linear layer whose weight is stored as a sparse tensor.

    The layer computes ``weight @ input`` with ``torch.sparse.mm``; the
    weight is a randomly initialised ``(out_features, in_features)`` tensor
    converted with ``to_sparse()``.

    Args:
        in_features: Number of rows expected in ``input``.
        out_features: Number of rows in the output.
        bias: When true (the default) a sparse
            ``(out_features, out_features)`` bias parameter is registered;
            otherwise ``self.bias`` is ``None``.

    NOTE(review): the bias parameter is registered but is not applied in
    ``forward`` -- confirm whether it was meant to be added to the output.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.nn.Parameter(
            torch.randn(out_features, in_features).to_sparse().requires_grad_(True)
        )
        if bias:
            self.bias = torch.nn.Parameter(
                torch.randn(out_features, out_features).to_sparse().requires_grad_(True)
            )
        else:
            # Same convention as torch.nn.Linear: keep the attribute, but
            # register it as an absent parameter.
            self.register_parameter("bias", None)

    def forward(self, input):
        """Return the sparse-dense product ``weight @ input``.

        Args:
            input: Dense tensor with ``in_features`` rows.

        Returns:
            Dense tensor with ``out_features`` rows and as many columns as
            ``input``.
        """
        # torch.sparse.mm multiplies a sparse matrix by a dense one.
        return torch.sparse.mm(self.weight, input)

# a = torch.randn(2,3).to_sparse().requires_grad_(True)
# Random input/target pair for a toy regression problem.
x = torch.randn(3, 3, requires_grad=True)
y = torch.randn(3, 3, requires_grad=False)
Net = TrainNet(3, 3)
# NOTE(review): SparseAdam is documented as a masked Adam for *sparse
# gradients*; it appears to expect dense parameters (e.g. an
# nn.Embedding(sparse=True) weight) and newer releases may reject sparse
# parameter tensors outright -- confirm against the installed version.
optimizer = torch.optim.SparseAdam(Net.parameters(), lr=1e-3, betas=(0.9, 0.999))
l1 = torch.nn.L1Loss(reduction='sum')
for epoch in range(500):
    # Clear gradients left over from the previous iteration.
    optimizer.zero_grad()
    out = Net(x)
    loss = l1(out, y)
    # Without backward() + step() the optimizer never updates anything.
    loss.backward()
    optimizer.step()

I want to run SparseAdam on a simple example in order to learn how sparse tensors work, but it fails with the error below. Is SparseAdam working at the moment? Are there any examples or docs that explain how to run it?

/home/liang/anaconda3/envs/py36/bin/python /home/liang/PycharmProjects/sparse/
Traceback (most recent call last):
  File "/home/liang/PycharmProjects/sparse/", line 43, in <module>
  File "/home/liang/.local/lib/python3.6/site-packages/torch/optim/", line 85, in step
    old_exp_avg_values = exp_avg.sparse_mask(grad)._values()
RuntimeError: sparse_mask is not implemented for type torch.sparse.FloatTensor
                if len(state) == 0:
                    state['step'] = 0
                    # print(
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(

I had to change the code in SparseAdam (the state initialization shown above), and now it runs. However, I am not sure whether it is working correctly.