Migrate time series code from CPU to GPU

Hello, I am currently learning from the code at https://github.com/pytorch/examples/blob/master/time_sequence_prediction/train.py
I want to migrate the CPU version to the GPU to reduce the total running time.
Here is what I did (a minimal sketch of the pattern follows the list):

  1. moved every Variable to the GPU with .cuda()
  2. moved the module to the GPU: seq.float().cuda()
  3. changed the loss printing to: loss.data.cpu().numpy()
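
Concretely, the pattern of those three changes looks roughly like this (just a minimal sketch with a toy nn.Linear model, not my actual script):

import torch
import torch.nn as nn
from torch.autograd import Variable

# 1. wrap the data in Variable and move it to the GPU with .cuda()
x = Variable(torch.zeros(4, 10).cuda())
y = Variable(torch.zeros(4, 10).cuda())

# 2. move the model parameters to the GPU as well
model = nn.Linear(10, 10).float().cuda()

# 3. compute the loss on the GPU, copy it back to the CPU only for printing
loss = nn.MSELoss()(model(x), y)
print('loss:', loss.data.cpu().numpy())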

However, I see hardly any improvement: the total time only drops from 44 s to 39 s.
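
In the script I just wrap opt.step(closure) between two time.time() calls. I wonder whether I should also call torch.cuda.synchronize() before reading the clock, roughly like this (a sketch, not my actual code):

import time
import torch

t0 = time.time()
# ... one LBFGS step would run here ...
torch.cuda.synchronize()  # assumed: wait for all queued GPU kernels to finish before timing
print('time:', time.time() - t0)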

Can anyone spot any errors in my code or give me some advice?

Thanks!

Here is my GPU code:

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import time


class Seq(nn.Module):
    def __init__(self):
        super(Seq, self).__init__()
        self.lstm1 = nn.LSTMCell(1, 51)
        self.lstm2 = nn.LSTMCell(51, 1)

    def forward(self, input, future=0):
        outputs = []
        seq_num, seq_len = input.size()  # 100 x 1000

        # hidden and cell states for both LSTM cells, created directly on the GPU
        ht1 = Variable(torch.zeros((seq_num, 51)).cuda().float())
        ct1 = Variable(torch.zeros((seq_num, 51)).cuda().float())
        ht2 = Variable(torch.zeros((seq_num, 1)).cuda().float())
        ct2 = Variable(torch.zeros((seq_num, 1)).cuda().float())

        # step through the input one column (time step) at a time
        for i, item in enumerate(input.chunk(seq_len, dim=1)):
            ht1, ct1 = self.lstm1(item, (ht1, ct1))
            ht2, ct2 = self.lstm2(ct1, (ht2, ct2))
            outputs += [ct2]
        # feed the last output back in to predict `future` extra steps
        for i in range(future):
            ht1, ct1 = self.lstm1(ct2, (ht1, ct1))
            ht2, ct2 = self.lstm2(ct1, (ht2, ct2))
            outputs += [ct2]
        outputs = torch.stack(outputs, dim=1).squeeze(dim=2)
        return outputs

if __name__ == '__main__':

    data = torch.load('sin.pt')

    # rows 3.. are the training set, rows 0..2 are held out for testing
    input = Variable(torch.from_numpy(data[3:, :-1]).cuda())
    target = Variable(torch.from_numpy(data[3:, 1:]).cuda())

    test_input = Variable(torch.from_numpy(data[:3, :-1]).cuda())
    test_target = Variable(torch.from_numpy(data[:3, 1:]).cuda())

    seq = Seq()
    seq.float().cuda()  # move the model parameters to the GPU
    criterion = nn.MSELoss()
    opt = optim.LBFGS(seq.parameters())

    for i in range(10):
        t0 = time.time()
        print('Step:', i)

        def closure():
            opt.zero_grad()
            out = seq(input)
            loss = criterion(out, target)
            # copy the loss back to the CPU only for printing
            print('loss:', loss.data.cpu().numpy()[0], end=' ')
            loss.backward()
            return loss

        opt.step(closure)
        print('\ntime:', time.time() - t0)

        # predict 1000 future steps on the test sequences
        future = 1000
        pred = seq(test_input, future=future)
        loss = criterion(pred[:, :-future], test_target)
        print('\ntest loss:', loss.data.cpu().numpy()[0])
        y = pred.data.cpu().numpy()

        plt.plot(range(1000), y[0][:1000])
        plt.plot(range(1000, 2000), y[0][999:])
        plt.show()