@smth as a side (but related) note, if we are selecting a random batch when doing SGD, do we also need to wrap things with variables or torch.tensors? E.g:
def get_batch2(X,Y,M,dtype):
X,Y = X.data.numpy(), Y.data.numpy()
N = len(Y)
valid_indices = np.array( range(N) )
batch_indices = np.random.choice(valid_indices,size=M,replace=False)
batch_xs = torch.FloatTensor(X[batch_indices,:]).type(dtype)
batch_ys = torch.FloatTensor(Y[batch_indices]).type(dtype)
return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)