It’s hard to tell whether a speedup should be expected, since the individual operations are quite small. A Python loop adds some overhead, but the kernel dispatch for each of these small workloads could be just as visible. In any case, here is a draft that avoids the loops for the lower-order operations (the higher-order polynomial terms could most likely be vectorized as well, but they are unused in your current use case):
import torch

def build_sindy_theta(x_inputs: torch.Tensor) -> torch.Tensor:
"""
This function creates the state for Theta given input latent variable(s). The function is implemented for first
or second order systems. The second order system includes library terms for the derivatives of the latent
variable. Note that in the case where a second order system is used, z here is actually a concatenation of z and
dz to z,dz. It is assumed that z and dz are elements 0 and 1 on the first axis of the tensor x_inputs supplied
in the first argument. This z and dz are the latent variables produced by the encoder part of the autoencoder
prior to decoding.
The parameters dictionary must include:
* latent_dim - the number of latent variables needed to describe the system.
* poly_order - the maximum number of arguments in the polynomial describing the system.
* include_sine - Whether to include sine in the arguments used to describe the system.
* model_order - the order of the model needed to describe the system (1 or 2)
If a second order model is needed, then dx, the derivative of x_inputs will be used as well
as x_inputs.
"""
    # TODO: remove the nested iterations and replace the chain of 'if's with a loop over range(poly_order).
    # If 1-D inputs (a single sample) should be supported, z_slice could be unsqueezed here:
    # z_slice = x_inputs[0].unsqueeze(0) if x_inputs[0].dim() == 1 else x_inputs[0]
    z_slice: torch.Tensor = x_inputs[0]
library = [torch.ones(z_slice.shape[0], device=x_inputs.device)]
latent_dim = parameters["latent_dim"]
    # If a non-linear second order process is suspected, both z and dz are modelled. In that case a set of
    # parameters is needed for each, so the number of latent dimensions (latent_dim) is doubled and the
    # latent variable tensor is reconstructed as the concatenation of z and dz.
if parameters["model_order"] == 2:
# dz_slice = x_inputs[1].unsqueeze(0) if len(x_inputs[1].shape) == 1 else x_inputs[1]
dz_slice = x_inputs[1]
z_slice = torch.cat([z_slice, dz_slice], 1) # concatenate.
latent_dim = 2 * parameters["latent_dim"]
for i in range(latent_dim):
library.append(z_slice[:, i])
    # The polynomial terms are built next: for every degree up to poly_order, all unique combinations (with
    # repetition) of the latent variables are appended to the library.
if parameters["poly_order"] > 1:
for i in range(latent_dim):
for j in range(i, latent_dim):
library.append(torch.mul(z_slice[:, i], z_slice[:, j]))
if parameters["poly_order"] > 2:
for i in range(latent_dim):
for j in range(i, latent_dim):
for k in range(j, latent_dim):
library.append(
z_slice[:, i] * z_slice[:, j] * z_slice[:, k]
)
if parameters["poly_order"] > 3:
for i in range(latent_dim):
for j in range(i, latent_dim):
for k in range(j, latent_dim):
for p in range(k, latent_dim):
library.append(
z_slice[:, i]
* z_slice[:, j]
* z_slice[:, k]
* z_slice[:, p]
)
if parameters["poly_order"] > 4:
for i in range(latent_dim):
for j in range(i, latent_dim):
for k in range(j, latent_dim):
for p in range(k, latent_dim):
for q in range(p, latent_dim):
library.append(
z_slice[:, i]
* z_slice[:, j]
* z_slice[:, k]
* z_slice[:, p]
* z_slice[:, q]
)
if parameters["include_sine"]:
for i in range(latent_dim):
library.append(torch.sin(z_slice[:, i]))
    # This is the library matrix Theta, stacked along dim 0, i.e. with shape (num_terms, batch_size).
    return torch.stack(library).float()
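As a minimal sanity check (a sketch using the same configuration as the comparison further below), the library for latent_dim=1 with model_order=2, poly_order=3 and include_sine=True should contain 1 constant, 2 linear, 3 quadratic, 4 cubic and 2 sine rows, i.e. 12 rows in total:

parameters = {"latent_dim": 1, "model_order": 2, "poly_order": 3, "include_sine": True}
theta = build_sindy_theta(torch.randn(2, 5, 1))  # x[0] is z, x[1] is dz, batch size of 5
print(theta.shape)  # torch.Size([12, 5])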
def my_build_sindy_theta(x_inputs: torch.Tensor) -> torch.Tensor:
"""
This function creates the state for Theta given input latent variable(s). The function is implemented for first
or second order systems. The second order system includes library terms for the derivatives of the latent
variable. Note that in the case where a second order system is used, z here is actually a concatenation of z and
dz to z,dz. It is assumed that z and dz are elements 0 and 1 on the first axis of the tensor x_inputs supplied
in the first argument. This z and dz are the latent variables produced by the encoder part of the autoencoder
prior to decoding.
The parameters dictionary must include:
* latent_dim - the number of latent variables needed to describe the system.
* poly_order - the maximum number of arguments in the polynomial describing the system.
* include_sine - Whether to include sine in the arguments used to describe the system.
* model_order - the order of the model needed to describe the system (1 or 2)
If a second order model is needed, then dx, the derivative of x_inputs will be used as well
as x_inputs.
"""
    # TODO: vectorize the remaining poly_order > 3 branches as well.
    z_slice: torch.Tensor = x_inputs[0]
    # The constant term is kept 2-D, as a (1, batch) row, so that torch.cat can be used at the end.
    library = [torch.ones(1, z_slice.shape[0], device=x_inputs.device)]
latent_dim = parameters["latent_dim"]
    # As in build_sindy_theta: for a second order model, z and dz are concatenated and latent_dim is doubled.
if parameters["model_order"] == 2:
# dz_slice = x_inputs[1].unsqueeze(0) if len(x_inputs[1].shape) == 1 else x_inputs[1]
dz_slice = x_inputs[1]
z_slice = torch.cat([z_slice, dz_slice], 1) # concatenate.
latent_dim = 2 * parameters["latent_dim"]
library.append(z_slice[:, torch.arange(latent_dim)].t())
    # The quadratic and cubic terms are built in one shot: compute the full outer product of the latent
    # variables and keep only the unique monomials via a boolean mask, in the same order in which the nested
    # loops of the reference implementation produce them.
if parameters["poly_order"] > 1:
tmp = z_slice.unsqueeze(1) * z_slice.unsqueeze(2)
mask = torch.empty_like(tmp).bool().fill_(True)
mask = mask.tril()
library.append(tmp[mask].view(tmp.size(0), torch.prod(torch.tensor(tmp.size()[1:]))-1).t())
if parameters["poly_order"] > 2:
tmp = z_slice.unsqueeze(1) * z_slice.unsqueeze(2) * z_slice.unsqueeze(2)
library.append(tmp.view(tmp.size(0), -1).t())
if parameters["poly_order"] > 3:
for i in range(latent_dim):
for j in range(i, latent_dim):
for k in range(j, latent_dim):
for p in range(k, latent_dim):
print(z_slice[:, i]
* z_slice[:, j]
* z_slice[:, k]
* z_slice[:, p])
library.append(
z_slice[:, i]
* z_slice[:, j]
* z_slice[:, k]
* z_slice[:, p]
)
if parameters["poly_order"] > 4:
for i in range(latent_dim):
for j in range(i, latent_dim):
for k in range(j, latent_dim):
for p in range(k, latent_dim):
for q in range(p, latent_dim):
library.append(
z_slice[:, i]
* z_slice[:, j]
* z_slice[:, k]
* z_slice[:, p]
* z_slice[:, q]
)
    if parameters["include_sine"]:
        # Sine terms for all latent variables at once, as a (latent_dim, batch) block.
        library.append(torch.sin(z_slice).t())
    # This is the library matrix Theta, with shape (num_terms, batch_size).
    return torch.cat(library).float()
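As a quick equivalence check of both implementations over a few batch sizes (only x[0], i.e. z, and x[1], i.e. dz, are used from the random input):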
parameters = {"latent_dim" : 1, "model_order" : 2, "poly_order" : 3, "include_sine" : True}
for i in range(3, 10):
x = torch.randn(1000, i, 1)
ref = build_sindy_theta(x)
out = my_build_sindy_theta(x)
print((ref - out).abs().max())
# tensor(2.9802e-08)
# tensor(2.3283e-10)
# tensor(7.4506e-09)
# tensor(2.9802e-08)
# tensor(5.9605e-08)
# tensor(2.3842e-07)
# tensor(9.5367e-07)
As you can see, some unused values are still computed and masked away afterwards, which could eat into the performance gains (and uses more memory), so you should definitely profile your use case and check whether a speedup is actually visible.
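For the profiling, here is a minimal sketch using torch.utils.benchmark (the input shape is an assumption; replace it with your real workload, and move x to the GPU if that is where you train, since the benchmark utility takes care of CUDA synchronization):

from torch.utils.benchmark import Timer

# Hypothetical workload; adjust the shape/device to match your training setup.
x = torch.randn(2, 4096, 1)

for fn in (build_sindy_theta, my_build_sindy_theta):
    timer = Timer(stmt="fn(x)", globals={"fn": fn, "x": x})
    print(fn.__name__, timer.timeit(100))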