Hi, I am creating an LSTM model in which categorical features need to be embedded before being fed into the LSTM. Below is my code; I create a custom dataset to pass to the model.
import torch
import torch.nn as nn

# Map each categorical feature to its embedding dimension
embedding_dims = dict(zip(categorical_features, embedding_sizes))

class LSTMModel(nn.Module):
    def __init__(self, embedding_dims, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        # One embedding layer per categorical feature
        self.embeddings = nn.ModuleDict({
            feat: nn.Embedding(num_categories, embedding_size)
            for feat, num_categories, embedding_size in zip(categorical_features, embedding_sizes, embedding_dims.values())
        })
        # The LSTM input size is the sum of all embedding dimensions
        self.lstm = nn.LSTM(sum(embedding_dims.values()), hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch, seq_len, num_categorical_features) of integer category indices
        embedded = [self.embeddings[feat](x[:, :, i]) for i, feat in enumerate(categorical_features)]
        embedded = torch.cat(embedded, dim=2)
        lstm_out, _ = self.lstm(embedded)
        output = self.fc(lstm_out[:, -1, :])  # predict from the last time step
        return output
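For reference, I intend to use the model roughly like this; the feature names, embedding sizes, and shapes below are only placeholder values to illustrate the expected input, which should be a LongTensor of shape (batch, seq_len, num_categorical_features):

# Placeholder setup just to illustrate shapes/dtypes (not my real data)
categorical_features = ['store_id', 'day_of_week']   # hypothetical feature names
embedding_sizes = [4, 3]                             # hypothetical embedding dimensions
embedding_dims = dict(zip(categorical_features, embedding_sizes))

model = LSTMModel(embedding_dims, hidden_dim=32, output_dim=1)

# Dummy batch: 16 sequences of 8 time steps, one integer index per categorical feature
x = torch.randint(0, 3, (16, 8, len(categorical_features)))  # int64 (LongTensor) by default
out = model(x)  # expected shape: (16, 1)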
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, df):
        self.data = df.astype('category').values

    def __getitem__(self, index):
        x = torch.tensor(self.data[index:index+8, :-1])                 # Sequence of 8 steps (input)
        y = torch.tensor(self.data[index+8, -1], dtype=torch.float32)   # Target value for the next step
        return x, y

    def __len__(self):
        return len(self.data) - 8
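For completeness, the dataset is meant to be consumed through a DataLoader roughly like this (the batch size is a placeholder):

from torch.utils.data import DataLoader

dataset = CustomDataset(df)                                  # df is my pandas DataFrame
loader = DataLoader(dataset, batch_size=32, shuffle=False)

for x_batch, y_batch in loader:
    preds = model(x_batch)
    break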
This code raises the following datatype error:
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
How should I process the data so that I can feed it into the model architecture above?
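Would converting the categorical columns to integer codes with pandas' .cat.codes, roughly as sketched below, be the right approach? This is only a sketch of what I have in mind; it assumes the same layout as above (all columns except the last are categorical inputs, and the last column is a numeric target):

import torch
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, df):
        df = df.copy()
        # Map each categorical column to its integer code (0 .. n_categories - 1)
        for col in categorical_features:
            df[col] = df[col].astype('category').cat.codes
        # All columns except the last are encoded categorical inputs, the last column is the target
        self.features = df.iloc[:, :-1].to_numpy(dtype='int64')
        self.target = df.iloc[:, -1].to_numpy(dtype='float32')

    def __getitem__(self, index):
        x = torch.tensor(self.features[index:index + 8], dtype=torch.long)  # 8-step input sequence
        y = torch.tensor(self.target[index + 8], dtype=torch.float32)       # next-step target
        return x, y

    def __len__(self):
        return len(self.features) - 8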