I think I need some help in preparing my numpy dataset for training with Torch.
I have multiple arrays of dimension [10000, 100, 24]
, which corresponds to (time_index, player, playerdata)
. The array isn’t 100% filled and has many zeroes, but I suspect that’s another issue, and I hear I can use a dropout layer later.
I have this code to try and train a model:
...
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def ConvertndArraytoTensor(ndarray):
return torch.from_numpy(ndarray).float().to(DEVICE)
def PytorchTrainOnData(data, model, optimizer, loss_fn, epochs, batch_size, device):
for epoch in range(epochs):
for batch in data:
batch = ConvertndArraytoTensor(batch)
optimizer.zero_grad()
output = model(batch)
loss = loss_fn(output, batch)
loss.backward()
optimizer.step()
print("Epoch: " + str(epoch) + " Loss: " + str(loss))
def DefineTorchModel(input_size, hidden_size, output_size):
model = torch.nn.Sequential(
torch.nn.Linear(input_size, hidden_size),
torch.nn.ReLU(),
torch.nn.Linear(hidden_size, output_size),
)
model.to(DEVICE)
return model
model = DefineTorchModel(input_size=3, hidden_size=10, output_size=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()
epochs = 10
batch_size = 1
PytorchTrainOnData(data, model, optimizer, loss_fn, epochs, batch_size, DEVICE)
And I get this error:
Traceback (most recent call last):
File "/home/user/git/ai/TrainTrackingModel/train_pytorch.py", line 48, in <module>
PytorchTrainOnData(data, model, optimizer, loss_fn, epochs, batch_size, device)
File "/home/user/git/ai/TrainTrackingModel/train_pytorch.py", line 28, in PytorchTrainOnData
output = model(batch)
File "/home/user/Programs/miniconda3/envs/pytorch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/Programs/miniconda3/envs/pytorch/lib/python3.9/site-packages/torch/nn/modules/container.py", line 141, in forward
input = module(input)
File "/home/user/Programs/miniconda3/envs/pytorch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/Programs/miniconda3/envs/pytorch/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1000000x24 and 3x10)
Process finished with exit code 1
My goal is to train a model that can make predictions about what would happen in the next time_index
, but I think I’m making basic mistakes about how to prepare the data and batch it.
Can someone suggest what I should fix?