Trouble with architecture

I have a dataset of 3000 samples. Each sample has 9 measurements, and each measurement contains 146 values.

I would like to create a model that uses the same layers to process each of the 9 measurements. So the first section of the model will have 146 inputs and, let's say, 10 outputs. Then we take the 10 outputs from each of the 9 measurements and concatenate them to get 90 elements.

Then feed these 90 elements into a following set of layers.

Anyway, I tried to do this and I am running into all sorts of trouble: either it complains about the size of the dataset, or it does not process the batch dimension properly.

Thanks !

Could you post your current code as a minimal and executable example showing the errors you were seeing?

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
num_datasets = 9
class SharedMLP(nn.Module):
    """Two-layer MLP whose weights are shared across all measurements.

    Maps a ``(batch, input_size)`` tensor to ``(batch, output_size)``
    through a single hidden layer with a ReLU activation.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(SharedMLP, self).__init__()

        # input -> hidden -> output, with one non-linearity in between.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Single-expression pipeline: fc1 -> ReLU -> fc2.
        return self.fc2(self.relu(self.fc1(x)))

class MultiDatasetModel(nn.Module):
    """Classifier that applies one shared MLP to each measurement,
    concatenates the per-measurement embeddings, and classifies the result.

    Fixes vs. the posted code:
      * ``s2`` was undefined in ``__init__``; the combined layer now maps the
        concatenated features down to ``output_size``, which matches the
        input size expected by ``final_output_layer``.
      * ``forward`` indexed an undefined ``data`` variable and relied on the
        module-level ``num_datasets``; it now slices the measurement axis of
        its own input, so any batch with the right shape works.
      * The final softmax was removed: ``nn.CrossEntropyLoss`` expects raw
        logits (it applies log-softmax internally), so applying softmax twice
        degrades training. ``argmax`` for inference is unaffected.
    """

    def __init__(self, input_size, hidden_size, output_size, num_datasets, num_classes):
        super(MultiDatasetModel, self).__init__()

        # One MLP whose weights are reused for every measurement.
        self.shared_mlp = SharedMLP(input_size, hidden_size, output_size)

        # Reduce the concatenated per-measurement outputs
        # (num_datasets * output_size features) back down to output_size
        # so the final layer's input size matches.
        combined_in = output_size * num_datasets
        self.combined_layer = nn.Linear(combined_in, output_size)
        self.relu_combined = nn.ReLU()

        # Final classification head — emits logits, not probabilities.
        self.final_output_layer = nn.Linear(output_size, num_classes)

    def forward(self, datasets):
        """Map ``(batch, num_datasets, input_size)`` to ``(batch, num_classes)`` logits."""
        # Run every measurement through the same shared MLP.
        shared_outputs = [
            self.shared_mlp(datasets[:, i, :]) for i in range(datasets.shape[1])
        ]
        # (batch, num_datasets * output_size)
        combined = torch.cat(shared_outputs, dim=1)
        combined = self.relu_combined(self.combined_layer(combined))
        return self.final_output_layer(combined)

# BUG FIX: the module path was missing — these names live in torch.utils.data.
from torch.utils.data import Dataset, DataLoader, random_split

class MyDataset(Dataset):
    """Wrap paired data/label tensors and yield ``{'data', 'label'}`` dicts.

    Fixes vs. the posted code: the attribute name was garbled — ``__init__``
    read ``self, data, labels): = data`` and the other methods indexed a
    missing name. All three now use ``self.data``.
    """

    def __init__(self, data, labels):
        # Store the paired sequences; both must have equal length.
        self.data = data
        self.labels = labels

    def __len__(self):
        # Number of samples in the dataset.
        return len(self.data)

    def __getitem__(self, idx):
        # DataLoader's default collate handles dict samples, batching each key.
        return {'data': self.data[idx], 'label': self.labels[idx]}
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, random_split

# --- Create synthetic data -------------------------------------------------
rows = 3000
num_datasets = 9        # one column per measurement; matches the model below
measurement_len = 140   # values per measurement
                        # NOTE(review): the post says 146 per measurement but
                        # the original code used 140 — confirm which is right.

aspect = pd.DataFrame()
for count in range(num_datasets):
    data = [[np.random.rand() for _ in range(measurement_len)] for _ in range(rows)]
    this_column = f"Column_{count}"
    # BUG FIX: the generated column was never stored, leaving `aspect` empty.
    aspect[this_column] = data

# Truth labels with values 0, 1, or 2.
aClassify = np.random.choice([0, 1, 2], size=rows)

# Convert each DataFrame column to a float tensor, then stack along a new
# "measurement" axis -> shape (rows, num_datasets, measurement_len).
column_names = [col for col in aspect.columns if col != 'observation']
input_tensors = [torch.tensor(aspect[col].tolist(), dtype=torch.float32) for col in column_names]
input_dataset = torch.stack(input_tensors, dim=1)
print('Generated Tensor Dataset Shape:', input_dataset.shape)

# CrossEntropyLoss needs integer (long) class targets.
y_tensor = torch.tensor(aClassify, dtype=torch.long)
my_dataset = MyDataset(input_dataset, y_tensor)

# 80/20 train/test split. BUG FIX: random_split comes from torch.utils.data
# (the original called it off an empty module path).
train_size = int(0.8 * len(my_dataset))
test_size = len(my_dataset) - train_size
train_dataset, test_dataset = random_split(my_dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

model = MultiDatasetModel(input_size=measurement_len, hidden_size=32, output_size=10,
                          num_datasets=num_datasets, num_classes=3)
# --- Training --------------------------------------------------------------
# BUG FIX: the optimizer, criterion, and epoch loop were nested inside the
# batch loop (re-creating the optimizer per batch and iterating epochs per
# batch). The structure is now: epochs outside, batches inside.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
num_epochs = 10  # Adjust as needed

for epoch in range(num_epochs):
    # Training loop over mini-batches.
    model.train()
    for batch in train_dataloader:
        data_batch, label_batch = batch['data'], batch['label']
        optimizer.zero_grad()
        outputs = model(data_batch)
        loss = criterion(outputs, label_batch)
        # BUG FIX: the backward pass and optimizer step were missing.
        loss.backward()
        optimizer.step()

    # Validation loop. BUG FIX: `val_loader` was undefined — use the held-out
    # test_dataloader — and batches are dicts, not (inputs, labels) tuples.
    model.eval()
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for batch in test_dataloader:
            inputs, labels = batch['data'], batch['label']
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    accuracy = total_correct / total_samples
    print(f'Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {accuracy:.4f}')