I am developing a physics-informed neural network (PINN) model.
I have a dataset on a space-time grid, with a temperature value at each point. I want to create sub-datasets from this dataset that cover the initial condition (the first row) and the boundary conditions (the first and last columns).
Furthermore, when training I only want to use a percentage of these datasets, i.e. sample a predetermined number of points rather than the entire dataset. How should I proceed?
Soumya_Kundu (Soumya Snigdha Kundu) · August 26, 2024, 9:27am
import numpy as np

def extract_conditions(dataset):
    # Initial condition: the first row (t = 0)
    initial_condition = dataset[0, :]
    # Boundary conditions: first and last columns (spatial boundaries),
    # stacked as shape (num_time_steps, 2)
    boundary_conditions = np.vstack((dataset[:, 0], dataset[:, -1])).T
    return initial_condition, boundary_conditions

def sample_data(data, percentage):
    # Draw the requested percentage of points uniformly at random, without replacement
    num_samples = int(len(data) * percentage / 100)
    sampled_indices = np.random.choice(len(data), num_samples, replace=False)
    return data[sampled_indices]

# Dataset: 2D numpy array (e.g., temperature grid)
# dataset.shape = (num_time_steps, num_spatial_points), here (100, 50)
dataset = np.random.rand(100, 50)  # Example dataset
initial_condition, boundary_conditions = extract_conditions(dataset)

# Dataset sample size
percentage = 20
sampled_initial = sample_data(initial_condition, percentage)
sampled_boundary = sample_data(boundary_conditions, percentage)

print("Sampled Initial Condition:", sampled_initial)
print("Sampled Boundary Conditions:", sampled_boundary)
Are you only looking to build the dataset?
Thank you for the help.
I want to create a dataset from the temperature grid and then use it for training.
Could you also share how it can be used with the DataLoader class subsequently?
Soumya_Kundu (Soumya Snigdha Kundu) · August 26, 2024, 2:57pm
Here is a (mostly prompted) training script:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Data preparation functions
def extract_conditions(dataset):
    initial_condition = dataset[0, :]
    boundary_conditions = np.vstack((dataset[:, 0], dataset[:, -1])).T
    return initial_condition, boundary_conditions

def sample_data(data, percentage):
    num_samples = int(len(data) * percentage / 100)
    sampled_indices = np.random.choice(len(data), num_samples, replace=False)
    return data[sampled_indices]

class TemperatureGridDataset(Dataset):
    def __init__(self, dataset, initial_condition, boundary_conditions, percentage):
        self.percentage = percentage
        # Sample a percentage of the time steps; reuse the same indices for the
        # boundary values so each sample stays aligned with its row of the grid
        num_samples = int(dataset.shape[0] * percentage / 100)
        sampled_indices = np.random.choice(dataset.shape[0], num_samples, replace=False)
        self.sampled_data = dataset[sampled_indices]
        self.initial_condition = initial_condition  # Use as is, don't sample
        self.sampled_boundary = boundary_conditions[sampled_indices]

    def __len__(self):
        return len(self.sampled_data)

    def __getitem__(self, idx):
        initial = self.initial_condition  # Same initial condition for all samples
        boundary = self.sampled_boundary[idx]
        x = np.concatenate([initial, boundary])
        y = self.sampled_data[idx]
        return torch.FloatTensor(x), torch.FloatTensor(y)

# Simple model: one linear layer mapping the (IC + BC) features to a grid row
class SimpleModel(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)

if __name__ == "__main__":
    # Create example dataset
    dataset = np.random.rand(100, 50)  # Example dataset
    initial_condition, boundary_conditions = extract_conditions(dataset)

    # Create custom dataset
    percentage = 20
    temp_dataset = TemperatureGridDataset(dataset, initial_condition, boundary_conditions, percentage)

    # Print information about the dataset
    print(f"Dataset size: {len(temp_dataset)}")
    x, y = temp_dataset[0]
    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")

    # Create DataLoader
    batch_size = 32
    dataloader = DataLoader(temp_dataset, batch_size=batch_size, shuffle=True)

    # Initialize model, loss function, and optimizer
    input_size = x.shape[0]   # Size of input features (num_spatial_points + 2)
    output_size = y.shape[0]  # Size of output (num_spatial_points)
    model = SimpleModel(input_size, output_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        for batch_x, batch_y in dataloader:
            # Forward pass
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            # Backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    print("Training completed!")