Collocation point generation for NN training

I am developing a physics-informed neural network (PINN) model.

I have a dataset in grid form over space and time, with a temperature value at each point. From this dataset I want to create sub-datasets covering the initial condition (the first row) and the boundary conditions (the first and last columns).

When training, I only want to use a percentage of these datasets; that is, I want to sample a predetermined number of points rather than the entire dataset. How should I proceed?

import numpy as np

def extract_conditions(dataset):
    # Extract initial condition (first row)
    initial_condition = dataset[0, :]

    # Extract boundary conditions (first and last columns)
    boundary_conditions = np.vstack((dataset[:, 0], dataset[:, -1])).T
    
    return initial_condition, boundary_conditions

def sample_data(data, percentage):
    num_samples = int(len(data) * percentage / 100)
    sampled_indices = np.random.choice(len(data), num_samples, replace=False)
    return data[sampled_indices]

# Dataset: 2D numpy array (e.g., temperature grid)
# dataset.shape = (num_time_steps, num_spatial_points), e.g. (100, 50)
dataset = np.random.rand(100, 50)  # Example dataset
initial_condition, boundary_conditions = extract_conditions(dataset)

# Dataset sample size (percentage of points to keep)
percentage = 20

sampled_initial = sample_data(initial_condition, percentage)
sampled_boundary = sample_data(boundary_conditions, percentage)

print("Sampled Initial Condition:", sampled_initial)
print("Sampled Boundary Conditions:", sampled_boundary)

Are you only looking to build the dataset?

Thank you for the help.

I want to create a dataset from the temperature grid and then use it for training.

Can you also share how it can be used with the DataLoader class subsequently?

Here is a (mostly prompted) training script:

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Data preparation functions
def extract_conditions(dataset):
    initial_condition = dataset[0, :]
    boundary_conditions = np.vstack((dataset[:, 0], dataset[:, -1])).T
    return initial_condition, boundary_conditions

def sample_data(data, percentage):
    num_samples = int(len(data) * percentage / 100)
    sampled_indices = np.random.choice(len(data), num_samples, replace=False)
    return data[sampled_indices]

class TemperatureGridDataset(Dataset):
    def __init__(self, dataset, initial_condition, boundary_conditions, percentage):
        self.percentage = percentage
        
        # Sample a subset of the time steps (rows) once, up front
        num_samples = int(dataset.shape[0] * percentage / 100)
        sampled_indices = np.random.choice(dataset.shape[0], num_samples, replace=False)

        self.sampled_data = dataset[sampled_indices]
        self.initial_condition = initial_condition  # single row, used as-is for every sample
        # boundary_conditions has one row per time step, so the same indices apply
        self.sampled_boundary = boundary_conditions[sampled_indices]
    
    def __len__(self):
        return len(self.sampled_data)
    
    def __getitem__(self, idx):
        initial = self.initial_condition  # same initial condition for every sample
        boundary = self.sampled_boundary[idx]
        x = np.concatenate([initial, boundary])  # input: initial row plus the two boundary values
        y = self.sampled_data[idx]               # target: full temperature row at this time step
        return torch.FloatTensor(x), torch.FloatTensor(y)
        
# Simple Model
class SimpleModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimpleModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        return self.linear(x)

if __name__ == "__main__":
    # Create example dataset
    dataset = np.random.rand(100, 50)  # Example dataset
    initial_condition, boundary_conditions = extract_conditions(dataset)

    # Create custom dataset
    percentage = 20
    temp_dataset = TemperatureGridDataset(dataset, initial_condition, boundary_conditions, percentage)

    # Print information about the dataset
    print(f"Dataset size: {len(temp_dataset)}")
    x, y = temp_dataset[0]
    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")

    # Create DataLoader
    batch_size = 32
    dataloader = DataLoader(temp_dataset, batch_size=batch_size, shuffle=True)

    # Initialize model, loss function, and optimizer
    input_size = x.shape[0]  # Size of input features
    output_size = y.shape[0]  # Size of output
    model = SimpleModel(input_size, output_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        for batch_x, batch_y in dataloader:
            # Forward pass
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            
            # Backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')  # loss of the last batch in the epoch

    print("Training completed!")