# Collocation point generation for NN training

I am developing a physics informed neural network model.

I have a dataset laid out as a space-time grid, with a temperature value at each point. I want to create sub-datasets from it that cover the initial condition (the first row) and the boundary conditions (the first and last columns).

Further, when training, I only want to use a percentage of these datasets. That is, I want to draw a predetermined number of samples for training rather than use the entire dataset. How should I proceed?

``````
import numpy as np

def extract_conditions(dataset):
    """Split a space-time grid into its initial and boundary slices.

    Args:
        dataset: 2D array-like indexed as ``[time_step, spatial_point]``.

    Returns:
        A tuple ``(initial_condition, boundary_conditions)``.
        ``initial_condition`` is the first row, shape ``(num_spatial_points,)``.
        ``boundary_conditions`` has shape ``(num_time_steps, 2)``: column 0 is
        the first spatial column and column 1 is the last one.
    """
    grid = np.asarray(dataset)

    # Initial condition: every spatial point at the first time step.
    initial_condition = grid[0, :]

    # Boundary conditions: the two outermost spatial points at every time
    # step, paired row by row.
    boundary_conditions = np.column_stack((grid[:, 0], grid[:, -1]))

    return initial_condition, boundary_conditions

def sample_data(data, percentage, rng=None):
    """Draw a random subset of ``data`` along its first axis, without replacement.

    Args:
        data: Array-like; entries are picked along axis 0.
        percentage: Share of entries to keep, from 0 to 100. The resulting
            count is truncated toward zero.
        rng: Optional ``numpy.random.Generator`` for reproducible draws. When
            omitted, the global ``np.random`` state is used, so
            ``np.random.seed`` still controls the result.

    Returns:
        A ``numpy.ndarray`` holding ``int(len(data) * percentage / 100)``
        entries of ``data`` in random order.

    Raises:
        ValueError: If ``percentage`` is outside ``[0, 100]``.
    """
    # Fail early with a readable message instead of the less obvious error
    # np.random.choice raises for an impossible sample size.
    if not 0 <= percentage <= 100:
        raise ValueError(
            f"percentage must be between 0 and 100, got {percentage!r}"
        )

    values = np.asarray(data)
    population = len(values)
    num_samples = int(population * percentage / 100)

    choose = np.random.choice if rng is None else rng.choice
    sampled_indices = choose(population, num_samples, replace=False)
    return values[sampled_indices]

# Example temperature grid: one row per time step, one column per spatial point.
# dataset.shape == (num_time_steps, num_spatial_points) -> (100, 50)
dataset = np.random.rand(100, 50)
initial_condition, boundary_conditions = extract_conditions(dataset)

# Share of each sub-dataset to keep for training, in percent
percentage = 20

# Sample the initial-condition points and the boundary rows independently.
sampled_initial, sampled_boundary = (
    sample_data(subset, percentage)
    for subset in (initial_condition, boundary_conditions)
)

for label, values in (
    ("Sampled Initial Condition:", sampled_initial),
    ("Sampled Boundary Conditions:", sampled_boundary),
):
    print(label, values)
``````

Are you only looking to build the dataset?

Thank you for the help.

I want to create a dataset from the temperature grid and then use it for training.

Can you also share how it can then be used with the `DataLoader` class?

Here is a (mostly prompted) training script:

``````
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

# Data preparation functions
def extract_conditions(dataset):
    """Return the initial-condition row and the boundary columns of a grid.

    Args:
        dataset: 2D array-like whose first row is the initial condition and
            whose first and last columns are the boundaries.

    Returns:
        A tuple ``(initial_condition, boundary_conditions)``.
        ``initial_condition`` is row 0 of the grid. ``boundary_conditions``
        has one row per grid row and two columns: the value from the first
        column followed by the value from the last column.
    """
    grid = np.asarray(dataset)
    initial_condition = grid[0, :]
    # Pair the two boundary values of each row side by side -> shape (rows, 2).
    boundary_conditions = np.column_stack((grid[:, 0], grid[:, -1]))
    return initial_condition, boundary_conditions

def sample_data(data, percentage, rng=None):
    """Pick a random ``percentage`` of the entries of ``data`` without repeats.

    Args:
        data: Array-like; entries are selected along axis 0.
        percentage: Share of entries to keep, from 0 to 100. The resulting
            count is truncated toward zero.
        rng: Optional ``numpy.random.Generator`` for reproducible draws. When
            omitted, the global ``np.random`` state is used.

    Returns:
        A ``numpy.ndarray`` with ``int(len(data) * percentage / 100)`` entries
        of ``data`` in random order.

    Raises:
        ValueError: If ``percentage`` is outside ``[0, 100]``.
    """
    if not 0 <= percentage <= 100:
        raise ValueError(
            f"percentage must be between 0 and 100, got {percentage!r}"
        )

    values = np.asarray(data)
    population = len(values)
    num_samples = int(population * percentage / 100)

    # replace=False guarantees no entry is selected twice.
    if rng is None:
        sampled_indices = np.random.choice(population, num_samples, replace=False)
    else:
        sampled_indices = rng.choice(population, num_samples, replace=False)
    return values[sampled_indices]

class TemperatureGridDataset(Dataset):
    """Map-style dataset of randomly sampled rows from a temperature grid.

    Each item is a pair ``(x, y)``. ``x`` is the full initial condition
    followed by the two boundary values of the sampled row; ``y`` is the
    sampled row itself.
    """

    def __init__(self, dataset, initial_condition, boundary_conditions, percentage):
        """Sample ``percentage`` percent of the grid rows, without replacement.

        Args:
            dataset: 2D ``numpy`` array; rows are sampled along axis 0.
            initial_condition: 1D array stored as-is and prepended to every
                input vector.
            boundary_conditions: Array with one row per row of ``dataset``;
                indexed with the same sampled row indices.
            percentage: Share of rows to keep, from 0 to 100.

        Raises:
            ValueError: If ``percentage`` is outside ``[0, 100]``.
        """
        if not 0 <= percentage <= 100:
            raise ValueError(
                f"percentage must be between 0 and 100, got {percentage!r}"
            )
        self.percentage = percentage

        num_rows = dataset.shape[0]
        num_samples = int(num_rows * percentage / 100)
        # One index set is shared by the grid rows and the boundary rows so
        # each boundary pair stays aligned with the row it came from.
        sampled_indices = np.random.choice(num_rows, num_samples, replace=False)

        self.sampled_data = dataset[sampled_indices]
        # The initial condition is used whole for every sample, never subsampled.
        self.initial_condition = initial_condition
        self.sampled_boundary = boundary_conditions[sampled_indices]

    def __len__(self):
        """Return the number of sampled rows."""
        return len(self.sampled_data)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair for sampled row ``idx``.

        ``x`` concatenates the shared initial condition with the boundary
        values of the row; ``y`` is the row itself. Both keep the dtype of
        the arrays passed to the constructor.
        """
        boundary = self.sampled_boundary[idx]
        x = np.concatenate([self.initial_condition, boundary])
        y = self.sampled_data[idx]
        # The original built x and y but never returned them, so indexing
        # the dataset yielded None.
        return x, y

# Simple Model
class SimpleModel(nn.Module):
    """Single fully connected layer from ``input_size`` to ``output_size``."""

    def __init__(self, input_size, output_size):
        """Create the linear layer.

        Args:
            input_size: Number of input features per sample.
            output_size: Number of output values per sample.
        """
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

if __name__ == "__main__":
# Create example dataset
dataset = np.random.rand(100, 50)  # Example dataset
initial_condition, boundary_conditions = extract_conditions(dataset)

# Create custom dataset
percentage = 20
temp_dataset = TemperatureGridDataset(dataset, initial_condition, boundary_conditions, percentage)

# Print information about the dataset
print(f"Dataset size: {len(temp_dataset)}")
x, y = temp_dataset[0]
print(f"Input shape: {x.shape}")
print(f"Output shape: {y.shape}")

batch_size = 32

# Initialize model, loss function, and optimizer
input_size = x.shape[0]  # Size of input features
output_size = y.shape[0]  # Size of output
model = SimpleModel(input_size, output_size)
criterion = nn.MSELoss()

# Training loop
num_epochs = 10
for epoch in range(num_epochs):