Hello,
I am getting "Segmentation fault (core dumped)" when calling model.to(device). Here is the output from python -m torch.utils.collect_env:
Collecting environment information...
PyTorch version: 1.5.1
Is debug build: No
CUDA used to build PyTorch: 10.2
OS: Red Hat Enterprise Linux Server release 7.9 (Maipo)
GCC version: (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
CMake version: version 2.8.12.2
Python version: 3.7
Is CUDA available: Yes
CUDA runtime version: Could not collect
GPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB
Nvidia driver version: 525.105.17
cuDNN version: Could not collect
Versions of relevant libraries:
[pip3] adabelief-pytorch==0.2.1
[pip3] numpy==1.19.5
[pip3] torch==1.5.1
[conda] adabelief-pytorch 0.2.1 pypi_0 pypi
[conda] blas 1.0 mkl
[conda] libblas 3.9.0 12_linux64_mkl conda-forge
[conda] libcblas 3.9.0 12_linux64_mkl conda-forge
[conda] liblapack 3.9.0 12_linux64_mkl conda-forge
[conda] mkl 2021.4.0 h06a4308_640
[conda] mkl-service 2.4.0 py37h7f8727e_0
[conda] mkl_fft 1.3.1 py37hd3c417c_0
[conda] mkl_random 1.2.2 py37h51133e4_0
[conda] torch 1.5.1 pypi_0 pypi
Here is the relevant code:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from adabelief_pytorch import AdaBelief
import matplotlib.pyplot as plt
def main():
    """Train and evaluate a fully connected pitch-type classifier.

    Reads ``training_data.parquet`` and ``testing_data.parquet``, wraps them
    in DataLoaders, builds the network, and trains it with the AdaBelief
    optimizer and cross-entropy loss. Test accuracy is measured at the start
    of every epoch and displayed on the tqdm progress bar next to the running
    mean of the training loss.
    """
    # Check for GPU availability.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'device: {device}')

    # ------------------------------------------------------------------
    # Datasets and DataLoaders
    # One dataset class serves both splits; only the parquet path differs.
    # ------------------------------------------------------------------
    class PitchDataset(Dataset):
        """In-memory dataset backed by a single parquet file.

        Every column except ``pitch_type`` is used as a feature and
        ``pitch_type`` is used as the label.
        """

        def __init__(self, parquet_path, debug=False):
            """Load ``parquet_path`` and convert it to tensors.

            Args:
                parquet_path: Path of the parquet file to read.
                debug: When true, print the intermediate container types.
            """
            df = pd.read_parquet(parquet_path, engine='fastparquet')
            x_data = df.drop(columns=['pitch_type'])
            features = x_data.to_numpy().astype(np.float32)
            if debug:
                print(f'type(x_data) {type(x_data)}')
                print(f'type(x_data.to_numpy().astype(np.float32)) {type(features)}')
            self.X = torch.tensor(features)
            # nn.CrossEntropyLoss requires int64 class indices; int8 labels
            # make the loss call fail with a dtype error.
            # NOTE(review): assumes pitch_type already holds integer class
            # ids in [0, output_size) -- confirm against the data.
            self.y = torch.tensor(df['pitch_type'].values, dtype=torch.long)
            self.n_samples = df.shape[0]

        def __getitem__(self, index):
            return self.X[index], self.y[index]

        def __len__(self):
            return self.n_samples

    print('After datasets.')

    # Create the datasets and dataloaders.
    train_data = PitchDataset('training_data.parquet', debug=True)
    test_data = PitchDataset('testing_data.parquet')
    batch_size, num_workers = 8, 1
    train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True,
                                  num_workers=num_workers, pin_memory=True)
    test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True,
                                 num_workers=num_workers, pin_memory=True)
    print('After dataloader')

    # ------------------------------------------------------------------
    # Helper functions
    # An accuracy checker and the custom activation used by the model.
    # ------------------------------------------------------------------
    print('Before custom functions.')

    def calculate_accuracy(model, dataloader):
        """Return the fraction of samples in ``dataloader`` classified correctly.

        The model is switched to evaluation mode for the measurement and put
        back into whatever mode it was in afterwards. An empty dataloader
        yields 0.0 instead of a division by zero.
        """
        was_training = model.training
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        model.train(was_training)
        return correct / total if total else 0.0

    class PenalizedTanH(nn.Module):
        """Penalized tanh: tanh(x) for x > 0, 0.25 * tanh(x) otherwise."""

        def __init__(self):
            super(PenalizedTanH, self).__init__()

        def forward(self, x):
            squashed = torch.tanh(x)  # computed once, reused by both branches
            return torch.where(x > 0, squashed, 0.25 * squashed)

    print('After custom functions.')

    # ------------------------------------------------------------------
    # The model
    # A deeper network than the original, trained with the AdaBelief
    # optimizer and the CrossEntropyLoss loss function.
    # ------------------------------------------------------------------
    # Hyperparameters.
    input_size = 31       # Input size (e.g., number of features)
    hidden_size = 64      # Size of the hidden layer(s)
    output_size = 19      # Output size (e.g., number of classes)
    learning_rate = 0.001

    class PitchDNN(nn.Module):
        """MLP whose hidden widths grow to 4x ``hidden_size`` and shrink back."""

        def __init__(self, input_size, hidden_size, output_size):
            super(PitchDNN, self).__init__()
            widths = [hidden_size * m for m in (1, 2, 3, 4, 4, 3, 2, 1)]
            layers = []
            in_features = input_size
            for out_features in widths:
                layers.append(nn.Linear(in_features, out_features))
                layers.append(PenalizedTanH())
                in_features = out_features
            # Final projection to class logits; no activation, because
            # CrossEntropyLoss applies log-softmax itself.
            layers.append(nn.Linear(in_features, output_size))
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x)

    # Create an instance of the model.
    print('At model.')
    model = PitchDNN(input_size, hidden_size, output_size)
    print('After model, before .to(device)')
    print(f'type(model) {type(model)}')
    # NOTE(review): a segfault at this call is most likely an environment
    # problem rather than a bug in this script. The A100 is an Ampere
    # (sm_80) GPU, which needs a PyTorch build compiled against CUDA 11 or
    # newer, whereas torch 1.5.1 was built with CUDA 10.2. Confirm by
    # upgrading PyTorch.
    model.to(device)
    print('After device.')

    # Define the loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = AdaBelief(model.parameters(), lr=learning_rate)

    # Training-loop bookkeeping.
    num_epochs = 10
    # len() of the DataLoader is the integer number of batches per epoch;
    # np.ceil would hand tqdm a float total.
    n_iterations = len(train_dataloader)
    loss_vals = []  # every training-step loss, across all epochs
    acc_vals = []   # test accuracy measured at the start of each epoch

    # Run training loop.
    for epoch in range(num_epochs):
        # Create our tqdm progress bar.
        tqdm_data_loader = tqdm(train_dataloader, total=n_iterations,
                                desc=f'Epoch [{epoch + 1}/{num_epochs}]', dynamic_ncols=True)
        # Check the model's accuracy (do this for every epoch).
        acc_vals.append(calculate_accuracy(model, test_dataloader))
        # Running sum of this epoch's losses, so the mean shown on the bar
        # does not require re-averaging a growing list on every step.
        epoch_loss_sum = 0.0
        # Train the model.
        for step, (inputs, labels) in enumerate(tqdm_data_loader, start=1):
            optimizer.zero_grad()                 # Clear gradients from previous iteration
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)               # Forward pass
            loss = criterion(outputs, labels)     # Compute the loss
            loss.backward()                       # Backpropagation
            optimizer.step()                      # Update model parameters
            loss_value = loss.item()
            loss_vals.append(loss_value)
            epoch_loss_sum += loss_value
            # Update our tqdm loop so we can see what is happening.
            tqdm_data_loader.set_postfix(loss=epoch_loss_sum / step, acc=acc_vals[-1])
# Run the training script only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
The segmentation fault happens at the model.to(device) call. When I run nvidia-smi, I get:
Tue Oct 10 07:47:50 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA A100-SXM... On | 00000000:46:00.0 Off | 0 |
| N/A 27C P0 59W / 400W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
I have tried adjusting the batch size and making my model smaller. Sending my data to the GPU in my Dataset classes does not throw an error.
Any ideas?