Hey all,
Currently using a neural network to perform a binary classification of video data. However, the model seems to hang on loss.backwards(). I expect this could take awhile as I am running on the CPU, but I let it run for 2-3 hours and it was not able to complete one pass.
Below is my model structure
class VestibularNetwork(nn.Module):
def init(self, input_shape: int, hidden_units: int, output_shape: int, num_classes: int, batch_size: int = 1):
super().init()
self.video_block = nn.Sequential(
nn.Conv2d(in_channels=input_shape,
out_channels=hidden_units,
kernel_size=3, # how big is the square that’s going over the image?
stride=1, # default
padding=1),# options = “valid” (no padding) or “same” (output has same shape as input) or int for specific number
nn.ReLU(),
nn.Conv2d(in_channels=hidden_units,
out_channels=hidden_units,
kernel_size=3,
stride=1,
padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2,
stride=2) # default stride value is same as kernel_size
)
#self.label_block = nn.SequenThe batch_size parameter is set to 1 by default, but it’s also used as an argument in the method signature. This could lead to confusion about which value is actually being used for the batch size during the forward pass.tial(
# nn.Linear(in_features=1, out_features=1),
# nn.ReLU()
#)
self.common_block = nn.Sequential(
nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
nn.ReLU(),
nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
nn.ReLU(),
nn.MaxPool2d(2)
)
# Define LSTM layer
self.lstm = nn.LSTM(input_size=hidden_units, hidden_size=hidden_units, batch_first=True)
self.classifier = nn.Linear(in_features=hidden_units, out_features=1)
def forward(self, video_frames, batch_size):
# Process video frames
video_frames = video_frames.float()
(batch_size, sequence_length, channels, height, width) = video_frames.size()
video_frames_reshaped = video_frames.view(batch_size*sequence_length, channels, height, width)
#video_frames = video_frames.permute(2, 0, 1).unsqueeze(0)
#video_frames = video_frames.repeat(batch_size, 1, 1, 1)
print("Video size:", video_frames_reshaped.shape)
x_list = []
for i in range(batch_size):
# Process each batch separately
x_batch = self.video_block(video_frames_reshaped[i * sequence_length:(i + 1) * sequence_length])
x_batch = self.common_block(x_batch)
x_batch = torch.mean(x_batch, dim=[2, 3]) # Pooling or flattening operation, adjust as needed
x_list.append(x_batch)
# Concatenate the outputs of all batches
x = torch.cat(x_list, dim=0)
print(f"after cat: {x.shape}")
# Reshape for LSTM input
x = x.view(batch_size, sequence_length, -1)
# Apply LSTM
lstm_out, _ = self.lstm(x)
# Aggregate predictions using the final state of the LSTM
lstm_out = lstm_out[:, -1, :]
# Apply classifier
x = self.classifier(lstm_out)
print(f"classifier: {x.shape}")
return x
Any tips or insight would be great as I am not sure how long it should take or if there is some issue in my underlying code causing inefficiencies.
Below is some additional code for my dataloader, train loop, and Bayesian optimization.
Dataloader code
class VideoDataset(Dataset):
def init(self, video_dir, video_transform = None, clip_length = 10):
super(VideoDataset).init()
self.video_dir = video_dir
self.video_files = [file for file in os.listdir(video_dir) if file.endswith(‘.mp4’)]
self.labels = [self.extract_label(file) for file in self.video_files]
self.outcomes = [label[2] for label in self.labels]
self.label_features = [self.extract_label(file)[:2] for file in self.video_files] # Ignore outcome label
self.video_transform = video_transform
self.clip_length = clip_lengthdef __len__(self): return len(self.video_files) def __getitem__(self, index): video_file= self.video_files[index] label = self.labels[index] # Ensuring outcome is a 1D tensor outcome = torch.tensor(label[2]) # Create a new tuple for label without modifying the original label label_without_outcome = (torch.tensor(label[0]), torch.tensor(label[1])) # Update the variable name to reflect the change label = label_without_outcome video_path = os.path.join(self.video_dir, video_file) # Open video file cap = cv2.VideoCapture(video_path) # Read frames dynamically frames = [] frame_count = 0 while True: ret, frame = cap.read() if not ret: break frames.append(frame) frame_count += 1 # Check if enough frames are read if frame_count >= self.clip_length: break # Release video capture object cap.release() # Apply transformations if specified if self.video_transform: frames = [self.video_transform(frame) for frame in frames] # Convert frames to tensor frames = torch.stack(frames) return frames, outcome def extract_label(self, filename): # Example: Extract labels using regular expression match = re.search(r'clip\d+_(\w+)_(\w+)_(\w+)', filename) if match: eye_side = 1 if match.group(1) == 'L' else 0 test_pos = 1 if match.group(2) == 'LHPD' else 0 outcome = 0 if match.group(3) == 'N' else 1 return (eye_side, test_pos, outcome) else: return (-1, -1, -1) # Default labels if not found def adjust_clip_length(self, frames): # Adjust clip length by selecting a subset of frames if len(frames) > self.clip_length: start_idx = (len(frames) - self.clip_length) // 2 frames = frames[start_idx:start_idx + self.clip_length] return frames
def create_dataloaders(
data_dir: str,
transform: transforms.Compose,
test_size: int,
batch_size: int,
num_workers: int=NUM_WORKERS,
random_seed: int = 33
):
Here is the train loop
Utilise device agnostic code
device = torch.device(‘cuda’ if torch.cuda.is_available() else ‘cpu’)
botorch_model = BotorchModel(acquisition_function_type=‘qExpectedImprovement’)
init_notebook_plotting()
Set up BotorchModel for Bayesian optimization
botorch_model = BotorchModel(acquisition_function_type=‘qExpectedImprovement’)
ax_client = AxClient()
ax_client.create_experiment(
name=“test_visualizations”,
parameters = [
{
“name”: “batch_size”,
“type”: “range”,
“bounds”: [16, 128]
},
{
“name”: “max_epochs”,
“type”: “range”,
“bounds”: [10, 100]
},
{
“name”: “learning_rate”,
“type”: “range”,
“bounds”: [1e-4, 1e-1],
“log_scale”: True
},
{
“name”: “weight_decay”,
“type”: “range”,
“bounds”: [1e-6, 1e-2],
“log_scale”: True
},
],
objective_name= “auc_score_fold”,
minimize= False,
)for i in range(5):
parameters, trial_index = ax_client.get_next_trial()
ax_client.complete_trial(trial_index=trial_index, raw_data=evaluate(parameters=parameters,
model=vest_model,
dataloader=train_dataloader,
loss_fn=torch.nn.BCEWithLogitsLoss(),
optimizer=optim.Adam(vest_model.parameters()),
device=device))
cv = cross_validate(model=ax_client.generation_strategy.model, folds=-1)
cv.evaluate(parameters=parameters)model = ax_client.generation_strategy.model
render(interact_contour(model=model, metric_name=“auc_score_fold”))render(ax_client.get_optimization_trace())
And lastly, the bayesian optimisation
Define the evaluation function for Bayesian optimization
def evaluate(parameters: str,
model: torch.nn.Module,
dataloader: torch.utils.data.DataLoader,
loss_fn: torch.nn.Module,
optimizer: torch.optim.Optimizer,
device: torch.device) → Tuple[float, float]:
auc_values =
loss_values =
X_train_list =
y_train_list =max_norm = 1.0 model = model.to(device) model.train() # Your existing code for setting up the optimizer for batch_idx, (video_frames_batch, outcomes) in enumerate(dataloader): X_train_tensor = torch.tensor(video_frames_batch, dtype=torch.float).to(device) y_train_tensor = outcomes.float().unsqueeze(1).to(device) if torch.any(torch.isnan(X_train_tensor)) | (torch.any(torch.isnan(y_train_tensor))): print("nan values found") print(f"show the train tensor {X_train_tensor}") # Perform forward pass outputs = model(X_train_tensor, batch_size = 1) print(f"show the outputs {outputs}") #print(f"show the train tensor {X_train_tensor}") #print(f"show the true outcomes {y_train_tensor}") # Compute loss print("Output shape:", outputs.shape) #print("Target shape:", y_train_tensor.shape) loss = loss_fn(outputs, y_train_tensor) print("calculate loss") # Perform backward pass and optimization optimizer.zero_grad() print("zero grad") print(f"show the loss {loss}") print(f"clip grads") torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) print(f"start backwards") loss.backward() print("backward loss") optimizer.step() print("optimiser") loss_values.append(loss.item()) # Compute ROC AUC score auc_value = roc_auc_score(y_train_tensor.cpu().numpy(), outputs.detach().cpu().numpy()) auc_values.append(auc_value) print("auc complete") # Compute average loss avg_loss = sum(loss_values) / len(loss_values) # Compute average ROC AUC score avg_auc = sum(auc_values) / len(auc_values) return avg_loss, avg_auc
Thank you and please let me know if there’s anything else needed.