I am getting the following error:

RuntimeError: CUDA error: invalid configuration argument
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
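
The message alone does not say which kernel launch failed. A minimal sketch of how I rerun it with synchronous launches so the traceback points at the failing op (the environment variable has to be set before torch initializes CUDA; I have not confirmed it changes the trace in this particular case):

# Set before importing torch so CUDA kernel launches are synchronous and
# the failing launch is reported at the exact Python line.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import torch  # imported after the env var so the setting takes effect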
Here is a minimal training script that reproduces it (whether it triggers depends on GPU memory; I am running on an H100):
import torch
import torch.nn as nn
import torch.nn.functional as F
# Simplified model
class TinyModel(nn.Module):
    def __init__(self, scale_factor):
        super(TinyModel, self).__init__()
        self.conv = nn.Conv2d(32, 1, 3, padding=1)
        self.scale_factor = scale_factor

    def forward(self, x):
        H, W = x.size()[2:]  # input is (N, C, H, W)
        x = self.conv(x)
        # Upsample by scale_factor with bilinear interpolation
        x = F.interpolate(x, size=(H * self.scale_factor, W * self.scale_factor),
                          mode='bilinear', align_corners=True)
        return x
# Create random sample
batch_size = 16
channels = 32
height = 32
width = 32
scale_factor = 80 * 5  # 400
# Random input and target
input_tensor = torch.randn(batch_size, channels, height, width).cuda()
target = torch.randn(batch_size, 1, int(height * scale_factor), int(width * scale_factor)).cuda() # Target size matches interpolated output
# Initialize model and optimizer
model = TinyModel(scale_factor=scale_factor).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss().cuda()
# Mini training loop
num_epochs = 10
for epoch in range(num_epochs):
    # Forward pass
    output = model(input_tensor)
    loss = criterion(output, target)

    # Backward pass and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
print("Training finished!")