def training(model, optimiser, scheduler, tn, tf, num_bins, data_loader_train, data_loader_val, num_epoch, device='cuda', save_dir='/content/drive/MyDrive/NeRF/trained_models/Models/', save_every=1, resume_from=None):
# Mount Google Drive to save trained models
drive.mount('/content/drive')
training_loss = []
val_loss = []
start_epoch = 0
if resume_from is not None:
# Load saved checkpoint
checkpoint_path = os.path.join(save_dir, f"model_epoch_{resume_from}.pt")
if os.path.exists(checkpoint_path):
print(f"Resuming training from epoch {resume_from}")
model.load_state_dict(torch.load(checkpoint_path))
start_epoch = resume_from + 1
else:
print(f"No saved checkpoint found at {checkpoint_path}, starting from epoch 0")
for epoch in tqdm(range(start_epoch, num_epoch)):
total = 0
b = 0
correct = 0
for batch in tqdm(data_loader_train):
b += 1
o = batch[:,:3].to(device)
d = batch[:,3:6].to(device)
target = batch[:,6:].to(device)
prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)
loss = ((prediction - target)**2).mean()
correct += ((prediction - target)**2 < 0.01**2).sum().item()
total += target.numel()
accuracy_train = correct / total
print("accuracy_train", accuracy_train)
optimiser.zero_grad()
loss.backward()
optimiser.step()
training_loss.append(loss.item())
scheduler.step()
if epoch % 1 == 0:
correct_val = 0
total_val = 0
k=0
with torch.no_grad():
for batch in tqdm(data_loader_val):
k += 1
o = batch[:,:3].to(device)
d = batch[:,3:6].to(device)
target = batch[:,6:].to(device)
prediction = rendering(model, o, d, tn, tf, num_bins=num_bins, device=device)
valid_loss = ((prediction - target)**2).mean()
correct_val += ((prediction - target)**2 < 0.01**2).sum().item()
val_loss.append(valid_loss)
total_val += target.numel()
accuracy_val = correct_val / total_val
print("accuracy_val", accuracy_val)
# Save model every save_every epochs
if epoch % save_every == 0:
save_path = os.path.join(save_dir, f"model_epoch_new_{epoch}.pt")
torch.save(model, save_path)
return training_loss, val_loss,
This is my training code, and when I running this , I am only getting training accuracy get printed, not the validation accuracy, why?
Did you wait long enough to finish the entire training epoch?
Yes, I have trained upto 6 epochs, only training accuracy is being shown, not validation accuracy. And another important question is when I load the model and test it, it produced a blank image, with psnr value(Lower is better) of 11.66 . But interestigly when I test it with an untrained model it produced some blurry image with a psnr value of 6.33. why it is happening. Please see the below codes.
This is for loading the trained model,
‘’'python
Load the saved model
model_final = torch.load(‘/content/drive/MyDrive/NeRF/trained_models/Models/model_epoch_5.pt’,map_location=torch.device(‘cuda’))
model_weights = model_final.state_dict()
nerf_model = nerf().to(‘cuda’)
nerf_model.load_state_dict(model_weights)
nerf_model.eval()
This is for testing,
imag, mse, psnr = testing(nerf_model, torch.from_numpy(o_test[5]).to(device).float(), torch.from_numpy(d_test[5]).to(device).float(),
tn, tf, num_bins=100, chunck_size=10, target=target_pixel_values_test[5].reshape(400, 400, 3))
Instead of nerf_model, when I change the name to nerf (Untrained model) in the testing function, it produced a blurry image with psnr value of 6.22.
here nerf means, just the instance of the class of my network architecture(main model)
nerf = nerf().to(device) ‘’’