Thanks for getting back.
- I am training the model in both Keras and PyTorch, and the two converge to approximately the same loss (architecture below).
- I am calling model.eval() and using the same preprocessing steps in both frameworks (see the parity sketch below). Do you think the validation loss in the output below looks bad?
- I train the model in one framework (e.g. PyTorch), save it, and load it back in PyTorch.
- Similarly, I train in Keras, save, load in Keras, and test.
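To make the preprocessing point concrete, here is a minimal sketch of the check I'd run to confirm both frameworks receive identical inputs (the image path is a placeholder; the Keras pipeline is assumed to normalize the same way):

import cv2
import numpy as np

img = cv2.imread("sample.png")                        # placeholder path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255.0

pt_input = gray[None, None, :, :].astype(np.float32)  # NCHW for PyTorch
k_input = gray[None, :, :, None].astype(np.float32)   # NHWC for Keras

# The underlying pixel values must match exactly:
assert np.allclose(pt_input[0, 0], k_input[0, ..., 0])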
Code reference
# imports used by the snippets below
import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from tqdm import tqdm
Load data
class DBData(torch.utils.data.Dataset):
    """DATALOADER TRAINING, VALIDATION SETS"""
    def __init__(self, fringe_images, hwrap_images, transform=None):
        """
        Args:
            fringe_images: List of paths to the input images.
            hwrap_images: List of paths to the target images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.fringe_dir = fringe_images
        self.hwrap_dir = hwrap_images
        self.transform = transform  # was hard-coded to None, silently dropping any transform
        self.count = len(self.fringe_dir)

    def __len__(self):
        return len(self.fringe_dir)

    def make_grayscale(self, img):
        # Transform color image to grayscale
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def __getitem__(self, ix):
        # image: grayscale, scaled to [0, 1], shaped (1, H, W)
        image = cv2.imread(self.fringe_dir[ix])
        image = self.make_grayscale(image)
        image = image / 255.0
        image = np.expand_dims(image, axis=-1)
        image = torch.from_numpy(image).permute(2, 0, 1).float()
        # mask: same preprocessing as the image
        target = cv2.imread(self.hwrap_dir[ix])
        target = self.make_grayscale(target)
        target = target / 255.0
        target = np.expand_dims(target, axis=-1)
        target = torch.from_numpy(target).permute(2, 0, 1).float()
        return {'image': image, 'target': target}
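For reference, this is how I sanity-check the dataset wiring (a sketch; fringe_paths/hwrap_paths stand in for my actual lists of file paths):

ds = DBData(fringe_paths, hwrap_paths)
loader = torch.utils.data.DataLoader(ds, batch_size=1)
batch = next(iter(loader))
# each should be [1, 1, H, W], e.g. [1, 1, 160, 160] for my data
print(batch['image'].shape, batch['target'].shape)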
model blocks
def up_block(in_channels, out_channels):
    """DECODING BLOCK"""
    conv_block = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding="same"),
        # torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(inplace=True),
    )
    return conv_block

def down_block(in_channels, out_channels):
    """ENCODING BLOCK"""
    conv_block = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding="same"),
        # torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(inplace=True),
        torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, padding="same"),
        # torch.nn.BatchNorm2d(out_channels),
        torch.nn.ReLU(inplace=True),
    )
    return conv_block

def out(in_channels, out_channels):
    """CLASSIFICATION - FINAL CONVOLUTIONAL LAYER"""
    conv_layer = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels, out_channels, kernel_size=1),
        torch.nn.ReLU(inplace=True)
    )
    return conv_layer

def out_reg(in_channels, out_channels):
    """REGRESSION - FINAL CONVOLUTIONAL LAYER"""
    conv_layer = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding="same")
    )
    return conv_layer
# model setup
class UNet1(torch.nn.Module):
    """UNET ARCHITECTURE REGRESSION"""
    def __init__(self):
        super(UNet1, self).__init__()
        self.pool = torch.nn.MaxPool2d((2, 2))
        self.up_samp = torch.nn.Upsample(scale_factor=2, mode="nearest")
        self.down_conv1 = down_block(1, 32)
        self.down_conv2 = down_block(32, 64)
        self.down_conv3 = down_block(64, 128)
        self.down_conv4 = down_block(128, 256)
        self.down_conv5 = down_block(256, 512)
        self.down_conv6 = down_block(512, 1024)
        self.up_conv1 = up_block(1024, 512)
        self.up_conv2 = up_block(512, 256)
        self.up_conv3 = up_block(256, 128)
        self.up_conv4 = up_block(128, 64)
        self.up_conv5 = up_block(64, 32)
        self.out_reg = out_reg(32, 1)

    def forward(self, image):
        # Encoding section
        e1 = self.down_conv1(image)
        p1 = self.pool(e1)
        e2 = self.down_conv2(p1)
        p2 = self.pool(e2)
        e3 = self.down_conv3(p2)
        p3 = self.pool(e3)
        e4 = self.down_conv4(p3)
        p4 = self.pool(e4)
        e5 = self.down_conv5(p4)
        p5 = self.pool(e5)
        e6 = self.down_conv6(p5)
        # Decoding section
        # Note: each up_conv block is applied twice per stage (once to the
        # upsampled features, once to the skip concatenation), so the two
        # calls share weights; the channel counts still line up.
        u1 = self.up_samp(e6)
        d1 = self.up_conv1(u1)
        c1 = self.up_conv1(torch.cat([e5, d1], dim=1))
        u2 = self.up_samp(c1)
        d2 = self.up_conv2(u2)
        c2 = self.up_conv2(torch.cat([e4, d2], dim=1))
        u3 = self.up_samp(c2)
        d3 = self.up_conv3(u3)
        c3 = self.up_conv3(torch.cat([e3, d3], dim=1))
        u4 = self.up_samp(c3)
        d4 = self.up_conv4(u4)
        c4 = self.up_conv4(torch.cat([e2, d4], dim=1))
        u5 = self.up_samp(c4)
        d5 = self.up_conv5(u5)
        c5 = self.up_conv5(torch.cat([e1, d5], dim=1))
        # print('Encoding dims:', e1.shape, e2.shape, e3.shape, e4.shape, e5.shape, e6.shape)
        # print('Decoding dims:', d1.shape, d2.shape, d3.shape, d4.shape, d5.shape)
        return self.out_reg(c5)

# Check what our model returns as output
model = UNet1()
model(torch.randn(1, 1, 160, 160)).shape  # torch.Size([1, 1, 160, 160])
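To compare the architecture against the Keras version, I also check the trainable parameter count against the total reported by Keras' model.summary():

n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"trainable parameters: {n_params:,}")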
# train, validation
def train(dataset,
          data_loader,
          model,
          criterion,
          optimizer,
          DEVICE,
          task_type,
          save=False):
    """TRAINING"""
    model.train()
    running_loss = 0.0
    num_batches = len(data_loader)
    tk0 = tqdm(data_loader, total=num_batches)
    for d in tk0:
        inputs = d["image"]
        targets = d["target"]
        # checkpoint: visualize a sample
        # plt.imshow(inputs.squeeze(0).numpy().squeeze(0), cmap='gray')
        # plt.imshow(targets.squeeze(0).numpy().squeeze(0), cmap='gray')
        inputs = inputs.to(DEVICE, dtype=torch.float)
        targets = targets.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # detach so the per-batch graphs are not kept alive across the epoch
        running_loss += loss.detach()
        # Classification
        # acc = (torch.max(outputs, 1)[1] == targets).float().mean()
        loss.backward()
        optimizer.step()
    tk0.close()
    return running_loss / num_batches  # , acc

def evaluate(dataset,
             data_loader,
             model,
             criterion,
             scheduler,
             DEVICE,
             task_type):
    """EVALUATION STEP"""
    model.eval()
    final_loss = 0.0
    num_batches = len(data_loader)
    tk0 = tqdm(data_loader, total=num_batches)
    with torch.no_grad():
        for d in tk0:
            inputs = d["image"].to(DEVICE, dtype=torch.float)
            targets = d["target"].to(DEVICE)
            # computation
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # Classification
            # acc = (torch.max(outputs, 1)[1] == targets).float().mean()
            final_loss += loss
            # scheduler.step(loss)
    tk0.close()
    return final_loss / num_batches  # , acc
# train loop
def train_loop(model,
               optimizer,
               criterion,
               EPOCHS,
               fringe_images_train,
               hwrap_images_train,
               fringe_images_test,
               hwrap_images_test):
    """RUNS TRAINING AND VALIDATION"""
    # Training parameters
    best_accuracy = 0.0
    early_stopping_counter = 0
    # Move model to the available device
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(DEVICE)
    # Dataloaders (training and validation)
    train_dataset = DBData(fringe_images_train, hwrap_images_train)
    valid_dataset = DBData(fringe_images_test, hwrap_images_test)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=False,  # NOTE: shuffle=True is the usual choice for training
        num_workers=0
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=0
    )
    # LR scheduler
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        patience=3,
        verbose=True
    )
    print("Starting to train model using the following parameters")
    print(f"Training on {DEVICE}")
    # Training loop
    for epoch in range(EPOCHS):
        # print(f"Training Epoch: {epoch}", optimizer.param_groups[0]['lr'])
        trn_loss = train(
            dataset=train_dataset,
            data_loader=train_loader,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            DEVICE=DEVICE,
            task_type=None  # regression or classification
        )
        # calculate validation loss
        val_loss = evaluate(
            valid_dataset,
            valid_loader,
            model,
            criterion,
            scheduler,
            DEVICE=DEVICE,
            task_type=None
        )
        # Early stopping (early_stopping_counter => patience)
        # if val_acc > best_accuracy:
        #     best_accuracy = val_acc
        # else:
        #     early_stopping_counter += 1
        # if early_stopping_counter > 2:
        #     raise Exception("Early stopping")
        print(f"Training/Validation set{epoch}",
              'train_loss:', trn_loss.detach().cpu().numpy(),
              'val_loss:', val_loss.detach().cpu().numpy())
    print('routine complete')
    return trn_loss.detach().cpu().numpy(), val_loss.detach().cpu().numpy()
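For completeness, this is how I invoke the loop (a sketch; the *_train/*_test variables are my lists of image paths, and the optimizer settings mirror the inference section below):

model = UNet1()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2,
                            weight_decay=1e-4, momentum=0.9, nesterov=True)
trn_loss, val_loss = train_loop(model, optimizer, criterion, 30,
                                fringe_images_train, hwrap_images_train,
                                fringe_images_test, hwrap_images_test)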
# skipping the training run itself
save model
epoch = 30
state = {
    'epoch': epoch + 1,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'learning_rate': 1e-3,
}
torch.save(state, "DB_model.pth")
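Since the concern is behaviour changing across save/load, here is a minimal parity check I'd run right after saving (a sketch; the 160x160 input size is an assumption carried over from the shape check above):

model.eval()
x = torch.randn(1, 1, 160, 160)
with torch.no_grad():
    before = model(x.to(next(model.parameters()).device)).cpu()

reloaded = UNet1()
# map_location='cpu' so the check also works on a CPU-only machine
reloaded.load_state_dict(torch.load("DB_model.pth", map_location='cpu')['state_dict'])
reloaded.eval()
with torch.no_grad():
    after = reloaded(x)

print(torch.allclose(before, after, atol=1e-6))  # should print True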
#inference
# Load saved model
model = UNet1()
learning_rate = 1e-2
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate,
                            weight_decay=1e-4,
                            momentum=0.9,
                            nesterov=True)
# Load stored values (load_state_dict works in place and does not return
# the model/optimizer, so don't reassign its return value)
checkpoint = torch.load("DB_model.pth")
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
# print(checkpoint['optimizer'])
EPOCHS = checkpoint['epoch']
# load image and mask
def make_grayscale(img):
    # Transform color image to grayscale
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

image = cv2.imread("/content/i/img6.png")
image = make_grayscale(image)
image = image / 255.0
image = np.expand_dims(image, axis=-1)
images = torch.from_numpy(image).unsqueeze(0).permute(0, 3, 1, 2).float()
mask = cv2.imread("/content/w/img6.png", 0)

with torch.no_grad():
    # load saved model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = UNet1()
    checkpoint = torch.load('DB_model.pth', map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)  # device was previously computed but never used
    model.eval()
    # Predictions
    _mask = model(images.to(device))
fig = plt.figure(figsize=(20, 12))
# setting values to rows and column variables
rows = 1
columns = 3
# Adds a subplot at the 1st position
fig.add_subplot(rows, columns, 1)
# showing image
plt.imshow(images.squeeze(0).permute(1,2,0).squeeze(2).numpy(), cmap='gray')
plt.axis('off')
plt.title("Input")
# Adds a subplot at the 2nd position
fig.add_subplot(rows, columns, 2)
plt.imshow(_mask.squeeze(0).permute(1,2,0).detach().cpu()[:,:,0]*255.0, cmap='gray')
plt.axis('off')
plt.title("Prediction")
# Adds a subplot at the 3rd position
fig.add_subplot(rows, columns, 3)
plt.imshow(mask, cmap='gray')
plt.axis('off')
plt.title("target")
output
Training images size: 116 116
Validation images size: 27 27
Regression with U-Net
Starting to train model using the following parameters
Training on cuda
(tqdm progress bars omitted; each epoch: 116/116 training batches at ~50 it/s, 27/27 validation batches at ~147 it/s)
Training/Validation set0 train_loss: 0.097666755 val_loss: 0.079518124
Training/Validation set1 train_loss: 0.07775278 val_loss: 0.0782941
Training/Validation set2 train_loss: 0.0763465 val_loss: 0.07699763
Training/Validation set3 train_loss: 0.07478243 val_loss: 0.075491294
Training/Validation set4 train_loss: 0.07280874 val_loss: 0.07346106
Training/Validation set5 train_loss: 0.07020978 val_loss: 0.07079809
Training/Validation set6 train_loss: 0.06662013 val_loss: 0.066935785
Training/Validation set7 train_loss: 0.06101884 val_loss: 0.06107542
Training/Validation set8 train_loss: 0.054175895 val_loss: 0.054572243
Training/Validation set9 train_loss: 0.04758354 val_loss: 0.049077705
Training/Validation set10 train_loss: 0.042901877 val_loss: 0.045424514
Training/Validation set11 train_loss: 0.040017847 val_loss: 0.043156996
Training/Validation set12 train_loss: 0.038354356 val_loss: 0.041663162
Training/Validation set13 train_loss: 0.037522454 val_loss: 0.04087787
Training/Validation set14 train_loss: 0.037010863 val_loss: 0.040288415
Training/Validation set15 train_loss: 0.03626076 val_loss: 0.039554063
Training/Validation set16 train_loss: 0.03532498 val_loss: 0.039040133
Training/Validation set17 train_loss: 0.034851596 val_loss: 0.038595054
Training/Validation set18 train_loss: 0.034334905 val_loss: 0.038168218
Training/Validation set19 train_loss: 0.03380642 val_loss: 0.037747495
Training/Validation set20 train_loss: 0.03329962 val_loss: 0.03736325
Training/Validation set21 train_loss: 0.03276293 val_loss: 0.0369993
Training/Validation set22 train_loss: 0.032343842 val_loss: 0.036700852
Training/Validation set23 train_loss: 0.03199774 val_loss: 0.03642374
Training/Validation set24 train_loss: 0.03165286 val_loss: 0.036165755
Training/Validation set25 train_loss: 0.031332493 val_loss: 0.03593022
Training/Validation set26 train_loss: 0.03102655 val_loss: 0.03571768
Training/Validation set27 train_loss: 0.030747896 val_loss: 0.035525315
Training/Validation set28 train_loss: 0.030485038 val_loss: 0.0353475
Training/Validation set29 train_loss: 0.03022127 val_loss: 0.03518007
routine complete