Hi,
I was trying to extract filenames along with features from the last fully connected layer FCN
of resnet50
i.e. ((2048,))
.
But, when I use kfold
validation, during testloader
, the filenames are repeating after 4 rows, whereas the inputs, targets are unique and changing as expected.
Any idea of how to get the correct filenames for each case in testloader?
Below is the full script file:
Thanks in advance
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import transforms
import torchvision
from sklearn.model_selection import KFold
from torchvision import datasets, transforms, models
from tqdm import tqdm
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()
#Additional Info when using cuda
if device.type == 'cuda':
print(torch.cuda.get_device_name(0))
print('Memory Usage:')
print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
print('Cached: ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')
transforms = {
'Data': transforms.Compose([
transforms.Resize([224,224]),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
# data_dir = '/content/drive/MyDrive/Colab Notebooks/CBIR study/Dataset/map_data/Data'
data_dir = '/content/drive/MyDrive/Colab Notebooks//Dataset/'
dataset = datasets.ImageFolder(data_dir, transforms['Data'])
# data_set_data_dir = DataLoader(dataset=dataset, shuffle=True)
# dataset = torch.utils.data.DataLoader(dataset, shuffle=True)
dataset
if __name__ == '__main__':
# Configuration options
k_folds = 5
num_epochs = 5
loss_function = nn.CrossEntropyLoss()
# List to save final features with Name, target
results = []
# Set fixed random number seed
torch.manual_seed(42)
# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True)
# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
# Print
print(f'FOLD {fold}')
print('--------------------------------')
# Sample elements randomly from a given list of ids, no replacement.
train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
# Define data loaders for training and testing data in this fold
trainloader = torch.utils.data.DataLoader(
dataset,
batch_size=4, sampler=train_subsampler)
testloader = torch.utils.data.DataLoader(
dataset,
batch_size=1, sampler=test_subsampler)
print('\nKfold: {%d}' %(fold+1))
print('--------------------------------')
# Init the neural network
#network = SimpleConvNet()
#network.apply(reset_weights)
network = models.resnet50(pretrained=True)
#network.apply(reset_weights)
num_ftrs = network.fc.in_features
# Here the size of each output sample is set to 3.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
network.fc = nn.Linear(num_ftrs, 2)
network = network.to(device)
# Initialize optimizer
optimizer = torch.optim.Adam(network.parameters(), lr=1e-4)
# Run the training loop for defined number of epochs
for epoch in range(0, num_epochs):
# Print epoch
print(f'Starting epoch {epoch+1}')
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
for i, data in enumerate(trainloader):
network.train()
# Get inputs
inputs, targets = data
inputs = inputs.to(device)
targets = targets.to(device)
# Zero the gradients
optimizer.zero_grad()
# Perform forward pass
outputs = network(inputs)
# outputs = outputs.to(device)
# Compute loss
loss = loss_function(outputs, targets)
# Perform backward pass
loss.backward()
# Perform optimization
optimizer.step()
# Print statistics
current_loss += loss.item()
if i % 50 == 49:
print('Loss after mini-batch %5d: %.3f' %
(i + 1, current_loss / 50))
current_loss = 0.0
# Process is complete.
print('Training process has finished. Saving trained model.')
# Print about testing
print('Starting testing')
# Saving the model
base = '/content/drive/MyDrive/Colab Notebooks/CBIR study/save_models'
save_path = base + f'/model-fold-{fold}.pth'
torch.save(network.state_dict(), save_path)
# Evaluation for this fold
### strip the last layer to use as feature_extractor
feature_extractor = torch.nn.Sequential(*list(network.children())[:-1])
feature_extractor = feature_extractor.to(device)
with torch.no_grad():
# Iterate over the test data and generate predictions
for i, data in enumerate(testloader):
feature_extractor.eval()
# Get inputs
inputs, targets = data
inputs = inputs.to(device)
targets = targets.to(device)
sample_fname = testloader.dataset.samples[i][0]
sample_fname = sample_fname.rsplit('/')[-1]
# print(i, inputs.shape, targets, sample_fname)
# Generate outputs
# outputs = networks(inputs)
feature_tensor = feature_extractor(inputs) # output now has the features corresponding to input x
feature_arr = feature_tensor.cpu().detach().numpy().flatten()
results.append([sample_fname, targets, feature_arr])
flat_list1 = []
flat_list2 = []
for sublist in results:
flat_list1.append([sublist[0], sublist[1]])
flat_list2.append(list(sublist[2].flatten()))
flat_list1
Output:
[['case_0096_ARP0134_0901_LML.jpg', tensor([0], device='cuda:0')],
['case_0097_ARP0135_0901_LML.jpg', tensor([1], device='cuda:0')],
['case_0098_ARP0136_0901_RML.jpg', tensor([0], device='cuda:0')],
['case_0099_ARP0138_0901_RCC.jpg', tensor([0], device='cuda:0')],
['case_0096_ARP0134_0901_LML.jpg', tensor([0], device='cuda:0')],
['case_0097_ARP0135_0901_LML.jpg', tensor([0], device='cuda:0')],
['case_0098_ARP0136_0901_RML.jpg', tensor([0], device='cuda:0')],
['case_0099_ARP0138_0901_RCC.jpg', tensor([1], device='cuda:0')],
['case_0096_ARP0134_0901_LML.jpg', tensor([1], device='cuda:0')],
['case_0097_ARP0135_0901_LML.jpg', tensor([0], device='cuda:0')],
['case_0098_ARP0136_0901_RML.jpg', tensor([1], device='cuda:0')],
['case_0099_ARP0138_0901_RCC.jpg', tensor([1], device='cuda:0')],
['case_0096_ARP0134_0901_LML.jpg', tensor([0], device='cuda:0')],
['case_0097_ARP0135_0901_LML.jpg', tensor([1], device='cuda:0')],
['case_0098_ARP0136_0901_RML.jpg', tensor([1], device='cuda:0')],
['case_0099_ARP0138_0901_RCC.jpg', tensor([0], device='cuda:0')],
['case_0096_ARP0134_0901_LML.jpg', tensor([1], device='cuda:0')],
['case_0097_ARP0135_0901_LML.jpg', tensor([1], device='cuda:0')],
['case_0098_ARP0136_0901_RML.jpg', tensor([0], device='cuda:0')],
['case_0099_ARP0138_0901_RCC.jpg', tensor([1], device='cuda:0')]]
In the above output, we can see the filenames are repeating after every four, wheras the target tnesor is updating