Extract features from the last layer of AlexNet in PyTorch

I have a dataset with 4 classes: A, B, C and D. After training AlexNet to discriminate between these four classes, I want to extract the features from the last layer for each class individually. In other words, I want an array of shape (number of samples in class A, 4096), and the same for B, C and D.
The code is divided into the following stages: load the dataset; modify the last layer of AlexNet; define the train_model function; train AlexNet on all classes in the dataset; create a DataLoader for each class. Open question: should I pass each class separately to the train_model function?

 dataset_root = dset.ImageFolder('path root', transform=Compose([Resize((224,224)),ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))
print(len(dataset))
datasets = train_val_dataset(dataset_root)
print('number of training samples= ', len(datasets['train']))
print('number of validation samples= ', len(datasets['val']))
# The original dataset is available in the Subset class
print(datasets['train'].dataset)

dataloaders    = {x:DataLoader(datasets[x], batch_size= 64, shuffle=True, num_workers=4) for x in ['train','val']}
dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}

class_names = dataset.class_to_idx
print(dict(Counter(dataset.targets)))


x,y = next(iter(dataloaders['train']))
print(x.shape, y.shape)   # The shape of a batch is (batch_size, color_channels, height, width).
print('class_names is :', class_names)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

############
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders`` (keys ``'train'`` and ``'val'``). Batches are
    moved to the module-level ``device`` and the per-epoch averages are
    computed with the module-level ``dataset_sizes``.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, right
            after the training pass.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, loaded with the weights of the epoch that
        reached the highest validation accuracy (or the initial weights if no
        epoch exceeded an accuracy of 0).
    """
    since = time.time()

    # Snapshot of the best weights seen so far; starts as the initial weights.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                # Scale the batch loss by the batch size so that dividing by
                # the dataset size below gives a per-sample value (assumes
                # the criterion averages over the batch -- TODO confirm).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # Four decimals, consistent with the per-epoch report above.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model
############

Alexnet_model = torchvision.models.alexnet(pretrained=True)
# Freeze all pretrained layers so that they are not updated during training.
for param in Alexnet_model.parameters():
    param.requires_grad = False

# Replace the final classifier layer with a new 4-output layer (one output
# per class); being newly created, it is the only layer left trainable.
Alexnet_model.classifier[6] = nn.Linear(Alexnet_model.classifier[6].in_features, 4)

# Compute SGD cross-entropy loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_conv = Alexnet_model.to(device)

criterion = nn.CrossEntropyLoss()

# All classifier parameters are handed to SGD, but every layer except the new
# final one was frozen above, so only the final layer actually gets updated.
optimizer_conv = optim.SGD(model_conv.classifier.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0001)

# Decay LR by a factor of 0.9 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=3, gamma=0.9)

# train_model returns only the model (with its best weights already loaded).
model = train_model(model_conv, criterion, optimizer_conv,
                    exp_lr_scheduler, num_epochs=256)

###############
### pick out the feature extractor:
# `m` is the AlexNet fine-tuned by train_model above.
m = model
# NOTE(review): this swaps the trained 4-output layer for a newly created
# single-output layer, so the trained output-layer weights are not reused.
m.classifier[6] = nn.Linear(Alexnet_model.classifier[6].in_features, 1)

# Compute SGD cross-entropy loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_class = m.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_conv = optim.SGD(model_class.classifier.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=3, gamma=0.9)


# define each class in the dataset
class_names = dataset_root.class_to_idx
print(dict(Counter(dataset_root.targets)))

# NOTE(review): the index ranges are hard-coded; presumably they match the
# per-class counts printed above and rely on the samples being stored grouped
# by class -- confirm against the dataset before reuse.
A_dset = Subset(dataset_root, range(0, 826))
B_dset = Subset(dataset_root, range(826, 1648))
C_dset = Subset(dataset_root, range(1648, 2043))
D_dset = Subset(dataset_root, range(2043, 2870))


# One image per batch, in stored order, so the rows of any extracted feature
# array line up with the dataset indices.
A_loader = DataLoader(A_dset, batch_size=1, shuffle=False)
B_loader = DataLoader(B_dset, batch_size=1, shuffle=False)
C_loader = DataLoader(C_dset, batch_size=1, shuffle=False)
D_loader = DataLoader(D_dset, batch_size=1, shuffle=False)


I am confused about the next step. Should I create a custom train_model for each class, or do something else?

If you need embeddings after training is complete, just run another loop with model.eval() and either use a forward hook, or modify AlexNet so that it returns the 4096-dimensional FC layer output in eval mode or when you pass an additional parameter to the forward method. You don’t need to split the samples per class, because you can always get the current class from the target labels.

I used forward hook like this:

# dataset_root = dset.ImageFolder('E:........', transform=Compose([Resize((224,224)),ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

# Class-name -> label mapping, followed by the number of samples per label.
class_names = dataset_root.class_to_idx
print(dict(Counter(dataset_root.targets)))

# Dataset indices 0..825, served one image per batch in stored order.
# NOTE(review): presumably these indices are exactly the class A samples --
# confirm against the counts printed above.
A_dset = Subset(dataset_root,   range(0, 826))
classA_loader = DataLoader(A_dset, batch_size=1, shuffle = False)

def get_activation(name):
    """Build a forward hook that stores a layer's output under ``name``.

    The returned callable writes the detached output into the ``activation``
    mapping, which must already exist at module level (it is not defined in
    this snippet).
    """
    def _save_output(module, inputs, output):
        activation[name] = output.detach()

    return _save_output

# NOTE(review): this replaces the last classifier layer with a newly created
# single-output Linear layer, so the model yields one value per sample.
trained_model.classifier[6] = nn.Linear(trained_model.classifier[6].in_features, 1)
# Store that layer's output under the key 'fc3' on every forward pass.
trained_model.classifier[6].register_forward_hook(get_activation('fc3'))

# placeholders
PREDS = []

# loop through batches
# NOTE(review): `glioma_tumor_loader` is not defined in this snippet (the
# loader built above is `classA_loader`). Each item is bound to `inputs` as a
# whole; if the loader yields (images, labels) pairs, `inputs` is a list/tuple
# rather than a tensor and must be unpacked first -- confirm.
for idx, inputs in enumerate(glioma_tumor_loader):

    # move to device
    inputs = inputs.to(device)

    # forward pass [with feature extraction]
    preds = trained_model(inputs)

    # add feats and preds to lists
    # (only the model's final output is collected here; whatever the hook
    # stored in `activation` is not read)
    PREDS.append(preds.detach().cpu().numpy())

##### INSPECT FEATURES

PREDS = np.concatenate(PREDS)
print('- preds shape:', PREDS.shape)

I followed the link here. Since I am interested in the last layer, I used PREDS.
Is the code above what you meant by using the hook method?
When I run the code I get this error:

  # move to device
---> 17     inputs = inputs.to(device)
     18 
     19     # forward pass [with feature extraction]

AttributeError: 'list' object has no attribute 'to'

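There’s an issue with the dataloader: ‘glioma_tumor_loader’ returns inputs as a list instead of a Tensor, and a list doesn’t have a ‘to’ method. (If the loader yields (images, labels) pairs, unpack them in the loop, e.g. `for idx, (inputs, labels) in enumerate(loader):`.)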

As for forward hooks, I like this article: “Debugging and Visualisation in PyTorch using Hooks”. This one has an example of a feature extractor: “How to Use PyTorch Hooks. PyTorch hooks provide a simple…” | by Frank Odom | The DL | Medium

Finally, I arrived at this code:

class FeatureExtractor(nn.Module):
      """Wrap the ``classifier`` stack of ``model`` plus its layer at index 6.

      NOTE(review): ``forward`` feeds ``x`` straight into the copied classifier
      stack (which already contains ``model.classifier[6]``), flattens the
      result, and then applies ``model.classifier[6]`` a second time through
      ``self.fc``. ``model.features`` is never called in this class.
      """
      def __init__(self, model):
            super(FeatureExtractor, self).__init__()

            # Copy every layer of model.classifier into a new nn.Sequential.
            self.classifier= list(model.classifier)
            self.classifier = nn.Sequential(*self.classifier)
            self.flatten = nn.Flatten()
            # Extract the last part of fully-connected layer from Alexnet
            self.fc = model.classifier[6]
  
      def forward(self, x):
         # It will take the input 'x' until it returns the feature vector called 'out'
         out = self.classifier(x)
         out = self.flatten(out)
         out = self.fc(out) 
         return out 

# Initialize the model for one class
# NOTE(review): this replaces the final classifier layer with a newly created
# Linear layer that has a single output.
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)
new_model = FeatureExtractor(model)

# Change the device to GPU (falls back to the CPU when CUDA is unavailable)
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
new_model = new_model.to(device)

error

from tqdm import tqdm
import numpy as np

# Transform the image, so it becomes readable with the model
# NOTE(review): unlike the transform used when building dataset_root, this
# one has no Normalize step and passes a single int to Resize -- confirm that
# the resulting tensors really are 3x224x224 before the reshape below.
transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize(224),
  transforms.ToTensor()                              
])

# Will contain the feature
features = []
DATADIR = ('E:/........................')
path = os.path.join(DATADIR)  

# Iterate each image
for i in os.listdir(path):
    
        # Read the file
        img = cv2.imread(os.path.join(path,i)) 
        # Transform the image
        img = transform(img)
        # Add a leading batch dimension of 1.
        img = img.reshape(1, 3, 224, 224)
        img = img.to(device)
        # We only extract features, so we don't need gradient
        with torch.no_grad():
               # Extract the feature from the image
               # NOTE(review): this calls `model`, not `new_model`, so the
               # FeatureExtractor wrapper is not used here. `model` ends in
               # the single-output Linear layer assigned to classifier[6]
               # above, hence one value per image.
               feature = model(img)
        # Convert to NumPy Array, Reshape it, and save it to features variable
        features.append(feature.cpu().detach().numpy().reshape(-1))

# Convert to NumPy Array
features = np.array(features)
features.shape

The output of the last line was (826, 1). Why? My target is (826, 4096) == (number of samples, features in the last FC layer).
Can anyone help me?

Take a look at the classifier sequence:
its last layer is Linear(in=4096, out=1).

I changed the line to self.fc = model.classifier[4], but I still get the same result: (826, 1).

It’s much easier to write a small piece of code:

import torch
import torchvision

def hook_fn(m, i, o):
    """Forward hook that prints the shape of the hooked layer's output.

    ``m`` is the hooked module, ``i`` its input and ``o`` its output; only
    ``o`` is used.
    """
    output_shape = o.shape
    print(output_shape)

# Load the pretrained AlexNet and put it into evaluation mode.
model = torchvision.models.alexnet(pretrained=True)
model.eval()

# Print the output shape of classifier[4] on every forward pass.
model.classifier[4].register_forward_hook(hook_fn)

# One all-zero dummy image is enough to trigger the hook once.
a = torch.zeros(1, 3, 256, 256)
with torch.no_grad():
    model(a)

First of all, thanks for your response and your help.
This is the code:

import torch
import torchvision

def hook_fn(m, i, o):
    """Print the shape of the output ``o`` produced by the hooked module.

    The module ``m`` and its input ``i`` are accepted but not used.
    """
    shape = o.shape
    print(shape)
from tqdm import tqdm
import numpy as np

# Preprocessing applied to every image read from disk.
transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize(224),
  transforms.ToTensor()                              
])

# Collects one flattened model output per image.
features = []
DATADIR = ('E:/.................................')
path = os.path.join(DATADIR)  

# Iterate each image
for i in os.listdir(path):
    
        # Read the file
        img = cv2.imread(os.path.join(path,i)) 
        # Transform the image
        img = transform(img)
            
        # NOTE(review): a hook is registered on every iteration and the
        # returned handle is never removed in this snippet, so the number of
        # registered hooks (and printed shapes per forward pass) presumably
        # grows with each image -- confirm.
        hook=model.classifier[4].register_forward_hook(hook_fn)
        # Add a leading batch dimension of 1.
        img = img.reshape(1, 3, 224, 224)
        img = img.to(device)
        model.eval()

        with torch.no_grad():
             # `feature` is the return value of the whole model, not the
             # output of classifier[4]; hook_fn only prints that layer's
             # output shape and stores nothing.
             feature = model(img)
            
        features.append(feature.cpu().detach().numpy().reshape(-1))

# Stack the per-image vectors into one array.
features = np.array(features)

I got this output
Capture

However, when I check features.shape I get (826, 1).

1

Take a closer look at what hook_fn does: it is called by the model during the forward pass, and it receives the input (as parameter i) and the output (as parameter o) of the layer it was registered on as a hook (model.classifier[4]). Hook functions are named this way because, once attached to a system, they get called by the system itself.

‘features’ holds the final result of your model, which has a Linear(4096, 1) layer at the end. Thus you are collecting the final outputs, not the intermediate outputs of layer model.classifier[4]. You need to collect the values inside the hook function.

Thanks again for your interest and reply. My goal is to extract the features from the last fully connected layer after training AlexNet in shallow-tuning mode, and this is my code: