Hey guys, despite making changes to the model’s weights, the overall speed of the model remains constant and does not show any improvement or degradation.
(If my English is incorrect, I apologize.)
that’s my all code.
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import pdb
from google.colab import drive
drive.mount('/content/drive')
def LoadMNIST(batch_size=32, validation=True, num_workers=1):
root = "data"
transform = transforms.Compose([transforms.ToTensor()])
test_set = torchvision.datasets.MNIST(
root=root, train=False, transform=transform, download=True
)
test_loader = torch.utils.data.DataLoader(
test_set, batch_size=int(batch_size), shuffle=False, num_workers=num_workers
)
return test_loader
class Net(nn.Module):
def __init__(self, q_bit_weight, q_bit_act, p_ratio):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 4, 5, 1)
self.relu1 = nn.ReLU()
self.conv2 = nn.Conv2d(4, 16, 5, 1)
self.relu2 = nn.ReLU()
self.fc1 = nn.Linear(4*4*16, 32)
self.relu3 = nn.ReLU()
self.fc2 = nn.Linear(32, 10)
self.q_bit_weight = q_bit_weight
self.q_bit_act = q_bit_act
self.p_ratio = p_ratio
def forward(self, x):
x1 = self.relu1(self.conv1(x))
x1 = F.max_pool2d(x1, 2, 2)
x2 = self.relu2(self.conv2(x1))
x2 = F.max_pool2d(x2, 2, 2)
x2 = x2.view(-1, 4*4*16)
x3 = self.relu3(self.fc1(x2))
x4 = self.fc2(x3)
return x4
def test(model, test_loader, epoch):
correct = 0
for batch_idx, (data, target) in enumerate(test_loader):
output = model(data)
pred = output.data.max(1)[1]
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
acc = 100. * float(correct) / float(len(test_loader.dataset))
text = f"epoch: {str(epoch).zfill(2)} Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.2f}%)"
return acc
def quantize_weight(layer, q_bit): #signed
q_weight = quantize_layer(layer.weight, q_bit)
layer.weight = nn.Parameter(q_weight)
def quantize_act(input, q_bit): #unsigned
q_act = quantize_layer(input, q_bit)
return q_act
def pruning_weight(input, ratio):
p_weight = prune_layer(input.weight, ratio)
input.weight = nn.Parameter(p_weight)
def quantize_layer(layer, q_bit):
scale_factor = 2 ** q_bit - 1
quantized_layer = (layer * scale_factor).round() / scale_factor
return quantized_layer
def prune_layer(layer, ratio):
num_zeros = int(layer.numel() * ratio)
sorted_weights = layer.view(-1).abs().sort()[0]
threshold = sorted_weights[num_zeros]
pruned_layer = torch.where(torch.abs(layer) <= threshold, 0, layer)
return pruned_layer
quantize_weight(model.conv1, 8)
quantize_weight(model.conv2, 16)
quantize_weight(model.fc1, 16)
quantize_weight(model.fc2, 32)
pruning_weight(model.conv1, 0.5)
pruning_weight(model.conv2, 0.5)
pruning_weight(model.fc1, 0.5)
pruning_weight(model.fc2, 0.5)
test_loader = LoadMNIST(batch_size=256, validation=True)
ckpt_load_path = '/content/drive/MyDrive/Colab Notebooks/model.pth'
q_bit_weight = [8, 16, 16, 32] #layer별 quantization bit
q_bit_act = 8 #activation quantization bit
p_ratio = [0.2, 0.2, 0.2, 0.2] #layer별 pruning ratio
model = Net(q_bit_weight=q_bit_weight, q_bit_act=q_bit_act, p_ratio=p_ratio)
model.load_state_dict(torch.load(ckpt_load_path, map_location=torch.device('cpu')))
model.eval()
test_acc = test(model, test_loader, 0)
print(f"Maximum Test Accuarcy = {test_acc:.4f}")
````Preformatted text`
I want to prune the weights of a specific layer by setting 20% of the lowest weights to zero and assess the layer-wise pruning robustness.
If my English is incorrect, I apologize.