Pruning a language model (DialoGPT) does not change its outputs

I am testing out pruning on Microsoft's DialoGPT model from Hugging Face.

The pruning call runs without errors, but the model's responses look essentially the same as before, even though I prune 100% of the weights in the first attention layer (amount=1.0). Does anyone know what the problem is here?

!pip3 install tqdm boto3 requests regex sentencepiece transformers sacremoses huggingface_hub tokenizers

import torch
import torch.nn.utils.prune as prune
from transformers import AutoTokenizer, AutoModelForCausalLM

# load the pretrained model and tokenizer
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")

# prune 100% of the weights in the first transformer block's fused QKV projection
module = model.transformer.h[0].attn.c_attn
prune.random_unstructured(module, name="weight", amount=1.0)
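To confirm the prune actually took effect, here is a quick sanity check I run right after the prune call (a minimal sketch; torch.nn.utils.prune reparameterizes the module so that weight is recomputed as weight_orig * weight_mask on every forward pass):

# sanity check: pruning should have registered a weight_mask buffer,
# and with amount=1.0 the effective weight should be entirely zeros
print([name for name, _ in module.named_buffers()])  # expect ['weight_mask']
sparsity = float(torch.sum(module.weight == 0)) / module.weight.nelement()
print(f"sparsity of c_attn.weight: {sparsity:.2%}")  # expect 100.00%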

# Let's chat for 5 lines
for step in range(5):
    # encode the new user input, add the eos_token and return a PyTorch tensor
    new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')

    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids

    # generate a response while limiting the total chat history to 1000 tokens
    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

    # pretty print the last output tokens from the bot
    print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))
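Rather than eyeballing the chat responses, here is a small self-contained check I put together to quantify the effect of pruning by comparing logits on a fixed input before and after the prune (a sketch only; the prompt string is an arbitrary example):

import torch
import torch.nn.utils.prune as prune
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
input_ids = tokenizer.encode("Hello, how are you?" + tokenizer.eos_token, return_tensors="pt")

# logits on a fixed prompt before pruning
with torch.no_grad():
    logits_before = model(input_ids).logits

# apply the same prune as above
module = model.transformer.h[0].attn.c_attn
prune.random_unstructured(module, name="weight", amount=1.0)

# logits on the same prompt after pruning
with torch.no_grad():
    logits_after = model(input_ids).logits

# a large difference here means pruning did change the computation,
# even if greedy decoding happens to produce similar-looking text
print((logits_before - logits_after).abs().max())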