Hi,
When I try to load the 4-bit version of the CohereForAI C4AI Command-R model,
I get the following error:
OSError: CohereForAI/c4ai-command-r-v01 does not appear to have a file named configuration_cohere.py. Checkout 'https://huggingface.co/CohereForAI/c4ai-command-r-v01/main' for available files.
I followed the instructions on the model card below:
https://huggingface.co/CohereForAI/c4ai-command-r-v01-4bit
Would you please help me to address this issue?
Reference
My code is as below:
# pip install 'transformers>=4.39.1' bitsandbytes accelerate
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# ** Prompt used by the test further down:
input_text_english_test = "What is ML?"

# ** Sampling settings handed to model.generate():
max_new_tokens = 256
temperature = 0.3

# ** Load the model and tokenizer:
checkpoint = "CohereForAI/c4ai-command-r-v01-4bit"

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The tokenizer is fetched first, then the model; device_map="auto" leaves
# weight placement to the library.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map="auto",
)
# ***** First Test:
# Render a single user turn with the chat template, sample a reply, then print
# the raw decoded sequence followed by the assistant's answer on its own.
print(" ***** First Test ***** ")
messages = [{"role": "user", "content": input_text_english_test}]
# Send the prompt to the device the model reports, so the inputs follow
# whatever placement device_map="auto" chose when the model was loaded.
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# The rendered prompt has this shape, e.g.:
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
gen_tokens = model.generate(
    input_ids,
    max_new_tokens=max_new_tokens,
    do_sample=True,
    temperature=temperature,
)
# Raw view: prompt plus completion, special tokens included.
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)
print(" --------- Print Generated Text --------- ")
# Full sequence with special tokens stripped (still contains the prompt text).
output_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True).strip()
# The returned sequence begins with the prompt tokens, so the answer is taken
# by slicing the token ids at the prompt length. Slicing the decoded string by
# the lengths of the special-token markers is wrong here: those markers were
# already removed by skip_special_tokens=True, so the offset would overshoot
# and cut off the beginning of the answer.
prompt_length = input_ids.shape[-1]
final_result = tokenizer.decode(
    gen_tokens[0][prompt_length:],
    skip_special_tokens=True,
).strip()
print(final_result)
print(" -------------------- \n")