Here’s my code:
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
from typing import Optional

import fire
import jsonlines
import os
import pandas as pd
import multiprocessing as mp

from llama import Llama


class FileWriter:
    """Appends one JSON record per line to a file, guarded by a lock.

    Note: mp.Lock() only synchronizes processes spawned from this one;
    it cannot coordinate the independent ranks that torchrun launches,
    since each rank constructs its own FileWriter (and its own lock).
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.lock = mp.Lock()

    def write(self, text):
        with self.lock:
            with jsonlines.open(self.file_path, 'a') as fout:
                fout.write(text)


DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""


def main(
    ckpt_dir: str,
    tokenizer_path: str,
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 2048,
    max_batch_size: int = 4,
    max_gen_len: Optional[int] = None,
):
    # Build the model once; under torchrun this runs in every rank.
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
    )

    file_dir = '../SuperCLUEOpt_data_test/'
    os.makedirs('opt_res', exist_ok=True)  # make sure the output directory exists
    filenames = os.listdir(file_dir)
    for filename in filenames:
        data_name = os.path.splitext(filename)[0]
        file_writer = FileWriter(f'opt_res/{data_name}_Llama-2-13B-chat.json')

        excel_file = pd.ExcelFile(os.path.join(file_dir, filename))
        df = excel_file.parse('Sheet1', header=None)
        # Columns (header=None): 0=idx, 1=question, 2=task, 3=choices,
        # 4=reference answer, 5=the prompt text sent to the model.
        for _, row in df.iterrows():
            dialog = [
                {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
                {"role": "user", "content": row[5]},
            ]
            result = generator.chat_completion(
                [dialog],  # type: ignore
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )
            print(result[0]['generation']['content'])
            ans = {
                "idx": row[0],
                "task": row[2],
                "question": row[1],
                "choices": row[3],
                "answer": row[4],
                "model_answer": result[0]['generation']['content'],
            }
            file_writer.write(ans)

    print('All done !!!!!!!!!!!!!!!!!!!!!!')


if __name__ == "__main__":
    fire.Fire(main)
'''
conda activate llama
CUDA_VISIBLE_DEVICES=2,3 torchrun --nproc_per_node 2 example_chat_completion.py \
--ckpt_dir llama-2-13b-chat/ \
--tokenizer_path tokenizer.model > llama-2-13b-chat.log 2>&1
'''
The problem I ran into is that the print() output shows up only once in the terminal, but every record written by FileWriter ends up in the output file twice.
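My guess (which I have not verified) is that torchrun --nproc_per_node 2 launches two ranks that both run the whole script, and that Llama.build silences stdout on the non-zero rank, which would explain why print() appears once while both ranks still append to the file. Below is a minimal sketch of the rank guard I am considering; it assumes the RANK environment variable that torchrun sets for each worker, and the is_rank_zero helper name is mine, not part of the llama library:

import os

def is_rank_zero() -> bool:
    # torchrun exports RANK for every worker it launches;
    # defaulting to "0" keeps the script working single-process.
    return int(os.environ.get("RANK", "0")) == 0

# Inside the generation loop, only rank 0 would append:
# if is_rank_zero():
#     file_writer.write(ans)

Does that sound like the right fix, or is there a more standard way to handle this under torchrun?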
If anyone can help me, I would appreciate it so much!