LLaMA-Factory 数据集制作与 Qwen3 模型微调评估

在这里插入图片描述

1 LLaMA-Factory 环境安装

LLaMA-Factory 安装

git clone https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e ".[torch,metrics]"
# 检查环境是否安装成功
llamafactory-cli version

启动 WebUI 界面，修改端口号为 6006，为了适配服务器环境：

GRADIO_SERVER_PORT=6006 llamafactory-cli webui

此外，Windows 本地部署的，启动 WebUI 方式如下：

在 LLaMA-Factory 目录下，输入：

python src/webui.py

2 数据集制作

需要的数据集格式参考官方仓库：

根据官方给的例子，本文所做的数据（文件名：hamful_jugement_train.json）如下：

将 hamful_jugement_train.json 放在 /workspace/LLaMA-Factory/data 中。

准备好数据后，在 dataset_info.json 中添加内容：

"hamful_jugement_train":{"file_name":"hamful_jugement_train.json"}

3 模型下载

这次微调的模型采用 Qwen3-4B-Instruct-2507。

模型地址：Qwen/Qwen3-4B-Instruct-2507

确保安装了 modelscope：

pip install modelscope

采用 SDK 方式下载：

from modelscope  snapshot_download


cache_dir = 


model_dir = snapshot_download(, cache_dir=cache_dir)
()

import json import torch from transformers import AutoModelForCausalLM, AutoTokenizer from sklearn.metrics import accuracy_score, recall_score, f1_score def load_json_data(file_path): """加载 JSON 数据文件""" try: with open(file_path, 'r', encoding='utf-8') as f: return json.load(f) except FileNotFoundError: print(f"错误：找不到文件 {file_path}") return None except json.JSONDecodeError: print(f"错误：文件 {file_path} 不是有效的 JSON 格式") return None def main(): # 模型名称 model_name = "/workspace/LLaMA-Factory/model/Qwen/Qwen3-4B-Instruct-2507-F" # 加载分词器和模型 print(f"正在加载模型 {model_name}...") tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype="auto", device_map="auto" ) model.eval() # 设置为评估模式 # 读取 JSON 文件路径 json_path = 'hamful_jugement_val.json' data = load_json_data(json_path) if not data: return # 初始化评估指标 total = 0 correct = 0 TP = 0 TN = 0 FP = 0 FN = 0 true_labels = [] pred_labels = [] print(f"开始评估，共 {len(data)} 条数据...") # 处理每条数据 for i, item in enumerate(data, 1): try: # 拼接 prompt instruction = item.get("instruction", "") input_content = item.get("input", "") prompt = f"{instruction}{input_content}" true_label = item.get("output", "").strip() # 确保真实标签是有效的 if true_label not in ["Harmful", "Safe"]: print(f"警告：第{i}条数据的 output 不是有效的标签，已跳过") continue # 准备模型输入 messages = [{"role": "user", "content": prompt}] text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True, ) model_inputs = tokenizer([text], return_tensors="pt").to(model.device) # 生成模型输出 with torch.no_grad(): generated_ids = model.generate( **model_inputs, max_new_tokens=10, temperature=0.0, do_sample=False ) # 解析模型输出 output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() content = tokenizer.decode(output_ids, skip_special_tokens=True).strip() # 标准化模型输出 if "harmful" in content.lower(): pred_label = "Harmful" elif "safe" in content.lower(): pred_label = "Safe" else: pred_label = None print(f"第{i}条数据：模型输出 '{content}' 不是有效的标签") # 更新评估指标 if pred_label is not None: total += 1 true_labels.append(1 if true_label == "Harmful" else 0) pred_labels.append(1 if pred_label == "Harmful" else 0) if true_label == pred_label: correct += 1 if true_label == "Harmful": TP += 1 else: TN += 1 else: if true_label == "Harmful": FN += 1 else: FP += 1 # 打印进度 if i % 10 == 0: print(f"已完成 {i}/{len(data)} 条数据") except Exception as e: print(f"处理第{i}条数据时出错：{str(e)}") continue # 计算评估指标 if total == 0: print("没有有效的数据用于评估") return accuracy = correct / total recall = recall_score(true_labels, pred_labels) f1 = f1_score(true_labels, pred_labels) # 输出评估结果 print("\n===== 评估结果 =====") print(f"总数据量：{len(data)}") print(f"有效评估数据量：{total}") print(f"正确预测：{correct}") print(f"准确率：{accuracy:.4f}") print(f"召回率：{recall:.4f}") print(f"F1 分数：{f1:.4f}") print("\n混淆矩阵:") print(f"TP (真阳性): {TP}") print(f"TN (真阴性): {TN}") print(f"FP (假阳性): {FP}") print(f"FN (假阴性): {FN}") if __name__ == "__main__": main()

import torch import pandas as pd from transformers import AutoModelForCausalLM, AutoTokenizer def read_instruction(file_path): """读取 instruction.txt 文件内容""" try: with open(file_path, 'r', encoding='utf-8') as f: return f.read().strip() except FileNotFoundError: print(f"错误：找不到 instruction 文件 {file_path}") return None except Exception as e: print(f"读取 instruction 文件时出错：{str(e)}") return None def main(): # 模型名称 model_name = "/workspace/LLaMA-Factory/model/Qwen/Qwen3-4B-Instruct-2507-F" # 加载分词器和模型 print(f"正在加载模型 {model_name}...") tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype="auto", device_map="auto" ) model.eval() # 设置为评估模式 # 读取 instruction.txt instruction = read_instruction("instruction.txt") if not instruction: return # 读取 Excel 文件路径 excel_path = 'data.xlsx' try: df = pd.read_excel(excel_path) print(f"成功读取 Excel 文件，共 {len(df)} 行数据") except Exception as e: print(f"读取 Excel 文件时出错：{str(e)}") return # 初始化计数器 harmful_count = 0 safe_count = 0 # 遍历 Excel 的第二列（索引为 1） for i, value in enumerate(df.iloc[1:, 1], 2): try: excel_content = str(value).strip() prompt = f"{instruction}{excel_content}" messages = [{"role": "user", "content": prompt}] text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True, ) model_inputs = tokenizer([text], return_tensors="pt").to(model.device) with torch.no_grad(): generated_ids = model.generate( **model_inputs, max_new_tokens=10, temperature=0.0, do_sample=False ) output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() content = tokenizer.decode(output_ids, skip_special_tokens=True).strip() if "harmful" in content.lower(): harmful_count += 1 result = "Harmful" elif "safe" in content.lower(): safe_count += 1 result = "Safe" else: print(f"第{i}行：模型输出 '{content}' 不是有效的标签，已跳过") continue print(f"第{i}行处理完成，结果：{result}") except Exception as e: print(f"处理第{i}行时出错：{str(e)}") continue # 计算 ASR total = harmful_count + safe_count if total == 0: print("\n没有有效的数据用于计算") return asr = harmful_count / total print("\n===== 处理结果 =====") print(f"Harmful 数量：{harmful_count}") print(f"Safe 数量：{safe_count}") print(f"总有效数据量：{total}") print(f"ASR (Harmful 比例): {asr:.4f}") if __name__ == "__main__": main()

LLaMA-Factory 数据集制作与 Qwen3 模型微调评估

1 LLaMA-Factory 环境安装

2 数据集制作

3 模型下载

更多推荐文章

相关免费在线工具

4 使用命令进行训练而非 webui

训练命令

导出模型命令

5 训练后的 Qwen3 模型评估

6 训练后的 Qwen3 模型进行测试

LLaMA-Factory 数据集制作与 Qwen3 模型微调评估

1 LLaMA-Factory 环境安装

2 数据集制作

3 模型下载

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

4 使用命令进行训练 而非 webui

训练命令

导出模型命令

5 训练后的 Qwen3 模型评估

6 训练后的 Qwen3 模型进行测试

4 使用命令进行训练而非 webui