Files
aiagent/backend/scripts/create_homework_manager_agent.py

488 lines
22 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
创建或更新学生作业管理助手AgentStart Cache Transform 合并 LLM Code 拆分 JSON
Transform 拼装 Cache 输出
侧重记录作业项截止日优先级跟进完成情况温和督促与周回顾不代写可提交的作业正文
强化**结构化 homework_board** 写入 `memory.context.homework_board`Redis / 持久记忆合并
学生作业管理助手2号名称含 **2** `HOMEWORK_FAST_AGENT=1`额外侧重**更长 Redis TTL**收紧预算与工具轮次默认 **deepseek-v4-flash**可通过环境变量改DeepSeek **`extra_body` 关闭 thinking**更快更稳的工具链Code 节点兜底避免整条失败
学生作业管理助手3号名称含 **3** `HOMEWORK_V3=1`**基础设施与 2 号同档**TTLhistory 上限8192 tokensthinking 关闭等提示词用**完整版**并追加 **知你客服14号记忆栈**说明`user_memory_*`四字段记忆包 `agent记忆实现方案.md` 对齐也可用 `scripts/create_homework_manager_agent_3.py` 一键创建
用法:
cd backend && .\\venv\\Scripts\\python.exe scripts/create_homework_manager_agent.py
环境变量:
PLATFORM_BASE_URL, PLATFORM_USERNAME, PLATFORM_PASSWORD
AGENT_NAME默认 学生作业管理助手2 `AGENT_NAME=学生作业管理助手2号`3 `AGENT_NAME=学生作业管理助手3号`
HOMEWORK_FAST_AGENT=1可选显式启用 2 号快速档案
HOMEWORK_V3=1可选显式启用 3 号档案通常用名称含3即可
HOMEWORK_LLM_PROVIDER / HOMEWORK_LLM_MODEL / HOMEWORK_LLM_TIMEOUT可选
"""
from __future__ import annotations
import json
import os
import sys
from typing import Any, Dict, List, Optional, Tuple
import requests
BACKEND_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if BACKEND_DIR not in sys.path:
sys.path.insert(0, BACKEND_DIR)
BASE = os.getenv("PLATFORM_BASE_URL", "http://127.0.0.1:8037").rstrip("/")
USER = os.getenv("PLATFORM_USERNAME", "admin")
PWD = os.getenv("PLATFORM_PASSWORD", "123456")
AGENT_NAME = os.getenv("AGENT_NAME", "学生作业管理助手")
FAST_PROFILE = "2号" in AGENT_NAME or os.getenv("HOMEWORK_FAST_AGENT", "").strip().lower() in (
"1",
"true",
"yes",
)
V3_PROFILE = "3号" in AGENT_NAME or os.getenv("HOMEWORK_V3", "").strip().lower() in (
"1",
"true",
"yes",
)
# 2 号 / 3 号共享:长 TTL、较高 max_tokens、可选关闭 thinking 等与「知你类」记忆工程对齐的基础设施
ZHINI_STYLE_INFRA = bool(FAST_PROFILE or V3_PROFILE)
PROVIDER = os.getenv(
"HOMEWORK_LLM_PROVIDER", os.getenv("ENTERPRISE_LLM_PROVIDER", "deepseek")
)
MODEL = os.getenv(
"HOMEWORK_LLM_MODEL", os.getenv("ENTERPRISE_LLM_MODEL", "deepseek-v4-flash")
)
_DEFAULT_TIMEOUT = "120" if ZHINI_STYLE_INFRA else "180"
REQ_TIMEOUT = max(
30,
int(
os.getenv(
"HOMEWORK_LLM_TIMEOUT",
os.getenv("ENTERPRISE_LLM_TIMEOUT", _DEFAULT_TIMEOUT),
)
),
)
if ZHINI_STYLE_INFRA:
REQ_TIMEOUT = min(REQ_TIMEOUT, 150)
BUDGET_CONFIG = (
{"max_steps": 80, "max_llm_invocations": 6, "max_tool_calls": 16}
if ZHINI_STYLE_INFRA
else {"max_steps": 100, "max_llm_invocations": 8, "max_tool_calls": 24}
)
_CACHE_TTL = 1209600 if ZHINI_STYLE_INFRA else 604800
_MAX_HISTORY_LENGTH = 48 if ZHINI_STYLE_INFRA else 40
HOMEWORK_TOOLS = ["file_read", "text_analyze", "datetime", "json_process"]
CODE_SPLIT_HOMEWORK_TAIL_JSON = r"""
def _tail_json_obj(s):
if not isinstance(s, str):
return None
t = s.strip()
if not t:
return None
last_nl = t.rfind("\n")
last_line = t[last_nl + 1 :].strip() if last_nl >= 0 else t
if not last_line.startswith("{"):
return None
try:
o = loads(last_line)
return o if isinstance(o, dict) else None
except Exception:
return None
def _llm_text(inp):
if isinstance(inp, str):
return inp
if isinstance(inp, dict):
out = inp.get("output")
if isinstance(out, str):
return out
if isinstance(out, dict):
return str(out.get("output") or out.get("text") or out.get("content") or "")
if out is not None:
return str(out)
return str(inp)
try:
raw = _llm_text(input_data)
obj = _tail_json_obj(raw)
hb = {}
if obj:
hb = obj.get("homework_board")
if not isinstance(hb, dict):
hb = {}
reply_visible = raw.strip() if isinstance(raw, str) else str(raw).strip()
if obj and isinstance(raw, str):
lines = raw.splitlines()
while lines and not lines[-1].strip():
lines.pop()
if lines and lines[-1].strip().startswith("{"):
lines.pop()
reply_visible = "\n".join(lines).strip()
result = {"reply": reply_visible, "homework_board": hb}
except Exception:
try:
_raw = _llm_text(input_data)
_reply = (_raw.strip() if isinstance(_raw, str) else str(_raw)).strip()
except Exception:
_reply = ""
result = {"reply": _reply, "homework_board": {}}
"""
# 与 agent记忆实现方案 / 知你客服线对齐:末行 JSON 含 user_profile、禁止无视已有快照与对话
HOMEWORK_PROMPT_ZHINI_ALIGN = """
与知你记忆方案对齐 · 必守
- 末行单行 JSON **完整可解析** `homework_board` **必须** `user_profile`用户若已说我叫我的名字是叫我须写入 "user_profile":{"name":""}未获知则 "user_profile":{}
- 先读上方最近对话作业快照再作答用户问有什么作业我有什么语文作业等时若快照或对话里**已有**科目/条目**逐条复述**禁止说没有记录暂时没有或逼用户从零重述除非快照与对话确为空
- 防截断表格与寒暄从简**宁可少写修饰语也不得省略末行 JSON**`homework_board.items` 与正文已列条数一致禁止用空 `items` 覆盖历史条目
"""
# 仅 3 号追加:显式对标知你客服 14 号 / agent记忆实现方案 中的记忆栈描述
HOMEWORK_V3_ZHINI14_APPEND = """
3 · 知你客服14号记忆方案工程对齐
- 与知你客服14号`agent记忆实现方案.md` 一致**Cache ** `user_memory_{user_id}`执行须带稳定 **`user_id`**预览端按 Agent 维度持久化避免退化为 `default` 串会话
- **记忆包四字段**`conversation_history``conversation_summary``user_profile``context`作业结构化数据在 **`context.homework_board`** 2 号相同引擎对末行 JSON `user_profile` Cache 合并逻辑与知你主线一致
- **Redis + 可选 MySQL**节点 TTL 见配置平台开启 `MEMORY_PERSIST_DB_ENABLED` 时与 `persistent_user_memories` 对齐合并冷启动仍可拉回
"""
def _homework_prompt(agent_display_name: str) -> str:
return f"""你是「{agent_display_name}」,帮助学生**记作业**与**监督完成**,语气友好、具体、可执行。
核心能力
1. **记作业**从用户自然语言中提取科目 / 作业内容 / 截止日期与时间 / 老师要求要点 / 预估耗时整理成清单
- 若用户用回形针**上传**了文件或照片消息里会出现相对工作区根路径列表**必须先调用 file_read**用返回的 `content`正文/OCR 文本整理进作业清单勿编造未读到的内容
- 支持常见格式纯文本/Markdown**PDF****Word(.docx)****Excel(.xlsx)****照片**作业拍照等依赖 OCR若工具返回需安装 Tesseract 等提示请如实转告用户并仍可基于用户口述继续记作业
2. **监督完成**根据清单追问进度未开始/进行中/已完成对临近截止的任务给**温和提醒**不制造焦虑可建议拆成小步骤与每日 1530 分钟微习惯
3. **周回顾**用户要求时 json_process 或清晰表格输出本周完成率延期项与下周优先三件事
原则
- **不代写**可提交的作业正文实验报告论文等可提供提纲自检表引用规范提示
- 日期时间以用户所在语境为准需要当前时间可借助工具 datetime
- 不确定的信息如具体截止时刻先列出假设并请用户确认
- 输出优先中文列表用编号便于复制到备忘录
交互习惯
- 用户只说记一下数学作业主动追问截止日与具体要求一次问 12 个点避免审问感
- 用户汇报做完了确认是否需拍照/上传检查清单并建议归档到下一条任务前的小结一句话
持久记忆必须利用
- 当前用户画像{{memory.user_profile}}
- 历史摘要{{memory.conversation_summary}}
- 最近历史{{memory.conversation_history}}
- **已知结构化作业快照优先以此为准可与正文互相补充**{{memory.context.homework_board}}
- 回答前先结合历史判断本轮是否在延续上一轮作业条目若是不要重复问已确认信息如科目截止日期
- 若上一轮你已经列出作业清单而本轮用户只补充了截止时间/科目/完成状态中的一部分必须把该信息回填到上一轮清单并给出更新后的清单禁止再问具体有哪些作业
- 当历史中已出现明确作业条目 4 条作业列表默认这些条目继续有效除非用户明确说作业变了/重置
{HOMEWORK_PROMPT_ZHINI_ALIGN}
结构化记忆强制 · 机器可读
- 在正文结束后**最后单独一行**输出**恰好一行**合法 JSON markdown 围栏格式示例
{{"homework_board":{{"subject":"语文","deadline_text":"2026-05-01","items":[{{"title":"写生字","detail":"第八课"}}],"notes":""}},"user_profile":{{}}}}
- `homework_board` 必须与正文一致若本轮用户只补充截止日/科目须在 `homework_board` **合并更新**已有 `items`可参考上面的快照与对话**禁止用空列表覆盖已有条目**
- 该行仅供系统解析正文不要复述该行 JSON
"""
def _homework_prompt_fast(agent_display_name: str) -> str:
return f"""你是「{agent_display_name}」,帮助学生**记作业**与**跟进度**;回复简短、可执行、中文优先。
持久记忆 先读后答
- 画像{{memory.user_profile}}
- 摘要{{memory.conversation_summary}}
- 最近对话{{memory.conversation_history}}
- **作业快照 homework_board优先采信勿臆测**{{memory.context.homework_board}}
工具 省延迟仅当消息里出现**上传文件的工作区路径列表**时才调用 file_read无附件时不要调用 file_read需要当前时间用 datetime结构化整理可用 json_process
原则不代写可提交正文延续上一轮时不要重复追问已确认的科目/清单用户只改截止日或状态时合并更新清单
{HOMEWORK_PROMPT_ZHINI_ALIGN}
末行 JSON 强制正文结束后**单独一行**合法 JSON markdown 围栏例如
{{"homework_board":{{"subject":"","deadline_text":"","items":[{{"title":"","detail":""}}],"notes":""}},"user_profile":{{}}}}
须与正文一致**合并**已有 items禁止用空列表覆盖历史条目
"""
def _sanitize_edges(edges: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
seen: set = set()
out: List[Dict[str, Any]] = []
for e in edges or []:
s, t = e.get("source"), e.get("target")
if not s or not t or s == t:
continue
key = (s, t, e.get("sourceHandle") or "")
if key in seen:
continue
seen.add(key)
ne = dict(e)
if not ne.get("targetHandle"):
ne["targetHandle"] = "left"
if not ne.get("id"):
sh = ne.get("sourceHandle") or "r"
ne["id"] = f"e_{s}_{t}_{sh}"
out.append(ne)
return out
def build_workflow() -> Dict[str, Any]:
llm_pos: Tuple[int, int] = (680, 220)
if FAST_PROFILE:
_prompt = _homework_prompt_fast(AGENT_NAME)
elif V3_PROFILE:
_prompt = _homework_prompt(AGENT_NAME) + HOMEWORK_V3_ZHINI14_APPEND
else:
_prompt = _homework_prompt(AGENT_NAME)
_llm_temp = 0.22 if FAST_PROFILE else (0.25 if V3_PROFILE else 0.3)
_llm_mti = 6 if FAST_PROFILE else (8 if V3_PROFILE else 10)
_llm_data: Dict[str, Any] = {
"label": "作业管理",
"prompt": _prompt,
"provider": PROVIDER,
"model": MODEL,
"temperature": _llm_temp,
"request_timeout": REQ_TIMEOUT,
"enable_tools": True,
"tools": list(HOMEWORK_TOOLS),
"selected_tools": list(HOMEWORK_TOOLS),
"max_tool_iterations": _llm_mti,
}
if ZHINI_STYLE_INFRA:
# 避免截断末行 JSON → homework_board / user_profile 无法落库
_llm_data["max_tokens"] = 8192
if ZHINI_STYLE_INFRA and PROVIDER.strip().lower() == "deepseek":
_llm_data["extra_body"] = {"thinking": {"type": "disabled"}}
nodes: List[Dict[str, Any]] = [
{"id": "start-1", "type": "start", "position": {"x": 80, "y": 220}, "data": {"label": "开始"}},
{
"id": "cache-query",
"type": "cache",
"position": {"x": 300, "y": 220},
"data": {
"label": "读取记忆",
"operation": "get",
"key": "user_memory_{user_id}",
"ttl": _CACHE_TTL,
"default_value": "{\"conversation_history\": [], \"conversation_summary\": \"\", \"user_profile\": {}, \"context\": {}}",
"input_variables": [],
"output_variables": [],
},
},
{
"id": "transform-merge",
"type": "transform",
"position": {"x": 510, "y": 220},
"data": {
"label": "合并输入与记忆",
"mode": "merge",
"mapping": {
"query": "{{query}}",
"user_input": "{{query}}",
"user_id": "{{user_id}}",
"timestamp": "{{timestamp}}",
"attachments": "{{attachments}}",
"memory": "{{output}}",
"conversation_history": "{{output.conversation_history}}",
"user_profile": "{{output.user_profile}}",
"context": "{{output.context}}",
},
"input_variables": [],
"output_variables": [],
},
},
{
"id": "llm-homework",
"type": "llm",
"position": {"x": llm_pos[0], "y": llm_pos[1]},
"data": dict(_llm_data),
},
{
"id": "code-split-homework-json",
"type": "code",
"position": {"x": llm_pos[0] + 260, "y": 220},
"data": {
"label": "拆分正文与homework_board",
"language": "python",
"code": CODE_SPLIT_HOMEWORK_TAIL_JSON,
"timeout": 20,
},
},
{
"id": "transform-build-append",
"type": "transform",
"position": {"x": llm_pos[0] + 520, "y": 220},
"data": {
"label": "拼装记忆更新",
"mode": "merge",
"mapping": {
"query": "{{query}}",
"user_input": "{{user_input}}",
"user_id": "{{user_id}}",
"timestamp": "{{timestamp}}",
"memory": "{{memory}}",
"output": "{{reply}}",
"homework_board_update": "{{homework_board}}",
},
},
},
{
"id": "cache-update-append",
"type": "cache",
"position": {"x": llm_pos[0] + 780, "y": 220},
"data": {
"label": "写回记忆(追加)",
"operation": "set",
"key": "user_memory_{user_id}",
"ttl": _CACHE_TTL,
"max_history_length": _MAX_HISTORY_LENGTH,
"value": "{\"conversation_summary\": (memory.get(\"conversation_summary\") or \"\"), \"conversation_history\": (memory.get(\"conversation_history\") or []) + [{\"role\": \"user\", \"content\": \"{{user_input}}\", \"timestamp\": \"{{timestamp}}\"}, {\"role\": \"assistant\", \"content\": \"{{output}}\", \"timestamp\": \"{{timestamp}}\"}], \"user_profile\": memory.get(\"user_profile\", {}), \"context\": memory.get(\"context\", {})}",
"input_variables": [],
"output_variables": [],
},
},
{
"id": "transform-output-format",
"type": "transform",
"position": {"x": llm_pos[0] + 1040, "y": 220},
"data": {
"label": "输出格式",
"mode": "merge",
"mapping": {
"reply": "{{output}}",
"output": "{{output}}",
"result": "{{output}}",
},
},
},
{"id": "end-1", "type": "end", "position": {"x": llm_pos[0] + 1300, "y": 220}, "data": {"label": "结束", "output_format": "text"}},
]
edges = _sanitize_edges(
[
{"source": "start-1", "target": "cache-query", "sourceHandle": "right", "targetHandle": "left"},
{"source": "cache-query", "target": "transform-merge", "sourceHandle": "right", "targetHandle": "left"},
{"source": "transform-merge", "target": "llm-homework", "sourceHandle": "right", "targetHandle": "left"},
{"source": "transform-merge", "target": "transform-build-append", "sourceHandle": "left", "targetHandle": "left"},
{"source": "llm-homework", "target": "code-split-homework-json", "sourceHandle": "right", "targetHandle": "left"},
{"source": "code-split-homework-json", "target": "transform-build-append", "sourceHandle": "right", "targetHandle": "left"},
{"source": "transform-build-append", "target": "cache-update-append", "sourceHandle": "right", "targetHandle": "left"},
{"source": "cache-update-append", "target": "transform-output-format", "sourceHandle": "right", "targetHandle": "left"},
{"source": "transform-output-format", "target": "end-1", "sourceHandle": "right", "targetHandle": "left"},
]
)
return {"nodes": nodes, "edges": edges}
def _validate_local(wf: Dict[str, Any]) -> None:
from app.services.workflow_validator import validate_workflow
r = validate_workflow(wf.get("nodes") or [], wf.get("edges") or [])
if not r.get("valid"):
errs = r.get("errors") or []
raise ValueError("工作流校验失败: " + "; ".join(errs))
def _find_agent_id(h: Dict[str, str], name: str) -> Optional[str]:
r = requests.get(f"{BASE}/api/v1/agents", params={"search": name, "limit": 80}, headers=h, timeout=45)
if r.status_code != 200:
return None
for a in r.json() or []:
if a.get("name") == name:
return a.get("id")
return None
def main() -> int:
wf = build_workflow()
try:
_validate_local(wf)
except ValueError as e:
print(e, file=sys.stderr)
return 1
r = requests.post(
f"{BASE}/api/v1/auth/login",
data={"username": USER, "password": PWD},
headers={"Content-Type": "application/x-www-form-urlencoded"},
timeout=15,
)
if r.status_code != 200:
print("登录失败:", r.status_code, r.text[:500], file=sys.stderr)
return 1
token = r.json().get("access_token")
if not token:
print("无 access_token", file=sys.stderr)
return 1
h = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
_max_tool_it = 6 if FAST_PROFILE else (8 if V3_PROFILE else 10)
if FAST_PROFILE:
_profile_note = (
f"快速档案2号TTL {_CACHE_TTL}shistory≤{_MAX_HISTORY_LENGTH},工具轮≤{_max_tool_it}"
f"budget {BUDGET_CONFIG}DeepSeek 关闭 thinking若适用"
)
elif V3_PROFILE:
_profile_note = (
f"3号基于2号基础设施TTL {_CACHE_TTL}shistory≤{_MAX_HISTORY_LENGTH}"
f"工具轮≤{_max_tool_it}max_tokens 8192budget {BUDGET_CONFIG}+ "
"知你客服14号记忆方案user_memory_*、四字段、MySQL 可选);完整提示词 + 记忆栈说明。"
)
else:
_profile_note = ""
desc = (
f"{AGENT_NAME}:记作业(科目、内容、截止日)、跟进度、温和督促与周回顾;"
"支持上传文件/照片后用 file_read 提取正文文本、PDF、docx、xlsx、图片 OCR与 json_process 整理;"
f"默认模型 {PROVIDER}/{MODEL},单次执行内工具迭代上限 {_max_tool_it}"
"持久记忆Redis/cache + conversation_history结构化 homework_board 写入 memory.context末行 JSON"
+ _profile_note
)
existing = _find_agent_id(h, AGENT_NAME)
if existing:
ur = requests.put(
f"{BASE}/api/v1/agents/{existing}",
headers=h,
json={
"description": desc,
"workflow_config": wf,
"budget_config": BUDGET_CONFIG,
},
timeout=120,
)
if ur.status_code != 200:
print("更新失败:", ur.status_code, ur.text[:800], file=sys.stderr)
return 1
print("已更新", AGENT_NAME, existing)
print(json.dumps({"id": existing, "name": AGENT_NAME}, ensure_ascii=False))
return 0
cr = requests.post(
f"{BASE}/api/v1/agents",
headers=h,
json={
"name": AGENT_NAME,
"description": desc,
"workflow_config": wf,
"budget_config": BUDGET_CONFIG,
},
timeout=120,
)
if cr.status_code != 201:
print("创建失败:", cr.status_code, cr.text[:800], file=sys.stderr)
return 1
aid = cr.json()["id"]
print("已创建", AGENT_NAME, aid)
print(json.dumps({"id": aid, "name": AGENT_NAME}, ensure_ascii=False))
return 0
if __name__ == "__main__":
raise SystemExit(main())