aiagent/backend/scripts/create_homework_manager_agent.py

#!/usr/bin/env python3
"""
创建或更新「学生作业管理助手」Agent：Start → Cache 读 → Transform 合并 → LLM → Code 拆分 JSON →
Transform 拼装 → Cache 写 → 输出。

侧重：记录作业项、截止日、优先级；跟进完成情况；温和督促与周回顾（不代写可提交的作业正文）。
强化：**结构化 homework_board** 写入 `memory.context.homework_board`（Redis / 持久记忆合并）。

「学生作业管理助手2号」（名称含 **2号** 或 `HOMEWORK_FAST_AGENT=1`）额外侧重：**更长 Redis TTL**、收紧预算与工具轮次、默认 **deepseek-v4-flash**（可通过环境变量改）、DeepSeek **`extra_body` 关闭 thinking**（更快更稳的工具链）、Code 节点兜底避免整条失败。

「学生作业管理助手3号」（名称含 **3号** 或 `HOMEWORK_V3=1`）：**基础设施与 2 号同档**（TTL、history 上限、8192 tokens、thinking 关闭等）；提示词用**完整版**并追加 **知你客服14号记忆栈**说明（`user_memory_*`、四字段记忆包、与 `agent记忆实现方案.md` 对齐）。也可用 `scripts/create_homework_manager_agent_3.py` 一键创建。

用法:
  cd backend && .\\venv\\Scripts\\python.exe scripts/create_homework_manager_agent.py

环境变量:
  PLATFORM_BASE_URL, PLATFORM_USERNAME, PLATFORM_PASSWORD
  AGENT_NAME（默认 学生作业管理助手）；2 号：`AGENT_NAME=学生作业管理助手2号`；3 号：`AGENT_NAME=学生作业管理助手3号`
  HOMEWORK_FAST_AGENT=1（可选，显式启用 2 号快速档案）
  HOMEWORK_V3=1（可选，显式启用 3 号档案；通常用名称含「3号」即可）
  HOMEWORK_LLM_PROVIDER / HOMEWORK_LLM_MODEL / HOMEWORK_LLM_TIMEOUT（可选）
"""
from __future__ import annotations

import json
import os
import sys
from typing import Any, Dict, List, Optional, Tuple

import requests

BACKEND_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if BACKEND_DIR not in sys.path:
    sys.path.insert(0, BACKEND_DIR)

BASE = os.getenv("PLATFORM_BASE_URL", "http://127.0.0.1:8037").rstrip("/")
USER = os.getenv("PLATFORM_USERNAME", "admin")
PWD = os.getenv("PLATFORM_PASSWORD", "123456")
AGENT_NAME = os.getenv("AGENT_NAME", "学生作业管理助手")
FAST_PROFILE = "2号" in AGENT_NAME or os.getenv("HOMEWORK_FAST_AGENT", "").strip().lower() in (
    "1",
    "true",
    "yes",
)
V3_PROFILE = "3号" in AGENT_NAME or os.getenv("HOMEWORK_V3", "").strip().lower() in (
    "1",
    "true",
    "yes",
)
# 2 号 / 3 号共享：长 TTL、较高 max_tokens、可选关闭 thinking 等与「知你类」记忆工程对齐的基础设施
ZHINI_STYLE_INFRA = bool(FAST_PROFILE or V3_PROFILE)

PROVIDER = os.getenv(
    "HOMEWORK_LLM_PROVIDER", os.getenv("ENTERPRISE_LLM_PROVIDER", "deepseek")
)
MODEL = os.getenv(
    "HOMEWORK_LLM_MODEL", os.getenv("ENTERPRISE_LLM_MODEL", "deepseek-v4-flash")
)
_DEFAULT_TIMEOUT = "120" if ZHINI_STYLE_INFRA else "180"
REQ_TIMEOUT = max(
    30,
    int(
        os.getenv(
            "HOMEWORK_LLM_TIMEOUT",
            os.getenv("ENTERPRISE_LLM_TIMEOUT", _DEFAULT_TIMEOUT),
        )
    ),
)
if ZHINI_STYLE_INFRA:
    REQ_TIMEOUT = min(REQ_TIMEOUT, 150)

BUDGET_CONFIG = (
    {"max_steps": 80, "max_llm_invocations": 6, "max_tool_calls": 16}
    if ZHINI_STYLE_INFRA
    else {"max_steps": 100, "max_llm_invocations": 8, "max_tool_calls": 24}
)

_CACHE_TTL = 1209600 if ZHINI_STYLE_INFRA else 604800
_MAX_HISTORY_LENGTH = 48 if ZHINI_STYLE_INFRA else 40

HOMEWORK_TOOLS = ["file_read", "text_analyze", "datetime", "json_process"]

CODE_SPLIT_HOMEWORK_TAIL_JSON = r"""
def _tail_json_obj(s):
    if not isinstance(s, str):
        return None
    t = s.strip()
    if not t:
        return None
    last_nl = t.rfind("\n")
    last_line = t[last_nl + 1 :].strip() if last_nl >= 0 else t
    if not last_line.startswith("{"):
        return None
    try:
        o = loads(last_line)
        return o if isinstance(o, dict) else None
    except Exception:
        return None


def _llm_text(inp):
    if isinstance(inp, str):
        return inp
    if isinstance(inp, dict):
        out = inp.get("output")
        if isinstance(out, str):
            return out
        if isinstance(out, dict):
            return str(out.get("output") or out.get("text") or out.get("content") or "")
        if out is not None:
            return str(out)
    return str(inp)


try:
    raw = _llm_text(input_data)
    obj = _tail_json_obj(raw)
    hb = {}
    if obj:
        hb = obj.get("homework_board")
        if not isinstance(hb, dict):
            hb = {}
    reply_visible = raw.strip() if isinstance(raw, str) else str(raw).strip()
    if obj and isinstance(raw, str):
        lines = raw.splitlines()
        while lines and not lines[-1].strip():
            lines.pop()
        if lines and lines[-1].strip().startswith("{"):
            lines.pop()
            reply_visible = "\n".join(lines).strip()
    result = {"reply": reply_visible, "homework_board": hb}
except Exception:
    try:
        _raw = _llm_text(input_data)
        _reply = (_raw.strip() if isinstance(_raw, str) else str(_raw)).strip()
    except Exception:
        _reply = ""
    result = {"reply": _reply, "homework_board": {}}
"""

# 与 agent记忆实现方案 / 知你客服线对齐：末行 JSON 含 user_profile、禁止无视已有快照与对话
HOMEWORK_PROMPT_ZHINI_ALIGN = """
【与知你记忆方案对齐 · 必守】
- 末行单行 JSON 须**完整可解析**。除 `homework_board` 外**必须**含 `user_profile`：用户若已说「我叫…」「我的名字是…」「叫我…」等，须写入 "user_profile":{"name":"…"}；未获知则 "user_profile":{}。
- 先读上方「最近对话」「作业快照」再作答：用户问「有什么作业」「我有什么语文作业」等时，若快照或对话里**已有**科目/条目，须**逐条复述**，禁止说「没有记录」「暂时没有」或逼用户从零重述，除非快照与对话确为空。
- 防截断：表格与寒暄从简；**宁可少写修饰语也不得省略末行 JSON**；`homework_board.items` 与正文已列条数一致，禁止用空 `items` 覆盖历史条目。
"""

# 仅 3 号追加：显式对标知你客服 14 号 / agent记忆实现方案 中的记忆栈描述
HOMEWORK_V3_ZHINI14_APPEND = """
【3号 · 知你客服14号记忆方案（工程对齐）】
- 与知你客服14号、`agent记忆实现方案.md` 一致：**Cache 键** `user_memory_{user_id}`；执行须带稳定 **`user_id`**（预览端按 Agent 维度持久化），避免退化为 `default` 串会话。
- **记忆包四字段**：`conversation_history`、`conversation_summary`、`user_profile`、`context`；作业结构化数据在 **`context.homework_board`**（与 2 号相同）；引擎对末行 JSON 的 `user_profile` 与 Cache 合并逻辑与知你主线一致。
- **Redis + 可选 MySQL**：节点 TTL 见配置；平台开启 `MEMORY_PERSIST_DB_ENABLED` 时与 `persistent_user_memories` 对齐合并，冷启动仍可拉回。
"""


def _homework_prompt(agent_display_name: str) -> str:
    return f"""你是「{agent_display_name}」，帮助学生**记作业**与**监督完成**，语气友好、具体、可执行。

【核心能力】
1. **记作业**：从用户自然语言中提取「科目 / 作业内容 / 截止日期与时间 / 老师要求要点 / 预估耗时」，整理成清单。
   - 若用户用回形针**上传**了文件或照片，消息里会出现「相对工作区根路径」列表：**必须先调用 file_read**，用返回的 `content`（正文/OCR 文本）整理进作业清单，勿编造未读到的内容。
   - 支持常见格式：纯文本/Markdown、**PDF**、**Word(.docx)**、**Excel(.xlsx)**、**照片**（作业拍照等，依赖 OCR；若工具返回需安装 Tesseract 等提示，请如实转告用户并仍可基于用户口述继续记作业）。
2. **监督完成**：根据清单追问进度（未开始/进行中/已完成）；对临近截止的任务给**温和提醒**（不制造焦虑）；可建议拆成小步骤与每日 15–30 分钟微习惯。
3. **周回顾**：用户要求时，用 json_process 或清晰表格输出本周完成率、延期项与下周优先三件事。

【原则】
- **不代写**可提交的作业正文、实验报告、论文等；可提供提纲、自检表、引用规范提示。
- 日期时间以用户所在语境为准；需要当前时间可借助工具 datetime。
- 不确定的信息（如具体截止时刻）先列出假设并请用户确认。
- 输出优先中文；列表用编号，便于复制到备忘录。

【交互习惯】
- 用户只说「记一下数学作业」时，主动追问截止日与具体要求（一次问 1–2 个点，避免审问感）。
- 用户汇报「做完了」时，确认是否需拍照/上传检查清单，并建议归档到下一条任务前的小结一句话。

【持久记忆（必须利用）】
- 当前用户画像：{{memory.user_profile}}
- 历史摘要：{{memory.conversation_summary}}
- 最近历史：{{memory.conversation_history}}
- **已知结构化作业快照（优先以此为准，可与正文互相补充）**：{{memory.context.homework_board}}
- 回答前先结合历史判断：本轮是否在“延续上一轮作业条目”。若是，不要重复问已确认信息（如科目、截止日期）。
- 若上一轮你已经列出作业清单，而本轮用户只补充了「截止时间/科目/完成状态」中的一部分，必须把该信息回填到上一轮清单并给出“更新后的清单”；禁止再问“具体有哪些作业”。
- 当历史中已出现明确作业条目（如 4 条作业列表）时，默认这些条目继续有效，除非用户明确说“作业变了/重置”。
{HOMEWORK_PROMPT_ZHINI_ALIGN}
【结构化记忆（强制 · 机器可读）】
- 在正文结束后，**最后单独一行**输出**恰好一行**合法 JSON（勿 markdown 围栏），格式示例：
{{"homework_board":{{"subject":"语文","deadline_text":"2026-05-01","items":[{{"title":"写生字","detail":"第八课"}}],"notes":""}},"user_profile":{{}}}}
- `homework_board` 必须与正文一致；若本轮用户只补充截止日/科目，须在 `homework_board` 中**合并更新**已有 `items`（可参考上面的快照与对话），**禁止用空列表覆盖已有条目**。
- 该行仅供系统解析；正文不要复述该行 JSON。
"""


def _homework_prompt_fast(agent_display_name: str) -> str:
    return f"""你是「{agent_display_name}」，帮助学生**记作业**与**跟进度**；回复简短、可执行、中文优先。

【持久记忆 — 先读后答】
- 画像：{{memory.user_profile}}
- 摘要：{{memory.conversation_summary}}
- 最近对话：{{memory.conversation_history}}
- **作业快照 homework_board（优先采信，勿臆测）**：{{memory.context.homework_board}}

【工具 — 省延迟】仅当消息里出现**上传文件的工作区路径列表**时才调用 file_read；无附件时不要调用 file_read。需要当前时间用 datetime；结构化整理可用 json_process。

【原则】不代写可提交正文；延续上一轮时不要重复追问已确认的科目/清单；用户只改截止日或状态时合并更新清单。
{HOMEWORK_PROMPT_ZHINI_ALIGN}
【末行 JSON — 强制】正文结束后**单独一行**合法 JSON（勿 markdown 围栏），例如：
{{"homework_board":{{"subject":"…","deadline_text":"…","items":[{{"title":"…","detail":"…"}}],"notes":"…"}},"user_profile":{{}}}}
须与正文一致；**合并**已有 items，禁止用空列表覆盖历史条目。
"""


def _sanitize_edges(edges: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    seen: set = set()
    out: List[Dict[str, Any]] = []
    for e in edges or []:
        s, t = e.get("source"), e.get("target")
        if not s or not t or s == t:
            continue
        key = (s, t, e.get("sourceHandle") or "")
        if key in seen:
            continue
        seen.add(key)
        ne = dict(e)
        if not ne.get("targetHandle"):
            ne["targetHandle"] = "left"
        if not ne.get("id"):
            sh = ne.get("sourceHandle") or "r"
            ne["id"] = f"e_{s}_{t}_{sh}"
        out.append(ne)
    return out


def build_workflow() -> Dict[str, Any]:
    llm_pos: Tuple[int, int] = (680, 220)
    if FAST_PROFILE:
        _prompt = _homework_prompt_fast(AGENT_NAME)
    elif V3_PROFILE:
        _prompt = _homework_prompt(AGENT_NAME) + HOMEWORK_V3_ZHINI14_APPEND
    else:
        _prompt = _homework_prompt(AGENT_NAME)
    _llm_temp = 0.22 if FAST_PROFILE else (0.25 if V3_PROFILE else 0.3)
    _llm_mti = 6 if FAST_PROFILE else (8 if V3_PROFILE else 10)
    _llm_data: Dict[str, Any] = {
        "label": "作业管理",
        "prompt": _prompt,
        "provider": PROVIDER,
        "model": MODEL,
        "temperature": _llm_temp,
        "request_timeout": REQ_TIMEOUT,
        "enable_tools": True,
        "tools": list(HOMEWORK_TOOLS),
        "selected_tools": list(HOMEWORK_TOOLS),
        "max_tool_iterations": _llm_mti,
    }
    if ZHINI_STYLE_INFRA:
        # 避免截断末行 JSON → homework_board / user_profile 无法落库
        _llm_data["max_tokens"] = 8192
    if ZHINI_STYLE_INFRA and PROVIDER.strip().lower() == "deepseek":
        _llm_data["extra_body"] = {"thinking": {"type": "disabled"}}

    nodes: List[Dict[str, Any]] = [
        {"id": "start-1", "type": "start", "position": {"x": 80, "y": 220}, "data": {"label": "开始"}},
        {
            "id": "cache-query",
            "type": "cache",
            "position": {"x": 300, "y": 220},
            "data": {
                "label": "读取记忆",
                "operation": "get",
                "key": "user_memory_{user_id}",
                "ttl": _CACHE_TTL,
                "default_value": "{\"conversation_history\": [], \"conversation_summary\": \"\", \"user_profile\": {}, \"context\": {}}",
                "input_variables": [],
                "output_variables": [],
            },
        },
        {
            "id": "transform-merge",
            "type": "transform",
            "position": {"x": 510, "y": 220},
            "data": {
                "label": "合并输入与记忆",
                "mode": "merge",
                "mapping": {
                    "query": "{{query}}",
                    "user_input": "{{query}}",
                    "user_id": "{{user_id}}",
                    "timestamp": "{{timestamp}}",
                    "attachments": "{{attachments}}",
                    "memory": "{{output}}",
                    "conversation_history": "{{output.conversation_history}}",
                    "user_profile": "{{output.user_profile}}",
                    "context": "{{output.context}}",
                },
                "input_variables": [],
                "output_variables": [],
            },
        },
        {
            "id": "llm-homework",
            "type": "llm",
            "position": {"x": llm_pos[0], "y": llm_pos[1]},
            "data": dict(_llm_data),
        },
        {
            "id": "code-split-homework-json",
            "type": "code",
            "position": {"x": llm_pos[0] + 260, "y": 220},
            "data": {
                "label": "拆分正文与homework_board",
                "language": "python",
                "code": CODE_SPLIT_HOMEWORK_TAIL_JSON,
                "timeout": 20,
            },
        },
        {
            "id": "transform-build-append",
            "type": "transform",
            "position": {"x": llm_pos[0] + 520, "y": 220},
            "data": {
                "label": "拼装记忆更新",
                "mode": "merge",
                "mapping": {
                    "query": "{{query}}",
                    "user_input": "{{user_input}}",
                    "user_id": "{{user_id}}",
                    "timestamp": "{{timestamp}}",
                    "memory": "{{memory}}",
                    "output": "{{reply}}",
                    "homework_board_update": "{{homework_board}}",
                },
            },
        },
        {
            "id": "cache-update-append",
            "type": "cache",
            "position": {"x": llm_pos[0] + 780, "y": 220},
            "data": {
                "label": "写回记忆(追加)",
                "operation": "set",
                "key": "user_memory_{user_id}",
                "ttl": _CACHE_TTL,
                "max_history_length": _MAX_HISTORY_LENGTH,
                "value": "{\"conversation_summary\": (memory.get(\"conversation_summary\") or \"\"), \"conversation_history\": (memory.get(\"conversation_history\") or []) + [{\"role\": \"user\", \"content\": \"{{user_input}}\", \"timestamp\": \"{{timestamp}}\"}, {\"role\": \"assistant\", \"content\": \"{{output}}\", \"timestamp\": \"{{timestamp}}\"}], \"user_profile\": memory.get(\"user_profile\", {}), \"context\": memory.get(\"context\", {})}",
                "input_variables": [],
                "output_variables": [],
            },
        },
        {
            "id": "transform-output-format",
            "type": "transform",
            "position": {"x": llm_pos[0] + 1040, "y": 220},
            "data": {
                "label": "输出格式",
                "mode": "merge",
                "mapping": {
                    "reply": "{{output}}",
                    "output": "{{output}}",
                    "result": "{{output}}",
                },
            },
        },
        {"id": "end-1", "type": "end", "position": {"x": llm_pos[0] + 1300, "y": 220}, "data": {"label": "结束", "output_format": "text"}},
    ]
    edges = _sanitize_edges(
        [
            {"source": "start-1", "target": "cache-query", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "cache-query", "target": "transform-merge", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "transform-merge", "target": "llm-homework", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "transform-merge", "target": "transform-build-append", "sourceHandle": "left", "targetHandle": "left"},
            {"source": "llm-homework", "target": "code-split-homework-json", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "code-split-homework-json", "target": "transform-build-append", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "transform-build-append", "target": "cache-update-append", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "cache-update-append", "target": "transform-output-format", "sourceHandle": "right", "targetHandle": "left"},
            {"source": "transform-output-format", "target": "end-1", "sourceHandle": "right", "targetHandle": "left"},
        ]
    )
    return {"nodes": nodes, "edges": edges}


def _validate_local(wf: Dict[str, Any]) -> None:
    from app.services.workflow_validator import validate_workflow

    r = validate_workflow(wf.get("nodes") or [], wf.get("edges") or [])
    if not r.get("valid"):
        errs = r.get("errors") or []
        raise ValueError("工作流校验失败: " + "; ".join(errs))


def _find_agent_id(h: Dict[str, str], name: str) -> Optional[str]:
    r = requests.get(f"{BASE}/api/v1/agents", params={"search": name, "limit": 80}, headers=h, timeout=45)
    if r.status_code != 200:
        return None
    for a in r.json() or []:
        if a.get("name") == name:
            return a.get("id")
    return None


def main() -> int:
    wf = build_workflow()
    try:
        _validate_local(wf)
    except ValueError as e:
        print(e, file=sys.stderr)
        return 1

    r = requests.post(
        f"{BASE}/api/v1/auth/login",
        data={"username": USER, "password": PWD},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        timeout=15,
    )
    if r.status_code != 200:
        print("登录失败:", r.status_code, r.text[:500], file=sys.stderr)
        return 1
    token = r.json().get("access_token")
    if not token:
        print("无 access_token", file=sys.stderr)
        return 1
    h = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    _max_tool_it = 6 if FAST_PROFILE else (8 if V3_PROFILE else 10)
    if FAST_PROFILE:
        _profile_note = (
            f"快速档案（2号）：TTL {_CACHE_TTL}s，history≤{_MAX_HISTORY_LENGTH}，工具轮≤{_max_tool_it}，"
            f"budget {BUDGET_CONFIG}；DeepSeek 关闭 thinking（若适用）。"
        )
    elif V3_PROFILE:
        _profile_note = (
            f"3号：基于2号基础设施（TTL {_CACHE_TTL}s，history≤{_MAX_HISTORY_LENGTH}，"
            f"工具轮≤{_max_tool_it}，max_tokens 8192，budget {BUDGET_CONFIG}）+ "
            "知你客服14号记忆方案（user_memory_*、四字段、MySQL 可选）；完整提示词 + 记忆栈说明。"
        )
    else:
        _profile_note = ""
    desc = (
        f"{AGENT_NAME}：记作业（科目、内容、截止日）、跟进度、温和督促与周回顾；"
        "支持上传文件/照片后用 file_read 提取正文（文本、PDF、docx、xlsx、图片 OCR）与 json_process 整理；"
        f"默认模型 {PROVIDER}/{MODEL}，单次执行内工具迭代上限 {_max_tool_it}；"
        "持久记忆：Redis/cache + conversation_history；结构化 homework_board 写入 memory.context（末行 JSON）。 "
        + _profile_note
    )

    existing = _find_agent_id(h, AGENT_NAME)
    if existing:
        ur = requests.put(
            f"{BASE}/api/v1/agents/{existing}",
            headers=h,
            json={
                "description": desc,
                "workflow_config": wf,
                "budget_config": BUDGET_CONFIG,
            },
            timeout=120,
        )
        if ur.status_code != 200:
            print("更新失败:", ur.status_code, ur.text[:800], file=sys.stderr)
            return 1
        print("已更新", AGENT_NAME, existing)
        print(json.dumps({"id": existing, "name": AGENT_NAME}, ensure_ascii=False))
        return 0

    cr = requests.post(
        f"{BASE}/api/v1/agents",
        headers=h,
        json={
            "name": AGENT_NAME,
            "description": desc,
            "workflow_config": wf,
            "budget_config": BUDGET_CONFIG,
        },
        timeout=120,
    )
    if cr.status_code != 201:
        print("创建失败:", cr.status_code, cr.text[:800], file=sys.stderr)
        return 1
    aid = cr.json()["id"]
    print("已创建", AGENT_NAME, aid)
    print(json.dumps({"id": aid, "name": AGENT_NAME}, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())