# backend/scripts/create_zhini_kefu_16.py

#!/usr/bin/env python3
"""
从「知你客服15号」复制为「知你客服16号」—— **B 能力**：画布 **Loop + 循环体内 LLM**。

- **主线**：Cache 等上游 → `code-build` 注入 `loop_rounds` 数组 → `loop` →（体内）`llm-subtask` → `loop_end`；
  循环结束后 → `code-merge` 将各段 LLM 输出折叠为 `right`/`reply`（并合并 `user_profile`）→ 原 `llm-unified` 之后的节点（如 Cache 写、End）。
- **弃用节点**：原 `llm-unified` 从主链摘除并保留在画布上（无连线），避免与「多段执行」重复；真正推理在 `zhini16-llm-subtask`。
- **工具**：与 15 相同；每段内 `max_tool_iterations` 可单独配置（默认 12）。
- **轮数**：环境变量 `ZHINI16_LOOP_ROUNDS`（默认 3，上限 8）。

引擎要求：`loop` 的**第一条出边**必须指向循环体起点（脚本会对 `zhini16-loop-main` 的出边排序保证）。

用法:
  cd backend && .\\venv\\Scripts\\python.exe scripts/create_zhini_kefu_16.py

环境变量: PLATFORM_BASE_URL, PLATFORM_USERNAME, PLATFORM_PASSWORD,
          SOURCE_AGENT_NAME（默认 知你客服15号）, TARGET_NAME（默认 知你客服16号）
          ZHINI16_LOOP_ROUNDS, ZHINI16_SUBTASK_MAX_TOOL_ITERATIONS
"""
from __future__ import annotations

import copy
import json
import os
import sys
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple

import requests

# Connection settings for the platform API; each one can be overridden through
# the environment variable named in the call.
BASE = os.getenv("PLATFORM_BASE_URL", "http://127.0.0.1:8037").rstrip("/")
USER = os.getenv("PLATFORM_USERNAME", "admin")
# NOTE(review): the fallback password looks like a local-development default;
# set PLATFORM_PASSWORD for any shared environment.
PWD = os.getenv("PLATFORM_PASSWORD", "123456")
SOURCE_NAME = os.getenv("SOURCE_AGENT_NAME", "知你客服15号")
TARGET_NAME = os.getenv("TARGET_NAME", "知你客服16号")


def _env_int(name: str, default: int, lower: int, upper: int) -> int:
    """Read an integer environment variable and clamp it to ``[lower, upper]``.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset, blank, or not an integer.
        lower: Smallest value returned.
        upper: Largest value returned.

    Returns:
        The parsed (or default) value, clamped into the inclusive range.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        value = default
    else:
        try:
            value = int(raw.strip())
        except ValueError:
            # A typo in the environment should not abort the script at import
            # time with a bare traceback; warn and use the documented default.
            print(
                f"警告: 环境变量 {name}={raw!r} 不是整数，使用默认值 {default}",
                file=sys.stderr,
            )
            value = default
    return max(lower, min(value, upper))


# Number of loop segments (default 3, clamped to 1..8).
LOOP_ROUNDS = _env_int("ZHINI16_LOOP_ROUNDS", 3, 1, 8)
# Per-segment tool-iteration cap for the loop-body LLM (default 12, clamped to 1..64).
SUBTASK_MAX_TOOL_ITER = _env_int("ZHINI16_SUBTASK_MAX_TOOL_ITERATIONS", 12, 1, 64)

# Tool names enabled on the loop-body LLM node; the script writes this list to
# both the `tools` and `selected_tools` fields of that node.
TOOLS_V16: List[str] = [
    "http_request",
    "file_read",
    "file_write",
    "text_analyze",
    "datetime",
    "math_calculate",
    "system_info",
    "json_process",
    "database_query",
    "adb_log",
]

# Id of the LLM node in the source workflow whose incoming/outgoing edges are
# re-pointed at the new loop chain.
LLM_UNIFIED = "llm-unified"
# Ids of the nodes this script inserts: rounds builder, loop, in-loop LLM,
# loop end marker, and the post-loop merge step.
N_CODE_BUILD = "zhini16-code-build-rounds"
N_LOOP = "zhini16-loop-main"
N_SUB_LLM = "zhini16-llm-subtask"
N_LOOP_END = "zhini16-loop-end"
N_CODE_MERGE = "zhini16-code-merge-rounds"

# Marker embedded in the subtask prompt addendum; also checked to avoid
# appending the addendum to a base prompt that already contains it.
PROMPT_V16_MARKER = "【知你客服 16 号 · Loop 多段执行】"

# Prompt addendum for the loop-body LLM node. This is an f-string, so the
# quadruple braces render as literal `{{round_total}}`, `{{round_index}}` and
# `{{round}}` in the final text — presumably template placeholders that the
# workflow engine fills in per iteration (TODO confirm against the engine).
PROMPT_V16_SUBTASK_EXTRA = f"""

{PROMPT_V16_MARKER}

【执行方式】当前用户请求会在**同一次 API 调用**内被拆成 **{{{{round_total}}}}** 段顺序执行；你收到的是其中**第 {{{{round_index}}}} 段**（从 0 起计），轮次标量 `round`={{{{round}}}}。每段都调用同一套工具与纪律，但请**聚焦本段可推进的子目标**，避免与前后段完全重复；若本段仅需承接上文，可简短小结并推进下一步。

【输出】每段末尾仍输出**一行合法 JSON**（含 `intent`、`reply`、`user_profile` 等，与 14/15 一致）。`reply` 写**本段**面向用户的可见说明；最终给用户展示时会与各段 `reply` 合并，请勿在段内假设用户已看到其他段的正文。

【纪律】继承 15 号：多步工具链、`database_query` 仅 SELECT、勿编造工具返回、勿刷屏 DSML。
"""


# Source code stored in the post-loop merge `code` node (not executed by this
# script). It is a raw string so the `\n` escapes reach the embedded program
# untouched. The embedded program:
#   * takes the per-segment outputs from `input_data["input"]`, else
#     `input_data["right"]` (when either is a list), or from `input_data`
#     itself when that is a list;
#   * for each non-None segment, tries to parse its last line as a JSON object
#     and uses that object's non-empty `reply`, otherwise the stripped text;
#   * joins the segments under "【第N段】" headings into one string;
#   * merges every parsed `user_profile` dict into `user_profile_update`;
#   * emits a copy of `input_data` (minus `input`) with `reply` set to the
#     merged text and `right` set to `{"right": merged_text}` as `result`.
# NOTE(review): assumes the engine supplies `input_data` and reads `result`.
CODE_MERGE_PYTHON = r"""
import json as _json

def _parse_tail_json_obj(s):
    if not isinstance(s, str):
        return None
    t = s.strip()
    if not t:
        return None
    last_nl = t.rfind("\n")
    last_line = t[last_nl + 1 :].strip() if last_nl >= 0 else t
    if not last_line.startswith("{"):
        return None
    try:
        o = _json.loads(last_line)
        return o if isinstance(o, dict) else None
    except Exception:
        return None

def _reply_from_segment(s):
    if not isinstance(s, str):
        return str(s)
    o = _parse_tail_json_obj(s)
    if o and isinstance(o.get("reply"), str) and o["reply"].strip():
        return o["reply"].strip()
    return s.strip()

parts = []
if isinstance(input_data, dict):
    if isinstance(input_data.get("input"), list):
        parts = input_data.get("input")
    elif isinstance(input_data.get("right"), list):
        parts = input_data.get("right")
elif isinstance(input_data, list):
    parts = input_data
chunks = []
merged_profile = {}
for i, p in enumerate(parts):
    if p is None:
        continue
    chunks.append("【第%d段】\n%s" % (i + 1, _reply_from_segment(p)))
    o = _parse_tail_json_obj(p) if isinstance(p, str) else None
    if o and isinstance(o.get("user_profile"), dict):
        merged_profile.update(o["user_profile"])
merged_text = "\n\n".join(chunks) if chunks else "（循环未产生有效输出）"
out = dict(input_data) if isinstance(input_data, dict) else {}
out.pop("input", None)
out["reply"] = merged_text
out["right"] = {"right": merged_text}
if merged_profile:
    out["user_profile_update"] = merged_profile
result = out
""".strip()


def _code_build_source(rounds: List[int]) -> str:
    """Return the source for the code node that injects ``loop_rounds``.

    The generated snippet copies ``input_data`` (when it is a dict), stores
    the literal ``rounds`` list under ``loop_rounds`` and assigns the copy to
    ``result``.
    """
    statements = [
        "out = dict(input_data) if isinstance(input_data, dict) else {}",
        "out['loop_rounds'] = " + repr(rounds),
        "result = out",
    ]
    return "".join(statement + "\n" for statement in statements)


def _sanitize_edges(edges: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return a cleaned copy of ``edges``.

    Edges without a source or target, self-loops, and repeats of an already
    seen (source, target) pair are dropped. Every surviving edge is copied,
    its handles are forced to ``right`` -> ``left``, and a missing id is
    filled in as ``edge_<source>_<target>``. Input order is preserved and the
    input dicts are not modified.
    """
    cleaned: List[Dict[str, Any]] = []
    visited_pairs: set = set()
    for edge in edges or []:
        src = edge.get("source")
        dst = edge.get("target")
        if not (src and dst) or src == dst:
            continue
        pair = (src, dst)
        if pair in visited_pairs:
            continue
        visited_pairs.add(pair)
        normalized = {**edge, "sourceHandle": "right", "targetHandle": "left"}
        if not normalized.get("id"):
            normalized["id"] = f"edge_{src}_{dst}"
        cleaned.append(normalized)
    return cleaned


def _find_start_node_ids(nodes: List[Dict[str, Any]]) -> List[str]:
    """Collect the ids of nodes that look like workflow entry points.

    A node qualifies when its type (top-level ``type`` or ``data.type``,
    compared case-insensitively) is ``start``, or when its id is ``start`` or
    begins with ``start-``. A missing id is reported as an empty string.
    """
    found: List[str] = []
    for node in nodes or []:
        node_id = node.get("id") or ""
        declared = node.get("type") or (node.get("data") or {}).get("type") or ""
        kind = declared.lower()
        if kind == "start":
            found.append(node_id)
        elif node_id == "start" or str(node_id).startswith("start-"):
            found.append(node_id)
    return found


def _compute_ranks(
    nodes: List[Dict[str, Any]], edges: List[Dict[str, Any]]
) -> Dict[str, int]:
    """Assign a column index (rank) to every node id.

    Start nodes get rank 0. When none are detected, nodes without incoming
    edges are used, falling back to the first node. Each edge then pushes its
    target to at least ``rank(source) + 1``; the sweeps repeat until nothing
    changes or a fixed cap is hit, so a cyclic graph still terminates. Nodes
    never reached are appended afterwards, one new rank each.
    """
    node_ids = [n["id"] for n in nodes if n.get("id")]
    start_ids = _find_start_node_ids(nodes)

    # Usable (source, target) pairs: both ends present and not a self-loop.
    links = []
    for edge in edges:
        src, dst = edge.get("source"), edge.get("target")
        if src and dst and src != dst:
            links.append((src, dst))

    if not start_ids:
        in_degree: Dict[str, int] = dict.fromkeys(node_ids, 0)
        for _, dst in links:
            if dst in in_degree:
                in_degree[dst] += 1
        roots = [nid for nid in node_ids if in_degree[nid] == 0]
        start_ids = roots or node_ids[:1]

    rank: Dict[str, int] = dict.fromkeys(start_ids, 0)
    sweeps = max(len(nodes), 8) + 5
    for _ in range(sweeps):
        changed = False
        for src, dst in links:
            if src not in rank:
                continue
            candidate = rank[src] + 1
            # Ranks are never negative, so -1 stands in for "not ranked yet".
            if rank.get(dst, -1) < candidate:
                rank[dst] = candidate
                changed = True
        if not changed:
            break

    top = max(rank.values(), default=0)
    for nid in node_ids:
        if nid in rank:
            continue
        top += 1
        rank[nid] = top
    return rank


def _apply_layered_positions(nodes: List[Dict[str, Any]], ranks: Dict[str, int]) -> None:
    """Write left-to-right layered canvas coordinates into ``nodes`` in place.

    Ids sharing a rank form one column; within a column ids are sorted and
    stacked vertically, centred on a common baseline. Only the ``x`` and ``y``
    keys of each node's ``position`` are written; other keys are kept.

    Args:
        nodes: Workflow nodes; each matched node gets its ``position`` updated.
        ranks: Mapping of node id to column index. Ids without a matching node
            still occupy a slot in their column.
    """
    layers: Dict[int, List[str]] = defaultdict(list)
    for nid, r in ranks.items():
        layers[r].append(nid)

    # Index nodes once instead of scanning the list for every id. The first
    # node carrying a given id wins, matching a front-to-back search.
    node_by_id: Dict[Any, Dict[str, Any]] = {}
    for node in nodes:
        node_by_id.setdefault(node.get("id"), node)

    x0, y0 = 80.0, 140.0
    x_step = 300.0
    y_step = 110.0

    for r in sorted(layers):
        ids = sorted(layers[r])
        # Centre the column vertically around y0.
        y_base = y0 - (len(ids) - 1) * y_step / 2.0
        for j, nid in enumerate(ids):
            node = node_by_id.get(nid)
            if node is None:
                continue
            pos = node.get("position")
            if not isinstance(pos, dict):
                # A null/invalid position (e.g. JSON null) would otherwise
                # make the item assignment below raise TypeError.
                pos = {}
                node["position"] = pos
            pos["x"] = x0 + r * x_step
            pos["y"] = y_base + j * y_step


def improve_workflow_layout_and_edges(wf: Dict[str, Any]) -> Tuple[int, int]:
    """Clean the workflow's edges and re-lay-out its nodes in place.

    ``wf["edges"]`` is replaced by the sanitized edge list, and node positions
    are recomputed from the ranks derived from those edges.

    Args:
        wf: Workflow config with optional ``nodes`` and ``edges`` lists.

    Returns:
        ``(self_loops, duplicates)``: how many self-loop edges and how many
        repeated (source, target) edges were removed. Edges dropped only
        because they lack a source or target are counted in neither figure.
    """
    nodes = wf.get("nodes") or []
    raw_edges = wf.get("edges") or []

    self_loops = 0
    incomplete = 0
    for e in raw_edges:
        s, t = e.get("source"), e.get("target")
        if not s or not t:
            incomplete += 1
        elif s == t:
            self_loops += 1

    clean = _sanitize_edges(raw_edges)
    # Everything removed that was neither incomplete nor a self-loop must have
    # been a duplicate; subtracting `incomplete` keeps the report accurate.
    removed_dup = len(raw_edges) - len(clean) - self_loops - incomplete

    wf["edges"] = clean

    ranks = _compute_ranks(nodes, clean)
    _apply_layered_positions(nodes, ranks)
    return self_loops, max(0, removed_dup)


def _sort_loop_out_edges(edges: List[Dict[str, Any]], loop_id: str, body_target: str) -> None:
    """Reorder ``edges`` in place so the loop's body edge precedes its other out-edges.

    Edges that do not leave ``loop_id`` come first in their original order,
    followed by the loop's edges into ``body_target``, then the loop's
    remaining out-edges.
    """
    unrelated: List[Dict[str, Any]] = []
    into_body: List[Dict[str, Any]] = []
    other_exits: List[Dict[str, Any]] = []
    for edge in edges:
        if edge.get("source") != loop_id:
            unrelated.append(edge)
        elif edge.get("target") == body_target:
            into_body.append(edge)
        else:
            other_exits.append(edge)
    edges[:] = unrelated + into_body + other_exits


def _upsert_node(nodes: List[Dict[str, Any]], node: Dict[str, Any]) -> None:
    """Replace the first node in ``nodes`` sharing ``node``'s id, or append ``node``."""
    wanted = node.get("id")
    slot = next(
        (idx for idx, existing in enumerate(nodes) if existing.get("id") == wanted),
        None,
    )
    if slot is None:
        nodes.append(node)
    else:
        nodes[slot] = node


def _topology_already_applied(nodes: List[Dict[str, Any]]) -> bool:
    """Report whether the loop node added by this script is already among ``nodes``."""
    for node in nodes or []:
        if node.get("id") == N_LOOP:
            return True
    return False


def _apply_loop_b_topology(wf: Dict[str, Any], rounds: List[int]) -> None:
    """Splice the loop chain into ``wf`` in place of the ``llm-unified`` node.

    Every edge touching ``llm-unified`` is removed (the node itself stays on
    the canvas, unconnected). Its former predecessors feed the rounds-builder
    code node, which feeds the loop; the loop's body is the subtask LLM
    followed by the loop-end node; the loop also connects to the merge code
    node, which feeds the former successors of ``llm-unified``.

    The subtask LLM inherits provider/model/temperature/max_tokens from
    ``llm-unified`` when present, and uses its prompt plus the 16 addendum
    unless that prompt is empty or already carries the marker.

    Args:
        wf: Workflow config; ``nodes`` and ``edges`` are modified in place.
        rounds: Values baked into the rounds-builder code node.

    Raises:
        SystemExit: With code 2 when ``llm-unified`` has no incoming edge or
            no outgoing edge.
    """
    nodes = wf.setdefault("nodes", [])
    edges = wf.setdefault("edges", [])

    def link(source: str, target: str) -> Dict[str, Any]:
        # Edge literal shared by every connection created here.
        return {
            "id": f"e_{source}_{target}",
            "source": source,
            "target": target,
            "sourceHandle": "right",
            "targetHandle": "left",
        }

    def blank_node(node_id: str, node_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
        # Position is a placeholder; the layout pass run later overwrites it.
        return {
            "id": node_id,
            "type": node_type,
            "position": {"x": 0, "y": 0},
            "data": data,
        }

    # Distinct neighbours of llm-unified, in first-seen edge order.
    upstream: List[str] = list(
        dict.fromkeys(
            e.get("source")
            for e in edges
            if e.get("target") == LLM_UNIFIED and e.get("source")
        )
    )
    downstream: List[str] = list(
        dict.fromkeys(
            e.get("target")
            for e in edges
            if e.get("source") == LLM_UNIFIED and e.get("target")
        )
    )

    if not upstream:
        print("错误: 未找到指向 llm-unified 的边，无法从 15 号主线改造。", file=sys.stderr)
        raise SystemExit(2)
    if not downstream:
        print("错误: 未找到从 llm-unified 出发的边。", file=sys.stderr)
        raise SystemExit(2)

    # Detach llm-unified from the main chain.
    edges[:] = [
        e for e in edges if LLM_UNIFIED not in (e.get("target"), e.get("source"))
    ]

    llm_template: Optional[Dict[str, Any]] = next(
        (copy.deepcopy(n) for n in nodes if n.get("id") == LLM_UNIFIED),
        None,
    )

    sub_data: Dict[str, Any] = {
        "label": "16号·循环体内LLM",
        "prompt": "你是知你客服多段执行中的子任务模型。\n" + PROMPT_V16_SUBTASK_EXTRA,
        "enable_tools": True,
        "tools": list(TOOLS_V16),
        "selected_tools": list(TOOLS_V16),
        "max_tool_iterations": SUBTASK_MAX_TOOL_ITER,
    }
    if llm_template:
        inherited = llm_template.get("data") or {}
        for key in ("provider", "model", "temperature", "max_tokens"):
            if inherited.get(key) is not None:
                sub_data[key] = inherited[key]
        base_prompt = (inherited.get("prompt") or "").strip()
        if base_prompt and PROMPT_V16_MARKER not in base_prompt:
            sub_data["prompt"] = base_prompt + "\n" + PROMPT_V16_SUBTASK_EXTRA.strip()

    inserted_nodes = [
        blank_node(
            N_CODE_BUILD,
            "code",
            {
                "label": "16号·注入loop_rounds",
                "language": "python",
                "code": _code_build_source(rounds),
            },
        ),
        blank_node(
            N_LOOP,
            "loop",
            {
                "label": "16号·多段循环",
                "items_path": "right.loop_rounds",
                "item_variable": "round",
                "error_handling": "continue",
            },
        ),
        blank_node(N_SUB_LLM, "llm", sub_data),
        blank_node(N_LOOP_END, "loop_end", {"label": "16号·循环结束"}),
        blank_node(
            N_CODE_MERGE,
            "code",
            {
                "label": "16号·合并各段输出",
                "language": "python",
                "code": CODE_MERGE_PYTHON,
            },
        ),
    ]
    for new_node in inserted_nodes:
        _upsert_node(nodes, new_node)

    new_edges: List[Dict[str, Any]] = [link(src, N_CODE_BUILD) for src in upstream]
    new_edges += [
        link(N_CODE_BUILD, N_LOOP),
        link(N_LOOP, N_SUB_LLM),
        link(N_SUB_LLM, N_LOOP_END),
        link(N_LOOP, N_CODE_MERGE),
    ]
    new_edges += [link(N_CODE_MERGE, dst) for dst in downstream]

    edges.extend(new_edges)
    # Keep the loop's edge into its body ahead of its other out-edges.
    _sort_loop_out_edges(edges, N_LOOP, N_SUB_LLM)


def _patch_zhini16_content_only(wf: Dict[str, Any], rounds: List[int]) -> None:
    """Refresh script-owned settings when the 16 topology already exists.

    Only the rounds-builder code, the merge code, the loop parameters, and
    the subtask LLM's tool settings are rewritten; nodes with other ids,
    and all edges, are left untouched.
    """
    managed_ids = (N_CODE_BUILD, N_SUB_LLM, N_LOOP, N_CODE_MERGE)
    for node in wf.get("nodes") or []:
        node_id = node.get("id")
        if node_id not in managed_ids:
            continue
        data = node.setdefault("data", {})
        if node_id == N_CODE_BUILD:
            data["code"] = _code_build_source(rounds)
        elif node_id == N_SUB_LLM:
            data.update(
                tools=list(TOOLS_V16),
                selected_tools=list(TOOLS_V16),
                max_tool_iterations=SUBTASK_MAX_TOOL_ITER,
            )
        elif node_id == N_LOOP:
            data.update(
                items_path="right.loop_rounds",
                item_variable="round",
                error_handling="continue",
            )
        else:
            data["code"] = CODE_MERGE_PYTHON


def _find_agent_id_by_name(h: Dict[str, str], name: str) -> Optional[str]:
    """Look up an agent by exact name through the agents search endpoint.

    Args:
        h: Request headers (carrying the bearer token).
        name: Agent name; sent as the search term and then matched exactly.

    Returns:
        The ``id`` of the first returned agent whose name equals ``name``, or
        ``None`` when the request does not return HTTP 200 or nothing matches.
    """
    response = requests.get(
        f"{BASE}/api/v1/agents",
        params={"search": name, "limit": 50},
        headers=h,
        timeout=30,
    )
    if response.status_code != 200:
        return None
    candidates = response.json() or []
    return next(
        (agent.get("id") for agent in candidates if agent.get("name") == name),
        None,
    )


def main() -> int:
    """Create or update the target agent with the loop-based workflow.

    Steps: log in, locate the source agent, reuse the target agent if it
    already exists (otherwise duplicate the source), apply or refresh the
    loop topology, tidy edges and layout, then save the workflow and
    description back to the platform.

    Returns:
        ``0`` on success, ``1`` when any API step fails or the agent has no
        usable workflow config.

    Raises:
        SystemExit: With code 2, propagated from the topology rewrite when the
            ``llm-unified`` node has no incoming or no outgoing edge.
    """
    rounds = list(range(1, LOOP_ROUNDS + 1))

    r = requests.post(
        f"{BASE}/api/v1/auth/login",
        data={"username": USER, "password": PWD},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        timeout=15,
    )
    if r.status_code != 200:
        print("登录失败:", r.status_code, r.text[:500], file=sys.stderr)
        return 1
    token = r.json().get("access_token")
    if not token:
        print("无 access_token", file=sys.stderr)
        return 1
    h = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    src_id = _find_agent_id_by_name(h, SOURCE_NAME)
    if not src_id:
        print(f"未找到源 Agent: {SOURCE_NAME}", file=sys.stderr)
        return 1

    existing = _find_agent_id_by_name(h, TARGET_NAME)
    if existing:
        print("已存在", TARGET_NAME, "-> 仅更新工作流", existing)
        new_id = existing
        g = requests.get(f"{BASE}/api/v1/agents/{new_id}", headers=h, timeout=30)
        if g.status_code != 200:
            # Include the status code and cap the body, like the other failure paths.
            print("读取失败:", g.status_code, g.text[:500], file=sys.stderr)
            return 1
        agent = g.json()
    else:
        dup = requests.post(
            f"{BASE}/api/v1/agents/{src_id}/duplicate",
            headers=h,
            json={"name": TARGET_NAME},
            timeout=60,
        )
        if dup.status_code != 201:
            print("复制失败:", dup.status_code, dup.text[:800], file=sys.stderr)
            return 1
        # Parse the response body once and reuse it.
        agent = dup.json()
        new_id = agent["id"]
        print("已创建副本:", new_id, TARGET_NAME)

    raw_wf = agent.get("workflow_config")
    if not isinstance(raw_wf, dict):
        # A missing or null config would otherwise surface as a KeyError or
        # AttributeError traceback further down.
        print(f"错误: Agent {new_id} 缺少有效的 workflow_config", file=sys.stderr)
        return 1
    wf = copy.deepcopy(raw_wf)

    if _topology_already_applied(wf.get("nodes") or []):
        _patch_zhini16_content_only(wf, rounds)
    else:
        _apply_loop_b_topology(wf, rounds)

    # The engine needs the loop's first out-edge to point at the loop body.
    # Re-assert that on both paths: the content-only refresh does not touch
    # edges, so an existing agent's edge order would otherwise go unchecked.
    current_edges = wf.get("edges")
    if isinstance(current_edges, list):
        _sort_loop_out_edges(current_edges, N_LOOP, N_SUB_LLM)

    loops, dup_edges = improve_workflow_layout_and_edges(wf)
    print(f"连线整理: 去掉自环 {loops} 条, 合并重复边 {dup_edges} 条")

    desc = (
        "知你客服16号：B能力——Loop+循环体内LLM；"
        f"默认 {LOOP_ROUNDS} 段顺序执行，每段节点 zhini16-llm-subtask 工具迭代上限 {SUBTASK_MAX_TOOL_ITER}；"
        "原 llm-unified 已从主链摘除；各段 reply 经 zhini16-code-merge-rounds 合并后写入下游 Cache/End。"
    )

    up = requests.put(
        f"{BASE}/api/v1/agents/{new_id}",
        headers=h,
        json={"description": desc, "workflow_config": wf},
        timeout=120,
    )
    if up.status_code != 200:
        print("更新失败:", up.status_code, up.text[:1200], file=sys.stderr)
        return 1
    print("loop_rounds:", rounds)
    print("工具:", ", ".join(TOOLS_V16))
    print("Agent ID:", new_id)
    print(json.dumps({"id": new_id, "name": TARGET_NAME}, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())