# backend/tests/test_agent_core.py

"""
天工 Agent 核心运行时 — 综合测试
覆盖: ReAct 循环 / 工具调用 / 权限检查 / 上下文管理 / Schema 验证

运行: cd backend && python tests/test_agent_core.py
"""
import asyncio
import sys
import time
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Outcome tallies: the ``test`` decorator increments PASS/FAIL; SKIP is only
# read when the final summary is printed.
PASS = FAIL = SKIP = 0

def test(name: str):
    def decorator(fn):
        global PASS, FAIL, SKIP
        try:
            result = fn()
            if asyncio.iscoroutine(result):
                result = asyncio.run(result)
            if result is False:
                FAIL += 1
                print(f"  FAIL {name}")
            elif result is True:
                PASS += 1
                print(f"  PASS {name}")
            else:
                PASS += 1
                print(f"  PASS {name} ({result})")
        except Exception as e:
            FAIL += 1
            import traceback
            print(f"  FAIL {name}: {e}")
            traceback.print_exc()
        return fn
    return decorator


# ════════════════ A. Agent Runtime 基础 ════════════════

@test("A1. AgentRuntime 导入正常")
def test_runtime_import():
    """Check that the runtime package exposes its core classes.

    The import is part of the check: a missing name raises ImportError,
    which the ``test`` decorator reports as a failure.
    """
    from app.agent_runtime import (
        AgentRuntime,
        AgentConfig,
        AgentLLMConfig,
    )

    assert callable(AgentRuntime)
    return "OK"


@test("A2. AgentRuntime 创建实例")
def test_runtime_instantiate():
    """Build a runtime from a minimal config and check its initial counters."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    llm_settings = AgentLLMConfig(model="deepseek-v4-flash", max_iterations=3)
    agent_config = AgentConfig(
        name="test_agent",
        system_prompt="You are a test assistant.",
        llm=llm_settings,
    )
    agent = AgentRuntime(config=agent_config)

    # A freshly built runtime is expected to start with zeroed counters.
    assert agent.context.iteration == 0
    assert agent.context.tool_calls_made == 0
    return "OK"


@test("A3. AgentRuntime 无工具时仍然可运行")
def test_runtime_no_tools():
    """Construct a runtime whose tool include/exclude lists are both empty.

    A full run with memory/DB needs a database, so it is skipped here; only
    successful construction is verified.
    """
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentToolConfig

    llm_settings = AgentLLMConfig(model="deepseek-v4-flash", max_iterations=1, temperature=0.1)
    tool_settings = AgentToolConfig(include_tools=[], exclude_tools=[])
    agent = AgentRuntime(
        config=AgentConfig(
            name="no_tool_agent",
            system_prompt="Echo back the user message exactly.",
            llm=llm_settings,
            tools=tool_settings,
        )
    )

    # NOTE(review): ``len(...) >= 0`` is always true, so this assertion can
    # only fail when one of the tool_manager calls raises.
    assert agent.tool_manager.has_tools() is False or len(agent.tool_manager.tool_names()) >= 0
    return "OK"


@test("A4. AgentRuntime 上下文管理")
def test_runtime_context():
    """Check that a new runtime's context starts with the system prompt."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    prompt = "You are helpful."
    agent = AgentRuntime(
        config=AgentConfig(
            name="ctx_test",
            system_prompt=prompt,
            llm=AgentLLMConfig(max_iterations=5),
        )
    )

    # The context is expected to already hold the system prompt as its
    # first message.
    history = agent.context.messages
    assert len(history) >= 1
    head = history[0]
    assert head["role"] == "system"
    assert head["content"] == prompt
    return "OK"


@test("A5. AgentRuntime 记忆系统绑定")
def test_runtime_memory_binding():
    """Check that the memory config is wired into the runtime's memory object."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentMemoryConfig

    llm_settings = AgentLLMConfig(max_iterations=3)
    memory_settings = AgentMemoryConfig(
        enabled=True,
        max_history_messages=10,
        vector_memory_enabled=False,
    )
    agent = AgentRuntime(
        config=AgentConfig(
            name="mem_test",
            system_prompt="You are helpful.",
            llm=llm_settings,
            memory=memory_settings,
        )
    )

    # max_history_messages from the config should surface as max_history.
    assert agent.memory is not None
    assert agent.memory.max_history == 10
    return "OK"


# ════════════════ B. AgentSchema 验证 ════════════════

@test("B1. AgentConfig 全默认值")
def test_config_defaults():
    """Pin the default values of an AgentConfig built from a name only."""
    from app.agent_runtime.schemas import AgentConfig

    cfg = AgentConfig(name="test")
    assert cfg.name == "test"

    llm = cfg.llm
    assert llm.max_iterations == 10
    assert llm.temperature == 0.7
    assert llm.model == "gpt-4o-mini"

    budget = cfg.budget
    assert budget.max_llm_invocations == 200
    assert budget.max_tool_calls == 500

    memory = cfg.memory
    assert memory.enabled is True
    assert memory.vector_memory_top_k == 5

    tools = cfg.tools
    assert tools.include_tools == []
    assert tools.exclude_tools == []

    # The default require_approval list is expected to contain destructive
    # tools (deploy_push, send_email, ...).
    needs_approval = tools.require_approval
    assert len(needs_approval) >= 5
    assert "deploy_push" in needs_approval
    assert "send_email" in needs_approval
    return "OK"


@test("B2. AgentLLMConfig fallback_llm 嵌套")
def test_llm_config_fallback():
    """Check that a nested fallback_llm mapping is kept on AgentLLMConfig."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback = {"provider": "openai", "model": "gpt-4o-mini"}
    llm_settings = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback)

    stored = llm_settings.fallback_llm
    assert stored["model"] == "gpt-4o-mini"
    assert stored["provider"] == "openai"
    return "OK"


@test("B3. AgentMemoryConfig 团队共享参数")
def test_memory_config_team():
    """Check that team-sharing options round-trip through AgentMemoryConfig."""
    from app.agent_runtime.schemas import AgentMemoryConfig

    options = {
        "team_id": "team_feishu",
        "team_share_enabled": True,
        "vector_memory_rerank": True,
    }
    memory_settings = AgentMemoryConfig(**options)

    assert memory_settings.team_id == "team_feishu"
    assert memory_settings.team_share_enabled is True
    assert memory_settings.vector_memory_rerank is True
    return "OK"


@test("B4. AgentTokenBudgetConfig 阈值验证")
def test_token_budget_config():
    """Check that explicit token-budget thresholds are stored as given."""
    from app.agent_runtime.schemas import AgentTokenBudgetConfig

    budget = AgentTokenBudgetConfig(
        context_window=128000,
        warning_threshold_pct=0.75,
        compact_threshold_pct=0.85,
        hard_limit_pct=0.95,
    )

    # hard_limit_pct is passed in but not asserted on by this test.
    assert budget.context_window == 128000
    assert budget.warning_threshold_pct == 0.75
    assert budget.compact_threshold_pct == 0.85
    return "OK"


@test("B5. AgentStep 构造")
def test_agent_step():
    """Build a tool-call AgentStep and read its fields back."""
    from app.agent_runtime.schemas import AgentStep

    call_args = {"path": "/test.txt"}
    record = AgentStep(
        iteration=1,
        type="tool_call",
        content="调用工具...",
        tool_name="file_read",
        tool_input=call_args,
    )

    assert record.iteration == 1
    assert record.type == "tool_call"
    assert record.tool_name == "file_read"
    assert record.tool_input == {"path": "/test.txt"}
    return "OK"


@test("B6. AgentResult truncated + error")
def test_agent_result():
    """Build a failed, truncated AgentResult and read its fields back."""
    from app.agent_runtime.schemas import AgentResult

    outcome = AgentResult(
        success=False,
        content="Task failed",
        truncated=True,
        iterations_used=3,
        tool_calls_made=5,
        error="max_iterations_reached",
    )

    assert outcome.success is False
    assert outcome.truncated is True
    assert outcome.iterations_used == 3
    assert outcome.tool_calls_made == 5
    assert outcome.error == "max_iterations_reached"
    return "OK"


# ════════════════ C. 权限系统 (permissions.py) ════════════════

@test("C1. PermissionChecker BYPASS 模式全放行")
def test_permission_bypass():
    """Expect BYPASS level to allow every tool, including unknown ones."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    checker = PermissionChecker(level=PermissionLevel.BYPASS)
    for tool in ("file_write", "deploy_push", "unknown_tool_xyz"):
        assert checker.check(tool).action == PermissionAction.ALLOW
    return "OK"


@test("C2. PermissionChecker PLAN 模式只允许只读")
def test_permission_plan():
    """Expect PLAN level to allow read-only tools and deny the rest."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    checker = PermissionChecker(level=PermissionLevel.PLAN)
    expectations = (
        ("file_read", PermissionAction.ALLOW),
        ("web_search", PermissionAction.ALLOW),
        ("file_write", PermissionAction.DENY),
        ("deploy_push", PermissionAction.DENY),
    )
    for tool, wanted in expectations:
        assert checker.check(tool).action == wanted
    return "OK"


@test("C3. PermissionChecker ACCEPT_EDITS 允许编辑工具")
def test_permission_accept_edits():
    """Expect ACCEPT_EDITS to allow file tools but still ask for destructive ones."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    checker = PermissionChecker(level=PermissionLevel.ACCEPT_EDITS)

    for editing_tool in ("file_read", "file_write", "file_edit"):
        assert checker.check(editing_tool).action == PermissionAction.ALLOW

    # Destructive tools are still expected to require confirmation.
    for destructive_tool in ("deploy_push", "git_reset_hard"):
        assert checker.check(destructive_tool).action == PermissionAction.ASK
    return "OK"


@test("C4. PermissionChecker DEFAULT 破坏性工具返回 ASK")
def test_permission_default():
    """Expect DEFAULT level to allow reads and ask for write/deploy tools."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    checker = PermissionChecker(level=PermissionLevel.DEFAULT)
    allow, ask = PermissionAction.ALLOW, PermissionAction.ASK
    expected_by_tool = {
        "file_read": allow,
        "web_search": allow,
        "file_write": ask,
        "deploy_push": ask,
    }
    for tool, wanted in expected_by_tool.items():
        assert checker.check(tool).action == wanted
    return "OK"


@test("C5. PermissionChecker 拒绝列表")
def test_permission_deny_list():
    """Expect tools named in deny_rules to be denied; others are unaffected."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    blocked = ["dangerous_tool", "eval_exec"]
    checker = PermissionChecker(level=PermissionLevel.DEFAULT, deny_rules=blocked)

    for tool in ("dangerous_tool", "eval_exec"):
        assert checker.check(tool).action == PermissionAction.DENY
    assert checker.check("file_read").action == PermissionAction.ALLOW
    return "OK"


@test("C6. AutoApproveRule 工具名通配符")
def test_auto_approve_wildcard():
    """Expect a ``feishu_*`` auto-approve rule to allow matching tool names only."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule

    feishu_rule = AutoApproveRule(tool_pattern="feishu_*", description="飞书工具自动批准")
    checker = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[feishu_rule],
    )

    for matching_tool in ("feishu_send", "feishu_create_doc"):
        assert checker.check(matching_tool).action == PermissionAction.ALLOW
    # A tool outside the pattern keeps its normal ASK decision.
    assert checker.check("file_write").action == PermissionAction.ASK
    return "OK"


@test("C7. AutoApproveRule 参数条件匹配")
def test_auto_approve_params():
    """Expect an auto-approve rule with a parameter condition to be path-sensitive."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule

    txt_only_rule = AutoApproveRule(
        tool_pattern="file_write",
        param_conditions={"path": "regex:\\.txt$"},
        description="自动批准txt文件写入",
    )
    checker = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[txt_only_rule],
    )

    # Writing a .txt file should be auto-approved.
    txt_decision = checker.check("file_write", {"path": "data.txt"})
    assert txt_decision.action == PermissionAction.ALLOW

    # Writing a .env file should still need confirmation.
    env_decision = checker.check("file_write", {"path": "config.env"})
    assert env_decision.action == PermissionAction.ASK
    return "OK"


@test("C8. PermissionLevel 级别切换")
def test_permission_level_switch():
    """Expect ``set_level`` to change the decisions of an existing checker."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    checker = PermissionChecker(level=PermissionLevel.DEFAULT)
    decision = checker.check("file_write")
    assert decision.action == PermissionAction.ASK

    # Raising the level to ACCEPT_EDITS should unlock file writes.
    checker.set_level(PermissionLevel.ACCEPT_EDITS)
    decision = checker.check("file_write")
    assert decision.action == PermissionAction.ALLOW

    # BYPASS should let even a deploy through.
    checker.set_level(PermissionLevel.BYPASS)
    decision = checker.check("deploy_push")
    assert decision.action == PermissionAction.ALLOW

    return "OK"


# ════════════════ D. Tool Manager 测试 ════════════════

@test("D1. AgentToolManager 创建")
def test_tool_manager_create():
    """Check that a default AgentToolManager reports at least one tool."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    assert manager.has_tools() is True

    available = manager.tool_names()
    assert isinstance(available, list)
    count = len(available)
    assert count >= 1
    return f"OK {count} tools"


@test("D2. AgentToolManager 白名单过滤")
def test_tool_manager_whitelist():
    """Check that include_tools entries are recorded on the manager."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime", "math_calculate"])
    # Inspects the private ``_include_tools`` attribute directly.
    for tool in ("datetime", "math_calculate"):
        assert tool in manager._include_tools
    return "OK"


@test("D3. AgentToolManager 黑名单过滤")
def test_tool_manager_blacklist():
    """Check that exclude_tools entries are recorded on the manager."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(exclude_tools=["file_delete", "deploy_push"])
    # Inspects the private ``_exclude_tools`` attribute directly.
    for tool in ("file_delete", "deploy_push"):
        assert tool in manager._exclude_tools
    return "OK"


@test("D4. AgentToolManager 工具schema格式")
def test_tool_manager_schema_format():
    """Check that tool schemas have the OpenAI function-calling layout."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime"])
    tool_schemas = manager.get_tool_schemas()
    total = len(tool_schemas)
    assert total >= 1

    # Each entry must look like {"type": "function", "function": {"name": ...}}.
    for schema in tool_schemas:
        assert "type" in schema, f"缺少type字段: {schema}"
        assert schema["type"] == "function"
        assert "function" in schema
        assert "name" in schema["function"]
    return f"OK {total} schemas"


@test("D5. AgentToolManager 确定性工具缓存判断")
def test_tool_manager_cacheable():
    """Check which tool names the manager treats as cacheable."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    for deterministic_tool in ("file_read", "math_calculate"):
        assert manager._is_cacheable(deterministic_tool) is True
    # An unrecognised tool name must not be reported as cacheable.
    assert manager._is_cacheable("unknown_random_tool") is False, "非确定性工具不应缓存"
    return "OK"


@test("D6. AgentToolManager 缓存关闭")
def test_tool_manager_cache_disabled():
    """Expect nothing to be cacheable when the manager is built with cache_enabled=False."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(cache_enabled=False)
    cacheable = manager._is_cacheable("file_read")
    assert cacheable is False
    return "OK"


@test("D7. AgentToolManager 执行内置工具")
def test_tool_manager_execute():
    """Run the ``datetime`` tool and expect a non-empty string result."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    # ``execute`` is awaited through asyncio.run from this synchronous test.
    pending = manager.execute("datetime", {"format": "%Y-%m-%d"})
    output = asyncio.run(pending)

    assert isinstance(output, str)
    assert len(output) > 0
    return f"OK result={output[:30]}"


@test("D8. AgentToolManager 权限拒绝工具执行")
def test_tool_manager_permission_deny():
    """Expect executing a tool listed in deny_tools to return a rejection message."""
    from app.agent_runtime.tool_manager import AgentToolManager

    # Deny the ``datetime`` tool via deny_tools, then try to run it.
    manager = AgentToolManager(deny_tools=["datetime"])
    output = asyncio.run(manager.execute("datetime", {"format": "%Y"}))

    # Accept either a Chinese or a (case-insensitive) English rejection marker.
    rejected = any(marker in output for marker in ("拒绝", "禁用")) or any(
        marker in output.lower() for marker in ("denied", "disabled")
    )
    assert rejected
    return "OK"


# ════════════════ E. 上下文管理 (AgentContext) ════════════════

@test("E1. AgentContext 消息管理")
def test_context_messages():
    """Add two user/assistant exchanges and count the messages per role."""
    from app.agent_runtime.context import AgentContext

    ctx = AgentContext(system_prompt="Hello.", session_id="s1")
    exchanges = (
        ("Hi", "Hello there!"),
        ("How are you?", "I am fine."),
    )
    for question, answer in exchanges:
        ctx.add_user_message(question)
        ctx.add_assistant_message(answer)

    history = ctx.messages
    assert history[0]["role"] == "system"
    assert sum(1 for msg in history if msg["role"] == "user") == 2
    assert sum(1 for msg in history if msg["role"] == "assistant") == 2
    return f"OK {len(history)} messages"


@test("E2. AgentContext 工具调用消息")
def test_context_tool_calls():
    """Record a tool call plus its result and inspect the resulting tool message."""
    from app.agent_runtime.context import AgentContext

    ctx = AgentContext(session_id="s1")
    ctx.add_user_message("Read file")

    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "file_read", "arguments": '{"path": "/test.txt"}'},
    }
    ctx.add_assistant_message("Let me read the file.", tool_calls=[read_call])
    ctx.add_tool_result("call_1", "file_read", "File content: Hello World")

    # Exactly one tool message is expected, linked to the call by id and name.
    tool_entries = [msg for msg in ctx.messages if msg["role"] == "tool"]
    assert len(tool_entries) == 1
    entry = tool_entries[0]
    assert entry["tool_call_id"] == "call_1"
    assert entry["name"] == "file_read"
    return "OK"


@test("E3. AgentContext 重置")
def test_context_reset():
    """Expect ``reset`` to zero the counters and keep only the system prompt."""
    from app.agent_runtime.context import AgentContext

    ctx = AgentContext(system_prompt="Original.", session_id="s1")

    # Dirty the context first so the reset has something to undo.
    ctx.add_user_message("Hello")
    ctx.add_assistant_message("Hi")
    ctx.iteration = 5
    ctx.tool_calls_made = 10

    ctx.reset()

    assert ctx.iteration == 0
    assert ctx.tool_calls_made == 0
    remaining = ctx.messages
    assert len(remaining) == 1  # only the system message is left
    assert remaining[0]["content"] == "Original."
    return "OK"


@test("E4. AgentContext set_system_prompt")
def test_context_set_system_prompt():
    """Expect ``set_system_prompt`` to replace the first message's content."""
    from app.agent_runtime.context import AgentContext

    ctx = AgentContext(system_prompt="V1", session_id="s1")
    ctx.set_system_prompt("V2 Updated")

    system_message = ctx.messages[0]
    assert system_message["content"] == "V2 Updated"
    return "OK"


@test("E5. AgentContext 会话 ID 唯一")
def test_context_session_id():
    """Check that two contexts built with different session ids keep them distinct."""
    from app.agent_runtime.context import AgentContext

    first_ctx, second_ctx = (
        AgentContext(session_id="session_a"),
        AgentContext(session_id="session_b"),
    )
    assert first_ctx.session_id != second_ctx.session_id
    return "OK"


# ════════════════ F. 消息裁剪 (trim_messages) ════════════════

@test("F1. 裁剪保持 system 消息")
def test_trim_keeps_system():
    """Expect trimming to keep the system message first and respect max_history."""
    from app.agent_runtime.memory import AgentMemory

    memory = AgentMemory(persist=False, max_history=3)

    # system, then Q1/A1 .. Q3/A3, then a trailing unanswered Q4.
    history = [{"role": "system", "content": "You are helpful."}]
    for turn in range(1, 4):
        history.append({"role": "user", "content": f"Q{turn}"})
        history.append({"role": "assistant", "content": f"A{turn}"})
    history.append({"role": "user", "content": "Q4"})

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    assert len(kept) <= 4  # system + 3
    return f"OK trimmed to {len(kept)}"


@test("F2. 裁剪不超过 max_history")
def test_trim_max_history():
    """Expect the trimmed length never to exceed max_history + 1, for several limits."""
    from app.agent_runtime.memory import AgentMemory

    for limit in (5, 10, 15):
        memory = AgentMemory(persist=False, max_history=limit)

        # One system message followed by 50 user/assistant exchanges.
        history = [{"role": "system", "content": "S"}]
        for turn in range(50):
            history.extend((
                {"role": "user", "content": f"Q{turn}"},
                {"role": "assistant", "content": f"A{turn}"},
            ))

        kept = memory.trim_messages(history)
        assert len(kept) <= limit + 1, f"max_history={limit} 但裁剪后 {len(kept)}"
    return "OK"


@test("F3. 裁剪保护 tool_calls 配对")
def test_trim_tool_pairing():
    """Expect trimming not to leave an orphaned tool message right after the system prompt."""
    from app.agent_runtime.memory import AgentMemory

    def assistant_call(call_id, fn_name):
        # Assistant message that requests a single tool call.
        return {"role": "assistant", "content": "", "tool_calls": [
            {"id": call_id, "name": fn_name, "type": "function",
             "function": {"name": fn_name, "arguments": "{}"}},
        ]}

    def tool_reply(call_id, text):
        # Tool message answering the call with the given id.
        return {"role": "tool", "content": text, "tool_call_id": call_id}

    memory = AgentMemory(persist=False, max_history=5)
    history = [
        {"role": "system", "content": "S"},
        {"role": "user", "content": "U1"},
        assistant_call("t1", "f1"),
        tool_reply("t1", "R1"),
        {"role": "assistant", "content": "Done!"},
        {"role": "user", "content": "U2"},
        assistant_call("t2", "f2"),
        tool_reply("t2", "R2"),
        {"role": "assistant", "content": "All done!"},
        {"role": "user", "content": "U3"},
        {"role": "assistant", "content": "OK"},
    ]

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    # The first message after the system prompt must not be a lone tool result.
    assert kept[1]["role"] != "tool", "首条不能是孤立tool消息"
    return f"OK trimmed to {len(kept)}"


# ════════════════ G. 记忆类型推断 ════════════════

@test("G1. 所有记忆类型可区分")
def test_memory_type_distinct():
    """Check ``_infer_memory_type`` against two sample exchanges per memory type."""
    from app.agent_runtime.memory import AgentMemory

    # (user message, assistant reply, expected memory type)
    samples = [
        ("我喜欢Python", "好的", "user"),
        ("记住我的名字是小明", "记住了", "user"),
        ("这个不对，应该修复", "让我看看", "feedback"),
        ("报错了500错误", "检查一下", "feedback"),
        ("数据库地址是101.43.95.130", "已记录", "reference"),
        ("API的URL是什么", "http://api.example.com", "reference"),
        ("项目进度完成了80%", "好的", "project"),
        ("需求文档需要更新", "我来更新", "project"),
    ]
    for user_msg, reply, expected in samples:
        inferred = AgentMemory._infer_memory_type(user_msg, reply)
        assert inferred == expected, f"'{user_msg[:25]}' expect {expected} got {inferred}"
    return f"OK {len(samples)} cases"


@test("G2. 混合信号按优先级分类")
def test_memory_type_priority():
    """Check the expected precedence when a message mixes several signals."""
    from app.agent_runtime.memory import AgentMemory

    # (user message, assistant reply, expected type, failure hint), listed in
    # the precedence the test expects: feedback, reference, project, then the
    # default ``user``.
    scenarios = [
        ("这个API地址不对，应该是127.0.0.1", "修正", "feedback", "反馈关键词应优先"),
        ("http://example.com的部署文档", "好的", "reference", "URL应分类为reference"),
        ("这个任务完成了", "好的", "project", "任务应分类为project"),
        ("你好", "你好", "user", "普通对话应为user"),
    ]
    for user_msg, reply, expected, hint in scenarios:
        inferred = AgentMemory._infer_memory_type(user_msg, reply)
        assert inferred == expected, f"{hint}, got {inferred}"

    return "OK"


# ════════════════ H. Agent错误处理 ════════════════

@test("H1. 无效工具名返回错误信息")
def test_invalid_tool_name():
    """Expect executing an unknown tool to return an error string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    output = asyncio.run(manager.execute("NONEXISTENT_TOOL_XYZ123", {}))
    assert isinstance(output, str)

    # Accept an English marker (case-insensitive) or the Chinese "不存在".
    reported = (
        any(marker in output.lower() for marker in ("error", "not found"))
        or "不存在" in output
    )
    assert reported, f"应报错, got: {output[:80]}"
    return "OK"


@test("H2. 空参数工具执行")
def test_tool_empty_args():
    """Run the ``datetime`` tool with an empty argument dict and expect a non-empty string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    no_args = {}
    output = asyncio.run(manager.execute("datetime", no_args))

    assert isinstance(output, str)
    assert len(output) > 0
    return "OK"


# ════════════════ I. 降级回退链 (fallback_llm) ════════════════

@test("I1. AgentLLMConfig 支持 fallback_llm 字段")
def test_fallback_llm_config_field():
    """Check that AgentLLMConfig accepts and keeps a fallback_llm mapping."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback = {"model": "gpt-4o-mini", "api_key": "sk-test", "base_url": "https://api.openai.com/v1"}
    llm_settings = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback)

    assert llm_settings.fallback_llm is not None
    assert llm_settings.fallback_llm["model"] == "gpt-4o-mini"
    assert llm_settings.fallback_llm["api_key"] == "sk-test"
    return "OK"


@test("I2. fallback_llm 默认值为 None")
def test_fallback_llm_default_none():
    """Expect fallback_llm to be None when it is not supplied."""
    from app.agent_runtime.schemas import AgentLLMConfig

    llm_settings = AgentLLMConfig(model="deepseek-v4-flash")
    fallback = llm_settings.fallback_llm
    assert fallback is None
    return "OK"


@test("I3. 全局降级配置可从 settings 读取")
def test_fallback_global_settings():
    """Check that the global settings object exposes the fallback-LLM fields."""
    from app.core.config import settings

    # Only the presence of each field is checked; the values may be empty.
    required_fields = (
        'FALLBACK_LLM_MODEL',
        'FALLBACK_LLM_API_KEY',
        'FALLBACK_LLM_BASE_URL',
    )
    for field in required_fields:
        assert hasattr(settings, field)
    return "OK"


@test("I4. _do_chat fallback 逻辑不崩溃 (mock)")
def test_fallback_do_chat_no_crash():
    """Build an ``_LLMClient`` with a fallback config without calling any LLM.

    Only checks that the client can be constructed and that it keeps the
    fallback_llm mapping on its ``_config``.
    """
    from app.agent_runtime.core import _LLMClient
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback = {"model": "gpt-4o-mini", "api_key": "sk-fake"}
    llm_settings = AgentLLMConfig(
        model="deepseek-v4-flash",
        temperature=0.7,
        fallback_llm=fallback,
    )
    llm_client = _LLMClient(llm_settings)

    assert llm_client._config.fallback_llm is not None
    assert llm_client._config.fallback_llm["model"] == "gpt-4o-mini"
    return "OK"


# ════════════════ 运行 ════════════════

if __name__ == "__main__":
    # Every @test-decorated function above has already run while the module
    # body was executing, so this section only prints the banner and the
    # final tally.
    rule = "=" * 60
    print(rule)
    print("天工 Agent 核心运行时 — 综合测试")
    print(rule)
    print()

    total = PASS + FAIL + SKIP
    print()
    print(rule)
    print(f"测试结果: {PASS} 通过 / {FAIL} 失败 / {SKIP} 跳过 (共 {total})")
    print(rule)

    # Exit with status 1 when at least one test failed.
    if FAIL > 0:
        sys.exit(1)
    print("\n全部核心测试通过！")