""" 天工 Agent 核心运行时 — 综合测试 覆盖: ReAct 循环 / 工具调用 / 权限检查 / 上下文管理 / Schema 验证 运行: cd backend && python tests/test_agent_core.py """ import asyncio import sys import time import os sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) PASS = 0 FAIL = 0 SKIP = 0 def test(name: str): def decorator(fn): global PASS, FAIL, SKIP try: result = fn() if asyncio.iscoroutine(result): result = asyncio.run(result) if result is False: FAIL += 1 print(f" FAIL {name}") elif result is True: PASS += 1 print(f" PASS {name}") else: PASS += 1 print(f" PASS {name} ({result})") except Exception as e: FAIL += 1 import traceback print(f" FAIL {name}: {e}") traceback.print_exc() return fn return decorator # ════════════════ A. Agent Runtime 基础 ════════════════ @test("A1. AgentRuntime 导入正常") def test_runtime_import(): from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig assert callable(AgentRuntime) return "OK" @test("A2. AgentRuntime 创建实例") def test_runtime_instantiate(): from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig config = AgentConfig( name="test_agent", system_prompt="You are a test assistant.", llm=AgentLLMConfig(model="deepseek-v4-flash", max_iterations=3), ) runtime = AgentRuntime(config=config) assert runtime.context.iteration == 0 assert runtime.context.tool_calls_made == 0 return "OK" @test("A3. AgentRuntime 无工具时仍然可运行") def test_runtime_no_tools(): from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentToolConfig config = AgentConfig( name="no_tool_agent", system_prompt="Echo back the user message exactly.", llm=AgentLLMConfig(model="deepseek-v4-flash", max_iterations=1, temperature=0.1), tools=AgentToolConfig(include_tools=[], exclude_tools=[]), ) runtime = AgentRuntime(config=config) # 带记忆/DB的完整run需要数据库,跳过 # 只验证构造正确 assert runtime.tool_manager.has_tools() is False or len(runtime.tool_manager.tool_names()) >= 0 return "OK" @test("A4. AgentRuntime 上下文管理") def test_runtime_context(): from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig config = AgentConfig( name="ctx_test", system_prompt="You are helpful.", llm=AgentLLMConfig(max_iterations=5), ) runtime = AgentRuntime(config=config) # context 已包含 system prompt msgs = runtime.context.messages assert len(msgs) >= 1 assert msgs[0]["role"] == "system" assert msgs[0]["content"] == "You are helpful." return "OK" @test("A5. AgentRuntime 记忆系统绑定") def test_runtime_memory_binding(): from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentMemoryConfig config = AgentConfig( name="mem_test", system_prompt="You are helpful.", llm=AgentLLMConfig(max_iterations=3), memory=AgentMemoryConfig( enabled=True, max_history_messages=10, vector_memory_enabled=False, ), ) runtime = AgentRuntime(config=config) assert runtime.memory is not None assert runtime.memory.max_history == 10 return "OK" # ════════════════ B. AgentSchema 验证 ════════════════ @test("B1. AgentConfig 全默认值") def test_config_defaults(): from app.agent_runtime.schemas import AgentConfig c = AgentConfig(name="test") assert c.name == "test" assert c.llm.max_iterations == 10 assert c.llm.temperature == 0.7 assert c.llm.model == "gpt-4o-mini" assert c.budget.max_llm_invocations == 200 assert c.budget.max_tool_calls == 500 assert c.memory.enabled is True assert c.memory.vector_memory_top_k == 5 assert c.tools.include_tools == [] assert c.tools.exclude_tools == [] # 默认 require_approval 包含 destructive 工具(deploy_push/send_email 等) assert len(c.tools.require_approval) >= 5 assert "deploy_push" in c.tools.require_approval assert "send_email" in c.tools.require_approval return "OK" @test("B2. AgentLLMConfig fallback_llm 嵌套") def test_llm_config_fallback(): from app.agent_runtime.schemas import AgentLLMConfig cfg = AgentLLMConfig( model="deepseek-v4-flash", fallback_llm={"provider": "openai", "model": "gpt-4o-mini"}, ) assert cfg.fallback_llm["model"] == "gpt-4o-mini" assert cfg.fallback_llm["provider"] == "openai" return "OK" @test("B3. AgentMemoryConfig 团队共享参数") def test_memory_config_team(): from app.agent_runtime.schemas import AgentMemoryConfig cfg = AgentMemoryConfig( team_id="team_feishu", team_share_enabled=True, vector_memory_rerank=True, ) assert cfg.team_id == "team_feishu" assert cfg.team_share_enabled is True assert cfg.vector_memory_rerank is True return "OK" @test("B4. AgentTokenBudgetConfig 阈值验证") def test_token_budget_config(): from app.agent_runtime.schemas import AgentTokenBudgetConfig cfg = AgentTokenBudgetConfig( context_window=128000, warning_threshold_pct=0.75, compact_threshold_pct=0.85, hard_limit_pct=0.95, ) assert cfg.context_window == 128000 assert cfg.warning_threshold_pct == 0.75 assert cfg.compact_threshold_pct == 0.85 return "OK" @test("B5. AgentStep 构造") def test_agent_step(): from app.agent_runtime.schemas import AgentStep step = AgentStep( iteration=1, type="tool_call", content="调用工具...", tool_name="file_read", tool_input={"path": "/test.txt"}, ) assert step.iteration == 1 assert step.type == "tool_call" assert step.tool_name == "file_read" assert step.tool_input == {"path": "/test.txt"} return "OK" @test("B6. AgentResult truncated + error") def test_agent_result(): from app.agent_runtime.schemas import AgentResult r = AgentResult( success=False, content="Task failed", truncated=True, iterations_used=3, tool_calls_made=5, error="max_iterations_reached", ) assert r.success is False assert r.truncated is True assert r.iterations_used == 3 assert r.tool_calls_made == 5 assert r.error == "max_iterations_reached" return "OK" # ════════════════ C. 权限系统 (permissions.py) ════════════════ @test("C1. PermissionChecker BYPASS 模式全放行") def test_permission_bypass(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker(level=PermissionLevel.BYPASS) assert checker.check("file_write").action == PermissionAction.ALLOW assert checker.check("deploy_push").action == PermissionAction.ALLOW assert checker.check("unknown_tool_xyz").action == PermissionAction.ALLOW return "OK" @test("C2. PermissionChecker PLAN 模式只允许只读") def test_permission_plan(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker(level=PermissionLevel.PLAN) assert checker.check("file_read").action == PermissionAction.ALLOW assert checker.check("web_search").action == PermissionAction.ALLOW assert checker.check("file_write").action == PermissionAction.DENY assert checker.check("deploy_push").action == PermissionAction.DENY return "OK" @test("C3. PermissionChecker ACCEPT_EDITS 允许编辑工具") def test_permission_accept_edits(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker(level=PermissionLevel.ACCEPT_EDITS) assert checker.check("file_read").action == PermissionAction.ALLOW assert checker.check("file_write").action == PermissionAction.ALLOW assert checker.check("file_edit").action == PermissionAction.ALLOW # 破坏性工具仍需确认 assert checker.check("deploy_push").action == PermissionAction.ASK assert checker.check("git_reset_hard").action == PermissionAction.ASK return "OK" @test("C4. PermissionChecker DEFAULT 破坏性工具返回 ASK") def test_permission_default(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker(level=PermissionLevel.DEFAULT) assert checker.check("file_read").action == PermissionAction.ALLOW assert checker.check("web_search").action == PermissionAction.ALLOW assert checker.check("file_write").action == PermissionAction.ASK assert checker.check("deploy_push").action == PermissionAction.ASK return "OK" @test("C5. PermissionChecker 拒绝列表") def test_permission_deny_list(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker( level=PermissionLevel.DEFAULT, deny_rules=["dangerous_tool", "eval_exec"], ) assert checker.check("dangerous_tool").action == PermissionAction.DENY assert checker.check("eval_exec").action == PermissionAction.DENY assert checker.check("file_read").action == PermissionAction.ALLOW return "OK" @test("C6. AutoApproveRule 工具名通配符") def test_auto_approve_wildcard(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule checker = PermissionChecker( level=PermissionLevel.DEFAULT, auto_approve_rules=[ AutoApproveRule(tool_pattern="feishu_*", description="飞书工具自动批准"), ], ) assert checker.check("feishu_send").action == PermissionAction.ALLOW assert checker.check("feishu_create_doc").action == PermissionAction.ALLOW assert checker.check("file_write").action == PermissionAction.ASK # 不受影响 return "OK" @test("C7. AutoApproveRule 参数条件匹配") def test_auto_approve_params(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule checker = PermissionChecker( level=PermissionLevel.DEFAULT, auto_approve_rules=[ AutoApproveRule( tool_pattern="file_write", param_conditions={"path": "regex:\\.txt$"}, description="自动批准txt文件写入", ), ], ) # 写入 .txt 文件自动批准 assert checker.check("file_write", {"path": "data.txt"}).action == PermissionAction.ALLOW # 写入 .env 文件仍需确认 assert checker.check("file_write", {"path": "config.env"}).action == PermissionAction.ASK return "OK" @test("C8. PermissionLevel 级别切换") def test_permission_level_switch(): from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction checker = PermissionChecker(level=PermissionLevel.DEFAULT) assert checker.check("file_write").action == PermissionAction.ASK checker.set_level(PermissionLevel.ACCEPT_EDITS) assert checker.check("file_write").action == PermissionAction.ALLOW checker.set_level(PermissionLevel.BYPASS) assert checker.check("deploy_push").action == PermissionAction.ALLOW return "OK" # ════════════════ D. Tool Manager 测试 ════════════════ @test("D1. AgentToolManager 创建") def test_tool_manager_create(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager() assert mgr.has_tools() is True names = mgr.tool_names() assert isinstance(names, list) assert len(names) >= 1 return f"OK {len(names)} tools" @test("D2. AgentToolManager 白名单过滤") def test_tool_manager_whitelist(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager(include_tools=["datetime", "math_calculate"]) assert "datetime" in mgr._include_tools assert "math_calculate" in mgr._include_tools return "OK" @test("D3. AgentToolManager 黑名单过滤") def test_tool_manager_blacklist(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager(exclude_tools=["file_delete", "deploy_push"]) assert "file_delete" in mgr._exclude_tools assert "deploy_push" in mgr._exclude_tools return "OK" @test("D4. AgentToolManager 工具schema格式") def test_tool_manager_schema_format(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager(include_tools=["datetime"]) schemas = mgr.get_tool_schemas() assert len(schemas) >= 1 # OpenAI function calling 格式 for s in schemas: assert "type" in s, f"缺少type字段: {s}" assert s["type"] == "function" assert "function" in s assert "name" in s["function"] return f"OK {len(schemas)} schemas" @test("D5. AgentToolManager 确定性工具缓存判断") def test_tool_manager_cacheable(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager() assert mgr._is_cacheable("file_read") is True assert mgr._is_cacheable("math_calculate") is True assert mgr._is_cacheable("unknown_random_tool") is False, "非确定性工具不应缓存" return "OK" @test("D6. AgentToolManager 缓存关闭") def test_tool_manager_cache_disabled(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager(cache_enabled=False) assert mgr._is_cacheable("file_read") is False return "OK" @test("D7. AgentToolManager 执行内置工具") def test_tool_manager_execute(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager() result = asyncio.run(mgr.execute("datetime", {"format": "%Y-%m-%d"})) assert isinstance(result, str) assert len(result) > 0 return f"OK result={result[:30]}" @test("D8. AgentToolManager 权限拒绝工具执行") def test_tool_manager_permission_deny(): from app.agent_runtime.tool_manager import AgentToolManager # 用 deny_tools 拒绝 datetime mgr = AgentToolManager(deny_tools=["datetime"]) result = asyncio.run(mgr.execute("datetime", {"format": "%Y"})) assert "拒绝" in result or "禁用" in result or "denied" in result.lower() or "disabled" in result.lower() return "OK" # ════════════════ E. 上下文管理 (AgentContext) ════════════════ @test("E1. AgentContext 消息管理") def test_context_messages(): from app.agent_runtime.context import AgentContext ctx = AgentContext(system_prompt="Hello.", session_id="s1") ctx.add_user_message("Hi") ctx.add_assistant_message("Hello there!") ctx.add_user_message("How are you?") ctx.add_assistant_message("I am fine.") msgs = ctx.messages assert msgs[0]["role"] == "system" assert len([m for m in msgs if m["role"] == "user"]) == 2 assert len([m for m in msgs if m["role"] == "assistant"]) == 2 return f"OK {len(msgs)} messages" @test("E2. AgentContext 工具调用消息") def test_context_tool_calls(): from app.agent_runtime.context import AgentContext ctx = AgentContext(session_id="s1") ctx.add_user_message("Read file") tool_calls = [{ "id": "call_1", "type": "function", "function": {"name": "file_read", "arguments": '{"path": "/test.txt"}'}, }] ctx.add_assistant_message("Let me read the file.", tool_calls=tool_calls) ctx.add_tool_result("call_1", "file_read", "File content: Hello World") msgs = ctx.messages tool_msgs = [m for m in msgs if m["role"] == "tool"] assert len(tool_msgs) == 1 assert tool_msgs[0]["tool_call_id"] == "call_1" assert tool_msgs[0]["name"] == "file_read" return "OK" @test("E3. AgentContext 重置") def test_context_reset(): from app.agent_runtime.context import AgentContext ctx = AgentContext(system_prompt="Original.", session_id="s1") ctx.add_user_message("Hello") ctx.add_assistant_message("Hi") ctx.iteration = 5 ctx.tool_calls_made = 10 ctx.reset() assert ctx.iteration == 0 assert ctx.tool_calls_made == 0 msgs = ctx.messages assert len(msgs) == 1 # only system assert msgs[0]["content"] == "Original." return "OK" @test("E4. AgentContext set_system_prompt") def test_context_set_system_prompt(): from app.agent_runtime.context import AgentContext ctx = AgentContext(system_prompt="V1", session_id="s1") ctx.set_system_prompt("V2 Updated") assert ctx.messages[0]["content"] == "V2 Updated" return "OK" @test("E5. AgentContext 会话 ID 唯一") def test_context_session_id(): from app.agent_runtime.context import AgentContext ctx1 = AgentContext(session_id="session_a") ctx2 = AgentContext(session_id="session_b") assert ctx1.session_id != ctx2.session_id return "OK" # ════════════════ F. 消息裁剪 (trim_messages) ════════════════ @test("F1. 裁剪保持 system 消息") def test_trim_keeps_system(): from app.agent_runtime.memory import AgentMemory mem = AgentMemory(persist=False, max_history=3) msgs = [ {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Q1"}, {"role": "assistant", "content": "A1"}, {"role": "user", "content": "Q2"}, {"role": "assistant", "content": "A2"}, {"role": "user", "content": "Q3"}, {"role": "assistant", "content": "A3"}, {"role": "user", "content": "Q4"}, ] trimmed = mem.trim_messages(msgs) assert trimmed[0]["role"] == "system" assert len(trimmed) <= 4 # system + 3 return f"OK trimmed to {len(trimmed)}" @test("F2. 裁剪不超过 max_history") def test_trim_max_history(): from app.agent_runtime.memory import AgentMemory for mh in [5, 10, 15]: mem = AgentMemory(persist=False, max_history=mh) msgs = [{"role": "system", "content": "S"}] for i in range(50): msgs.append({"role": "user", "content": f"Q{i}"}) msgs.append({"role": "assistant", "content": f"A{i}"}) trimmed = mem.trim_messages(msgs) assert len(trimmed) <= mh + 1, f"max_history={mh} 但裁剪后 {len(trimmed)}" return "OK" @test("F3. 裁剪保护 tool_calls 配对") def test_trim_tool_pairing(): from app.agent_runtime.memory import AgentMemory mem = AgentMemory(persist=False, max_history=5) msgs = [ {"role": "system", "content": "S"}, {"role": "user", "content": "U1"}, {"role": "assistant", "content": "", "tool_calls": [ {"id": "t1", "name": "f1", "type": "function", "function": {"name": "f1", "arguments": "{}"}}, ]}, {"role": "tool", "content": "R1", "tool_call_id": "t1"}, {"role": "assistant", "content": "Done!"}, {"role": "user", "content": "U2"}, {"role": "assistant", "content": "", "tool_calls": [ {"id": "t2", "name": "f2", "type": "function", "function": {"name": "f2", "arguments": "{}"}}, ]}, {"role": "tool", "content": "R2", "tool_call_id": "t2"}, {"role": "assistant", "content": "All done!"}, {"role": "user", "content": "U3"}, {"role": "assistant", "content": "OK"}, ] trimmed = mem.trim_messages(msgs) assert trimmed[0]["role"] == "system" # 开头不能是孤立 tool assert trimmed[1]["role"] != "tool", "首条不能是孤立tool消息" return f"OK trimmed to {len(trimmed)}" # ════════════════ G. 记忆类型推断 ════════════════ @test("G1. 所有记忆类型可区分") def test_memory_type_distinct(): from app.agent_runtime.memory import AgentMemory # 每个类型至少2个case cases = [ ("我喜欢Python", "好的", "user"), ("记住我的名字是小明", "记住了", "user"), ("这个不对,应该修复", "让我看看", "feedback"), ("报错了500错误", "检查一下", "feedback"), ("数据库地址是101.43.95.130", "已记录", "reference"), ("API的URL是什么", "http://api.example.com", "reference"), ("项目进度完成了80%", "好的", "project"), ("需求文档需要更新", "我来更新", "project"), ] for um, ar, expected in cases: result = AgentMemory._infer_memory_type(um, ar) assert result == expected, f"'{um[:25]}' expect {expected} got {result}" return f"OK {len(cases)} cases" @test("G2. 混合信号按优先级分类") def test_memory_type_priority(): from app.agent_runtime.memory import AgentMemory # feedback 关键字优先级最高(先检查) r = AgentMemory._infer_memory_type("这个API地址不对,应该是127.0.0.1", "修正") assert r == "feedback", f"反馈关键词应优先, got {r}" # reference 关键字优先级次之 r = AgentMemory._infer_memory_type("http://example.com的部署文档", "好的") assert r == "reference", f"URL应分类为reference, got {r}" # project 第三优先 r = AgentMemory._infer_memory_type("这个任务完成了", "好的") assert r == "project", f"任务应分类为project, got {r}" # 默认 user r = AgentMemory._infer_memory_type("你好", "你好") assert r == "user", f"普通对话应为user, got {r}" return "OK" # ════════════════ H. Agent错误处理 ════════════════ @test("H1. 无效工具名返回错误信息") def test_invalid_tool_name(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager() result = asyncio.run(mgr.execute("NONEXISTENT_TOOL_XYZ123", {})) assert isinstance(result, str) assert "error" in result.lower() or "not found" in result.lower() or "不存在" in result, f"应报错, got: {result[:80]}" return "OK" @test("H2. 空参数工具执行") def test_tool_empty_args(): from app.agent_runtime.tool_manager import AgentToolManager mgr = AgentToolManager() result = asyncio.run(mgr.execute("datetime", {})) assert isinstance(result, str) assert len(result) > 0 return "OK" # ════════════════ I. 降级回退链 (fallback_llm) ════════════════ @test("I1. AgentLLMConfig 支持 fallback_llm 字段") def test_fallback_llm_config_field(): from app.agent_runtime.schemas import AgentLLMConfig cfg = AgentLLMConfig( model="deepseek-v4-flash", fallback_llm={"model": "gpt-4o-mini", "api_key": "sk-test", "base_url": "https://api.openai.com/v1"}, ) assert cfg.fallback_llm is not None assert cfg.fallback_llm["model"] == "gpt-4o-mini" assert cfg.fallback_llm["api_key"] == "sk-test" return "OK" @test("I2. fallback_llm 默认值为 None") def test_fallback_llm_default_none(): from app.agent_runtime.schemas import AgentLLMConfig cfg = AgentLLMConfig(model="deepseek-v4-flash") assert cfg.fallback_llm is None return "OK" @test("I3. 全局降级配置可从 settings 读取") def test_fallback_global_settings(): from app.core.config import settings # 全局降级字段存在(可以为空) assert hasattr(settings, 'FALLBACK_LLM_MODEL') assert hasattr(settings, 'FALLBACK_LLM_API_KEY') assert hasattr(settings, 'FALLBACK_LLM_BASE_URL') return "OK" @test("I4. _do_chat fallback 逻辑不崩溃 (mock)") def test_fallback_do_chat_no_crash(): """验证 fallback 代码路径存在且语法正确(不实际调用 LLM)""" from app.agent_runtime.core import _LLMClient from app.agent_runtime.schemas import AgentLLMConfig llm_config = AgentLLMConfig( model="deepseek-v4-flash", temperature=0.7, fallback_llm={"model": "gpt-4o-mini", "api_key": "sk-fake"}, ) client = _LLMClient(llm_config) assert client._config.fallback_llm is not None assert client._config.fallback_llm["model"] == "gpt-4o-mini" return "OK" # ════════════════ 运行 ════════════════ if __name__ == "__main__": print("=" * 60) print("天工 Agent 核心运行时 — 综合测试") print("=" * 60) print() total = PASS + FAIL + SKIP print() print("=" * 60) print(f"测试结果: {PASS} 通过 / {FAIL} 失败 / {SKIP} 跳过 (共 {total})") print("=" * 60) if FAIL > 0: sys.exit(1) else: print("\n全部核心测试通过!")