Files
aiagent/backend/tests/test_agent_core.py

729 lines
25 KiB
Python
Raw Normal View History

"""
天工 Agent 核心运行时 综合测试
覆盖: ReAct 循环 / 工具调用 / 权限检查 / 上下文管理 / Schema 验证
运行: cd backend && python tests/test_agent_core.py
"""
import asyncio
import sys
import time
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
PASS = 0
FAIL = 0
SKIP = 0
def test(name: str):
    """Return a decorator that runs a test function immediately and records it.

    The decorated function is executed at decoration time (not when it is
    later called).  Its outcome is tallied in the module-level ``PASS`` /
    ``FAIL`` counters and a one-line status is printed:

    * returns ``False``            -> counted as a failure
    * returns ``True``             -> counted as a pass
    * returns anything else        -> counted as a pass, value shown in parens
    * raises any ``Exception``     -> counted as a failure, traceback printed

    A coroutine returned by the function is driven to completion with
    ``asyncio.run`` before the outcome is classified.

    Args:
        name: Human-readable label printed next to the PASS/FAIL status.

    Returns:
        A decorator that hands back the original function unchanged.
    """
    def decorator(fn):
        global PASS, FAIL, SKIP
        try:
            outcome = fn()
            # Async tests hand back a coroutine object; run it to get the value.
            if asyncio.iscoroutine(outcome):
                outcome = asyncio.run(outcome)

            if outcome is False:
                FAIL += 1
                status_line = f" FAIL {name}"
            else:
                PASS += 1
                if outcome is True:
                    status_line = f" PASS {name}"
                else:
                    status_line = f" PASS {name} ({outcome})"
            print(status_line)
        except Exception as exc:
            # Any exception (including AssertionError) marks the test failed.
            FAIL += 1
            import traceback
            print(f" FAIL {name}: {exc}")
            traceback.print_exc()
        return fn
    return decorator
# ════════════════ A. Agent Runtime 基础 ════════════════
@test("A1. AgentRuntime 导入正常")
def test_runtime_import():
    """Check that the runtime package exposes its main names and AgentRuntime is callable."""
    # Importing all three names is part of the check: a missing export raises here.
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    runtime_cls = AgentRuntime
    assert callable(runtime_cls)
    return "OK"
@test("A2. AgentRuntime 创建实例")
def test_runtime_instantiate():
    """Build a runtime from a small config and check its context counters start at zero."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    agent_config = AgentConfig(
        name="test_agent",
        system_prompt="You are a test assistant.",
        llm=AgentLLMConfig(model="deepseek-v4-flash", max_iterations=3),
    )
    agent = AgentRuntime(config=agent_config)

    # A freshly constructed runtime has not iterated or called any tool yet.
    assert agent.context.iteration == 0
    assert agent.context.tool_calls_made == 0
    return "OK"
@test("A3. AgentRuntime 无工具时仍然可运行")
def test_runtime_no_tools():
    """Construct a runtime with empty include/exclude tool lists and probe its tool manager.

    A full ``run()`` needs memory/DB access, so this test only verifies that
    construction succeeds and that the tool manager answers its basic queries
    with well-formed values.
    """
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentToolConfig

    config = AgentConfig(
        name="no_tool_agent",
        system_prompt="Echo back the user message exactly.",
        llm=AgentLLMConfig(model="deepseek-v4-flash", max_iterations=1, temperature=0.1),
        tools=AgentToolConfig(include_tools=[], exclude_tools=[]),
    )
    runtime = AgentRuntime(config=config)

    # The previous check (`has_tools() is False or len(tool_names()) >= 0`)
    # was always true because a length is never negative.  Assert something
    # that can actually fail: both queries must return their expected types.
    tool_manager = runtime.tool_manager
    assert isinstance(tool_manager.has_tools(), bool)
    assert isinstance(tool_manager.tool_names(), list)
    return "OK"
@test("A4. AgentRuntime 上下文管理")
def test_runtime_context():
    """Check that a new runtime's context starts with the configured system prompt."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    agent_config = AgentConfig(
        name="ctx_test",
        system_prompt="You are helpful.",
        llm=AgentLLMConfig(max_iterations=5),
    )
    agent = AgentRuntime(config=agent_config)

    # The context is expected to already contain the system prompt as message 0.
    history = agent.context.messages
    assert len(history) >= 1
    assert history[0]["role"] == "system"
    assert history[0]["content"] == "You are helpful."
    return "OK"
@test("A5. AgentRuntime 记忆系统绑定")
def test_runtime_memory_binding():
    """Check that an enabled memory config yields a bound memory object with the given history cap."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentMemoryConfig

    memory_settings = AgentMemoryConfig(
        enabled=True,
        max_history_messages=10,
        vector_memory_enabled=False,
    )
    agent_config = AgentConfig(
        name="mem_test",
        system_prompt="You are helpful.",
        llm=AgentLLMConfig(max_iterations=3),
        memory=memory_settings,
    )
    agent = AgentRuntime(config=agent_config)

    assert agent.memory is not None
    # max_history_messages from the config should surface as memory.max_history.
    assert agent.memory.max_history == 10
    return "OK"
# ════════════════ B. AgentSchema 验证 ════════════════
@test("B1. AgentConfig 全默认值")
def test_config_defaults():
    """Check the default values of an AgentConfig built with only a name."""
    from app.agent_runtime.schemas import AgentConfig

    cfg = AgentConfig(name="test")
    assert cfg.name == "test"

    # LLM defaults
    assert cfg.llm.max_iterations == 10
    assert cfg.llm.temperature == 0.7
    assert cfg.llm.model == "gpt-4o-mini"

    # Budget defaults
    assert cfg.budget.max_llm_invocations == 200
    assert cfg.budget.max_tool_calls == 500

    # Memory defaults
    assert cfg.memory.enabled is True
    assert cfg.memory.vector_memory_top_k == 5

    # Tool filter defaults
    assert cfg.tools.include_tools == []
    assert cfg.tools.exclude_tools == []

    # The default require_approval list is expected to cover destructive
    # tools (deploy_push, send_email, and others).
    approval_list = cfg.tools.require_approval
    assert len(approval_list) >= 5
    assert "deploy_push" in approval_list
    assert "send_email" in approval_list
    return "OK"
@test("B2. AgentLLMConfig fallback_llm 嵌套")
def test_llm_config_fallback():
    """Check that a nested fallback_llm mapping is stored and readable by key."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {"provider": "openai", "model": "gpt-4o-mini"}
    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback_spec)

    stored = llm_cfg.fallback_llm
    assert stored["model"] == "gpt-4o-mini"
    assert stored["provider"] == "openai"
    return "OK"
@test("B3. AgentMemoryConfig 团队共享参数")
def test_memory_config_team():
    """Check that the team-sharing and rerank options round-trip through AgentMemoryConfig."""
    from app.agent_runtime.schemas import AgentMemoryConfig

    team_options = {
        "team_id": "team_feishu",
        "team_share_enabled": True,
        "vector_memory_rerank": True,
    }
    mem_cfg = AgentMemoryConfig(**team_options)

    assert mem_cfg.team_id == "team_feishu"
    assert mem_cfg.team_share_enabled is True
    assert mem_cfg.vector_memory_rerank is True
    return "OK"
@test("B4. AgentTokenBudgetConfig 阈值验证")
def test_token_budget_config():
    """Check that every threshold passed to AgentTokenBudgetConfig is stored as given."""
    from app.agent_runtime.schemas import AgentTokenBudgetConfig

    cfg = AgentTokenBudgetConfig(
        context_window=128000,
        warning_threshold_pct=0.75,
        compact_threshold_pct=0.85,
        hard_limit_pct=0.95,
    )
    assert cfg.context_window == 128000
    assert cfg.warning_threshold_pct == 0.75
    assert cfg.compact_threshold_pct == 0.85
    # hard_limit_pct was configured above but previously never verified.
    assert cfg.hard_limit_pct == 0.95
    return "OK"
@test("B5. AgentStep 构造")
def test_agent_step():
    """Check that AgentStep keeps the fields it was constructed with."""
    from app.agent_runtime.schemas import AgentStep

    call_args = {"path": "/test.txt"}
    tool_step = AgentStep(
        iteration=1,
        type="tool_call",
        content="调用工具...",
        tool_name="file_read",
        tool_input=call_args,
    )

    assert tool_step.iteration == 1
    assert tool_step.type == "tool_call"
    assert tool_step.tool_name == "file_read"
    assert tool_step.tool_input == {"path": "/test.txt"}
    return "OK"
@test("B6. AgentResult truncated + error")
def test_agent_result():
    """Check that a failed, truncated AgentResult exposes all of its fields."""
    from app.agent_runtime.schemas import AgentResult

    failed_run = AgentResult(
        success=False,
        content="Task failed",
        truncated=True,
        iterations_used=3,
        tool_calls_made=5,
        error="max_iterations_reached",
    )

    # Flags are compared by identity so only real booleans pass.
    assert failed_run.success is False
    assert failed_run.truncated is True
    # Counters and the error code are compared by value.
    assert failed_run.iterations_used == 3
    assert failed_run.tool_calls_made == 5
    assert failed_run.error == "max_iterations_reached"
    return "OK"
# ════════════════ C. 权限系统 (permissions.py) ════════════════
@test("C1. PermissionChecker BYPASS 模式全放行")
def test_permission_bypass():
    """In BYPASS level every checked tool, even an unknown one, must be allowed."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    bypass_checker = PermissionChecker(level=PermissionLevel.BYPASS)
    for tool_name in ("file_write", "deploy_push", "unknown_tool_xyz"):
        decision = bypass_checker.check(tool_name)
        assert decision.action == PermissionAction.ALLOW
    return "OK"
@test("C2. PermissionChecker PLAN 模式只允许只读")
def test_permission_plan():
    """In PLAN level read-style tools are allowed and write/deploy tools are denied."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    plan_checker = PermissionChecker(level=PermissionLevel.PLAN)
    expectations = (
        ("file_read", PermissionAction.ALLOW),
        ("web_search", PermissionAction.ALLOW),
        ("file_write", PermissionAction.DENY),
        ("deploy_push", PermissionAction.DENY),
    )
    for tool_name, wanted in expectations:
        assert plan_checker.check(tool_name).action == wanted
    return "OK"
@test("C3. PermissionChecker ACCEPT_EDITS 允许编辑工具")
def test_permission_accept_edits():
    """In ACCEPT_EDITS level file read/write/edit are allowed; destructive tools still ask."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    edits_checker = PermissionChecker(level=PermissionLevel.ACCEPT_EDITS)

    for editing_tool in ("file_read", "file_write", "file_edit"):
        assert edits_checker.check(editing_tool).action == PermissionAction.ALLOW

    # Destructive tools still require confirmation.
    for destructive_tool in ("deploy_push", "git_reset_hard"):
        assert edits_checker.check(destructive_tool).action == PermissionAction.ASK
    return "OK"
@test("C4. PermissionChecker DEFAULT 破坏性工具返回 ASK")
def test_permission_default():
    """In DEFAULT level read-style tools are allowed while write/deploy tools return ASK."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    default_checker = PermissionChecker(level=PermissionLevel.DEFAULT)
    expectations = (
        ("file_read", PermissionAction.ALLOW),
        ("web_search", PermissionAction.ALLOW),
        ("file_write", PermissionAction.ASK),
        ("deploy_push", PermissionAction.ASK),
    )
    for tool_name, wanted in expectations:
        assert default_checker.check(tool_name).action == wanted
    return "OK"
@test("C5. PermissionChecker 拒绝列表")
def test_permission_deny_list():
    """Tools named in deny_rules must be denied; an unlisted read tool stays allowed."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    blocked_tools = ["dangerous_tool", "eval_exec"]
    checker = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        deny_rules=blocked_tools,
    )

    for blocked in ("dangerous_tool", "eval_exec"):
        assert checker.check(blocked).action == PermissionAction.DENY
    assert checker.check("file_read").action == PermissionAction.ALLOW
    return "OK"
@test("C6. AutoApproveRule 工具名通配符")
def test_auto_approve_wildcard():
    """A wildcard auto-approve rule allows matching tool names and leaves others on ASK."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
        AutoApproveRule,
    )

    feishu_rule = AutoApproveRule(tool_pattern="feishu_*", description="飞书工具自动批准")
    checker = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[feishu_rule],
    )

    for matching_tool in ("feishu_send", "feishu_create_doc"):
        assert checker.check(matching_tool).action == PermissionAction.ALLOW
    # file_write does not match the pattern, so it is unaffected by the rule.
    assert checker.check("file_write").action == PermissionAction.ASK
    return "OK"
@test("C7. AutoApproveRule 参数条件匹配")
def test_auto_approve_params():
    """An auto-approve rule with a parameter condition only applies when the argument matches."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
        AutoApproveRule,
    )

    txt_only_rule = AutoApproveRule(
        tool_pattern="file_write",
        param_conditions={"path": "regex:\\.txt$"},
        description="自动批准txt文件写入",
    )
    checker = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[txt_only_rule],
    )

    # Writing a .txt file is auto-approved.
    txt_decision = checker.check("file_write", {"path": "data.txt"})
    assert txt_decision.action == PermissionAction.ALLOW

    # Writing a .env file still needs confirmation.
    env_decision = checker.check("file_write", {"path": "config.env"})
    assert env_decision.action == PermissionAction.ASK
    return "OK"
@test("C8. PermissionLevel 级别切换")
def test_permission_level_switch():
    """Check that set_level changes the outcome of later permission checks."""
    from app.agent_runtime.permissions import (
        PermissionChecker,
        PermissionLevel,
        PermissionAction,
    )

    checker = PermissionChecker(level=PermissionLevel.DEFAULT)

    # Start at DEFAULT: writing a file asks for confirmation.
    before_switch = checker.check("file_write")
    assert before_switch.action == PermissionAction.ASK

    # After switching to ACCEPT_EDITS the same tool is allowed.
    checker.set_level(PermissionLevel.ACCEPT_EDITS)
    after_edits = checker.check("file_write")
    assert after_edits.action == PermissionAction.ALLOW

    # After switching to BYPASS even deploy_push is allowed.
    checker.set_level(PermissionLevel.BYPASS)
    after_bypass = checker.check("deploy_push")
    assert after_bypass.action == PermissionAction.ALLOW
    return "OK"
# ════════════════ D. Tool Manager 测试 ════════════════
@test("D1. AgentToolManager 创建")
def test_tool_manager_create():
    """A default AgentToolManager must report tools and return a non-empty list of names."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    assert manager.has_tools() is True

    registered = manager.tool_names()
    assert isinstance(registered, list)
    assert len(registered) >= 1
    return f"OK {len(registered)} tools"
@test("D2. AgentToolManager 白名单过滤")
def test_tool_manager_whitelist():
    """Check that include_tools entries are recorded on the manager's _include_tools."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime", "math_calculate"])
    for wanted in ("datetime", "math_calculate"):
        assert wanted in manager._include_tools
    return "OK"
@test("D3. AgentToolManager 黑名单过滤")
def test_tool_manager_blacklist():
    """Check that exclude_tools entries are recorded on the manager's _exclude_tools."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(exclude_tools=["file_delete", "deploy_push"])
    for banned in ("file_delete", "deploy_push"):
        assert banned in manager._exclude_tools
    return "OK"
@test("D4. AgentToolManager 工具schema格式")
def test_tool_manager_schema_format():
    """Check that every tool schema has type "function" and a named "function" entry."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime"])
    tool_schemas = manager.get_tool_schemas()
    assert len(tool_schemas) >= 1

    # Each entry is expected to follow the OpenAI function-calling layout.
    for schema in tool_schemas:
        assert "type" in schema, f"缺少type字段: {schema}"
        assert schema["type"] == "function"
        assert "function" in schema
        assert "name" in schema["function"]
    return f"OK {len(tool_schemas)} schemas"
@test("D5. AgentToolManager 确定性工具缓存判断")
def test_tool_manager_cacheable():
    """Check which tool names _is_cacheable accepts on a default manager."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()

    for deterministic_tool in ("file_read", "math_calculate"):
        assert manager._is_cacheable(deterministic_tool) is True

    unknown_verdict = manager._is_cacheable("unknown_random_tool")
    assert unknown_verdict is False, "非确定性工具不应缓存"
    return "OK"
@test("D6. AgentToolManager 缓存关闭")
def test_tool_manager_cache_disabled():
    """With cache_enabled=False, _is_cacheable must return False for file_read."""
    from app.agent_runtime.tool_manager import AgentToolManager

    uncached_manager = AgentToolManager(cache_enabled=False)
    verdict = uncached_manager._is_cacheable("file_read")
    assert verdict is False
    return "OK"
@test("D7. AgentToolManager 执行内置工具")
def test_tool_manager_execute():
    """Execute the datetime tool and check it returns a non-empty string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    # execute() is awaitable, so it is driven synchronously with asyncio.run.
    pending = manager.execute("datetime", {"format": "%Y-%m-%d"})
    output = asyncio.run(pending)

    assert isinstance(output, str)
    assert len(output) > 0
    return f"OK result={output[:30]}"
@test("D8. AgentToolManager 权限拒绝工具执行")
def test_tool_manager_permission_deny():
    """Executing a tool listed in deny_tools must return a refusal message."""
    from app.agent_runtime.tool_manager import AgentToolManager

    # Deny the datetime tool through deny_tools, then try to run it.
    manager = AgentToolManager(deny_tools=["datetime"])
    output = asyncio.run(manager.execute("datetime", {"format": "%Y"}))

    # Accept either a Chinese or a (case-insensitive) English refusal marker.
    refused = (
        any(marker in output for marker in ("拒绝", "禁用"))
        or any(marker in output.lower() for marker in ("denied", "disabled"))
    )
    assert refused
    return "OK"
# ════════════════ E. 上下文管理 (AgentContext) ════════════════
@test("E1. AgentContext 消息管理")
def test_context_messages():
    """Add two user/assistant exchanges and check the resulting message roles."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="Hello.", session_id="s1")
    exchanges = (
        ("Hi", "Hello there!"),
        ("How are you?", "I am fine."),
    )
    for user_text, assistant_text in exchanges:
        context.add_user_message(user_text)
        context.add_assistant_message(assistant_text)

    history = context.messages
    assert history[0]["role"] == "system"
    user_count = sum(1 for message in history if message["role"] == "user")
    assert user_count == 2
    assistant_count = sum(1 for message in history if message["role"] == "assistant")
    assert assistant_count == 2
    return f"OK {len(history)} messages"
@test("E2. AgentContext 工具调用消息")
def test_context_tool_calls():
    """Record an assistant tool call plus its result and check the stored tool message."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(session_id="s1")
    context.add_user_message("Read file")

    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "file_read", "arguments": '{"path": "/test.txt"}'},
    }
    context.add_assistant_message("Let me read the file.", tool_calls=[read_call])
    context.add_tool_result("call_1", "file_read", "File content: Hello World")

    history = context.messages
    tool_entries = [entry for entry in history if entry["role"] == "tool"]
    assert len(tool_entries) == 1

    recorded = tool_entries[0]
    assert recorded["tool_call_id"] == "call_1"
    assert recorded["name"] == "file_read"
    return "OK"
@test("E3. AgentContext 重置")
def test_context_reset():
    """After reset(), counters are zero and only the original system message remains."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="Original.", session_id="s1")

    # Dirty the context: add messages and bump both counters.
    context.add_user_message("Hello")
    context.add_assistant_message("Hi")
    context.iteration = 5
    context.tool_calls_made = 10

    context.reset()

    assert context.iteration == 0
    assert context.tool_calls_made == 0
    remaining = context.messages
    assert len(remaining) == 1  # only the system message
    assert remaining[0]["content"] == "Original."
    return "OK"
@test("E4. AgentContext set_system_prompt")
def test_context_set_system_prompt():
    """set_system_prompt must replace the content of the first message."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="V1", session_id="s1")
    context.set_system_prompt("V2 Updated")

    first_message = context.messages[0]
    assert first_message["content"] == "V2 Updated"
    return "OK"
@test("E5. AgentContext 会话 ID 唯一")
def test_context_session_id():
    """Two contexts created with different session ids must keep different ids."""
    from app.agent_runtime.context import AgentContext

    first_ctx = AgentContext(session_id="session_a")
    second_ctx = AgentContext(session_id="session_b")

    # Only explicitly supplied ids are compared here.
    assert first_ctx.session_id != second_ctx.session_id
    return "OK"
# ════════════════ F. 消息裁剪 (trim_messages) ════════════════
@test("F1. 裁剪保持 system 消息")
def test_trim_keeps_system():
    """Trimming with max_history=3 must keep the system message first and at most 4 entries."""
    from app.agent_runtime.memory import AgentMemory

    memory = AgentMemory(persist=False, max_history=3)

    # One system message, three full Q/A turns, then a trailing fourth question.
    history = [{"role": "system", "content": "You are helpful."}]
    for turn in (1, 2, 3):
        history.append({"role": "user", "content": f"Q{turn}"})
        history.append({"role": "assistant", "content": f"A{turn}"})
    history.append({"role": "user", "content": "Q4"})

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    assert len(kept) <= 4  # system + 3
    return f"OK trimmed to {len(kept)}"
@test("F2. 裁剪不超过 max_history")
def test_trim_max_history():
    """For several max_history values, trimming 50 Q/A turns must leave at most max_history + 1 messages."""
    from app.agent_runtime.memory import AgentMemory

    for mh in (5, 10, 15):
        memory = AgentMemory(persist=False, max_history=mh)

        # System message followed by 50 user/assistant pairs.
        history = [{"role": "system", "content": "S"}]
        for i in range(50):
            history.extend((
                {"role": "user", "content": f"Q{i}"},
                {"role": "assistant", "content": f"A{i}"},
            ))

        trimmed = memory.trim_messages(history)
        # The extra slot allows for the system message.
        assert len(trimmed) <= mh + 1, f"max_history={mh} 但裁剪后 {len(trimmed)}"
    return "OK"
@test("F3. 裁剪保护 tool_calls 配对")
def test_trim_tool_pairing():
    """Trimming a history with tool calls must not leave a tool message right after the system message."""
    from app.agent_runtime.memory import AgentMemory

    def assistant_tool_call(call_id, fn_name):
        # Assistant message that requests a single function call.
        return {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": call_id,
                    "name": fn_name,
                    "type": "function",
                    "function": {"name": fn_name, "arguments": "{}"},
                },
            ],
        }

    memory = AgentMemory(persist=False, max_history=5)
    history = [
        {"role": "system", "content": "S"},
        {"role": "user", "content": "U1"},
        assistant_tool_call("t1", "f1"),
        {"role": "tool", "content": "R1", "tool_call_id": "t1"},
        {"role": "assistant", "content": "Done!"},
        {"role": "user", "content": "U2"},
        assistant_tool_call("t2", "f2"),
        {"role": "tool", "content": "R2", "tool_call_id": "t2"},
        {"role": "assistant", "content": "All done!"},
        {"role": "user", "content": "U3"},
        {"role": "assistant", "content": "OK"},
    ]

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    # The first message after system must not be an orphaned tool result.
    assert kept[1]["role"] != "tool", "首条不能是孤立tool消息"
    return f"OK trimmed to {len(kept)}"
# ════════════════ G. 记忆类型推断 ════════════════
@test("G1. 所有记忆类型可区分")
def test_memory_type_distinct():
    """Check that _infer_memory_type returns the expected type for two samples per memory type."""
    from app.agent_runtime.memory import AgentMemory

    # Expected type -> (user message, assistant reply) samples, two per type.
    samples_by_type = {
        "user": [
            ("我喜欢Python", "好的"),
            ("记住我的名字是小明", "记住了"),
        ],
        "feedback": [
            ("这个不对,应该修复", "让我看看"),
            ("报错了500错误", "检查一下"),
        ],
        "reference": [
            ("数据库地址是101.43.95.130", "已记录"),
            ("API的URL是什么", "http://api.example.com"),
        ],
        "project": [
            ("项目进度完成了80%", "好的"),
            ("需求文档需要更新", "我来更新"),
        ],
    }

    checked = 0
    for expected, samples in samples_by_type.items():
        for um, ar in samples:
            result = AgentMemory._infer_memory_type(um, ar)
            assert result == expected, f"'{um[:25]}' expect {expected} got {result}"
            checked += 1
    return f"OK {checked} cases"
@test("G2. 混合信号按优先级分类")
def test_memory_type_priority():
    """Check the expected classification order when a message carries mixed signals."""
    from app.agent_runtime.memory import AgentMemory

    # (user message, assistant reply, expected type, failure label), listed in
    # the priority order the test expects: feedback keywords first, then
    # reference, then project, and plain chat falling back to "user".
    priority_cases = (
        ("这个API地址不对应该是127.0.0.1", "修正", "feedback", "反馈关键词应优先"),
        ("http://example.com的部署文档", "好的", "reference", "URL应分类为reference"),
        ("这个任务完成了", "好的", "project", "任务应分类为project"),
        ("你好", "你好", "user", "普通对话应为user"),
    )
    for user_msg, reply, expected, label in priority_cases:
        inferred = AgentMemory._infer_memory_type(user_msg, reply)
        assert inferred == expected, f"{label}, got {inferred}"
    return "OK"
# ════════════════ H. Agent错误处理 ════════════════
@test("H1. 无效工具名返回错误信息")
def test_invalid_tool_name():
    """Executing an unknown tool name must return an error string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    output = asyncio.run(manager.execute("NONEXISTENT_TOOL_XYZ123", {}))
    assert isinstance(output, str)

    # Accept an English marker (case-insensitive) or the Chinese "does not exist".
    lowered = output.lower()
    reported = "error" in lowered or "not found" in lowered or "不存在" in output
    assert reported, f"应报错, got: {output[:80]}"
    return "OK"
@test("H2. 空参数工具执行")
def test_tool_empty_args():
    """Executing the datetime tool with no arguments must still return a non-empty string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    no_args = {}
    output = asyncio.run(manager.execute("datetime", no_args))

    assert isinstance(output, str)
    assert len(output) > 0
    return "OK"
# ════════════════ I. 降级回退链 (fallback_llm) ════════════════
@test("I1. AgentLLMConfig 支持 fallback_llm 字段")
def test_fallback_llm_config_field():
    """Check that AgentLLMConfig accepts a fallback_llm mapping and keeps its entries."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {
        "model": "gpt-4o-mini",
        "api_key": "sk-test",
        "base_url": "https://api.openai.com/v1",
    }
    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback_spec)

    assert llm_cfg.fallback_llm is not None
    assert llm_cfg.fallback_llm["model"] == "gpt-4o-mini"
    assert llm_cfg.fallback_llm["api_key"] == "sk-test"
    return "OK"
@test("I2. fallback_llm 默认值为 None")
def test_fallback_llm_default_none():
    """When fallback_llm is not supplied, the field must be None."""
    from app.agent_runtime.schemas import AgentLLMConfig

    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash")
    configured_fallback = llm_cfg.fallback_llm
    assert configured_fallback is None
    return "OK"
@test("I3. 全局降级配置可从 settings 读取")
def test_fallback_global_settings():
    """Check that the global settings object defines the three fallback-LLM attributes."""
    from app.core.config import settings

    # Only presence is checked; the values themselves may be empty.
    for field_name in (
        "FALLBACK_LLM_MODEL",
        "FALLBACK_LLM_API_KEY",
        "FALLBACK_LLM_BASE_URL",
    ):
        assert hasattr(settings, field_name)
    return "OK"
@test("I4. _do_chat fallback 逻辑不崩溃 (mock)")
def test_fallback_do_chat_no_crash():
    """Verify an _LLMClient can be built with a fallback config (no real LLM call is made)."""
    from app.agent_runtime.core import _LLMClient
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {"model": "gpt-4o-mini", "api_key": "sk-fake"}
    llm_config = AgentLLMConfig(
        model="deepseek-v4-flash",
        temperature=0.7,
        fallback_llm=fallback_spec,
    )
    client = _LLMClient(llm_config)

    # The client is expected to keep the config, fallback included, on _config.
    assert client._config.fallback_llm is not None
    assert client._config.fallback_llm["model"] == "gpt-4o-mini"
    return "OK"
# ════════════════ 运行 ════════════════
if __name__ == "__main__":
    # Every @test-decorated function has already run while the module body
    # was executing, so the counters are final by the time this block starts.
    divider = "=" * 60

    print(divider)
    print("天工 Agent 核心运行时 — 综合测试")
    print(divider)
    print()

    total = PASS + FAIL + SKIP

    print()
    print(divider)
    print(f"测试结果: {PASS} 通过 / {FAIL} 失败 / {SKIP} 跳过 (共 {total})")
    print(divider)

    # A non-zero exit status signals failure to the calling shell or CI.
    if FAIL <= 0:
        print("\n全部核心测试通过!")
    else:
        sys.exit(1)