- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions, schedules, executions, team_members) and unbind goals/tasks before delete
- Remove hardcoded personality templates in Android, replace with dynamic system prompt generation from name + description
- Set promptSectionsEnabled=false to bypass PromptComposer for personality
- Add Tencent Cloud Linux deployment guide (Docker Compose)
- Accumulated backend service updates, frontend UI fixes, Android app changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
729 lines
25 KiB
Python
729 lines
25 KiB
Python
"""
天工 Agent 核心运行时 — 综合测试
覆盖: ReAct 循环 / 工具调用 / 权限检查 / 上下文管理 / Schema 验证

运行: cd backend && python tests/test_agent_core.py
"""
|
||
import asyncio
|
||
import sys
|
||
import time
|
||
import os
|
||
|
||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||
|
||
PASS = 0
|
||
FAIL = 0
|
||
SKIP = 0
|
||
|
||
def test(name: str):
|
||
def decorator(fn):
|
||
global PASS, FAIL, SKIP
|
||
try:
|
||
result = fn()
|
||
if asyncio.iscoroutine(result):
|
||
result = asyncio.run(result)
|
||
if result is False:
|
||
FAIL += 1
|
||
print(f" FAIL {name}")
|
||
elif result is True:
|
||
PASS += 1
|
||
print(f" PASS {name}")
|
||
else:
|
||
PASS += 1
|
||
print(f" PASS {name} ({result})")
|
||
except Exception as e:
|
||
FAIL += 1
|
||
import traceback
|
||
print(f" FAIL {name}: {e}")
|
||
traceback.print_exc()
|
||
return fn
|
||
return decorator
|
||
|
||
|
||
# ════════════════ A. Agent Runtime 基础 ════════════════
|
||
|
||
@test("A1. AgentRuntime 导入正常")
def test_runtime_import():
    """Smoke test: the runtime package exposes its core public names."""
    # Importing AgentConfig / AgentLLMConfig is part of the check even though
    # only AgentRuntime is inspected afterwards.
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    runtime_cls = AgentRuntime
    assert callable(runtime_cls)
    return "OK"
|
||
|
||
|
||
@test("A2. AgentRuntime 创建实例")
def test_runtime_instantiate():
    """A freshly constructed runtime starts with zeroed context counters."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    llm_settings = AgentLLMConfig(model="deepseek-v4-flash", max_iterations=3)
    agent_config = AgentConfig(
        name="test_agent",
        system_prompt="You are a test assistant.",
        llm=llm_settings,
    )
    agent = AgentRuntime(config=agent_config)

    # Nothing has run yet, so both counters must still be zero.
    assert agent.context.iteration == 0
    assert agent.context.tool_calls_made == 0
    return "OK"
|
||
|
||
|
||
@test("A3. AgentRuntime 无工具时仍然可运行")
def test_runtime_no_tools():
    """Construct a runtime with empty include/exclude tool lists.

    A full ``run`` with memory/DB needs a database, so it is skipped here;
    only construction and the tool manager's query API are exercised.
    """
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentToolConfig

    config = AgentConfig(
        name="no_tool_agent",
        system_prompt="Echo back the user message exactly.",
        llm=AgentLLMConfig(model="deepseek-v4-flash", max_iterations=1, temperature=0.1),
        tools=AgentToolConfig(include_tools=[], exclude_tools=[]),
    )
    runtime = AgentRuntime(config=config)

    # Assert concrete return types instead of `len(...) >= 0`, which is always
    # true and therefore could never fail.
    manager = runtime.tool_manager
    assert isinstance(manager.has_tools(), bool)
    assert isinstance(manager.tool_names(), list)
    return "OK"
|
||
|
||
|
||
@test("A4. AgentRuntime 上下文管理")
def test_runtime_context():
    """The runtime's context is seeded with the configured system prompt."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig

    agent_config = AgentConfig(
        name="ctx_test",
        system_prompt="You are helpful.",
        llm=AgentLLMConfig(max_iterations=5),
    )
    agent = AgentRuntime(config=agent_config)

    # The context should already hold the system prompt as its first message.
    history = agent.context.messages
    assert len(history) >= 1
    first = history[0]
    assert first["role"] == "system"
    assert first["content"] == "You are helpful."
    return "OK"
|
||
|
||
|
||
@test("A5. AgentRuntime 记忆系统绑定")
def test_runtime_memory_binding():
    """An enabled memory config yields a bound memory object with the same history cap."""
    from app.agent_runtime import AgentRuntime, AgentConfig, AgentLLMConfig, AgentMemoryConfig

    memory_settings = dict(
        enabled=True,
        max_history_messages=10,
        vector_memory_enabled=False,
    )
    agent_config = AgentConfig(
        name="mem_test",
        system_prompt="You are helpful.",
        llm=AgentLLMConfig(max_iterations=3),
        memory=AgentMemoryConfig(**memory_settings),
    )
    agent = AgentRuntime(config=agent_config)

    assert agent.memory is not None
    # max_history_messages from the config surfaces as memory.max_history.
    assert agent.memory.max_history == 10
    return "OK"
|
||
|
||
|
||
# ════════════════ B. AgentSchema 验证 ════════════════
|
||
|
||
@test("B1. AgentConfig 全默认值")
def test_config_defaults():
    """Pin the defaults of an AgentConfig built from a name alone."""
    from app.agent_runtime.schemas import AgentConfig

    cfg = AgentConfig(name="test")
    assert cfg.name == "test"

    # LLM defaults
    assert cfg.llm.max_iterations == 10
    assert cfg.llm.temperature == 0.7
    assert cfg.llm.model == "gpt-4o-mini"

    # Budget defaults
    assert cfg.budget.max_llm_invocations == 200
    assert cfg.budget.max_tool_calls == 500

    # Memory defaults
    assert cfg.memory.enabled is True
    assert cfg.memory.vector_memory_top_k == 5

    # Tool filter defaults
    assert cfg.tools.include_tools == []
    assert cfg.tools.exclude_tools == []

    # The default require_approval list contains destructive tools
    # (deploy_push, send_email, ...).
    assert len(cfg.tools.require_approval) >= 5
    for destructive in ("deploy_push", "send_email"):
        assert destructive in cfg.tools.require_approval
    return "OK"
|
||
|
||
|
||
@test("B2. AgentLLMConfig fallback_llm 嵌套")
def test_llm_config_fallback():
    """A nested fallback_llm dict is stored and readable by key."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {"provider": "openai", "model": "gpt-4o-mini"}
    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback_spec)

    assert llm_cfg.fallback_llm["model"] == "gpt-4o-mini"
    assert llm_cfg.fallback_llm["provider"] == "openai"
    return "OK"
|
||
|
||
|
||
@test("B3. AgentMemoryConfig 团队共享参数")
def test_memory_config_team():
    """Team-sharing and rerank options round-trip through AgentMemoryConfig."""
    from app.agent_runtime.schemas import AgentMemoryConfig

    mem_cfg = AgentMemoryConfig(
        team_id="team_feishu",
        team_share_enabled=True,
        vector_memory_rerank=True,
    )

    assert mem_cfg.team_id == "team_feishu"
    assert mem_cfg.team_share_enabled is True
    assert mem_cfg.vector_memory_rerank is True
    return "OK"
|
||
|
||
|
||
@test("B4. AgentTokenBudgetConfig 阈值验证")
def test_token_budget_config():
    """Explicit token-budget thresholds are stored as given."""
    from app.agent_runtime.schemas import AgentTokenBudgetConfig

    budget = AgentTokenBudgetConfig(
        context_window=128000,
        warning_threshold_pct=0.75,
        compact_threshold_pct=0.85,
        hard_limit_pct=0.95,
    )

    # hard_limit_pct is supplied but, as in the original, not asserted on.
    assert budget.context_window == 128000
    assert budget.warning_threshold_pct == 0.75
    assert budget.compact_threshold_pct == 0.85
    return "OK"
|
||
|
||
|
||
@test("B5. AgentStep 构造")
def test_agent_step():
    """An AgentStep keeps the fields it was constructed with."""
    from app.agent_runtime.schemas import AgentStep

    tool_args = {"path": "/test.txt"}
    record = AgentStep(
        iteration=1,
        type="tool_call",
        content="调用工具...",
        tool_name="file_read",
        tool_input=tool_args,
    )

    assert record.iteration == 1
    assert record.type == "tool_call"
    assert record.tool_name == "file_read"
    assert record.tool_input == {"path": "/test.txt"}
    return "OK"
|
||
|
||
|
||
@test("B6. AgentResult truncated + error")
def test_agent_result():
    """A failed, truncated AgentResult preserves its counters and error code."""
    from app.agent_runtime.schemas import AgentResult

    outcome = AgentResult(
        success=False,
        content="Task failed",
        truncated=True,
        iterations_used=3,
        tool_calls_made=5,
        error="max_iterations_reached",
    )

    assert outcome.success is False
    assert outcome.truncated is True
    assert outcome.iterations_used == 3
    assert outcome.tool_calls_made == 5
    assert outcome.error == "max_iterations_reached"
    return "OK"
|
||
|
||
|
||
# ════════════════ C. 权限系统 (permissions.py) ════════════════
|
||
|
||
@test("C1. PermissionChecker BYPASS 模式全放行")
def test_permission_bypass():
    """At BYPASS level every tool name, even an unknown one, is allowed."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    gate = PermissionChecker(level=PermissionLevel.BYPASS)
    for tool in ("file_write", "deploy_push", "unknown_tool_xyz"):
        assert gate.check(tool).action == PermissionAction.ALLOW
    return "OK"
|
||
|
||
|
||
@test("C2. PermissionChecker PLAN 模式只允许只读")
def test_permission_plan():
    """At PLAN level read-only tools are allowed and mutating tools are denied."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    gate = PermissionChecker(level=PermissionLevel.PLAN)
    expectations = (
        ("file_read", PermissionAction.ALLOW),
        ("web_search", PermissionAction.ALLOW),
        ("file_write", PermissionAction.DENY),
        ("deploy_push", PermissionAction.DENY),
    )
    for tool, wanted in expectations:
        assert gate.check(tool).action == wanted
    return "OK"
|
||
|
||
|
||
@test("C3. PermissionChecker ACCEPT_EDITS 允许编辑工具")
def test_permission_accept_edits():
    """At ACCEPT_EDITS level file edits pass but destructive tools still ask."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    gate = PermissionChecker(level=PermissionLevel.ACCEPT_EDITS)

    for edit_tool in ("file_read", "file_write", "file_edit"):
        assert gate.check(edit_tool).action == PermissionAction.ALLOW

    # Destructive tools still require confirmation.
    for destructive in ("deploy_push", "git_reset_hard"):
        assert gate.check(destructive).action == PermissionAction.ASK
    return "OK"
|
||
|
||
|
||
@test("C4. PermissionChecker DEFAULT 破坏性工具返回 ASK")
def test_permission_default():
    """At DEFAULT level reads are allowed while writes and deploys ask first."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    gate = PermissionChecker(level=PermissionLevel.DEFAULT)
    expectations = (
        ("file_read", PermissionAction.ALLOW),
        ("web_search", PermissionAction.ALLOW),
        ("file_write", PermissionAction.ASK),
        ("deploy_push", PermissionAction.ASK),
    )
    for tool, wanted in expectations:
        assert gate.check(tool).action == wanted
    return "OK"
|
||
|
||
|
||
@test("C5. PermissionChecker 拒绝列表")
def test_permission_deny_list():
    """Tools named in deny_rules are denied; other tools keep their normal verdict."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    blocked = ["dangerous_tool", "eval_exec"]
    gate = PermissionChecker(level=PermissionLevel.DEFAULT, deny_rules=blocked)

    assert gate.check("dangerous_tool").action == PermissionAction.DENY
    assert gate.check("eval_exec").action == PermissionAction.DENY
    # A tool outside the deny list is unaffected.
    assert gate.check("file_read").action == PermissionAction.ALLOW
    return "OK"
|
||
|
||
|
||
@test("C6. AutoApproveRule 工具名通配符")
def test_auto_approve_wildcard():
    """A wildcard tool_pattern auto-approves matching tool names only."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule

    feishu_rule = AutoApproveRule(tool_pattern="feishu_*", description="飞书工具自动批准")
    gate = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[feishu_rule],
    )

    for matching in ("feishu_send", "feishu_create_doc"):
        assert gate.check(matching).action == PermissionAction.ALLOW
    # Tools that do not match the pattern are unaffected.
    assert gate.check("file_write").action == PermissionAction.ASK
    return "OK"
|
||
|
||
|
||
@test("C7. AutoApproveRule 参数条件匹配")
def test_auto_approve_params():
    """param_conditions restrict auto-approval to calls whose arguments match."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction, AutoApproveRule

    txt_only_rule = AutoApproveRule(
        tool_pattern="file_write",
        param_conditions={"path": "regex:\\.txt$"},
        description="自动批准txt文件写入",
    )
    gate = PermissionChecker(
        level=PermissionLevel.DEFAULT,
        auto_approve_rules=[txt_only_rule],
    )

    # Writing a .txt file is auto-approved.
    txt_verdict = gate.check("file_write", {"path": "data.txt"})
    assert txt_verdict.action == PermissionAction.ALLOW

    # Writing a .env file still needs confirmation.
    env_verdict = gate.check("file_write", {"path": "config.env"})
    assert env_verdict.action == PermissionAction.ASK
    return "OK"
|
||
|
||
|
||
@test("C8. PermissionLevel 级别切换")
def test_permission_level_switch():
    """set_level changes the verdicts of an existing checker in place."""
    from app.agent_runtime.permissions import PermissionChecker, PermissionLevel, PermissionAction

    gate = PermissionChecker(level=PermissionLevel.DEFAULT)

    def verdict(tool):
        return gate.check(tool).action

    assert verdict("file_write") == PermissionAction.ASK

    gate.set_level(PermissionLevel.ACCEPT_EDITS)
    assert verdict("file_write") == PermissionAction.ALLOW

    gate.set_level(PermissionLevel.BYPASS)
    assert verdict("deploy_push") == PermissionAction.ALLOW

    return "OK"
|
||
|
||
|
||
# ════════════════ D. Tool Manager 测试 ════════════════
|
||
|
||
@test("D1. AgentToolManager 创建")
def test_tool_manager_create():
    """A default tool manager reports at least one tool, listed as a list."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    assert manager.has_tools() is True

    available = manager.tool_names()
    assert isinstance(available, list)
    assert len(available) >= 1
    return f"OK {len(available)} tools"
|
||
|
||
|
||
@test("D2. AgentToolManager 白名单过滤")
def test_tool_manager_whitelist():
    """Names passed as include_tools are recorded on the manager."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime", "math_calculate"])
    # Inspects the private _include_tools attribute directly.
    for wanted in ("datetime", "math_calculate"):
        assert wanted in manager._include_tools
    return "OK"
|
||
|
||
|
||
@test("D3. AgentToolManager 黑名单过滤")
def test_tool_manager_blacklist():
    """Names passed as exclude_tools are recorded on the manager."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(exclude_tools=["file_delete", "deploy_push"])
    # Inspects the private _exclude_tools attribute directly.
    for banned in ("file_delete", "deploy_push"):
        assert banned in manager._exclude_tools
    return "OK"
|
||
|
||
|
||
@test("D4. AgentToolManager 工具schema格式")
def test_tool_manager_schema_format():
    """Every tool schema has the OpenAI function-calling envelope."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager(include_tools=["datetime"])
    tool_schemas = manager.get_tool_schemas()
    assert len(tool_schemas) >= 1

    # Expected shape: {"type": "function", "function": {"name": ...}}
    for schema in tool_schemas:
        assert "type" in schema, f"缺少type字段: {schema}"
        assert schema["type"] == "function"
        assert "function" in schema
        assert "name" in schema["function"]
    return f"OK {len(tool_schemas)} schemas"
|
||
|
||
|
||
@test("D5. AgentToolManager 确定性工具缓存判断")
def test_tool_manager_cacheable():
    """_is_cacheable accepts the two named tools and rejects an unknown one."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    for cacheable_tool in ("file_read", "math_calculate"):
        assert manager._is_cacheable(cacheable_tool) is True
    assert manager._is_cacheable("unknown_random_tool") is False, "非确定性工具不应缓存"
    return "OK"
|
||
|
||
|
||
@test("D6. AgentToolManager 缓存关闭")
def test_tool_manager_cache_disabled():
    """With cache_enabled=False, file_read is not reported as cacheable."""
    from app.agent_runtime.tool_manager import AgentToolManager

    uncached_manager = AgentToolManager(cache_enabled=False)
    verdict = uncached_manager._is_cacheable("file_read")
    assert verdict is False
    return "OK"
|
||
|
||
|
||
@test("D7. AgentToolManager 执行内置工具")
def test_tool_manager_execute():
    """Executing the built-in datetime tool yields a non-empty string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    # execute() is awaitable, so it is driven to completion synchronously.
    pending = manager.execute("datetime", {"format": "%Y-%m-%d"})
    output = asyncio.run(pending)

    assert isinstance(output, str)
    assert len(output) > 0
    return f"OK result={output[:30]}"
|
||
|
||
|
||
@test("D8. AgentToolManager 权限拒绝工具执行")
def test_tool_manager_permission_deny():
    """A tool listed in deny_tools returns a refusal message instead of running."""
    from app.agent_runtime.tool_manager import AgentToolManager

    # Use deny_tools to refuse the datetime tool.
    manager = AgentToolManager(deny_tools=["datetime"])
    output = asyncio.run(manager.execute("datetime", {"format": "%Y"}))

    # Accept either a Chinese or an English refusal marker.
    assert (
        "拒绝" in output
        or "禁用" in output
        or "denied" in output.lower()
        or "disabled" in output.lower()
    )
    return "OK"
|
||
|
||
|
||
# ════════════════ E. 上下文管理 (AgentContext) ════════════════
|
||
|
||
@test("E1. AgentContext 消息管理")
def test_context_messages():
    """User and assistant messages accumulate after the system message."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="Hello.", session_id="s1")
    context.add_user_message("Hi")
    context.add_assistant_message("Hello there!")
    context.add_user_message("How are you?")
    context.add_assistant_message("I am fine.")

    history = context.messages
    assert history[0]["role"] == "system"

    user_turns = [entry for entry in history if entry["role"] == "user"]
    assert len(user_turns) == 2
    assistant_turns = [entry for entry in history if entry["role"] == "assistant"]
    assert len(assistant_turns) == 2
    return f"OK {len(history)} messages"
|
||
|
||
|
||
@test("E2. AgentContext 工具调用消息")
def test_context_tool_calls():
    """A tool result is stored as a 'tool' message carrying its call id and name."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(session_id="s1")
    context.add_user_message("Read file")

    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "file_read", "arguments": '{"path": "/test.txt"}'},
    }
    context.add_assistant_message("Let me read the file.", tool_calls=[read_call])
    context.add_tool_result("call_1", "file_read", "File content: Hello World")

    history = context.messages
    tool_entries = [entry for entry in history if entry["role"] == "tool"]
    assert len(tool_entries) == 1

    only_entry = tool_entries[0]
    assert only_entry["tool_call_id"] == "call_1"
    assert only_entry["name"] == "file_read"
    return "OK"
|
||
|
||
|
||
@test("E3. AgentContext 重置")
def test_context_reset():
    """reset() zeroes the counters and keeps only the original system message."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="Original.", session_id="s1")
    context.add_user_message("Hello")
    context.add_assistant_message("Hi")
    # Dirty the counters so the reset has something to undo.
    context.iteration = 5
    context.tool_calls_made = 10

    context.reset()

    assert context.iteration == 0
    assert context.tool_calls_made == 0
    remaining = context.messages
    assert len(remaining) == 1  # only system
    assert remaining[0]["content"] == "Original."
    return "OK"
|
||
|
||
|
||
@test("E4. AgentContext set_system_prompt")
def test_context_set_system_prompt():
    """set_system_prompt replaces the content of the leading message."""
    from app.agent_runtime.context import AgentContext

    context = AgentContext(system_prompt="V1", session_id="s1")
    context.set_system_prompt("V2 Updated")

    leading = context.messages[0]
    assert leading["content"] == "V2 Updated"
    return "OK"
|
||
|
||
|
||
@test("E5. AgentContext 会话 ID 唯一")
def test_context_session_id():
    """Contexts built with different explicit session ids expose different ids."""
    from app.agent_runtime.context import AgentContext

    first = AgentContext(session_id="session_a")
    second = AgentContext(session_id="session_b")
    assert first.session_id != second.session_id
    return "OK"
|
||
|
||
|
||
# ════════════════ F. 消息裁剪 (trim_messages) ════════════════
|
||
|
||
@test("F1. 裁剪保持 system 消息")
def test_trim_keeps_system():
    """Trimming keeps the system message first and caps the remainder."""
    from app.agent_runtime.memory import AgentMemory

    memory = AgentMemory(persist=False, max_history=3)

    # system + three full Q/A turns + one trailing question = 8 messages
    history = [{"role": "system", "content": "You are helpful."}]
    for turn in (1, 2, 3):
        history.append({"role": "user", "content": f"Q{turn}"})
        history.append({"role": "assistant", "content": f"A{turn}"})
    history.append({"role": "user", "content": "Q4"})

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    assert len(kept) <= 4  # system + 3
    return f"OK trimmed to {len(kept)}"
|
||
|
||
|
||
@test("F2. 裁剪不超过 max_history")
def test_trim_max_history():
    """For several caps, a 100-message history trims to at most cap + 1 entries."""
    from app.agent_runtime.memory import AgentMemory

    turn_shapes = (("user", "Q"), ("assistant", "A"))
    for mh in [5, 10, 15]:
        memory = AgentMemory(persist=False, max_history=mh)

        # One system message followed by 50 alternating user/assistant turns.
        history = [{"role": "system", "content": "S"}]
        history.extend(
            {"role": role, "content": f"{prefix}{i}"}
            for i in range(50)
            for role, prefix in turn_shapes
        )

        trimmed = memory.trim_messages(history)
        assert len(trimmed) <= mh + 1, f"max_history={mh} 但裁剪后 {len(trimmed)}"
    return "OK"
|
||
|
||
|
||
@test("F3. 裁剪保护 tool_calls 配对")
def test_trim_tool_pairing():
    """After trimming, the message right after system is not an orphaned tool result."""
    from app.agent_runtime.memory import AgentMemory

    def tool_request(call_id, fn_name):
        # Assistant message that requests a single function call.
        return {"role": "assistant", "content": "", "tool_calls": [
            {"id": call_id, "name": fn_name, "type": "function",
             "function": {"name": fn_name, "arguments": "{}"}},
        ]}

    memory = AgentMemory(persist=False, max_history=5)
    history = [
        {"role": "system", "content": "S"},
        {"role": "user", "content": "U1"},
        tool_request("t1", "f1"),
        {"role": "tool", "content": "R1", "tool_call_id": "t1"},
        {"role": "assistant", "content": "Done!"},
        {"role": "user", "content": "U2"},
        tool_request("t2", "f2"),
        {"role": "tool", "content": "R2", "tool_call_id": "t2"},
        {"role": "assistant", "content": "All done!"},
        {"role": "user", "content": "U3"},
        {"role": "assistant", "content": "OK"},
    ]

    kept = memory.trim_messages(history)
    assert kept[0]["role"] == "system"
    # The first non-system message must not be an orphaned tool result.
    assert kept[1]["role"] != "tool", "首条不能是孤立tool消息"
    return f"OK trimmed to {len(kept)}"
|
||
|
||
|
||
# ════════════════ G. 记忆类型推断 ════════════════
|
||
|
||
@test("G1. 所有记忆类型可区分")
def test_memory_type_distinct():
    """_infer_memory_type labels each sample exchange with its expected type."""
    from app.agent_runtime.memory import AgentMemory

    # Two samples per memory type: (user message, assistant reply, expected type).
    cases = [
        ("我喜欢Python", "好的", "user"),
        ("记住我的名字是小明", "记住了", "user"),
        ("这个不对,应该修复", "让我看看", "feedback"),
        ("报错了500错误", "检查一下", "feedback"),
        ("数据库地址是101.43.95.130", "已记录", "reference"),
        ("API的URL是什么", "http://api.example.com", "reference"),
        ("项目进度完成了80%", "好的", "project"),
        ("需求文档需要更新", "我来更新", "project"),
    ]
    infer = AgentMemory._infer_memory_type
    for case in cases:
        um, ar, expected = case
        result = infer(um, ar)
        assert result == expected, f"'{um[:25]}' expect {expected} got {result}"
    return f"OK {len(cases)} cases"
|
||
|
||
|
||
@test("G2. 混合信号按优先级分类")
def test_memory_type_priority():
    """Mixed-signal messages resolve as feedback, then reference, then project, else user."""
    from app.agent_runtime.memory import AgentMemory

    classify = AgentMemory._infer_memory_type

    # Feedback keywords are expected to win over everything else.
    r = classify("这个API地址不对,应该是127.0.0.1", "修正")
    assert r == "feedback", f"反馈关键词应优先, got {r}"

    # Reference keywords come next.
    r = classify("http://example.com的部署文档", "好的")
    assert r == "reference", f"URL应分类为reference, got {r}"

    # Project keywords come third.
    r = classify("这个任务完成了", "好的")
    assert r == "project", f"任务应分类为project, got {r}"

    # Anything else falls back to "user".
    r = classify("你好", "你好")
    assert r == "user", f"普通对话应为user, got {r}"

    return "OK"
|
||
|
||
|
||
# ════════════════ H. Agent错误处理 ════════════════
|
||
|
||
@test("H1. 无效工具名返回错误信息")
def test_invalid_tool_name():
    """Executing a nonexistent tool returns an error string."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    result = asyncio.run(manager.execute("NONEXISTENT_TOOL_XYZ123", {}))
    assert isinstance(result, str)

    # Accept an English or a Chinese error marker.
    lowered = result.lower()
    looks_like_error = "error" in lowered or "not found" in lowered or "不存在" in result
    assert looks_like_error, f"应报错, got: {result[:80]}"
    return "OK"
|
||
|
||
|
||
@test("H2. 空参数工具执行")
def test_tool_empty_args():
    """The datetime tool still yields a non-empty string with an empty argument dict."""
    from app.agent_runtime.tool_manager import AgentToolManager

    manager = AgentToolManager()
    no_args = {}
    output = asyncio.run(manager.execute("datetime", no_args))

    assert isinstance(output, str)
    assert len(output) > 0
    return "OK"
|
||
|
||
|
||
# ════════════════ I. 降级回退链 (fallback_llm) ════════════════
|
||
|
||
@test("I1. AgentLLMConfig 支持 fallback_llm 字段")
def test_fallback_llm_config_field():
    """fallback_llm accepts a dict with model, api_key and base_url entries."""
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {
        "model": "gpt-4o-mini",
        "api_key": "sk-test",
        "base_url": "https://api.openai.com/v1",
    }
    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash", fallback_llm=fallback_spec)

    assert llm_cfg.fallback_llm is not None
    assert llm_cfg.fallback_llm["model"] == "gpt-4o-mini"
    assert llm_cfg.fallback_llm["api_key"] == "sk-test"
    return "OK"
|
||
|
||
|
||
@test("I2. fallback_llm 默认值为 None")
def test_fallback_llm_default_none():
    """Without an explicit value, fallback_llm is None."""
    from app.agent_runtime.schemas import AgentLLMConfig

    llm_cfg = AgentLLMConfig(model="deepseek-v4-flash")
    fallback = llm_cfg.fallback_llm
    assert fallback is None
    return "OK"
|
||
|
||
|
||
@test("I3. 全局降级配置可从 settings 读取")
def test_fallback_global_settings():
    """The global settings object exposes the three fallback-LLM attributes."""
    from app.core.config import settings

    # Only presence is checked; the values themselves may be empty.
    required_fields = (
        "FALLBACK_LLM_MODEL",
        "FALLBACK_LLM_API_KEY",
        "FALLBACK_LLM_BASE_URL",
    )
    for field_name in required_fields:
        assert hasattr(settings, field_name)
    return "OK"
|
||
|
||
|
||
@test("I4. _do_chat fallback 逻辑不崩溃 (mock)")
def test_fallback_do_chat_no_crash():
    """Build the private LLM client with a fallback configured; no LLM call is made.

    Only checks that the client can be constructed and keeps the fallback
    settings reachable through its ``_config`` attribute.
    """
    from app.agent_runtime.core import _LLMClient
    from app.agent_runtime.schemas import AgentLLMConfig

    fallback_spec = {"model": "gpt-4o-mini", "api_key": "sk-fake"}
    settings_with_fallback = AgentLLMConfig(
        model="deepseek-v4-flash",
        temperature=0.7,
        fallback_llm=fallback_spec,
    )
    llm_client = _LLMClient(settings_with_fallback)

    assert llm_client._config.fallback_llm is not None
    assert llm_client._config.fallback_llm["model"] == "gpt-4o-mini"
    return "OK"
|
||
|
||
|
||
# ════════════════ 运行 ════════════════
|
||
|
||
if __name__ == "__main__":
    # The @test decorator runs each test at definition time, so every test has
    # already executed (and printed its line) before this point; only the
    # banner and the summary are produced here.
    rule = "=" * 60
    print(rule)
    print("天工 Agent 核心运行时 — 综合测试")
    print(rule)
    print()

    total = PASS + FAIL + SKIP
    print()
    print(rule)
    print(f"测试结果: {PASS} 通过 / {FAIL} 失败 / {SKIP} 跳过 (共 {total})")
    print(rule)

    # A non-zero exit status signals failure to the calling shell or CI.
    if FAIL > 0:
        sys.exit(1)
    print("\n全部核心测试通过!")
|