- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions, schedules, executions, team_members) and unbind goals/tasks before delete
- Remove hardcoded personality templates in Android; replace them with dynamic system prompt generation from name + description
- Set promptSectionsEnabled=false to bypass PromptComposer for personality
- Add Tencent Cloud Linux deployment guide (Docker Compose)
- Accumulated backend service updates, frontend UI fixes, Android app changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
246 lines
9.2 KiB
Python
246 lines
9.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""对话压缩引擎集成测试"""
|
||
import sys, os, io
|
||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "backend"))
|
||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||
|
||
import asyncio
|
||
from app.core.token_counter import TokenCounter, is_context_length_error, get_model_context_window
|
||
from app.core.compaction_config import CompactionConfig
|
||
from app.core.compaction import CompactionEngine, CompactionStrategy
|
||
|
||
# 生成模拟长对话消息
|
||
def make_long_messages(rounds=20, tool_result_size=2000):
    """Build a synthetic multi-round conversation with large tool results.

    The list opens with a single system message. Every round then adds four
    messages: a user request, an assistant reply carrying one ``file_read``
    tool call, the matching tool result padded with ``tool_result_size``
    filler characters, and a closing assistant message.

    Args:
        rounds: Number of conversation rounds to generate.
        tool_result_size: Number of ``"A"`` filler characters appended to
            each tool result.

    Returns:
        A list of ``1 + 4 * rounds`` message dicts.
    """
    system_message = {"role": "system", "content": "你是一个有用的AI助手。" + "X" * 500}
    conversation = [system_message]

    for index in range(rounds):
        ordinal = index + 1
        call_id = f"call_{index}"

        user_turn = {"role": "user", "content": f"请帮我完成第{ordinal}个任务。"}
        assistant_call = {
            "role": "assistant",
            "content": f"好的,让我来执行第{ordinal}个任务。",
            "tool_calls": [
                {
                    "id": call_id,
                    "type": "function",
                    "function": {"name": "file_read", "arguments": '{"path":"/test"}'},
                }
            ],
        }
        tool_reply = {
            "role": "tool",
            "tool_call_id": call_id,
            "name": "file_read",
            "content": f"[文件内容] 这是第{ordinal}次文件读取的结果。" + "A" * tool_result_size,
        }
        assistant_wrap_up = {
            "role": "assistant",
            "content": f"第{ordinal}个任务完成。结果是:发现了一些重要信息。" + "B" * 200,
        }

        conversation.extend([user_turn, assistant_call, tool_reply, assistant_wrap_up])

    return conversation
|
||
|
||
async def test_token_counting():
    """Smoke-test the token counter on English text, Chinese text and messages.

    Prints the counts for manual inspection; nothing is asserted.

    Returns:
        The ``TokenCounter`` instance that was exercised.
    """
    print("=" * 60)
    print("测试 1: Token 计数器")
    counter = TokenCounter(model="deepseek-v4-flash")

    # "Hello world " repeated 100 times -> 200 English words.
    english_sample = "Hello world " * 100
    english_tokens = counter.count(english_sample)
    print(f" 英文 200 词: ~{english_tokens} tokens")

    # Four Chinese characters repeated 100 times -> 400 characters.
    chinese_sample = "你好世界" * 100
    chinese_tokens = counter.count(chinese_sample)
    print(f" 中文 400 字: ~{chinese_tokens} tokens")

    simple_chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there!"},
    ]
    chat_tokens = counter.count_messages(simple_chat)
    print(f" 3条简单消息: ~{chat_tokens} tokens")

    print(" [OK] Token 计数器工作正常")
    return counter
|
||
|
||
async def test_micro_compact():
    """Exercise MicroCompact (tier 1), which stubs out tool results.

    Builds 20 synthetic rounds, runs ``maybe_compact`` with a 10000-token
    context-window override, then prints the chosen strategy, the token
    figures and how many messages now carry the stub text.

    Returns:
        The ``CompactionEngine`` used for the run.
    """
    print("\n" + "=" * 60)
    print("测试 2: MicroCompact (工具结果打桩)")

    micro_config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=True,
        micro_compact_threshold=0.10,  # lowest threshold
        full_compact_threshold=0.90,  # high threshold so MicroCompact triggers first
        compact_older_than_rounds=5,  # tool results older than 5 rounds may be compacted
        min_preserve_messages=4,
        context_window_override=10000,  # small window forces a high usage ratio
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=micro_config, model="deepseek-v4-flash")

    # 20 rounds of conversation, each containing a tool call.
    conversation = make_long_messages(rounds=20, tool_result_size=500)
    initial_tokens = engine.token_counter.count_messages(conversation)
    print(f" 消息数: {len(conversation)}, tokens: {initial_tokens}")

    outcome = await engine.maybe_compact(conversation)

    print(f" 压缩策略: {outcome.strategy.value}")
    print(f" Tokens: {outcome.tokens_before} -> {outcome.tokens_after}")
    print(f" 节省: {outcome.tokens_saved} tokens")
    print(f" 详情: {outcome.details}")

    # Count the messages whose content was replaced by the stub text.
    stub_text = "[Tool result compacted]"
    stubbed_count = len([msg for msg in outcome.messages if msg.get("content") == stub_text])
    print(f" 被压缩的工具结果数: {stubbed_count}")

    micro_applied = outcome.strategy == CompactionStrategy.MICRO and stubbed_count > 0
    if micro_applied:
        print(" [OK] MicroCompact 工作正常")
    else:
        print(f" [INFO] 策略={outcome.strategy}, stubbed={stubbed_count}")

    return engine
|
||
|
||
async def test_full_compact():
    """Exercise FullCompact (tier 2) using the fallback path without an LLM client.

    Builds 30 synthetic rounds, runs ``maybe_compact`` with MicroCompact
    disabled and a 10000-token context-window override, then prints the
    chosen strategy and token figures. When the FULL strategy was applied,
    the result is searched for summary-boundary messages.

    Returns:
        The ``CompactionEngine`` used for the run.
    """
    print("\n" + "=" * 60)
    print("测试 3: FullCompact (LLM 摘要替换)")

    config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=False,
        full_compact_enabled=True,
        full_compact_threshold=0.10,  # lowest threshold
        min_preserve_messages=4,
        context_window_override=10000,
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    # Generate a large number of messages.
    messages = make_long_messages(rounds=30, tool_result_size=200)
    tokens_before = engine.token_counter.count_messages(messages)
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    result = await engine.maybe_compact(messages)

    print(f" 压缩策略: {result.strategy.value}")
    print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
    print(f" 节省: {result.tokens_saved} tokens")
    print(f" 详情: {result.details}")

    # Verify the structure: system + compact_boundary + recent messages.
    if result.strategy == CompactionStrategy.FULL:
        # Look for the compact_boundary message by its summary marker text.
        boundaries = [m for m in result.messages if "对话上下文摘要" in str(m.get("content", ""))]
        if boundaries:
            print(f" 摘要边界消息数: {len(boundaries)}")
            print(f" 摘要长度: {len(boundaries[0]['content'])} 字符")
            print(" [OK] FullCompact 工作正常")
        else:
            print(" [WARN] 未找到摘要边界消息")
    else:
        # Previously silent: report the strategy that was actually chosen so a
        # run where FullCompact never triggered is not mistaken for a pass.
        print(f" [INFO] 策略={result.strategy}")

    return engine
|
||
|
||
async def test_reactive_compact():
    """Exercise ReactiveCompact (tier 3), which is triggered by an error.

    Builds 30 synthetic rounds and passes them to ``reactive_compact``
    together with a simulated context-length error. Any exception raised
    while compacting or reporting is printed as a warning rather than
    propagated, so this function never fails the run.
    """
    print("\n" + "=" * 60)
    print("测试 4: ReactiveCompact (错误触发)")

    config = CompactionConfig(
        enabled=True,
        reactive_compact_enabled=True,
        min_preserve_messages=4,
        context_window_override=128000,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    messages = make_long_messages(rounds=30, tool_result_size=200)
    tokens_before = engine.token_counter.count_messages(messages)
    # The count was computed but never reported; print it the same way the
    # MicroCompact and FullCompact tests do.
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    # Simulate a context-length-exceeded error.
    error = Exception("This model's maximum context length is 128000 tokens. You requested 150000 tokens.")

    try:
        result = await engine.reactive_compact(messages, error)
        print(f" 压缩策略: {result.strategy.value}")
        print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
        print(f" 节省: {result.tokens_saved} tokens")
        print(" [OK] ReactiveCompact 工作正常")
    except Exception as e:
        # Deliberately best-effort: report the failure and let the remaining
        # tests in the script continue.
        print(f" [WARN] ReactiveCompact 异常: {e}")
|
||
|
||
async def test_context_length_detection():
    """Check context-length error detection and print model context windows.

    Each sample exception is classified once with ``is_context_length_error``;
    all results are printed first and then asserted against the expected
    value. The context windows reported by ``get_model_context_window`` for
    three model names are printed for manual inspection.

    Raises:
        AssertionError: If a sample error is classified differently than
            expected.
    """
    print("\n" + "=" * 60)
    print("测试 5: 上下文超限检测")

    # (label, sample error, expected detection result)
    cases = [
        ("上下文超限错误检测", Exception("context length 128000 exceeded, requested 150000"), True),
        ("频率限制错误检测", Exception("rate limit exceeded"), False),
        (
            "超限错误变体检测",
            Exception("maximum context length is 128000 tokens. however, you requested 250000 tokens"),
            False if False else True,
        ),
    ]

    # Classify each error exactly once so the printed value is the asserted one.
    outcomes = [(label, is_context_length_error(error), expected) for label, error, expected in cases]

    # Print every result before asserting so a failure still shows the full picture.
    for label, detected, expected in outcomes:
        print(f" {label}: {detected} (应为 {expected})")

    for label, detected, expected in outcomes:
        assert detected == expected, f"{label}: expected {expected}, got {detected!r}"

    # Report the context window looked up for a few model names.
    print(f" GPT-4o 窗口: {get_model_context_window('gpt-4o')}")
    print(f" DeepSeek 窗口: {get_model_context_window('deepseek-v4-flash')}")
    print(f" Claude 窗口: {get_model_context_window('claude-sonnet-4-6')}")
    print(" [OK] 上下文检测工作正常")
|
||
|
||
async def test_edge_cases():
    """Run ``maybe_compact`` on edge-case inputs and print the outcome.

    Covers an empty message list, a three-message conversation, and an
    engine whose configuration has compaction disabled. Results are printed
    for manual inspection; nothing is asserted.
    """
    print("\n" + "=" * 60)
    print("测试 6: 边缘情况")

    enabled_config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=True,
        full_compact_enabled=True,
    )
    enabled_engine = CompactionEngine(config=enabled_config, model="deepseek-v4-flash")

    # Empty message list.
    empty_outcome = await enabled_engine.maybe_compact([])
    print(f" 空消息: strategy={empty_outcome.strategy.value}")

    # A handful of messages (should not be compacted).
    short_chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi!"},
    ]
    short_outcome = await enabled_engine.maybe_compact(short_chat)
    print(f" 少量消息: strategy={short_outcome.strategy.value}, details={short_outcome.details}")

    # Compaction disabled.
    disabled_engine = CompactionEngine(config=CompactionConfig(enabled=False))
    disabled_outcome = await disabled_engine.maybe_compact(make_long_messages(rounds=10))
    print(f" 禁用压缩: strategy={disabled_outcome.strategy.value}, details={disabled_outcome.details}")

    print(" [OK] 边缘情况处理正常")
|
||
|
||
async def main():
    """Run every compaction test in order, framed by a banner."""
    rule = "=" * 60

    print(rule)
    print("对话压缩引擎 — 集成测试")
    print(rule)

    # Awaited one after another, in the same order as they are defined.
    test_sequence = (
        test_token_counting,
        test_micro_compact,
        test_full_compact,
        test_reactive_compact,
        test_context_length_detection,
        test_edge_cases,
    )
    for run_test in test_sequence:
        await run_test()

    print("\n" + rule)
    print("所有测试完成!")
    print(rule)
|
||
|
||
# Script entry point: run the async test suite when executed directly.
if __name__ == "__main__":
    suite = main()
    asyncio.run(suite)
|