Files
aiagent/test/test_compaction.py

246 lines
9.2 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
"""对话压缩引擎集成测试"""
import sys, os, io
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "backend"))
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
import asyncio
from app.core.token_counter import TokenCounter, is_context_length_error, get_model_context_window
from app.core.compaction_config import CompactionConfig
from app.core.compaction import CompactionEngine, CompactionStrategy
def make_long_messages(rounds=20, tool_result_size=2000):
    """Build a synthetic multi-round conversation for the compaction tests.

    The list starts with a single system message. Every round then appends
    four messages: a user request, an assistant turn carrying one
    ``file_read`` tool call, the matching tool result padded with filler
    characters, and a closing assistant reply.

    Args:
        rounds: Number of conversation rounds to generate.
        tool_result_size: Number of ``"A"`` filler characters appended to
            each tool result.

    Returns:
        A list of message dicts: one system message plus four per round.
    """
    conversation = [
        {"role": "system", "content": "你是一个有用的AI助手。" + "X" * 500},
    ]
    for index in range(rounds):
        round_no = index + 1  # rounds are numbered from 1 in the message text
        call_id = f"call_{index}"  # shared by the tool call and its result
        conversation.extend([
            {"role": "user", "content": f"请帮我完成第{round_no}个任务。"},
            {
                "role": "assistant",
                "content": f"好的,让我来执行第{round_no}个任务。",
                "tool_calls": [{
                    "id": call_id,
                    "type": "function",
                    "function": {"name": "file_read", "arguments": '{"path":"/test"}'},
                }],
            },
            {
                "role": "tool",
                "tool_call_id": call_id,
                "name": "file_read",
                "content": f"[文件内容] 这是第{round_no}次文件读取的结果。" + "A" * tool_result_size,
            },
            {
                "role": "assistant",
                "content": f"{round_no}个任务完成。结果是:发现了一些重要信息。" + "B" * 200,
            },
        ])
    return conversation
async def test_token_counting():
    """Smoke-test the token counter on English text, Chinese text and messages.

    Prints the counts reported by ``TokenCounter.count`` for two sample
    strings and by ``TokenCounter.count_messages`` for a three-message
    dialogue. Nothing is asserted; the numbers are only displayed.

    Returns:
        The ``TokenCounter`` instance that was exercised.
    """
    print("=" * 60)
    print("测试 1: Token 计数器")
    counter = TokenCounter(model="deepseek-v4-flash")

    # 100 repetitions of a two-word phrase -> 200 English words.
    english_sample = "Hello world " * 100
    english_tokens = counter.count(english_sample)
    print(f" 英文 200 词: ~{english_tokens} tokens")

    # 100 repetitions of a four-character phrase -> 400 Chinese characters.
    chinese_sample = "你好世界" * 100
    chinese_tokens = counter.count(chinese_sample)
    print(f" 中文 400 字: ~{chinese_tokens} tokens")

    simple_dialogue = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there!"},
    ]
    dialogue_tokens = counter.count_messages(simple_dialogue)
    print(f" 3条简单消息: ~{dialogue_tokens} tokens")

    print(" [OK] Token 计数器工作正常")
    return counter
async def test_micro_compact():
    """Exercise MicroCompact (tier 1), which stubs out old tool results.

    Builds an engine with a small context-window override and a low
    micro-compact threshold, runs ``maybe_compact`` over a 20-round
    conversation and prints the reported strategy and token figures. It
    then counts the messages whose content equals the stub marker.

    Returns:
        The ``CompactionEngine`` used for the run.
    """
    print("\n" + "=" * 60)
    print("测试 2: MicroCompact (工具结果打桩)")

    settings = CompactionConfig(
        enabled=True,
        micro_compact_enabled=True,
        micro_compact_threshold=0.10,  # lowest threshold
        full_compact_threshold=0.90,  # high threshold so MicroCompact triggers first
        compact_older_than_rounds=5,  # tool results older than 5 rounds may be compacted
        min_preserve_messages=4,
        context_window_override=10000,  # small window to force a high usage ratio
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=settings, model="deepseek-v4-flash")

    # 20 rounds, each containing a tool call.
    history = make_long_messages(rounds=20, tool_result_size=500)
    initial_tokens = engine.token_counter.count_messages(history)
    print(f" 消息数: {len(history)}, tokens: {initial_tokens}")

    outcome = await engine.maybe_compact(history)
    print(f" 压缩策略: {outcome.strategy.value}")
    print(f" Tokens: {outcome.tokens_before} -> {outcome.tokens_after}")
    print(f" 节省: {outcome.tokens_saved} tokens")
    print(f" 详情: {outcome.details}")

    # Check how many tool results were replaced by the stub marker.
    stub_marker = "[Tool result compacted]"
    stubbed_count = len(
        [msg for msg in outcome.messages if msg.get("content") == stub_marker]
    )
    print(f" 被压缩的工具结果数: {stubbed_count}")

    micro_applied = outcome.strategy == CompactionStrategy.MICRO
    if not micro_applied or stubbed_count <= 0:
        print(f" [INFO] 策略={outcome.strategy}, stubbed={stubbed_count}")
    else:
        print(" [OK] MicroCompact 工作正常")
    return engine
async def test_full_compact():
    """Exercise FullCompact (tier 2) on an engine built without an LLM client.

    MicroCompact is disabled and the full-compact threshold is set low, then
    ``maybe_compact`` runs over a 30-round conversation. When the reported
    strategy is ``FULL``, the result is searched for messages whose
    stringified content contains the summary marker text.

    NOTE(review): the original docstring describes this as the no-LLM-client
    fallback path; that behaviour lives in the engine and is not visible here.

    Returns:
        The ``CompactionEngine`` used for the run.
    """
    print("\n" + "=" * 60)
    print("测试 3: FullCompact (LLM 摘要替换)")

    settings = CompactionConfig(
        enabled=True,
        micro_compact_enabled=False,
        full_compact_enabled=True,
        full_compact_threshold=0.10,  # lowest threshold
        min_preserve_messages=4,
        context_window_override=10000,
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=settings, model="deepseek-v4-flash")

    # Generate a large number of messages.
    history = make_long_messages(rounds=30, tool_result_size=200)
    initial_tokens = engine.token_counter.count_messages(history)
    print(f" 消息数: {len(history)}, tokens: {initial_tokens}")

    outcome = await engine.maybe_compact(history)
    print(f" 压缩策略: {outcome.strategy.value}")
    print(f" Tokens: {outcome.tokens_before} -> {outcome.tokens_after}")
    print(f" 节省: {outcome.tokens_saved} tokens")
    print(f" 详情: {outcome.details}")

    # Structure check (system + compact boundary + recent) only applies
    # when a full compaction actually happened.
    if outcome.strategy != CompactionStrategy.FULL:
        return engine

    # Collect the compact-boundary (summary) messages.
    summary_messages = []
    for msg in outcome.messages:
        if "对话上下文摘要" in str(msg.get("content", "")):
            summary_messages.append(msg)

    if not summary_messages:
        print(" [WARN] 未找到摘要边界消息")
        return engine

    print(f" 摘要边界消息数: {len(summary_messages)}")
    print(f" 摘要长度: {len(summary_messages[0]['content'])} 字符")
    print(" [OK] FullCompact 工作正常")
    return engine
async def test_reactive_compact():
    """Exercise ReactiveCompact (tier 3), which is triggered by an error.

    Builds a 30-round conversation, fabricates a "maximum context length"
    exception and passes both to ``engine.reactive_compact``. The reported
    strategy and token figures are printed.

    Any exception raised while compacting or reporting is caught and
    printed as a warning, so this test never aborts the run.
    """
    print("\n" + "=" * 60)
    print("测试 4: ReactiveCompact (错误触发)")

    config = CompactionConfig(
        enabled=True,
        reactive_compact_enabled=True,
        min_preserve_messages=4,
        context_window_override=128000,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    messages = make_long_messages(rounds=30, tool_result_size=200)
    tokens_before = engine.token_counter.count_messages(messages)
    # Report the input size the same way the MicroCompact and FullCompact
    # tests do, so the measured token count is not silently discarded.
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    # Simulate a context-length-exceeded error from the model API.
    error = Exception("This model's maximum context length is 128000 tokens. You requested 150000 tokens.")
    try:
        result = await engine.reactive_compact(messages, error)
        print(f" 压缩策略: {result.strategy.value}")
        print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
        print(f" 节省: {result.tokens_saved} tokens")
        print(" [OK] ReactiveCompact 工作正常")
    except Exception as e:
        # Deliberately broad: a failure is reported as a warning instead of
        # stopping the run.
        print(f" [WARN] ReactiveCompact 异常: {e}")
async def test_context_length_detection():
    """Check context-length error detection and print model context windows.

    Each sample exception is classified once by ``is_context_length_error``;
    the result is printed next to the expected value and then asserted. The
    context windows reported by ``get_model_context_window`` for three model
    names are printed without being asserted.

    Raises:
        AssertionError: If a sample exception is classified differently
            from its expected value.
    """
    print("\n" + "=" * 60)
    print("测试 5: 上下文超限检测")

    # (label, sample exception, expected is_context_length_error result)
    cases = [
        ("上下文超限错误检测", Exception("context length 128000 exceeded, requested 150000"), True),
        ("频率限制错误检测", Exception("rate limit exceeded"), False),
        ("超限错误变体检测", Exception("maximum context length is 128000 tokens. however, you requested 250000 tokens"), True),
    ]

    # Classify each sample exactly once so the printed value is the very
    # value that gets asserted.
    outcomes = [
        (label, is_context_length_error(error), expected)
        for label, error, expected in cases
    ]

    # Print every result before asserting, so all lines are visible even
    # when one of the checks fails.
    for label, detected, expected in outcomes:
        print(f" {label}: {detected} (应为 {expected})")
    for label, detected, expected in outcomes:
        assert detected == expected, f"{label}: expected {expected}, got {detected!r}"

    # Context-window lookup for a few model names (informational only).
    print(f" GPT-4o 窗口: {get_model_context_window('gpt-4o')}")
    print(f" DeepSeek 窗口: {get_model_context_window('deepseek-v4-flash')}")
    print(f" Claude 窗口: {get_model_context_window('claude-sonnet-4-6')}")
    print(" [OK] 上下文检测工作正常")
async def test_edge_cases():
    """Run ``maybe_compact`` on edge-case inputs and print the outcome.

    Three situations are covered: an empty message list, a three-message
    conversation, and a 10-round conversation on an engine whose config has
    ``enabled=False``. Results are printed only; nothing is asserted.
    """
    print("\n" + "=" * 60)
    print("测试 6: 边缘情况")

    engine = CompactionEngine(
        config=CompactionConfig(
            enabled=True,
            micro_compact_enabled=True,
            full_compact_enabled=True,
        ),
        model="deepseek-v4-flash",
    )

    # Empty message list.
    empty_outcome = await engine.maybe_compact([])
    print(f" 空消息: strategy={empty_outcome.strategy.value}")

    # Only a few messages (should not be compacted).
    short_chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi!"},
    ]
    short_outcome = await engine.maybe_compact(short_chat)
    print(f" 少量消息: strategy={short_outcome.strategy.value}, details={short_outcome.details}")

    # Compaction disabled in the config.
    disabled_engine = CompactionEngine(config=CompactionConfig(enabled=False))
    long_history = make_long_messages(rounds=10)
    disabled_outcome = await disabled_engine.maybe_compact(long_history)
    print(f" 禁用压缩: strategy={disabled_outcome.strategy.value}, details={disabled_outcome.details}")

    print(" [OK] 边缘情况处理正常")
async def main():
    """Run every test coroutine in order, framed by banner lines."""
    banner = "=" * 60
    print(banner)
    print("对话压缩引擎 — 集成测试")
    print(banner)

    # Awaited one at a time, in the listed order.
    suite = (
        test_token_counting,
        test_micro_compact,
        test_full_compact,
        test_reactive_compact,
        test_context_length_detection,
        test_edge_cases,
    )
    for test_case in suite:
        await test_case()

    print("\n" + banner)
    print("所有测试完成!")
    print(banner)
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())