# -*- coding: utf-8 -*-
"""Integration tests for the conversation compaction engine.

Run as a script: each scenario prints its progress to stdout, and only the
context-length detection scenario uses hard assertions.
"""
import asyncio
import io
import os
import sys

# Make the ``backend`` package directory importable before loading ``app.*``.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "backend"))


def _force_utf8_stdout():
    """Make ``sys.stdout`` emit UTF-8, replacing unencodable characters.

    ``reconfigure()`` changes the encoding of the existing stream in place,
    so no second wrapper is created around the same buffer.  Re-wrapping
    ``sys.stdout.buffer`` is kept only as a fallback, and is skipped when
    stdout has no ``buffer`` attribute (e.g. a replaced or captured stream)
    instead of raising ``AttributeError`` at import time.
    """
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(sys.stdout, "buffer"):
        sys.stdout = io.TextIOWrapper(
            sys.stdout.buffer, encoding="utf-8", errors="replace"
        )


_force_utf8_stdout()

from app.core.token_counter import (  # noqa: E402
    TokenCounter,
    is_context_length_error,
    get_model_context_window,
)
from app.core.compaction_config import CompactionConfig  # noqa: E402
from app.core.compaction import CompactionEngine, CompactionStrategy  # noqa: E402


# Build a simulated long conversation
def make_long_messages(rounds=20, tool_result_size=2000):
    """Build a simulated multi-round conversation with large tool results.

    The list starts with one system message.  Every round then appends four
    messages: a user request, an assistant message carrying one ``file_read``
    tool call, the matching tool result padded with ``tool_result_size``
    filler characters, and a closing assistant message.

    Args:
        rounds: Number of conversation rounds to generate.
        tool_result_size: Number of ``"A"`` filler characters appended to
            each tool result.

    Returns:
        A list of message dicts (``1 + 4 * rounds`` entries).
    """
    messages = [
        {"role": "system", "content": "你是一个有用的AI助手。" + "X" * 500},
    ]
    for i in range(rounds):
        call_id = f"call_{i}"
        messages.extend([
            {"role": "user", "content": f"请帮我完成第{i+1}个任务。"},
            {
                "role": "assistant",
                "content": f"好的,让我来执行第{i+1}个任务。",
                "tool_calls": [{
                    "id": call_id,
                    "type": "function",
                    "function": {
                        "name": "file_read",
                        "arguments": '{"path":"/test"}',
                    },
                }],
            },
            {
                "role": "tool",
                "tool_call_id": call_id,
                "name": "file_read",
                "content": f"[文件内容] 这是第{i+1}次文件读取的结果。" + "A" * tool_result_size,
            },
            {
                "role": "assistant",
                "content": f"第{i+1}个任务完成。结果是:发现了一些重要信息。" + "B" * 200,
            },
        ])
    return messages


async def test_token_counting():
    """Exercise the token counter on English text, Chinese text and messages.

    Returns:
        The ``TokenCounter`` instance that was used.
    """
    print("=" * 60)
    print("测试 1: Token 计数器")

    tc = TokenCounter(model="deepseek-v4-flash")

    text_en = "Hello world " * 100  # 200 English words
    tokens_en = tc.count(text_en)
    print(f" 英文 200 词: ~{tokens_en} tokens")

    text_zh = "你好世界" * 100  # 400 Chinese characters
    tokens_zh = tc.count(text_zh)
    print(f" 中文 400 字: ~{tokens_zh} tokens")

    msgs = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi there!"},
    ]
    total = tc.count_messages(msgs)
    print(f" 3条简单消息: ~{total} tokens")

    print(" [OK] Token 计数器工作正常")
    return tc


async def test_micro_compact():
    """Exercise MicroCompact (tier 1): stubbing out old tool results.

    Runs ``maybe_compact`` on a 20-round conversation with a small context
    window override, then counts messages whose content equals the
    ``"[Tool result compacted]"`` placeholder.  The outcome is only printed,
    not asserted.

    Returns:
        The ``CompactionEngine`` instance that was used.
    """
    print("\n" + "=" * 60)
    print("测试 2: MicroCompact (工具结果打桩)")

    config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=True,
        micro_compact_threshold=0.10,   # lowest threshold
        full_compact_threshold=0.90,    # high threshold so MicroCompact triggers first
        compact_older_than_rounds=5,    # tool results older than 5 rounds may be compacted
        min_preserve_messages=4,
        context_window_override=10000,  # small window to force a high occupancy ratio
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    # 20 rounds of conversation, each with a tool call
    messages = make_long_messages(rounds=20, tool_result_size=500)
    tokens_before = engine.token_counter.count_messages(messages)
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    result = await engine.maybe_compact(messages)
    print(f" 压缩策略: {result.strategy.value}")
    print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
    print(f" 节省: {result.tokens_saved} tokens")
    print(f" 详情: {result.details}")

    # Check that tool results were replaced by the placeholder
    stubbed = sum(
        1 for m in result.messages
        if m.get("content") == "[Tool result compacted]"
    )
    print(f" 被压缩的工具结果数: {stubbed}")

    if result.strategy == CompactionStrategy.MICRO and stubbed > 0:
        print(" [OK] MicroCompact 工作正常")
    else:
        print(f" [INFO] 策略={result.strategy}, stubbed={stubbed}")
    return engine


async def test_full_compact():
    """Exercise FullCompact (tier 2) without an LLM client (fallback path).

    Runs ``maybe_compact`` on a 30-round conversation with micro compaction
    disabled.  When the reported strategy is ``FULL``, looks for messages
    whose content contains the summary marker text and prints what it finds.

    Returns:
        The ``CompactionEngine`` instance that was used.
    """
    print("\n" + "=" * 60)
    print("测试 3: FullCompact (LLM 摘要替换)")

    config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=False,
        full_compact_enabled=True,
        full_compact_threshold=0.10,  # lowest threshold
        min_preserve_messages=4,
        context_window_override=10000,
        output_reserve_tokens=512,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    # Generate a large number of messages
    messages = make_long_messages(rounds=30, tool_result_size=200)
    tokens_before = engine.token_counter.count_messages(messages)
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    result = await engine.maybe_compact(messages)
    print(f" 压缩策略: {result.strategy.value}")
    print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
    print(f" 节省: {result.tokens_saved} tokens")
    print(f" 详情: {result.details}")

    # Expected structure: system + compact_boundary + recent messages
    if result.strategy == CompactionStrategy.FULL:
        # Find the compact_boundary (summary) messages
        boundaries = [
            m for m in result.messages
            if "对话上下文摘要" in str(m.get("content", ""))
        ]
        if boundaries:
            print(f" 摘要边界消息数: {len(boundaries)}")
            print(f" 摘要长度: {len(boundaries[0]['content'])} 字符")
            print(" [OK] FullCompact 工作正常")
        else:
            print(" [WARN] 未找到摘要边界消息")
    return engine


async def test_reactive_compact():
    """Exercise ReactiveCompact (tier 3): compaction triggered by an error.

    Passes a simulated context-length error to ``engine.reactive_compact``.
    Any exception raised by the call is reported as a warning rather than
    propagated, so the remaining scenarios still run.
    """
    print("\n" + "=" * 60)
    print("测试 4: ReactiveCompact (错误触发)")

    config = CompactionConfig(
        enabled=True,
        reactive_compact_enabled=True,
        min_preserve_messages=4,
        context_window_override=128000,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    messages = make_long_messages(rounds=30, tool_result_size=200)
    tokens_before = engine.token_counter.count_messages(messages)
    # Report the input size like the MicroCompact/FullCompact scenarios do;
    # previously this value was computed and then discarded.
    print(f" 消息数: {len(messages)}, tokens: {tokens_before}")

    # Simulate a context-length-exceeded error
    error = Exception(
        "This model's maximum context length is 128000 tokens. "
        "You requested 150000 tokens."
    )

    try:
        result = await engine.reactive_compact(messages, error)
        print(f" 压缩策略: {result.strategy.value}")
        print(f" Tokens: {result.tokens_before} -> {result.tokens_after}")
        print(f" 节省: {result.tokens_saved} tokens")
        print(" [OK] ReactiveCompact 工作正常")
    except Exception as e:  # deliberate best-effort: report and continue
        print(f" [WARN] ReactiveCompact 异常: {e}")


async def test_context_length_detection():
    """Check context-length error detection and the model window lookup.

    Raises:
        AssertionError: If ``is_context_length_error`` misclassifies one of
            the three sample exceptions.
    """
    print("\n" + "=" * 60)
    print("测试 5: 上下文超限检测")

    # Check is_context_length_error
    e1 = Exception("context length 128000 exceeded, requested 150000")
    e2 = Exception("rate limit exceeded")
    e3 = Exception(
        "maximum context length is 128000 tokens. "
        "however, you requested 250000 tokens"
    )

    # Evaluate each sample once and reuse the result for output and asserts.
    detected_e1 = is_context_length_error(e1)
    detected_e2 = is_context_length_error(e2)
    detected_e3 = is_context_length_error(e3)

    print(f" 上下文超限错误检测: {detected_e1} (应为 True)")
    print(f" 频率限制错误检测: {detected_e2} (应为 False)")
    print(f" 超限错误变体检测: {detected_e3} (应为 True)")

    # Truthiness checks instead of ``== True`` / ``== False`` comparisons.
    assert detected_e1, "context-length error was not detected"
    assert not detected_e2, "rate-limit error was misdetected as context-length"
    assert detected_e3, "context-length error variant was not detected"

    # Check get_model_context_window
    print(f" GPT-4o 窗口: {get_model_context_window('gpt-4o')}")
    print(f" DeepSeek 窗口: {get_model_context_window('deepseek-v4-flash')}")
    print(f" Claude 窗口: {get_model_context_window('claude-sonnet-4-6')}")

    print(" [OK] 上下文检测工作正常")


async def test_edge_cases():
    """Exercise ``maybe_compact`` on edge-case inputs.

    Covers an empty message list, a three-message conversation, and an
    engine whose configuration has compaction disabled.  Results are
    printed, not asserted.
    """
    print("\n" + "=" * 60)
    print("测试 6: 边缘情况")

    config = CompactionConfig(
        enabled=True,
        micro_compact_enabled=True,
        full_compact_enabled=True,
    )
    engine = CompactionEngine(config=config, model="deepseek-v4-flash")

    # Empty message list
    result = await engine.maybe_compact([])
    print(f" 空消息: strategy={result.strategy.value}")

    # Only a few messages (should not be compacted)
    few = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi!"},
    ]
    result = await engine.maybe_compact(few)
    print(f" 少量消息: strategy={result.strategy.value}, details={result.details}")

    # Compaction disabled
    config_disabled = CompactionConfig(enabled=False)
    engine2 = CompactionEngine(config=config_disabled)
    result = await engine2.maybe_compact(make_long_messages(rounds=10))
    print(f" 禁用压缩: strategy={result.strategy.value}, details={result.details}")

    print(" [OK] 边缘情况处理正常")


async def main():
    """Run every scenario in order, framed by a banner."""
    print("=" * 60)
    print("对话压缩引擎 — 集成测试")
    print("=" * 60)

    await test_token_counting()
    await test_micro_compact()
    await test_full_compact()
    await test_reactive_compact()
    await test_context_length_detection()
    await test_edge_cases()

    print("\n" + "=" * 60)
    print("所有测试完成!")
    print("=" * 60)


if __name__ == "__main__":
    asyncio.run(main())