feat: Phase 3 - parallel execution, progress reporting, result caching + AgentChat bug fixes

Phase 3 能力:
- DAG 并行执行 (workflow_engine): asyncio.gather 并行执行就绪节点
- Debate 并行 (orchestrator): for 循环改为 asyncio.gather
- 粒度进度上报 (workflow_engine + tasks + websocket): Redis 推送 + DB 降级
- 工具结果缓存 (tool_manager): 确定性工具默认开启缓存
- LLM 响应缓存 (core): messages[-4:] + model 哈希，5min TTL

AgentChat bug 修复 (Gitea #1-#5):
- #1 SSE 降级重复空消息: fallback POST 前移除占位消息
- #2 streamTimeout 泄漏: while 正常退出后 clearTimeout
- #3 loading 闪烁: final/error 事件中提前设 loading=false
- #4 SSE 事件类型对齐: 确认匹配，未知类型加 console.warn
- #5 retryMessage 流式残留: 重试时清理占位消息

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-05 00:00:51 +08:00
parent f3cb35c460
commit 7e00b027d4
8 changed files with 605 additions and 345 deletions
--- a/backend/app/agent_runtime/core.py
+++ b/backend/app/agent_runtime/core.py
@@ -10,6 +10,7 @@ Agent Runtime 核心 —— 自主 ReAct 循环。
 """
 from __future__ import annotations

+import hashlib
 import json
 import logging
 import time
@@ -98,6 +99,9 @@ class AgentRuntime:
        self.tool_manager = tool_manager or AgentToolManager(
            include_tools=self.config.tools.include_tools,
            exclude_tools=self.config.tools.exclude_tools,
+            cache_enabled=self.config.tools.cache_enabled,
+            cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
+            cache_ttl_ms=self.config.tools.cache_ttl_ms,
        )
        self.execution_logger = execution_logger
        self.on_tool_executed = on_tool_executed
@@ -912,6 +916,32 @@ class AgentRuntime:
        return any(kw in err_lower for kw in _RETRYABLE_ERRORS)


+# LLM response cache helpers
+def _llm_cache_key(messages: list, model: str) -> str:
+    """Build the Redis key under which an LLM response is cached.
+
+    The key has the form ``llm:<model>:<digest>``, where ``<digest>`` is the
+    first 16 hex characters of the SHA-256 of a JSON dump of the last four
+    messages together with the model name.
+
+    NOTE(review): only ``messages[-4:]`` is hashed, so two conversations that
+    differ solely in earlier messages (e.g. a leading system prompt, if the
+    caller sends one) map to the same key and would share a cached response.
+    Confirm this trade-off is intended.
+
+    Args:
+        messages: Chat messages; only the trailing four take part in the hash.
+        model: Model name; embedded in the key prefix and included in the hash.
+
+    Returns:
+        The cache key string.
+    """
+    # default=str keeps key building from raising TypeError when a message
+    # carries a value json cannot encode; JSON-safe input hashes exactly as
+    # it did without it.
+    raw = json.dumps(
+        {"msgs": messages[-4:], "model": model},
+        sort_keys=True,
+        ensure_ascii=False,
+        default=str,
+    )
+    # hashlib comes from the module-level import; no local import is needed.
+    return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}"
+
+async def _llm_cache_get(key: str) -> Optional[str]:
+    """Fetch a cached LLM response from Redis on a best-effort basis.
+
+    Args:
+        key: Cache key to look up.
+
+    Returns:
+        The cached text, or ``None`` when there is no entry, no Redis client
+        is available, or the lookup fails for any reason.
+    """
+    try:
+        # Imported inside the try so that an import failure also degrades
+        # to a cache miss instead of propagating.
+        from app.core.redis_client import get_redis_client
+        redis = get_redis_client()
+        if not redis:
+            return None
+        value = await redis.get(key)
+    except Exception as exc:
+        # A cache failure must never break the LLM call: record it and
+        # treat it as a miss.
+        logger.warning("LLM cache read failed: key=%s error=%s", key, exc)
+        return None
+    # The client may hand back bytes (e.g. redis-py without
+    # decode_responses); normalise to text so the declared return type holds.
+    if isinstance(value, bytes):
+        return value.decode("utf-8", errors="replace")
+    return value
+
+async def _llm_cache_set(key: str, value: str, ttl_ms: int) -> None:
+    """Store an LLM response in Redis with an expiry, on a best-effort basis.
+
+    Nothing is written when no Redis client is available. Any failure is
+    logged and otherwise ignored so that caching can never break the caller.
+
+    Args:
+        key: Cache key to write.
+        value: Response text to cache.
+        ttl_ms: Time to live in milliseconds; converted to whole seconds
+            with a floor of one second.
+    """
+    try:
+        # Imported inside the try so that an import failure is handled like
+        # any other cache error.
+        from app.core.redis_client import get_redis_client
+        redis = get_redis_client()
+        if redis:
+            # setex is given whole seconds; never pass less than one.
+            ttl_seconds = max(1, int(ttl_ms / 1000))
+            await redis.setex(key, ttl_seconds, value)
+    except Exception as exc:
+        # Best-effort write: record the failure instead of hiding it.
+        logger.warning("LLM cache write failed: key=%s error=%s", key, exc)
+
+
 class _LLMClient:
    """轻量 LLM 客户端包装，复用已有 LLMService 能力。"""

@@ -984,12 +1014,29 @@ class _LLMClient:
            kwargs["tools"] = normalized
            kwargs["tool_choice"] = "auto"

+        # LLM 响应缓存（仅不用工具时缓存，避免复杂序列化）
+        if self._config.cache_enabled and not tools:
+            cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
+            cached = await _llm_cache_get(cache_key)
+            if cached is not None:
+                logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
+                # 构造简易 message 对象（含 content 字段即可）
+                class _CachedMsg:
+                    content = cached
+                    tool_calls = None
+                return _CachedMsg()
+
        start_time = time.perf_counter()
        try:
            response = await client.chat.completions.create(**kwargs)
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            message = response.choices[0].message

+            # 缓存写入（仅不用工具时）
+            if self._config.cache_enabled and not tools and message.content:
+                ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
+                await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
+
            # 提取 token 用量
            usage = getattr(response, "usage", None)
            prompt_tokens = usage.prompt_tokens if usage else 0