feat: Phase 3 - parallel execution, progress reporting, result caching + AgentChat bug fixes

Phase 3 能力:
- DAG 并行执行 (workflow_engine): asyncio.gather 并行执行就绪节点
- Debate 并行 (orchestrator): for 循环改为 asyncio.gather
- 粒度进度上报 (workflow_engine + tasks + websocket): Redis 推送 + DB 降级
- 工具结果缓存 (tool_manager): 确定性工具默认开启缓存
- LLM 响应缓存 (core): messages[-4:] + model 哈希,5min TTL

AgentChat bug 修复 (Gitea #1-#5):
- #1 SSE 降级重复空消息: fallback POST 前移除占位消息
- #2 streamTimeout 泄漏: while 正常退出后 clearTimeout
- #3 loading 闪烁: final/error 事件中提前设 loading=false
- #4 SSE 事件类型对齐: 确认匹配,未知类型加 console.warn
- #5 retryMessage 流式残留: 重试时清理占位消息

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-05-05 00:00:51 +08:00
parent f3cb35c460
commit 7e00b027d4
8 changed files with 605 additions and 345 deletions

View File

@@ -10,6 +10,7 @@ Agent Runtime 核心 —— 自主 ReAct 循环。
"""
from __future__ import annotations
import hashlib
import json
import logging
import time
@@ -98,6 +99,9 @@ class AgentRuntime:
self.tool_manager = tool_manager or AgentToolManager(
include_tools=self.config.tools.include_tools,
exclude_tools=self.config.tools.exclude_tools,
cache_enabled=self.config.tools.cache_enabled,
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
cache_ttl_ms=self.config.tools.cache_ttl_ms,
)
self.execution_logger = execution_logger
self.on_tool_executed = on_tool_executed
@@ -912,6 +916,32 @@ class AgentRuntime:
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
# LLM 缓存辅助
def _llm_cache_key(messages: list, model: str) -> str:
import hashlib
raw = json.dumps({"msgs": messages[-4:], "model": model}, sort_keys=True, ensure_ascii=False)
return f"llm:{model}:{hashlib.sha256(raw.encode()).hexdigest()[:16]}"
async def _llm_cache_get(key: str) -> Optional[str]:
try:
from app.core.redis_client import get_redis_client
redis = get_redis_client()
if redis:
return await redis.get(key)
except Exception:
pass
return None
async def _llm_cache_set(key: str, value: str, ttl_ms: int):
try:
from app.core.redis_client import get_redis_client
redis = get_redis_client()
if redis:
await redis.setex(key, max(1, int(ttl_ms / 1000)), value)
except Exception:
pass
class _LLMClient:
"""轻量 LLM 客户端包装,复用已有 LLMService 能力。"""
@@ -984,12 +1014,29 @@ class _LLMClient:
kwargs["tools"] = normalized
kwargs["tool_choice"] = "auto"
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
if self._config.cache_enabled and not tools:
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
cached = await _llm_cache_get(cache_key)
if cached is not None:
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
# 构造简易 message 对象(含 content 字段即可)
class _CachedMsg:
content = cached
tool_calls = None
return _CachedMsg()
start_time = time.perf_counter()
try:
response = await client.chat.completions.create(**kwargs)
latency_ms = int((time.perf_counter() - start_time) * 1000)
message = response.choices[0].message
# 缓存写入(仅不用工具时)
if self._config.cache_enabled and not tools and message.content:
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
# 提取 token 用量
usage = getattr(response, "usage", None)
prompt_tokens = usage.prompt_tokens if usage else 0