feat: Phase 3 - parallel execution, progress reporting, result caching + AgentChat bug fixes
Phase 3 能力: - DAG 并行执行 (workflow_engine): asyncio.gather 并行执行就绪节点 - Debate 并行 (orchestrator): for 循环改为 asyncio.gather - 粒度进度上报 (workflow_engine + tasks + websocket): Redis 推送 + DB 降级 - 工具结果缓存 (tool_manager): 确定性工具默认开启缓存 - LLM 响应缓存 (core): messages[-4:] + model 哈希,5min TTL AgentChat bug 修复 (Gitea #1-#5): - #1 SSE 降级重复空消息: fallback POST 前移除占位消息 - #2 streamTimeout 泄漏: while 正常退出后 clearTimeout - #3 loading 闪烁: final/error 事件中提前设 loading=false - #4 SSE 事件类型对齐: 确认匹配,未知类型加 console.warn - #5 retryMessage 流式残留: 重试时清理占位消息 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ Agent Runtime 核心 —— 自主 ReAct 循环。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
@@ -98,6 +99,9 @@ class AgentRuntime:
|
||||
self.tool_manager = tool_manager or AgentToolManager(
|
||||
include_tools=self.config.tools.include_tools,
|
||||
exclude_tools=self.config.tools.exclude_tools,
|
||||
cache_enabled=self.config.tools.cache_enabled,
|
||||
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
|
||||
cache_ttl_ms=self.config.tools.cache_ttl_ms,
|
||||
)
|
||||
self.execution_logger = execution_logger
|
||||
self.on_tool_executed = on_tool_executed
|
||||
@@ -912,6 +916,32 @@ class AgentRuntime:
|
||||
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
|
||||
|
||||
|
||||
# LLM 缓存辅助
|
||||
def _llm_cache_key(messages: list, model: str) -> str:
    """Build the Redis key under which an LLM response is cached.

    The key has the form ``llm:<model>:<16 hex chars>``, where the hex part is
    a truncated SHA-256 of the JSON-serialised last four messages plus the
    model name.

    Args:
        messages: Chat messages of the request; only the trailing four
            entries take part in the hash. ``None`` is treated as empty.
        model: Model identifier; part of both the prefix and the hash.

    Returns:
        The cache key string.
    """
    # NOTE(review): only the last four messages are hashed, so two
    # conversations that differ earlier (e.g. a different system prompt) but
    # share the same tail map to the same key. Confirm this is acceptable.
    tail = (messages or [])[-4:]
    raw = json.dumps(
        {"msgs": tail, "model": model},
        sort_keys=True,
        ensure_ascii=False,
        # Key construction must never abort the real LLM call: stringify
        # anything that is not JSON-native instead of raising TypeError.
        # JSON-native input never reaches this hook, so its keys are unchanged.
        default=str,
    )
    digest = hashlib.sha256(raw.encode()).hexdigest()[:16]
    return f"llm:{model}:{digest}"
|
||||
|
||||
async def _llm_cache_get(key: str) -> Optional[str]:
    """Fetch a cached LLM response from Redis, best-effort.

    Args:
        key: Cache key to look up.

    Returns:
        The cached response text, or ``None`` when there is no entry, no
        Redis client is available, or the lookup fails for any reason.
    """
    try:
        # Imported lazily so a missing or broken Redis module only disables
        # the cache instead of failing at import time.
        from app.core.redis_client import get_redis_client

        redis = get_redis_client()
        if not redis:
            return None
        cached = await redis.get(key)
    except Exception as exc:
        # The cache is optional: record the failure and fall back to a miss.
        logging.getLogger(__name__).debug(
            "LLM cache read failed: key=%s err=%s", key, exc
        )
        return None

    # A client created without decode_responses returns bytes; normalise to
    # str so the value matches the declared return type.
    if isinstance(cached, (bytes, bytearray)):
        return bytes(cached).decode("utf-8", errors="replace")
    return cached
|
||||
|
||||
async def _llm_cache_set(key: str, value: str, ttl_ms: int) -> None:
    """Store an LLM response in Redis with an expiry, best-effort.

    Args:
        key: Cache key to write.
        value: Response text to store.
        ttl_ms: Time-to-live in milliseconds; converted to whole seconds and
            clamped to at least one second.

    Any failure (missing module, no client, Redis error) is logged at debug
    level and otherwise ignored, so caching never breaks the caller.
    """
    try:
        # Imported lazily so a missing or broken Redis module only disables
        # the cache instead of failing at import time.
        from app.core.redis_client import get_redis_client

        redis = get_redis_client()
        if redis:
            # The TTL is passed to setex in seconds; a zero TTL is never sent.
            ttl_seconds = max(1, int(ttl_ms / 1000))
            await redis.setex(key, ttl_seconds, value)
    except Exception as exc:
        logging.getLogger(__name__).debug(
            "LLM cache write failed: key=%s err=%s", key, exc
        )
|
||||
|
||||
|
||||
class _LLMClient:
|
||||
"""轻量 LLM 客户端包装,复用已有 LLMService 能力。"""
|
||||
|
||||
@@ -984,12 +1014,29 @@ class _LLMClient:
|
||||
kwargs["tools"] = normalized
|
||||
kwargs["tool_choice"] = "auto"
|
||||
|
||||
# LLM 响应缓存(仅不用工具时缓存,避免复杂序列化)
|
||||
if self._config.cache_enabled and not tools:
|
||||
cache_key = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
|
||||
cached = await _llm_cache_get(cache_key)
|
||||
if cached is not None:
|
||||
logger.info("LLM 响应命中缓存: model=%s", kwargs.get("model"))
|
||||
# 构造简易 message 对象(含 content 字段即可)
|
||||
class _CachedMsg:
|
||||
content = cached
|
||||
tool_calls = None
|
||||
return _CachedMsg()
|
||||
|
||||
start_time = time.perf_counter()
|
||||
try:
|
||||
response = await client.chat.completions.create(**kwargs)
|
||||
latency_ms = int((time.perf_counter() - start_time) * 1000)
|
||||
message = response.choices[0].message
|
||||
|
||||
# 缓存写入(仅不用工具时)
|
||||
if self._config.cache_enabled and not tools and message.content:
|
||||
ck = _llm_cache_key(kwargs.get("messages", []), kwargs.get("model", ""))
|
||||
await _llm_cache_set(ck, message.content, self._config.cache_ttl_ms)
|
||||
|
||||
# 提取 token 用量
|
||||
usage = getattr(response, "usage", None)
|
||||
prompt_tokens = usage.prompt_tokens if usage else 0
|
||||
|
||||
Reference in New Issue
Block a user