fix: Feishu channel agents file_write permission blocked + memory system tests & docs
- Fix 8 Feishu agent handlers to use permission_level="acceptEdits" so the file_write tool works without the Web UI approval popup (lingxi/renshenguo/suyao/tiantian/orange/main/schedule)
- Add P5-P7 memory improvements: offline keyword fallback, team sharing, file-based memory
- Add auto_dream_service for daily memory consolidation
- Add 99 memory system test cases (basic 18 + advanced 43 + pytest 38)
- Add platform capability assessment report and unfinished project checklist

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,35 @@ from app.agent_runtime.context import AgentContext
|
||||
from app.agent_runtime.memory import AgentMemory
|
||||
from app.agent_runtime.tool_manager import AgentToolManager
|
||||
from app.core.exceptions import WorkflowExecutionError
|
||||
from app.core.hooks import HookManager, HookEvent, HookContext, HookResult
|
||||
from app.agent_runtime.plan_mode import PlanMode, Plan, PlanStatus
|
||||
from app.core.error_recovery import ErrorClassifier, ErrorType, ConversationRecovery
|
||||
from app.core.memdir import MemoryDir, MemoryType as MemType, MemoryManifest, parse_frontmatter
|
||||
from app.core.memory_selector import memory_selector
|
||||
from app.core.compaction import CompactionEngine, CompactionResult, CompactionStrategy
|
||||
from app.core.compaction_config import CompactionConfig
|
||||
from app.core.token_counter import is_context_length_error
|
||||
from app.core.streamlined_output import (
|
||||
StreamlinedTransformer,
|
||||
create_streamlined_transformer,
|
||||
get_tool_summary_text,
|
||||
ToolCounts,
|
||||
categorize_tool,
|
||||
)
|
||||
from app.core.prompt_sections import (
|
||||
PromptComposer,
|
||||
PromptSection,
|
||||
create_prompt_composer,
|
||||
create_default_static_sections,
|
||||
create_default_dynamic_sections,
|
||||
section_environment,
|
||||
section_language,
|
||||
)
|
||||
from app.core.token_budget import (
|
||||
TokenBudget,
|
||||
TokenBudgetConfig,
|
||||
create_token_budget,
|
||||
)
|
||||
from app.services.agent_learning_service import (
|
||||
extract_pattern_from_result,
|
||||
format_pattern_hint,
|
||||
@@ -54,18 +83,8 @@ class LLMCallMetrics(TypedDict, total=False):
|
||||
status: str # success / error
|
||||
error_message: Optional[str]
|
||||
|
||||
# 可重试的 API 异常
|
||||
_RETRYABLE_ERRORS = (
|
||||
"timed out",
|
||||
"timeout",
|
||||
"connection error",
|
||||
"temporarily unavailable",
|
||||
"server disconnected",
|
||||
"rate limit",
|
||||
"too many requests",
|
||||
"internal server error",
|
||||
"service unavailable",
|
||||
)
|
||||
# 全局错误分类器(可重试判定 + 退避策略)
|
||||
_error_classifier = ErrorClassifier()
|
||||
|
||||
|
||||
class AgentRuntime:
|
||||
@@ -86,6 +105,8 @@ class AgentRuntime:
|
||||
execution_logger: Optional[Any] = None,
|
||||
on_tool_executed: Optional[Callable[[str], Any]] = None,
|
||||
on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
|
||||
hook_manager: Optional[HookManager] = None,
|
||||
streamlined: bool = False,
|
||||
):
|
||||
self.config = config or AgentConfig()
|
||||
self.context = context or AgentContext(
|
||||
@@ -97,6 +118,14 @@ class AgentRuntime:
|
||||
scope_id=_mem_scope,
|
||||
max_history=self.config.memory.max_history_messages,
|
||||
persist=self.config.memory.persist_to_db,
|
||||
vector_memory_enabled=self.config.memory.vector_memory_enabled,
|
||||
vector_memory_top_k=self.config.memory.vector_memory_top_k,
|
||||
vector_memory_rerank=self.config.memory.vector_memory_rerank,
|
||||
memory_type_filter=self.config.memory.memory_type_filter,
|
||||
team_id=self.config.memory.team_id,
|
||||
team_share_enabled=self.config.memory.team_share_enabled,
|
||||
memory_dir_enabled=self.config.memory.memory_dir_enabled,
|
||||
memory_dir_path=self.config.memory.memory_dir_path,
|
||||
)
|
||||
self.tool_manager = tool_manager or AgentToolManager(
|
||||
include_tools=self.config.tools.include_tools,
|
||||
@@ -104,6 +133,9 @@ class AgentRuntime:
|
||||
cache_enabled=self.config.tools.cache_enabled,
|
||||
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
|
||||
cache_ttl_ms=self.config.tools.cache_ttl_ms,
|
||||
permission_level=self.config.tools.permission_level,
|
||||
auto_approve_rules=self.config.tools.auto_approve_rules,
|
||||
deny_tools=self.config.tools.deny_tools,
|
||||
)
|
||||
self.execution_logger = execution_logger
|
||||
self.on_tool_executed = on_tool_executed
|
||||
@@ -113,10 +145,119 @@ class AgentRuntime:
|
||||
# 自主学习作用域:bare 聊天用 "bare",Agent 用 "agent"
|
||||
self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent"
|
||||
|
||||
# Hook 管理器 (P1)
|
||||
self.hook_manager = hook_manager or HookManager()
|
||||
|
||||
# 计划模式 (P2)
|
||||
self.plan_mode = PlanMode(self.config.llm) if self.config.llm.plan_mode_enabled else None
|
||||
|
||||
# 对话自动压缩 (参考 Claude Code compact)
|
||||
self.compaction_engine: Optional[CompactionEngine] = None
|
||||
compaction_cfg = getattr(self.config.memory, 'compaction', None)
|
||||
if compaction_cfg is None:
|
||||
compaction_cfg = CompactionConfig()
|
||||
if compaction_cfg.enabled:
|
||||
self.compaction_engine = CompactionEngine(
|
||||
config=compaction_cfg,
|
||||
model=self.config.llm.model,
|
||||
)
|
||||
logger.info("对话压缩引擎已启用 (model=%s, window=%d)",
|
||||
self.config.llm.model, self.config.llm.context_window)
|
||||
|
||||
# 工具结果流式美化 (参考 Claude Code streamlinedTransform)
|
||||
self.streamlined = streamlined
|
||||
self._streamlined_transformer: Optional[StreamlinedTransformer] = None
|
||||
if streamlined:
|
||||
self._streamlined_transformer = create_streamlined_transformer(enabled=True)
|
||||
logger.info("工具结果流式美化已启用")
|
||||
|
||||
# 系统提示词分层装配 (P2 — 参考 Claude Code systemPromptSections.ts)
|
||||
self._prompt_composer: Optional[PromptComposer] = None
|
||||
self._prompt_sections_enabled = self.config.prompt_sections.enabled
|
||||
if self._prompt_sections_enabled:
|
||||
ps_config = self.config.prompt_sections
|
||||
# 构建静态段(按开关过滤)
|
||||
static_sections = []
|
||||
s_switches = ps_config.static_sections
|
||||
if s_switches.get("persona", True):
|
||||
static_sections.append(PromptSection(
|
||||
"persona",
|
||||
lambda cfg=self.config: f"{cfg.system_prompt}\n\n"
|
||||
))
|
||||
if s_switches.get("capabilities", True):
|
||||
from app.core.prompt_sections import section_capabilities
|
||||
static_sections.append(PromptSection("capabilities", section_capabilities))
|
||||
if s_switches.get("tool_instructions", True):
|
||||
from app.core.prompt_sections import section_tool_instructions
|
||||
static_sections.append(PromptSection("tool_instructions", section_tool_instructions))
|
||||
if s_switches.get("safety_rules", True):
|
||||
from app.core.prompt_sections import section_safety_rules
|
||||
static_sections.append(PromptSection("safety_rules", section_safety_rules))
|
||||
if s_switches.get("output_style", True):
|
||||
from app.core.prompt_sections import section_output_style
|
||||
static_sections.append(PromptSection("output_style", section_output_style))
|
||||
|
||||
self._prompt_composer = PromptComposer()
|
||||
self._prompt_composer.add_static_sections(static_sections)
|
||||
logger.info("系统提示词分层装配已启用 (%d 静态段)", len(static_sections))
|
||||
|
||||
# Token 预算管理 (P2 — 参考 Claude Code tokenBudget.ts)
|
||||
self._token_budget: Optional[TokenBudget] = None
|
||||
tb_config = self.config.token_budget
|
||||
if tb_config.enabled:
|
||||
self._token_budget = TokenBudget(
|
||||
config=TokenBudgetConfig(
|
||||
enabled=True,
|
||||
context_window=tb_config.context_window or self.config.llm.context_window,
|
||||
output_reserve=tb_config.output_reserve,
|
||||
warning_threshold_pct=tb_config.warning_threshold_pct,
|
||||
compact_threshold_pct=tb_config.compact_threshold_pct,
|
||||
hard_limit_pct=tb_config.hard_limit_pct,
|
||||
user_budget=tb_config.user_budget,
|
||||
auto_continue=tb_config.auto_continue,
|
||||
compaction_after_warning=tb_config.compaction_after_warning,
|
||||
max_compaction_attempts=tb_config.max_compaction_attempts,
|
||||
),
|
||||
model=self.config.llm.model,
|
||||
)
|
||||
logger.info("Token 预算管理已启用 (window=%d, compact@%d%%)",
|
||||
self._token_budget.config.context_window,
|
||||
int(tb_config.compact_threshold_pct * 100))
|
||||
|
||||
# 崩溃恢复 (P4)
|
||||
self.recovery = ConversationRecovery()
|
||||
self._recovery_snapshot_counter = 0
|
||||
|
||||
# 文件式记忆 (MEMORY.md)
|
||||
self._memdir: Optional[MemoryDir] = None
|
||||
self._memdir_manifest: Optional[MemoryManifest] = None
|
||||
if self.config.memory.memory_dir_enabled:
|
||||
mem_path = self.config.memory.memory_dir_path
|
||||
if not mem_path:
|
||||
# 默认路径: 项目根目录下的 .claude/memory
|
||||
import os as _os
|
||||
mem_path = _os.path.join(
|
||||
_os.path.dirname(_os.path.dirname(_os.path.dirname(__file__))),
|
||||
".claude", "memory",
|
||||
)
|
||||
self._memdir = MemoryDir(mem_path)
|
||||
# 启动时扫描一次
|
||||
self._memdir_manifest = self._memdir.scan()
|
||||
memory_selector.reset()
|
||||
logger.info("文件式记忆已启用: %s (%d 条)", mem_path,
|
||||
self._memdir_manifest.total_files)
|
||||
|
||||
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
|
||||
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
|
||||
self.on_llm_invocation: Optional[Callable[[], Any]] = None
|
||||
|
||||
def _attach_token_usage(self, result: AgentResult) -> AgentResult:
    """Attach the token-budget summary to ``result`` when budgeting is enabled.

    When ``self._token_budget`` is falsy the result is returned untouched.
    Otherwise ``result.token_usage`` is set to a ``TokenUsageInfo`` built from
    the keyword arguments returned by ``self._token_budget.summary()``.

    Args:
        result: The agent result to annotate; it is mutated in place.

    Returns:
        The same ``result`` object that was passed in.
    """
    budget = self._token_budget
    if not budget:
        return result

    # Imported at call time, as in the original code.
    from app.agent_runtime.schemas import TokenUsageInfo

    usage_fields = budget.summary()
    result.token_usage = TokenUsageInfo(**usage_fields)
    return result
|
||||
|
||||
def _build_execution_log_kwargs(self, user_input: str, result: AgentResult, latency_ms: int) -> dict:
|
||||
"""从 AgentResult 构建 execution_logger 所需的参数字典。"""
|
||||
tool_chain = []
|
||||
@@ -151,6 +292,27 @@ class AgentRuntime:
|
||||
provider=self.config.llm.provider,
|
||||
)
|
||||
|
||||
def _fire_recovery_snapshot(self):
    """Schedule a crash-recovery snapshot on every fifth call (fire-and-forget).

    Each call increments ``self._recovery_snapshot_counter``. Only when the
    counter is a multiple of 5 is ``self.recovery.save_snapshot`` scheduled,
    with the current session id, the current messages and an ``extra`` dict
    holding the agent name, iteration and tool-call count.

    The caller is never blocked. Scheduling and save failures are logged
    instead of being silently discarded. When no event loop is running in the
    current thread the snapshot is skipped, because nothing could execute it.

    In-flight tasks are kept in ``self._recovery_snapshot_tasks`` until they
    finish: the event loop only keeps weak references to tasks, so an
    unreferenced task may be garbage-collected before it completes.
    """
    self._recovery_snapshot_counter += 1
    if self._recovery_snapshot_counter % 5 != 0:
        return

    import asyncio

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Bail out before the coroutine object is created so that no
        # "coroutine was never awaited" warning is produced.
        logger.debug("崩溃恢复快照已跳过: 当前线程没有运行中的事件循环")
        return

    try:
        task = asyncio.ensure_future(
            self.recovery.save_snapshot(
                session_id=self.context.session_id,
                messages=self.context.messages,
                extra={
                    "agent_name": self.config.name,
                    "iteration": self.context.iteration,
                    "tool_calls_made": self.context.tool_calls_made,
                },
            )
        )
    except Exception as exc:
        # Snapshots are best-effort: report the problem, keep the run alive.
        logger.warning("崩溃恢复快照调度失败: %s", exc)
        return

    # Lazily created so instances built before this attribute existed still work.
    in_flight = getattr(self, "_recovery_snapshot_tasks", None)
    if in_flight is None:
        in_flight = self._recovery_snapshot_tasks = set()
    in_flight.add(task)

    def _on_snapshot_done(finished):
        # Drop the strong reference and surface any failure of the save itself.
        in_flight.discard(finished)
        if finished.cancelled():
            return
        failure = finished.exception()
        if failure is not None:
            logger.warning("崩溃恢复快照保存失败: %s", failure)

    task.add_done_callback(_on_snapshot_done)
|
||||
|
||||
def _fire_execution_log(self, user_input: str, result: AgentResult, start_time: float):
|
||||
"""Fire-and-forget 记录执行日志(非阻塞)。"""
|
||||
try:
|
||||
@@ -172,17 +334,49 @@ class AgentRuntime:
|
||||
self._llm_invocations = 0 # 每次 run() 重置 LLM 调用计数
|
||||
_run_start = time.time() # 执行开始时间,用于计算总延迟
|
||||
|
||||
# 1. 首次运行时加载长期记忆到 system prompt
|
||||
if not self._memory_context_loaded:
|
||||
# 1. 系统提示词分层装配(首次加载全部段,后续只刷新动态段)
|
||||
if self._prompt_sections_enabled:
|
||||
system_prompt = await self._compose_system_prompt(user_input)
|
||||
self.context.set_system_prompt(system_prompt)
|
||||
if not self._memory_context_loaded:
|
||||
self._memory_context_loaded = True
|
||||
logger.info("分层装配已完成(静态段 + 动态段)")
|
||||
elif not self._memory_context_loaded:
|
||||
await self._inject_memory_context(user_input)
|
||||
self._memory_context_loaded = True
|
||||
|
||||
# 1.5 知识检索增强:从知识库注入相关经验到 system prompt
|
||||
await self._inject_knowledge_context(user_input)
|
||||
await self._inject_knowledge_context(user_input)
|
||||
|
||||
# 2. 追加用户消息
|
||||
self.context.add_user_message(user_input)
|
||||
|
||||
# 2.5 计划模式 (P2) — 生成执行计划
|
||||
plan: Optional[Plan] = None
|
||||
if self.plan_mode and self.config.llm.plan_mode_enabled:
|
||||
try:
|
||||
plan = await self.plan_mode.generate_plan(
|
||||
user_input=user_input,
|
||||
available_tools=self.tool_manager.tool_names(),
|
||||
messages_history=self.context.messages,
|
||||
)
|
||||
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
|
||||
if self.config.llm.plan_approval_required:
|
||||
approved = await self.plan_mode.present_plan(plan)
|
||||
if not approved:
|
||||
logger.info("计划模式: 计划被拒绝")
|
||||
result = AgentResult(
|
||||
success=False,
|
||||
content=f"计划已被拒绝。\n\n{plan.to_markdown()}",
|
||||
iterations_used=0,
|
||||
tool_calls_made=0,
|
||||
error="plan_rejected",
|
||||
)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.warning("计划生成失败,回退到直接执行: %s", e)
|
||||
plan = None
|
||||
|
||||
# 3. ReAct 循环
|
||||
llm = _LLMClient(self.config.llm)
|
||||
tool_schemas = self.tool_manager.get_tool_schemas()
|
||||
@@ -194,6 +388,18 @@ class AgentRuntime:
|
||||
llm_callback_ctx = {"step_type": "think", "tool_name": None}
|
||||
|
||||
def _llm_callback(metrics: Dict[str, Any]):
|
||||
# Token 预算追踪 (P2)
|
||||
if self._token_budget:
|
||||
prompt_tok = metrics.get("prompt_tokens", 0)
|
||||
comp_tok = metrics.get("completion_tokens", 0)
|
||||
if prompt_tok <= 0:
|
||||
prompt_tok = self._token_budget.input_tokens # fallback estimate
|
||||
self._token_budget.record_llm_call(
|
||||
prompt_tokens=prompt_tok,
|
||||
completion_tokens=comp_tok,
|
||||
iteration=self.context.iteration,
|
||||
step_type=llm_callback_ctx["step_type"],
|
||||
)
|
||||
if self.on_llm_call:
|
||||
metrics.update({
|
||||
"session_id": self.context.session_id,
|
||||
@@ -206,6 +412,31 @@ class AgentRuntime:
|
||||
while self.context.iteration < max_iter:
|
||||
self.context.iteration += 1
|
||||
|
||||
# Token 预算检查:每次迭代前更新输入 token 估计
|
||||
if self._token_budget:
|
||||
self._token_budget.update_from_counter(self.context.messages)
|
||||
self._token_budget.reset_compaction_attempts()
|
||||
|
||||
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
|
||||
_should_compact = self.compaction_engine and self.context.iteration > 1
|
||||
if _should_compact and self._token_budget and self._token_budget.needs_compaction:
|
||||
self._token_budget.record_compaction_attempt()
|
||||
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
|
||||
if self.compaction_engine and self.context.iteration > 1:
|
||||
compact_result = await self.compaction_engine.maybe_compact(
|
||||
self.context.messages,
|
||||
self.config.llm.context_window,
|
||||
)
|
||||
if compact_result.strategy != CompactionStrategy.NONE:
|
||||
self.context.replace_internal_messages(
|
||||
[m for m in compact_result.messages
|
||||
if m.get("role") != "system"] # 去掉 system(由 context 管理)
|
||||
)
|
||||
logger.debug(
|
||||
"压缩完成: strategy=%s saved=%d tokens",
|
||||
compact_result.strategy.value, compact_result.tokens_saved,
|
||||
)
|
||||
|
||||
# 裁剪过长历史
|
||||
messages = self.memory.trim_messages(self.context.messages)
|
||||
|
||||
@@ -221,6 +452,7 @@ class AgentRuntime:
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=err)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
# 调用外部 LLM 预算回调(WorkflowEngine 注入,将 Agent 的 LLM 计入工作流预算)
|
||||
@@ -237,6 +469,7 @@ class AgentRuntime:
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=str(e))
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
# 调用 LLM
|
||||
@@ -267,6 +500,7 @@ class AgentRuntime:
|
||||
error=err_str,
|
||||
)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
# 记录 LLM 调用次数(内部计数)
|
||||
@@ -337,6 +571,7 @@ class AgentRuntime:
|
||||
steps=steps,
|
||||
)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
# 有工具调用 → 先记录 assistant 消息(含 tool_calls)
|
||||
@@ -380,6 +615,26 @@ class AgentRuntime:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
targs = {}
|
||||
|
||||
# Hook: PreToolUse — 可拦截/修改工具调用
|
||||
hook_ctx = HookContext(
|
||||
event=HookEvent.PRE_TOOL_USE,
|
||||
tool_name=tname,
|
||||
tool_input=targs,
|
||||
session_id=self.context.session_id,
|
||||
agent_name=self.config.name,
|
||||
user_id=self.config.user_id,
|
||||
)
|
||||
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
|
||||
if not hook_res.allowed:
|
||||
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
|
||||
self.context.add_tool_result(tcid, tname, result)
|
||||
continue
|
||||
if hook_res.modified_input:
|
||||
targs = hook_res.modified_input
|
||||
# 审批检查需要原始参数,所以审批在前;但如果 hook 改了参数,需要重新构建
|
||||
if hook_res.modified_input and tname in self.config.tools.require_approval:
|
||||
tfn["arguments"] = json.dumps(targs, ensure_ascii=False)
|
||||
|
||||
# 工具执行前审批检查
|
||||
if tname in self.config.tools.require_approval:
|
||||
from app.services.approval_manager import approval_manager as _am
|
||||
@@ -420,6 +675,21 @@ class AgentRuntime:
|
||||
self.context.add_tool_result(tcid, tname, result)
|
||||
self.context.tool_calls_made += 1
|
||||
|
||||
# Hook: PostToolUse — 工具执行后处理
|
||||
post_ctx = HookContext(
|
||||
event=HookEvent.POST_TOOL_USE,
|
||||
tool_name=tname,
|
||||
tool_input=targs,
|
||||
tool_output=result,
|
||||
session_id=self.context.session_id,
|
||||
agent_name=self.config.name,
|
||||
user_id=self.config.user_id,
|
||||
)
|
||||
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
|
||||
|
||||
# 崩溃恢复快照 (P4)
|
||||
self._fire_recovery_snapshot()
|
||||
|
||||
# 预算检查:工具调用次数
|
||||
if self.context.tool_calls_made > budget.max_tool_calls:
|
||||
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
|
||||
@@ -431,6 +701,7 @@ class AgentRuntime:
|
||||
tool_calls_made=self.context.tool_calls_made,
|
||||
steps=steps, error=err)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
if self.on_tool_executed:
|
||||
@@ -484,11 +755,32 @@ class AgentRuntime:
|
||||
error=truncation_msg,
|
||||
)
|
||||
self._fire_execution_log(user_input, result, _run_start)
|
||||
self._attach_token_usage(result)
|
||||
return result
|
||||
|
||||
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
    """Stream one agent turn, optionally folding events in streamlined mode.

    Events come from ``self._run_stream_impl(user_input)``. Without a
    streamlined transformer they are yielded unchanged. With one, the
    transformer is reset first, every event is passed through
    ``transform`` (events for which it returns ``None`` are dropped), and
    after the underlying stream ends a truthy ``flush()`` result is yielded
    as the last event.

    Args:
        user_input: The user message for this turn.

    Yields:
        Event dicts, either raw or transformed.
    """
    transformer = self._streamlined_transformer

    if not transformer:
        # Plain mode: pass every event straight through.
        async for raw_event in self._run_stream_impl(user_input):
            yield raw_event
        return

    transformer.reset()
    async for raw_event in self._run_stream_impl(user_input):
        folded = transformer.transform(raw_event)
        if folded is None:
            continue
        yield folded

    # Emit whatever the transformer is still holding back, if anything.
    trailing = transformer.flush()
    if trailing:
        yield trailing
|
||||
|
||||
async def _run_stream_impl(self, user_input: str) -> AsyncGenerator[dict, None]:
|
||||
"""
|
||||
流式执行 Agent 单轮对话(内部实现)。
|
||||
|
||||
与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件:
|
||||
- think: LLM 思考中,准备调用工具
|
||||
@@ -501,17 +793,63 @@ class AgentRuntime:
|
||||
self.context.iteration = 0
|
||||
self.context.tool_calls_made = 0
|
||||
|
||||
# 1. 首次运行时加载长期记忆到 system prompt
|
||||
if not self._memory_context_loaded:
|
||||
# 1. 系统提示词分层装配
|
||||
if self._prompt_sections_enabled:
|
||||
system_prompt = await self._compose_system_prompt(user_input)
|
||||
self.context.set_system_prompt(system_prompt)
|
||||
if not self._memory_context_loaded:
|
||||
self._memory_context_loaded = True
|
||||
logger.info("分层装配已完成(静态段 + 动态段)")
|
||||
elif not self._memory_context_loaded:
|
||||
await self._inject_memory_context(user_input)
|
||||
self._memory_context_loaded = True
|
||||
|
||||
# 1.5 知识检索增强:从知识库注入相关经验到 system prompt
|
||||
await self._inject_knowledge_context(user_input)
|
||||
await self._inject_knowledge_context(user_input)
|
||||
|
||||
# 2. 追加用户消息
|
||||
self.context.add_user_message(user_input)
|
||||
|
||||
# 2.5 计划模式 (P2) — 流式生成执行计划
|
||||
plan: Optional[Plan] = None
|
||||
if self.plan_mode and self.config.llm.plan_mode_enabled:
|
||||
yield {"type": "plan_generating", "content": "正在生成执行计划…", "iteration": 0}
|
||||
try:
|
||||
plan = await self.plan_mode.generate_plan(
|
||||
user_input=user_input,
|
||||
available_tools=self.tool_manager.tool_names(),
|
||||
messages_history=self.context.messages,
|
||||
)
|
||||
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
|
||||
yield {
|
||||
"type": "plan",
|
||||
"content": plan.to_markdown(),
|
||||
"plan_data": plan.to_dict(),
|
||||
"iteration": 0,
|
||||
"session_id": self.context.session_id,
|
||||
}
|
||||
if self.config.llm.plan_approval_required:
|
||||
# 等待外部审批(通过 on_approval_required 回调)
|
||||
approved = await self.plan_mode.present_plan(plan)
|
||||
if not approved:
|
||||
logger.info("计划模式: 计划被拒绝")
|
||||
yield {
|
||||
"type": "plan_rejected",
|
||||
"content": "计划已被拒绝",
|
||||
"plan_data": plan.to_dict(),
|
||||
"iteration": 0,
|
||||
"session_id": self.context.session_id,
|
||||
}
|
||||
return
|
||||
yield {
|
||||
"type": "plan_approved",
|
||||
"content": "计划已批准,开始执行",
|
||||
"iteration": 0,
|
||||
"session_id": self.context.session_id,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("计划生成失败,回退到直接执行: %s", e)
|
||||
yield {"type": "plan_failed", "content": f"计划生成失败: {e}", "iteration": 0}
|
||||
plan = None
|
||||
|
||||
# 3. ReAct 循环
|
||||
llm = _LLMClient(self.config.llm)
|
||||
tool_schemas = self.tool_manager.get_tool_schemas()
|
||||
@@ -522,6 +860,18 @@ class AgentRuntime:
|
||||
llm_callback_ctx = {"step_type": "think", "tool_name": None}
|
||||
|
||||
def _llm_callback(metrics: Dict[str, Any]):
|
||||
# Token 预算追踪 (P2)
|
||||
if self._token_budget:
|
||||
prompt_tok = metrics.get("prompt_tokens", 0)
|
||||
comp_tok = metrics.get("completion_tokens", 0)
|
||||
if prompt_tok <= 0:
|
||||
prompt_tok = self._token_budget.input_tokens # fallback estimate
|
||||
self._token_budget.record_llm_call(
|
||||
prompt_tokens=prompt_tok,
|
||||
completion_tokens=comp_tok,
|
||||
iteration=self.context.iteration,
|
||||
step_type=llm_callback_ctx["step_type"],
|
||||
)
|
||||
if self.on_llm_call:
|
||||
metrics.update({
|
||||
"session_id": self.context.session_id,
|
||||
@@ -533,6 +883,31 @@ class AgentRuntime:
|
||||
|
||||
while self.context.iteration < max_iter:
|
||||
self.context.iteration += 1
|
||||
|
||||
# Token 预算检查:每次迭代前更新输入 token 估计
|
||||
if self._token_budget:
|
||||
self._token_budget.update_from_counter(self.context.messages)
|
||||
self._token_budget.reset_compaction_attempts()
|
||||
|
||||
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
|
||||
if self.compaction_engine and self.context.iteration > 1:
|
||||
if self._token_budget and self._token_budget.needs_compaction:
|
||||
self._token_budget.record_compaction_attempt()
|
||||
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
|
||||
compact_result = await self.compaction_engine.maybe_compact(
|
||||
self.context.messages,
|
||||
self.config.llm.context_window,
|
||||
)
|
||||
if compact_result.strategy != CompactionStrategy.NONE:
|
||||
self.context.replace_internal_messages(
|
||||
[m for m in compact_result.messages
|
||||
if m.get("role") != "system"]
|
||||
)
|
||||
logger.debug(
|
||||
"压缩完成: strategy=%s saved=%d tokens",
|
||||
compact_result.strategy.value, compact_result.tokens_saved,
|
||||
)
|
||||
|
||||
messages = self.memory.trim_messages(self.context.messages)
|
||||
|
||||
# 预算检查:LLM 调用次数(在调用 LLM 之前检查,避免浪费额度)
|
||||
@@ -626,6 +1001,7 @@ class AgentRuntime:
|
||||
self.context.add_user_message(fix_prompt)
|
||||
continue # 回到 ReAct 循环,让 LLM 修正
|
||||
|
||||
token_usage_final = self._token_budget.summary() if self._token_budget else None
|
||||
yield {
|
||||
"type": "final",
|
||||
"content": final_text,
|
||||
@@ -634,6 +1010,7 @@ class AgentRuntime:
|
||||
"iterations_used": self.context.iteration,
|
||||
"tool_calls_made": self.context.tool_calls_made,
|
||||
"session_id": self.context.session_id,
|
||||
"token_usage": token_usage_final,
|
||||
}
|
||||
await self.memory.save_context(user_input, final_text, self.context.messages)
|
||||
# 保存学习模式
|
||||
@@ -695,6 +1072,24 @@ class AgentRuntime:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
targs = {}
|
||||
|
||||
# Hook: PreToolUse — 可拦截/修改工具调用 (流式)
|
||||
hook_ctx = HookContext(
|
||||
event=HookEvent.PRE_TOOL_USE,
|
||||
tool_name=tname,
|
||||
tool_input=targs,
|
||||
session_id=self.context.session_id,
|
||||
agent_name=self.config.name,
|
||||
user_id=self.config.user_id,
|
||||
)
|
||||
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
|
||||
if not hook_res.allowed:
|
||||
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
|
||||
yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
|
||||
self.context.add_tool_result(tcid, tname, result)
|
||||
continue
|
||||
if hook_res.modified_input:
|
||||
targs = hook_res.modified_input
|
||||
|
||||
# yield tool_call 事件
|
||||
yield {
|
||||
"type": "tool_call",
|
||||
@@ -760,6 +1155,21 @@ class AgentRuntime:
|
||||
self.context.add_tool_result(tcid, tname, result)
|
||||
self.context.tool_calls_made += 1
|
||||
|
||||
# Hook: PostToolUse — 工具执行后处理 (流式)
|
||||
post_ctx = HookContext(
|
||||
event=HookEvent.POST_TOOL_USE,
|
||||
tool_name=tname,
|
||||
tool_input=targs,
|
||||
tool_output=result,
|
||||
session_id=self.context.session_id,
|
||||
agent_name=self.config.name,
|
||||
user_id=self.config.user_id,
|
||||
)
|
||||
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
|
||||
|
||||
# 崩溃恢复快照 (P4)
|
||||
self._fire_recovery_snapshot()
|
||||
|
||||
# 预算检查:工具调用次数
|
||||
if self.context.tool_calls_made > budget.max_tool_calls:
|
||||
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
|
||||
@@ -783,6 +1193,15 @@ class AgentRuntime:
|
||||
data={"tool_name": tname, "result_preview": preview},
|
||||
)
|
||||
|
||||
# Hook: Stop — 对话完成
|
||||
stop_ctx = HookContext(
|
||||
event=HookEvent.STOP,
|
||||
session_id=self.context.session_id,
|
||||
agent_name=self.config.name,
|
||||
user_id=self.config.user_id,
|
||||
)
|
||||
await self.hook_manager.trigger(HookEvent.STOP, stop_ctx)
|
||||
|
||||
# 达到最大迭代次数
|
||||
last_content = ""
|
||||
for m in reversed(self.context.messages):
|
||||
@@ -802,6 +1221,7 @@ class AgentRuntime:
|
||||
# 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值)
|
||||
if last_content:
|
||||
await self._extract_global_knowledge(user_input, last_content, steps)
|
||||
token_usage_truncated = self._token_budget.summary() if self._token_budget else None
|
||||
yield {
|
||||
"type": "final",
|
||||
"content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
|
||||
@@ -810,8 +1230,123 @@ class AgentRuntime:
|
||||
"tool_calls_made": self.context.tool_calls_made,
|
||||
"truncated": True,
|
||||
"session_id": self.context.session_id,
|
||||
"token_usage": token_usage_truncated,
|
||||
}
|
||||
|
||||
async def _compose_system_prompt(self, query: str = "") -> str:
    """Build the complete system prompt for the current turn.

    With a ``PromptComposer`` configured, the composer's dynamic sections are
    cleared and rebuilt for ``query`` (environment, language, long-term
    memory, learning patterns, file-based memory, knowledge base, tool list),
    then assembled by ``assemble_full()``. Without a composer the method falls
    back to concatenating the same pieces onto ``config.system_prompt``.

    Knowledge retrieval is best-effort in both paths: a failure is logged and
    the prompt is built without that part.

    Args:
        query: The current user input, forwarded to the memory,
            learning-pattern, file-memory and knowledge lookups.

    Returns:
        The assembled system prompt text.
    """
    if not self._prompt_composer:
        # Fallback: legacy string concatenation.
        enriched = self.config.system_prompt.rstrip("\n")
        mem_text = await self.memory.initialize(query=query)
        if mem_text:
            enriched += "\n\n" + mem_text
        if self.config.memory.learning_enabled:
            pattern_hint = await self._inject_learning_patterns(query)
            if pattern_hint:
                enriched += "\n\n" + pattern_hint
        if self._memdir and self._memdir_manifest:
            memdir_text = await self._inject_memdir_context(query)
            if memdir_text:
                enriched += "\n\n" + memdir_text
        try:
            enriched = knowledge_retriever.inject_knowledge(enriched, query)
        except Exception as exc:
            # Best-effort enrichment: keep the prompt built so far.
            logger.warning("知识库注入失败,已跳过: %s", exc)
        return enriched

    # Layered-assembly path.
    ps_config = self.config.prompt_sections
    d_switches = ps_config.dynamic_sections

    # Dynamic sections are recomputed on every call; the static sections
    # registered on the composer are left alone.
    # NOTE(review): this reaches into a private attribute of PromptComposer;
    # prefer a public reset method if the composer offers one.
    self._prompt_composer._dynamic_sections.clear()

    # Dynamic section: environment information.
    if d_switches.get("environment", True):
        self._prompt_composer.add_dynamic(PromptSection(
            "environment",
            lambda uid=self.config.user_id: section_environment(uid),
            cache_break=True,
        ))

    # Dynamic section: language preference (only when one is configured).
    if d_switches.get("language", True):
        lang = ps_config.language
        if lang:
            self._prompt_composer.add_dynamic(PromptSection(
                "language",
                lambda l=lang: section_language(l),
                cache_break=False,
            ))

    # Dynamic section: long-term memory context.
    if d_switches.get("memory_context", True):
        mem_text = await self.memory.initialize(query=query)
        if mem_text:
            self._prompt_composer.add_dynamic(PromptSection(
                "memory_context",
                lambda t=mem_text: f"# Long-term Memory\n\n{t}",
                cache_break=True,
            ))

    # Dynamic section: learned-pattern hints.
    if self.config.memory.learning_enabled:
        pattern_hint = await self._inject_learning_patterns(query)
        if pattern_hint:
            self._prompt_composer.add_dynamic(PromptSection(
                "learning_patterns",
                lambda p=pattern_hint: p,
                cache_break=True,
            ))

    # Dynamic section: file-based memory.
    if self._memdir and self._memdir_manifest:
        memdir_text = await self._inject_memdir_context(query)
        if memdir_text:
            self._prompt_composer.add_dynamic(PromptSection(
                "memdir",
                lambda t=memdir_text: t,
                cache_break=True,
            ))

    # Dynamic section: knowledge-base retrieval.
    # NOTE(review): gated by the same "memory_context" switch as the
    # long-term memory section above; confirm whether a dedicated switch
    # was intended.
    if d_switches.get("memory_context", True):
        try:
            base_enriched = knowledge_retriever.inject_knowledge(
                self.config.system_prompt, query
            )
            if base_enriched != self.config.system_prompt:
                # Keep only the text added after the original prompt.
                # NOTE(review): assumes inject_knowledge appends to its
                # input; verify against its implementation.
                knowledge_delta = base_enriched[len(self.config.system_prompt):].strip()
                if knowledge_delta:
                    self._prompt_composer.add_dynamic(PromptSection(
                        "knowledge_base",
                        lambda kd=knowledge_delta: f"# Relevant Knowledge\n\n{kd}",
                        cache_break=True,
                    ))
        except Exception as exc:
            # Best-effort enrichment: assemble the prompt without this section.
            logger.warning("知识库检索失败,已跳过知识段: %s", exc)

    # Dynamic section: tool list (off by default because it is long).
    if d_switches.get("tool_list", False):
        tool_names = self.tool_manager.tool_names()
        if tool_names:
            tool_list_text = "\n".join(f"- {n}" for n in sorted(tool_names))
            self._prompt_composer.add_dynamic(PromptSection(
                "tool_list",
                lambda t=tool_list_text: f"# Available Tools\n\n{t}",
                cache_break=False,
            ))

    # Resolve all sections and assemble the final prompt.
    return await self._prompt_composer.assemble_full()
|
||||
|
||||
async def _inject_memory_context(self, query: str = "") -> None:
|
||||
"""加载长期记忆并注入 system prompt。"""
|
||||
mem_text = await self.memory.initialize(query=query)
|
||||
@@ -826,9 +1361,65 @@ class AgentRuntime:
|
||||
if pattern_hint:
|
||||
enriched += "\n\n" + pattern_hint
|
||||
|
||||
# 注入文件式记忆 (MEMORY.md)
|
||||
if self._memdir and self._memdir_manifest:
|
||||
memdir_text = await self._inject_memdir_context(query)
|
||||
if memdir_text:
|
||||
enriched += "\n\n" + memdir_text
|
||||
|
||||
self.context.set_system_prompt(enriched)
|
||||
logger.info("Agent 已注入长期记忆上下文")
|
||||
|
||||
async def _inject_memdir_context(self, query: str) -> str:
    """Build the file-based-memory text to inject into the system prompt.

    The text always starts with ``self._memdir.build_system_prompt()``. When
    the manifest has entries, ``memory_selector.select`` picks filenames for
    ``query``; each selected entry found in the manifest is appended as a
    ``<system-reminder>`` block holding its type, name, up to 2000 characters
    of content and, when present, its staleness note.

    Args:
        query: The current user input, used for memory selection.

    Returns:
        The parts joined with newlines, or ``""`` when file-based memory is
        not set up. If selection fails, the failure is logged and whatever
        was collected up to that point is returned.
    """
    if not self._memdir or not self._memdir_manifest:
        return ""

    # Guidance on how to work with the memory directory comes first.
    parts: List[str] = [self._memdir.build_system_prompt()]

    entries = self._memdir_manifest.entries
    if not entries:
        return "\n".join(parts)

    try:
        selected = await memory_selector.select(
            query=query,
            manifest=self._memdir_manifest,
            recent_tools=self.tool_manager.tool_names(),
        )
        if selected:
            # Index the manifest once instead of scanning it per selected
            # file; setdefault keeps the first entry for a duplicated name.
            by_filename = {}
            for candidate in entries:
                by_filename.setdefault(candidate.filename, candidate)

            parts.append("\n## 相关记忆\n")
            for fn in selected:
                entry = by_filename.get(fn)
                if not entry:
                    continue

                # Prefer the file on disk; fall back to the content held by
                # the manifest entry when reading fails or yields nothing.
                # NOTE(review): this is a synchronous read inside a
                # coroutine; presumably the files are small — confirm.
                try:
                    with open(entry.filepath, "r", encoding="utf-8") as _f:
                        _, content = parse_frontmatter(_f.read())
                except Exception as read_err:
                    logger.debug("读取记忆文件失败,使用清单内容: %s (%s)", entry.filepath, read_err)
                    content = entry.content
                if not content:
                    content = entry.content

                staleness = entry.staleness_note
                parts.append(
                    f"<system-reminder>\n"
                    f"### [{entry.mem_type.value}] {entry.name}\n"
                    f"{content[:2000]}"
                )
                if staleness:
                    parts.append(f"\n{staleness}")
                parts.append("</system-reminder>")
    except Exception as e:
        logger.warning("AI 记忆选择失败: %s", e)

    return "\n".join(parts)
|
||||
|
||||
async def _inject_learning_patterns(self, query: str) -> str:
|
||||
"""查询学习模式,返回格式化的提示文本。"""
|
||||
from app.core.database import SessionLocal
|
||||
@@ -1062,9 +1653,17 @@ class AgentRuntime:
|
||||
|
||||
@staticmethod
def _is_retryable(err_str: str) -> bool:
    """Return whether an error message describes a retryable failure.

    The module-level ``_error_classifier`` decides first: the message is
    wrapped in an ``Exception`` and is retryable when it is classified as
    ``ErrorType.RETRYABLE``. If classification itself raises, a
    case-insensitive keyword match on the message is used instead.

    Args:
        err_str: The error message text.

    Returns:
        ``True`` when the error should be retried, ``False`` otherwise.
    """
    try:
        error_type, _ = _error_classifier.classify(Exception(err_str))
        return error_type == ErrorType.RETRYABLE
    except Exception:
        # Keyword fallback. "server disconnected" is kept so the fallback
        # covers everything the earlier keyword tuple covered.
        fallback_keywords = (
            "timed out",
            "timeout",
            "connection error",
            "temporarily unavailable",
            "server disconnected",
            "rate limit",
            "too many requests",
            "internal server error",
            "service unavailable",
        )
        err_lower = err_str.lower()
        return any(kw in err_lower for kw in fallback_keywords)
|
||||
|
||||
|
||||
# LLM 缓存辅助
|
||||
@@ -1193,6 +1792,35 @@ class _LLMClient:
|
||||
response = await client.chat.completions.create(**kwargs)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
|
||||
# Reactive Compact: 上下文超限时压缩后重试 (Tier 3)
|
||||
if (
|
||||
self.compaction_engine
|
||||
and is_context_length_error(e)
|
||||
and self.compaction_engine.config.reactive_compact_enabled
|
||||
):
|
||||
logger.warning("检测到上下文超限,触发 ReactiveCompact: %s", str(e)[:100])
|
||||
try:
|
||||
compact_result = await self.compaction_engine.reactive_compact(
|
||||
messages, e, self._config.context_window,
|
||||
)
|
||||
if compact_result.strategy != CompactionStrategy.NONE:
|
||||
logger.info(
|
||||
"ReactiveCompact 完成: saved=%d tokens, 重试中...",
|
||||
compact_result.tokens_saved,
|
||||
)
|
||||
return await self._do_chat(
|
||||
api_key=api_key, base_url=base_url,
|
||||
model=model,
|
||||
messages=compact_result.messages,
|
||||
tools=tools,
|
||||
iteration=iteration,
|
||||
on_completion=on_completion,
|
||||
_is_fallback=_is_fallback,
|
||||
)
|
||||
except Exception as ce:
|
||||
logger.error("ReactiveCompact 失败: %s", ce)
|
||||
|
||||
# 降级回退:主模型失败时尝试 fallback_llm
|
||||
fallback = self._config.fallback_llm
|
||||
if fallback and isinstance(fallback, dict) and not _is_fallback:
|
||||
|
||||
Reference in New Issue
Block a user