fix: Feishu channel agents file_write permission blocked + memory system tests & docs

- Fix 8 Feishu agent handlers to use permission_level="acceptEdits" so file_write
  tool works without Web UI approval popup (lingxi/renshenguo/suyao/tiantian/orange/main/schedule)
- Add P5-P7 memory improvements: offline keyword fallback, team sharing, file-based memory
- Add auto_dream_service for daily memory consolidation
- Add 99 memory system test cases (basic 18 + advanced 43 + pytest 38)
- Add platform capability assessment report and unfinished project checklist

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renjianbo
2026-06-14 20:35:12 +08:00
parent a7512a5423
commit 7f4aeb021b
22 changed files with 6191 additions and 68 deletions

View File

@@ -25,6 +25,35 @@ from app.agent_runtime.context import AgentContext
from app.agent_runtime.memory import AgentMemory
from app.agent_runtime.tool_manager import AgentToolManager
from app.core.exceptions import WorkflowExecutionError
from app.core.hooks import HookManager, HookEvent, HookContext, HookResult
from app.agent_runtime.plan_mode import PlanMode, Plan, PlanStatus
from app.core.error_recovery import ErrorClassifier, ErrorType, ConversationRecovery
from app.core.memdir import MemoryDir, MemoryType as MemType, MemoryManifest, parse_frontmatter
from app.core.memory_selector import memory_selector
from app.core.compaction import CompactionEngine, CompactionResult, CompactionStrategy
from app.core.compaction_config import CompactionConfig
from app.core.token_counter import is_context_length_error
from app.core.streamlined_output import (
StreamlinedTransformer,
create_streamlined_transformer,
get_tool_summary_text,
ToolCounts,
categorize_tool,
)
from app.core.prompt_sections import (
PromptComposer,
PromptSection,
create_prompt_composer,
create_default_static_sections,
create_default_dynamic_sections,
section_environment,
section_language,
)
from app.core.token_budget import (
TokenBudget,
TokenBudgetConfig,
create_token_budget,
)
from app.services.agent_learning_service import (
extract_pattern_from_result,
format_pattern_hint,
@@ -54,18 +83,8 @@ class LLMCallMetrics(TypedDict, total=False):
status: str # success / error
error_message: Optional[str]
# 可重试的 API 异常
_RETRYABLE_ERRORS = (
"timed out",
"timeout",
"connection error",
"temporarily unavailable",
"server disconnected",
"rate limit",
"too many requests",
"internal server error",
"service unavailable",
)
# 全局错误分类器(可重试判定 + 退避策略)
_error_classifier = ErrorClassifier()
class AgentRuntime:
@@ -86,6 +105,8 @@ class AgentRuntime:
execution_logger: Optional[Any] = None,
on_tool_executed: Optional[Callable[[str], Any]] = None,
on_llm_call: Optional[Callable[[Dict[str, Any]], Any]] = None,
hook_manager: Optional[HookManager] = None,
streamlined: bool = False,
):
self.config = config or AgentConfig()
self.context = context or AgentContext(
@@ -97,6 +118,14 @@ class AgentRuntime:
scope_id=_mem_scope,
max_history=self.config.memory.max_history_messages,
persist=self.config.memory.persist_to_db,
vector_memory_enabled=self.config.memory.vector_memory_enabled,
vector_memory_top_k=self.config.memory.vector_memory_top_k,
vector_memory_rerank=self.config.memory.vector_memory_rerank,
memory_type_filter=self.config.memory.memory_type_filter,
team_id=self.config.memory.team_id,
team_share_enabled=self.config.memory.team_share_enabled,
memory_dir_enabled=self.config.memory.memory_dir_enabled,
memory_dir_path=self.config.memory.memory_dir_path,
)
self.tool_manager = tool_manager or AgentToolManager(
include_tools=self.config.tools.include_tools,
@@ -104,6 +133,9 @@ class AgentRuntime:
cache_enabled=self.config.tools.cache_enabled,
cache_tool_whitelist=self.config.tools.cache_tool_whitelist,
cache_ttl_ms=self.config.tools.cache_ttl_ms,
permission_level=self.config.tools.permission_level,
auto_approve_rules=self.config.tools.auto_approve_rules,
deny_tools=self.config.tools.deny_tools,
)
self.execution_logger = execution_logger
self.on_tool_executed = on_tool_executed
@@ -113,10 +145,119 @@ class AgentRuntime:
# Self-learning scope: bare chat uses "bare", agents use "agent"
self._learning_scope_kind = "bare" if "bare" in str(_mem_scope) else "agent"
# Hook 管理器 (P1)
self.hook_manager = hook_manager or HookManager()
# 计划模式 (P2)
self.plan_mode = PlanMode(self.config.llm) if self.config.llm.plan_mode_enabled else None
# 对话自动压缩 (参考 Claude Code compact)
self.compaction_engine: Optional[CompactionEngine] = None
compaction_cfg = getattr(self.config.memory, 'compaction', None)
if compaction_cfg is None:
compaction_cfg = CompactionConfig()
if compaction_cfg.enabled:
self.compaction_engine = CompactionEngine(
config=compaction_cfg,
model=self.config.llm.model,
)
logger.info("对话压缩引擎已启用 (model=%s, window=%d)",
self.config.llm.model, self.config.llm.context_window)
# 工具结果流式美化 (参考 Claude Code streamlinedTransform)
self.streamlined = streamlined
self._streamlined_transformer: Optional[StreamlinedTransformer] = None
if streamlined:
self._streamlined_transformer = create_streamlined_transformer(enabled=True)
logger.info("工具结果流式美化已启用")
# 系统提示词分层装配 (P2 — 参考 Claude Code systemPromptSections.ts)
self._prompt_composer: Optional[PromptComposer] = None
self._prompt_sections_enabled = self.config.prompt_sections.enabled
if self._prompt_sections_enabled:
ps_config = self.config.prompt_sections
# 构建静态段(按开关过滤)
static_sections = []
s_switches = ps_config.static_sections
if s_switches.get("persona", True):
static_sections.append(PromptSection(
"persona",
lambda cfg=self.config: f"{cfg.system_prompt}\n\n"
))
if s_switches.get("capabilities", True):
from app.core.prompt_sections import section_capabilities
static_sections.append(PromptSection("capabilities", section_capabilities))
if s_switches.get("tool_instructions", True):
from app.core.prompt_sections import section_tool_instructions
static_sections.append(PromptSection("tool_instructions", section_tool_instructions))
if s_switches.get("safety_rules", True):
from app.core.prompt_sections import section_safety_rules
static_sections.append(PromptSection("safety_rules", section_safety_rules))
if s_switches.get("output_style", True):
from app.core.prompt_sections import section_output_style
static_sections.append(PromptSection("output_style", section_output_style))
self._prompt_composer = PromptComposer()
self._prompt_composer.add_static_sections(static_sections)
logger.info("系统提示词分层装配已启用 (%d 静态段)", len(static_sections))
# Token 预算管理 (P2 — 参考 Claude Code tokenBudget.ts)
self._token_budget: Optional[TokenBudget] = None
tb_config = self.config.token_budget
if tb_config.enabled:
self._token_budget = TokenBudget(
config=TokenBudgetConfig(
enabled=True,
context_window=tb_config.context_window or self.config.llm.context_window,
output_reserve=tb_config.output_reserve,
warning_threshold_pct=tb_config.warning_threshold_pct,
compact_threshold_pct=tb_config.compact_threshold_pct,
hard_limit_pct=tb_config.hard_limit_pct,
user_budget=tb_config.user_budget,
auto_continue=tb_config.auto_continue,
compaction_after_warning=tb_config.compaction_after_warning,
max_compaction_attempts=tb_config.max_compaction_attempts,
),
model=self.config.llm.model,
)
logger.info("Token 预算管理已启用 (window=%d, compact@%d%%)",
self._token_budget.config.context_window,
int(tb_config.compact_threshold_pct * 100))
# 崩溃恢复 (P4)
self.recovery = ConversationRecovery()
self._recovery_snapshot_counter = 0
# 文件式记忆 (MEMORY.md)
self._memdir: Optional[MemoryDir] = None
self._memdir_manifest: Optional[MemoryManifest] = None
if self.config.memory.memory_dir_enabled:
mem_path = self.config.memory.memory_dir_path
if not mem_path:
# 默认路径: 项目根目录下的 .claude/memory
import os as _os
mem_path = _os.path.join(
_os.path.dirname(_os.path.dirname(_os.path.dirname(__file__))),
".claude", "memory",
)
self._memdir = MemoryDir(mem_path)
# 启动时扫描一次
self._memdir_manifest = self._memdir.scan()
memory_selector.reset()
logger.info("文件式记忆已启用: %s (%d 条)", mem_path,
self._memdir_manifest.total_files)
# 预算回调:供 WorkflowEngine 注入,使 Agent 内部计数计入工作流预算
# 返回 True 表示预算充足;返回 False 或抛出异常表示超限
self.on_llm_invocation: Optional[Callable[[], Any]] = None
def _attach_token_usage(self, result: AgentResult) -> AgentResult:
    """Attach the TokenBudget summary to *result* (only when budgeting is on).

    When ``self._token_budget`` is falsy the result is returned untouched.
    Otherwise ``result.token_usage`` is set to a ``TokenUsageInfo`` built from
    the keyword fields returned by ``self._token_budget.summary()``.

    Returns the same ``result`` object that was passed in.
    """
    budget = self._token_budget
    if not budget:
        return result

    # Imported lazily, exactly where it is needed.
    from app.agent_runtime.schemas import TokenUsageInfo

    usage_fields = budget.summary()
    result.token_usage = TokenUsageInfo(**usage_fields)
    return result
def _build_execution_log_kwargs(self, user_input: str, result: AgentResult, latency_ms: int) -> dict:
"""从 AgentResult 构建 execution_logger 所需的参数字典。"""
tool_chain = []
@@ -151,6 +292,27 @@ class AgentRuntime:
provider=self.config.llm.provider,
)
def _fire_recovery_snapshot(self):
"""Fire-and-forget 保存崩溃恢复快照(每 5 次工具调用保存一次)。"""
self._recovery_snapshot_counter += 1
if self._recovery_snapshot_counter % 5 != 0:
return
try:
import asyncio
asyncio.ensure_future(
self.recovery.save_snapshot(
session_id=self.context.session_id,
messages=self.context.messages,
extra={
"agent_name": self.config.name,
"iteration": self.context.iteration,
"tool_calls_made": self.context.tool_calls_made,
},
)
)
except Exception:
pass
def _fire_execution_log(self, user_input: str, result: AgentResult, start_time: float):
"""Fire-and-forget 记录执行日志(非阻塞)。"""
try:
@@ -172,17 +334,49 @@ class AgentRuntime:
self._llm_invocations = 0 # 每次 run() 重置 LLM 调用计数
_run_start = time.time() # 执行开始时间,用于计算总延迟
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
# 1. 系统提示词分层装配(首次加载全部段,后续只刷新动态段)
if self._prompt_sections_enabled:
system_prompt = await self._compose_system_prompt(user_input)
self.context.set_system_prompt(system_prompt)
if not self._memory_context_loaded:
self._memory_context_loaded = True
logger.info("分层装配已完成(静态段 + 动态段)")
elif not self._memory_context_loaded:
await self._inject_memory_context(user_input)
self._memory_context_loaded = True
# 1.5 知识检索增强:从知识库注入相关经验到 system prompt
await self._inject_knowledge_context(user_input)
await self._inject_knowledge_context(user_input)
# 2. 追加用户消息
self.context.add_user_message(user_input)
# 2.5 计划模式 (P2) — 生成执行计划
plan: Optional[Plan] = None
if self.plan_mode and self.config.llm.plan_mode_enabled:
try:
plan = await self.plan_mode.generate_plan(
user_input=user_input,
available_tools=self.tool_manager.tool_names(),
messages_history=self.context.messages,
)
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
if self.config.llm.plan_approval_required:
approved = await self.plan_mode.present_plan(plan)
if not approved:
logger.info("计划模式: 计划被拒绝")
result = AgentResult(
success=False,
content=f"计划已被拒绝。\n\n{plan.to_markdown()}",
iterations_used=0,
tool_calls_made=0,
error="plan_rejected",
)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
except Exception as e:
logger.warning("计划生成失败,回退到直接执行: %s", e)
plan = None
# 3. ReAct 循环
llm = _LLMClient(self.config.llm)
tool_schemas = self.tool_manager.get_tool_schemas()
@@ -194,6 +388,18 @@ class AgentRuntime:
llm_callback_ctx = {"step_type": "think", "tool_name": None}
def _llm_callback(metrics: Dict[str, Any]):
# Token 预算追踪 (P2)
if self._token_budget:
prompt_tok = metrics.get("prompt_tokens", 0)
comp_tok = metrics.get("completion_tokens", 0)
if prompt_tok <= 0:
prompt_tok = self._token_budget.input_tokens # fallback estimate
self._token_budget.record_llm_call(
prompt_tokens=prompt_tok,
completion_tokens=comp_tok,
iteration=self.context.iteration,
step_type=llm_callback_ctx["step_type"],
)
if self.on_llm_call:
metrics.update({
"session_id": self.context.session_id,
@@ -206,6 +412,31 @@ class AgentRuntime:
while self.context.iteration < max_iter:
self.context.iteration += 1
# Token 预算检查:每次迭代前更新输入 token 估计
if self._token_budget:
self._token_budget.update_from_counter(self.context.messages)
self._token_budget.reset_compaction_attempts()
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
_should_compact = self.compaction_engine and self.context.iteration > 1
if _should_compact and self._token_budget and self._token_budget.needs_compaction:
self._token_budget.record_compaction_attempt()
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
if self.compaction_engine and self.context.iteration > 1:
compact_result = await self.compaction_engine.maybe_compact(
self.context.messages,
self.config.llm.context_window,
)
if compact_result.strategy != CompactionStrategy.NONE:
self.context.replace_internal_messages(
[m for m in compact_result.messages
if m.get("role") != "system"] # 去掉 system由 context 管理)
)
logger.debug(
"压缩完成: strategy=%s saved=%d tokens",
compact_result.strategy.value, compact_result.tokens_saved,
)
# 裁剪过长历史
messages = self.memory.trim_messages(self.context.messages)
@@ -221,6 +452,7 @@ class AgentRuntime:
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
# Invoke the external LLM budget callback (injected by WorkflowEngine so the agent's LLM calls count toward the workflow budget)
@@ -237,6 +469,7 @@ class AgentRuntime:
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=str(e))
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
# 调用 LLM
@@ -267,6 +500,7 @@ class AgentRuntime:
error=err_str,
)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
# 记录 LLM 调用次数(内部计数)
@@ -337,6 +571,7 @@ class AgentRuntime:
steps=steps,
)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
# Tool calls present → record the assistant message first (including tool_calls)
@@ -380,6 +615,26 @@ class AgentRuntime:
except (json.JSONDecodeError, TypeError):
targs = {}
# Hook: PreToolUse — 可拦截/修改工具调用
hook_ctx = HookContext(
event=HookEvent.PRE_TOOL_USE,
tool_name=tname,
tool_input=targs,
session_id=self.context.session_id,
agent_name=self.config.name,
user_id=self.config.user_id,
)
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
if not hook_res.allowed:
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
self.context.add_tool_result(tcid, tname, result)
continue
if hook_res.modified_input:
targs = hook_res.modified_input
# 审批检查需要原始参数,所以审批在前;但如果 hook 改了参数,需要重新构建
if hook_res.modified_input and tname in self.config.tools.require_approval:
tfn["arguments"] = json.dumps(targs, ensure_ascii=False)
# 工具执行前审批检查
if tname in self.config.tools.require_approval:
from app.services.approval_manager import approval_manager as _am
@@ -420,6 +675,21 @@ class AgentRuntime:
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
# Hook: PostToolUse — 工具执行后处理
post_ctx = HookContext(
event=HookEvent.POST_TOOL_USE,
tool_name=tname,
tool_input=targs,
tool_output=result,
session_id=self.context.session_id,
agent_name=self.config.name,
user_id=self.config.user_id,
)
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
# 崩溃恢复快照 (P4)
self._fire_recovery_snapshot()
# 预算检查:工具调用次数
if self.context.tool_calls_made > budget.max_tool_calls:
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
@@ -431,6 +701,7 @@ class AgentRuntime:
tool_calls_made=self.context.tool_calls_made,
steps=steps, error=err)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
if self.on_tool_executed:
@@ -484,11 +755,32 @@ class AgentRuntime:
error=truncation_msg,
)
self._fire_execution_log(user_input, result, _run_start)
self._attach_token_usage(result)
return result
async def run_stream(self, user_input: str) -> AsyncGenerator[dict, None]:
"""
流式执行 Agent 单轮对话。
流式执行 Agent 单轮对话(支持 streamlined 模式)
与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件。
当 streamlined=True 时,工具调用会被折叠为累计摘要。
"""
if self._streamlined_transformer:
self._streamlined_transformer.reset()
async for event in self._run_stream_impl(user_input):
transformed = self._streamlined_transformer.transform(event)
if transformed is not None:
yield transformed
flushed = self._streamlined_transformer.flush()
if flushed:
yield flushed
else:
async for event in self._run_stream_impl(user_input):
yield event
async def _run_stream_impl(self, user_input: str) -> AsyncGenerator[dict, None]:
"""
流式执行 Agent 单轮对话(内部实现)。
与 run() 逻辑相同,但在每个关键步骤 yield SSE 事件:
- think: LLM 思考中,准备调用工具
@@ -501,17 +793,63 @@ class AgentRuntime:
self.context.iteration = 0
self.context.tool_calls_made = 0
# 1. 首次运行时加载长期记忆到 system prompt
if not self._memory_context_loaded:
# 1. 系统提示词分层装配
if self._prompt_sections_enabled:
system_prompt = await self._compose_system_prompt(user_input)
self.context.set_system_prompt(system_prompt)
if not self._memory_context_loaded:
self._memory_context_loaded = True
logger.info("分层装配已完成(静态段 + 动态段)")
elif not self._memory_context_loaded:
await self._inject_memory_context(user_input)
self._memory_context_loaded = True
# 1.5 知识检索增强:从知识库注入相关经验到 system prompt
await self._inject_knowledge_context(user_input)
await self._inject_knowledge_context(user_input)
# 2. 追加用户消息
self.context.add_user_message(user_input)
# 2.5 计划模式 (P2) — 流式生成执行计划
plan: Optional[Plan] = None
if self.plan_mode and self.config.llm.plan_mode_enabled:
yield {"type": "plan_generating", "content": "正在生成执行计划…", "iteration": 0}
try:
plan = await self.plan_mode.generate_plan(
user_input=user_input,
available_tools=self.tool_manager.tool_names(),
messages_history=self.context.messages,
)
logger.info("计划模式: 已生成计划 (%d 步骤)", len(plan.steps))
yield {
"type": "plan",
"content": plan.to_markdown(),
"plan_data": plan.to_dict(),
"iteration": 0,
"session_id": self.context.session_id,
}
if self.config.llm.plan_approval_required:
# 等待外部审批(通过 on_approval_required 回调)
approved = await self.plan_mode.present_plan(plan)
if not approved:
logger.info("计划模式: 计划被拒绝")
yield {
"type": "plan_rejected",
"content": "计划已被拒绝",
"plan_data": plan.to_dict(),
"iteration": 0,
"session_id": self.context.session_id,
}
return
yield {
"type": "plan_approved",
"content": "计划已批准,开始执行",
"iteration": 0,
"session_id": self.context.session_id,
}
except Exception as e:
logger.warning("计划生成失败,回退到直接执行: %s", e)
yield {"type": "plan_failed", "content": f"计划生成失败: {e}", "iteration": 0}
plan = None
# 3. ReAct 循环
llm = _LLMClient(self.config.llm)
tool_schemas = self.tool_manager.get_tool_schemas()
@@ -522,6 +860,18 @@ class AgentRuntime:
llm_callback_ctx = {"step_type": "think", "tool_name": None}
def _llm_callback(metrics: Dict[str, Any]):
# Token 预算追踪 (P2)
if self._token_budget:
prompt_tok = metrics.get("prompt_tokens", 0)
comp_tok = metrics.get("completion_tokens", 0)
if prompt_tok <= 0:
prompt_tok = self._token_budget.input_tokens # fallback estimate
self._token_budget.record_llm_call(
prompt_tokens=prompt_tok,
completion_tokens=comp_tok,
iteration=self.context.iteration,
step_type=llm_callback_ctx["step_type"],
)
if self.on_llm_call:
metrics.update({
"session_id": self.context.session_id,
@@ -533,6 +883,31 @@ class AgentRuntime:
while self.context.iteration < max_iter:
self.context.iteration += 1
# Token 预算检查:每次迭代前更新输入 token 估计
if self._token_budget:
self._token_budget.update_from_counter(self.context.messages)
self._token_budget.reset_compaction_attempts()
# 对话自动压缩 (参考 Claude Code autoCompact) + Token 预算驱动压缩
if self.compaction_engine and self.context.iteration > 1:
if self._token_budget and self._token_budget.needs_compaction:
self._token_budget.record_compaction_attempt()
logger.info("TokenBudget 触发自动压缩: %s", self._token_budget.status_line)
compact_result = await self.compaction_engine.maybe_compact(
self.context.messages,
self.config.llm.context_window,
)
if compact_result.strategy != CompactionStrategy.NONE:
self.context.replace_internal_messages(
[m for m in compact_result.messages
if m.get("role") != "system"]
)
logger.debug(
"压缩完成: strategy=%s saved=%d tokens",
compact_result.strategy.value, compact_result.tokens_saved,
)
messages = self.memory.trim_messages(self.context.messages)
# Budget check: LLM call count (checked before calling the LLM to avoid wasting quota)
@@ -626,6 +1001,7 @@ class AgentRuntime:
self.context.add_user_message(fix_prompt)
continue # 回到 ReAct 循环,让 LLM 修正
token_usage_final = self._token_budget.summary() if self._token_budget else None
yield {
"type": "final",
"content": final_text,
@@ -634,6 +1010,7 @@ class AgentRuntime:
"iterations_used": self.context.iteration,
"tool_calls_made": self.context.tool_calls_made,
"session_id": self.context.session_id,
"token_usage": token_usage_final,
}
await self.memory.save_context(user_input, final_text, self.context.messages)
# 保存学习模式
@@ -695,6 +1072,24 @@ class AgentRuntime:
except (json.JSONDecodeError, TypeError):
targs = {}
# Hook: PreToolUse — 可拦截/修改工具调用 (流式)
hook_ctx = HookContext(
event=HookEvent.PRE_TOOL_USE,
tool_name=tname,
tool_input=targs,
session_id=self.context.session_id,
agent_name=self.config.name,
user_id=self.config.user_id,
)
hook_res = await self.hook_manager.trigger(HookEvent.PRE_TOOL_USE, hook_ctx)
if not hook_res.allowed:
result = json.dumps({"error": hook_res.reason}, ensure_ascii=False)
yield {"type": "tool_result", "name": tname, "result": result, "iteration": self.context.iteration}
self.context.add_tool_result(tcid, tname, result)
continue
if hook_res.modified_input:
targs = hook_res.modified_input
# yield tool_call 事件
yield {
"type": "tool_call",
@@ -760,6 +1155,21 @@ class AgentRuntime:
self.context.add_tool_result(tcid, tname, result)
self.context.tool_calls_made += 1
# Hook: PostToolUse — 工具执行后处理 (流式)
post_ctx = HookContext(
event=HookEvent.POST_TOOL_USE,
tool_name=tname,
tool_input=targs,
tool_output=result,
session_id=self.context.session_id,
agent_name=self.config.name,
user_id=self.config.user_id,
)
await self.hook_manager.trigger(HookEvent.POST_TOOL_USE, post_ctx)
# 崩溃恢复快照 (P4)
self._fire_recovery_snapshot()
# 预算检查:工具调用次数
if self.context.tool_calls_made > budget.max_tool_calls:
err = f"已超过工具调用预算({budget.max_tool_calls} 次)"
@@ -783,6 +1193,15 @@ class AgentRuntime:
data={"tool_name": tname, "result_preview": preview},
)
# Hook: Stop — 对话完成
stop_ctx = HookContext(
event=HookEvent.STOP,
session_id=self.context.session_id,
agent_name=self.config.name,
user_id=self.config.user_id,
)
await self.hook_manager.trigger(HookEvent.STOP, stop_ctx)
# 达到最大迭代次数
last_content = ""
for m in reversed(self.context.messages):
@@ -802,6 +1221,7 @@ class AgentRuntime:
# 提取知识到全局知识池(即便截断,工具调用序列仍有参考价值)
if last_content:
await self._extract_global_knowledge(user_input, last_content, steps)
token_usage_truncated = self._token_budget.summary() if self._token_budget else None
yield {
"type": "final",
"content": last_content or "已达最大迭代次数,但模型未返回最终回答。",
@@ -810,8 +1230,123 @@ class AgentRuntime:
"tool_calls_made": self.context.tool_calls_made,
"truncated": True,
"session_id": self.context.session_id,
"token_usage": token_usage_truncated,
}
async def _compose_system_prompt(self, query: str = "") -> str:
    """Build the complete system prompt for this turn.

    Without a prompt composer, this falls back to plain concatenation: the
    configured system prompt, then long-term memory, learning-pattern hints
    and file-based memory, and finally knowledge-base injection.

    With a composer, the dynamic sections are cleared and rebuilt on every
    call, then ``assemble_full()`` produces the final string. Static sections
    are registered elsewhere and are not touched here.

    Knowledge retrieval is best-effort in both paths: a failure is logged and
    the prompt is built without that section.

    Args:
        query: The user input, used for memory and knowledge lookups.

    Returns:
        The assembled system prompt string.
    """
    if not self._prompt_composer:
        # Degraded path: legacy string concatenation.
        enriched = self.config.system_prompt.rstrip("\n")
        mem_text = await self.memory.initialize(query=query)
        if mem_text:
            enriched += "\n\n" + mem_text
        if self.config.memory.learning_enabled:
            pattern_hint = await self._inject_learning_patterns(query)
            if pattern_hint:
                enriched += "\n\n" + pattern_hint
        if self._memdir and self._memdir_manifest:
            memdir_text = await self._inject_memdir_context(query)
            if memdir_text:
                enriched += "\n\n" + memdir_text
        try:
            enriched = knowledge_retriever.inject_knowledge(enriched, query)
        except Exception as exc:
            # Best-effort: keep the prompt usable, but make the failure visible.
            logger.warning("知识库检索注入失败, 已跳过: %s", exc)
        return enriched

    # Layered assembly path.
    ps_config = self.config.prompt_sections
    d_switches = ps_config.dynamic_sections

    # Drop the dynamic sections left over from the previous call; they are
    # recomputed below on every call.
    self._prompt_composer._dynamic_sections.clear()

    # Dynamic section: environment information.
    if d_switches.get("environment", True):
        self._prompt_composer.add_dynamic(PromptSection(
            "environment",
            lambda uid=self.config.user_id: section_environment(uid),
            cache_break=True,
        ))

    # Dynamic section: language preference (only when one is configured).
    if d_switches.get("language", True):
        lang = ps_config.language
        if lang:
            self._prompt_composer.add_dynamic(PromptSection(
                "language",
                lambda l=lang: section_language(l),
                cache_break=False,
            ))

    # Dynamic section: long-term memory context.
    if d_switches.get("memory_context", True):
        mem_text = await self.memory.initialize(query=query)
        if mem_text:
            self._prompt_composer.add_dynamic(PromptSection(
                "memory_context",
                lambda t=mem_text: f"# Long-term Memory\n\n{t}",
                cache_break=True,
            ))

    # Dynamic section: learning-pattern hints.
    if self.config.memory.learning_enabled:
        pattern_hint = await self._inject_learning_patterns(query)
        if pattern_hint:
            self._prompt_composer.add_dynamic(PromptSection(
                "learning_patterns",
                lambda p=pattern_hint: p,
                cache_break=True,
            ))

    # Dynamic section: file-based memory.
    if self._memdir and self._memdir_manifest:
        memdir_text = await self._inject_memdir_context(query)
        if memdir_text:
            self._prompt_composer.add_dynamic(PromptSection(
                "memdir",
                lambda t=memdir_text: t,
                cache_break=True,
            ))

    # Dynamic section: knowledge-base retrieval.
    # NOTE(review): this is gated by the same "memory_context" switch as the
    # long-term memory section above; confirm whether a dedicated switch was
    # intended.
    if d_switches.get("memory_context", True):
        try:
            base_enriched = knowledge_retriever.inject_knowledge(
                self.config.system_prompt, query
            )
            if base_enriched != self.config.system_prompt:
                # Keep only the part that follows the original prompt.
                # NOTE(review): assumes inject_knowledge appends to the prompt
                # it is given; verify against knowledge_retriever.
                knowledge_delta = base_enriched[len(self.config.system_prompt):].strip()
                if knowledge_delta:
                    self._prompt_composer.add_dynamic(PromptSection(
                        "knowledge_base",
                        lambda kd=knowledge_delta: f"# Relevant Knowledge\n\n{kd}",
                        cache_break=True,
                    ))
        except Exception as exc:
            # Best-effort: assemble without the knowledge section, but log why.
            logger.warning("知识库检索注入失败, 已跳过: %s", exc)

    # Dynamic section: tool list (off unless explicitly switched on).
    if d_switches.get("tool_list", False):
        tool_names = self.tool_manager.tool_names()
        if tool_names:
            tool_list_text = "\n".join(f"- {n}" for n in sorted(tool_names))
            self._prompt_composer.add_dynamic(PromptSection(
                "tool_list",
                lambda t=tool_list_text: f"# Available Tools\n\n{t}",
                cache_break=False,
            ))

    # Resolve the sections and assemble the final prompt.
    return await self._prompt_composer.assemble_full()
async def _inject_memory_context(self, query: str = "") -> None:
"""加载长期记忆并注入 system prompt。"""
mem_text = await self.memory.initialize(query=query)
@@ -826,9 +1361,65 @@ class AgentRuntime:
if pattern_hint:
enriched += "\n\n" + pattern_hint
# 注入文件式记忆 (MEMORY.md)
if self._memdir and self._memdir_manifest:
memdir_text = await self._inject_memdir_context(query)
if memdir_text:
enriched += "\n\n" + memdir_text
self.context.set_system_prompt(enriched)
logger.info("Agent 已注入长期记忆上下文")
async def _inject_memdir_context(self, query: str) -> str:
"""加载文件式记忆并构建注入文本。"""
if not self._memdir or not self._memdir_manifest:
return ""
parts: List[str] = []
# 记忆操作指导(首次注入)
memdir_prompt = self._memdir.build_system_prompt()
parts.append(memdir_prompt)
# AI 驱动的相关性选择
if self._memdir_manifest.entries:
try:
selected = await memory_selector.select(
query=query,
manifest=self._memdir_manifest,
recent_tools=self.tool_manager.tool_names(),
)
if selected:
# 读取选中的记忆文件
parts.append("\n## 相关记忆\n")
for fn in selected:
entry = next(
(e for e in self._memdir_manifest.entries
if e.filename == fn), None
)
if entry:
# 加载完整内容
try:
with open(entry.filepath, "r", encoding="utf-8") as _f:
_, content = parse_frontmatter(_f.read())
except Exception:
content = entry.content
if not content:
content = entry.content
staleness = entry.staleness_note
parts.append(
f"<system-reminder>\n"
f"### [{entry.mem_type.value}] {entry.name}\n"
f"{content[:2000]}"
)
if staleness:
parts.append(f"\n{staleness}")
parts.append("</system-reminder>")
except Exception as e:
logger.warning("AI 记忆选择失败: %s", e)
return "\n".join(parts)
async def _inject_learning_patterns(self, query: str) -> str:
"""查询学习模式,返回格式化的提示文本。"""
from app.core.database import SessionLocal
@@ -1062,9 +1653,17 @@ class AgentRuntime:
@staticmethod
def _is_retryable(err_str: str) -> bool:
"""判断错误是否可重试。"""
err_lower = err_str.lower()
return any(kw in err_lower for kw in _RETRYABLE_ERRORS)
"""判断错误是否可重试(使用 ErrorClassifier"""
try:
error_type, _ = _error_classifier.classify(Exception(err_str))
return error_type == ErrorType.RETRYABLE
except Exception:
err_lower = err_str.lower()
return any(kw in err_lower for kw in (
"timed out", "timeout", "connection error",
"rate limit", "too many requests", "internal server error",
"service unavailable", "temporarily unavailable",
))
# LLM 缓存辅助
@@ -1193,6 +1792,35 @@ class _LLMClient:
response = await client.chat.completions.create(**kwargs)
except Exception as e:
last_error = e
# Reactive Compact: 上下文超限时压缩后重试 (Tier 3)
if (
self.compaction_engine
and is_context_length_error(e)
and self.compaction_engine.config.reactive_compact_enabled
):
logger.warning("检测到上下文超限,触发 ReactiveCompact: %s", str(e)[:100])
try:
compact_result = await self.compaction_engine.reactive_compact(
messages, e, self._config.context_window,
)
if compact_result.strategy != CompactionStrategy.NONE:
logger.info(
"ReactiveCompact 完成: saved=%d tokens, 重试中...",
compact_result.tokens_saved,
)
return await self._do_chat(
api_key=api_key, base_url=base_url,
model=model,
messages=compact_result.messages,
tools=tools,
iteration=iteration,
on_completion=on_completion,
_is_fallback=_is_fallback,
)
except Exception as ce:
logger.error("ReactiveCompact 失败: %s", ce)
# 降级回退:主模型失败时尝试 fallback_llm
fallback = self._config.fallback_llm
if fallback and isinstance(fallback, dict) and not _is_fallback: