- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions, schedules, executions, team_members) and unbind goals/tasks before delete - Remove hardcoded personality templates in Android, replace with dynamic system prompt generation from name + description - Set promptSectionsEnabled=false to bypass PromptComposer for personality - Add Tencent Cloud Linux deployment guide (Docker Compose) - Accumulated backend service updates, frontend UI fixes, Android app changes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
166 lines
5.7 KiB
Python
166 lines
5.7 KiB
Python
"""
|
||
Token counter — prefers tiktoken and falls back to character-based estimation when tiktoken is unavailable.

Reference: Claude Code src/utils/tokens.ts + src/utils/tokenBudget.ts
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
logger = logging.getLogger(__name__)

# ──────────────────────────── tiktoken probing ────────────────────────────

# Shared encoder instance; remains None when tiktoken could not be loaded.
_tiktoken_enc: Any = None
# Flipped to True on the first probe so that a failed load is neither retried
# nor logged again on later calls.
_tiktoken_load_attempted: bool = False


def _try_load_tiktoken():
    """Load the tiktoken ``cl100k_base`` encoder into ``_tiktoken_enc`` once.

    The probe runs at most one time per process. On success the encoder is
    stored in the module-level ``_tiktoken_enc``. If ``tiktoken`` cannot be
    imported, or loading the encoding raises, ``_tiktoken_enc`` is left as
    ``None`` and the outcome is logged a single time.

    Returns:
        None. The result is communicated through ``_tiktoken_enc``.
    """
    global _tiktoken_enc, _tiktoken_load_attempted
    if _tiktoken_enc is not None or _tiktoken_load_attempted:
        return
    # Mark before trying: whatever happens below, do not probe again.
    _tiktoken_load_attempted = True
    try:
        import tiktoken

        _tiktoken_enc = tiktoken.get_encoding("cl100k_base")  # shared by GPT-4 / DeepSeek
        logger.info("TokenCounter: 使用 tiktoken cl100k_base 编码器")
    except ImportError:
        logger.info("TokenCounter: tiktoken 不可用,使用字符估算 fallback")
    except Exception as e:
        # Deliberate best-effort: any loader failure degrades to the fallback.
        logger.warning("TokenCounter: tiktoken 加载失败 (%s),使用 fallback", e)
|
||
|
||
|
||
# ──────────────────────────── TokenCounter ────────────────────────────
|
||
|
||
class TokenCounter:
    """Lightweight token counter that picks the best available strategy.

    When the module-level tiktoken encoder has been loaded it is used for
    counting; otherwise counts come from the character-ratio estimate in
    ``_estimate``.
    """

    def __init__(self, model: str = "gpt-4"):
        """Probe for tiktoken and remember the model name.

        Args:
            model: Model identifier. It is stored on the instance; the
                counting methods in this class do not read it.
        """
        _try_load_tiktoken()
        self.model = model

    def count(self, text: str) -> int:
        """Return the token count of one piece of text.

        Args:
            text: Text to measure. Empty or ``None`` yields 0.

        Returns:
            Token count from tiktoken when available, else the estimate.
        """
        if not text:
            return 0
        if _tiktoken_enc:
            # disallowed_special=() makes tiktoken encode special-token
            # literals (e.g. "<|endoftext|>") as plain text; with the default
            # setting encode() raises ValueError on such input.
            return len(_tiktoken_enc.encode(text, disallowed_special=()))
        return self._estimate(text)

    def count_messages(self, messages: List[Dict[str, Any]]) -> int:
        """Return the token count of an OpenAI-format message list.

        Counting rules:
        - every message costs a fixed 4 tokens (role + framing overhead)
        - ``content`` is counted as text (``None`` counts as empty)
        - each tool call adds its function name, its arguments and 3 tokens
          of framing overhead
        - ``tool`` messages add their ``tool_call_id``
        - a non-empty ``name`` is counted exactly once, whatever the role

        Args:
            messages: List of message dicts.

        Returns:
            Total token count across all messages.
        """
        total = 0
        for msg in messages:
            total += 4  # per-message framing overhead
            total += self.count(str(msg.get("content") or ""))

            # function.name + arguments of every tool call
            for call in msg.get("tool_calls") or []:
                fn = call.get("function") or {}
                # `or ""` keeps an explicit None from being counted as "None"
                total += self.count(str(fn.get("name") or ""))
                total += self.count(str(fn.get("arguments") or ""))
                total += 3  # per-tool-call framing overhead

            if msg.get("role", "") == "tool":
                total += self.count(str(msg.get("tool_call_id") or ""))

            # Single place where `name` is counted, so tool messages are not
            # charged for it twice.
            name = msg.get("name")
            if name:
                total += self.count(str(name))

        return total

    def count_reasoning(self, text: str) -> int:
        """Return the token count of reasoning text (same rule as ``count``)."""
        return self.count(text)

    # ──────────────────── character-estimate fallback ────────────────────

    @staticmethod
    def _estimate(text: str) -> int:
        """Estimate tokens from character counts.

        Ratios (empirical observations of the GPT-4 tokenizer, per the
        original author):
        - pure English: ~4 characters per token
        - pure Chinese: ~1.0-1.5 characters per token
        - mixed text: weighted by the share of each

        Args:
            text: Text to estimate.

        Returns:
            0 for empty text, otherwise at least 1.
        """
        if not text:
            return 0

        # CJK Unified Ideographs plus Extension A
        chinese_chars = sum(
            1 for ch in text
            if '\u4e00' <= ch <= '\u9fff' or '\u3400' <= ch <= '\u4dbf'
        )
        other_chars = len(text) - chinese_chars

        # Chinese ~1.5 chars/token, everything else ~4 chars/token
        estimated = chinese_chars / 1.5 + other_chars / 4.0
        return max(1, int(estimated))
|
||
|
||
|
||
# ──────────────────────────── 辅助函数 ────────────────────────────
|
||
|
||
# Context window sizes (in tokens) for common models
MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "gpt-4": 8_192,
    "gpt-4-turbo": 128_000,
    "gpt-3.5-turbo": 16_384,
    "deepseek-v4-pro": 128_000,
    "deepseek-v4-flash": 128_000,
    "deepseek-chat": 64_000,
    "deepseek-reasoner": 64_000,
    "claude-sonnet-4-6": 200_000,
    "claude-opus-4-6": 200_000,
}

# Default compaction thresholds (as a fraction of the window)
DEFAULT_MICRO_COMPACT_THRESHOLD = 0.70
DEFAULT_FULL_COMPACT_THRESHOLD = 0.85
DEFAULT_REACTIVE_THRESHOLD = 0.95

# Safety margin (room reserved for the model's output)
DEFAULT_OUTPUT_RESERVE = 8_192


def get_model_context_window(model: str) -> int:
    """Return the context window size for a model name.

    Lookup order:
    1. exact key in ``MODEL_CONTEXT_WINDOWS``
    2. the longest key that is a prefix of ``model`` (so a dated variant such
       as ``gpt-4-turbo-2024-04-09`` resolves to ``gpt-4-turbo`` rather than
       to the shorter ``gpt-4``)
    3. a default of 128_000, with a warning logged

    Args:
        model: Model identifier.

    Returns:
        Context window size in tokens.
    """
    exact = MODEL_CONTEXT_WINDOWS.get(model)
    if exact is not None:
        return exact

    # Longest prefix wins; taking the first match in dict order would let a
    # short key shadow a more specific one.
    candidates = [key for key in MODEL_CONTEXT_WINDOWS if model.startswith(key)]
    if candidates:
        return MODEL_CONTEXT_WINDOWS[max(candidates, key=len)]

    logger.warning("未知模型 %s 的上下文窗口,默认 128K", model)
    return 128_000
|
||
|
||
|
||
def is_context_length_error(error: Exception) -> bool:
    """Tell whether an exception looks like a context-length overflow.

    The check is a case-insensitive substring search of ``str(error)``
    against a fixed list of phrases.

    Args:
        error: The exception to inspect.

    Returns:
        True if any known phrase occurs in the error text, else False.
    """
    needles = (
        "context_length_exceeded",
        "maximum context length",
        "context length",
        "context_length",
        "too long",
        "413",
        "prompt too long",
        "reduce the length",
        "token limit",
        "max_tokens",
        "context window",
    )
    text = str(error).lower()
    for needle in needles:
        if needle in text:
            return True
    return False
|