""" Token 计数器 — tiktoken 优先,不可用时 fallback 字符估算。 参考 Claude Code src/utils/tokens.ts + src/utils/tokenBudget.ts """ from __future__ import annotations import logging from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) # ──────────────────────────── tiktoken 探测 ──────────────────────────── _tiktoken_enc: Any = None def _try_load_tiktoken(): global _tiktoken_enc if _tiktoken_enc is not None: return try: import tiktoken _tiktoken_enc = tiktoken.get_encoding("cl100k_base") # GPT-4/DeepSeek 共用 logger.info("TokenCounter: 使用 tiktoken cl100k_base 编码器") except ImportError: logger.info("TokenCounter: tiktoken 不可用,使用字符估算 fallback") except Exception as e: logger.warning("TokenCounter: tiktoken 加载失败 (%s),使用 fallback", e) # ──────────────────────────── TokenCounter ──────────────────────────── class TokenCounter: """轻量 token 计数,自动选择最佳策略。""" def __init__(self, model: str = "gpt-4"): _try_load_tiktoken() self.model = model def count(self, text: str) -> int: """计算单段文本的 token 数。""" if not text: return 0 if _tiktoken_enc: return len(_tiktoken_enc.encode(text)) return self._estimate(text) def count_messages(self, messages: List[Dict[str, Any]]) -> int: """计算 OpenAI 格式消息列表的 token 数。 参考 OpenAI token 计数规则: - 每条消息基础 4 token(role + 格式开销) - content 按文本计数 - tool_calls / tool_call_id / name 额外计算 """ total = 0 for msg in messages: total += 4 # 消息格式开销 role = msg.get("role", "") content = msg.get("content", "") or "" total += self.count(str(content)) # tool_calls 中的 function.name + arguments for tc in msg.get("tool_calls") or []: fn = tc.get("function") or {} total += self.count(str(fn.get("name", ""))) total += self.count(str(fn.get("arguments", ""))) total += 3 # tool_call 格式开销 # tool 消息的 tool_call_id + name if role == "tool": total += self.count(str(msg.get("tool_call_id", ""))) total += self.count(str(msg.get("name", ""))) # assistant 消息的 name if msg.get("name"): total += self.count(str(msg["name"])) return total def count_reasoning(self, text: str) -> int: """计算思考内容 token 数(reasoning_content 通常更长)。""" return self.count(text) # ──────────────────── 字符估算 fallback ──────────────────── @staticmethod def _estimate(text: str) -> int: """字符估算:英文 ~4 char/token,中文 ~1.5 char/token。 这个比值来自对 GPT-4 tokenizer 的经验观察: - 纯英文:~4 字符/token - 纯中文:~1.0-1.5 字符/token(中文字符通常 1-2 token) - 混合文本:按比例加权 """ if not text: return 0 chinese_chars = sum(1 for c in text if '\u4e00' <= c <= '\u9fff' or '\u3400' <= c <= '\u4dbf') other_chars = len(text) - chinese_chars # 中文 ~1.5 char/token, 英文/其他 ~4 char/token chinese_tokens = chinese_chars / 1.5 other_tokens = other_chars / 4.0 return max(1, int(chinese_tokens + other_tokens)) # ──────────────────────────── 辅助函数 ──────────────────────────── # 常见模型的上下文窗口大小 MODEL_CONTEXT_WINDOWS: Dict[str, int] = { "gpt-4o": 128_000, "gpt-4o-mini": 128_000, "gpt-4": 8_192, "gpt-4-turbo": 128_000, "gpt-3.5-turbo": 16_384, "deepseek-v4-pro": 128_000, "deepseek-v4-flash": 128_000, "deepseek-chat": 64_000, "deepseek-reasoner": 64_000, "claude-sonnet-4-6": 200_000, "claude-opus-4-6": 200_000, } # 压缩阈值默认值(占窗口比例) DEFAULT_MICRO_COMPACT_THRESHOLD = 0.70 DEFAULT_FULL_COMPACT_THRESHOLD = 0.85 DEFAULT_REACTIVE_THRESHOLD = 0.95 # 安全余量(留给模型输出的空间) DEFAULT_OUTPUT_RESERVE = 8_192 def get_model_context_window(model: str) -> int: """获取模型的上下文窗口大小。""" # 精确匹配 if model in MODEL_CONTEXT_WINDOWS: return MODEL_CONTEXT_WINDOWS[model] # 模糊匹配 for prefix, window in MODEL_CONTEXT_WINDOWS.items(): if model.startswith(prefix): return window # 默认 128K logger.warning("未知模型 %s 的上下文窗口,默认 128K", model) return 128_000 def is_context_length_error(error: Exception) -> bool: """判断异常是否为上下文长度超限错误。""" msg = str(error).lower() indicators = [ "context_length_exceeded", "maximum context length", "context length", "context_length", "too long", "413", "prompt too long", "reduce the length", "token limit", "max_tokens", "context window", ] return any(indicator in msg for indicator in indicators)