# aiagent/backend/app/core/token_counter.py

"""
Token 计数器 — tiktoken 优先，不可用时 fallback 字符估算。

参考 Claude Code src/utils/tokens.ts + src/utils/tokenBudget.ts
"""
from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

# ──────────────────────────── tiktoken probe ────────────────────────────

# Shared encoder instance; stays None until tiktoken has loaded successfully.
_tiktoken_enc: Any = None
# Set once the tiktoken package itself cannot be imported, so that later
# probes return immediately instead of re-importing and re-logging.
_tiktoken_unavailable: bool = False


def _try_load_tiktoken() -> None:
    """Load the shared tiktoken encoder into ``_tiktoken_enc`` if possible.

    The function is a no-op once an encoder is loaded or once the tiktoken
    import has failed. Any other failure while loading is logged as a warning
    and leaves ``_tiktoken_enc`` as ``None``; such failures are not remembered,
    so a later call tries again.
    """
    global _tiktoken_enc, _tiktoken_unavailable
    if _tiktoken_enc is not None or _tiktoken_unavailable:
        return
    try:
        import tiktoken
        # Original author's note: cl100k_base is treated as shared by GPT-4/DeepSeek.
        _tiktoken_enc = tiktoken.get_encoding("cl100k_base")
        logger.info("TokenCounter: 使用 tiktoken cl100k_base 编码器")
    except ImportError:
        # A missing package will not appear mid-process; remember the result
        # so every TokenCounter() construction does not repeat the import
        # attempt and this log line.
        _tiktoken_unavailable = True
        logger.info("TokenCounter: tiktoken 不可用，使用字符估算 fallback")
    except Exception as e:
        logger.warning("TokenCounter: tiktoken 加载失败 (%s)，使用 fallback", e)


# ──────────────────────────── TokenCounter ────────────────────────────

class TokenCounter:
    """Lightweight token counter that picks the best available strategy.

    Uses the module-level tiktoken encoder when one has been loaded and falls
    back to a character-ratio estimate otherwise.
    """

    def __init__(self, model: str = "gpt-4"):
        """Probe for tiktoken and remember the model name.

        Args:
            model: Model identifier. It is stored on the instance; the code in
                this class does not use it to select an encoder.
        """
        _try_load_tiktoken()
        self.model = model

    def count(self, text: str) -> int:
        """Return the token count of a single piece of text.

        Args:
            text: Text to measure. Empty (or otherwise falsy) input yields 0.

        Returns:
            The tiktoken count when an encoder is loaded, otherwise the
            character-based estimate from ``_estimate``.
        """
        if not text:
            return 0
        if _tiktoken_enc is not None:
            # tiktoken's default (disallowed_special="all") raises ValueError
            # when the text contains a special-token literal such as
            # "<|endoftext|>". Message content is arbitrary user/tool text,
            # so encode such literals as ordinary text instead of failing.
            return len(_tiktoken_enc.encode(text, disallowed_special=()))
        return self._estimate(text)

    def count_messages(self, messages: List[Dict[str, Any]]) -> int:
        """Return the token count of an OpenAI-format message list.

        Counting rules (modelled on OpenAI's token accounting):
        - 4 tokens of fixed overhead per message (role + formatting)
        - ``content`` is counted as text (non-string content is stringified)
        - each tool call adds its function name, its arguments and 3 tokens
          of formatting overhead
        - ``tool`` messages additionally count ``tool_call_id``
        - ``name`` is counted once, whatever the role

        Args:
            messages: Message dicts with optional ``role``, ``content``,
                ``tool_calls``, ``tool_call_id`` and ``name`` keys.

        Returns:
            The total token count; 0 for an empty list.
        """
        total = 0
        for msg in messages:
            total += 4  # per-message formatting overhead
            total += self.count(str(msg.get("content") or ""))

            # function.name + function.arguments of every tool call
            for tc in msg.get("tool_calls") or []:
                fn = tc.get("function") or {}
                total += self.count(self._as_text(fn.get("name")))
                total += self.count(self._as_text(fn.get("arguments")))
                total += 3  # per-tool-call formatting overhead

            if msg.get("role", "") == "tool":
                total += self.count(self._as_text(msg.get("tool_call_id")))

            # Count `name` exactly once. Previously a tool message's name was
            # added both in the tool branch and here, inflating the total.
            if msg.get("name"):
                total += self.count(str(msg["name"]))

        return total

    def count_reasoning(self, text: str) -> int:
        """Return the token count of reasoning text; delegates to ``count``."""
        return self.count(text)

    # ──────────────────── helpers / character-estimate fallback ────────────────────

    @staticmethod
    def _as_text(value: Any) -> str:
        """Stringify a field, mapping ``None`` to "" rather than "None"."""
        return "" if value is None else str(value)

    @staticmethod
    def _estimate(text: str) -> int:
        """Estimate tokens from character counts.

        Heuristic ratios (empirical observations of the GPT-4 tokenizer, per
        the original author):
        - CJK ideographs (U+4E00–U+9FFF, U+3400–U+4DBF): ~1.5 chars per token
        - everything else: ~4 chars per token
        - mixed text: the two contributions are added

        Returns:
            0 for empty text, otherwise at least 1.
        """
        if not text:
            return 0

        chinese_chars = sum(
            1 for c in text
            if '\u4e00' <= c <= '\u9fff' or '\u3400' <= c <= '\u4dbf'
        )
        other_chars = len(text) - chinese_chars

        chinese_tokens = chinese_chars / 1.5
        other_tokens = other_chars / 4.0

        # Truncate toward zero, but never report 0 for non-empty text.
        return max(1, int(chinese_tokens + other_tokens))


# ──────────────────────────── Helpers ────────────────────────────

# Context window sizes (in tokens) for common models.
# get_model_context_window looks models up here by exact name, then by prefix.
MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "gpt-4": 8_192,
    "gpt-4-turbo": 128_000,
    "gpt-3.5-turbo": 16_384,
    "deepseek-v4-pro": 128_000,
    "deepseek-v4-flash": 128_000,
    "deepseek-chat": 64_000,
    "deepseek-reasoner": 64_000,
    "claude-sonnet-4-6": 200_000,
    "claude-opus-4-6": 200_000,
}

# Default compaction thresholds (as a fraction of the context window)
DEFAULT_MICRO_COMPACT_THRESHOLD = 0.70
DEFAULT_FULL_COMPACT_THRESHOLD = 0.85
DEFAULT_REACTIVE_THRESHOLD = 0.95

# Safety margin (space reserved for the model's output)
DEFAULT_OUTPUT_RESERVE = 8_192


def get_model_context_window(
    model: str, windows: Optional[Dict[str, int]] = None
) -> int:
    """Return the context window size (in tokens) for a model.

    Lookup order: exact name, then the longest table key that is a prefix of
    ``model``, then a 128K default (logged as a warning).

    Args:
        model: Model identifier, e.g. ``"gpt-4-turbo-2024-04-09"``.
        windows: Optional table to consult instead of
            ``MODEL_CONTEXT_WINDOWS``.

    Returns:
        The window size from the table, or 128_000 for an unknown, empty or
        ``None`` model.
    """
    table = MODEL_CONTEXT_WINDOWS if windows is None else windows
    if model:
        # Exact match
        if model in table:
            return table[model]
        # Prefix match: take the longest matching key. First-match-in-table
        # order would resolve "gpt-4-turbo-..." to the shorter "gpt-4" entry.
        candidates = [prefix for prefix in table if model.startswith(prefix)]
        if candidates:
            return table[max(candidates, key=len)]
    # Default to 128K
    logger.warning("未知模型 %s 的上下文窗口，默认 128K", model)
    return 128_000


def is_context_length_error(error: Exception) -> bool:
    """Heuristically decide whether an exception reports a context-length overflow.

    The lower-cased ``str(error)`` is searched for known phrases, and for the
    HTTP status ``413`` as a standalone number.

    Args:
        error: The exception to classify.

    Returns:
        True when the message looks like a context-length / prompt-too-long
        error, False otherwise.
    """
    import re  # local import: only this function needs it

    msg = str(error).lower()
    indicators = (
        "context_length_exceeded",
        "maximum context length",
        "context length",
        "context_length",
        "too long",
        "prompt too long",
        "reduce the length",
        "token limit",
        "max_tokens",
        "context window",
    )
    if any(indicator in msg for indicator in indicators):
        return True
    # Match 413 only when it is not part of a longer number; a bare substring
    # test also fires on request ids or token counts such as "84130".
    return re.search(r"(?<!\d)413(?!\d)", msg) is not None