# aiagent/backend/app/services/cost_estimator.py

"""LLM 成本估算 — 基于模型定价和 Token 用量估算费用"""
from __future__ import annotations

import logging
from typing import Dict, Optional, Tuple

logger = logging.getLogger(__name__)

# Model pricing in USD per 1M tokens (2024 Q4 reference values).
MODEL_PRICING: Dict[str, Tuple[float, float]] = {
    # (input_price_per_1M, output_price_per_1M)
    "gpt-4o": (2.50, 10.00),
    "gpt-4o-mini": (0.15, 0.60),
    "gpt-4-turbo": (10.00, 30.00),
    "gpt-3.5-turbo": (0.50, 1.50),
    "deepseek-chat": (0.14, 0.28),
    "deepseek-v3": (0.27, 1.10),
    "deepseek-r1": (0.55, 2.19),
    "deepseek-v4-flash": (0.14, 0.28),
    "deepseek-v4-pro": (0.27, 1.10),
    "claude-3-opus": (15.00, 75.00),
    "claude-3-sonnet": (3.00, 15.00),
    "claude-3-haiku": (0.25, 1.25),
    "claude-3.5-sonnet": (3.00, 15.00),
    "claude-3.5-haiku": (1.00, 5.00),
}

# Fallback pricing for models that match no entry in MODEL_PRICING.
DEFAULT_PRICING = (1.00, 4.00)


def estimate_cost(
    model: str,
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
) -> float:
    """Estimate the cost of a single LLM call in USD.

    Args:
        model: Model name; matched fuzzily against MODEL_PRICING.
        prompt_tokens: Number of input tokens. ``None`` is treated as 0.
        completion_tokens: Number of output tokens. ``None`` is treated as 0.

    Returns:
        The estimated cost in USD, rounded to 6 decimal places.
    """
    input_price, output_price = _get_price(model)
    # A missing (None) token count is counted as zero tokens instead of
    # raising TypeError in the arithmetic below.
    prompt_tokens = prompt_tokens or 0
    completion_tokens = completion_tokens or 0
    cost = (prompt_tokens / 1_000_000) * input_price + (
        completion_tokens / 1_000_000
    ) * output_price
    return round(cost, 6)


def estimate_cost_yuan(
    model: str,
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    exchange_rate: float = 7.2,
) -> float:
    """Estimate the cost of a single LLM call in CNY.

    Args:
        model: Model name; matched fuzzily against MODEL_PRICING.
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.
        exchange_rate: Multiplier applied to the USD estimate.

    Returns:
        The USD estimate multiplied by ``exchange_rate``, rounded to
        4 decimal places.
    """
    return round(estimate_cost(model, prompt_tokens, completion_tokens) * exchange_rate, 4)


def _normalize_model_name(name: str) -> str:
    """Lower-case *name* and strip ``-`` and ``.`` so name variants compare equal."""
    return name.lower().replace("-", "").replace(".", "")


def _get_price(model: str) -> Tuple[float, float]:
    """Look up ``(input_price, output_price)`` for *model* by fuzzy matching.

    A pricing key matches when its normalized form is a substring of the
    normalized model name. When several keys match, the longest one wins, so
    ``"gpt-4o-mini-2024-07-18"`` resolves to ``"gpt-4o-mini"`` rather than to
    the shorter ``"gpt-4o"`` that also matches. Keys of equal length keep
    MODEL_PRICING's insertion order (first one wins).

    Returns DEFAULT_PRICING when *model* is empty/None or nothing matches.
    """
    if not model:
        return DEFAULT_PRICING

    wanted = _normalize_model_name(model)
    best_price = None
    best_len = 0
    for key, price in MODEL_PRICING.items():
        candidate = _normalize_model_name(key)
        # Strict '>' keeps the earliest entry on ties.
        if candidate in wanted and len(candidate) > best_len:
            best_len = len(candidate)
            best_price = price

    if best_price is None:
        logger.debug("No pricing entry matches model %r; using default pricing", model)
        return DEFAULT_PRICING
    return best_price


def get_model_pricing_table() -> Dict[str, dict]:
    """Return the model pricing table in a form suitable for frontend display.

    Returns:
        A dict keyed by each model name in MODEL_PRICING. Every value is a
        dict with ``"input_per_1M"`` and ``"output_per_1M"`` entries holding
        that model's input and output price.
    """
    table: Dict[str, dict] = {}
    for name, rates in MODEL_PRICING.items():
        # Each pricing entry is an (input, output) pair.
        prompt_rate, completion_rate = rates
        table[name] = {
            "input_per_1M": prompt_rate,
            "output_per_1M": completion_rate,
        }
    return table