# aiagent/backend/app/services/requirement_estimator.py

"""
需求评估代理 — 数字分身从历史数据估算需求工时，提供分阶段方案和风险评估
"""
from __future__ import annotations

import logging
import re
from typing import Any, Dict, List, Optional

from sqlalchemy.orm import Session

from app.core.database import SessionLocal
from app.models.execution import Execution
from app.models.task import Task
from app.services.fingerprint_engine import fingerprint_engine
from app.services.behavior_collector import behavior_collector

logger = logging.getLogger(__name__)


class RequirementEstimator:
    """需求评估代理 — 基于历史数据估算工时与风险"""

    def estimate(self, user_id: str, title: str, description: str = "",
                 modules: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build a complete assessment (effort, risks, phases, advice) for a requirement.

        Args:
            user_id: ID of the user; only used for the behavior log entry.
            title: Requirement title.
            description: Free-text requirement description.
            modules: Modules/components involved. When omitted or empty they
                are extracted from ``description``.

        Returns:
            A dict with the keys ``title``, ``complexity``, ``time_estimate``,
            ``modules``, ``similar_tasks``, ``risks``, ``phases`` and
            ``suggestions``.
        """
        if not modules:
            modules = self._extract_modules(description)

        # Each step feeds the next: history and complexity drive the time
        # estimate, which in turn drives the phase plan.
        history = self._find_similar_tasks(title, description)
        complexity_info = self._assess_complexity(title, description, modules)
        hours = self._estimate_time(complexity_info, history, modules)
        risk_items = self._identify_risks(title, description, modules)
        plan = self._suggest_phases(title, description, modules, hours)
        advice = self._suggest_approach(complexity_info, history, risk_items)

        # Record the estimation as a "decision" behavior event.
        # NOTE(review): the best-case figure is reported under
        # "estimated_hours" — confirm this is intended rather than
        # expected_hours.
        behavior_collector.log_fire_and_forget(
            user_id=user_id,
            category="decision",
            action="estimate_requirement",
            context={"title": title, "modules": modules},
            result={
                "complexity": complexity_info["level"],
                "estimated_hours": hours["best_case_hours"],
            },
        )

        return {
            "title": title,
            "complexity": complexity_info,
            "time_estimate": hours,
            "modules": modules,
            "similar_tasks": history,
            "risks": risk_items,
            "phases": plan,
            "suggestions": advice,
        }

    def _extract_modules(self, description: str) -> List[str]:
        """Extract candidate module/component names from a description.

        Three regex heuristics are applied in order: a name following an
        explicit "模块/组件/服务:" label, a word ending in a component-like
        suffix (模块, 组件, 服务, API, 数据库, 前端, 后端), and the phrase that
        follows 涉及/影响/修改 up to the next comma or newline.

        Args:
            description: Free-text requirement description. ``None`` or an
                empty string yields no modules.

        Returns:
            At most 10 distinct names in first-seen order, each stripped of
            surrounding whitespace.
        """
        if not description:
            return []

        patterns = (
            r'(?:模块|组件|服务)[：:]\s*(\S+)',
            r'(\w+(?:模块|组件|服务|API|数据库|前端|后端))',
            r'(?:涉及|影响|修改)[：:]?\s*([^，,\n]+)',
        )

        # A dict keeps insertion order, so it doubles as an ordered set.
        unique: Dict[str, None] = {}
        for pattern in patterns:
            for match in re.findall(pattern, description):
                # The last pattern can capture trailing (or only) whitespace;
                # normalise first so "X" and "X " are treated as one module
                # and blank captures are dropped.
                name = match.strip()
                if name:
                    unique.setdefault(name, None)
        return list(unique)[:10]

    def _find_similar_tasks(self, title: str, description: str) -> List[Dict[str, Any]]:
        """Find historical tasks that resemble the requirement by keyword overlap.

        Keywords are every run of two or more word characters in the title
        and description. The 200 most recently created tasks that have both a
        title and ``actual_hours`` are scanned; a task counts as similar when
        at least two keywords occur in its title + description.

        Args:
            title: Requirement title.
            description: Requirement description.

        Returns:
            Up to 5 dicts (``task_id``, ``title``, ``actual_hours``,
            ``similarity_score``) ordered by descending score. An empty list
            is returned when there are no keywords or when the lookup fails
            (the failure is logged, not raised).
        """
        # Join with a space so the last word of the title cannot fuse with
        # the first word of the description into a single bogus keyword.
        text = f"{title or ''} {description or ''}"
        keywords = set(re.findall(r'[\w]{2,}', text))
        # NOTE: every stopword is a single character while keywords are at
        # least two characters long, so this filter is currently a no-op.
        stopwords = {'的', '和', '是', '在', '了', '不', '有', '与', '对', '这', '那'}
        keywords -= stopwords

        # Decide this before opening a session: nothing to match means no
        # reason to touch the database.
        if not keywords:
            return []

        db: Optional[Session] = None
        try:
            db = SessionLocal()

            tasks = db.query(Task).filter(
                Task.title.isnot(None),
                Task.actual_hours.isnot(None),
            ).order_by(Task.created_at.desc()).limit(200).all()

            similar = []
            for task in tasks:
                task_text = (task.title or "") + (task.description or "")
                score = sum(1 for kw in keywords if kw in task_text)
                if score >= 2:
                    similar.append({
                        "task_id": str(task.id) if task.id else None,
                        "title": task.title[:100] if task.title else "",
                        "actual_hours": float(task.actual_hours) if task.actual_hours else None,
                        "similarity_score": score,
                    })

            similar.sort(key=lambda item: item["similarity_score"], reverse=True)
            return similar[:5]

        except Exception as e:
            # Best-effort lookup: the estimate falls back to baseline figures,
            # but keep the traceback so the failure can be diagnosed.
            logger.exception("查找同类需求失败: %s", e)
            return []
        finally:
            if db is not None:
                try:
                    db.close()
                except Exception:
                    # Closing must never mask the result, but do not hide it.
                    logger.warning("关闭数据库会话失败", exc_info=True)

    def _assess_complexity(self, title: str, description: str,
                           modules: List[str]) -> Dict[str, Any]:
        """Score the requirement's complexity from module count and feature keywords.

        The score is ``module_count * 3`` (module count is at least 1 and
        capped at 10) plus a fixed weight for every feature whose keyword
        pattern occurs in the title or description. Feature keywords are
        matched case-insensitively so that e.g. "WebSocket", "Webhook" or
        "Dashboard" are detected. Note that 导出 triggers both ``has_report``
        and ``has_batch``.

        Args:
            title: Requirement title.
            description: Requirement description.
            modules: Modules involved; empty or ``None`` counts as one module.

        Returns:
            A dict with ``level`` (simple / medium / complex / very_complex),
            its Chinese ``label``, the numeric ``score`` and the ``factors``
            that produced it.
        """
        text = title + " " + description

        # (factor name, trigger pattern, score weight)
        feature_rules = (
            ("has_migration", r'迁移|migration|schema.*change', 10),
            ("has_payment", r'支付|付款|资金|金额', 8),
            ("has_permission", r'权限|permission|auth|role', 5),
            ("has_external_api", r'第三方|外部.*API|webhook|回调', 5),
            ("has_report", r'报表|统计|chart|dashboard|导出', 3),
            ("has_real_time", r'实时|websocket|push|消息推送', 4),
            ("has_batch", r'批量|import|export|导入|导出', 3),
        )

        factors: Dict[str, int] = {
            "module_count": min(len(modules) if modules else 1, 10),
        }
        score = factors["module_count"] * 3
        for name, pattern, weight in feature_rules:
            hit = 1 if re.search(pattern, text, re.IGNORECASE) else 0
            factors[name] = hit
            score += hit * weight

        # Inclusive upper bounds; anything above the last one is very complex.
        level, label = "very_complex", "非常复杂"
        for upper_bound, bound_level, bound_label in (
            (15, "simple", "简单"),
            (30, "medium", "中等"),
            (50, "complex", "复杂"),
        ):
            if score <= upper_bound:
                level, label = bound_level, bound_label
                break

        return {
            "level": level,
            "label": label,
            "score": score,
            "factors": factors,
        }

    def _estimate_time(self, complexity: Dict[str, Any],
                       similar_tasks: List[Dict[str, Any]],
                       modules: List[str]) -> Dict[str, Any]:
        """Estimate development time in hours and 8-hour days.

        A per-module baseline is chosen from the complexity level. When
        similar tasks carry a non-zero ``actual_hours``, the baseline is
        blended 40/60 with their average. The blended figure is multiplied by
        the module count to get the expected effort; best and worst cases are
        0.8x and 1.8x of that.

        Args:
            complexity: Result of the complexity assessment; ``level`` is read.
            similar_tasks: Historical tasks, each optionally carrying
                ``actual_hours``.
            modules: Modules involved; empty or ``None`` counts as one module.

        Returns:
            A dict with best/expected/worst hours and days, a ``confidence``
            ("high" when at least 3 historical samples were used, otherwise
            "medium") and a ``based_on`` description.
        """
        level = complexity["level"]
        module_count = len(modules) if modules else 1

        # Baseline hours per module for each complexity level.
        base_hours = {
            "simple": 8,
            "medium": 16,
            "complex": 32,
            "very_complex": 56,
        }.get(level, 16)

        # Only tasks that actually report hours can inform the estimate, so
        # confidence and provenance below are derived from this list rather
        # than from the raw number of similar tasks.
        hist_hours = [
            task["actual_hours"]
            for task in (similar_tasks or [])
            if task.get("actual_hours")
        ]
        if hist_hours:
            hist_avg = sum(hist_hours) / len(hist_hours)
            # NOTE(review): hist_avg looks like whole-task hours, yet it is
            # blended into a per-module baseline that is then multiplied by
            # module_count — confirm this scaling is intended.
            base_hours = base_hours * 0.4 + hist_avg * 0.6

        best_case = round(base_hours * module_count * 0.8, 1)
        expected = round(base_hours * module_count, 1)
        worst_case = round(base_hours * module_count * 1.8, 1)

        # Days assume an 8-hour working day.
        return {
            "best_case_hours": best_case,
            "expected_hours": expected,
            "worst_case_hours": worst_case,
            "best_case_days": round(best_case / 8, 1),
            "expected_days": round(expected / 8, 1),
            "worst_case_days": round(worst_case / 8, 1),
            "confidence": "high" if len(hist_hours) >= 3 else "medium",
            "based_on": f"{len(hist_hours)} 个历史同类需求" if hist_hours else "行业基准估算",
        }

    def _identify_risks(self, title: str, description: str,
                        modules: List[str]) -> List[Dict[str, Any]]:
        """Scan the requirement text for known risk keywords.

        Patterns are matched case-sensitively against the title and
        description joined by a space; ``modules`` is accepted but not used.

        Args:
            title: Requirement title.
            description: Requirement description.
            modules: Modules involved (unused).

        Returns:
            One dict per matching rule, in rule order, with ``severity``,
            ``label`` and a suggested ``mitigation``.
        """
        haystack = " ".join((title, description))

        # (trigger pattern, severity, risk label)
        rule_table = (
            (r'数据.*迁移|schema.*变更|数据库.*改', "high", "数据库结构变更"),
            (r'支付|交易|退款|结算', "high", "涉及支付/资金流程"),
            (r'删除|物理删除|DROP|TRUNCATE', "high", "包含数据删除操作"),
            (r'第三方.*API|外部.*依赖', "medium", "依赖外部API可用性"),
            (r'权限.*改|角色.*调整|auth.*change', "medium", "权限体系变更"),
            (r'性能|并发|QPS|高并发', "medium", "性能/并发要求"),
            (r'多个.*系统|跨.*系统|集成', "medium", "多系统集成复杂度"),
            (r'导出|报表|dashboard|可视化', "low", "导出/报表格式兼容"),
            (r'手机|移动端|H5|小程序|APP', "low", "多端适配工作"),
            (r'旧.*兼容|legacy|向后兼容', "low", "历史兼容性约束"),
        )

        return [
            {
                "severity": severity,
                "label": label,
                "mitigation": self._suggest_mitigation(label),
            }
            for pattern, severity, label in rule_table
            if re.search(pattern, haystack)
        ]

    def _suggest_mitigation(self, risk_label: str) -> str:
        """Return the canned mitigation advice for a risk label.

        Args:
            risk_label: Label of an identified risk.

        Returns:
            The advice registered for that label, or a generic "refine the
            plan first" message when the label is unknown.
        """
        fallback = "建议细化方案后评估"
        playbook = dict((
            ("数据库结构变更", "建议编写 migration 脚本并先在 staging 验证"),
            ("涉及支付/资金流程", "建议增加财务团队 review 环节和完整测试用例"),
            ("包含数据删除操作", "建议实现软删除，添加二次确认机制"),
            ("依赖外部API可用性", "建议添加超时重试和降级方案"),
            ("权限体系变更", "建议灰度发布，充分回归测试"),
            ("性能/并发要求", "建议提前压测并设置监控告警"),
            ("多系统集成复杂度", "建议明确接口契约，增加集成测试"),
            ("导出/报表格式兼容", "建议确认所有目标格式和编码"),
            ("多端适配工作", "建议使用响应式设计或明确优先级"),
            ("历史兼容性约束", "建议增加兼容性测试覆盖"),
        ))
        try:
            return playbook[risk_label]
        except KeyError:
            return fallback

    def _suggest_phases(self, title: str, description: str,
                        modules: List[str],
                        time_estimate: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Propose a phased delivery plan sized from the expected duration.

        Work expected to take 3 days or less is a single phase. Longer work
        with more than one module is split 50/30/20 into core modules,
        remaining modules, and optimisation/testing. Otherwise it is split
        20/50/30 into design, core development, and testing/release.

        Args:
            title: Requirement title, used in task names.
            description: Requirement description (unused).
            modules: Modules involved.
            time_estimate: Time estimate; ``expected_days`` is read.

        Returns:
            A list of phase dicts with ``phase``, ``name``, ``tasks``,
            ``estimated_days`` and ``checkpoint``.
        """
        total_days = time_estimate["expected_days"]

        def make_phase(number, name, tasks, days, checkpoint):
            # Single place that fixes the shape and key order of a phase.
            return {
                "phase": number,
                "name": name,
                "tasks": tasks,
                "estimated_days": days,
                "checkpoint": checkpoint,
            }

        def share(fraction):
            # Portion of the total duration, rounded to one decimal.
            return round(total_days * fraction, 1)

        # Short requirements are not worth splitting.
        if total_days <= 3:
            return [make_phase(1, "一站式开发", [f"实现 {title}"], total_days, "功能验收")]

        if not (modules and len(modules) > 1):
            return [
                make_phase(1, "设计与准备", ["技术方案设计", "接口定义"], share(0.2), "方案评审通过"),
                make_phase(2, "核心开发", [f"实现 {title}"], share(0.5), "功能完成"),
                make_phase(3, "测试与上线", ["集成测试", "性能测试", "上线部署"], share(0.3), "上线发布"),
            ]

        # First half of the modules forms the core phase, the rest follows.
        split_at = max(1, len(modules) // 2)
        core_tasks = [f"实现 {m} 模块" for m in modules[:split_at]]
        extra_tasks = [f"实现 {m} 模块" for m in modules[split_at:]]
        return [
            make_phase(1, "核心功能", core_tasks, share(0.5), "核心功能可 Demo"),
            make_phase(2, "扩展功能", extra_tasks, share(0.3), "完整功能验收"),
            make_phase(3, "优化 & 测试", ["性能优化", "完善测试", "文档整理"], share(0.2), "上线发布"),
        ]

    def _suggest_approach(self, complexity: Dict[str, Any],
                          similar_tasks: List[Dict[str, Any]],
                          risks: List[Dict[str, Any]]) -> List[str]:
        """Assemble development-strategy advice from the assessment results.

        Args:
            complexity: Complexity assessment; ``level`` and the optional
                ``factors["module_count"]`` are read.
            similar_tasks: Historical tasks found for the requirement.
            risks: Identified risks, each with ``severity`` and ``label``.

        Returns:
            Advice strings in a fixed order: complexity, risk volume, high
            risks, history, simplicity, parallelism.
        """
        level = complexity["level"]
        advice: List[str] = []

        if level in ("complex", "very_complex"):
            advice += [
                "建议先做技术方案评审，再启动开发",
                "建议拆分为多个独立可交付的 milestone",
            ]

        risk_total = len(risks)
        if risk_total >= 3:
            advice.append(f"识别到 {risk_total} 项风险，建议逐项制定应对预案")

        high_labels = [risk["label"] for risk in risks if risk["severity"] == "high"]
        if high_labels:
            joined = ', '.join(high_labels)
            advice.append(f"存在 {len(high_labels)} 项高风险: {joined}，强烈建议专项评审")

        if similar_tasks:
            advice.append(f"参考 {len(similar_tasks)} 个历史同类需求的实际工时数据")

        if level == "simple":
            advice.append("需求较简单，可快速迭代，不必过度设计")

        # Many modules: recommend working on them in parallel.
        if complexity.get("factors", {}).get("module_count", 0) > 4:
            advice.append("涉及模块较多，建议多人并行开发")

        return advice


# Module-level RequirementEstimator instance, created when this module is imported.
requirement_estimator = RequirementEstimator()