# aiagent/backend/tests/test_agent_runtime.py

"""
Integration tests for the Agent ReAct loop.

Covers the core paths of core.py: normal conversation / tool calling /
budget circuit-breaking / max_iterations truncation.
"""
from __future__ import annotations

import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from app.agent_runtime.core import AgentRuntime
from app.agent_runtime.schemas import (
    AgentConfig,
    AgentBudgetConfig,
    AgentLLMConfig,
    AgentToolConfig,
    AgentMemoryConfig,
)
from app.services.tool_registry import tool_registry as _global_registry


def make_config(**kwargs) -> AgentConfig:
    """Build an ``AgentConfig`` for tests.

    Starts from a fixed set of test defaults (OpenAI ``gpt-4o-mini`` with
    5 iterations, a 10-LLM-call / 20-tool-call budget, default tool config,
    and every memory feature switched off). Any keyword argument replaces the
    default entry of the same name wholesale; unknown names are forwarded to
    ``AgentConfig`` as-is.
    """
    llm_section = AgentLLMConfig(
        provider="openai",
        model="gpt-4o-mini",
        max_iterations=5,
    )
    budget_section = AgentBudgetConfig(
        max_llm_invocations=10,
        max_tool_calls=20,
    )
    tools_section = AgentToolConfig()
    memory_section = AgentMemoryConfig(
        enabled=False,
        persist_to_db=False,
        vector_memory_enabled=False,
        learning_enabled=False,
    )

    # Caller overrides are unpacked last so they win over the defaults.
    settings = {
        "name": "test_agent",
        "system_prompt": "You are a test assistant.",
        "llm": llm_section,
        "budget": budget_section,
        "tools": tools_section,
        "memory": memory_section,
        **kwargs,
    }
    return AgentConfig(**settings)


# ── Mock helpers ─────────────────────────────────────────────────────────────

class MockFunction:
    """Stand-in for the ``function`` attribute of an OpenAI SDK tool call.

    Stores the tool name and the raw arguments string exactly as given.
    """

    def __init__(self, name="", arguments="{}"):
        self.name, self.arguments = name, arguments


class MockToolCall:
    """Stand-in for an OpenAI SDK tool-call object.

    Exposes ``id``, a fixed ``type`` of ``"function"``, and a ``function``
    attribute wrapping the tool name and raw arguments string.
    """

    def __init__(self, id="call_1", name="", arguments="{}"):
        wrapped = MockFunction(name=name, arguments=arguments)
        self.id = id
        self.type = "function"
        self.function = wrapped


class MockLLMResponse:
    """Stand-in for the response object returned by the LLM client.

    Carries ``content``, ``tool_calls`` and ``reasoning_content`` as plain
    attributes and additionally offers a dict-like ``get``.
    """

    def __init__(self, content="", tool_calls=None, reasoning_content=None):
        self.content = content
        self.tool_calls = tool_calls
        self.reasoning_content = reasoning_content

    def get(self, key, default=None):
        """Return attribute ``key`` if it exists, otherwise ``default``."""
        try:
            return getattr(self, key)
        except AttributeError:
            return default


def tc(id="call_1", name="", arguments="{}"):
    """Shorthand for building a ``MockToolCall`` with the given fields."""
    call = MockToolCall(id=id, name=name, arguments=arguments)
    return call


def _base_mocks(runtime):
    """Create the patchers every runtime test needs, without starting them.

    Returns a ``(patchers, llm_patcher)`` tuple. ``patchers`` maps
    ``"runtime"`` to a patcher replacing the runtime's execution-log and
    context-injection hooks, ``"memory"`` to a patcher replacing
    ``runtime.memory.save_context``, and ``"llm"`` to the patcher for
    ``app.agent_runtime.core._LLMClient``. ``llm_patcher`` is that same LLM
    patcher, returned separately so callers can bind it with ``as``.
    """
    llm_patcher = patch("app.agent_runtime.core._LLMClient")
    runtime_patcher = patch.multiple(
        runtime,
        _fire_execution_log=MagicMock(),
        _inject_memory_context=AsyncMock(),
        _inject_knowledge_context=AsyncMock(),
    )
    memory_patcher = patch.object(runtime.memory, "save_context", new=AsyncMock())

    patchers = {
        "runtime": runtime_patcher,
        "memory": memory_patcher,
        "llm": llm_patcher,
    }
    return patchers, llm_patcher


# ── Tests ────────────────────────────────────────────────────────────────────

class TestAgentRuntimeNormalConversation:
    """Tests for the plain conversation flow."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_simple_text_response(self):
        """A plain-text reply is expected to finish in one iteration, no tools."""
        agent = AgentRuntime(config=make_config())
        reply = MockLLMResponse(content="Hello! How can I help you?")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=reply)
            outcome = await agent.run("Hello!")

        assert outcome.success is True
        assert "Hello!" in outcome.content
        assert outcome.iterations_used == 1
        assert outcome.tool_calls_made == 0
        assert len(outcome.steps) >= 1

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_multi_turn_conversation_context(self):
        """Two consecutive runs on one runtime should accumulate context messages."""
        agent = AgentRuntime(config=make_config())
        turn = 0

        async def fake_chat(messages, tools, iteration, on_completion=None):
            # The first invocation answers for turn one; every later one for turn two.
            nonlocal turn
            turn += 1
            if turn == 1:
                text = "I processed your first message."
            else:
                text = "I processed your second message."
            return MockLLMResponse(content=text)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = fake_chat

            first = await agent.run("First message")
            assert first.success
            assert "first" in first.content

            second = await agent.run("Second message")
            assert second.success
            assert "second" in second.content

        assert len(agent.context.messages) >= 4

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_empty_llm_response_handling(self):
        """An empty-string reply should still yield a successful, non-None result."""
        agent = AgentRuntime(config=make_config())
        blank_reply = MockLLMResponse(content="")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=blank_reply)
            outcome = await agent.run("Test")

        assert outcome.success is True
        assert outcome.content is not None


class TestAgentRuntimeToolCalling:
    """Tests for the tool-calling flow."""

    @staticmethod
    def _scripted_chat(responses):
        """Return an async ``chat`` stand-in that replays ``responses`` in order."""
        pending = iter(responses)

        async def chat(messages, tools, iteration, on_completion=None):
            return next(pending)

        return chat

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_single_tool_call(self):
        """One tool call followed by a text answer should record one tool step."""
        agent = AgentRuntime(config=make_config())

        def get_weather(**kwargs):
            return json.dumps({"city": kwargs.get("city", "Beijing"), "temp": 25})

        weather_schema = {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                },
            },
        }
        _global_registry.register_builtin_tool("get_weather", get_weather, weather_schema)

        chat = self._scripted_chat([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="get_weather", arguments='{"city": "Beijing"}'),
            ]),
            MockLLMResponse(content="Beijing is 25C today."),
        ])

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = chat
            outcome = await agent.run("What's the weather?")

        assert outcome.success is True
        assert "Beijing" in outcome.content
        assert outcome.tool_calls_made == 1
        result_steps = [step for step in outcome.steps if step.type == "tool_result"]
        assert len(result_steps) == 1
        assert result_steps[0].tool_name == "get_weather"

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_tool_args_json_parse_failure(self):
        """Malformed JSON in tool arguments must not crash the run."""
        agent = AgentRuntime(config=make_config())

        def echo(**kwargs):
            return json.dumps(kwargs)

        echo_schema = {"type": "function", "function": {"name": "echo"}}
        _global_registry.register_builtin_tool("echo", echo, echo_schema)

        # The arguments string below is deliberately not valid JSON.
        chat = self._scripted_chat([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="echo", arguments="not valid json {{{"),
            ]),
            MockLLMResponse(content="Done."),
        ])

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = chat
            outcome = await agent.run("echo something")

        # The run is expected to complete rather than raise.
        assert outcome.success is True

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_tool_execution_exception_protection(self):
        """A tool that raises must not prevent the run from succeeding."""
        agent = AgentRuntime(config=make_config())

        def broken_tool(**kwargs):
            raise RuntimeError("Tool internal failure!")

        broken_schema = {"type": "function", "function": {"name": "broken_tool"}}
        _global_registry.register_builtin_tool("broken_tool", broken_tool, broken_schema)

        chat = self._scripted_chat([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="broken_tool", arguments="{}"),
            ]),
            MockLLMResponse(content="The tool failed, let me try something else."),
        ])

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = chat
            outcome = await agent.run("Use the broken tool")

        assert outcome.success is True

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_multiple_parallel_tool_calls(self):
        """Two tool calls in a single LLM response should both be counted."""
        agent = AgentRuntime(config=make_config())

        def tool_a(**kwargs):
            return json.dumps({"a": 1})

        def tool_b(**kwargs):
            return json.dumps({"b": 2})

        schema_a = {"type": "function", "function": {"name": "tool_a"}}
        schema_b = {"type": "function", "function": {"name": "tool_b"}}
        _global_registry.register_builtin_tool("tool_a", tool_a, schema_a)
        _global_registry.register_builtin_tool("tool_b", tool_b, schema_b)

        chat = self._scripted_chat([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="tool_a", arguments="{}"),
                tc(id="call_2", name="tool_b", arguments="{}"),
            ]),
            MockLLMResponse(content="Both tools executed."),
        ])

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = chat
            outcome = await agent.run("Run both tools")

        assert outcome.success is True
        assert outcome.tool_calls_made == 2


class TestAgentRuntimeBudget:
    """Tests for budget circuit-breaking."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_llm_invocation_budget_exceeded(self):
        """With a zero LLM budget the run should fail without calling the LLM."""
        cfg = make_config()
        cfg.budget.max_llm_invocations = 0  # forbid every LLM call
        agent = AgentRuntime(config=cfg)
        chat_stub = AsyncMock()

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = chat_stub
            outcome = await agent.run("Hello")

        assert outcome.success is False
        assert "预算" in outcome.content
        chat_stub.assert_not_called()

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_tool_call_budget_exceeded(self):
        """With a one-tool-call budget, a second tool call should truncate or fail."""
        cfg = make_config()
        cfg.budget.max_tool_calls = 1
        agent = AgentRuntime(config=cfg)

        def echo(**kwargs):
            return json.dumps(kwargs)

        echo_schema = {"type": "function", "function": {"name": "echo"}}
        _global_registry.register_builtin_tool("echo", echo, echo_schema)

        # Two consecutive tool-call responses; the second goes past the budget of one.
        pending = iter([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="echo", arguments='{"msg": "hello"}'),
            ]),
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_2", name="echo", arguments='{"msg": "world"}'),
            ]),
        ])

        async def fake_chat(messages, tools, iteration, on_completion=None):
            return next(pending)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = fake_chat
            outcome = await agent.run("Echo twice")

        # Either outcome is accepted as evidence that the budget kicked in.
        assert outcome.truncated is True or outcome.success is False

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_on_llm_invocation_callback_budget(self):
        """An ``on_llm_invocation`` callback that raises should fail the run."""
        agent = AgentRuntime(config=make_config())
        budget_error = Exception("workflow budget exceeded")
        agent.on_llm_invocation = MagicMock(side_effect=budget_error)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock()
            outcome = await agent.run("Hello")

        assert outcome.success is False
        assert "工作流预算" in outcome.content


class TestAgentRuntimeMaxIterations:
    """Tests for max_iterations truncation."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_max_iterations_truncation(self):
        """An LLM that always asks for a tool should be cut off at max_iterations."""
        cfg = make_config()
        cfg.llm.max_iterations = 2
        agent = AgentRuntime(config=cfg)

        def echo(**kwargs):
            return json.dumps(kwargs)

        echo_schema = {"type": "function", "function": {"name": "echo"}}
        _global_registry.register_builtin_tool("echo", echo, echo_schema)

        # The same tool-call response is returned on every LLM invocation.
        endless_tool_request = MockLLMResponse(content=None, tool_calls=[
            tc(id="call_1", name="echo", arguments='{"msg": "hello"}'),
        ])

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=endless_tool_request)
            outcome = await agent.run("Echo forever")

        assert outcome.success is False
        assert outcome.truncated is True
        assert outcome.iterations_used == 2

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_max_iterations_minimum_one(self):
        """A max_iterations of zero should still result in at least one iteration."""
        cfg = make_config()
        cfg.llm.max_iterations = 0
        agent = AgentRuntime(config=cfg)
        reply = MockLLMResponse(content="Response")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=reply)
            outcome = await agent.run("Test")

        assert outcome.iterations_used >= 1


class TestAgentRuntimeLLMRetry:
    """Tests for retrying LLM calls."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_retryable_error_triggers_retry(self):
        """A timeout on the first call should be followed by a successful second call."""
        cfg = make_config()
        cfg.llm.max_iterations = 3
        agent = AgentRuntime(config=cfg)
        attempts = 0

        async def flaky_chat(messages, tools, iteration, on_completion=None):
            # Fail only on the very first invocation.
            nonlocal attempts
            attempts += 1
            if attempts == 1:
                raise Exception("request timed out")
            return MockLLMResponse(content="Retry succeeded!")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = flaky_chat
            outcome = await agent.run("Test")

        assert outcome.success is True
        assert "Retry succeeded" in outcome.content
        assert attempts == 2

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_non_retryable_error_stops(self):
        """An "invalid API key" error should fail the run and surface in ``error``."""
        agent = AgentRuntime(config=make_config())
        failing_chat = AsyncMock(side_effect=Exception("invalid API key"))

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = failing_chat
            outcome = await agent.run("Test")

        assert outcome.success is False
        assert "invalid API key" in outcome.error


class TestAgentRuntimeSelfReview:
    """Tests for the self-review step."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_self_review_passed(self):
        """A passing review should be invoked exactly once and leave the run successful."""
        cfg = make_config()
        cfg.self_review_enabled = True
        agent = AgentRuntime(config=cfg)
        reply = MockLLMResponse(content="A well-crafted response.")
        passing_verdict = {
            "score": 0.85,
            "passed": True,
            "threshold": 0.6,
            "issues": [],
            "suggestions": [],
        }
        review_stub = AsyncMock(return_value=passing_verdict)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls, \
             patch.object(agent, "_self_review", new=review_stub):
            llm_cls.return_value.chat = AsyncMock(return_value=reply)
            outcome = await agent.run("Test")

        assert outcome.success is True
        review_stub.assert_called_once()

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_self_review_failed_triggers_correction(self):
        """A failed review followed by a passing one should end on the corrected answer."""
        cfg = make_config()
        cfg.self_review_enabled = True
        cfg.llm.max_iterations = 5
        agent = AgentRuntime(config=cfg)

        answers = iter([
            MockLLMResponse(content="Bad answer."),
            MockLLMResponse(content="Corrected answer, much better now."),
        ])
        verdicts = iter([
            {"score": 0.3, "passed": False, "threshold": 0.6,
             "issues": ["Too short"], "suggestions": ["Add more context"]},
            {"score": 0.8, "passed": True, "threshold": 0.6,
             "issues": [], "suggestions": []},
        ])

        async def fake_chat(messages, tools, iteration, on_completion=None):
            return next(answers)

        async def fake_review(text, task_context=None):
            return next(verdicts)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls, \
             patch.object(agent, "_self_review", side_effect=fake_review):
            llm_cls.return_value.chat = fake_chat
            outcome = await agent.run("Test")

        assert outcome.success is True
        assert "Corrected" in outcome.content

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_self_review_exception_fallback(self):
        """A review result carrying an ``error`` must not break the main flow."""
        cfg = make_config()
        cfg.self_review_enabled = True
        agent = AgentRuntime(config=cfg)
        reply = MockLLMResponse(content="Some answer.")
        errored_verdict = {
            "score": 0.0,
            "passed": False,
            "threshold": 0.6,
            "issues": ["review error"],
            "suggestions": ["check configuration"],
            "error": "service down",
        }
        review_stub = AsyncMock(return_value=errored_verdict)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls, \
             patch.object(agent, "_self_review", new=review_stub):
            llm_cls.return_value.chat = AsyncMock(return_value=reply)
            outcome = await agent.run("Test")

        # The review never passes here; the run is still expected to report success.
        assert outcome.success is True


class TestAgentRuntimeStreaming:
    """Tests for streaming execution."""

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_run_stream_yields_events(self):
        """The stream should emit events and end on a ``final`` or ``error`` event."""
        cfg = make_config()
        cfg.llm.max_iterations = 2
        agent = AgentRuntime(config=cfg)
        reply = MockLLMResponse(content="Streamed response.")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=reply)
            events = [event async for event in agent.run_stream("Hello")]

        assert len(events) > 0
        closing_event = events[-1]
        assert closing_event["type"] in ("final", "error")

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_run_stream_with_tool_calls(self):
        """A tool call in the stream should surface call, result and final events."""
        cfg = make_config()
        cfg.llm.max_iterations = 3
        agent = AgentRuntime(config=cfg)

        def echo(**kwargs):
            return json.dumps(kwargs)

        echo_schema = {"type": "function", "function": {"name": "echo"}}
        _global_registry.register_builtin_tool("echo", echo, echo_schema)

        pending = iter([
            MockLLMResponse(content=None, tool_calls=[
                tc(id="call_1", name="echo", arguments='{"msg": "hi"}'),
            ]),
            MockLLMResponse(content="Done streaming."),
        ])

        async def fake_chat(messages, tools, iteration, on_completion=None):
            return next(pending)

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = fake_chat
            events = [event async for event in agent.run_stream("Hello")]

        seen_types = [event["type"] for event in events]
        assert "tool_call" in seen_types
        assert "tool_result" in seen_types
        assert "final" in seen_types

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_run_stream_budget_error(self):
        """With a zero LLM budget the stream should contain an ``error`` event."""
        cfg = make_config()
        cfg.budget.max_llm_invocations = 0
        agent = AgentRuntime(config=cfg)
        agent._llm_invocations = 0

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock()
            events = [event async for event in agent.run_stream("Test")]

        assert any(event["type"] == "error" for event in events)


class TestAgentRuntimeConfig:
    """Tests related to configuration."""

    @pytest.mark.unit
    def test_default_config_values(self):
        """A bare ``AgentConfig()`` should carry the expected default values."""
        defaults = AgentConfig()
        assert defaults.name == "default_agent"
        assert defaults.llm.max_iterations == 10
        assert defaults.budget.max_llm_invocations == 200
        assert defaults.budget.max_tool_calls == 500
        assert defaults.memory.enabled is True
        assert defaults.self_review_enabled is False

    @pytest.mark.unit
    def test_minimal_config_creates_runtime(self):
        """A runtime built from a name-only config should start with clean counters."""
        agent = AgentRuntime(config=AgentConfig(name="minimal"))
        assert agent.config.name == "minimal"
        assert agent._llm_invocations == 0
        assert agent._memory_context_loaded is False

    @pytest.mark.unit
    @pytest.mark.asyncio
    async def test_llm_invocation_counter_resets_per_run(self):
        """The LLM invocation counter should restart on every ``run`` call."""
        agent = AgentRuntime(config=make_config())
        reply = MockLLMResponse(content="OK")

        patchers, llm_patcher = _base_mocks(agent)
        with patchers["runtime"], patchers["memory"], llm_patcher as llm_cls:
            llm_cls.return_value.chat = AsyncMock(return_value=reply)

            await agent.run("First")
            assert agent._llm_invocations >= 1

            # A second run must not add on top of the first run's count.
            await agent.run("Second")
            assert agent._llm_invocations == 1