# aiagent/_run_round10.py

"""
Execute Round 10: Android Client Testing with improved virtual team.
"""
import urllib.request
import json
import time

# Base URL for every API request this script makes (server on localhost, port 8037).
BASE = "http://127.0.0.1:8037/api/v1"
# Team identifier interpolated into the /teams/{TEAM_ID}/execute/stream endpoint path.
TEAM_ID = "5b032316-9e31-4ecb-a2b1-b46c7beff2eb"

def login(username="admin", password="123456", timeout=30):
    """Authenticate against the API and return the bearer access token.

    Sends a form-encoded POST to ``{BASE}/auth/login`` and reads the
    ``access_token`` field from the JSON response body.

    Args:
        username: Account name to log in with.
        password: Password for that account.
        timeout: Timeout in seconds for blocking socket operations of the
            login request, so an unresponsive server cannot hang the script.

    Returns:
        The value of ``access_token`` from the response JSON.

    Raises:
        urllib.error.URLError: If the server cannot be reached or answers
            with an HTTP error status (``HTTPError`` is a subclass).
        KeyError: If the response JSON has no ``access_token`` field.
    """
    # Local import: the file's import header only pulls in urllib.request.
    from urllib.parse import urlencode

    # urlencode escapes reserved characters, so credentials containing
    # "&", "=" or non-ASCII text are transmitted correctly.
    form = urlencode({"username": username, "password": password}).encode("utf-8")
    req = urllib.request.Request(
        f"{BASE}/auth/login",
        data=form,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    # The context manager closes the HTTP response (and its socket) even if
    # reading or JSON parsing fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())["access_token"]

# Authenticate once at start-up; only the first 40 characters of the token
# are echoed to the console.
TOKEN = login()
print("Logged in. Token: {}...".format(TOKEN[:40]))

# Task prompt submitted as the "project_description" field of the execution
# request. The text is Markdown; backslashes are doubled in the literal so the
# resulting string contains single-backslash Windows paths.
TASK_DESCRIPTION = """## Round 10: Android Client Comprehensive Quality Audit

### Target
D:\\aaa\\aiagent\\android - This is a Kotlin/Jetpack Compose Android client for the Tiangong AI Agent Platform.

### Mission
Each team member MUST scan REAL source code files (NOT design documents). The target directory contains 40+ Kotlin source files. Use your tools to read them.

### MANDATORY FOR ALL AGENTS:
1. FIRST: Use `project_scan` on D:\\aaa\\aiagent\\android to identify project type and key files
2. SECOND: Use `list_files` with recursive=true to see all source files
3. THIRD: Use `file_read` to read AT LEAST 5-8 core source files completely
4. FOURTH: Use `grep_search` to find patterns across the codebase
5. DO NOT analyze design documents (android-app-design.md) as a substitute for real code
6. Every finding MUST cite specific file paths and line numbers from ACTUAL code you read

### Quality Standards:
- Compare against ChatGPT Android app and Doubao (豆包) for UX benchmarking
- Every bug must have: file path, line number, code snippet, fix suggestion
- Cross-reference findings between phases to avoid duplicate counting
- P0 = blocks release, P1 = serious, P2 = improvement

### Expected Deliverables per Phase:
1. Architect: Architecture context doc based on real code scan (not design doc)
2. Test Planner: 20+ test scenarios with risk assessment
3. Functional Tester: 15+ real bugs with file:line evidence
4. UX Reviewer: Benchmark vs ChatGPT/豆包 with gap analysis
5. Edge Explorer: 40+ boundary scenarios from code analysis
6. Performance Evaluator: Memory/CPU/thread anti-patterns from code

### Success Criteria:
- ALL findings cite real source file paths and line numbers
- NO findings based purely on design documents
- Cross-phase duplication < 10%
- At least 50% of findings independently verifiable by reading the cited code
"""

def _decode_event(raw):
    """Decode the bytes of one complete SSE event to text.

    UTF-8 is tried first; bytes that are not valid UTF-8 are decoded as GBK
    with undecodable bytes replaced, so this never raises.
    """
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode("gbk", errors="replace")


def _iter_sse_payloads(response, chunk_size=4096):
    """Yield the JSON object carried by each ``data:`` line of an SSE stream.

    Args:
        response: Object with a ``read(n)`` method returning bytes and
            ``b""`` at end of stream (e.g. the result of ``urlopen``).
        chunk_size: Number of bytes requested per ``read`` call.

    Yields:
        One ``dict`` per ``data:`` line whose payload parses as a JSON
        object. Lines that are not valid JSON, or whose JSON is not an
        object, are skipped.
    """
    # Buffer raw bytes and decode only complete events: decoding each chunk
    # separately would break multi-byte characters that straddle a chunk
    # boundary.
    pending = b""
    for chunk in iter(lambda: response.read(chunk_size), b""):
        # Normalise CRLF so both "\n\n" and "\r\n\r\n" event separators work.
        # Done on the joined buffer so a "\r\n" split across chunks is handled.
        pending = (pending + chunk).replace(b"\r\n", b"\n")
        while b"\n\n" in pending:
            raw_event, pending = pending.split(b"\n\n", 1)
            for line in _decode_event(raw_event).strip().split("\n"):
                if not line.startswith("data:"):
                    continue
                try:
                    # json.loads ignores the optional space after "data:".
                    payload = json.loads(line[len("data:"):])
                except json.JSONDecodeError:
                    # Best-effort monitor: non-JSON data lines are ignored.
                    continue
                if isinstance(payload, dict):
                    yield payload


def _report_event(event):
    """Print a progress line for one decoded stream event (a ``dict``)."""
    kind = event.get("type", "?")
    if kind == "phase_start":
        print(f"\n[PHASE START] {event.get('name', '?')} - {event.get('role', '?')}")
    elif kind == "phase_done":
        succeeded = event.get("success", False)
        # "output" may be absent or JSON null; both count as empty.
        output_len = len(event.get("output") or "")
        print(f"[PHASE DONE] {event.get('name', '?')} - success={succeeded}, output={output_len} chars")
    elif kind == "final":
        print(f"\n[FINAL] Project: {event.get('project_path', '?')}")
        # "files" may be absent or JSON null; both count as no files.
        files = event.get("files") or []
        print(f"Files: {len(files)} total")
        # Show at most the first ten entries to keep the console readable.
        for path in files[:10]:
            print(f"  - {path}")
        if len(files) > 10:
            print(f"  ... and {len(files)-10} more")
    elif kind == "error":
        print(f"\n[ERROR] {event.get('content', '?')}")
    else:
        # Any other event type is shown as a compact inline tag.
        print(f"[{kind}] ", end="", flush=True)


print("\n=== Sending execution request ===")
print(f"Task: {TASK_DESCRIPTION[:200]}...")

# Use the streaming endpoint so progress can be monitored while the team runs.
body = json.dumps({"project_description": TASK_DESCRIPTION}).encode("utf-8")

req = urllib.request.Request(
    f"{BASE}/teams/{TEAM_ID}/execute/stream",
    data=body,
    headers={
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
    },
    method="POST",
)

print("Connecting to SSE stream...")
try:
    # 600 s timeout for blocking socket operations (connect and each read);
    # it is not a cap on the total duration of the stream.
    # The context manager closes the connection on normal end and on error.
    with urllib.request.urlopen(req, timeout=600) as response:
        for event in _iter_sse_payloads(response):
            _report_event(event)
    print("\n=== Stream ended ===")
except Exception as e:
    # Top-level boundary of the script: report the failure and exit normally.
    print(f"\nException: {e}")