fix: delete agent 500 error + dynamic personality + deployment guide
- Fix delete agent 500: clean up FK records (agent_llm_logs, permissions, schedules, executions, team_members) and unbind goals/tasks before delete
- Remove hardcoded personality templates in Android; replace with dynamic system prompt generation from name + description
- Set promptSectionsEnabled=false to bypass PromptComposer for personality
- Add Tencent Cloud Linux deployment guide (Docker Compose)
- Accumulated backend service updates, frontend UI fixes, Android app changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
214
test/test_stress_long_conversation.py
Normal file
214
test/test_stress_long_conversation.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
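"""End-to-end long-conversation stress test: verify over 50 rounds that context
compaction does not lose early context.

Test flow:
1. Round 1: inject the key facts (the user's name is 小明, favourite book is 《三体》,
   job is Python backend engineer).
2. Rounds 2-9: filler rounds — ordinary prompts that accumulate tokens.
3. Round 10: first recall check — ask for the facts injected in round 1.
4. Rounds 11-48: filler rounds — keep accumulating tokens.
5. Round 49: final recall check — confirm the early context has not been lost.
6. Round 50: one last filler round.

Observed metrics:
- compaction_triggered: whether compaction was triggered
- token_usage.input_tokens: input-token trend (compaction should push it down)
- fact_retained: whether the early facts are still recalled at the check rounds
- context_lost_errors: number of context-limit errors
"""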
|
||||
import sys, os, io, json, time, requests
|
||||
|
||||
# Force UTF-8 console output so the Chinese text and the ✓/✗ marks can be printed
# on consoles whose default encoding cannot represent them.
# reconfigure() switches the encoding of the existing stream in place. Replacing
# sys.stdout with a second TextIOWrapper over the same buffer fails when stdout
# has no ``.buffer`` (e.g. a redirected or captured stream) and leaves two
# wrappers owning one buffer, so it is kept only as a fallback.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
# Base URL of the API under test. Defaults to the local dev server; set the
# AGENT_API_BASE environment variable to point the test at another deployment.
BASE = os.environ.get("AGENT_API_BASE", "http://localhost:8038/api/v1")

# Total number of conversation rounds (round 1 injects the facts).
TOTAL_ROUNDS = 50

# Round on which the first recall check is sent.
FACT_CHECK_ROUND = 10

# Round on which the final recall check is sent. Derived from TOTAL_ROUNDS so the
# check is still reached when the round count is changed (49 for 50 rounds).
FINAL_CHECK_ROUND = TOTAL_ROUNDS - 1
|
||||
|
||||
# Filler prompts sent on every round that is not a recall check. The test picks
# one by round number and wraps around when the list is exhausted.
FILL_MESSAGES = [
    "请用Python写一个简单的冒泡排序函数,只输出代码不要解释",
    "请把下面这段话翻译成英语:今天天气真好,适合出去散步。",
    "写一个SQL查询,找出表中最近7天注册的用户,按注册时间降序排列",
    "解释一下什么是Docker的layer缓存机制,控制在100字以内",
    "用JavaScript写一个防抖函数debounce",
    "Python的GIL是什么?有什么影响?请简短回答",
    "写一个简单的Makefile来编译和运行一个C项目",
    "列出5个提高代码可读性的最佳实践",
    "解释HTTP状态码200、301、404、500分别代表什么",
    "写一个正则表达式来匹配中国手机号",
    "什么是RESTful API?请用三句话内解释",
    "写一个简单的git工作流命令序列(clone→branch→commit→push→PR)",
    "Python装饰器的原理是什么?请简短说明",
    "解释什么是数据库索引以及什么时候该用",
    "写一个shell脚本统计当前目录下每种文件扩展名的数量",
    "什么是CI/CD?请用100字以内解释",
    "写一个Python context manager 示例",
    "解释乐观锁和悲观锁的区别",
    "用TypeScript写一个简单的泛型函数",
    "什么是缓存穿透、缓存击穿、缓存雪崩?简短区分",
    "写一个docker-compose.yml 启动 nginx+mysql",
    "Python中__init__和__new__的区别是什么?",
    "解释TCP三次握手和四次挥手",
    "写一个简单的Redis Lua脚本示例",
    "什么是JWT?在分布式系统中如何使用?",
    "解释CSS盒模型,简短回答",
    "写一个awk命令来统计日志中每个IP的访问次数",
    "Python中asyncio的基本用法是什么?",
    "解释微服务架构的优缺点",
    "写一个简单的React Hook示例",
    "什么是Kubernetes的Pod?简短回答",
    "解释HTTPS的握手过程",
    "写一个Python生成器函数的例子",
    "什么是消息队列?列举几个常用的MQ",
    "解释数据库ACID四个特性",
    "写一个简单的Nginx反向代理配置",
    "Python中*args和**kwargs的含义",
    "什么是WebSocket?与HTTP轮询对比有什么优势?",
    "解释面向对象六大原则中的开闭原则",
    "写一个PostgreSQL的窗口函数查询示例",
]
|
||||
|
||||
# Facts sent to the agent in round 1. The recall checks later look for the name
# (小明) and the book (三体) in the agent's replies.
MEMORY_FACT = "我的名字是小明,我最喜欢读的书是刘慈欣写的《三体》三部曲,我的职业是Python后端工程师。"
|
||||
|
||||
|
||||
def login():
    """Log in to the API under test and return the bearer access token.

    Posts the test account's credentials as form data to ``{BASE}/auth/login``.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx status.
        requests.exceptions.Timeout: if the server does not answer within 30 seconds.
        KeyError: if the response body has no ``access_token`` field.
    """
    # NOTE(review): credentials are hard-coded test defaults — confirm they are
    # never valid outside a local/dev deployment.
    r = requests.post(
        f"{BASE}/auth/login",
        data={"username": "admin", "password": "123456"},
        # requests waits indefinitely without a timeout; fail fast when the
        # server is down instead of hanging the whole run.
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["access_token"]
|
||||
|
||||
|
||||
def _input_tokens(token_usage, default):
    """Return the reported ``input_tokens`` value, or *default* when usage data is absent."""
    return token_usage.get("input_tokens", default) if token_usage else default


def _mentions_memory_fact(reply):
    """Return True when *reply* names 小明 and mentions 三体 or 科幻."""
    return "小明" in reply and ("三体" in reply or "科幻" in reply)


def _post_chat(headers, message, session_id=None):
    """POST one message to the bare agent-chat endpoint and return the parsed JSON body."""
    payload = {"message": message, "streamlined": True}
    if session_id is not None:
        payload["session_id"] = session_id
    r = requests.post(f"{BASE}/agent-chat/bare", headers=headers, json=payload, timeout=180)
    r.raise_for_status()
    return r.json()


def test_long_conversation():
    """Run the multi-round conversation and report whether early facts survive.

    Round 1 sends MEMORY_FACT and starts a session. Every later round reuses that
    session: FACT_CHECK_ROUND and FINAL_CHECK_ROUND ask the agent to recall the
    facts, and all other rounds send a filler prompt from FILL_MESSAGES. Token
    usage from every reply is collected through record_metrics(), and a summary
    is printed at the end.

    Returns:
        True when both recall checks found the facts and no context-limit error
        occurred, otherwise False.

    Raises:
        requests.exceptions.RequestException: if login or round 1 fails. Failures
            in later rounds are printed and the loop continues.
    """
    token = login()
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    print(f"=== 长对话压力测试 — {TOTAL_ROUNDS} 轮 ===\n")
    print(f"注入事实: {MEMORY_FACT}")
    print(f"首次检查: 第 {FACT_CHECK_ROUND} 轮")
    print(f"最终检查: 第 {FINAL_CHECK_ROUND} 轮\n")

    metrics = {
        "token_history": [],
        "compaction_triggered": False,
        "context_lost_errors": 0,
        "fact_retained_at_10": False,
        "fact_retained_at_49": False,
        "total_time_ms": 0,
    }

    # round number -> (metrics key to set, label suffix, prompt to send)
    recall_checks = {
        FACT_CHECK_ROUND: (
            "fact_retained_at_10",
            "首次记忆检查",
            "我之前告诉过你我叫什么名字?最喜欢什么书?我的职业是什么?请简要回答。",
        ),
        FINAL_CHECK_ROUND: (
            "fact_retained_at_49",
            "最终记忆检查",
            "请回忆一下:我们第一次对话时我告诉你我叫什么名字?我最喜欢的书是什么?我的职业是什么?",
        ),
    }

    start_time = time.time()

    # ── Round 1: inject the facts to remember ──
    print("─" * 50)
    print(f"Round 1/{TOTAL_ROUNDS}: 注入记忆事实…")
    d = _post_chat(headers, f"请记住以下关于我的信息:{MEMORY_FACT} 请回复'已记住'即可。")
    sid = d["session_id"]
    tu = d.get("token_usage")
    record_metrics(metrics, 1, tu, d)
    print(f" session={sid[:16]}…, tokens_in={_input_tokens(tu, 'N/A')}")
    print(f" reply: {d['content'][:100]}")

    # ── Rounds 2-N: filler prompts plus the scheduled recall checks ──
    for round_num in range(2, TOTAL_ROUNDS + 1):
        check = recall_checks.get(round_num)
        if check is not None:
            metric_key, title, msg = check
        else:
            title = "填充"
            msg = FILL_MESSAGES[(round_num - 2) % len(FILL_MESSAGES)]

        print(f"Round {round_num}/{TOTAL_ROUNDS}: {title}…", end=" ", flush=True)
        try:
            d = _post_chat(headers, msg, sid)
            tu = d.get("token_usage")
            record_metrics(metrics, round_num, tu, d)

            if check is not None:
                retained = _mentions_memory_fact(d["content"])
                metrics[metric_key] = retained
                status = "✓ 记住" if retained else "✗ 遗忘"
                print(f"{status} | tokens_in={_input_tokens(tu, '?')}")
                print(f" reply: {d['content'][:200]}")
            elif round_num % 10 == 0:
                print(f"✓ tokens_in={_input_tokens(tu, '?')}")
            else:
                print("✓")

        except requests.exceptions.HTTPError as e:
            # The exception always has a ``response`` attribute, but it may be None.
            resp = e.response
            status_code = resp.status_code if resp is not None else None
            err_body = resp.text[:200] if resp is not None else str(e)
            print(f"✗ HTTP {status_code if status_code is not None else '?'}: {err_body}")
            # Compare the status code itself; a substring test on the whole
            # message could also match digits elsewhere in the text.
            if "context" in err_body.lower() or status_code == 413:
                metrics["context_lost_errors"] += 1
                # The counter is cumulative over the run; it is not reset by
                # a successful round.
                if metrics["context_lost_errors"] >= 3:
                    print(" ⚠ 连续上下文超限,中止测试")
                    break
        except requests.exceptions.Timeout:
            print("✗ 超时")
        except Exception as e:
            # Best effort: one bad round (bad JSON, missing field, connection
            # reset) must not abort the remaining rounds.
            print(f"✗ {e}")

    metrics["total_time_ms"] = int((time.time() - start_time) * 1000)

    # ── Report ──
    print("\n" + "=" * 50)
    print("测试报告")
    print("=" * 50)
    print(f"总轮数: {TOTAL_ROUNDS}")
    print(f"总耗时: {metrics['total_time_ms']/1000:.1f}s")
    print(f"第{FACT_CHECK_ROUND}轮记忆保留: {'✓ 通过' if metrics['fact_retained_at_10'] else '✗ 失败'}")
    print(f"第{FINAL_CHECK_ROUND}轮记忆保留: {'✓ 通过' if metrics['fact_retained_at_49'] else '✗ 失败'}")
    print(f"上下文超限错误: {metrics['context_lost_errors']} 次")

    token_entries = metrics["token_history"]
    if token_entries:
        first = token_entries[0]
        print(f"\nToken 用量走势:")
        # Print the recorded round number: the first entry is not round 1 when
        # round 1 came back without usage data.
        print(f" Round {first['round']}: {first['input_tokens']:,} tokens")
        # A drop of more than 30% against the previous round is reported as a
        # compaction point.
        prev = first["input_tokens"]
        for entry in token_entries[1:]:
            if entry["input_tokens"] < prev * 0.7:
                print(f" Round {entry['round']:>2}: {entry['input_tokens']:,} tokens ← 压缩触发")
            prev = entry["input_tokens"]
        print(f" Round {token_entries[-1]['round']:>2}: {token_entries[-1]['input_tokens']:,} tokens")

    # The final recall check is the point of the test, so it must count toward
    # the verdict together with the first check and the error counter.
    passed = (
        metrics["fact_retained_at_10"]
        and metrics["fact_retained_at_49"]
        and not metrics["context_lost_errors"]
    )
    print(f"\n=== {'测试通过 ✓' if passed else '测试失败 ✗'} ===")
    return passed
|
||||
|
||||
|
||||
def record_metrics(metrics, round_num, token_usage, response):
    """Append one round's token-usage snapshot to ``metrics["token_history"]``.

    Args:
        metrics: run-wide metrics dict; must contain a ``"token_history"`` list.
            ``"compaction_triggered"`` is set to True when this round signals
            compaction.
        round_num: 1-based round number stored with the snapshot.
        token_usage: the ``token_usage`` dict from the chat response, or None /
            empty when the server sent none, in which case nothing is recorded.
        response: the full response body; currently unused, kept so existing
            callers keep working.
    """
    if not token_usage:
        return

    # ``.get(key, 0)`` still returns None when the key is present with a JSON
    # null, which would break the numeric comparison and ``:,`` formatting in
    # the report; ``or`` normalises both the missing and the null case.
    attempts = token_usage.get("compaction_attempts") or 0
    critical = bool(token_usage.get("is_critical"))

    metrics["token_history"].append({
        "round": round_num,
        "input_tokens": token_usage.get("input_tokens") or 0,
        "cumulative_total": token_usage.get("cumulative_total") or 0,
        "is_warning": bool(token_usage.get("is_warning")),
        "is_critical": critical,
        "compaction_attempts": attempts,
    })

    # NOTE(review): compaction_attempts is presumably the number of compaction
    # runs so far in the session, so a non-zero value is treated as "triggered"
    # in addition to is_critical — confirm against the API.
    if critical or attempts > 0:
        metrics["compaction_triggered"] = True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Turn the boolean verdict into the process exit status so shells and CI
    # jobs can tell a failed run from a passed one.
    sys.exit(0 if test_long_conversation() else 1)
|
||||
Reference in New Issue
Block a user