feat: add display status filtering to document list and API (#28342)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
GuanMu
2025-11-20 11:27:44 +08:00
committed by GitHub
parent a5d0e68675
commit 1d2cdf3489
12 changed files with 271 additions and 36 deletions

View File

@@ -162,6 +162,7 @@ class DatasetDocumentListApi(Resource):
"keyword": "Search keyword",
"sort": "Sort order (default: -created_at)",
"fetch": "Fetch full details (default: false)",
"status": "Filter documents by display status",
}
)
@api.response(200, "Documents retrieved successfully")
@@ -175,6 +176,7 @@ class DatasetDocumentListApi(Resource):
limit = request.args.get("limit", default=20, type=int)
search = request.args.get("keyword", default=None, type=str)
sort = request.args.get("sort", default="-created_at", type=str)
status = request.args.get("status", default=None, type=str)
# "yes", "true", "t", "y", "1" convert to True, while others convert to False.
try:
fetch_val = request.args.get("fetch", default="false")
@@ -203,6 +205,9 @@ class DatasetDocumentListApi(Resource):
query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_tenant_id)
if status:
query = DocumentService.apply_display_status_filter(query, status)
if search:
search = f"%{search}%"
query = query.where(Document.name.like(search))

View File

@@ -456,12 +456,16 @@ class DocumentListApi(DatasetApiResource):
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
search = request.args.get("keyword", default=None, type=str)
status = request.args.get("status", default=None, type=str)
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise NotFound("Dataset not found.")
query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id)
if status:
query = DocumentService.apply_display_status_filter(query, status)
if search:
search = f"%{search}%"
query = query.where(Document.name.like(search))

View File

@@ -1082,6 +1082,62 @@ class DocumentService:
},
}
# Alternative spellings accepted for a display status, mapped to the canonical
# key used in DISPLAY_STATUS_FILTERS.
DISPLAY_STATUS_ALIASES: dict[str, str] = {
"active": "available",
"enabled": "available",
}
# Raw indexing_status values that all count as "in progress"; whether such a
# document is shown as "indexing" or "paused" depends on is_paused (see below).
_INDEXING_STATUSES: tuple[str, ...] = ("parsing", "cleaning", "splitting", "indexing")
# Canonical display status -> tuple of SQLAlchemy column conditions.
# Every condition in a tuple must hold for a document to have that status
# (the tuples are unpacked into a single query.where(...) call).
DISPLAY_STATUS_FILTERS: dict[str, tuple[Any, ...]] = {
"queuing": (Document.indexing_status == "waiting",),
"indexing": (
Document.indexing_status.in_(_INDEXING_STATUSES),
# IS NOT TRUE rather than IS FALSE: a NULL is_paused also counts as
# "not paused" under SQL three-valued logic.
Document.is_paused.is_not(True),
),
"paused": (
Document.indexing_status.in_(_INDEXING_STATUSES),
Document.is_paused.is_(True),
),
"error": (Document.indexing_status == "error",),
# The three "completed" statuses below are told apart by the archived and
# enabled flags.
"available": (
Document.indexing_status == "completed",
Document.archived.is_(False),
Document.enabled.is_(True),
),
"disabled": (
Document.indexing_status == "completed",
Document.archived.is_(False),
Document.enabled.is_(False),
),
# Archived documents match regardless of the enabled flag.
"archived": (
Document.indexing_status == "completed",
Document.archived.is_(True),
),
}
@classmethod
def normalize_display_status(cls, status: str | None) -> str | None:
if not status:
return None
normalized = status.lower()
normalized = cls.DISPLAY_STATUS_ALIASES.get(normalized, normalized)
return normalized if normalized in cls.DISPLAY_STATUS_FILTERS else None
@classmethod
def build_display_status_filters(cls, status: str | None) -> tuple[Any, ...]:
    """Look up the filter conditions for a display status.

    Args:
        status: Raw display status; it is passed through
            ``normalize_display_status`` before the lookup.

    Returns:
        The tuple stored in ``DISPLAY_STATUS_FILTERS`` for the normalized
        status, or an empty tuple when normalization yields nothing.
    """
    canonical = cls.normalize_display_status(status)
    return cls.DISPLAY_STATUS_FILTERS[canonical] if canonical else ()
@classmethod
def apply_display_status_filter(cls, query, status: str | None):
    """Restrict ``query`` to documents with the given display status.

    Args:
        query: Object exposing ``where(*conditions)`` (callers in this file
            pass a ``select(Document)`` statement).
        status: Raw display status; resolved via
            ``build_display_status_filters``.

    Returns:
        ``query.where(...)`` with the status conditions applied, or the
        ``query`` object itself, untouched, when there are no conditions.
    """
    conditions = cls.build_display_status_filters(status)
    if conditions:
        return query.where(*conditions)
    return query
DOCUMENT_METADATA_SCHEMA: dict[str, Any] = {
"book": {
"title": str,

View File

@@ -0,0 +1,33 @@
import sqlalchemy as sa
from models.dataset import Document
from services.dataset_service import DocumentService
def test_normalize_display_status_alias_mapping():
    """Aliases and mixed case resolve to canonical keys; unknown values give None."""
    expectations = (
        ("ACTIVE", "available"),
        ("enabled", "available"),
        ("archived", "archived"),
    )
    for raw, canonical in expectations:
        assert DocumentService.normalize_display_status(raw) == canonical
    assert DocumentService.normalize_display_status("unknown") is None
def test_build_display_status_filters_available():
    """The "available" status yields exactly three non-None conditions."""
    conditions = DocumentService.build_display_status_filters("available")
    assert len(conditions) == 3
    # Identity checks only: comparing a SQLAlchemy clause with == would build
    # another expression instead of returning a bool.
    assert all(clause is not None for clause in conditions)
def test_apply_display_status_filter_applies_when_status_present():
    """A recognised status adds a WHERE clause with the matching predicate."""
    query = sa.select(Document)
    filtered = DocumentService.apply_display_status_filter(query, "queuing")
    # literal_binds inlines the bound value so it can be matched as text.
    compiled = str(filtered.compile(compile_kwargs={"literal_binds": True}))
    assert "WHERE" in compiled
    # Match the column predicate only: the rendered column is prefixed with
    # the mapped table name, which this test should not hard-code.
    assert "indexing_status = 'waiting'" in compiled
def test_apply_display_status_filter_returns_same_when_invalid():
    """An unrecognised status leaves the statement without a WHERE clause."""
    base_query = sa.select(Document)
    result = DocumentService.apply_display_status_filter(base_query, "invalid")
    rendered_sql = str(result.compile(compile_kwargs={"literal_binds": True}))
    assert "WHERE" not in rendered_sql