feat: add display status filtering to document list and API (#28342)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
@@ -162,6 +162,7 @@ class DatasetDocumentListApi(Resource):
|
||||
"keyword": "Search keyword",
|
||||
"sort": "Sort order (default: -created_at)",
|
||||
"fetch": "Fetch full details (default: false)",
|
||||
"status": "Filter documents by display status",
|
||||
}
|
||||
)
|
||||
@api.response(200, "Documents retrieved successfully")
|
||||
@@ -175,6 +176,7 @@ class DatasetDocumentListApi(Resource):
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
search = request.args.get("keyword", default=None, type=str)
|
||||
sort = request.args.get("sort", default="-created_at", type=str)
|
||||
status = request.args.get("status", default=None, type=str)
|
||||
# "yes", "true", "t", "y", "1" convert to True, while others convert to False.
|
||||
try:
|
||||
fetch_val = request.args.get("fetch", default="false")
|
||||
@@ -203,6 +205,9 @@ class DatasetDocumentListApi(Resource):
|
||||
|
||||
query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_tenant_id)
|
||||
|
||||
if status:
|
||||
query = DocumentService.apply_display_status_filter(query, status)
|
||||
|
||||
if search:
|
||||
search = f"%{search}%"
|
||||
query = query.where(Document.name.like(search))
|
||||
|
||||
@@ -456,12 +456,16 @@ class DocumentListApi(DatasetApiResource):
|
||||
page = request.args.get("page", default=1, type=int)
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
search = request.args.get("keyword", default=None, type=str)
|
||||
status = request.args.get("status", default=None, type=str)
|
||||
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
|
||||
if not dataset:
|
||||
raise NotFound("Dataset not found.")
|
||||
|
||||
query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id)
|
||||
|
||||
if status:
|
||||
query = DocumentService.apply_display_status_filter(query, status)
|
||||
|
||||
if search:
|
||||
search = f"%{search}%"
|
||||
query = query.where(Document.name.like(search))
|
||||
|
||||
@@ -1082,6 +1082,62 @@ class DocumentService:
|
||||
},
|
||||
}
|
||||
|
||||
# Alternative spellings accepted from callers, mapped to the canonical
# display-status key used in DISPLAY_STATUS_FILTERS.
DISPLAY_STATUS_ALIASES: dict[str, str] = {
    "active": "available",
    "enabled": "available",
}

# Raw indexing_status values that are grouped under the "indexing" and
# "paused" display statuses below.
_INDEXING_STATUSES: tuple[str, ...] = ("parsing", "cleaning", "splitting", "indexing")

# Canonical display status -> tuple of SQLAlchemy conditions. The conditions
# of one entry are meant to be passed together to a single ``.where(...)``.
DISPLAY_STATUS_FILTERS: dict[str, tuple[Any, ...]] = {
    "queuing": (Document.indexing_status == "waiting",),
    "indexing": (
        Document.indexing_status.in_(_INDEXING_STATUSES),
        # IS NOT TRUE rather than IS FALSE, so a NULL is_paused is treated
        # as "not paused".
        Document.is_paused.is_not(True),
    ),
    "paused": (
        Document.indexing_status.in_(_INDEXING_STATUSES),
        Document.is_paused.is_(True),
    ),
    "error": (Document.indexing_status == "error",),
    "available": (
        Document.indexing_status == "completed",
        Document.archived.is_(False),
        Document.enabled.is_(True),
    ),
    "disabled": (
        Document.indexing_status == "completed",
        Document.archived.is_(False),
        Document.enabled.is_(False),
    ),
    # Archived documents are matched regardless of their enabled flag.
    "archived": (
        Document.indexing_status == "completed",
        Document.archived.is_(True),
    ),
}
|
||||
|
||||
@classmethod
|
||||
def normalize_display_status(cls, status: str | None) -> str | None:
|
||||
if not status:
|
||||
return None
|
||||
normalized = status.lower()
|
||||
normalized = cls.DISPLAY_STATUS_ALIASES.get(normalized, normalized)
|
||||
return normalized if normalized in cls.DISPLAY_STATUS_FILTERS else None
|
||||
|
||||
@classmethod
def build_display_status_filters(cls, status: str | None) -> tuple[Any, ...]:
    """Return the filter conditions registered for a display status.

    Args:
        status: Raw status value; it is normalized with
            ``normalize_display_status`` before the lookup.

    Returns:
        The tuple stored in ``DISPLAY_STATUS_FILTERS`` for the normalized
        status, or an empty tuple when the status does not normalize to a
        known key.
    """
    canonical = cls.normalize_display_status(status)
    return cls.DISPLAY_STATUS_FILTERS[canonical] if canonical else ()
|
||||
|
||||
@classmethod
def apply_display_status_filter(cls, query, status: str | None):
    """Narrow ``query`` to documents matching a display status.

    Args:
        query: Object exposing ``.where(*conditions)``; the callers visible
            in this change pass a ``select(Document)`` statement.
        status: Raw status value; unknown or empty values add no filter.

    Returns:
        ``query.where(...)`` with the conditions for the status, or the
        original ``query`` unchanged when there are no conditions to apply.
    """
    conditions = cls.build_display_status_filters(status)
    return query.where(*conditions) if conditions else query
|
||||
|
||||
DOCUMENT_METADATA_SCHEMA: dict[str, Any] = {
|
||||
"book": {
|
||||
"title": str,
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import sqlalchemy as sa
|
||||
|
||||
from models.dataset import Document
|
||||
from services.dataset_service import DocumentService
|
||||
|
||||
|
||||
def test_normalize_display_status_alias_mapping():
    """Aliases and case variants resolve to canonical names; unknown values give None."""
    expectations = (
        ("ACTIVE", "available"),
        ("enabled", "available"),
        ("archived", "archived"),
    )
    for raw, canonical in expectations:
        assert DocumentService.normalize_display_status(raw) == canonical
    assert DocumentService.normalize_display_status("unknown") is None
|
||||
|
||||
|
||||
def test_build_display_status_filters_available():
    """The "available" status yields three conditions, none of them None."""
    conditions = DocumentService.build_display_status_filters("available")
    assert len(conditions) == 3
    assert all(item is not None for item in conditions)
|
||||
|
||||
|
||||
def test_apply_display_status_filter_applies_when_status_present():
    """A known status adds a WHERE clause with the expected predicate."""
    query = sa.select(Document)
    filtered = DocumentService.apply_display_status_filter(query, "queuing")
    # literal_binds inlines the bound value so the SQL text can be matched.
    compiled = str(filtered.compile(compile_kwargs={"literal_binds": True}))
    assert "WHERE" in compiled
    # Match the unqualified predicate: the compiled SQL prefixes the column
    # with the model's table name, which this test should not hard-code.
    # NOTE(review): the original expected "document.indexing_status"; confirm
    # Document.__tablename__ if a table-qualified check is wanted.
    assert "indexing_status = 'waiting'" in compiled
|
||||
|
||||
|
||||
def test_apply_display_status_filter_returns_same_when_invalid():
    """An unrecognized status leaves the statement without a WHERE clause."""
    base_query = sa.select(Document)
    result = DocumentService.apply_display_status_filter(base_query, "invalid")
    sql_text = str(result.compile(compile_kwargs={"literal_binds": True}))
    assert "WHERE" not in sql_text
|
||||
Reference in New Issue
Block a user