fix: check allowed file extensions in the RAG transform pipeline and use a set instead of a list for faster file-extension lookups (#26593)

This commit is contained in:
Bowen Liang
2025-10-09 10:21:56 +08:00
committed by GitHub
parent 89821d66bb
commit 40d35304ea
3 changed files with 45 additions and 16 deletions

View File

@@ -149,8 +149,7 @@ class RagPipelineTransformService:
file_extensions = node.get("data", {}).get("fileExtensions", [])
if not file_extensions:
return node
file_extensions = [file_extension.lower() for file_extension in file_extensions]
node["data"]["fileExtensions"] = DOCUMENT_EXTENSIONS
node["data"]["fileExtensions"] = [ext.lower() for ext in file_extensions if ext in DOCUMENT_EXTENSIONS]
return node
def _deal_knowledge_index(