feat: knowledge pipeline (#25360)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: twwu <twwu@dify.ai>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com>
Co-authored-by: quicksand <quicksandzn@gmail.com>
Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Joel <iamjoel007@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Co-authored-by: Hanqing Zhao <sherry9277@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
-LAN-
2025-09-18 12:49:10 +08:00
committed by GitHub
parent 7dadb33003
commit 85cda47c70
1772 changed files with 102407 additions and 31710 deletions

View File

@@ -1,11 +1,9 @@
from .entities import BaseIterationNodeData, BaseIterationState, BaseLoopNodeData, BaseLoopState, BaseNodeData
from .node import BaseNode
__all__ = [
"BaseIterationNodeData",
"BaseIterationState",
"BaseLoopNodeData",
"BaseLoopState",
"BaseNode",
"BaseNodeData",
]

View File

@@ -1,12 +1,37 @@
import json
from abc import ABC
from collections.abc import Sequence
from enum import StrEnum
from typing import Any, Union
from pydantic import BaseModel, model_validator
from core.workflow.nodes.base.exc import DefaultValueTypeError
from core.workflow.nodes.enums import ErrorStrategy
from core.workflow.enums import ErrorStrategy
from .exc import DefaultValueTypeError
_NumberType = Union[int, float]
class RetryConfig(BaseModel):
    """Retry settings attached to a node.

    The interval is stored in milliseconds; ``retry_interval_seconds`` exposes
    the same value in seconds.
    """

    max_retries: int = 0  # maximum number of retries
    retry_interval: int = 0  # pause between retries, in milliseconds
    retry_enabled: bool = False  # whether retrying is switched on

    @property
    def retry_interval_seconds(self) -> float:
        """Return ``retry_interval`` converted from milliseconds to seconds."""
        interval_ms = self.retry_interval
        return interval_ms / 1000
class VariableSelector(BaseModel):
    """A variable key paired with the selector path used to look up its value."""

    # Key identifying the variable.
    variable: str
    # Ordered path components that select the variable's value.
    value_selector: Sequence[str]
class DefaultValueType(StrEnum):
@@ -19,9 +44,6 @@ class DefaultValueType(StrEnum):
ARRAY_FILES = "array[file]"
NumberType = Union[int, float]
class DefaultValue(BaseModel):
value: Any = None
type: DefaultValueType
@@ -51,9 +73,6 @@ class DefaultValue(BaseModel):
@model_validator(mode="after")
def validate_value_type(self) -> "DefaultValue":
if self.type is None:
raise DefaultValueTypeError("type field is required")
# Type validation configuration
type_validators = {
DefaultValueType.STRING: {
@@ -61,7 +80,7 @@ class DefaultValue(BaseModel):
"converter": lambda x: x,
},
DefaultValueType.NUMBER: {
"type": NumberType,
"type": _NumberType,
"converter": self._convert_number,
},
DefaultValueType.OBJECT: {
@@ -70,7 +89,7 @@ class DefaultValue(BaseModel):
},
DefaultValueType.ARRAY_NUMBER: {
"type": list,
"element_type": NumberType,
"element_type": _NumberType,
"converter": self._parse_json,
},
DefaultValueType.ARRAY_STRING: {
@@ -107,18 +126,6 @@ class DefaultValue(BaseModel):
return self
class RetryConfig(BaseModel):
    """Retry settings attached to a node.

    The interval is stored in milliseconds; ``retry_interval_seconds`` exposes
    the same value in seconds.
    """

    max_retries: int = 0  # maximum number of retries
    retry_interval: int = 0  # pause between retries, in milliseconds
    retry_enabled: bool = False  # whether retrying is switched on

    @property
    def retry_interval_seconds(self) -> float:
        """Return ``retry_interval`` converted from milliseconds to seconds."""
        interval_ms = self.retry_interval
        return interval_ms / 1000
class BaseNodeData(ABC, BaseModel):
title: str
desc: str | None = None

View File

@@ -1,81 +1,175 @@
import logging
from abc import abstractmethod
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, ClassVar, Union
from functools import singledispatchmethod
from typing import Any, ClassVar
from uuid import uuid4
from core.workflow.entities.node_entities import NodeRunResult
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus
from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig
from core.workflow.nodes.enums import ErrorStrategy, NodeType
from core.workflow.nodes.event import NodeEvent, RunCompletedEvent
from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.entities import AgentNodeStrategyInit, GraphInitParams, GraphRuntimeState
from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeState, NodeType, WorkflowNodeExecutionStatus
from core.workflow.graph_events import (
GraphNodeEventBase,
NodeRunAgentLogEvent,
NodeRunFailedEvent,
NodeRunIterationFailedEvent,
NodeRunIterationNextEvent,
NodeRunIterationStartedEvent,
NodeRunIterationSucceededEvent,
NodeRunLoopFailedEvent,
NodeRunLoopNextEvent,
NodeRunLoopStartedEvent,
NodeRunLoopSucceededEvent,
NodeRunRetrieverResourceEvent,
NodeRunStartedEvent,
NodeRunStreamChunkEvent,
NodeRunSucceededEvent,
)
from core.workflow.node_events import (
AgentLogEvent,
IterationFailedEvent,
IterationNextEvent,
IterationStartedEvent,
IterationSucceededEvent,
LoopFailedEvent,
LoopNextEvent,
LoopStartedEvent,
LoopSucceededEvent,
NodeEventBase,
NodeRunResult,
RunRetrieverResourceEvent,
StreamChunkEvent,
StreamCompletedEvent,
)
from libs.datetime_utils import naive_utc_now
from models.enums import UserFrom
if TYPE_CHECKING:
from core.workflow.graph_engine import Graph, GraphInitParams, GraphRuntimeState
from core.workflow.graph_engine.entities.event import InNodeEvent
from .entities import BaseNodeData, RetryConfig
logger = logging.getLogger(__name__)
class BaseNode:
_node_type: ClassVar[NodeType]
class Node:
node_type: ClassVar["NodeType"]
execution_type: NodeExecutionType = NodeExecutionType.EXECUTABLE
def __init__(
self,
id: str,
config: Mapping[str, Any],
graph_init_params: "GraphInitParams",
graph: "Graph",
graph_runtime_state: "GraphRuntimeState",
previous_node_id: str | None = None,
thread_pool_id: str | None = None,
):
) -> None:
self.id = id
self.tenant_id = graph_init_params.tenant_id
self.app_id = graph_init_params.app_id
self.workflow_type = graph_init_params.workflow_type
self.workflow_id = graph_init_params.workflow_id
self.graph_config = graph_init_params.graph_config
self.user_id = graph_init_params.user_id
self.user_from = graph_init_params.user_from
self.invoke_from = graph_init_params.invoke_from
self.user_from = UserFrom(graph_init_params.user_from)
self.invoke_from = InvokeFrom(graph_init_params.invoke_from)
self.workflow_call_depth = graph_init_params.call_depth
self.graph = graph
self.graph_runtime_state = graph_runtime_state
self.previous_node_id = previous_node_id
self.thread_pool_id = thread_pool_id
self.state: NodeState = NodeState.UNKNOWN # node execution state
node_id = config.get("id")
if not node_id:
raise ValueError("Node ID is required.")
self.node_id = node_id
self._node_id = node_id
self._node_execution_id: str = ""
self._start_at = naive_utc_now()
@abstractmethod
def init_node_data(self, data: Mapping[str, Any]): ...
def init_node_data(self, data: Mapping[str, Any]) -> None: ...
@abstractmethod
def _run(self) -> NodeRunResult | Generator[Union[NodeEvent, "InNodeEvent"], None, None]:
def _run(self) -> NodeRunResult | Generator[NodeEventBase, None, None]:
"""
Run node
:return:
"""
raise NotImplementedError
def run(self) -> Generator[Union[NodeEvent, "InNodeEvent"], None, None]:
def run(self) -> Generator[GraphNodeEventBase, None, None]:
# Generate a single node execution ID to use for all events
if not self._node_execution_id:
self._node_execution_id = str(uuid4())
self._start_at = naive_utc_now()
# Create and push start event with required fields
start_event = NodeRunStartedEvent(
id=self._node_execution_id,
node_id=self._node_id,
node_type=self.node_type,
node_title=self.title,
in_iteration_id=None,
start_at=self._start_at,
)
# === FIXME(-LAN-): Needs to refactor.
from core.workflow.nodes.tool.tool_node import ToolNode
if isinstance(self, ToolNode):
start_event.provider_id = getattr(self.get_base_node_data(), "provider_id", "")
start_event.provider_type = getattr(self.get_base_node_data(), "provider_type", "")
from core.workflow.nodes.datasource.datasource_node import DatasourceNode
if isinstance(self, DatasourceNode):
plugin_id = getattr(self.get_base_node_data(), "plugin_id", "")
provider_name = getattr(self.get_base_node_data(), "provider_name", "")
start_event.provider_id = f"{plugin_id}/{provider_name}"
start_event.provider_type = getattr(self.get_base_node_data(), "provider_type", "")
from typing import cast
from core.workflow.nodes.agent.agent_node import AgentNode
from core.workflow.nodes.agent.entities import AgentNodeData
if isinstance(self, AgentNode):
start_event.agent_strategy = AgentNodeStrategyInit(
name=cast(AgentNodeData, self.get_base_node_data()).agent_strategy_name,
icon=self.agent_strategy_icon,
)
# ===
yield start_event
try:
result = self._run()
# Handle NodeRunResult
if isinstance(result, NodeRunResult):
yield self._convert_node_run_result_to_graph_node_event(result)
return
# Handle event stream
for event in result:
# NOTE: this is necessary because iteration and loop nodes yield GraphNodeEventBase
if isinstance(event, NodeEventBase): # pyright: ignore[reportUnnecessaryIsInstance]
yield self._dispatch(event)
elif isinstance(event, GraphNodeEventBase) and not event.in_iteration_id and not event.in_loop_id: # pyright: ignore[reportUnnecessaryIsInstance]
event.id = self._node_execution_id
yield event
else:
yield event
except Exception as e:
logger.exception("Node %s failed to run", self.node_id)
logger.exception("Node %s failed to run", self._node_id)
result = NodeRunResult(
status=WorkflowNodeExecutionStatus.FAILED,
error=str(e),
error_type="WorkflowNodeError",
)
if isinstance(result, NodeRunResult):
yield RunCompletedEvent(run_result=result)
else:
yield from result
yield NodeRunFailedEvent(
id=self._node_execution_id,
node_id=self._node_id,
node_type=self.node_type,
start_at=self._start_at,
node_run_result=result,
error=str(e),
)
@classmethod
def extract_variable_selector_to_variable_mapping(
@@ -140,13 +234,21 @@ class BaseNode:
) -> Mapping[str, Sequence[str]]:
return {}
@classmethod
def get_default_config(cls, filters: dict | None = None):
return {}
def blocks_variable_output(self, variable_selectors: set[tuple[str, ...]]) -> bool:
"""
Check if this node blocks the output of specific variables.
@property
def type_(self) -> NodeType:
return self._node_type
This method is used to determine if a node must complete execution before
the specified variables can be used in streaming output.
:param variable_selectors: Set of variable selectors, each as a tuple (e.g., ('conversation', 'str'))
:return: True if this node blocks output of any of the specified variables, False otherwise
"""
return False
@classmethod
def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]:
return {}
@classmethod
@abstractmethod
@@ -158,10 +260,6 @@ class BaseNode:
# in `api/core/workflow/nodes/__init__.py`.
raise NotImplementedError("subclasses of BaseNode must implement `version` method.")
@property
def continue_on_error(self) -> bool:
    """Continue-on-error flag; this implementation always reports ``False``."""
    enabled = False
    return enabled
@property
def retry(self) -> bool:
    """Retry flag; this implementation always reports ``False``."""
    enabled = False
    return enabled
@@ -224,3 +322,198 @@ class BaseNode:
def default_value_dict(self) -> dict[str, Any]:
"""Get the default values dictionary for this node."""
return self._get_default_value_dict()
def _convert_node_run_result_to_graph_node_event(self, result: NodeRunResult) -> GraphNodeEventBase:
match result.status:
case WorkflowNodeExecutionStatus.FAILED:
return NodeRunFailedEvent(
id=self._node_execution_id,
node_id=self.id,
node_type=self.node_type,
start_at=self._start_at,
node_run_result=result,
error=result.error,
)
case WorkflowNodeExecutionStatus.SUCCEEDED:
return NodeRunSucceededEvent(
id=self._node_execution_id,
node_id=self.id,
node_type=self.node_type,
start_at=self._start_at,
node_run_result=result,
)
case _:
raise Exception(f"result status {result.status} not supported")
@singledispatchmethod
def _dispatch(self, event: NodeEventBase) -> GraphNodeEventBase:
    """
    Fallback of the single-dispatch converter from node events to graph events.

    Reached only when no handler is registered for the event's type.

    :param event: the node event that has no registered handler
    :raises NotImplementedError: always
    """
    message = f"Node {self._node_id} does not support event type {type(event)}"
    raise NotImplementedError(message)
@_dispatch.register
def _(self, event: StreamChunkEvent) -> NodeRunStreamChunkEvent:
    """
    Convert a node-level stream chunk into ``NodeRunStreamChunkEvent``.

    Selector, chunk and final flag are copied from ``event``; execution ID,
    node ID and node type come from this node.
    """
    selector, chunk, is_final = event.selector, event.chunk, event.is_final
    return NodeRunStreamChunkEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        selector=selector,
        chunk=chunk,
        is_final=is_final,
    )
@_dispatch.register
def _(self, event: StreamCompletedEvent) -> NodeRunSucceededEvent | NodeRunFailedEvent:
    """
    Convert a stream-completed node event into the final graph event.

    :param event: carries the ``node_run_result`` of the finished stream
    :return: ``NodeRunSucceededEvent`` or ``NodeRunFailedEvent`` depending on the result status
    :raises NotImplementedError: if the status is neither SUCCEEDED nor FAILED
    """
    run_result = event.node_run_result
    status = run_result.status

    if status == WorkflowNodeExecutionStatus.SUCCEEDED:
        return NodeRunSucceededEvent(
            id=self._node_execution_id,
            node_id=self._node_id,
            node_type=self.node_type,
            start_at=self._start_at,
            node_run_result=run_result,
        )

    if status == WorkflowNodeExecutionStatus.FAILED:
        return NodeRunFailedEvent(
            id=self._node_execution_id,
            node_id=self._node_id,
            node_type=self.node_type,
            start_at=self._start_at,
            node_run_result=run_result,
            error=run_result.error,
        )

    raise NotImplementedError(
        f"Node {self._node_id} does not support status {event.node_run_result.status}"
    )
@_dispatch.register
def _(self, event: AgentLogEvent) -> NodeRunAgentLogEvent:
    """
    Convert an agent log node event into ``NodeRunAgentLogEvent``.

    All log fields are copied from ``event`` unchanged; execution ID, node ID
    and node type come from this node.
    """
    execution_id = self._node_execution_id
    return NodeRunAgentLogEvent(
        id=execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        message_id=event.message_id,
        label=event.label,
        node_execution_id=event.node_execution_id,
        parent_id=event.parent_id,
        error=event.error,
        status=event.status,
        data=event.data,
        metadata=event.metadata,
    )
@_dispatch.register
def _(self, event: LoopStartedEvent) -> NodeRunLoopStartedEvent:
    """Convert a loop-started node event into ``NodeRunLoopStartedEvent``."""
    title = self.get_base_node_data().title
    return NodeRunLoopStartedEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        metadata=event.metadata,
        predecessor_node_id=event.predecessor_node_id,
    )
@_dispatch.register
def _(self, event: LoopNextEvent) -> NodeRunLoopNextEvent:
    """Convert a loop-next node event into ``NodeRunLoopNextEvent``."""
    title = self.get_base_node_data().title
    return NodeRunLoopNextEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        index=event.index,
        pre_loop_output=event.pre_loop_output,
    )
@_dispatch.register
def _(self, event: LoopSucceededEvent) -> NodeRunLoopSucceededEvent:
    """Convert a loop-succeeded node event into ``NodeRunLoopSucceededEvent``."""
    title = self.get_base_node_data().title
    return NodeRunLoopSucceededEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
    )
@_dispatch.register
def _(self, event: LoopFailedEvent) -> NodeRunLoopFailedEvent:
    """Convert a loop-failed node event into ``NodeRunLoopFailedEvent``, keeping its error."""
    title = self.get_base_node_data().title
    return NodeRunLoopFailedEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
        error=event.error,
    )
@_dispatch.register
def _(self, event: IterationStartedEvent) -> NodeRunIterationStartedEvent:
    """Convert an iteration-started node event into ``NodeRunIterationStartedEvent``."""
    title = self.get_base_node_data().title
    return NodeRunIterationStartedEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        metadata=event.metadata,
        predecessor_node_id=event.predecessor_node_id,
    )
@_dispatch.register
def _(self, event: IterationNextEvent) -> NodeRunIterationNextEvent:
    """Convert an iteration-next node event into ``NodeRunIterationNextEvent``."""
    title = self.get_base_node_data().title
    return NodeRunIterationNextEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        index=event.index,
        pre_iteration_output=event.pre_iteration_output,
    )
@_dispatch.register
def _(self, event: IterationSucceededEvent) -> NodeRunIterationSucceededEvent:
    """Convert an iteration-succeeded node event into ``NodeRunIterationSucceededEvent``."""
    title = self.get_base_node_data().title
    return NodeRunIterationSucceededEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
    )
@_dispatch.register
def _(self, event: IterationFailedEvent) -> NodeRunIterationFailedEvent:
    """Convert an iteration-failed node event into ``NodeRunIterationFailedEvent``, keeping its error."""
    title = self.get_base_node_data().title
    return NodeRunIterationFailedEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        node_title=title,
        start_at=event.start_at,
        inputs=event.inputs,
        outputs=event.outputs,
        metadata=event.metadata,
        steps=event.steps,
        error=event.error,
    )
@_dispatch.register
def _(self, event: RunRetrieverResourceEvent) -> NodeRunRetrieverResourceEvent:
    """
    Convert a retriever-resource node event into ``NodeRunRetrieverResourceEvent``.

    Retriever resources and context are copied from ``event``; the node version
    is taken from ``self.version()``.
    """
    resources, context = event.retriever_resources, event.context
    return NodeRunRetrieverResourceEvent(
        id=self._node_execution_id,
        node_id=self._node_id,
        node_type=self.node_type,
        retriever_resources=resources,
        context=context,
        node_version=self.version(),
    )

View File

@@ -0,0 +1,148 @@
"""Template structures for Response nodes (Answer and End).
This module provides a unified template structure for both Answer and End nodes,
similar to SegmentGroup but focused on template representation without values.
"""
from abc import ABC, abstractmethod
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any, Union
from core.workflow.nodes.base.variable_template_parser import VariableTemplateParser
@dataclass(frozen=True)
class TemplateSegment(ABC):
    """Abstract, immutable piece of a template.

    Concrete segment types must provide ``__str__``.
    """

    @abstractmethod
    def __str__(self) -> str:
        """Return the segment rendered as template text."""
        ...
@dataclass(frozen=True)
class TextSegment(TemplateSegment):
    """Literal text inside a template; renders as the text itself."""

    # The literal text of this segment.
    text: str

    def __str__(self) -> str:
        """Return the stored text unchanged."""
        literal = self.text
        return literal
@dataclass(frozen=True)
class VariableSegment(TemplateSegment):
    """Reference to a variable inside a template.

    Renders as ``{{#a.b.c#}}``, where ``a.b.c`` is the selector joined with dots.
    """

    # Path components identifying the referenced variable.
    selector: Sequence[str]
    # Optional variable name for End nodes.
    variable_name: str | None = None

    def __str__(self) -> str:
        """Return the placeholder form of this variable reference."""
        dotted_path = ".".join(self.selector)
        return "{{#" + dotted_path + "#}}"
# Type alias for segments
TemplateSegmentUnion = Union[TextSegment, VariableSegment]
@dataclass(frozen=True)
class Template:
    """Unified template structure for Response nodes.

    Similar to SegmentGroup, but represents the template structure
    without variable values - only marking variable selectors.
    """

    # Ordered text and variable segments that make up the template.
    segments: list[TemplateSegmentUnion]

    @classmethod
    def from_answer_template(cls, template_str: str) -> "Template":
        """Create a Template from an Answer node template string.

        Example:
            "Hello, {{#node1.name#}}" -> [TextSegment("Hello, "), VariableSegment(["node1", "name"])]

        Args:
            template_str: The answer template string

        Returns:
            Template instance
        """
        import re

        parser = VariableTemplateParser(template_str)

        # Map each variable key (hashes included, e.g. "#node1.name#") to its selector path.
        var_map = {var.variable: var.value_selector for var in parser.extract_variable_selectors()}

        # The single capturing group makes re.split keep the variable keys:
        # even indices hold literal text, odd indices hold the captured keys.
        pattern = r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}"
        parts = re.split(pattern, template_str)

        segments: list[TemplateSegmentUnion] = []
        for i, part in enumerate(parts):
            if not part:
                # Empty text between adjacent placeholders or at either end.
                continue

            if i % 2 == 0:
                segments.append(TextSegment(text=part))
            elif part in var_map:
                # The key still carries its surrounding '#'; var_map uses the same form.
                segments.append(VariableSegment(selector=list(var_map[part])))
            else:
                # This shouldn't happen with valid templates; keep the placeholder as text.
                segments.append(TextSegment(text="{{" + part + "}}"))

        return cls(segments=segments)

    @classmethod
    def from_end_outputs(cls, outputs_config: list[dict[str, Any]]) -> "Template":
        """Create a Template from an End node outputs configuration.

        End nodes are treated as templates of concatenated variables with newlines.
        Outputs whose ``value_selector`` is missing or empty contribute nothing,
        neither a variable nor a separator.

        Example:
            [{"variable": "text", "value_selector": ["node1", "text"]},
             {"variable": "result", "value_selector": ["node2", "result"]}]
            ->
            [VariableSegment(["node1", "text"]),
             TextSegment("\\n"),
             VariableSegment(["node2", "result"])]

        Args:
            outputs_config: List of output configurations with variable and value_selector

        Returns:
            Template instance
        """
        segments: list[TemplateSegmentUnion] = []

        for output in outputs_config:
            value_selector = output.get("value_selector", [])
            if not value_selector:
                continue

            # Separate only between variables that are actually emitted, so a skipped
            # output can never leave a leading, doubled or trailing newline behind.
            if segments:
                segments.append(TextSegment(text="\n"))

            variable_name = output.get("variable", "")
            segments.append(VariableSegment(selector=list(value_selector), variable_name=variable_name))

        return cls(segments=segments)

    def __str__(self) -> str:
        """String representation of the template."""
        return "".join(str(segment) for segment in self.segments)

View File

@@ -0,0 +1,130 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any
from .entities import VariableSelector
REGEX = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
SELECTOR_PATTERN = re.compile(r"\{\{(#[a-zA-Z0-9_]{1,50}(?:\.[a-zA-Z_][a-zA-Z0-9_]{0,29}){1,10}#)\}\}")
def extract_selectors_from_template(template: str, /) -> Sequence[VariableSelector]:
    """
    Extract one VariableSelector per ``{{#node.var#}}`` placeholder in ``template``.

    Selectors are returned in order of appearance; repeated placeholders yield
    repeated selectors.

    Args:
        template: The template string to scan.

    Returns:
        A list of VariableSelector objects whose ``variable`` is the key with its
        surrounding ``#`` and whose ``value_selector`` is the dot-separated path.
    """
    # SELECTOR_PATTERN has exactly one capturing group, so findall yields just the
    # captured keys. Literal text that merely resembles a key (e.g. "#1. first#"
    # without the surrounding braces) is therefore never mistaken for a selector.
    return [
        VariableSelector(variable=key, value_selector=key[1:-1].split("."))
        for key in SELECTOR_PATTERN.findall(template)
    ]
class VariableTemplateParser:
    """
    !NOTE: Consider using the new `segments` module instead of this class.

    A class for parsing and manipulating template variables in a string.

    Rules:

    1. Template variables must be enclosed in `{{}}`.
    2. The template variable Key can only be: #node_id.var1.var2#.
    3. The template variable Key cannot contain new lines or spaces, and must comply with rule 2.

    Example usage:

        template = "Hello, {{#node_id.query.name#}}! Your age is {{#node_id.query.age#}}."
        parser = VariableTemplateParser(template)

        # Extract template variable keys
        variable_keys = parser.extract()
        print(variable_keys)
        # Output: ['#node_id.query.name#', '#node_id.query.age#']

        # Extract variable selectors
        variable_selectors = parser.extract_variable_selectors()
        print(variable_selectors)
        # Output: [VariableSelector(variable='#node_id.query.name#', value_selector=['node_id', 'query', 'name']),
        #          VariableSelector(variable='#node_id.query.age#', value_selector=['node_id', 'query', 'age'])]

        # Format the template string
        inputs = {'#node_id.query.name#': 'John', '#node_id.query.age#': 25}
        formatted_string = parser.format(inputs)
        print(formatted_string)
        # Output: "Hello, John! Your age is 25."
    """

    def __init__(self, template: str):
        self.template = template
        self.variable_keys = self.extract()

    def extract(self) -> list[str]:
        """
        Extracts all the template variable keys from the template string.

        Returns:
            A list of unique template variable keys, in order of first appearance.
        """
        # REGEX has two groups, so findall yields tuples; the first item is the full key.
        keys = (match[0] for match in REGEX.findall(self.template))
        # dict.fromkeys de-duplicates while preserving insertion order, so the result is
        # deterministic (a set would return the keys in arbitrary order).
        return list(dict.fromkeys(keys))

    def extract_variable_selectors(self) -> list[VariableSelector]:
        """
        Extracts the variable selectors from the template variable keys.

        Returns:
            A list of VariableSelector objects representing the variable selectors.
        """
        variable_selectors = []
        for variable_key in self.variable_keys:
            path = variable_key.replace("#", "").split(".")
            if len(path) < 2:
                # A selector needs at least a node id and one variable name.
                continue

            variable_selectors.append(VariableSelector(variable=variable_key, value_selector=path))

        return variable_selectors

    def format(self, inputs: Mapping[str, Any]) -> str:
        """
        Formats the template string by replacing the template variables with their corresponding values.

        Keys missing from ``inputs`` keep their placeholder text (which is then reduced to
        single braces by ``remove_template_variables``), ``None`` becomes an empty string,
        and any other non-string value is converted with ``str()``. Finally, every
        ``<|...|>`` span is stripped from the result.

        Args:
            inputs: A dictionary containing the values for the template variables.

        Returns:
            The formatted string with template variables replaced by their values.
        """

        def replacer(match: re.Match[str]) -> str:
            key = match.group(1)
            value = inputs.get(key, match.group(0))  # return original matched string if key not found

            if value is None:
                value = ""
            elif not isinstance(value, str):
                # re.sub only accepts strings, so stringify every other value type
                # rather than failing on the ones not explicitly anticipated.
                value = str(value)

            # Neutralise template variables contained in the substituted value.
            return VariableTemplateParser.remove_template_variables(value)

        prompt = REGEX.sub(replacer, self.template)
        return re.sub(r"<\|.*?\|>", "", prompt)

    @classmethod
    def remove_template_variables(cls, text: str) -> str:
        """
        Removes the template variables from the given text.

        Each ``{{#key#}}`` is rewritten to ``{#key#}`` so it no longer matches the
        template variable pattern.

        Args:
            text: The text from which to remove the template variables.

        Returns:
            The text with template variables removed.
        """
        return REGEX.sub(r"{\1}", text)