refactor: consume events after pause/abort and improve API clarity (#28328)

Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
This commit is contained in:
-LAN-
2025-11-18 19:04:11 +08:00
committed by GitHub
parent 68526c09fc
commit 6efdc94661
7 changed files with 261 additions and 183 deletions

View File

@@ -192,7 +192,6 @@ class GraphEngine:
self._dispatcher = Dispatcher(
event_queue=self._event_queue,
event_handler=self._event_handler_registry,
event_collector=self._event_manager,
execution_coordinator=self._execution_coordinator,
event_emitter=self._event_manager,
)

View File

@@ -43,7 +43,6 @@ class Dispatcher:
self,
event_queue: queue.Queue[GraphNodeEventBase],
event_handler: "EventHandler",
event_collector: EventManager,
execution_coordinator: ExecutionCoordinator,
event_emitter: EventManager | None = None,
) -> None:
@@ -53,13 +52,11 @@ class Dispatcher:
Args:
event_queue: Queue of events from workers
event_handler: Event handler registry for processing events
event_collector: Event manager for collecting unhandled events
execution_coordinator: Coordinator for execution flow
event_emitter: Optional event manager to signal completion
"""
self._event_queue = event_queue
self._event_handler = event_handler
self._event_collector = event_collector
self._execution_coordinator = execution_coordinator
self._event_emitter = event_emitter
@@ -86,37 +83,31 @@ class Dispatcher:
def _dispatcher_loop(self) -> None:
"""Main dispatcher loop."""
try:
self._process_commands()
while not self._stop_event.is_set():
commands_checked = False
should_check_commands = False
should_break = False
if (
self._execution_coordinator.aborted
or self._execution_coordinator.paused
or self._execution_coordinator.execution_complete
):
break
if self._execution_coordinator.is_execution_complete():
should_check_commands = True
should_break = True
else:
# Check for scaling
self._execution_coordinator.check_scaling()
self._execution_coordinator.check_scaling()
try:
event = self._event_queue.get(timeout=0.1)
self._event_handler.dispatch(event)
self._event_queue.task_done()
self._process_commands(event)
except queue.Empty:
time.sleep(0.1)
# Process events
try:
event = self._event_queue.get(timeout=0.1)
# Route to the event handler
self._event_handler.dispatch(event)
should_check_commands = self._should_check_commands(event)
self._event_queue.task_done()
except queue.Empty:
# Process commands even when no new events arrive so abort requests are not missed
should_check_commands = True
time.sleep(0.1)
if should_check_commands and not commands_checked:
self._execution_coordinator.check_commands()
commands_checked = True
if should_break:
if not commands_checked:
self._execution_coordinator.check_commands()
self._process_commands()
while True:
try:
event = self._event_queue.get(block=False)
self._event_handler.dispatch(event)
self._event_queue.task_done()
except queue.Empty:
break
except Exception as e:
@@ -129,6 +120,6 @@ class Dispatcher:
if self._event_emitter:
self._event_emitter.mark_complete()
def _should_check_commands(self, event: GraphNodeEventBase) -> bool:
"""Return True if the event represents a node completion."""
return isinstance(event, self._COMMAND_TRIGGER_EVENTS)
def _process_commands(self, event: GraphNodeEventBase | None = None):
if event is None or isinstance(event, self._COMMAND_TRIGGER_EVENTS):
self._execution_coordinator.process_commands()

View File

@@ -40,7 +40,7 @@ class ExecutionCoordinator:
self._command_processor = command_processor
self._worker_pool = worker_pool
def check_commands(self) -> None:
def process_commands(self) -> None:
"""Process any pending commands."""
self._command_processor.process_commands()
@@ -48,24 +48,16 @@ class ExecutionCoordinator:
"""Check and perform worker scaling if needed."""
self._worker_pool.check_and_scale()
def is_execution_complete(self) -> bool:
"""
Check if execution is complete.
Returns:
True if execution is complete
"""
# Treat paused, aborted, or failed executions as terminal states
if self._graph_execution.is_paused:
return True
if self._graph_execution.aborted or self._graph_execution.has_error:
return True
@property
def execution_complete(self):
return self._state_manager.is_execution_complete()
@property
def is_paused(self) -> bool:
def aborted(self):
return self._graph_execution.aborted or self._graph_execution.has_error
@property
def paused(self) -> bool:
"""Expose whether the underlying graph execution is paused."""
return self._graph_execution.is_paused