Skip to content

Commit

Permalink
fix(api): Various E-stop fixes (#14929)
Browse files Browse the repository at this point in the history
  • Loading branch information
SyntaxColoring committed Apr 19, 2024
1 parent 8fa37b3 commit 4418128
Show file tree
Hide file tree
Showing 12 changed files with 250 additions and 242 deletions.
10 changes: 6 additions & 4 deletions api/src/opentrons/protocol_engine/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,18 @@ def __init__(


class EStopActivatedError(ProtocolEngineError):
"""Raised when an operation's required pipette tip is not attached."""
"""Represents an E-stop event."""

def __init__(
self,
message: Optional[str] = None,
details: Optional[Dict[str, Any]] = None,
wrapping: Optional[Sequence[EnumeratedError]] = None,
) -> None:
"""Build an EStopActivatedError."""
super().__init__(ErrorCodes.E_STOP_ACTIVATED, message, details, wrapping)
super().__init__(
code=ErrorCodes.E_STOP_ACTIVATED,
message="E-stop activated.",
wrapping=wrapping,
)


class NotSupportedOnRobotType(ProtocolEngineError):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ async def execute(self, command_id: str) -> None:
if isinstance(error, asyncio.CancelledError):
error = RunStoppedError("Run was cancelled")
elif isinstance(error, EStopActivatedError):
error = PE_EStopActivatedError(message=str(error), wrapping=[error])
error = PE_EStopActivatedError(wrapping=[error])
elif not isinstance(error, EnumeratedError):
error = PythonException(error)

Expand Down
137 changes: 41 additions & 96 deletions api/src/opentrons/protocol_engine/protocol_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from opentrons.protocol_engine.actions.actions import ResumeFromRecoveryAction
from opentrons.protocol_engine.error_recovery_policy import (
ErrorRecoveryPolicy,
ErrorRecoveryType,
error_recovery_by_ff,
)

Expand Down Expand Up @@ -58,7 +57,6 @@
HardwareStoppedAction,
ResetTipsAction,
SetPipetteMovementSpeedAction,
FailCommandAction,
)


Expand Down Expand Up @@ -277,103 +275,45 @@ async def add_and_execute_command_wait_for_recovery(
)
return completed_command

def estop(
self,
# TODO(mm, 2024-03-26): Maintenance runs are a robot-server concept that
# ProtocolEngine should not have to know about. Can this be simplified or
# defined in other terms?
maintenance_run: bool,
) -> None:
"""Signal to the engine that an estop event occurred.
def estop(self) -> None:
"""Signal to the engine that an E-stop event occurred.
If an estop happens while the robot is moving, lower layers physically stop
motion and raise the event as an exception, which fails the Protocol Engine
command. No action from the `ProtocolEngine` caller is needed to handle that.
However, if an estop happens in between commands, or in the middle of
a command like `comment` or `waitForDuration` that doesn't access the hardware,
`ProtocolEngine` needs to be told about it so it can treat it as a fatal run
error and stop executing more commands. This method is how to do that.
If there are any queued commands for the engine, they will be marked
as failed due to the estop event. If there aren't any queued commands
*and* this is a maintenance run (which has commands queued one-by-one),
a series of actions will mark the engine as Stopped. In either case the
queue worker will be deactivated; the primary difference is that the former
case will expect the protocol runner to `finish()` the engine, whereas the
maintenance run will be put into a state wherein the engine can be discarded.
"""
if self._state_store.commands.get_is_stopped():
return
running_or_next_queued_id = (
self._state_store.commands.get_running_command_id()
or self._state_store.commands.get_queue_ids().head(None)
# TODO(mm, 2024-04-02): This logic looks wrong whenever the next queued
# command is a setup command, which is the normal case in maintenance
# runs. Setup commands won't show up in commands.get_queue_ids().
)
running_or_next_queued = (
self._state_store.commands.get(running_or_next_queued_id)
if running_or_next_queued_id is not None
else None
)
`ProtocolEngine` needs to be told about it so it can interrupt the command
and stop executing any more. This method is how to do that.
if running_or_next_queued_id is not None:
assert running_or_next_queued is not None

fail_action = FailCommandAction(
command_id=running_or_next_queued_id,
# FIXME(mm, 2024-04-02): As of https://github.com/Opentrons/opentrons/pull/14726,
# this action is only legal if the command is running, not queued.
running_command=running_or_next_queued,
error_id=self._model_utils.generate_id(),
failed_at=self._model_utils.get_timestamp(),
error=EStopActivatedError(message="Estop Activated"),
notes=[],
type=ErrorRecoveryType.FAIL_RUN,
)
self._action_dispatcher.dispatch(fail_action)

# The FailCommandAction above will have cleared all the queued protocol
# OR setup commands, depending on whether we gave it a protocol or setup
# command. We want both to be cleared in either case. So, do that here.
running_or_next_queued_id = self._state_store.commands.get_queue_ids().head(
None
)
if running_or_next_queued_id is not None:
running_or_next_queued = self._state_store.commands.get(
running_or_next_queued_id
)
fail_action = FailCommandAction(
command_id=running_or_next_queued_id,
# FIXME(mm, 2024-04-02): As of https://github.com/Opentrons/opentrons/pull/14726,
# this action is only legal if the command is running, not queued.
running_command=running_or_next_queued,
error_id=self._model_utils.generate_id(),
failed_at=self._model_utils.get_timestamp(),
error=EStopActivatedError(message="Estop Activated"),
notes=[],
type=ErrorRecoveryType.FAIL_RUN,
)
self._action_dispatcher.dispatch(fail_action)
self._queue_worker.cancel()
elif maintenance_run:
stop_action = self._state_store.commands.validate_action_allowed(
This acts roughly like `request_stop()`. After calling this, you should call
`finish()` with an EStopActivatedError.
"""
try:
action = self._state_store.commands.validate_action_allowed(
StopAction(from_estop=True)
)
self._action_dispatcher.dispatch(stop_action)
hardware_stop_action = HardwareStoppedAction(
completed_at=self._model_utils.get_timestamp(),
finish_error_details=FinishErrorDetails(
error=EStopActivatedError(message="Estop Activated"),
error_id=self._model_utils.generate_id(),
created_at=self._model_utils.get_timestamp(),
),
except Exception: # todo(mm, 2024-04-16): Catch a more specific type.
# This is likely called from some hardware API callback that doesn't care
# about ProtocolEngine lifecycle or what methods are valid to call at what
# times. So it makes more sense for us to no-op here than to propagate this
# as an error.
_log.info(
"ProtocolEngine cannot handle E-stop event right now. Ignoring it.",
exc_info=True,
)
self._action_dispatcher.dispatch(hardware_stop_action)
self._queue_worker.cancel()
else:
_log.info("estop pressed before protocol was started, taking no action.")
return
self._action_dispatcher.dispatch(action)
# self._queue_worker.cancel() will try to interrupt any ongoing command.
# Unfortunately, if it's a hardware command, this interruption will race
# against the E-stop exception propagating up from lower layers. But we need to
# do this because we want to make sure non-hardware commands, like
# `waitForDuration`, are also interrupted.
self._queue_worker.cancel()
# Unlike self.request_stop(), we don't need to do
# self._hardware_api.cancel_execution_and_running_tasks(). Since this was an
# E-stop event, the hardware API already knows.

async def request_stop(self) -> None:
"""Make command execution stop soon.
Expand Down Expand Up @@ -418,14 +358,20 @@ async def finish(
set_run_status: bool = True,
post_run_hardware_state: PostRunHardwareState = PostRunHardwareState.HOME_AND_STAY_ENGAGED,
) -> None:
"""Gracefully finish using the ProtocolEngine, waiting for it to become idle.
"""Finish using the `ProtocolEngine`.
This does a few things:
1. It may do post-run actions like homing and dropping tips. This depends on the
arguments passed as well as heuristics based on the history of the engine.
2. It waits for the engine to be done controlling the robot's hardware.
3. It releases internal resources, like background tasks.
The engine will finish executing its current command (if any),
and then shut down. After an engine has been `finished`'ed, it cannot
be restarted.
It's safe to call `finish()` multiple times. After you call `finish()`,
the engine can't be restarted.
This method should not raise. If any exceptions happened during execution that were not
properly caught by the CommandExecutor, or if any exceptions happen during this
properly caught by `ProtocolEngine` internals, or if any exceptions happen during this
`finish()` call, they should be saved as `.state_view.get_summary().errors`.
Arguments:
Expand All @@ -439,12 +385,11 @@ async def finish(
if self._state_store.commands.state.stopped_by_estop:
# This handles the case where the E-stop was pressed while we were *not* in the middle
# of some hardware interaction that would raise it as an exception. For example, imagine
# we were paused between two commands, or imagine we were executing a very long run of
# comment commands.
# we were paused between two commands, or imagine we were executing a waitForDuration.
drop_tips_after_run = False
post_run_hardware_state = PostRunHardwareState.DISENGAGE_IN_PLACE
if error is None:
error = EStopActivatedError(message="Estop was activated during a run")
error = EStopActivatedError()

if error:
# If the run had an error, check if that error indicates an E-stop.
Expand Down
16 changes: 9 additions & 7 deletions api/src/opentrons/protocol_engine/state/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,11 @@ def handle_action(self, action: Action) -> None: # noqa: C901
elif isinstance(action, StopAction):
if not self._state.run_result:
self._state.queue_status = QueueStatus.PAUSED
self._state.run_result = RunResult.STOPPED
if action.from_estop:
self._state.stopped_by_estop = True
self._state.run_result = RunResult.FAILED
else:
self._state.run_result = RunResult.STOPPED

elif isinstance(action, FinishAction):
if not self._state.run_result:
Expand All @@ -361,12 +363,12 @@ def handle_action(self, action: Action) -> None: # noqa: C901
else:
self._state.run_result = RunResult.STOPPED

if action.error_details:
self._state.run_error = self._map_run_exception_to_error_occurrence(
action.error_details.error_id,
action.error_details.created_at,
action.error_details.error,
)
if not self._state.run_error and action.error_details:
self._state.run_error = self._map_run_exception_to_error_occurrence(
action.error_details.error_id,
action.error_details.created_at,
action.error_details.error,
)

elif isinstance(action, HardwareStoppedAction):
self._state.queue_status = QueueStatus.PAUSED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ def _ImplementationCls(self) -> Type[_TestCommandImpl]:
False,
),
(
EStopActivatedError("oh no"),
matchers.ErrorMatching(PE_EStopActivatedError, match="oh no"),
EStopActivatedError(),
matchers.ErrorMatching(PE_EStopActivatedError),
True,
),
(
Expand Down
39 changes: 37 additions & 2 deletions api/tests/opentrons/protocol_engine/state/test_command_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from datetime import datetime
from unittest.mock import sentinel

import pytest

Expand All @@ -13,10 +14,15 @@
from opentrons.ordered_set import OrderedSet
from opentrons.protocol_engine import actions, commands, errors
from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryType
from opentrons.protocol_engine.errors.error_occurrence import ErrorOccurrence
from opentrons.protocol_engine.errors.exceptions import EStopActivatedError
from opentrons.protocol_engine.notes.notes import CommandNote
from opentrons.protocol_engine.state.commands import CommandStore, CommandView
from opentrons.protocol_engine.state.commands import (
CommandStore,
CommandView,
)
from opentrons.protocol_engine.state.config import Config
from opentrons.protocol_engine.types import DeckType
from opentrons.protocol_engine.types import DeckType, EngineStatus


def _make_config() -> Config:
Expand Down Expand Up @@ -434,3 +440,32 @@ def test_get_recovery_in_progress_for_command() -> None:

# c3 failed, but not recoverably.
assert not subject_view.get_recovery_in_progress_for_command("c2")


def test_final_state_after_estop() -> None:
"""Test the final state of the run after it's E-stopped."""
subject = CommandStore(config=_make_config(), is_door_open=False)
subject_view = CommandView(subject.state)

error_details = actions.FinishErrorDetails(
error=EStopActivatedError(), error_id="error-id", created_at=datetime.now()
)
expected_error_occurrence = ErrorOccurrence(
id=error_details.error_id,
createdAt=error_details.created_at,
errorCode=ErrorCodes.E_STOP_ACTIVATED.value.code,
errorType="EStopActivatedError",
detail="E-stop activated.",
)

subject.handle_action(actions.StopAction(from_estop=True))
subject.handle_action(actions.FinishAction(error_details=error_details))
subject.handle_action(
actions.HardwareStoppedAction(
completed_at=sentinel.hardware_stopped_action_completed_at,
finish_error_details=None,
)
)

assert subject_view.get_status() == EngineStatus.FAILED
assert subject_view.get_error() == expected_error_occurrence
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,13 @@ def test_command_store_handles_finish_action_with_stopped() -> None:
assert subject.state.run_result == RunResult.STOPPED


@pytest.mark.parametrize("from_estop", [True, False])
def test_command_store_handles_stop_action(from_estop: bool) -> None:
@pytest.mark.parametrize(
["from_estop", "expected_run_result"],
[(True, RunResult.FAILED), (False, RunResult.STOPPED)],
)
def test_command_store_handles_stop_action(
from_estop: bool, expected_run_result: RunResult
) -> None:
"""It should mark the engine as non-gracefully stopped on StopAction."""
subject = CommandStore(is_door_open=False, config=_make_config())

Expand All @@ -615,7 +620,7 @@ def test_command_store_handles_stop_action(from_estop: bool) -> None:
assert subject.state == CommandState(
command_history=CommandHistory(),
queue_status=QueueStatus.PAUSED,
run_result=RunResult.STOPPED,
run_result=expected_run_result,
run_completed_at=None,
is_door_blocking=False,
run_error=None,
Expand Down
Loading

0 comments on commit 4418128

Please sign in to comment.