diff --git a/src/runboat/controller.py b/src/runboat/controller.py index cf0156e..f9bd1d6 100644 --- a/src/runboat/controller.py +++ b/src/runboat/controller.py @@ -15,6 +15,8 @@ _logger = logging.getLogger(__name__) # of the background tasks and the clearing of the wakeup avoids waking up the tasks # too often. EVENT_BUFFERING_DELAY = 1 +# When an exception happens in background tasks, restart them after a delay. +WALKING_DEAD_RESTART_DELAY = 5 class Controller: @@ -256,13 +258,18 @@ class Controller: _logger.info(f"(Re)starting {func.__name__}") try: await func() + except k8s.WatchException as e: + _logger.info( + f"Watch error {e} in {func.__name__}, " + f"restarting in {WALKING_DEAD_RESTART_DELAY} sec." + ) + await asyncio.sleep(WALKING_DEAD_RESTART_DELAY) except Exception: - delay = 5 _logger.exception( f"Unhandled exception in {func.__name__}, " - f"restarting in {delay} sec." + f"restarting in {WALKING_DEAD_RESTART_DELAY} sec." ) - await asyncio.sleep(delay) + await asyncio.sleep(WALKING_DEAD_RESTART_DELAY) for f in ( self.deployment_watcher, diff --git a/src/runboat/k8s.py b/src/runboat/k8s.py index d78062c..641982d 100644 --- a/src/runboat/k8s.py +++ b/src/runboat/k8s.py @@ -4,7 +4,6 @@ import os import shutil import subprocess import tempfile -import time from contextlib import contextmanager from enum import Enum from importlib import resources @@ -80,6 +79,10 @@ def patch_deployment( raise +class WatchException(Exception): + pass + + def _watch( list_method: Callable[..., Any], *args: Any, **kwargs: Any ) -> Generator[tuple[str | None, Any], None, None]: @@ -113,12 +116,7 @@ def _watch( except (urllib3.exceptions.TimeoutError, TimeoutError): continue except Exception as e: - delay = 5 - _logger.info( - f"Error {e} watching {list_method.__name__}. Retrying in {delay} sec." - ) - time.sleep(delay) - continue + raise WatchException(f"{e} in {list_method.__name__}") from e @sync_to_async_iterator