More precise change detection
Fix: we read spec.replicas instead of status.replicas. Do not wakeup needlessly when irrelevant changes happen in deployments. Better logging.
This commit is contained in:
parent
9627729ba2
commit
f5d232c3af
9 changed files with 110 additions and 75 deletions
|
|
@ -48,7 +48,6 @@ MVP:
|
||||||
- better error handling in API (return 400 on user errors)
|
- better error handling in API (return 400 on user errors)
|
||||||
- basic tests
|
- basic tests
|
||||||
- build and publish runboat container image
|
- build and publish runboat container image
|
||||||
- test what happens when the watcher looses connection to k8s
|
|
||||||
- look at other TODO in code to see if anything important remains
|
- look at other TODO in code to see if anything important remains
|
||||||
- basic UI (single page with a combo box to select repo and show builds by branch/pr,
|
- basic UI (single page with a combo box to select repo and show builds by branch/pr,
|
||||||
with start/stop buttons)
|
with start/stop buttons)
|
||||||
|
|
@ -56,6 +55,7 @@ MVP:
|
||||||
More:
|
More:
|
||||||
|
|
||||||
- shiny UI
|
- shiny UI
|
||||||
|
- websocket stream of build changes, for a dynamic UI
|
||||||
- handle PR close (delete all builds for PR)
|
- handle PR close (delete all builds for PR)
|
||||||
- handle branch delete (delete all builds for branch)
|
- handle branch delete (delete all builds for branch)
|
||||||
- create builds for all supported repos on startup (goes with sticky branches)
|
- create builds for all supported repos on startup (goes with sticky branches)
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,14 @@ disable_existing_loggers: false
|
||||||
formatters:
|
formatters:
|
||||||
rich:
|
rich:
|
||||||
datefmt: "[%X]"
|
datefmt: "[%X]"
|
||||||
format: "%(name)25s %(message)s"
|
# format: "%(name)25s %(message)s"
|
||||||
handlers:
|
handlers:
|
||||||
console:
|
console:
|
||||||
class: rich.logging.RichHandler
|
class: rich.logging.RichHandler
|
||||||
level: NOTSET
|
level: NOTSET
|
||||||
formatter: rich
|
formatter: rich
|
||||||
rich_tracebacks: true
|
rich_tracebacks: true
|
||||||
|
show_path: false
|
||||||
root:
|
root:
|
||||||
level: DEBUG
|
level: DEBUG
|
||||||
handlers: [console]
|
handlers: [console]
|
||||||
|
|
|
||||||
|
|
@ -100,13 +100,14 @@ class Controller:
|
||||||
build_name = deployment.metadata.labels.get("runboat/build")
|
build_name = deployment.metadata.labels.get("runboat/build")
|
||||||
if not build_name:
|
if not build_name:
|
||||||
continue
|
continue
|
||||||
_logger.debug(f"k8s {event_type} {deployment.kind} {build_name}")
|
should_wakeup = False
|
||||||
if event_type in ("ADDED", "MODIFIED"):
|
if event_type in ("ADDED", "MODIFIED"):
|
||||||
self.db.add(Build.from_deployment(deployment))
|
should_wakeup = self.db.add(Build.from_deployment(deployment))
|
||||||
elif event_type == "DELETED":
|
elif event_type == "DELETED":
|
||||||
self.db.remove(build_name)
|
should_wakeup = self.db.remove(build_name)
|
||||||
else:
|
else:
|
||||||
_logger.error(f"Unexpected k8s event type {event_type}.")
|
_logger.error(f"Unexpected k8s event type {event_type}.")
|
||||||
|
if should_wakeup:
|
||||||
self._wakeup()
|
self._wakeup()
|
||||||
|
|
||||||
async def job_watcher(self) -> None:
|
async def job_watcher(self) -> None:
|
||||||
|
|
@ -117,7 +118,6 @@ class Controller:
|
||||||
job_kind = job.metadata.labels.get("runboat/job-kind")
|
job_kind = job.metadata.labels.get("runboat/job-kind")
|
||||||
if job_kind not in ("initialize", "cleanup"):
|
if job_kind not in ("initialize", "cleanup"):
|
||||||
continue
|
continue
|
||||||
_logger.debug(f"k8s {event_type} {job.kind} {job_kind} {build_name}")
|
|
||||||
if event_type in ("ADDED", "MODIFIED"):
|
if event_type in ("ADDED", "MODIFIED"):
|
||||||
build = self.db.get(build_name)
|
build = self.db.get(build_name)
|
||||||
if build is None:
|
if build is None:
|
||||||
|
|
@ -125,21 +125,27 @@ class Controller:
|
||||||
# is starting and the db has not been populated yet.
|
# is starting and the db has not been populated yet.
|
||||||
build = await Build.from_name(build_name)
|
build = await Build.from_name(build_name)
|
||||||
if build is None:
|
if build is None:
|
||||||
|
_logger.warning(
|
||||||
|
f"Received job event for {build_name} "
|
||||||
|
f"but the corresponding deployment is gone. "
|
||||||
|
f"Deleting all build resources."
|
||||||
|
)
|
||||||
|
await k8s.delete_resources(build_name)
|
||||||
continue
|
continue
|
||||||
if job_kind == "initialize":
|
if job_kind == "initialize":
|
||||||
if job.status.succeeded:
|
if job.status.active:
|
||||||
|
await build.on_initialize_started()
|
||||||
|
elif job.status.succeeded:
|
||||||
await build.on_initialize_succeeded()
|
await build.on_initialize_succeeded()
|
||||||
elif job.status.failed:
|
elif job.status.failed:
|
||||||
await build.on_initialize_failed()
|
await build.on_initialize_failed()
|
||||||
else:
|
|
||||||
await build.on_initialize_started()
|
|
||||||
if job_kind == "cleanup":
|
if job_kind == "cleanup":
|
||||||
if job.status.succeeded:
|
if job.status.active:
|
||||||
|
await build.on_cleanup_started()
|
||||||
|
elif job.status.succeeded:
|
||||||
await build.on_cleanup_succeeded()
|
await build.on_cleanup_succeeded()
|
||||||
elif job.status.failed:
|
elif job.status.failed:
|
||||||
await build.on_cleanup_failed()
|
await build.on_cleanup_failed()
|
||||||
else:
|
|
||||||
await build.on_cleanup_started()
|
|
||||||
elif event_type == "DELETED":
|
elif event_type == "DELETED":
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
@ -155,8 +161,8 @@ class Controller:
|
||||||
if not to_initialize:
|
if not to_initialize:
|
||||||
continue # nothing startable, back to sleep
|
continue # nothing startable, back to sleep
|
||||||
_logger.info(
|
_logger.info(
|
||||||
f"{self.initializing}/{self.max_initializing} builds are initializing. "
|
f"{self.initializing} builds of max {self.max_initializing} "
|
||||||
f"Initializing {len(to_initialize)} more."
|
f"are initializing. Initializing {len(to_initialize)} more."
|
||||||
)
|
)
|
||||||
for build in to_initialize:
|
for build in to_initialize:
|
||||||
await build.initialize()
|
await build.initialize()
|
||||||
|
|
@ -171,7 +177,7 @@ class Controller:
|
||||||
if not to_stop:
|
if not to_stop:
|
||||||
continue # nothing stoppable, back to sleep
|
continue # nothing stoppable, back to sleep
|
||||||
_logger.info(
|
_logger.info(
|
||||||
f"{self.started}/{self.max_started} builds started. "
|
f"{self.started} builds of max {self.max_started} are started. "
|
||||||
f"Stopping {len(to_stop)}."
|
f"Stopping {len(to_stop)}."
|
||||||
)
|
)
|
||||||
for build in to_stop:
|
for build in to_stop:
|
||||||
|
|
@ -187,7 +193,7 @@ class Controller:
|
||||||
if not to_undeploy:
|
if not to_undeploy:
|
||||||
continue # nothing undeployable, back to sleep
|
continue # nothing undeployable, back to sleep
|
||||||
_logger.info(
|
_logger.info(
|
||||||
f"{self.deployed}/{self.max_deployed} builds deployed. "
|
f"{self.deployed} builds of max {self.max_deployed} are deployed. "
|
||||||
f"Undeploying {len(to_undeploy)}."
|
f"Undeploying {len(to_undeploy)}."
|
||||||
)
|
)
|
||||||
for build in to_undeploy:
|
for build in to_undeploy:
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,17 @@
|
||||||
|
import logging
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
from .models import BranchOrPull, Build, BuildInitStatus, BuildStatus
|
from .models import BranchOrPull, Build, BuildInitStatus, BuildStatus
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class BuildsDb:
|
class BuildsDb:
|
||||||
"""An in-memory database of builds.
|
"""An in-memory database of builds.
|
||||||
|
|
||||||
It is maintained up-to-date by the controller that receives events from the cluster.
|
It is maintained up-to-date by the controller that receives events
|
||||||
We use sqlite3 to facilitate queries and sorting, such as counting by status, or
|
from the cluster. We use sqlite3 to facilitate queries and sorting,
|
||||||
finding oldest builds.
|
such as counting by status, or finding oldest builds.
|
||||||
|
|
||||||
Querying it on each event from k8s is probably not the most efficient, but this
|
|
||||||
should do for a start, and there are plenty of ways to optimize.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_con: sqlite3.Connection
|
_con: sqlite3.Connection
|
||||||
|
|
@ -35,6 +35,7 @@ class BuildsDb:
|
||||||
" pr INTEGER, "
|
" pr INTEGER, "
|
||||||
" git_commit TEXT NOT NULL, "
|
" git_commit TEXT NOT NULL, "
|
||||||
" image TEXT NOT NULL,"
|
" image TEXT NOT NULL,"
|
||||||
|
" desired_replicas INTEGER NOT NULL,"
|
||||||
" status TEXT NOT NULL, "
|
" status TEXT NOT NULL, "
|
||||||
" init_status TEXT NOT NULL, "
|
" init_status TEXT NOT NULL, "
|
||||||
" last_scaled TEXT, "
|
" last_scaled TEXT, "
|
||||||
|
|
@ -69,10 +70,17 @@ class BuildsDb:
|
||||||
return self._build_from_row(row)
|
return self._build_from_row(row)
|
||||||
|
|
||||||
def remove(self, name: str) -> None:
|
def remove(self, name: str) -> None:
|
||||||
|
if self.get(name) is None:
|
||||||
|
return False # no change
|
||||||
with self._con:
|
with self._con:
|
||||||
self._con.execute("DELETE FROM builds WHERE name=?", (name,))
|
self._con.execute("DELETE FROM builds WHERE name=?", (name,))
|
||||||
|
_logger.info("Noticed removal of %s", name)
|
||||||
|
return True
|
||||||
|
|
||||||
def add(self, build: Build) -> None:
|
def add(self, build: Build) -> None:
|
||||||
|
prev_build = self.get(build.name)
|
||||||
|
if prev_build == build:
|
||||||
|
return False # no change
|
||||||
with self._con:
|
with self._con:
|
||||||
self._con.execute(
|
self._con.execute(
|
||||||
"INSERT OR REPLACE INTO builds "
|
"INSERT OR REPLACE INTO builds "
|
||||||
|
|
@ -84,12 +92,13 @@ class BuildsDb:
|
||||||
" pr,"
|
" pr,"
|
||||||
" git_commit,"
|
" git_commit,"
|
||||||
" image,"
|
" image,"
|
||||||
|
" desired_replicas,"
|
||||||
" status,"
|
" status,"
|
||||||
" init_status, "
|
" init_status, "
|
||||||
" last_scaled, "
|
" last_scaled, "
|
||||||
" created"
|
" created"
|
||||||
") "
|
") "
|
||||||
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
(
|
(
|
||||||
build.name,
|
build.name,
|
||||||
build.deployment_name,
|
build.deployment_name,
|
||||||
|
|
@ -98,12 +107,27 @@ class BuildsDb:
|
||||||
build.pr,
|
build.pr,
|
||||||
build.git_commit,
|
build.git_commit,
|
||||||
build.image,
|
build.image,
|
||||||
|
build.desired_replicas,
|
||||||
build.status,
|
build.status,
|
||||||
build.init_status,
|
build.init_status,
|
||||||
build.last_scaled,
|
build.last_scaled.isoformat(),
|
||||||
build.created.isoformat(),
|
build.created.isoformat(),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
if prev_build is None:
|
||||||
|
action = "addition"
|
||||||
|
else:
|
||||||
|
action = "update"
|
||||||
|
_logger.info(
|
||||||
|
"Noticed %s of %s (%s/%s/desired_replicas=%s/last_scaled=%s)",
|
||||||
|
action,
|
||||||
|
build,
|
||||||
|
build.status,
|
||||||
|
build.init_status,
|
||||||
|
build.desired_replicas,
|
||||||
|
build.last_scaled,
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
def count_by_status(self, status: BuildStatus) -> int:
|
def count_by_status(self, status: BuildStatus) -> int:
|
||||||
return self._con.execute(
|
return self._con.execute(
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ def _github_get(url: str) -> Any:
|
||||||
}
|
}
|
||||||
if settings.github_token:
|
if settings.github_token:
|
||||||
headers["Authorization"] = f"token {settings.github_token}"
|
headers["Authorization"] = f"token {settings.github_token}"
|
||||||
response = requests.get(full_url, headers)
|
response = requests.get(full_url, headers=headers)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise NotFoundOnGithub(f"GitHub URL not found: {full_url}.")
|
raise NotFoundOnGithub(f"GitHub URL not found: {full_url}.")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,9 @@ def _render_kubefiles(deployment_vars: DeploymentVars) -> Generator[Path, None,
|
||||||
|
|
||||||
|
|
||||||
async def _kubectl(args: list[str]) -> None:
|
async def _kubectl(args: list[str]) -> None:
|
||||||
proc = await asyncio.create_subprocess_exec("kubectl", *args)
|
proc = await asyncio.create_subprocess_exec(
|
||||||
|
"kubectl", *args, stdout=subprocess.DEVNULL
|
||||||
|
)
|
||||||
return_code = await proc.wait()
|
return_code = await proc.wait()
|
||||||
if return_code != 0:
|
if return_code != 0:
|
||||||
raise subprocess.CalledProcessError(return_code, ["kubectl"] + args)
|
raise subprocess.CalledProcessError(return_code, ["kubectl"] + args)
|
||||||
|
|
@ -159,13 +161,7 @@ async def deploy(deployment_vars: DeploymentVars) -> None:
|
||||||
str(tmp_path),
|
str(tmp_path),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
await _kubectl(
|
await _kubectl(["apply", "-k", str(tmp_path), "--wait=false"])
|
||||||
[
|
|
||||||
"apply",
|
|
||||||
"-k",
|
|
||||||
str(tmp_path),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def delete_resources(build_name: str) -> None:
|
async def delete_resources(build_name: str) -> None:
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,9 @@ kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: odoo
|
name: odoo
|
||||||
annotations:
|
annotations:
|
||||||
runboat/init-status: "todo" # ask controller to start when there is capacity
|
runboat/init-status: "todo" # ask controller to initialize when there is capacity
|
||||||
spec:
|
spec:
|
||||||
replicas: 0 # deploy idle (with 0 replica)
|
replicas: 0 # deploy idle
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: odoo
|
app: odoo
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ set -ex
|
||||||
|
|
||||||
#
|
#
|
||||||
# Clone an addons repository at git reference in $ADDONS_DIR.
|
# Clone an addons repository at git reference in $ADDONS_DIR.
|
||||||
# Run oca_install_addons and oca_init_db on it.
|
# Run oca_install_addons on it.
|
||||||
#
|
#
|
||||||
|
|
||||||
git clone --filter=blob:none $RUNBOAT_GIT_REPO $ADDONS_DIR
|
git clone --filter=blob:none $RUNBOAT_GIT_REPO $ADDONS_DIR
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ class BuildStatus(str, Enum):
|
||||||
|
|
||||||
|
|
||||||
class BuildInitStatus(str, Enum):
|
class BuildInitStatus(str, Enum):
|
||||||
todo = "todo" # to initialize as soon as there is capacity
|
todo = "todo" # to initialize and start as soon as there is capacity
|
||||||
started = "started" # initialization job running
|
started = "started" # initialization job running
|
||||||
succeeded = "succeeded" # initialization job succeeded
|
succeeded = "succeeded" # initialization job succeeded
|
||||||
failed = "failed" # initialization job failed
|
failed = "failed" # initialization job failed
|
||||||
|
|
@ -41,12 +41,26 @@ class Build(BaseModel):
|
||||||
image: str
|
image: str
|
||||||
status: BuildStatus
|
status: BuildStatus
|
||||||
init_status: BuildInitStatus
|
init_status: BuildInitStatus
|
||||||
|
desired_replicas: int
|
||||||
last_scaled: datetime.datetime
|
last_scaled: datetime.datetime
|
||||||
created: datetime.datetime
|
created: datetime.datetime
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f"{self.slug} ({self.name})"
|
return f"{self.slug} ({self.name})"
|
||||||
|
|
||||||
|
def __eq__(self, other: "Build") -> bool:
|
||||||
|
if other is None:
|
||||||
|
return False
|
||||||
|
if self.name != other.name:
|
||||||
|
return False
|
||||||
|
# Ignore fields that are immutable by design.
|
||||||
|
return (
|
||||||
|
self.status == other.status
|
||||||
|
and self.init_status == other.init_status
|
||||||
|
and self.desired_replicas == other.desired_replicas
|
||||||
|
and self.last_scaled == other.last_scaled
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def from_name(cls, build_name: str) -> Optional["Build"]:
|
async def from_name(cls, build_name: str) -> Optional["Build"]:
|
||||||
"""Create a Build model by reading the k8s api."""
|
"""Create a Build model by reading the k8s api."""
|
||||||
|
|
@ -67,8 +81,9 @@ class Build(BaseModel):
|
||||||
image=deployment.spec.template.spec.containers[0].image,
|
image=deployment.spec.template.spec.containers[0].image,
|
||||||
init_status=deployment.metadata.annotations["runboat/init-status"],
|
init_status=deployment.metadata.annotations["runboat/init-status"],
|
||||||
status=cls._status_from_deployment(deployment),
|
status=cls._status_from_deployment(deployment),
|
||||||
|
desired_replicas=deployment.spec.replicas or 0,
|
||||||
last_scaled=deployment.metadata.annotations.get("runboat/last-scaled")
|
last_scaled=deployment.metadata.annotations.get("runboat/last-scaled")
|
||||||
or datetime.datetime.utcnow(),
|
or deployment.metadata.creation_timestamp,
|
||||||
created=deployment.metadata.creation_timestamp,
|
created=deployment.metadata.creation_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -82,7 +97,7 @@ class Build(BaseModel):
|
||||||
elif init_status == BuildInitStatus.failed:
|
elif init_status == BuildInitStatus.failed:
|
||||||
return BuildStatus.failed
|
return BuildStatus.failed
|
||||||
elif init_status == BuildInitStatus.succeeded:
|
elif init_status == BuildInitStatus.succeeded:
|
||||||
replicas = deployment.status.replicas
|
replicas = deployment.spec.replicas
|
||||||
if not replicas:
|
if not replicas:
|
||||||
return BuildStatus.stopped
|
return BuildStatus.stopped
|
||||||
else:
|
else:
|
||||||
|
|
@ -142,19 +157,17 @@ class Build(BaseModel):
|
||||||
elif self.status == BuildStatus.failed:
|
elif self.status == BuildStatus.failed:
|
||||||
_logger.info(f"Marking failed {self} for reinitialization.")
|
_logger.info(f"Marking failed {self} for reinitialization.")
|
||||||
await k8s.delete_job(self.name, job_kind="initialize")
|
await k8s.delete_job(self.name, job_kind="initialize")
|
||||||
await self._patch(
|
await self._patch(init_status=BuildInitStatus.todo, desired_replicas=0)
|
||||||
init_status=BuildInitStatus.todo, replicas=0, update_last_scaled=False
|
|
||||||
)
|
|
||||||
elif self.status == BuildStatus.stopped:
|
elif self.status == BuildStatus.stopped:
|
||||||
_logger.info(f"Starting {self}.")
|
_logger.info(f"Starting {self} that was last scaled on {self.last_scaled}.")
|
||||||
await self._patch(replicas=1, update_last_scaled=True)
|
await self._patch(desired_replicas=1)
|
||||||
|
|
||||||
async def stop(self) -> None:
|
async def stop(self) -> None:
|
||||||
if self.status == BuildStatus.started:
|
if self.status == BuildStatus.started:
|
||||||
_logger.info(f"Stopping {self}.")
|
_logger.info(f"Stopping {self} that was last scaled on {self.last_scaled}.")
|
||||||
await self._patch(replicas=0, update_last_scaled=True)
|
await self._patch(desired_replicas=0)
|
||||||
else:
|
else:
|
||||||
_logger.info("Ignoring stop command for {self} " "that is not started.")
|
_logger.info(f"Ignoring stop command for {self} that is not started.")
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def initialize(self) -> None:
|
||||||
# Start initizalization job. on_init_started/on_init_succeeded/on_init_failed
|
# Start initizalization job. on_init_started/on_init_succeeded/on_init_failed
|
||||||
|
|
@ -193,9 +206,7 @@ class Build(BaseModel):
|
||||||
if self.init_status == BuildInitStatus.started:
|
if self.init_status == BuildInitStatus.started:
|
||||||
return
|
return
|
||||||
_logger.info(f"Initialization job started for {self}.")
|
_logger.info(f"Initialization job started for {self}.")
|
||||||
await self._patch(
|
await self._patch(init_status=BuildInitStatus.started, desired_replicas=0)
|
||||||
init_status=BuildInitStatus.started, replicas=0, update_last_scaled=True
|
|
||||||
)
|
|
||||||
|
|
||||||
async def on_initialize_succeeded(self) -> None:
|
async def on_initialize_succeeded(self) -> None:
|
||||||
if self.init_status == BuildInitStatus.succeeded:
|
if self.init_status == BuildInitStatus.succeeded:
|
||||||
|
|
@ -203,9 +214,7 @@ class Build(BaseModel):
|
||||||
# succeeded old initialization jobs after a controller restart.
|
# succeeded old initialization jobs after a controller restart.
|
||||||
return
|
return
|
||||||
_logger.info(f"Initialization job succeded for {self}, starting.")
|
_logger.info(f"Initialization job succeded for {self}, starting.")
|
||||||
await self._patch(
|
await self._patch(init_status=BuildInitStatus.succeeded, desired_replicas=1)
|
||||||
init_status=BuildInitStatus.succeeded, replicas=1, update_last_scaled=True
|
|
||||||
)
|
|
||||||
|
|
||||||
async def on_initialize_failed(self) -> None:
|
async def on_initialize_failed(self) -> None:
|
||||||
if self.init_status == BuildInitStatus.failed:
|
if self.init_status == BuildInitStatus.failed:
|
||||||
|
|
@ -213,15 +222,13 @@ class Build(BaseModel):
|
||||||
# restarting, and is notified of existing initialization jobs.
|
# restarting, and is notified of existing initialization jobs.
|
||||||
return
|
return
|
||||||
_logger.info(f"Initialization job failed for {self}.")
|
_logger.info(f"Initialization job failed for {self}.")
|
||||||
await self._patch(
|
await self._patch(init_status=BuildInitStatus.failed, desired_replicas=0)
|
||||||
init_status=BuildInitStatus.failed, replicas=0, update_last_scaled=True
|
|
||||||
)
|
|
||||||
|
|
||||||
async def on_cleanup_started(self) -> None:
|
async def on_cleanup_started(self) -> None:
|
||||||
|
if self.init_status == BuildInitStatus.cleaning:
|
||||||
|
return
|
||||||
_logger.info(f"Cleanup job started for {self}.")
|
_logger.info(f"Cleanup job started for {self}.")
|
||||||
await self._patch(
|
await self._patch(init_status=BuildInitStatus.cleaning, desired_replicas=0)
|
||||||
init_status=BuildInitStatus.cleaning, replicas=0, update_last_scaled=False
|
|
||||||
)
|
|
||||||
|
|
||||||
async def on_cleanup_succeeded(self) -> None:
|
async def on_cleanup_succeeded(self) -> None:
|
||||||
_logger.info(f"Cleanup job succeeded for {self}, deleting resources.")
|
_logger.info(f"Cleanup job succeeded for {self}, deleting resources.")
|
||||||
|
|
@ -235,11 +242,10 @@ class Build(BaseModel):
|
||||||
async def _patch(
|
async def _patch(
|
||||||
self,
|
self,
|
||||||
init_status: BuildInitStatus | None = None,
|
init_status: BuildInitStatus | None = None,
|
||||||
replicas: int | None = None,
|
desired_replicas: int | None = None,
|
||||||
update_last_scaled: bool = True,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
ops = []
|
ops = []
|
||||||
if init_status is not None:
|
if init_status is not None and init_status != self.init_status:
|
||||||
ops.extend(
|
ops.extend(
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
|
|
@ -249,21 +255,23 @@ class Build(BaseModel):
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
if replicas is not None:
|
if desired_replicas is not None and desired_replicas != self.desired_replicas:
|
||||||
ops.append(
|
ops.extend(
|
||||||
|
[
|
||||||
{
|
{
|
||||||
"op": "replace",
|
"op": "replace",
|
||||||
"path": "/spec/replicas",
|
"path": "/spec/replicas",
|
||||||
"value": replicas,
|
"value": desired_replicas,
|
||||||
},
|
},
|
||||||
)
|
|
||||||
if update_last_scaled:
|
|
||||||
ops.append(
|
|
||||||
{
|
{
|
||||||
"op": "replace",
|
"op": "replace",
|
||||||
"path": "/metadata/annotations/runboat~1last-scaled",
|
"path": "/metadata/annotations/runboat~1last-scaled",
|
||||||
"value": datetime.datetime.utcnow().isoformat() + "Z",
|
"value": datetime.datetime.utcnow()
|
||||||
|
.replace(microsecond=0)
|
||||||
|
.isoformat()
|
||||||
|
+ "Z",
|
||||||
},
|
},
|
||||||
|
]
|
||||||
)
|
)
|
||||||
await k8s.patch_deployment(self.deployment_name, ops)
|
await k8s.patch_deployment(self.deployment_name, ops)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue