Better way to kill jobs

This commit is contained in:
Stéphane Bidoul 2021-11-28 13:30:39 +01:00
parent f720cbcac5
commit e961780d1b
No known key found for this signature in database
GPG key ID: BCAB2555446B5B92
2 changed files with 18 additions and 14 deletions

View file

@ -242,18 +242,20 @@ async def delete_resources(build_name: str) -> None:
)
async def delete_job(build_name: str, job_kind: DeploymentMode) -> None:
@sync_to_async
def kill_job(build_name: str, job_kind: DeploymentMode) -> None:
# TODO delete all resources with runboat/build and runboat/job-kind label
await _kubectl(
[
"-n",
settings.build_namespace,
"delete",
"job",
"-l",
f"runboat/build={build_name},runboat/job-kind={job_kind}",
"--wait=false",
]
batchv1 = client.BatchV1Api()
batchv1.delete_collection_namespaced_job(
namespace=settings.build_namespace,
label_selector=f"runboat/build={build_name},runboat/job-kind={job_kind}",
grace_period_seconds=0,
)
corev1 = client.CoreV1Api()
corev1.delete_collection_namespaced_pod(
namespace=settings.build_namespace,
label_selector=f"runboat/build={build_name},runboat/job-kind={job_kind}",
grace_period_seconds=0,
)

View file

@ -214,7 +214,7 @@ class Build(BaseModel):
return
elif self.status == BuildStatus.failed:
_logger.info(f"Marking failed {self} for reinitialization.")
await k8s.delete_job(self.name, job_kind=k8s.DeploymentMode.initialize)
await k8s.kill_job(self.name, job_kind=k8s.DeploymentMode.initialize)
if await self._patch(init_status=BuildInitStatus.todo, desired_replicas=0):
await github.notify_status(
self.commit_info.repo,
@ -259,8 +259,10 @@ class Build(BaseModel):
async def cleanup(self) -> None:
"""Launch the cleanup job."""
# Delete the initialization job to reduce conflict with the cleanup job.
await k8s.delete_job(self.name, job_kind=k8s.DeploymentMode.initialize)
# Kill the initialization job to reduce conflict with the cleanup job, such as
# the database being created by the initialization after the cleanup job has
# completed.
await k8s.kill_job(self.name, job_kind=k8s.DeploymentMode.initialize)
# Be sure the deployment is stopped.
await self._patch(desired_replicas=0, not_found_ok=True)
# Start cleanup job. on_cleanup_{started,succeeded,failed} callbacks will follow