From 9f3086ee74dcf40d24a38004f62a0053b11451d0 Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Fri, 10 Apr 2026 22:59:26 -0700 Subject: [PATCH 1/4] show image pull back off errors --- packages/k8s/src/k8s/index.ts | 62 ++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ae773da3..0d532938 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -673,6 +673,34 @@ export async function pruneSecrets(): Promise { ) } +const UNRECOVERABLE_WAITING_REASONS = new Set([ + 'ImagePullBackOff', + 'ErrImagePull', + 'InvalidImageName', + 'CreateContainerConfigError', + 'CreateContainerError' +]) + +function getContainerErrors( + pod: k8s.V1Pod +): string[] { + const errors: string[] = [] + const allStatuses = [ + ...(pod.status?.initContainerStatuses ?? []), + ...(pod.status?.containerStatuses ?? []) + ] + for (const cs of allStatuses) { + const waiting = cs.state?.waiting + if (waiting?.reason && UNRECOVERABLE_WAITING_REASONS.has(waiting.reason)) { + const detail = waiting.message + ? `${waiting.reason}: ${waiting.message}` + : waiting.reason + errors.push(`container "${cs.name}": ${detail}`) + } + } + return errors +} + export async function waitForPodPhases( podName: string, awaitingPhases: Set, @@ -683,7 +711,8 @@ export async function waitForPodPhases( let phase: PodPhase = PodPhase.UNKNOWN try { while (true) { - phase = await getPodPhase(podName) + const pod = await readPod(podName) + phase = parsePodPhase(pod) if (awaitingPhases.has(phase)) { return } @@ -693,6 +722,14 @@ export async function waitForPodPhases( `Pod ${podName} is unhealthy with phase status ${phase}` ) } + + const containerErrors = getContainerErrors(pod) + if (containerErrors.length > 0) { + throw new Error( + `Pod ${podName} has unrecoverable container errors: ${containerErrors.join('; ')}` + ) + } + await backOffManager.backOff() } } catch (error) { @@ -721,23 +758,26 @@ export function getPrepareJobTimeoutSeconds(): number { return timeoutSeconds } -async function getPodPhase(name: string): Promise { - const podPhaseLookup = new Set([ - PodPhase.PENDING, - PodPhase.RUNNING, - PodPhase.SUCCEEDED, - PodPhase.FAILED, - PodPhase.UNKNOWN - ]) - const pod = await k8sApi.readNamespacedPod({ +async function readPod(name: string): Promise { + return k8sApi.readNamespacedPod({ name, namespace: namespace() }) +} + +const podPhaseLookup = new Set([ + PodPhase.PENDING, + PodPhase.RUNNING, + PodPhase.SUCCEEDED, + PodPhase.FAILED, + PodPhase.UNKNOWN +]) +function parsePodPhase(pod: k8s.V1Pod): PodPhase { if (!pod.status?.phase || !podPhaseLookup.has(pod.status.phase)) { return PodPhase.UNKNOWN } - return pod.status?.phase as PodPhase + return pod.status.phase as PodPhase } async function isJobSucceeded(name: string): Promise { From 8632f9992e2fe73e3ad0e456506e141cbb752fb4 Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Tue, 14 Apr 2026 09:01:20 -0700 Subject: [PATCH 2/4] Lint --- packages/k8s/src/k8s/index.ts | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 0d532938..53bd7f02 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -681,9 +681,7 @@ const UNRECOVERABLE_WAITING_REASONS = new Set([ 'CreateContainerError' ]) -function getContainerErrors( - pod: k8s.V1Pod -): string[] { +function getContainerErrors(pod: k8s.V1Pod): string[] { const errors: string[] = [] const allStatuses = [ ...(pod.status?.initContainerStatuses ?? []), @@ -692,10 +690,7 @@ function getContainerErrors( for (const cs of allStatuses) { const waiting = cs.state?.waiting if (waiting?.reason && UNRECOVERABLE_WAITING_REASONS.has(waiting.reason)) { - const detail = waiting.message - ? `${waiting.reason}: ${waiting.message}` - : waiting.reason - errors.push(`container "${cs.name}": ${detail}`) + errors.push(`container "${cs.name}": ${waiting.reason}`) } } return errors From dff6829d69c5ed728d935a3e5ad0d2fdc428ad99 Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Tue, 14 Apr 2026 09:16:57 -0700 Subject: [PATCH 3/4] Add the full error string --- .gitignore | 3 ++- packages/k8s/src/k8s/index.ts | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a48fc2b5..9b477c9e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules/ lib/ dist/ **/tests/_temp/** -packages/k8s/tests/test-kind.yaml \ No newline at end of file +packages/k8s/tests/test-kind.yaml +.idea diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 53bd7f02..81cd79f5 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -690,7 +690,7 @@ function getContainerErrors(pod: k8s.V1Pod): string[] { for (const cs of allStatuses) { const waiting = cs.state?.waiting if (waiting?.reason && UNRECOVERABLE_WAITING_REASONS.has(waiting.reason)) { - errors.push(`container "${cs.name}": ${waiting.reason}`) + errors.push(`container "${cs.name}": ${waiting.reason}${waiting.message ? ` - ${waiting.message}` : ''}`) } } return errors From c2eff001ec81bc4313860b4159fc2b4fea3920b3 Mon Sep 17 00:00:00 2001 From: A Murphy Date: Wed, 15 Apr 2026 11:21:27 -0700 Subject: [PATCH 4/4] Revert "Show container pull errors" --- .gitignore | 3 +- packages/k8s/src/k8s/index.ts | 57 +++++++---------------------------- 2 files changed, 12 insertions(+), 48 deletions(-) diff --git a/.gitignore b/.gitignore index 9b477c9e..a48fc2b5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,4 @@ node_modules/ lib/ dist/ **/tests/_temp/** -packages/k8s/tests/test-kind.yaml -.idea +packages/k8s/tests/test-kind.yaml \ No newline at end of file diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 81cd79f5..ae773da3 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -673,29 +673,6 @@ export async function pruneSecrets(): Promise { ) } -const UNRECOVERABLE_WAITING_REASONS = new Set([ - 'ImagePullBackOff', - 'ErrImagePull', - 'InvalidImageName', - 'CreateContainerConfigError', - 'CreateContainerError' -]) - -function getContainerErrors(pod: k8s.V1Pod): string[] { - const errors: string[] = [] - const allStatuses = [ - ...(pod.status?.initContainerStatuses ?? []), - ...(pod.status?.containerStatuses ?? []) - ] - for (const cs of allStatuses) { - const waiting = cs.state?.waiting - if (waiting?.reason && UNRECOVERABLE_WAITING_REASONS.has(waiting.reason)) { - errors.push(`container "${cs.name}": ${waiting.reason}${waiting.message ? ` - ${waiting.message}` : ''}`) - } - } - return errors -} - export async function waitForPodPhases( podName: string, awaitingPhases: Set, @@ -706,8 +683,7 @@ export async function waitForPodPhases( let phase: PodPhase = PodPhase.UNKNOWN try { while (true) { - const pod = await readPod(podName) - phase = parsePodPhase(pod) + phase = await getPodPhase(podName) if (awaitingPhases.has(phase)) { return } @@ -717,14 +693,6 @@ export async function waitForPodPhases( `Pod ${podName} is unhealthy with phase status ${phase}` ) } - - const containerErrors = getContainerErrors(pod) - if (containerErrors.length > 0) { - throw new Error( - `Pod ${podName} has unrecoverable container errors: ${containerErrors.join('; ')}` - ) - } - await backOffManager.backOff() } } catch (error) { @@ -753,26 +721,23 @@ export function getPrepareJobTimeoutSeconds(): number { return timeoutSeconds } -async function readPod(name: string): Promise { - return k8sApi.readNamespacedPod({ +async function getPodPhase(name: string): Promise { + const podPhaseLookup = new Set([ + PodPhase.PENDING, + PodPhase.RUNNING, + PodPhase.SUCCEEDED, + PodPhase.FAILED, + PodPhase.UNKNOWN + ]) + const pod = await k8sApi.readNamespacedPod({ name, namespace: namespace() }) -} - -const podPhaseLookup = new Set([ - PodPhase.PENDING, - PodPhase.RUNNING, - PodPhase.SUCCEEDED, - PodPhase.FAILED, - PodPhase.UNKNOWN -]) -function parsePodPhase(pod: k8s.V1Pod): PodPhase { if (!pod.status?.phase || !podPhaseLookup.has(pod.status.phase)) { return PodPhase.UNKNOWN } - return pod.status.phase as PodPhase + return pod.status?.phase as PodPhase } async function isJobSucceeded(name: string): Promise {