diff --git a/.gitignore b/.gitignore index a48fc2b5..9b477c9e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules/ lib/ dist/ **/tests/_temp/** -packages/k8s/tests/test-kind.yaml \ No newline at end of file +packages/k8s/tests/test-kind.yaml +.idea diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ae773da3..80f0ef54 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -673,6 +673,29 @@ export async function pruneSecrets(): Promise { ) } +const UNRECOVERABLE_WAITING_REASONS = new Set([ + 'ImagePullBackOff', + 'ErrImagePull', + 'InvalidImageName', + 'CreateContainerConfigError', + 'CreateContainerError' +]) + +function getContainerErrors(pod: k8s.V1Pod): string[] { + const errors: string[] = [] + const allStatuses = [ + ...(pod.status?.initContainerStatuses ?? []), + ...(pod.status?.containerStatuses ?? []) + ] + for (const cs of allStatuses) { + const waiting = cs.state?.waiting + if (waiting?.reason && UNRECOVERABLE_WAITING_REASONS.has(waiting.reason)) { + errors.push(`container "${cs.name}": ${waiting.reason}${waiting.message ? ` - ${waiting.message}` : ''}`) + } + } + return errors +} + export async function waitForPodPhases( podName: string, awaitingPhases: Set, @@ -683,7 +706,8 @@ export async function waitForPodPhases( let phase: PodPhase = PodPhase.UNKNOWN try { while (true) { - phase = await getPodPhase(podName) + const pod = await readPod(podName) + phase = parsePodPhase(pod) if (awaitingPhases.has(phase)) { return } @@ -693,11 +717,20 @@ export async function waitForPodPhases( `Pod ${podName} is unhealthy with phase status ${phase}` ) } + + const containerErrors = getContainerErrors(pod) + if (containerErrors.length > 0) { + throw new Error( + `Pod ${podName} has unrecoverable container errors: ${containerErrors.join('; ')}` + ) + } + await backOffManager.backOff() } } catch (error) { + const additionalPodErrors = error instanceof Error ? error.message : JSON.stringify(error) throw new Error( - `Pod ${podName} is unhealthy with phase status ${phase}: ${JSON.stringify(error)}` + `Pod ${podName} is unhealthy with phase status ${phase}: ${additionalPodErrors}` ) } } @@ -721,23 +754,26 @@ export function getPrepareJobTimeoutSeconds(): number { return timeoutSeconds } -async function getPodPhase(name: string): Promise { - const podPhaseLookup = new Set([ - PodPhase.PENDING, - PodPhase.RUNNING, - PodPhase.SUCCEEDED, - PodPhase.FAILED, - PodPhase.UNKNOWN - ]) - const pod = await k8sApi.readNamespacedPod({ +async function readPod(name: string): Promise { + return k8sApi.readNamespacedPod({ name, namespace: namespace() }) +} + +const podPhaseLookup = new Set([ + PodPhase.PENDING, + PodPhase.RUNNING, + PodPhase.SUCCEEDED, + PodPhase.FAILED, + PodPhase.UNKNOWN +]) +function parsePodPhase(pod: k8s.V1Pod): PodPhase { if (!pod.status?.phase || !podPhaseLookup.has(pod.status.phase)) { return PodPhase.UNKNOWN } - return pod.status?.phase as PodPhase + return pod.status.phase as PodPhase } async function isJobSucceeded(name: string): Promise {