Azure · meecethereese · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
@@ -173,10 +173,14 @@ done
 # False even though the Gateway is fully functional. We check for either:
 #   - Programmed=True (cloud / LoadBalancer environments), or
 #   - The gateway service exists with a port and the listener is programmed (Kind / bare-metal)
+#
+# kubectl wait --for=condition=Programmed is not used here because in Kind the
+# gateway-level Programmed condition stays False — kubectl wait would block for
+# the full timeout before the fallback ever runs.
 
-echo "Checking Gateway status..."
+echo "Waiting for Gateway to be ready..."
 gateway_ready=""
-for i in {1..60}; do
+for i in {1..360}; do
     # Check if the Gateway is fully programmed (cloud environments)
     gateway_programmed=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
         -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null || echo "")
@@ -186,13 +190,8 @@ for i in {1..60}; do
         break
     fi
 
-    # Check if the gateway service exists with the listener port and the
-    # listener is programmed. Istio names the auto-created service
-    # "{gateway}-istio". The service has multiple ports (e.g. 15021 for health
-    # checks) — we check specifically for the gateway listener port.
-    # In Kind the service type is LoadBalancer (pending) but the port is still
-    # reachable via the cluster — so having the correct port + a programmed
-    # listener is sufficient to consider the gateway ready.
+    # Kind / bare-metal fallback: check listener-level condition + service port.
+    # Istio names the auto-created service "{gateway}-istio".
     listener_port=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
         -o jsonpath='{.spec.listeners[?(@.name=="http")].port}' 2>/dev/null || echo "")
     gateway_svc_port=$(kubectl get svc "${RELEASE_NAME}-istio" -n "$NAMESPACE" \
@@ -205,104 +204,65 @@ for i in {1..60}; do
         break
     fi
 
-    # If status field is completely empty the controller may not be installed
-    gateway_status=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
-        -o jsonpath='{.status}' 2>/dev/null || echo "")
-    if [ -z "$gateway_status" ] && [ "$i" -ge 10 ]; then
-        echo "WARNING: Gateway has no status after $((i * 5)) seconds — a Gateway API controller may not be installed"
-        echo "Skipping Gateway status checks"
-        break
+    if [ "$i" -eq 360 ]; then
+        echo "ERROR: Gateway is not ready after 30 minutes"
+        kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" -o yaml || true
+        exit 1
     fi
 
-    echo "Waiting for Gateway to be ready... (attempt $i/60)"
+    echo "Waiting for Gateway to be ready... (attempt $i/360)"
     sleep 5
 done
 
 echo "Gateway configuration:"
 kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE"
 
-if [ "$gateway_ready" = "true" ]; then
-    # ---------------------------------------------------------------------------
-    # Wait for HTTPRoute to be accepted and resolved (only if controller is active)
-    # ---------------------------------------------------------------------------
-
-    echo "Checking HTTPRoute status..."
-    httproute_accepted=""
-    httproute_resolved=""
-    for i in {1..60}; do
-        httproute_accepted=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-            -o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
-        httproute_resolved=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-            -o jsonpath='{.status.parents[0].conditions[?(@.type=="ResolvedRefs")].status}' 2>/dev/null || echo "")
-
-        if [ "$httproute_accepted" = "True" ] && [ "$httproute_resolved" = "True" ]; then
-            echo "✓ HTTPRoute is accepted and refs are resolved"
-            break
-        fi
-        echo "Waiting for HTTPRoute to be ready... (attempt $i/60)"
-        sleep 5
-    done
-
-    if [ "$httproute_accepted" != "True" ] || [ "$httproute_resolved" != "True" ]; then
-        echo "ERROR: HTTPRoute is not ready after 5 minutes"
-        echo "HTTPRoute accepted: $httproute_accepted, resolved: $httproute_resolved"
-        kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-            -o jsonpath='{.status.parents[0].conditions}' | jq . || echo "Could not get HTTPRoute conditions"
-        exit 1
-    fi
-
-    echo "HTTPRoute configuration:"
-    kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"
-else
-    echo "Skipping HTTPRoute status checks (no active Gateway controller detected)"
-    echo "HTTPRoute configuration:"
-    kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"
-fi
-
 # ---------------------------------------------------------------------------
-# Wait for server deployment and pods to be ready
+# Wait for HTTPRoute to be accepted and resolved
 # ---------------------------------------------------------------------------
 
-echo "Waiting for server deployment to be ready..."
-kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE" --timeout=300s
-
-echo "Waiting for all server pods to be ready..."
-for i in {1..36}; do
-    ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-        -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}' | grep -o "True" | wc -l)
-    total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-        --no-headers | wc -l)
+# HTTPRoute conditions are nested under .status.parents[0].conditions, while
+# kubectl wait --for=condition= only reads top-level .status.conditions.
 
-    echo "Ready pods: $ready_pods/$total_pods"
+echo "Waiting for HTTPRoute to be ready..."
+for i in {1..360}; do
+    httproute_accepted=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
+        -o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
+    httproute_resolved=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
+        -o jsonpath='{.status.parents[0].conditions[?(@.type=="ResolvedRefs")].status}' 2>/dev/null || echo "")
 
-    if [ "$ready_pods" -eq "$total_pods" ] && [ "$total_pods" -gt 0 ]; then
-        echo "✓ All server pods are ready"
+    if [ "$httproute_accepted" = "True" ] && [ "$httproute_resolved" = "True" ]; then
+        echo "✓ HTTPRoute is accepted and refs are resolved"
         break
     fi
 
-    if [ "$i" -eq 36 ]; then
-        echo "ERROR: Server pods not ready after 6 minutes"
-        echo "Pods that are not ready:"
-        kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-            -o custom-columns="NAME:.metadata.name,STATUS:.status.phase,READY:.status.conditions[?(@.type=='Ready')].status,REASON:.status.containerStatuses[0].state.waiting.reason"
-
-        not_ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-            -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
-            | grep -v "True" | cut -d' ' -f1)
-
-        if [ -n "$not_ready_pods" ]; then
-            for pod in $not_ready_pods; do
-                echo "--- Pod: $pod ---"
-                kubectl describe pod "$pod" -n "$NAMESPACE" | tail -20
-                echo "--- End Pod: $pod ---"
-            done
-        fi
-
+    if [ "$i" -eq 360 ]; then
+        echo "ERROR: HTTPRoute is not ready after 30 minutes"
+        echo "HTTPRoute accepted: $httproute_accepted, resolved: $httproute_resolved"
+        kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
+            -o jsonpath='{.status.parents[0].conditions}' | jq . || echo "Could not get HTTPRoute conditions"
         exit 1
     fi
 
-    echo "Waiting for server pods to be ready... (attempt $i/36)"
-    sleep 10
+    echo "Waiting for HTTPRoute to be ready... (attempt $i/360)"
+    sleep 5
 done
 
+echo "HTTPRoute configuration:"
+kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"
+
+# ---------------------------------------------------------------------------
+# Wait for server deployment and pods to be ready
+# ---------------------------------------------------------------------------
+
+echo "Waiting for server deployment to be ready..."
+kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE"
+
+# Wait for the Istio gateway proxy pod, which the gateway controller creates
+# separately and which may still be Pending after the server rollout. Allow
+# 30 minutes, matching the Gateway and HTTPRoute waits above.
+echo "Waiting for Istio gateway pod to be ready..."
+kubectl wait --for=condition=Ready pod \
+    -l "gateway.networking.k8s.io/gateway-name=$RELEASE_NAME" -n "$NAMESPACE" --timeout=1800s
+
 echo "Server deployed successfully with Gateway API (class: $GATEWAY_CLASS)"
@@ -133,19 +133,20 @@ echo "  Tolerations File: ${TOLERATIONS_FILE:-<none>}"
 # Retry helm install to handle transient webhook readiness issues.
 # The ingress-nginx admission webhook Service can take a few extra seconds
 # for kube-proxy iptables rules to propagate even after the controller pod
-# is Ready, causing "connection refused" on the first attempt.
+# is Ready, causing "connection refused" on the first attempt. At large scale
+# (many nodes / many iptables rules) it takes longer: 360 tries, 5s apart.
 HELM_INSTALLED=false
-for attempt in $(seq 1 5); do
+for attempt in {1..360}; do
     if helm upgrade --install "$RELEASE_NAME" "$CHART_PATH" "${HELM_ARGS[@]}"; then
         HELM_INSTALLED=true
         break
     fi
-    echo "Helm install attempt $attempt/5 failed, retrying in 5s..."
+    echo "Helm install attempt $attempt/360 failed, retrying in 5s..."
     sleep 5
 done
 
 if [ "$HELM_INSTALLED" = false ]; then
-    echo "ERROR: Helm install failed after 5 attempts"
+    echo "ERROR: Helm install failed after 360 attempts"
     exit 1
 fi
 
@@ -154,17 +155,17 @@ fi
 # ---------------------------------------------------------------------------
 
 echo "Waiting for Ingress resource to be created..."
-for i in {1..30}; do
+for i in {1..360}; do
     if kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE" >/dev/null 2>&1; then
         echo "Ingress found: $RELEASE_NAME"
         break
     fi
-    if [ "$i" -eq 30 ]; then
-        echo "ERROR: Ingress resource was not created after 60 seconds"
+    if [ "$i" -eq 360 ]; then
+        echo "ERROR: Ingress resource was not created after 30 minutes"
         exit 1
     fi
-    echo "Waiting for Ingress resource... (attempt $i/30)"
-    sleep 2
+    echo "Waiting for Ingress resource... (attempt $i/360)"
+    sleep 5
 done
 
 # ---------------------------------------------------------------------------
@@ -179,7 +180,7 @@ done
 
 echo "Waiting for Ingress to be reachable..."
 ingress_ready=""
-for i in {1..60}; do
+for i in {1..360}; do
     # Check for a LoadBalancer address on the Ingress resource
     ingress_address=$(kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE" \
         -o jsonpath='{.status.loadBalancer.ingress[0].ip}{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "")
@@ -199,7 +200,7 @@ for i in {1..60}; do
         break
     fi
 
-    echo "Waiting for Ingress to be reachable... (attempt $i/60)"
+    echo "Waiting for Ingress to be reachable... (attempt $i/360)"
     sleep 5
 done
 
@@ -215,45 +216,6 @@ kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE"
 # ---------------------------------------------------------------------------
 
 echo "Waiting for server deployment to be ready..."
-kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE" --timeout=300s
-
-echo "Waiting for all server pods to be ready..."
-for i in {1..36}; do
-    ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-        -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}' | grep -o "True" | wc -l)
-    total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-        --no-headers | wc -l)
-
-    echo "Ready pods: $ready_pods/$total_pods"
-
-    if [ "$ready_pods" -eq "$total_pods" ] && [ "$total_pods" -gt 0 ]; then
-        echo "✓ All server pods are ready"
-        break
-    fi
-
-    if [ "$i" -eq 36 ]; then
-        echo "ERROR: Server pods not ready after 6 minutes"
-        echo "Pods that are not ready:"
-        kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-            -o custom-columns="NAME:.metadata.name,STATUS:.status.phase,READY:.status.conditions[?(@.type=='Ready')].status,REASON:.status.containerStatuses[0].state.waiting.reason"
-
-        not_ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-            -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
-            | grep -v "True" | cut -d' ' -f1)
-
-        if [ -n "$not_ready_pods" ]; then
-            for pod in $not_ready_pods; do
-                echo "--- Pod: $pod ---"
-                kubectl describe pod "$pod" -n "$NAMESPACE" | tail -20
-                echo "--- End Pod: $pod ---"
-            done
-        fi
-
-        exit 1
-    fi
-
-    echo "Waiting for server pods to be ready... (attempt $i/36)"
-    sleep 10
-done
+kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE"
 
 echo "Server deployed successfully with Ingress (class: $INGRESS_CLASS)"