Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 48 additions & 88 deletions scripts/setup/gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,14 @@ done
# False even though the Gateway is fully functional. We check for either:
# - Programmed=True (cloud / LoadBalancer environments), or
# - The gateway service exists with a port and the listener is programmed (Kind / bare-metal)
#
# kubectl wait --for=condition=Programmed does not work here because in Kind the
# gateway-level Programmed condition stays False — kubectl wait would block for
# the full timeout before the fallback ever runs.

echo "Checking Gateway status..."
echo "Waiting for Gateway to be ready..."
gateway_ready=""
for i in {1..60}; do
for i in {1..360}; do
# Check if the Gateway is fully programmed (cloud environments)
gateway_programmed=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' 2>/dev/null || echo "")
Expand All @@ -186,13 +190,8 @@ for i in {1..60}; do
break
fi

# Check if the gateway service exists with the listener port and the
# listener is programmed. Istio names the auto-created service
# "{gateway}-istio". The service has multiple ports (e.g. 15021 for health
# checks) — we check specifically for the gateway listener port.
# In Kind the service type is LoadBalancer (pending) but the port is still
# reachable via the cluster — so having the correct port + a programmed
# listener is sufficient to consider the gateway ready.
# Kind / bare-metal fallback: check listener-level condition + service port.
# Istio names the auto-created service "{gateway}-istio".
listener_port=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.spec.listeners[?(@.name=="http")].port}' 2>/dev/null || echo "")
gateway_svc_port=$(kubectl get svc "${RELEASE_NAME}-istio" -n "$NAMESPACE" \
Expand All @@ -205,104 +204,65 @@ for i in {1..60}; do
break
fi

# If status field is completely empty the controller may not be installed
gateway_status=$(kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status}' 2>/dev/null || echo "")
if [ -z "$gateway_status" ] && [ "$i" -ge 10 ]; then
echo "WARNING: Gateway has no status after $((i * 5)) seconds — a Gateway API controller may not be installed"
echo "Skipping Gateway status checks"
break
if [ "$i" -eq 360 ]; then
echo "ERROR: Gateway is not ready after 30 minutes"
kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE" -o yaml || true
exit 1
fi

echo "Waiting for Gateway to be ready... (attempt $i/60)"
echo "Waiting for Gateway to be ready... (attempt $i/360)"
sleep 5
done

echo "Gateway configuration:"
kubectl get gateway "$RELEASE_NAME" -n "$NAMESPACE"

if [ "$gateway_ready" = "true" ]; then
# ---------------------------------------------------------------------------
# Wait for HTTPRoute to be accepted and resolved (only if controller is active)
# ---------------------------------------------------------------------------

echo "Checking HTTPRoute status..."
httproute_accepted=""
httproute_resolved=""
for i in {1..60}; do
httproute_accepted=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
httproute_resolved=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions[?(@.type=="ResolvedRefs")].status}' 2>/dev/null || echo "")

if [ "$httproute_accepted" = "True" ] && [ "$httproute_resolved" = "True" ]; then
echo "✓ HTTPRoute is accepted and refs are resolved"
break
fi
echo "Waiting for HTTPRoute to be ready... (attempt $i/60)"
sleep 5
done

if [ "$httproute_accepted" != "True" ] || [ "$httproute_resolved" != "True" ]; then
echo "ERROR: HTTPRoute is not ready after 5 minutes"
echo "HTTPRoute accepted: $httproute_accepted, resolved: $httproute_resolved"
kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions}' | jq . || echo "Could not get HTTPRoute conditions"
exit 1
fi

echo "HTTPRoute configuration:"
kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"
else
echo "Skipping HTTPRoute status checks (no active Gateway controller detected)"
echo "HTTPRoute configuration:"
kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"
fi

# ---------------------------------------------------------------------------
# Wait for server deployment and pods to be ready
# Wait for HTTPRoute to be accepted and resolved
# ---------------------------------------------------------------------------

echo "Waiting for server deployment to be ready..."
kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE" --timeout=300s

echo "Waiting for all server pods to be ready..."
for i in {1..36}; do
ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}' | grep -o "True" | wc -l)
total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
--no-headers | wc -l)
# HTTPRoute conditions are nested under .status.parents[0].conditions, so
# kubectl wait --for=condition= does not work here.

echo "Ready pods: $ready_pods/$total_pods"
echo "Waiting for HTTPRoute to be ready..."
for i in {1..360}; do
httproute_accepted=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions[?(@.type=="Accepted")].status}' 2>/dev/null || echo "")
httproute_resolved=$(kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions[?(@.type=="ResolvedRefs")].status}' 2>/dev/null || echo "")

if [ "$ready_pods" -eq "$total_pods" ] && [ "$total_pods" -gt 0 ]; then
echo "✓ All server pods are ready"
if [ "$httproute_accepted" = "True" ] && [ "$httproute_resolved" = "True" ]; then
echo "✓ HTTPRoute is accepted and refs are resolved"
break
fi

if [ "$i" -eq 36 ]; then
echo "ERROR: Server pods not ready after 6 minutes"
echo "Pods that are not ready:"
kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o custom-columns="NAME:.metadata.name,STATUS:.status.phase,READY:.status.conditions[?(@.type=='Ready')].status,REASON:.status.containerStatuses[0].state.waiting.reason"

not_ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -v "True" | cut -d' ' -f1)

if [ -n "$not_ready_pods" ]; then
for pod in $not_ready_pods; do
echo "--- Pod: $pod ---"
kubectl describe pod "$pod" -n "$NAMESPACE" | tail -20
echo "--- End Pod: $pod ---"
done
fi

if [ "$i" -eq 360 ]; then
echo "ERROR: HTTPRoute is not ready after 30 minutes"
echo "HTTPRoute accepted: $httproute_accepted, resolved: $httproute_resolved"
kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.parents[0].conditions}' | jq . || echo "Could not get HTTPRoute conditions"
exit 1
fi

echo "Waiting for server pods to be ready... (attempt $i/36)"
sleep 10
echo "Waiting for HTTPRoute to be ready... (attempt $i/360)"
sleep 5
done

echo "HTTPRoute configuration:"
kubectl get httproute "$RELEASE_NAME" -n "$NAMESPACE"

# ---------------------------------------------------------------------------
# Wait for server deployment and pods to be ready
# ---------------------------------------------------------------------------

echo "Waiting for server deployment to be ready..."
kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE"

# Wait for the Istio gateway proxy pod to be ready. The gateway controller
# creates a separate pod for the proxy which may still be Pending after the
# server deployment has rolled out.
echo "Waiting for Istio gateway pod to be ready..."
kubectl wait --for=condition=Ready pod \
-l "gateway.networking.k8s.io/gateway-name=$RELEASE_NAME" -n "$NAMESPACE" --timeout=300s

echo "Server deployed successfully with Gateway API (class: $GATEWAY_CLASS)"
64 changes: 13 additions & 51 deletions scripts/setup/ingress.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,20 @@ echo " Tolerations File: ${TOLERATIONS_FILE:-<none>}"
# Retry helm install to handle transient webhook readiness issues.
# The ingress-nginx admission webhook Service can take a few extra seconds
# for kube-proxy iptables rules to propagate even after the controller pod
# is Ready, causing "connection refused" on the first attempt.
# is Ready, causing "connection refused" on the first attempt. At large scale
# (many nodes / many iptables rules) this can take significantly longer.
HELM_INSTALLED=false
for attempt in $(seq 1 5); do
for attempt in $(seq 1 360); do
if helm upgrade --install "$RELEASE_NAME" "$CHART_PATH" "${HELM_ARGS[@]}"; then
HELM_INSTALLED=true
break
fi
echo "Helm install attempt $attempt/5 failed, retrying in 5s..."
echo "Helm install attempt $attempt/360 failed, retrying in 5s..."
sleep 5
done

if [ "$HELM_INSTALLED" = false ]; then
echo "ERROR: Helm install failed after 5 attempts"
echo "ERROR: Helm install failed after 30 minutes"
exit 1
fi

Expand All @@ -154,17 +155,17 @@ fi
# ---------------------------------------------------------------------------

echo "Waiting for Ingress resource to be created..."
for i in {1..30}; do
for i in {1..360}; do
Comment thread
meecethereese marked this conversation as resolved.
if kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE" >/dev/null 2>&1; then
echo "Ingress found: $RELEASE_NAME"
break
fi
if [ "$i" -eq 30 ]; then
echo "ERROR: Ingress resource was not created after 60 seconds"
if [ "$i" -eq 360 ]; then
echo "ERROR: Ingress resource was not created after 30 minutes"
exit 1
fi
echo "Waiting for Ingress resource... (attempt $i/30)"
sleep 2
echo "Waiting for Ingress resource... (attempt $i/360)"
sleep 5
done

# ---------------------------------------------------------------------------
Expand All @@ -179,7 +180,7 @@ done

echo "Waiting for Ingress to be reachable..."
ingress_ready=""
for i in {1..60}; do
for i in {1..360}; do
# Check for a LoadBalancer address on the Ingress resource
ingress_address=$(kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE" \
-o jsonpath='{.status.loadBalancer.ingress[0].ip}{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "")
Expand All @@ -199,7 +200,7 @@ for i in {1..60}; do
break
fi

echo "Waiting for Ingress to be reachable... (attempt $i/60)"
echo "Waiting for Ingress to be reachable... (attempt $i/360)"
sleep 5
done

Expand All @@ -215,45 +216,6 @@ kubectl get ingress "$RELEASE_NAME" -n "$NAMESPACE"
# ---------------------------------------------------------------------------

echo "Waiting for server deployment to be ready..."
kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE" --timeout=300s

echo "Waiting for all server pods to be ready..."
for i in {1..36}; do
ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}' | grep -o "True" | wc -l)
total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
--no-headers | wc -l)

echo "Ready pods: $ready_pods/$total_pods"

if [ "$ready_pods" -eq "$total_pods" ] && [ "$total_pods" -gt 0 ]; then
echo "✓ All server pods are ready"
break
fi

if [ "$i" -eq 36 ]; then
echo "ERROR: Server pods not ready after 6 minutes"
echo "Pods that are not ready:"
kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o custom-columns="NAME:.metadata.name,STATUS:.status.phase,READY:.status.conditions[?(@.type=='Ready')].status,REASON:.status.containerStatuses[0].state.waiting.reason"

not_ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=server \
-o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
| grep -v "True" | cut -d' ' -f1)

if [ -n "$not_ready_pods" ]; then
for pod in $not_ready_pods; do
echo "--- Pod: $pod ---"
kubectl describe pod "$pod" -n "$NAMESPACE" | tail -20
echo "--- End Pod: $pod ---"
done
fi

exit 1
fi

echo "Waiting for server pods to be ready... (attempt $i/36)"
sleep 10
done
kubectl rollout status deployment/"$RELEASE_NAME" -n "$NAMESPACE"

echo "Server deployed successfully with Ingress (class: $INGRESS_CLASS)"
Loading