Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion .buildkite/test-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,29 @@
- set -o pipefail
- mkdir -p "$(pwd)/tmp" && export KUBERAY_TEST_OUTPUT_DIR=$(pwd)/tmp
- echo "KUBERAY_TEST_OUTPUT_DIR=$$KUBERAY_TEST_OUTPUT_DIR"
- KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v ./test/e2erayservice 2>&1 | awk -f ../.buildkite/format.awk | tee $$KUBERAY_TEST_OUTPUT_DIR/gotest.log || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee $$KUBERAY_TEST_OUTPUT_DIR/kuberay-operator.log && cd $$KUBERAY_TEST_OUTPUT_DIR && find . -name "*.log" | tar -cf /artifact-mount/e2e-rayservice-log.tar -T - && exit 1)
- KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v -skip Suspend ./test/e2erayservice 2>&1 | awk -f ../.buildkite/format.awk | tee $$KUBERAY_TEST_OUTPUT_DIR/gotest.log || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee $$KUBERAY_TEST_OUTPUT_DIR/kuberay-operator.log && cd $$KUBERAY_TEST_OUTPUT_DIR && find . -name "*.log" | tar -cf /artifact-mount/e2e-rayservice-log.tar -T - && exit 1)
- echo "--- END:e2e rayservice (nightly operator) tests finished"

- label: 'Test E2E rayservice suspend (nightly operator)'
instance_size: large
image: golang:1.26-bookworm
commands:
- source .buildkite/setup-env.sh
- kind create cluster --wait 900s --config ./ci/kind-config-buildkite.yml
- kubectl config set clusters.kind-kind.server https://docker:6443
# Build nightly KubeRay operator image
- pushd ray-operator
- source ../.buildkite/build-start-operator.sh
- kubectl wait --timeout=90s --for=condition=Available=true deployment kuberay-operator
# Run suspend e2e tests and print KubeRay operator logs if tests fail
- echo "--- START:Running e2e rayservice suspend (nightly operator) tests"
- if [ -n "$${KUBERAY_TEST_RAY_IMAGE}" ]; then echo "Using Ray Image $${KUBERAY_TEST_RAY_IMAGE}"; fi
- set -o pipefail
- mkdir -p "$(pwd)/tmp" && export KUBERAY_TEST_OUTPUT_DIR=$(pwd)/tmp
- echo "KUBERAY_TEST_OUTPUT_DIR=$$KUBERAY_TEST_OUTPUT_DIR"
- KUBERAY_TEST_TIMEOUT_SHORT=1m KUBERAY_TEST_TIMEOUT_MEDIUM=5m KUBERAY_TEST_TIMEOUT_LONG=10m go test -timeout 30m -v -run Suspend ./test/e2erayservice 2>&1 | awk -f ../.buildkite/format.awk | tee $$KUBERAY_TEST_OUTPUT_DIR/gotest.log || (kubectl logs --tail -1 -l app.kubernetes.io/name=kuberay | tee $$KUBERAY_TEST_OUTPUT_DIR/kuberay-operator.log && cd $$KUBERAY_TEST_OUTPUT_DIR && find . -name "*.log" | tar -cf /artifact-mount/e2e-rayservice-suspend-log.tar -T - && exit 1)
- echo "--- END:e2e rayservice suspend (nightly operator) tests finished"

- label: 'Test RayService Incremental Upgrade E2E (nightly operator)'
instance_size: large
image: golang:1.26-bookworm
Expand Down
1 change: 1 addition & 0 deletions docs/reference/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ _Appears in:_
| `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file<br />Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | | |
| `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | | |
| `excludeHeadPodFromServeSvc` _boolean_ | If the field is set to true, the value of the label `ray.io/serve` on the head Pod should always be false.<br />Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service. | | |
| `suspend` _boolean_ | Suspend indicates whether the RayService should suspend its execution. When set to true,<br />all Kubernetes resources owned by the RayService controller (RayClusters, Kubernetes<br />Services, Gateway, HTTPRoute) will be deleted. Setting it back to false will allow the<br />RayService controller to recreate the resources. | | |



Expand Down
2 changes: 2 additions & 0 deletions helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions ray-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,12 @@ test-e2e-autoscaler: manifests fmt vet ## Run e2e autoscaler tests.
go test -timeout 30m -v $(WHAT)

test-e2e-rayservice: WHAT ?= ./test/e2erayservice
test-e2e-rayservice: manifests fmt vet ## Run e2e RayService tests.
go test -timeout 30m -v $(WHAT)
test-e2e-rayservice: manifests fmt vet ## Run e2e RayService tests (excluding suspend tests, which have their own target).
go test -timeout 30m -v -skip Suspend $(WHAT)

test-e2e-rayservice-suspend: WHAT ?= ./test/e2erayservice
test-e2e-rayservice-suspend: manifests fmt vet ## Run e2e RayService suspend tests.
go test -timeout 30m -v -run Suspend $(WHAT)

test-e2e-upgrade: WHAT ?= ./test/e2eupgrade
test-e2e-upgrade: manifests fmt vet ## Run e2e operator upgrade tests.
Expand Down
14 changes: 14 additions & 0 deletions ray-operator/apis/ray/v1/rayservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ type RayServiceSpec struct {
// Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service.
// +optional
ExcludeHeadPodFromServeSvc bool `json:"excludeHeadPodFromServeSvc,omitempty"`
// Suspend indicates whether the RayService should suspend its execution. When set to true,
// all Kubernetes resources owned by the RayService controller (RayClusters, Kubernetes
// Services, Gateway, HTTPRoute) will be deleted. Setting it back to false will allow the
// RayService controller to recreate the resources.
// +optional
Suspend bool `json:"suspend,omitempty"`
}

// RayServiceStatuses defines the observed state of RayService
Expand Down Expand Up @@ -209,6 +215,11 @@ const (
UpgradeInProgress RayServiceConditionType = "UpgradeInProgress"
// RollbackInProgress means the RayService is currently rolling back an in-progress upgrade to the original cluster state.
RollbackInProgress RayServiceConditionType = "RollbackInProgress"
// RayServiceSuspending means the RayService is in the middle of deleting its owned resources in response to Spec.Suspend.
// Once entered, the suspend operation always runs to completion, even if Spec.Suspend is changed again before it finishes.
RayServiceSuspending RayServiceConditionType = "Suspending"
// RayServiceSuspended means all resources owned by the RayService controller have been deleted and the RayService is suspended.
RayServiceSuspended RayServiceConditionType = "Suspended"
)

const (
Expand All @@ -221,6 +232,9 @@ const (
NoActiveCluster RayServiceConditionReason = "NoActiveCluster"
RayServiceValidationFailed RayServiceConditionReason = "ValidationFailed"
TargetClusterChanged RayServiceConditionReason = "TargetClusterChanged"
SuspendRequested RayServiceConditionReason = "SuspendRequested"
SuspendInProgress RayServiceConditionReason = "SuspendInProgress"
SuspendComplete RayServiceConditionReason = "SuspendComplete"
)

// +kubebuilder:object:root=true
Expand Down
2 changes: 2 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading