Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,13 @@ Anyone can comment on issues and submit reviews for pull requests. In order to b

Before submitting a PR, please perform the following steps:

- List of steps to perform before submitting a PR.
- Run `make build`.
- Run `make verify`.
- Run `make test`.
- Run `make test-integration` for controller or manifest behavior changes.
- Run `make test-e2e` for user-facing proxy behavior changes.
- Run `make test-e2e-hosted` for hosted-mode behavior changes.

Use these make targets as the official test interface. A raw `go test ./...`
does not include generated manifests, envtest asset setup, linting, or the e2e
packaging used by CI.
35 changes: 35 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ IMAGE_REGISTRY_NAME ?= quay.io/open-cluster-management
IMAGE_NAME = cluster-proxy
IMAGE_TAG ?= latest
E2E_TEST_CLUSTER_NAME ?= e2e
E2E_HOSTED_HUB_CLUSTER_NAME ?= cluster-proxy-hosted-hub
E2E_HOSTED_HOSTING_CLUSTER_NAME ?= cluster-proxy-hosted-hosting
E2E_HOSTED_MANAGED_CLUSTER_NAME ?= cluster-proxy-hosted-managed
E2E_HOSTED_WORK_DIR ?= _output/e2e-hosted
E2E_HOSTED_PROXY_ENTRYPOINT_LOCAL_PORT ?= 18090
E2E_HOSTED_USER_SERVER_LOCAL_PORT ?= 19092
CONTAINER_ENGINE ?= docker
# Produce apiextensions.k8s.io/v1 CRDs (requires Kubernetes 1.16+; no version conversion)
CRD_OPTIONS ?= "crd:crdVersions={v1},allowDangerousTypes=true,generateEmbeddedObjectMeta=true"
Expand Down Expand Up @@ -127,6 +133,7 @@ images:
-f cmd/Dockerfile \
--build-arg ADDON_AGENT_IMAGE_NAME=$(IMAGE_REGISTRY_NAME)/$(IMAGE_NAME):$(IMAGE_TAG) \
-t $(IMAGE_REGISTRY_NAME)/$(IMAGE_NAME):$(IMAGE_TAG) .
.PHONY: images

images-amd64:
$(CONTAINER_ENGINE) buildx build \
Expand Down Expand Up @@ -221,6 +228,34 @@ test-e2e: delete-e2e-image-from-kind build-e2e-image load-e2e-image-kind
@./test/e2e/env/wait-for-job.sh cluster-proxy-e2e open-cluster-management-addon 1200
.PHONY: test-e2e

# setup-env-for-e2e-hosted builds the images (through the `images` prerequisite)
# and then runs ./test/e2e/env/init-hosted.sh. The image coordinates, the
# hub/hosting/managed cluster names, and the work directory are handed to the
# script as environment variables for that single command.
setup-env-for-e2e-hosted: images
@echo "Setting up environment for hosted e2e tests..."
IMAGE_REGISTRY_NAME=$(IMAGE_REGISTRY_NAME) \
IMAGE_NAME=$(IMAGE_NAME) \
IMAGE_TAG=$(IMAGE_TAG) \
HUB_CLUSTER_NAME=$(E2E_HOSTED_HUB_CLUSTER_NAME) \
HOSTING_CLUSTER_NAME=$(E2E_HOSTED_HOSTING_CLUSTER_NAME) \
MANAGED_CLUSTER_NAME=$(E2E_HOSTED_MANAGED_CLUSTER_NAME) \
WORK_DIR=$(E2E_HOSTED_WORK_DIR) \
./test/e2e/env/init-hosted.sh
.PHONY: setup-env-for-e2e-hosted

# clean-e2e-hosted deletes the three hosted-mode kind clusters (hub, hosting,
# managed) and removes the hosted e2e work directory. The leading `-` on each
# `kind delete` line tells make to ignore a failure of that command, so the
# target still proceeds when a cluster cannot be deleted.
clean-e2e-hosted:
@echo "Cleaning up hosted e2e kind clusters..."
-kind delete cluster --name $(E2E_HOSTED_HUB_CLUSTER_NAME)
-kind delete cluster --name $(E2E_HOSTED_HOSTING_CLUSTER_NAME)
-kind delete cluster --name $(E2E_HOSTED_MANAGED_CLUSTER_NAME)
rm -rf $(E2E_HOSTED_WORK_DIR)
.PHONY: clean-e2e-hosted

# test-e2e-hosted is the entry point for the hosted-mode e2e suite. Its
# prerequisites are clean-e2e-hosted and setup-env-for-e2e-hosted; the recipe
# then runs ./test/e2e/env/run-hosted.sh with the work directory and the two
# local ports (proxy entrypoint, user server) passed as environment variables.
# NOTE(review): the prerequisites are assumed to run in the listed order, which
# holds for a serial make; confirm before invoking with -j.
test-e2e-hosted: clean-e2e-hosted setup-env-for-e2e-hosted
@echo "Running hosted e2e tests..."
WORK_DIR=$(E2E_HOSTED_WORK_DIR) \
PROXY_ENTRYPOINT_LOCAL_PORT=$(E2E_HOSTED_PROXY_ENTRYPOINT_LOCAL_PORT) \
USER_SERVER_LOCAL_PORT=$(E2E_HOSTED_USER_SERVER_LOCAL_PORT) \
./test/e2e/env/run-hosted.sh
.PHONY: test-e2e-hosted

# Rapid iteration workflow for e2e tests (cleans up everything first)
# Use LABEL_FILTER to run specific tests, e.g.: make retest-e2e LABEL_FILTER="connectivity"
retest-e2e: clean-e2e delete-e2e-image-from-kind build-e2e-image load-e2e-image-kind
Expand Down
47 changes: 47 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,53 @@ dialer of the Kubernetes client config object, e.g.:
cfg.Dial = tunnel.DialContext
```

### Hosted mode

Cluster Proxy supports addon-framework hosted mode when the `ManagedClusterAddOn`
has the `addon.open-cluster-management.io/hosting-cluster-name` annotation. In
hosted mode the proxy-agent deployment runs on the hosting cluster while the
managed cluster keeps the service account and RBAC needed for TokenRequest,
TokenReview, leases, ConfigMaps, and impersonation.

The hosting cluster must contain an external managed-cluster kubeconfig Secret.
By default the addon reads `external-managed-kubeconfig` from the namespace named
after the managed cluster, creates short-lived tokens for the managed
`cluster-proxy` service account, and writes a generated kubeconfig Secret named
`cluster-proxy-managed-kubeconfig` in the addon install namespace. The generated
kubeconfig is mounted read-only by the hosted agent containers; the external
admin kubeconfig is mounted only by the provisioner.

Hosted mode supports the managed Kubernetes API proxy path. The regular Service
proxy is disabled by default in hosted mode because a service-proxy running on
the hosting cluster usually cannot reach managed cluster Service DNS names or
ClusterIPs. Set `hostedServiceProxyMode=BestEffort` only when the hosting
cluster can directly reach managed Service networking. Set
`hostedServiceProxyMode=Relay` to deploy a managed-side relay and send Service
proxy requests through the managed apiserver Service proxy subresource.

| Mode | Kube API proxy | Regular Service proxy |
|------|----------------|-----------------------|
| Default | Supported | Supported when service proxy is enabled |
| Hosted, `hostedServiceProxyMode=Disabled` | Supported | Disabled |
| Hosted, `hostedServiceProxyMode=BestEffort` | Supported | Best effort; requires hosting-to-managed Service network reachability |
| Hosted, `hostedServiceProxyMode=Relay` | Supported | Supported through the managed-side `cluster-proxy-service-relay` Deployment and Service |

The following `AddOnDeploymentConfig.spec.customizedVariables` are available for
hosted mode:

- `externalManagedKubeConfigSecretNamespace`: defaults to the managed cluster name
- `externalManagedKubeConfigSecretName`: defaults to `external-managed-kubeconfig`
- `managedKubeConfigSecret`: defaults to `cluster-proxy-managed-kubeconfig`
- `managedKubeConfigTokenExpiration`: defaults to `24h`
- `managedKubeConfigRefreshBefore`: defaults to `1h`
- `managedKubeConfigSyncInterval`: defaults to `5m`
- `hostedServiceProxyMode`: `Disabled`, `BestEffort`, or `Relay`; defaults to `Disabled`

The hosted provisioner patches `ManagedKubeconfigReady` on the hub
`ManagedClusterAddOn` and exposes health and metrics on `:8000`. The
managed-apiserver raw TCP relay exposes health and metrics on `:8001`; the
service relay exposes health and metrics on `:8000`.

### Performance

The following table shows network bandwidth benchmarking results via [goben](https://github.com/udhos/goben)
Expand Down
51 changes: 43 additions & 8 deletions cmd/addon-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"k8s.io/klog/v2/textlogger"
Expand All @@ -20,12 +21,14 @@ import (
"open-cluster-management.io/cluster-proxy/pkg/common"
"open-cluster-management.io/cluster-proxy/pkg/util"

"k8s.io/component-base/metrics/legacyregistry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
)

var (
hubKubeconfig string
spokeKubeconfig string
clusterName string
proxyServerNamespace string
enablePortForwardProxy bool
Expand Down Expand Up @@ -69,6 +72,8 @@ func main() {
klog.InitFlags(flag.CommandLine)
flag.StringVar(&hubKubeconfig, "hub-kubeconfig", "",
"The kubeconfig to talk to hub cluster")
flag.StringVar(&spokeKubeconfig, "spoke-kubeconfig", "",
"The kubeconfig to talk to spoke/managed cluster. If empty, in-cluster config is used")
flag.StringVar(&clusterName, "cluster-name", "",
"The name of the managed cluster")
flag.StringVar(&proxyServerNamespace, "proxy-server-namespace", "open-cluster-management-addon",
Expand All @@ -88,7 +93,17 @@ func main() {
}
cfg.UserAgent = "proxy-agent-addon-agent"

spokeClient, err := kubernetes.NewForConfig(ctrl.GetConfigOrDie())
var spokeConfig *rest.Config
if spokeKubeconfig != "" {
spokeConfig, err = clientcmd.BuildConfigFromFlags("", spokeKubeconfig)
if err != nil {
panic(err)
}
} else {
spokeConfig = ctrl.GetConfigOrDie()
}
spokeConfig.UserAgent = "proxy-agent-addon-agent-spoke"
spokeClient, err := kubernetes.NewForConfig(spokeConfig)
if err != nil {
panic(fmt.Errorf("failed to create spoke client, err: %w", err))
}
Expand All @@ -97,21 +112,36 @@ func main() {
panic(fmt.Sprintf("Pod namespace is empty, please set the ENV for %s", envKeyPodNamespace))
}

leaseClient := spokeClient
leaseNamespace := addonAgentNamespace
useManagementLease := spokeKubeconfig != ""
if useManagementLease {
managementConfig := ctrl.GetConfigOrDie()
managementConfig.UserAgent = "proxy-agent-addon-agent-management"
leaseClient, err = kubernetes.NewForConfig(managementConfig)
if err != nil {
panic(fmt.Errorf("failed to create management client, err: %w", err))
}
}

var leaseUpdater lease.LeaseUpdater
if enableProxyAgentHealthCheck {
klog.Infof("Proxy-agent health check enabled, lease will only update when proxy-agent is connected")
leaseUpdater = lease.NewLeaseUpdater(
spokeClient,
leaseClient,
common.AddonName,
addonAgentNamespace,
leaseNamespace,
checkProxyAgentReadiness(),
).WithHubLeaseConfig(cfg, clusterName)
)
} else {
leaseUpdater = lease.NewLeaseUpdater(
spokeClient,
leaseClient,
common.AddonName,
addonAgentNamespace,
).WithHubLeaseConfig(cfg, clusterName)
leaseNamespace,
)
}
if !useManagementLease {
leaseUpdater = leaseUpdater.WithHubLeaseConfig(cfg, clusterName)
}

ctx := context.Background()
Expand All @@ -135,7 +165,11 @@ func main() {
}

// If the certificates (or the spoke kubeconfig, when one is configured) change, we need to restart the agent to load the new files.
cc, err := addonutils.NewConfigChecker("certificates check", "/etc/tls/tls.crt", "/etc/tls/tls.key")
configFiles := []string{"/etc/tls/tls.crt", "/etc/tls/tls.key"}
if spokeKubeconfig != "" {
configFiles = append(configFiles, spokeKubeconfig)
}
cc, err := addonutils.NewConfigChecker("certificates check", configFiles...)
if err != nil {
klog.Fatalf("failed create certificates checker: %v", err)
}
Expand All @@ -160,6 +194,7 @@ func main() {
func serveHealthProbes(stop <-chan struct{}, address string, healthCheckers map[string]healthz.Checker) {
mux := http.NewServeMux()
mux.Handle("/healthz", http.StripPrefix("/healthz", &healthz.Handler{Checks: healthCheckers}))
mux.Handle("/metrics", legacyregistry.Handler())

server := http.Server{
Handler: mux,
Expand Down
6 changes: 6 additions & 0 deletions cmd/cluster-proxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ import (
"k8s.io/klog/v2"

"open-cluster-management.io/cluster-proxy/pkg/controllers"
"open-cluster-management.io/cluster-proxy/pkg/proxyagent/agent/managedapiserver"
"open-cluster-management.io/cluster-proxy/pkg/proxyagent/agent/provisioner"
"open-cluster-management.io/cluster-proxy/pkg/serviceproxy"
"open-cluster-management.io/cluster-proxy/pkg/servicerelay"
"open-cluster-management.io/cluster-proxy/pkg/userserver"
"open-cluster-management.io/cluster-proxy/pkg/version"
)
Expand Down Expand Up @@ -53,6 +56,9 @@ func newClusterProxyCommand() *cobra.Command {

cmd.AddCommand(userserver.NewUserServerCommand())
cmd.AddCommand(serviceproxy.NewServiceProxyCommand())
cmd.AddCommand(servicerelay.NewCommand())
cmd.AddCommand(provisioner.NewManagedKubeconfigProvisionerCommand())
cmd.AddCommand(managedapiserver.NewCommand())
cmd.AddCommand(controllers.NewControllersCommand())

return cmd
Expand Down
4 changes: 4 additions & 0 deletions pkg/constant/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ const (

ServiceProxyPort = 7443

ServiceRelayPort = 7444

ServerCertSecretName = "cluster-proxy-service-proxy-server-cert"

ServiceProxyName = "cluster-proxy-service-proxy"

ServiceRelayName = "cluster-proxy-service-relay"

AddonName = "cluster-proxy"

// UserServerSecretName is the fixed secret name for user server certificates.
Expand Down
114 changes: 114 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package metrics

import (
"time"

componentmetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)

// Package-level collectors for cluster-proxy. All of them are declared with
// ALPHA stability and are registered with the legacy registry in init. They
// are unexported; callers update them through the Set*/Observe* helpers in
// this file.
var (
// managedKubeconfigTokenExpirationSeconds is a gauge holding the number of
// seconds left before the generated hosted-mode managed kubeconfig token
// expires. It is written by SetManagedKubeconfigTokenExpiration.
managedKubeconfigTokenExpirationSeconds = componentmetrics.NewGauge(
&componentmetrics.GaugeOpts{
Name: "cluster_proxy_managed_kubeconfig_token_expiration_seconds",
Help: "Seconds until the generated hosted-mode managed kubeconfig token expires.",
StabilityLevel: componentmetrics.ALPHA,
},
)

// managedKubeconfigRefreshTotal counts managed kubeconfig refresh attempts,
// partitioned by the "result" label.
managedKubeconfigRefreshTotal = componentmetrics.NewCounterVec(
&componentmetrics.CounterOpts{
Name: "cluster_proxy_managed_kubeconfig_refresh_total",
Help: "Total number of managed kubeconfig refresh attempts by result.",
StabilityLevel: componentmetrics.ALPHA,
},
[]string{"result"},
)

// managedAPIServerRelayConnectionsTotal counts raw TCP connections accepted
// by the managed apiserver relay.
managedAPIServerRelayConnectionsTotal = componentmetrics.NewCounter(
&componentmetrics.CounterOpts{
Name: "cluster_proxy_managed_apiserver_relay_connections_total",
Help: "Total number of raw TCP connections accepted by the managed apiserver relay.",
StabilityLevel: componentmetrics.ALPHA,
},
)

// managedAPIServerRelayConnectionsActive is a gauge of the raw TCP
// connections the managed apiserver relay is currently handling. It is
// incremented on connection start and decremented on connection done.
managedAPIServerRelayConnectionsActive = componentmetrics.NewGauge(
&componentmetrics.GaugeOpts{
Name: "cluster_proxy_managed_apiserver_relay_connections_active",
Help: "Current number of active raw TCP connections handled by the managed apiserver relay.",
StabilityLevel: componentmetrics.ALPHA,
},
)

// managedAPIServerRelayDialErrorsTotal counts dial errors reported by the
// managed apiserver relay.
managedAPIServerRelayDialErrorsTotal = componentmetrics.NewCounter(
&componentmetrics.CounterOpts{
Name: "cluster_proxy_managed_apiserver_relay_dial_errors_total",
Help: "Total number of managed apiserver relay dial errors.",
StabilityLevel: componentmetrics.ALPHA,
},
)

// serviceProxyRequestsTotal counts service-proxy requests, partitioned by
// the "mode", "target", and "result" labels (in that order).
serviceProxyRequestsTotal = componentmetrics.NewCounterVec(
&componentmetrics.CounterOpts{
Name: "cluster_proxy_service_proxy_requests_total",
Help: "Total number of service-proxy requests by mode, target, and result.",
StabilityLevel: componentmetrics.ALPHA,
},
[]string{"mode", "target", "result"},
)

// serviceRelayRequestsTotal counts service-relay requests, partitioned by
// the "scheme" and "result" labels (in that order).
serviceRelayRequestsTotal = componentmetrics.NewCounterVec(
&componentmetrics.CounterOpts{
Name: "cluster_proxy_service_relay_requests_total",
Help: "Total number of service-relay requests by target scheme and result.",
StabilityLevel: componentmetrics.ALPHA,
},
[]string{"scheme", "result"},
)
)

func init() {
legacyregistry.MustRegister(
managedKubeconfigTokenExpirationSeconds,
managedKubeconfigRefreshTotal,
managedAPIServerRelayConnectionsTotal,
managedAPIServerRelayConnectionsActive,
managedAPIServerRelayDialErrorsTotal,
serviceProxyRequestsTotal,
serviceRelayRequestsTotal,
)
}

// SetManagedKubeconfigTokenExpiration records how many seconds remain between
// now and expiration in the managed kubeconfig token expiration gauge. When
// expiration is not after now, the gauge is set to zero rather than a
// negative value.
func SetManagedKubeconfigTokenExpiration(expiration, now time.Time) {
	secondsLeft := 0.0
	if ttl := expiration.Sub(now); ttl > 0 {
		secondsLeft = ttl.Seconds()
	}
	managedKubeconfigTokenExpirationSeconds.Set(secondsLeft)
}

// ObserveManagedKubeconfigRefresh increments the managed kubeconfig refresh
// counter for the series whose "result" label equals result.
func ObserveManagedKubeconfigRefresh(result string) {
	refreshes := managedKubeconfigRefreshTotal.WithLabelValues(result)
	refreshes.Inc()
}

// ObserveManagedAPIServerRelayConnectionStart records a newly accepted managed
// apiserver relay connection: it increments both the total-connections counter
// and the active-connections gauge. Each call is meant to be paired with a
// later ObserveManagedAPIServerRelayConnectionDone so the gauge returns to its
// previous value.
func ObserveManagedAPIServerRelayConnectionStart() {
managedAPIServerRelayConnectionsTotal.Inc()
managedAPIServerRelayConnectionsActive.Inc()
}

// ObserveManagedAPIServerRelayConnectionDone decrements the active-connections
// gauge of the managed apiserver relay. The total-connections counter is left
// unchanged.
func ObserveManagedAPIServerRelayConnectionDone() {
managedAPIServerRelayConnectionsActive.Dec()
}

// ObserveManagedAPIServerRelayDialError increments the managed apiserver relay
// dial-error counter by one.
func ObserveManagedAPIServerRelayDialError() {
managedAPIServerRelayDialErrorsTotal.Inc()
}

// ObserveServiceProxyRequest increments the service-proxy request counter for
// the series identified by the mode, target, and result label values.
func ObserveServiceProxyRequest(mode, target, result string) {
	requests := serviceProxyRequestsTotal.WithLabelValues(mode, target, result)
	requests.Inc()
}

// ObserveServiceRelayRequest increments the service-relay request counter for
// the series identified by the scheme and result label values.
func ObserveServiceRelayRequest(scheme, result string) {
	requests := serviceRelayRequestsTotal.WithLabelValues(scheme, result)
	requests.Inc()
}
Loading