diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5b7830380..328973706 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -48,4 +48,13 @@ Anyone can comment on issues and submit reviews for pull requests. In order to b Before submitting a PR, please perform the following steps: -- List of steps to perform before submitting a PR. +- Run `make build`. +- Run `make verify`. +- Run `make test`. +- Run `make test-integration` for controller or manifest behavior changes. +- Run `make test-e2e` for user-facing proxy behavior changes. +- Run `make test-e2e-hosted` for hosted-mode behavior changes. + +Use these make targets as the official test interface. A raw `go test ./...` +does not include generated manifests, envtest asset setup, linting, or the e2e +packaging used by CI. diff --git a/Makefile b/Makefile index 52ca90a0b..cfa200bcc 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,12 @@ IMAGE_REGISTRY_NAME ?= quay.io/open-cluster-management IMAGE_NAME = cluster-proxy IMAGE_TAG ?= latest E2E_TEST_CLUSTER_NAME ?= e2e +E2E_HOSTED_HUB_CLUSTER_NAME ?= cluster-proxy-hosted-hub +E2E_HOSTED_HOSTING_CLUSTER_NAME ?= cluster-proxy-hosted-hosting +E2E_HOSTED_MANAGED_CLUSTER_NAME ?= cluster-proxy-hosted-managed +E2E_HOSTED_WORK_DIR ?= _output/e2e-hosted +E2E_HOSTED_PROXY_ENTRYPOINT_LOCAL_PORT ?= 18090 +E2E_HOSTED_USER_SERVER_LOCAL_PORT ?= 19092 CONTAINER_ENGINE ?= docker # Produce CRDs that work back to Kubernetes 1.11 (no version conversion) CRD_OPTIONS ?= "crd:crdVersions={v1},allowDangerousTypes=true,generateEmbeddedObjectMeta=true" @@ -127,6 +133,7 @@ images: -f cmd/Dockerfile \ --build-arg ADDON_AGENT_IMAGE_NAME=$(IMAGE_REGISTRY_NAME)/$(IMAGE_NAME):$(IMAGE_TAG) \ -t $(IMAGE_REGISTRY_NAME)/$(IMAGE_NAME):$(IMAGE_TAG) . 
+.PHONY: images images-amd64: $(CONTAINER_ENGINE) buildx build \ @@ -221,6 +228,34 @@ test-e2e: delete-e2e-image-from-kind build-e2e-image load-e2e-image-kind @./test/e2e/env/wait-for-job.sh cluster-proxy-e2e open-cluster-management-addon 1200 .PHONY: test-e2e +setup-env-for-e2e-hosted: images + @echo "Setting up environment for hosted e2e tests..." + IMAGE_REGISTRY_NAME=$(IMAGE_REGISTRY_NAME) \ + IMAGE_NAME=$(IMAGE_NAME) \ + IMAGE_TAG=$(IMAGE_TAG) \ + HUB_CLUSTER_NAME=$(E2E_HOSTED_HUB_CLUSTER_NAME) \ + HOSTING_CLUSTER_NAME=$(E2E_HOSTED_HOSTING_CLUSTER_NAME) \ + MANAGED_CLUSTER_NAME=$(E2E_HOSTED_MANAGED_CLUSTER_NAME) \ + WORK_DIR=$(E2E_HOSTED_WORK_DIR) \ + ./test/e2e/env/init-hosted.sh +.PHONY: setup-env-for-e2e-hosted + +clean-e2e-hosted: + @echo "Cleaning up hosted e2e kind clusters..." + -kind delete cluster --name $(E2E_HOSTED_HUB_CLUSTER_NAME) + -kind delete cluster --name $(E2E_HOSTED_HOSTING_CLUSTER_NAME) + -kind delete cluster --name $(E2E_HOSTED_MANAGED_CLUSTER_NAME) + rm -rf $(E2E_HOSTED_WORK_DIR) +.PHONY: clean-e2e-hosted + +test-e2e-hosted: clean-e2e-hosted setup-env-for-e2e-hosted + @echo "Running hosted e2e tests..." 
+ WORK_DIR=$(E2E_HOSTED_WORK_DIR) \ + PROXY_ENTRYPOINT_LOCAL_PORT=$(E2E_HOSTED_PROXY_ENTRYPOINT_LOCAL_PORT) \ + USER_SERVER_LOCAL_PORT=$(E2E_HOSTED_USER_SERVER_LOCAL_PORT) \ + ./test/e2e/env/run-hosted.sh +.PHONY: test-e2e-hosted + # Rapid iteration workflow for e2e tests (cleans up everything first) # Use LABEL_FILTER to run specific tests, e.g.: make retest-e2e LABEL_FILTER="connectivity" retest-e2e: clean-e2e delete-e2e-image-from-kind build-e2e-image load-e2e-image-kind diff --git a/README.md b/README.md index 6189ff775..bbb8fc9ae 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,53 @@ dialer of the Kubernetes client config object, e.g.: cfg.Dial = tunnel.DialContext ``` +### Hosted mode + +Cluster Proxy supports addon-framework hosted mode when the `ManagedClusterAddOn` +has the `addon.open-cluster-management.io/hosting-cluster-name` annotation. In +hosted mode the proxy-agent deployment runs on the hosting cluster while the +managed cluster keeps the service account and RBAC needed for TokenRequest, +TokenReview, leases, ConfigMaps, and impersonation. + +The hosting cluster must contain an external managed-cluster kubeconfig Secret. +By default the addon reads `external-managed-kubeconfig` from the namespace named +after the managed cluster, creates short-lived tokens for the managed +`cluster-proxy` service account, and writes a generated kubeconfig Secret named +`cluster-proxy-managed-kubeconfig` in the addon install namespace. The generated +kubeconfig is mounted read-only by the hosted agent containers; the external +admin kubeconfig is mounted only by the provisioner. + +Hosted mode supports the managed Kubernetes API proxy path. The regular Service +proxy is disabled by default in hosted mode because a service-proxy running on +the hosting cluster usually cannot reach managed cluster Service DNS names or +ClusterIPs. Set `hostedServiceProxyMode=BestEffort` only when the hosting +cluster can directly reach managed Service networking. 
Set +`hostedServiceProxyMode=Relay` to deploy a managed-side relay and send Service +proxy requests through the managed apiserver Service proxy subresource. + +| Mode | Kube API proxy | Regular Service proxy | +|------|----------------|-----------------------| +| Default | Supported | Supported when service proxy is enabled | +| Hosted, `hostedServiceProxyMode=Disabled` | Supported | Disabled | +| Hosted, `hostedServiceProxyMode=BestEffort` | Supported | Best effort; requires hosting-to-managed Service network reachability | +| Hosted, `hostedServiceProxyMode=Relay` | Supported | Supported through the managed-side `cluster-proxy-service-relay` Deployment and Service | + +The following `AddOnDeploymentConfig.spec.customizedVariables` are available for +hosted mode: + +- `externalManagedKubeConfigSecretNamespace`: defaults to the managed cluster name +- `externalManagedKubeConfigSecretName`: defaults to `external-managed-kubeconfig` +- `managedKubeConfigSecret`: defaults to `cluster-proxy-managed-kubeconfig` +- `managedKubeConfigTokenExpiration`: defaults to `24h` +- `managedKubeConfigRefreshBefore`: defaults to `1h` +- `managedKubeConfigSyncInterval`: defaults to `5m` +- `hostedServiceProxyMode`: `Disabled`, `BestEffort`, or `Relay`; defaults to `Disabled` + +The hosted provisioner patches `ManagedKubeconfigReady` on the hub +`ManagedClusterAddOn` and exposes health and metrics on `:8000`. The +managed-apiserver raw TCP relay exposes health and metrics on `:8001`; the +service relay exposes health and metrics on `:8000`. 
+ ### Performance The following table shows network bandwidth benchmarking results via [goben](https://github.com/udhos/goben) diff --git a/cmd/addon-agent/main.go b/cmd/addon-agent/main.go index ac4ee6fda..f060e508d 100644 --- a/cmd/addon-agent/main.go +++ b/cmd/addon-agent/main.go @@ -11,6 +11,7 @@ import ( "time" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" "k8s.io/klog/v2/textlogger" @@ -20,12 +21,14 @@ import ( "open-cluster-management.io/cluster-proxy/pkg/common" "open-cluster-management.io/cluster-proxy/pkg/util" + "k8s.io/component-base/metrics/legacyregistry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" ) var ( hubKubeconfig string + spokeKubeconfig string clusterName string proxyServerNamespace string enablePortForwardProxy bool @@ -69,6 +72,8 @@ func main() { klog.InitFlags(flag.CommandLine) flag.StringVar(&hubKubeconfig, "hub-kubeconfig", "", "The kubeconfig to talk to hub cluster") + flag.StringVar(&spokeKubeconfig, "spoke-kubeconfig", "", + "The kubeconfig to talk to spoke/managed cluster. 
If empty, in-cluster config is used") flag.StringVar(&clusterName, "cluster-name", "", "The name of the managed cluster") flag.StringVar(&proxyServerNamespace, "proxy-server-namespace", "open-cluster-management-addon", @@ -88,7 +93,17 @@ func main() { } cfg.UserAgent = "proxy-agent-addon-agent" - spokeClient, err := kubernetes.NewForConfig(ctrl.GetConfigOrDie()) + var spokeConfig *rest.Config + if spokeKubeconfig != "" { + spokeConfig, err = clientcmd.BuildConfigFromFlags("", spokeKubeconfig) + if err != nil { + panic(err) + } + } else { + spokeConfig = ctrl.GetConfigOrDie() + } + spokeConfig.UserAgent = "proxy-agent-addon-agent-spoke" + spokeClient, err := kubernetes.NewForConfig(spokeConfig) if err != nil { panic(fmt.Errorf("failed to create spoke client, err: %w", err)) } @@ -97,21 +112,36 @@ func main() { panic(fmt.Sprintf("Pod namespace is empty, please set the ENV for %s", envKeyPodNamespace)) } + leaseClient := spokeClient + leaseNamespace := addonAgentNamespace + useManagementLease := spokeKubeconfig != "" + if useManagementLease { + managementConfig := ctrl.GetConfigOrDie() + managementConfig.UserAgent = "proxy-agent-addon-agent-management" + leaseClient, err = kubernetes.NewForConfig(managementConfig) + if err != nil { + panic(fmt.Errorf("failed to create management client, err: %w", err)) + } + } + var leaseUpdater lease.LeaseUpdater if enableProxyAgentHealthCheck { klog.Infof("Proxy-agent health check enabled, lease will only update when proxy-agent is connected") leaseUpdater = lease.NewLeaseUpdater( - spokeClient, + leaseClient, common.AddonName, - addonAgentNamespace, + leaseNamespace, checkProxyAgentReadiness(), - ).WithHubLeaseConfig(cfg, clusterName) + ) } else { leaseUpdater = lease.NewLeaseUpdater( - spokeClient, + leaseClient, common.AddonName, - addonAgentNamespace, - ).WithHubLeaseConfig(cfg, clusterName) + leaseNamespace, + ) + } + if !useManagementLease { + leaseUpdater = leaseUpdater.WithHubLeaseConfig(cfg, clusterName) } ctx := 
context.Background() @@ -135,7 +165,11 @@ func main() { } // If the certificates is changed, we need to restart the agent to load the new certificates. - cc, err := addonutils.NewConfigChecker("certificates check", "/etc/tls/tls.crt", "/etc/tls/tls.key") + configFiles := []string{"/etc/tls/tls.crt", "/etc/tls/tls.key"} + if spokeKubeconfig != "" { + configFiles = append(configFiles, spokeKubeconfig) + } + cc, err := addonutils.NewConfigChecker("certificates check", configFiles...) if err != nil { klog.Fatalf("failed create certificates checker: %v", err) } @@ -160,6 +194,7 @@ func main() { func serveHealthProbes(stop <-chan struct{}, address string, healthCheckers map[string]healthz.Checker) { mux := http.NewServeMux() mux.Handle("/healthz", http.StripPrefix("/healthz", &healthz.Handler{Checks: healthCheckers})) + mux.Handle("/metrics", legacyregistry.Handler()) server := http.Server{ Handler: mux, diff --git a/cmd/cluster-proxy/main.go b/cmd/cluster-proxy/main.go index 4021f7e14..3ad2e25e0 100644 --- a/cmd/cluster-proxy/main.go +++ b/cmd/cluster-proxy/main.go @@ -13,7 +13,10 @@ import ( "k8s.io/klog/v2" "open-cluster-management.io/cluster-proxy/pkg/controllers" + "open-cluster-management.io/cluster-proxy/pkg/proxyagent/agent/managedapiserver" + "open-cluster-management.io/cluster-proxy/pkg/proxyagent/agent/provisioner" "open-cluster-management.io/cluster-proxy/pkg/serviceproxy" + "open-cluster-management.io/cluster-proxy/pkg/servicerelay" "open-cluster-management.io/cluster-proxy/pkg/userserver" "open-cluster-management.io/cluster-proxy/pkg/version" ) @@ -53,6 +56,9 @@ func newClusterProxyCommand() *cobra.Command { cmd.AddCommand(userserver.NewUserServerCommand()) cmd.AddCommand(serviceproxy.NewServiceProxyCommand()) + cmd.AddCommand(servicerelay.NewCommand()) + cmd.AddCommand(provisioner.NewManagedKubeconfigProvisionerCommand()) + cmd.AddCommand(managedapiserver.NewCommand()) cmd.AddCommand(controllers.NewControllersCommand()) return cmd diff --git 
a/pkg/constant/constant.go b/pkg/constant/constant.go index c50c909b7..1565cec9b 100644 --- a/pkg/constant/constant.go +++ b/pkg/constant/constant.go @@ -5,10 +5,14 @@ const ( ServiceProxyPort = 7443 + ServiceRelayPort = 7444 + ServerCertSecretName = "cluster-proxy-service-proxy-server-cert" ServiceProxyName = "cluster-proxy-service-proxy" + ServiceRelayName = "cluster-proxy-service-relay" + AddonName = "cluster-proxy" // UserServerSecretName is the fixed secret name for user server certificates. diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 000000000..2868714af --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,114 @@ +package metrics + +import ( + "time" + + componentmetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +var ( + managedKubeconfigTokenExpirationSeconds = componentmetrics.NewGauge( + &componentmetrics.GaugeOpts{ + Name: "cluster_proxy_managed_kubeconfig_token_expiration_seconds", + Help: "Seconds until the generated hosted-mode managed kubeconfig token expires.", + StabilityLevel: componentmetrics.ALPHA, + }, + ) + + managedKubeconfigRefreshTotal = componentmetrics.NewCounterVec( + &componentmetrics.CounterOpts{ + Name: "cluster_proxy_managed_kubeconfig_refresh_total", + Help: "Total number of managed kubeconfig refresh attempts by result.", + StabilityLevel: componentmetrics.ALPHA, + }, + []string{"result"}, + ) + + managedAPIServerRelayConnectionsTotal = componentmetrics.NewCounter( + &componentmetrics.CounterOpts{ + Name: "cluster_proxy_managed_apiserver_relay_connections_total", + Help: "Total number of raw TCP connections accepted by the managed apiserver relay.", + StabilityLevel: componentmetrics.ALPHA, + }, + ) + + managedAPIServerRelayConnectionsActive = componentmetrics.NewGauge( + &componentmetrics.GaugeOpts{ + Name: "cluster_proxy_managed_apiserver_relay_connections_active", + Help: "Current number of active raw TCP connections handled by the managed 
apiserver relay.", + StabilityLevel: componentmetrics.ALPHA, + }, + ) + + managedAPIServerRelayDialErrorsTotal = componentmetrics.NewCounter( + &componentmetrics.CounterOpts{ + Name: "cluster_proxy_managed_apiserver_relay_dial_errors_total", + Help: "Total number of managed apiserver relay dial errors.", + StabilityLevel: componentmetrics.ALPHA, + }, + ) + + serviceProxyRequestsTotal = componentmetrics.NewCounterVec( + &componentmetrics.CounterOpts{ + Name: "cluster_proxy_service_proxy_requests_total", + Help: "Total number of service-proxy requests by mode, target, and result.", + StabilityLevel: componentmetrics.ALPHA, + }, + []string{"mode", "target", "result"}, + ) + + serviceRelayRequestsTotal = componentmetrics.NewCounterVec( + &componentmetrics.CounterOpts{ + Name: "cluster_proxy_service_relay_requests_total", + Help: "Total number of service-relay requests by target scheme and result.", + StabilityLevel: componentmetrics.ALPHA, + }, + []string{"scheme", "result"}, + ) +) + +func init() { + legacyregistry.MustRegister( + managedKubeconfigTokenExpirationSeconds, + managedKubeconfigRefreshTotal, + managedAPIServerRelayConnectionsTotal, + managedAPIServerRelayConnectionsActive, + managedAPIServerRelayDialErrorsTotal, + serviceProxyRequestsTotal, + serviceRelayRequestsTotal, + ) +} + +func SetManagedKubeconfigTokenExpiration(expiration, now time.Time) { + remaining := expiration.Sub(now).Seconds() + if remaining < 0 { + remaining = 0 + } + managedKubeconfigTokenExpirationSeconds.Set(remaining) +} + +func ObserveManagedKubeconfigRefresh(result string) { + managedKubeconfigRefreshTotal.WithLabelValues(result).Inc() +} + +func ObserveManagedAPIServerRelayConnectionStart() { + managedAPIServerRelayConnectionsTotal.Inc() + managedAPIServerRelayConnectionsActive.Inc() +} + +func ObserveManagedAPIServerRelayConnectionDone() { + managedAPIServerRelayConnectionsActive.Dec() +} + +func ObserveManagedAPIServerRelayDialError() { + managedAPIServerRelayDialErrorsTotal.Inc() 
+} + +func ObserveServiceProxyRequest(mode, target, result string) { + serviceProxyRequestsTotal.WithLabelValues(mode, target, result).Inc() +} + +func ObserveServiceRelayRequest(scheme, result string) { + serviceRelayRequestsTotal.WithLabelValues(scheme, result).Inc() +} diff --git a/pkg/proxyagent/agent/agent.go b/pkg/proxyagent/agent/agent.go index bccbf929e..94db14add 100644 --- a/pkg/proxyagent/agent/agent.go +++ b/pkg/proxyagent/agent/agent.go @@ -88,6 +88,7 @@ func NewAgentAddon( }) agentFactory := addonfactory.NewAgentAddonFactory(common.AddonName, FS, "manifests/charts/addon-agent"). + WithAgentHostedModeEnabledOption(). WithAgentRegistrationOption(&agent.RegistrationOption{ CSRConfigurations: func(cluster *clusterv1.ManagedCluster, addon *addonv1alpha1.ManagedClusterAddOn) ([]addonv1alpha1.RegistrationConfig, error) { return regConfigs, nil @@ -106,6 +107,16 @@ func NewAgentAddon( Verbs: []string{"*"}, Resources: []string{"leases"}, }, + { + APIGroups: []string{"addon.open-cluster-management.io"}, + Verbs: []string{"get"}, + Resources: []string{"managedclusteraddons"}, + }, + { + APIGroups: []string{"addon.open-cluster-management.io"}, + Verbs: []string{"update"}, + Resources: []string{"managedclusteraddons/status"}, + }, }, }). 
BindKubeClientClusterRole(&rbacv1.ClusterRole{ diff --git a/pkg/proxyagent/agent/agent_test.go b/pkg/proxyagent/agent/agent_test.go index 0b93ac787..e70b930d3 100644 --- a/pkg/proxyagent/agent/agent_test.go +++ b/pkg/proxyagent/agent/agent_test.go @@ -197,6 +197,16 @@ func TestAgentAddonRegistrationOption(t *testing.T) { Verbs: []string{"*"}, Resources: []string{"leases"}, }, + { + APIGroups: []string{"addon.open-cluster-management.io"}, + Verbs: []string{"get"}, + Resources: []string{"managedclusteraddons"}, + }, + { + APIGroups: []string{"addon.open-cluster-management.io"}, + Verbs: []string{"update"}, + Resources: []string{"managedclusteraddons/status"}, + }, }, role.Rules) // Verify RoleBinding was created and references the correct subjects @@ -732,6 +742,305 @@ func TestNewAgentAddon(t *testing.T) { } } +func TestNewAgentAddonHostedModeManifests(t *testing.T) { + clusterName := "cluster" + addOnName := "open-cluster-management-cluster-proxy" + managedProxyConfigName := "cluster-proxy" + + addon := newAddOn(addOnName, clusterName) + addon.Annotations = map[string]string{ + addonv1alpha1.HostingClusterNameAnnotationKey: "hosting-cluster", + } + addon.Status.ConfigReferences = []addonv1alpha1.ConfigReference{newManagedProxyConfigReference(managedProxyConfigName)} + + fakeKubeClient := fakekube.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-proxy-service-proxy-server-cert", + Namespace: "test", + }, + Data: map[string][]byte{ + "tls.crt": []byte("testcrt"), + "tls.key": []byte("testkey"), + }, + }) + fakeRuntimeClient := fakeruntime.NewClientBuilder(). + WithObjects(newManagedProxyConfig(managedProxyConfigName, proxyv1alpha1.EntryPointTypeHostname)). 
+ Build() + + agentAddOn, err := NewAgentAddon( + &fakeSelfSigner{t: t}, + "test", + fakeRuntimeClient, + fakeKubeClient, + true, + true, + fakeaddon.NewSimpleClientset(), + ) + assert.NoError(t, err) + assert.True(t, agentAddOn.GetAgentAddonOptions().HostedModeEnabled) + + manifests, err := agentAddOn.Manifests(newCluster(clusterName, true), addon) + assert.NoError(t, err) + + agentDeploy := getDeploymentByName(manifests, "cluster-proxy-proxy-agent") + assert.NotNil(t, agentDeploy) + assert.Equal(t, "hosting", agentDeploy.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) + assert.True(t, deploymentHasVolume(agentDeploy, "managed-kubeconfig")) + + addonAgent := getContainer(agentDeploy, "addon-agent") + assert.NotNil(t, addonAgent) + assert.Contains(t, addonAgent.Args, "--spoke-kubeconfig=/etc/managed/kubeconfig") + + serviceProxy := getContainer(agentDeploy, "service-proxy") + assert.Nil(t, serviceProxy) + + managedAPIServerProxy := getContainer(agentDeploy, "managed-apiserver-proxy") + assert.NotNil(t, managedAPIServerProxy) + assert.Contains(t, managedAPIServerProxy.Args, "--managed-kubeconfig=/etc/managed/kubeconfig") + + provisionerDeploy := getDeploymentByName(manifests, "cluster-proxy-managed-kubeconfig-provisioner") + assert.NotNil(t, provisionerDeploy) + assert.Equal(t, "hosting", provisionerDeploy.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) + + kubeAPIService := getKubeAPIServerExternalNameService(manifests, clusterName) + assert.NotNil(t, kubeAPIService) + assert.Equal(t, corev1.ServiceTypeClusterIP, kubeAPIService.Spec.Type) + assert.Equal(t, "hosting", kubeAPIService.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) + + addonAgentRole := getRoleByName(manifests, "cluster-proxy-addon-agent") + assert.NotNil(t, addonAgentRole) + assert.Equal(t, "hosting", addonAgentRole.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) +} + +func TestNewAgentAddonDefaultModeDoesNotRenderHostedResources(t 
*testing.T) { + clusterName := "cluster" + addOnName := "open-cluster-management-cluster-proxy" + managedProxyConfigName := "cluster-proxy" + + addon := newAddOn(addOnName, clusterName) + addon.Status.ConfigReferences = []addonv1alpha1.ConfigReference{newManagedProxyConfigReference(managedProxyConfigName)} + + fakeKubeClient := fakekube.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-proxy-service-proxy-server-cert", + Namespace: "test", + }, + Data: map[string][]byte{ + "tls.crt": []byte("testcrt"), + "tls.key": []byte("testkey"), + }, + }) + fakeRuntimeClient := fakeruntime.NewClientBuilder(). + WithObjects(newManagedProxyConfig(managedProxyConfigName, proxyv1alpha1.EntryPointTypeHostname)). + Build() + + agentAddOn, err := NewAgentAddon( + &fakeSelfSigner{t: t}, + "test", + fakeRuntimeClient, + fakeKubeClient, + true, + true, + fakeaddon.NewSimpleClientset(), + ) + assert.NoError(t, err) + + manifests, err := agentAddOn.Manifests(newCluster(clusterName, true), addon) + assert.NoError(t, err) + + for _, manifest := range manifests { + obj, ok := manifest.(metav1.ObjectMetaAccessor) + if !ok { + continue + } + assert.NotContains(t, obj.GetObjectMeta().GetAnnotations(), addonv1alpha1.HostedManifestLocationAnnotationKey) + } + + agentDeploy := getDeploymentByName(manifests, "cluster-proxy-proxy-agent") + assert.NotNil(t, agentDeploy) + assert.False(t, deploymentHasVolume(agentDeploy, "managed-kubeconfig")) + assert.Nil(t, getContainer(agentDeploy, "managed-apiserver-proxy")) + assert.Nil(t, getDeploymentByName(manifests, "cluster-proxy-managed-kubeconfig-provisioner")) + assert.Nil(t, getDeploymentByName(manifests, "cluster-proxy-service-relay")) +} + +func TestNewAgentAddonHostedModeBestEffortServiceProxy(t *testing.T) { + clusterName := "cluster" + addOnName := "open-cluster-management-cluster-proxy" + managedProxyConfigName := "cluster-proxy" + addOnDeploymentConfigName := "deploy-config" + + addon := newAddOn(addOnName, 
clusterName) + addon.Annotations = map[string]string{ + addonv1alpha1.HostingClusterNameAnnotationKey: "hosting-cluster", + } + addon.Status.ConfigReferences = []addonv1alpha1.ConfigReference{ + newManagedProxyConfigReference(managedProxyConfigName), + newAddOndDeploymentConfigReference(addOnDeploymentConfigName, clusterName), + } + + fakeKubeClient := fakekube.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-proxy-service-proxy-server-cert", + Namespace: "test", + }, + Data: map[string][]byte{ + "tls.crt": []byte("testcrt"), + "tls.key": []byte("testkey"), + }, + }) + fakeRuntimeClient := fakeruntime.NewClientBuilder(). + WithObjects(newManagedProxyConfig(managedProxyConfigName, proxyv1alpha1.EntryPointTypeHostname)). + Build() + addOnDeploymentConfig := newAddOnDeploymentConfig(addOnDeploymentConfigName, clusterName) + addOnDeploymentConfig.Spec.CustomizedVariables = []addonv1alpha1.CustomizedVariable{ + { + Name: "hostedServiceProxyMode", + Value: "BestEffort", + }, + { + Name: "externalManagedKubeConfigSecretNamespace", + Value: "external-ns", + }, + { + Name: "externalManagedKubeConfigSecretName", + Value: "external-kubeconfig", + }, + { + Name: "managedKubeConfigSecret", + Value: "custom-managed-kubeconfig", + }, + { + Name: "managedKubeConfigTokenExpiration", + Value: "12h", + }, + { + Name: "managedKubeConfigRefreshBefore", + Value: "30m", + }, + { + Name: "managedKubeConfigSyncInterval", + Value: "2m", + }, + } + + agentAddOn, err := NewAgentAddon( + &fakeSelfSigner{t: t}, + "test", + fakeRuntimeClient, + fakeKubeClient, + true, + true, + fakeaddon.NewSimpleClientset(addOnDeploymentConfig), + ) + assert.NoError(t, err) + + manifests, err := agentAddOn.Manifests(newCluster(clusterName, true), addon) + assert.NoError(t, err) + + agentDeploy := getDeploymentByName(manifests, "cluster-proxy-proxy-agent") + assert.NotNil(t, agentDeploy) + assert.Equal(t, "custom-managed-kubeconfig", getVolumeSecretName(agentDeploy, 
"managed-kubeconfig")) + + serviceProxy := getContainer(agentDeploy, "service-proxy") + assert.NotNil(t, serviceProxy) + assert.Contains(t, serviceProxy.Args, "--managed-kubeconfig=/etc/managed/kubeconfig") + assert.Contains(t, serviceProxy.Args, "--hosted-service-proxy-mode=BestEffort") + + provisioner := getContainer(getDeploymentByName(manifests, "cluster-proxy-managed-kubeconfig-provisioner"), "managed-kubeconfig-provisioner") + assert.NotNil(t, provisioner) + assert.Contains(t, provisioner.Args, "--source-namespace=external-ns") + assert.Contains(t, provisioner.Args, "--source-name=external-kubeconfig") + assert.Contains(t, provisioner.Args, "--target-name=custom-managed-kubeconfig") + assert.Contains(t, provisioner.Args, "--token-expiration=12h") + assert.Contains(t, provisioner.Args, "--refresh-before=30m") + assert.Contains(t, provisioner.Args, "--sync-interval=2m") + + serviceProxyServerCertSecret := getSecretByName(manifests, "cluster-proxy-service-proxy-server-certificates") + assert.NotNil(t, serviceProxyServerCertSecret) + assert.Equal(t, "hosting", serviceProxyServerCertSecret.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) +} + +func TestNewAgentAddonHostedModeRelayServiceProxy(t *testing.T) { + clusterName := "cluster" + addOnName := "open-cluster-management-cluster-proxy" + managedProxyConfigName := "cluster-proxy" + addOnDeploymentConfigName := "deploy-config" + + addon := newAddOn(addOnName, clusterName) + addon.Annotations = map[string]string{ + addonv1alpha1.HostingClusterNameAnnotationKey: "hosting-cluster", + } + addon.Status.ConfigReferences = []addonv1alpha1.ConfigReference{ + newManagedProxyConfigReference(managedProxyConfigName), + newAddOndDeploymentConfigReference(addOnDeploymentConfigName, clusterName), + } + + addOnDeploymentConfig := newAddOnDeploymentConfig(addOnDeploymentConfigName, clusterName) + addOnDeploymentConfig.Spec.CustomizedVariables = []addonv1alpha1.CustomizedVariable{ + { + Name: 
"hostedServiceProxyMode", + Value: "Relay", + }, + } + + agentAddOn, err := NewAgentAddon( + &fakeSelfSigner{t: t}, + "test", + fakeruntime.NewClientBuilder(). + WithObjects(newManagedProxyConfig(managedProxyConfigName, proxyv1alpha1.EntryPointTypeHostname)). + Build(), + fakekube.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-proxy-service-proxy-server-cert", + Namespace: "test", + }, + Data: map[string][]byte{ + "tls.crt": []byte("testcrt"), + "tls.key": []byte("testkey"), + }, + }), + true, + true, + fakeaddon.NewSimpleClientset(addOnDeploymentConfig), + ) + assert.NoError(t, err) + + manifests, err := agentAddOn.Manifests(newCluster(clusterName, true), addon) + assert.NoError(t, err) + + agentDeploy := getDeploymentByName(manifests, "cluster-proxy-proxy-agent") + assert.NotNil(t, agentDeploy) + + serviceProxy := getContainer(agentDeploy, "service-proxy") + assert.NotNil(t, serviceProxy) + assert.Contains(t, serviceProxy.Args, "--managed-kubeconfig=/etc/managed/kubeconfig") + assert.Contains(t, serviceProxy.Args, "--hosted-service-proxy-mode=Relay") + + serviceRelayDeploy := getDeploymentByName(manifests, "cluster-proxy-service-relay") + assert.NotNil(t, serviceRelayDeploy) + assert.NotContains(t, serviceRelayDeploy.Annotations, addonv1alpha1.HostedManifestLocationAnnotationKey) + serviceRelay := getContainer(serviceRelayDeploy, "service-relay") + assert.NotNil(t, serviceRelay) + assert.Contains(t, serviceRelay.Args, "service-relay") + assert.Contains(t, serviceRelay.Args, "--listen=:7444") + assert.NotNil(t, serviceRelayDeploy.Spec.Template.Spec.AutomountServiceAccountToken) + assert.False(t, *serviceRelayDeploy.Spec.Template.Spec.AutomountServiceAccountToken) + + serviceRelayService := getServiceByName(manifests, "cluster-proxy-service-relay") + assert.NotNil(t, serviceRelayService) + assert.Equal(t, corev1.ServiceTypeClusterIP, serviceRelayService.Spec.Type) + assert.NotContains(t, serviceRelayService.Annotations, 
addonv1alpha1.HostedManifestLocationAnnotationKey) + + serviceRelayRole := getRoleByName(manifests, "cluster-proxy-service-relay-proxy") + assert.NotNil(t, serviceRelayRole) + assert.NotContains(t, serviceRelayRole.Annotations, addonv1alpha1.HostedManifestLocationAnnotationKey) + + serviceProxyServerCertSecret := getSecretByName(manifests, "cluster-proxy-service-proxy-server-certificates") + assert.NotNil(t, serviceProxyServerCertSecret) + assert.Equal(t, "hosting", serviceProxyServerCertSecret.Annotations[addonv1alpha1.HostedManifestLocationAnnotationKey]) +} + type fakeSelfSigner struct { t *testing.T } @@ -1054,6 +1363,66 @@ func getAgentDeployment(manifests []runtime.Object) *appsv1.Deployment { return nil } +func getDeploymentByName(manifests []runtime.Object, name string) *appsv1.Deployment { + for _, manifest := range manifests { + switch obj := manifest.(type) { + case *appsv1.Deployment: + if obj.Name == name { + return obj + } + } + } + return nil +} + +func getContainer(deploy *appsv1.Deployment, name string) *corev1.Container { + if deploy == nil { + return nil + } + for i := range deploy.Spec.Template.Spec.Containers { + if deploy.Spec.Template.Spec.Containers[i].Name == name { + return &deploy.Spec.Template.Spec.Containers[i] + } + } + return nil +} + +func deploymentHasVolume(deploy *appsv1.Deployment, name string) bool { + if deploy == nil { + return false + } + for _, volume := range deploy.Spec.Template.Spec.Volumes { + if volume.Name == name { + return true + } + } + return false +} + +func getVolumeSecretName(deploy *appsv1.Deployment, name string) string { + if deploy == nil { + return "" + } + for _, volume := range deploy.Spec.Template.Spec.Volumes { + if volume.Name == name && volume.Secret != nil { + return volume.Secret.SecretName + } + } + return "" +} + +func getRoleByName(manifests []runtime.Object, name string) *rbacv1.Role { + for _, manifest := range manifests { + switch obj := manifest.(type) { + case *rbacv1.Role: + if obj.Name == 
name { + return obj + } + } + } + return nil +} + func getKubeAPIServerExternalNameService(manifests []runtime.Object, clusterName string) *corev1.Service { for _, manifest := range manifests { switch obj := manifest.(type) { @@ -1068,6 +1437,19 @@ func getKubeAPIServerExternalNameService(manifests []runtime.Object, clusterName return nil } +func getServiceByName(manifests []runtime.Object, name string) *corev1.Service { + for _, manifest := range manifests { + switch obj := manifest.(type) { + case *corev1.Service: + if obj.Name == name { + return obj + } + } + } + + return nil +} + func getProxyServerHost(deploy *appsv1.Deployment) string { args := deploy.Spec.Template.Spec.Containers[0].Args for _, arg := range args { @@ -1091,3 +1473,16 @@ func getCASecret(manifests []runtime.Object) *corev1.Secret { return nil } + +func getSecretByName(manifests []runtime.Object, name string) *corev1.Secret { + for _, manifest := range manifests { + switch obj := manifest.(type) { + case *corev1.Secret: + if obj.Name == name { + return obj + } + } + } + + return nil +} diff --git a/pkg/proxyagent/agent/managedapiserver/proxy.go b/pkg/proxyagent/agent/managedapiserver/proxy.go new file mode 100644 index 000000000..f028912ee --- /dev/null +++ b/pkg/proxyagent/agent/managedapiserver/proxy.go @@ -0,0 +1,156 @@ +package managedapiserver + +import ( + "context" + "fmt" + "io" + "net" + "net/url" + "time" + + "github.com/spf13/cobra" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/klog/v2" + + addonmetrics "open-cluster-management.io/cluster-proxy/pkg/metrics" + "open-cluster-management.io/cluster-proxy/pkg/utils" +) + +type Proxy struct { + ManagedKubeconfig string + Listen string + DialTimeout time.Duration + HealthProbeBindAddress string +} + +func NewCommand() *cobra.Command { + proxy := &Proxy{ + Listen: ":8443", + DialTimeout: 30 * time.Second, + HealthProbeBindAddress: ":8001", + } + + cmd := &cobra.Command{ + Use: "managed-apiserver-proxy", + Short: "Relay raw TCP connections 
to the managed cluster apiserver", + RunE: func(cmd *cobra.Command, args []string) error { + return proxy.Run(cmd.Context()) + }, + } + + flags := cmd.Flags() + flags.StringVar(&proxy.ManagedKubeconfig, "managed-kubeconfig", proxy.ManagedKubeconfig, "The managed cluster kubeconfig") + flags.StringVar(&proxy.Listen, "listen", proxy.Listen, "The TCP listen address") + flags.DurationVar(&proxy.DialTimeout, "dial-timeout", proxy.DialTimeout, "Timeout for dialing the managed apiserver") + flags.StringVar(&proxy.HealthProbeBindAddress, "health-probe-bind-address", proxy.HealthProbeBindAddress, "The address the health probe and metrics endpoint binds to") + + return cmd +} + +func (p *Proxy) Run(ctx context.Context) error { + if p.ManagedKubeconfig == "" { + return fmt.Errorf("managed kubeconfig is required") + } + if p.Listen == "" { + return fmt.Errorf("listen address is required") + } + if p.HealthProbeBindAddress == "" { + p.HealthProbeBindAddress = ":8001" + } + + config, err := clientcmd.BuildConfigFromFlags("", p.ManagedKubeconfig) + if err != nil { + return err + } + target, err := targetAddress(config.Host) + if err != nil { + return err + } + + listener, err := net.Listen("tcp", p.Listen) + if err != nil { + return err + } + defer listener.Close() + + go func() { + <-ctx.Done() + _ = listener.Close() + }() + go func() { + if err := utils.ServeHealthProbes(p.HealthProbeBindAddress, nil); err != nil { + klog.Fatal(err) + } + }() + + klog.Infof("managed apiserver proxy listening on %s and relaying to %s", p.Listen, target) + for { + conn, err := listener.Accept() + if err != nil { + if ctx.Err() != nil { + return ctx.Err() + } + return err + } + addonmetrics.ObserveManagedAPIServerRelayConnectionStart() + go p.handle(ctx, conn, target) + } +} + +func (p *Proxy) handle(ctx context.Context, downstream net.Conn, target string) { + defer addonmetrics.ObserveManagedAPIServerRelayConnectionDone() + defer downstream.Close() + + dialer := &net.Dialer{ + Timeout: 
// copyAndClose streams src into dst until src reports EOF or an error, then
// half-closes dst's write side when dst is a *net.TCPConn so the peer
// observes end-of-stream. The result of the copy (nil on clean EOF) is always
// sent on errCh.
func copyAndClose(dst net.Conn, src net.Conn, errCh chan<- error) {
	_, err := io.Copy(dst, src)
	if tcp, ok := dst.(*net.TCPConn); ok {
		// Best effort: a failed half-close is superseded by the full Close
		// performed by the caller.
		_ = tcp.CloseWrite()
	}
	errCh <- err
}

// targetAddress converts the server value of a kubeconfig (rest.Config.Host)
// into a "host:port" string suitable for net.Dial.
//
// Accepted forms are a URL ("https://host", "http://host:6443",
// "https://[::1]") and, as rest.Config.Host permits, a bare "host" or
// "host:port". Bare values are interpreted as https. When no port is given
// the scheme default is used (443 for https, 80 for http).
//
// An error is returned for an empty value, a value without a host, an
// unparsable value, or a port-less URL whose scheme is neither http nor
// https.
func targetAddress(host string) (string, error) {
	if host == "" {
		return "", fmt.Errorf("managed kubeconfig server is empty")
	}
	parsed, err := url.Parse(host)
	if err != nil || parsed.Host == "" {
		// A scheme-less "host" or "host:port" either fails to parse or parses
		// without an authority. Retry as an https URL, but only accept the
		// retry when it yields a host and no path: that keeps malformed URLs
		// (e.g. "https://host:abc", "host/path") failing as before.
		fallback, fallbackErr := url.Parse("https://" + host)
		if fallbackErr == nil && fallback.Host != "" && (fallback.Path == "" || fallback.Path == "/") {
			parsed, err = fallback, nil
		}
	}
	if err != nil {
		return "", err
	}
	if parsed.Host == "" {
		return "", fmt.Errorf("managed kubeconfig server %q does not include a host", host)
	}
	hostname := parsed.Hostname()
	if hostname == "" {
		return "", fmt.Errorf("managed kubeconfig server %q does not include a host", host)
	}
	if port := parsed.Port(); port != "" {
		return net.JoinHostPort(hostname, port), nil
	}
	switch parsed.Scheme {
	case "https":
		return net.JoinHostPort(hostname, "443"), nil
	case "http":
		return net.JoinHostPort(hostname, "80"), nil
	default:
		return "", fmt.Errorf("unsupported managed kubeconfig server scheme %q", parsed.Scheme)
	}
}
"managed.example.com:443", + }, + { + name: "https explicit port", + host: "https://managed.example.com:6443", + expected: "managed.example.com:6443", + }, + { + name: "http default port", + host: "http://managed.example.com", + expected: "managed.example.com:80", + }, + { + name: "ipv6 default port", + host: "https://[::1]", + expected: "[::1]:443", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + actual, err := targetAddress(c.host) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if actual != c.expected { + t.Fatalf("expected %q, got %q", c.expected, actual) + } + }) + } + + if _, err := targetAddress("ftp://managed.example.com"); err == nil { + t.Fatal("expected unsupported scheme error") + } +} + +func TestProxyRelaysRawTCPBytes(t *testing.T) { + upstream, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen upstream: %v", err) + } + defer upstream.Close() + + go func() { + conn, err := upstream.Accept() + if err != nil { + return + } + defer conn.Close() + + buf := make([]byte, 32) + n, err := conn.Read(buf) + if err != nil { + return + } + _, _ = conn.Write([]byte("relay:" + string(buf[:n]))) + }() + + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to allocate listen address: %v", err) + } + listenAddress := listener.Addr().String() + _ = listener.Close() + + kubeconfigPath := t.TempDir() + "/kubeconfig" + if err := os.WriteFile(kubeconfigPath, []byte(fmt.Sprintf(`apiVersion: v1 +kind: Config +clusters: +- name: managed + cluster: + server: https://%s +contexts: +- name: managed + context: + cluster: managed + user: cluster-proxy +current-context: managed +users: +- name: cluster-proxy + user: + token: token +`, upstream.Addr().String())), 0600); err != nil { + t.Fatalf("failed to write kubeconfig: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + errCh := make(chan error, 1) + go func() { + errCh 
// dialEventually dials address over TCP, retrying every 10ms, until a
// connection is established, ctx is done, or two seconds have elapsed.
//
// On success it returns the open connection, which the caller must close. On
// give-up it returns the error of the most recent dial attempt.
func dialEventually(ctx context.Context, address string) (net.Conn, error) {
	// Bound the whole retry loop; the timeout only governs dialing and does
	// not affect a connection once it has been established.
	ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()

	dialer := &net.Dialer{Timeout: 50 * time.Millisecond}
	retry := time.NewTicker(10 * time.Millisecond)
	defer retry.Stop()

	for {
		conn, err := dialer.DialContext(ctx, "tcp", address)
		if err == nil {
			return conn, nil
		}
		select {
		case <-ctx.Done():
			// Stop immediately on cancellation or timeout instead of
			// sleeping through the remaining retry budget.
			return nil, err
		case <-retry.C:
		}
	}
}
BestEffort, or Relay; got %q" .Values.hostedServiceProxyMode) -}} +{{- end -}} +{{- $enableServiceProxy := and .Values.enableServiceProxy (or (ne .Values.installMode "Hosted") (or (eq .Values.hostedServiceProxyMode "BestEffort") (eq .Values.hostedServiceProxyMode "Relay"))) -}} apiVersion: apps/v1 kind: Deployment metadata: namespace: {{ .Release.Namespace }} name: {{ .Values.agentDeploymentName }} annotations: - {{- with .Values.agentDeploymentAnnotations }} - {{ toYaml . | indent 2 }} - {{- end }} +{{ if eq .Values.installMode "Hosted" }} + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} +{{ with .Values.agentDeploymentAnnotations }} +{{ toYaml . | nindent 4 }} +{{ end }} spec: replicas: {{ .Values.replicas }} selector: @@ -16,9 +24,9 @@ spec: template: metadata: annotations: - {{- with .Values.agentDeploymentAnnotations }} - {{ toYaml . | indent 2 }} - {{- end }} +{{ with .Values.agentDeploymentAnnotations }} +{{ toYaml . | nindent 8 }} +{{ end }} target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' labels: open-cluster-management.io/addon: cluster-proxy @@ -115,6 +123,9 @@ spec: {{- range .Values.addonAgentArgs }} - {{ . 
}} {{- end }} + {{- if eq .Values.installMode "Hosted" }} + - --spoke-kubeconfig=/etc/managed/kubeconfig + {{- end }} securityContext: allowPrivilegeEscalation: false capabilities: @@ -130,12 +141,17 @@ spec: - name: hub mountPath: /etc/tls readOnly: true + {{- if eq .Values.installMode "Hosted" }} + - name: managed-kubeconfig + mountPath: /etc/managed + readOnly: true + {{- end }} env: - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - {{- if .Values.enableServiceProxy }} + {{- if $enableServiceProxy }} - name: service-proxy {{- $reverseResourceRequirements := reverse .Values.global.resourceRequirements }} {{- range $item := $reverseResourceRequirements }} @@ -158,6 +174,10 @@ spec: - --cert=/server-cert/tls.crt - --key=/server-cert/tls.key - --hub-kubeconfig=/etc/kubeconfig/kubeconfig + {{- if eq .Values.installMode "Hosted" }} + - --managed-kubeconfig=/etc/managed/kubeconfig + - --hosted-service-proxy-mode={{ .Values.hostedServiceProxyMode }} + {{- end }} {{- range .Values.additionalServiceProxyArgs }} - {{ . 
}} {{- end }} @@ -190,12 +210,52 @@ spec: - name: service-proxy-server-cert mountPath: /server-cert readOnly: true + {{- if eq .Values.installMode "Hosted" }} + - name: managed-kubeconfig + mountPath: /etc/managed + readOnly: true + {{- end }} env: - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace {{- end }} + {{- if and .Values.enableKubeApiProxy (eq .Values.installMode "Hosted") }} + - name: managed-apiserver-proxy + {{- $reverseResourceRequirements := reverse .Values.global.resourceRequirements }} + {{- range $item := $reverseResourceRequirements }} + {{- if regexMatch $item.containerIDRegex "deployments:cluster-proxy-proxy-agent:managed-apiserver-proxy" }} + resources: + {{- toYaml $item.resources | nindent 12 }} + {{- break -}} + {{- end -}} + {{- end }} + image: {{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }} + imagePullPolicy: IfNotPresent + command: + - /cluster-proxy + args: + - managed-apiserver-proxy + - --managed-kubeconfig=/etc/managed/kubeconfig + - --listen=:8443 + - --health-probe-bind-address=:8001 + ports: + - name: managed-api + containerPort: 8443 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + runAsNonRoot: true + readOnlyRootFilesystem: true + volumeMounts: + - name: managed-kubeconfig + mountPath: /etc/managed + readOnly: true + {{- end }} volumes: - name: ca secret: @@ -206,7 +266,12 @@ spec: - name: hub-kubeconfig secret: secretName: cluster-proxy-hub-kubeconfig - {{- if .Values.enableServiceProxy }} + {{- if eq .Values.installMode "Hosted" }} + - name: managed-kubeconfig + secret: + secretName: {{ .Values.managedKubeConfigSecret }} + {{- end }} + {{- if $enableServiceProxy }} {{- if .Values.additionalServiceCAConfigMap }} - name: additional-service-ca configMap: diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-role.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-role.yaml index 
b499fe517..680db0268 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-role.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-role.yaml @@ -3,6 +3,10 @@ kind: Role metadata: name: cluster-proxy-addon-agent namespace: {{ .Release.Namespace }} +{{ if eq .Values.installMode "Hosted" }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} rules: - apiGroups: - coordination.k8s.io diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-rolebinding.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-rolebinding.yaml index 6b23e05b4..ac2be5007 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-rolebinding.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-rolebinding.yaml @@ -3,6 +3,10 @@ apiVersion: rbac.authorization.k8s.io/v1 metadata: name: cluster-proxy-addon-agent namespace: {{ .Release.Namespace }} +{{ if eq .Values.installMode "Hosted" }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/agent-client-secret.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/agent-client-secret.yaml index ec094627e..8404bade1 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/agent-client-secret.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/agent-client-secret.yaml @@ -4,6 +4,10 @@ kind: Secret metadata: namespace: {{ .Release.Namespace }} name: cluster-proxy-open-cluster-management.io-proxy-agent-signer-client-cert +{{ if eq .Values.installMode "Hosted" }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} data: "tls.crt": {{ .Values.staticProxyAgentSecretCert }} "tls.key": {{ 
.Values.staticProxyAgentSecretKey }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/ca-secret.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/ca-secret.yaml index 7b4edf1dc..658f7e809 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/ca-secret.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/ca-secret.yaml @@ -3,5 +3,9 @@ kind: Secret metadata: namespace: {{ .Release.Namespace }} name: cluster-proxy-ca +{{ if eq .Values.installMode "Hosted" }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} data: "ca.crt": {{ .Values.base64EncodedCAData }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/cluster-service.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/cluster-service.yaml index 0c73ad3ad..3791246a2 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/cluster-service.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/cluster-service.yaml @@ -1,4 +1,22 @@ {{ if .Values.enableKubeApiProxy }} +{{ if eq .Values.installMode "Hosted" }} +apiVersion: v1 +kind: Service +metadata: + namespace: {{ .Release.Namespace }} + name: {{ .Values.clusterName }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +spec: + type: ClusterIP + selector: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: proxy-agent + ports: + - name: https + port: 443 + targetPort: 8443 +{{ else }} apiVersion: v1 kind: Service metadata: @@ -8,3 +26,4 @@ spec: type: ExternalName externalName: kubernetes.default.{{ .Values.serviceDomain }} {{ end }} +{{ end }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/managed-kubeconfig-provisioner.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/managed-kubeconfig-provisioner.yaml new file mode 100644 index 000000000..e56d6fdb5 
--- /dev/null +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/managed-kubeconfig-provisioner.yaml @@ -0,0 +1,201 @@ +{{- if eq .Values.installMode "Hosted" }} +{{- $sourceNamespace := default .Values.clusterName .Values.externalManagedKubeConfigSecretNamespace }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: {{ $sourceNamespace }} + name: cluster-proxy-managed-kubeconfig-source-reader + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +rules: + - apiGroups: + - "" + resources: + - secrets + resourceNames: + - {{ .Values.externalManagedKubeConfigSecretName }} + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + namespace: {{ $sourceNamespace }} + name: cluster-proxy-managed-kubeconfig-source-reader + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-proxy-managed-kubeconfig-source-reader +subjects: + - kind: ServiceAccount + name: cluster-proxy + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-managed-kubeconfig-writer + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +rules: + - apiGroups: + - "" + resources: + - secrets + resourceNames: + - {{ .Values.managedKubeConfigSecret }} + verbs: + - get + - update + - patch + - delete + - apiGroups: + - "" + resources: + - secrets + verbs: + - create + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-managed-kubeconfig-writer + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + 
name: cluster-proxy-managed-kubeconfig-writer +subjects: + - kind: ServiceAccount + name: cluster-proxy + namespace: {{ .Release.Namespace }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-managed-kubeconfig-provisioner + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +spec: + replicas: 1 + selector: + matchLabels: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: managed-kubeconfig-provisioner + template: + metadata: + labels: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: managed-kubeconfig-provisioner + spec: + serviceAccountName: cluster-proxy + {{- if .Values.tolerations }} + tolerations: {{ toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }} + {{- end }} + containers: + - name: managed-kubeconfig-provisioner + image: {{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }} + imagePullPolicy: IfNotPresent + command: + - /cluster-proxy + args: + - managed-kubeconfig-provisioner + - --cluster-name={{ .Values.clusterName }} + - --source-namespace={{ $sourceNamespace }} + - --source-name={{ .Values.externalManagedKubeConfigSecretName }} + - --target-namespace={{ .Release.Namespace }} + - --target-name={{ .Values.managedKubeConfigSecret }} + - --managed-service-account-namespace={{ .Release.Namespace }} + - --managed-service-account-name=cluster-proxy + - --token-expiration={{ .Values.managedKubeConfigTokenExpiration }} + - --refresh-before={{ .Values.managedKubeConfigRefreshBefore }} + - --sync-interval={{ .Values.managedKubeConfigSyncInterval }} + - --hub-kubeconfig=/etc/kubeconfig/kubeconfig + - --addon-name=cluster-proxy + - --addon-namespace={{ .Values.clusterName }} + - --health-probe-bind-address=:8000 + securityContext: + allowPrivilegeEscalation: false 
+ capabilities: + drop: + - ALL + privileged: false + runAsNonRoot: true + readOnlyRootFilesystem: true + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: hub-kubeconfig + mountPath: /etc/kubeconfig/ + readOnly: true + volumes: + - name: hub-kubeconfig + secret: + secretName: cluster-proxy-hub-kubeconfig + imagePullSecrets: + {{- range .Values.proxyAgentImagePullSecrets }} + - name: {{ . }} + {{- end }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-managed-kubeconfig-cleanup + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting + addon.open-cluster-management.io/addon-pre-delete: "" +spec: + backoffLimit: 3 + template: + metadata: + labels: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: managed-kubeconfig-cleanup + spec: + serviceAccountName: cluster-proxy + restartPolicy: OnFailure + containers: + - name: cleanup + image: {{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }} + imagePullPolicy: IfNotPresent + command: + - /cluster-proxy + args: + - managed-kubeconfig-provisioner + - --cleanup + - --target-namespace={{ .Release.Namespace }} + - --target-name={{ .Values.managedKubeConfigSecret }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + runAsNonRoot: true + readOnlyRootFilesystem: true + imagePullSecrets: + {{- range .Values.proxyAgentImagePullSecrets }} + - name: {{ . 
}} + {{- end }} +{{- end }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/namespace.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/namespace.yaml index 04934cc89..9ebaa96c6 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/namespace.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/namespace.yaml @@ -5,4 +5,14 @@ metadata: name: {{ .Release.Namespace }} annotations: addon.open-cluster-management.io/deletion-orphan: "" +{{ if eq .Values.installMode "Hosted" }} +--- +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Release.Namespace }} + annotations: + addon.open-cluster-management.io/deletion-orphan: "" + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} {{ end }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-account.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-account.yaml index 79c21124d..f3d068e9b 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-account.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-account.yaml @@ -3,3 +3,13 @@ kind: ServiceAccount metadata: namespace: {{ .Release.Namespace }} name: cluster-proxy +{{ if eq .Values.installMode "Hosted" }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-proxy-server-certificates.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-proxy-server-certificates.yaml index d4a938f84..44ca095ce 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-proxy-server-certificates.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-proxy-server-certificates.yaml @@ 
-1,11 +1,19 @@ -{{- if .Values.enableServiceProxy }} +{{- $validHostedServiceProxyModes := list "Disabled" "BestEffort" "Relay" -}} +{{- if and (eq .Values.installMode "Hosted") (not (has .Values.hostedServiceProxyMode $validHostedServiceProxyModes)) -}} +{{- fail (printf "hostedServiceProxyMode must be one of Disabled, BestEffort, or Relay; got %q" .Values.hostedServiceProxyMode) -}} +{{- end -}} +{{- $enableServiceProxy := and .Values.enableServiceProxy (or (ne .Values.installMode "Hosted") (or (eq .Values.hostedServiceProxyMode "BestEffort") (eq .Values.hostedServiceProxyMode "Relay"))) -}} +{{- if $enableServiceProxy }} apiVersion: v1 kind: Secret metadata: namespace: {{ .Release.Namespace }} name: cluster-proxy-service-proxy-server-certificates +{{ if eq .Values.installMode "Hosted" }} + annotations: + addon.open-cluster-management.io/hosted-manifest-location: hosting +{{ end }} data: "tls.crt": {{ .Values.serviceProxySecretCert }} "tls.key": {{ .Values.serviceProxySecretKey }} {{- end }} - diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-relay.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-relay.yaml new file mode 100644 index 000000000..dbd43868a --- /dev/null +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/service-relay.yaml @@ -0,0 +1,132 @@ +{{- if and .Values.enableServiceProxy (eq .Values.installMode "Hosted") (eq .Values.hostedServiceProxyMode "Relay") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-service-relay-proxy +rules: + - apiGroups: + - "" + resources: + - services/proxy + resourceNames: + - {{ .Values.serviceRelayName }} + - http:{{ .Values.serviceRelayName }} + - http:{{ .Values.serviceRelayName }}:{{ .Values.serviceRelayPort }} + verbs: + - get + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch +--- 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + namespace: {{ .Release.Namespace }} + name: cluster-proxy-service-relay-proxy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-proxy-service-relay-proxy +subjects: + - kind: ServiceAccount + name: cluster-proxy + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: Service +metadata: + namespace: {{ .Release.Namespace }} + name: {{ .Values.serviceRelayName }} +spec: + type: ClusterIP + selector: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: service-relay + ports: + - name: http + port: {{ .Values.serviceRelayPort }} + targetPort: {{ .Values.serviceRelayPort }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: {{ .Release.Namespace }} + name: {{ .Values.serviceRelayName }} +spec: + replicas: 1 + selector: + matchLabels: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: service-relay + template: + metadata: + labels: + open-cluster-management.io/addon: cluster-proxy + proxy.open-cluster-management.io/component-name: service-relay + spec: + serviceAccountName: cluster-proxy + automountServiceAccountToken: false + {{- if .Values.tolerations }} + tolerations: {{ toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }} + {{- end }} + containers: + - name: service-relay + image: {{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }} + imagePullPolicy: IfNotPresent + command: + - /cluster-proxy + args: + - service-relay + - --listen=:{{ .Values.serviceRelayPort }} + - --health-probe-bind-address=:8000 + {{- if .Values.additionalServiceCAConfigMap }} + - --additional-service-ca=/additional-service-ca/service-ca.crt + {{- end }} + ports: + - name: http + containerPort: {{ .Values.serviceRelayPort }} + livenessProbe: + httpGet: + path: /healthz + 
port: 8000 + initialDelaySeconds: 2 + periodSeconds: 10 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + runAsNonRoot: true + readOnlyRootFilesystem: true + {{- if .Values.additionalServiceCAConfigMap }} + volumeMounts: + - name: additional-service-ca + mountPath: /additional-service-ca + readOnly: true + {{- end }} + {{- if .Values.additionalServiceCAConfigMap }} + volumes: + - name: additional-service-ca + configMap: + name: {{ .Values.additionalServiceCAConfigMap }} + optional: true + {{- end }} + imagePullSecrets: + {{- range .Values.proxyAgentImagePullSecrets }} + - name: {{ . }} + {{- end }} +{{- end }} diff --git a/pkg/proxyagent/agent/manifests/charts/addon-agent/values.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/values.yaml index 6ca87c47c..670c967cd 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/values.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/values.yaml @@ -1,4 +1,5 @@ clusterName: loopback +installMode: Default agentDeploymentName: cluster-proxy-proxy-agent @@ -20,6 +21,16 @@ spokeAddonNamespace: "open-cluster-management-cluster-proxy" additionalProxyAgentArgs: [] additionalServiceProxyArgs: [] +externalManagedKubeConfigSecretNamespace: "" +externalManagedKubeConfigSecretName: external-managed-kubeconfig +managedKubeConfigSecret: cluster-proxy-managed-kubeconfig +managedKubeConfigTokenExpiration: 24h +managedKubeConfigRefreshBefore: 1h +managedKubeConfigSyncInterval: 5m +hostedServiceProxyMode: Disabled +serviceRelayName: cluster-proxy-service-relay +serviceRelayPort: 7444 + agentDeploymentAnnotations: {} addonAgentArgs: [] diff --git a/pkg/proxyagent/agent/provisioner/command.go b/pkg/proxyagent/agent/provisioner/command.go new file mode 100644 index 000000000..1637b1e4a --- /dev/null +++ b/pkg/proxyagent/agent/provisioner/command.go @@ -0,0 +1,67 @@ +package provisioner + +import ( + "github.com/spf13/cobra" + "k8s.io/client-go/kubernetes" + 
"k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + + addonclient "open-cluster-management.io/api/client/addon/clientset/versioned" +) + +func NewManagedKubeconfigProvisionerCommand() *cobra.Command { + options := Options{} + + cmd := &cobra.Command{ + Use: "managed-kubeconfig-provisioner", + Short: "Provision a minimal managed-cluster kubeconfig for hosted mode", + RunE: func(cmd *cobra.Command, args []string) error { + options.Complete() + if err := options.Validate(); err != nil { + return err + } + + config, err := rest.InClusterConfig() + if err != nil { + return err + } + hostingClient, err := kubernetes.NewForConfig(rest.AddUserAgent(config, "cluster-proxy-managed-kubeconfig-provisioner")) + if err != nil { + return err + } + + provisioner := NewProvisioner(options, hostingClient) + if options.HubKubeconfig != "" { + hubConfig, err := clientcmd.BuildConfigFromFlags("", options.HubKubeconfig) + if err != nil { + return err + } + hubAddonClient, err := addonclient.NewForConfig(rest.AddUserAgent(hubConfig, "cluster-proxy-managed-kubeconfig-provisioner")) + if err != nil { + return err + } + provisioner.WithAddonClient(hubAddonClient) + } + return provisioner.Run(cmd.Context()) + }, + } + + flags := cmd.Flags() + flags.StringVar(&options.ClusterName, "cluster-name", options.ClusterName, "The managed cluster name") + flags.StringVar(&options.SourceNamespace, "source-namespace", options.SourceNamespace, "The namespace of the external managed kubeconfig secret. Defaults to --cluster-name") + flags.StringVar(&options.SourceName, "source-name", DefaultSourceSecretName, "The external managed kubeconfig secret name") + flags.StringVar(&options.TargetNamespace, "target-namespace", options.TargetNamespace, "The namespace for the generated managed kubeconfig secret. 
Defaults to POD_NAMESPACE") + flags.StringVar(&options.TargetName, "target-name", DefaultTargetSecretName, "The generated managed kubeconfig secret name") + flags.StringVar(&options.ManagedServiceAccountNamespace, "managed-service-account-namespace", options.ManagedServiceAccountNamespace, "The namespace of the managed cluster service account. Defaults to --target-namespace") + flags.StringVar(&options.ManagedServiceAccountName, "managed-service-account-name", DefaultManagedServiceAccountName, "The managed cluster service account name") + flags.DurationVar(&options.TokenExpiration, "token-expiration", DefaultTokenExpiration, "Requested TokenRequest expiration") + flags.DurationVar(&options.RefreshBefore, "refresh-before", DefaultRefreshBefore, "Refresh the managed kubeconfig this long before token expiration") + flags.DurationVar(&options.SyncInterval, "sync-interval", DefaultSyncInterval, "Interval between managed kubeconfig syncs") + flags.StringVar(&options.HubKubeconfig, "hub-kubeconfig", options.HubKubeconfig, "The kubeconfig file for connecting to the hub cluster") + flags.StringVar(&options.AddonName, "addon-name", DefaultAddonName, "The ManagedClusterAddOn name to patch with managed kubeconfig readiness") + flags.StringVar(&options.AddonNamespace, "addon-namespace", options.AddonNamespace, "The ManagedClusterAddOn namespace to patch. 
Defaults to --cluster-name") + flags.StringVar(&options.HealthProbeBindAddress, "health-probe-bind-address", DefaultHealthProbeBindAddress, "The address the health probe and metrics endpoint binds to") + flags.BoolVar(&options.Cleanup, "cleanup", false, "Delete the generated managed kubeconfig secret and exit") + + return cmd +} diff --git a/pkg/proxyagent/agent/provisioner/managed_kubeconfig.go b/pkg/proxyagent/agent/provisioner/managed_kubeconfig.go new file mode 100644 index 000000000..e9dbeb4c2 --- /dev/null +++ b/pkg/proxyagent/agent/provisioner/managed_kubeconfig.go @@ -0,0 +1,527 @@ +package provisioner + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + "k8s.io/klog/v2" + + addonclient "open-cluster-management.io/api/client/addon/clientset/versioned" + "open-cluster-management.io/cluster-proxy/pkg/common" + addonmetrics "open-cluster-management.io/cluster-proxy/pkg/metrics" + "open-cluster-management.io/cluster-proxy/pkg/utils" +) + +const ( + DefaultSourceSecretName = "external-managed-kubeconfig" + DefaultTargetSecretName = "cluster-proxy-managed-kubeconfig" + DefaultManagedServiceAccountName = "cluster-proxy" + DefaultAddonName = common.AddonName + DefaultTokenExpiration = 24 * time.Hour + DefaultRefreshBefore = time.Hour + DefaultSyncInterval = 5 * time.Minute + DefaultHealthProbeBindAddress = ":8000" + SecretKubeconfigKey = "kubeconfig" + ConditionManagedKubeconfigReady = "ManagedKubeconfigReady" + annotationTokenExpirationTimestamp = "proxy.open-cluster-management.io/managed-kubeconfig-token-expiration" + annotationSourceKubeconfigHash = 
"proxy.open-cluster-management.io/source-kubeconfig-hash" +) + +type ManagedClientFactory func(kubeconfig []byte) (kubernetes.Interface, error) + +type Options struct { + ClusterName string + SourceNamespace string + SourceName string + TargetNamespace string + TargetName string + ManagedServiceAccountNamespace string + ManagedServiceAccountName string + TokenExpiration time.Duration + RefreshBefore time.Duration + SyncInterval time.Duration + HubKubeconfig string + AddonName string + AddonNamespace string + HealthProbeBindAddress string + Cleanup bool +} + +func (o *Options) Complete() { + if o.SourceNamespace == "" { + o.SourceNamespace = o.ClusterName + } + if o.SourceName == "" { + o.SourceName = DefaultSourceSecretName + } + if o.TargetNamespace == "" { + o.TargetNamespace = os.Getenv("POD_NAMESPACE") + } + if o.TargetName == "" { + o.TargetName = DefaultTargetSecretName + } + if o.ManagedServiceAccountNamespace == "" { + o.ManagedServiceAccountNamespace = o.TargetNamespace + } + if o.ManagedServiceAccountName == "" { + o.ManagedServiceAccountName = DefaultManagedServiceAccountName + } + if o.TokenExpiration == 0 { + o.TokenExpiration = DefaultTokenExpiration + } + if o.RefreshBefore == 0 { + o.RefreshBefore = DefaultRefreshBefore + } + if o.SyncInterval == 0 { + o.SyncInterval = DefaultSyncInterval + } + if o.AddonName == "" { + o.AddonName = common.AddonName + } + if o.AddonNamespace == "" { + o.AddonNamespace = o.ClusterName + } + if o.HealthProbeBindAddress == "" { + o.HealthProbeBindAddress = DefaultHealthProbeBindAddress + } +} + +func (o Options) Validate() error { + if o.Cleanup { + if o.TargetNamespace == "" { + return fmt.Errorf("target namespace is required") + } + if o.TargetName == "" { + return fmt.Errorf("target name is required") + } + return nil + } + if o.ClusterName == "" { + return fmt.Errorf("cluster name is required") + } + if o.SourceNamespace == "" { + return fmt.Errorf("source namespace is required") + } + if o.SourceName == "" { + 
return fmt.Errorf("source name is required") + } + if o.TargetNamespace == "" { + return fmt.Errorf("target namespace is required") + } + if o.TargetName == "" { + return fmt.Errorf("target name is required") + } + if o.ManagedServiceAccountNamespace == "" { + return fmt.Errorf("managed service account namespace is required") + } + if o.ManagedServiceAccountName == "" { + return fmt.Errorf("managed service account name is required") + } + if o.TokenExpiration <= 0 { + return fmt.Errorf("token expiration must be greater than zero") + } + if o.RefreshBefore < 0 { + return fmt.Errorf("refresh before must not be negative") + } + if o.SyncInterval <= 0 { + return fmt.Errorf("sync interval must be greater than zero") + } + return nil +} + +type Provisioner struct { + options Options + hostingClient kubernetes.Interface + managedClientFn ManagedClientFactory + addonClient addonclient.Interface + now func() time.Time + lastTokenExpiration time.Time +} + +func NewProvisioner(options Options, hostingClient kubernetes.Interface) *Provisioner { + options.Complete() + return &Provisioner{ + options: options, + hostingClient: hostingClient, + managedClientFn: newManagedClient, + now: time.Now, + } +} + +func (p *Provisioner) WithManagedClientFactory(factory ManagedClientFactory) *Provisioner { + p.managedClientFn = factory + return p +} + +func (p *Provisioner) WithAddonClient(addonClient addonclient.Interface) *Provisioner { + p.addonClient = addonClient + return p +} + +func (p *Provisioner) WithNow(now func() time.Time) *Provisioner { + p.now = now + return p +} + +func (p *Provisioner) LastTokenExpiration() time.Time { + return p.lastTokenExpiration +} + +func (p *Provisioner) Run(ctx context.Context) error { + go func() { + if err := utils.ServeHealthProbes(p.options.HealthProbeBindAddress, nil); err != nil { + klog.Fatal(err) + } + }() + + if p.options.Cleanup { + return p.Cleanup(ctx) + } + if err := p.Sync(ctx); err != nil { + klog.Errorf("managed kubeconfig sync failed: 
%v", err) + } + + ticker := time.NewTicker(p.options.SyncInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + if err := p.Sync(ctx); err != nil { + klog.Errorf("managed kubeconfig sync failed: %v", err) + } + } + } +} + +func (p *Provisioner) Cleanup(ctx context.Context) error { + err := p.hostingClient.CoreV1().Secrets(p.options.TargetNamespace).Delete(ctx, p.options.TargetName, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + p.recordEvent(ctx, corev1.EventTypeNormal, "ManagedKubeconfigCleanupSkipped", + fmt.Sprintf("Generated managed kubeconfig secret %s/%s was already removed", p.options.TargetNamespace, p.options.TargetName)) + return nil + } + if err != nil { + p.recordEvent(ctx, corev1.EventTypeWarning, "ManagedKubeconfigCleanupFailed", err.Error()) + return err + } + p.recordEvent(ctx, corev1.EventTypeNormal, "ManagedKubeconfigCleaned", + fmt.Sprintf("Deleted generated managed kubeconfig secret %s/%s", p.options.TargetNamespace, p.options.TargetName)) + return nil +} + +func (p *Provisioner) Sync(ctx context.Context) error { + result, err := p.sync(ctx) + if err != nil { + addonmetrics.ObserveManagedKubeconfigRefresh("error") + p.recordEvent(ctx, corev1.EventTypeWarning, "ManagedKubeconfigSyncFailed", err.Error()) + conditionErr := p.setManagedKubeconfigCondition(ctx, metav1.ConditionFalse, "SyncFailed", err.Error()) + if conditionErr != nil { + klog.Errorf("failed to patch ManagedClusterAddOn condition after sync failure: %v", conditionErr) + } + return err + } + + addonmetrics.ObserveManagedKubeconfigRefresh(result.metricResult) + if !result.expiration.IsZero() { + addonmetrics.SetManagedKubeconfigTokenExpiration(result.expiration, p.now()) + } + p.recordEvent(ctx, corev1.EventTypeNormal, result.reason, result.message) + if err := p.setManagedKubeconfigCondition(ctx, metav1.ConditionTrue, result.reason, result.message); err != nil { + 
addonmetrics.ObserveManagedKubeconfigRefresh("error") + p.recordEvent(ctx, corev1.EventTypeWarning, "ManagedKubeconfigConditionPatchFailed", err.Error()) + return err + } + return nil +} + +type syncResult struct { + metricResult string + reason string + message string + expiration time.Time +} + +func (p *Provisioner) sync(ctx context.Context) (syncResult, error) { + if err := p.options.Validate(); err != nil { + return syncResult{}, err + } + + source, err := p.hostingClient.CoreV1().Secrets(p.options.SourceNamespace).Get(ctx, p.options.SourceName, metav1.GetOptions{}) + if err != nil { + return syncResult{}, err + } + sourceKubeconfig, ok := source.Data[SecretKubeconfigKey] + if !ok || len(sourceKubeconfig) == 0 { + return syncResult{}, fmt.Errorf("source secret %s/%s does not contain %q", p.options.SourceNamespace, p.options.SourceName, SecretKubeconfigKey) + } + + sourceHash := kubeconfigHash(sourceKubeconfig) + target, err := p.hostingClient.CoreV1().Secrets(p.options.TargetNamespace).Get(ctx, p.options.TargetName, metav1.GetOptions{}) + targetExists := err == nil + if targetExists && !needsRefresh(target, sourceHash, p.now(), p.options.RefreshBefore) { + klog.V(4).Infof("managed kubeconfig secret %s/%s is still fresh", p.options.TargetNamespace, p.options.TargetName) + return syncResult{ + metricResult: "skipped", + reason: "ManagedKubeconfigFresh", + message: fmt.Sprintf("Generated managed kubeconfig secret %s/%s is still fresh", p.options.TargetNamespace, p.options.TargetName), + expiration: tokenExpirationFromSecret(target), + }, nil + } + if err != nil && !apierrors.IsNotFound(err) { + return syncResult{}, err + } + + managedClient, err := p.managedClientFn(sourceKubeconfig) + if err != nil { + return syncResult{}, err + } + + expirationSeconds := int64(p.options.TokenExpiration.Seconds()) + tokenRequest, err := managedClient.CoreV1().ServiceAccounts(p.options.ManagedServiceAccountNamespace).CreateToken( + ctx, + p.options.ManagedServiceAccountName, + 
&authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: &expirationSeconds, + }, + }, + metav1.CreateOptions{}, + ) + if err != nil { + return syncResult{}, err + } + if tokenRequest.Status.Token == "" { + return syncResult{}, fmt.Errorf("token request for serviceaccount %s/%s returned an empty token", + p.options.ManagedServiceAccountNamespace, p.options.ManagedServiceAccountName) + } + + expiration := tokenRequest.Status.ExpirationTimestamp.Time + if expiration.IsZero() { + expiration = p.now().Add(p.options.TokenExpiration) + } + managedKubeconfig, err := BuildManagedKubeconfig(sourceKubeconfig, tokenRequest.Status.Token) + if err != nil { + return syncResult{}, err + } + + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: p.options.TargetName, + Namespace: p.options.TargetNamespace, + Annotations: map[string]string{ + annotationTokenExpirationTimestamp: expiration.UTC().Format(time.RFC3339), + annotationSourceKubeconfigHash: sourceHash, + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{ + SecretKubeconfigKey: managedKubeconfig, + }, + } + + if targetExists { + secret.ResourceVersion = target.ResourceVersion + _, err = p.hostingClient.CoreV1().Secrets(p.options.TargetNamespace).Update(ctx, secret, metav1.UpdateOptions{}) + } else { + _, err = p.hostingClient.CoreV1().Secrets(p.options.TargetNamespace).Create(ctx, secret, metav1.CreateOptions{}) + } + if err != nil { + return syncResult{}, err + } + + p.lastTokenExpiration = expiration + klog.Infof("managed kubeconfig secret %s/%s synced; token expires at %s", + p.options.TargetNamespace, p.options.TargetName, expiration.UTC().Format(time.RFC3339)) + reason := "ManagedKubeconfigCreated" + if targetExists { + reason = "ManagedKubeconfigUpdated" + } + return syncResult{ + metricResult: "success", + reason: reason, + message: fmt.Sprintf("Synced generated managed kubeconfig secret %s/%s; token expires at %s", + p.options.TargetNamespace, 
p.options.TargetName, expiration.UTC().Format(time.RFC3339)), + expiration: expiration, + }, nil +} + +func (p *Provisioner) recordEvent(ctx context.Context, eventType, reason, message string) { + if p.options.TargetNamespace == "" || p.options.TargetName == "" { + return + } + now := metav1.NewTime(p.now()) + event := &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: p.options.TargetNamespace, + GenerateName: p.options.TargetName + ".", + }, + InvolvedObject: corev1.ObjectReference{ + APIVersion: "v1", + Kind: "Secret", + Namespace: p.options.TargetNamespace, + Name: p.options.TargetName, + }, + Reason: reason, + Message: message, + Type: eventType, + Source: corev1.EventSource{Component: "cluster-proxy-managed-kubeconfig-provisioner"}, + FirstTimestamp: now, + LastTimestamp: now, + EventTime: metav1.MicroTime(now), + Count: 1, + Action: reason, + ReportingController: "cluster-proxy-managed-kubeconfig-provisioner", + ReportingInstance: p.reportingInstance(), + } + if _, err := p.hostingClient.CoreV1().Events(p.options.TargetNamespace).Create(ctx, event, metav1.CreateOptions{}); err != nil { + klog.Errorf("failed to record managed kubeconfig event %s: %v", reason, err) + } +} + +func (p *Provisioner) reportingInstance() string { + if hostname := os.Getenv("HOSTNAME"); hostname != "" { + return hostname + } + if p.options.TargetNamespace != "" && p.options.TargetName != "" { + return p.options.TargetNamespace + "/" + p.options.TargetName + } + return "cluster-proxy-managed-kubeconfig-provisioner" +} + +func (p *Provisioner) setManagedKubeconfigCondition(ctx context.Context, status metav1.ConditionStatus, reason, message string) error { + if p.addonClient == nil || p.options.AddonNamespace == "" || p.options.AddonName == "" { + return nil + } + + addon, err := p.addonClient.AddonV1alpha1().ManagedClusterAddOns(p.options.AddonNamespace).Get(ctx, p.options.AddonName, metav1.GetOptions{}) + if err != nil { + return err + } + updated := addon.DeepCopy() + 
meta.SetStatusCondition(&updated.Status.Conditions, metav1.Condition{ + Type: ConditionManagedKubeconfigReady, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: addon.Generation, + }) + _, err = p.addonClient.AddonV1alpha1().ManagedClusterAddOns(p.options.AddonNamespace).UpdateStatus(ctx, updated, metav1.UpdateOptions{}) + return err +} + +func BuildManagedKubeconfig(sourceKubeconfig []byte, token string) ([]byte, error) { + sourceConfig, err := clientcmd.Load(sourceKubeconfig) + if err != nil { + return nil, err + } + cluster, err := currentCluster(sourceConfig) + if err != nil { + return nil, err + } + + clusterCopy := *cluster + config := clientcmdapi.Config{ + Clusters: map[string]*clientcmdapi.Cluster{ + "managed": &clusterCopy, + }, + AuthInfos: map[string]*clientcmdapi.AuthInfo{ + "cluster-proxy": { + Token: token, + }, + }, + Contexts: map[string]*clientcmdapi.Context{ + "managed": { + Cluster: "managed", + AuthInfo: "cluster-proxy", + }, + }, + CurrentContext: "managed", + } + + return clientcmd.Write(config) +} + +func currentCluster(config *clientcmdapi.Config) (*clientcmdapi.Cluster, error) { + if config == nil { + return nil, fmt.Errorf("kubeconfig is empty") + } + if config.CurrentContext != "" { + if context, ok := config.Contexts[config.CurrentContext]; ok && context.Cluster != "" { + if cluster, ok := config.Clusters[context.Cluster]; ok { + return cluster, nil + } + return nil, fmt.Errorf("current context references missing cluster %q", context.Cluster) + } + } + if len(config.Clusters) == 1 { + for _, cluster := range config.Clusters { + return cluster, nil + } + } + return nil, fmt.Errorf("kubeconfig must have a current context or exactly one cluster") +} + +func needsRefresh(secret *corev1.Secret, sourceHash string, now time.Time, refreshBefore time.Duration) bool { + if secret == nil { + return true + } + if len(secret.Data[SecretKubeconfigKey]) == 0 { + return true + } + if 
secret.Annotations[annotationSourceKubeconfigHash] != sourceHash { + return true + } + expirationRaw := secret.Annotations[annotationTokenExpirationTimestamp] + if expirationRaw == "" { + return true + } + expiration, err := time.Parse(time.RFC3339, expirationRaw) + if err != nil { + return true + } + return !now.Add(refreshBefore).Before(expiration) +} + +func tokenExpirationFromSecret(secret *corev1.Secret) time.Time { + if secret == nil { + return time.Time{} + } + expirationRaw := secret.Annotations[annotationTokenExpirationTimestamp] + if expirationRaw == "" { + return time.Time{} + } + expiration, err := time.Parse(time.RFC3339, expirationRaw) + if err != nil { + return time.Time{} + } + return expiration +} + +func kubeconfigHash(kubeconfig []byte) string { + sum := sha256.Sum256(kubeconfig) + return hex.EncodeToString(sum[:]) +} + +func newManagedClient(kubeconfig []byte) (kubernetes.Interface, error) { + config, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig) + if err != nil { + return nil, err + } + return kubernetes.NewForConfig(rest.AddUserAgent(config, "cluster-proxy-managed-kubeconfig-provisioner")) +} diff --git a/pkg/proxyagent/agent/provisioner/managed_kubeconfig_test.go b/pkg/proxyagent/agent/provisioner/managed_kubeconfig_test.go new file mode 100644 index 000000000..5a0e21ddc --- /dev/null +++ b/pkg/proxyagent/agent/provisioner/managed_kubeconfig_test.go @@ -0,0 +1,373 @@ +package provisioner + +import ( + "context" + "strings" + "testing" + "time" + + authenticationv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/clientcmd" + addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" + fakeaddon "open-cluster-management.io/api/client/addon/clientset/versioned/fake" +) + 
+func TestProvisionerSyncCreatesManagedKubeconfigSecret(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + sourceKubeconfig := testKubeconfig() + hostingClient := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultSourceSecretName, + Namespace: "cluster1", + }, + Data: map[string][]byte{SecretKubeconfigKey: sourceKubeconfig}, + }) + managedClient := fakeManagedClient(t, "managed-token", now.Add(time.Hour)) + + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + ManagedServiceAccountNamespace: "addon-ns", + TokenExpiration: time.Hour, + }, hostingClient).WithManagedClientFactory(func(kubeconfig []byte) (kubernetes.Interface, error) { + if string(kubeconfig) != string(sourceKubeconfig) { + t.Fatalf("expected source kubeconfig to be used") + } + return managedClient, nil + }).WithNow(func() time.Time { return now }) + + if err := provisioner.Sync(context.Background()); err != nil { + t.Fatalf("unexpected sync error: %v", err) + } + + secret, err := hostingClient.CoreV1().Secrets("addon-ns").Get(context.Background(), DefaultTargetSecretName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get target secret: %v", err) + } + if secret.Annotations[annotationSourceKubeconfigHash] != kubeconfigHash(sourceKubeconfig) { + t.Fatalf("source hash annotation was not set") + } + if secret.Annotations[annotationTokenExpirationTimestamp] != now.Add(time.Hour).Format(time.RFC3339) { + t.Fatalf("unexpected expiration annotation: %s", secret.Annotations[annotationTokenExpirationTimestamp]) + } + + generatedConfig, err := clientcmd.Load(secret.Data[SecretKubeconfigKey]) + if err != nil { + t.Fatalf("failed to load generated kubeconfig: %v", err) + } + if generatedConfig.Clusters["managed"].Server != "https://managed.example.com:6443" { + t.Fatalf("unexpected generated server: %s", generatedConfig.Clusters["managed"].Server) + } + if 
generatedConfig.AuthInfos["cluster-proxy"].Token != "managed-token" { + t.Fatalf("unexpected generated token: %s", generatedConfig.AuthInfos["cluster-proxy"].Token) + } +} + +func TestProvisionerSyncRecordsEventAndCondition(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + sourceKubeconfig := testKubeconfig() + hostingClient := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultSourceSecretName, + Namespace: "cluster1", + }, + Data: map[string][]byte{SecretKubeconfigKey: sourceKubeconfig}, + }) + addonClient := fakeaddon.NewSimpleClientset(&addonv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultAddonName, + Namespace: "cluster1", + Generation: 7, + }, + }) + managedClient := fakeManagedClient(t, "managed-token", now.Add(time.Hour)) + + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + ManagedServiceAccountNamespace: "addon-ns", + TokenExpiration: time.Hour, + }, hostingClient).WithManagedClientFactory(func(kubeconfig []byte) (kubernetes.Interface, error) { + return managedClient, nil + }).WithAddonClient(addonClient).WithNow(func() time.Time { return now }) + + if err := provisioner.Sync(context.Background()); err != nil { + t.Fatalf("unexpected sync error: %v", err) + } + + addon, err := addonClient.AddonV1alpha1().ManagedClusterAddOns("cluster1").Get(context.Background(), DefaultAddonName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get addon: %v", err) + } + condition := meta.FindStatusCondition(addon.Status.Conditions, ConditionManagedKubeconfigReady) + if condition == nil { + t.Fatalf("expected %s condition", ConditionManagedKubeconfigReady) + } + if condition.Status != metav1.ConditionTrue || condition.Reason != "ManagedKubeconfigCreated" || condition.ObservedGeneration != 7 { + t.Fatalf("unexpected condition: %#v", condition) + } + events, err := 
hostingClient.CoreV1().Events("addon-ns").List(context.Background(), metav1.ListOptions{}) + if err != nil { + t.Fatalf("failed to list events: %v", err) + } + if len(events.Items) == 0 { + t.Fatal("expected a Kubernetes event to be recorded") + } +} + +func TestProvisionerSyncFailurePatchesConditionFalse(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + hostingClient := fake.NewSimpleClientset() + addonClient := fakeaddon.NewSimpleClientset(&addonv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultAddonName, + Namespace: "cluster1", + }, + }) + + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + }, hostingClient).WithAddonClient(addonClient).WithNow(func() time.Time { return now }) + + if err := provisioner.Sync(context.Background()); err == nil { + t.Fatal("expected sync error") + } + addon, err := addonClient.AddonV1alpha1().ManagedClusterAddOns("cluster1").Get(context.Background(), DefaultAddonName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get addon: %v", err) + } + condition := meta.FindStatusCondition(addon.Status.Conditions, ConditionManagedKubeconfigReady) + if condition == nil { + t.Fatalf("expected %s condition", ConditionManagedKubeconfigReady) + } + if condition.Status != metav1.ConditionFalse || condition.Reason != "SyncFailed" { + t.Fatalf("unexpected condition: %#v", condition) + } +} + +func TestProvisionerSyncSkipsFreshSecret(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + sourceKubeconfig := testKubeconfig() + hostingClient := fake.NewSimpleClientset( + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: DefaultSourceSecretName, Namespace: "cluster1"}, + Data: map[string][]byte{SecretKubeconfigKey: sourceKubeconfig}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultTargetSecretName, + Namespace: "addon-ns", + Annotations: map[string]string{ + annotationSourceKubeconfigHash: 
kubeconfigHash(sourceKubeconfig), + annotationTokenExpirationTimestamp: now.Add(2 * time.Hour).Format(time.RFC3339), + }, + }, + Data: map[string][]byte{SecretKubeconfigKey: []byte("existing")}, + }, + ) + + called := false + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + RefreshBefore: time.Hour, + }, hostingClient).WithManagedClientFactory(func(kubeconfig []byte) (kubernetes.Interface, error) { + called = true + return fake.NewSimpleClientset(), nil + }).WithNow(func() time.Time { return now }) + + if err := provisioner.Sync(context.Background()); err != nil { + t.Fatalf("unexpected sync error: %v", err) + } + if called { + t.Fatalf("managed client should not be created when target secret is fresh") + } +} + +func TestProvisionerSyncRefreshesBeforeExpiration(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + sourceKubeconfig := testKubeconfig() + hostingClient := fake.NewSimpleClientset( + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: DefaultSourceSecretName, Namespace: "cluster1"}, + Data: map[string][]byte{SecretKubeconfigKey: sourceKubeconfig}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultTargetSecretName, + Namespace: "addon-ns", + Annotations: map[string]string{ + annotationSourceKubeconfigHash: kubeconfigHash(sourceKubeconfig), + annotationTokenExpirationTimestamp: now.Add(30 * time.Minute).Format(time.RFC3339), + }, + }, + Data: map[string][]byte{SecretKubeconfigKey: []byte("existing")}, + }, + ) + managedClient := fakeManagedClient(t, "refreshed-token", now.Add(time.Hour)) + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + RefreshBefore: time.Hour, + TokenExpiration: time.Hour, + }, hostingClient).WithManagedClientFactory(func(kubeconfig []byte) (kubernetes.Interface, error) { + return managedClient, nil + }).WithNow(func() time.Time { return now }) + + if err := 
provisioner.Sync(context.Background()); err != nil { + t.Fatalf("unexpected sync error: %v", err) + } + + secret, err := hostingClient.CoreV1().Secrets("addon-ns").Get(context.Background(), DefaultTargetSecretName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get target secret: %v", err) + } + generatedConfig, err := clientcmd.Load(secret.Data[SecretKubeconfigKey]) + if err != nil { + t.Fatalf("failed to load generated kubeconfig: %v", err) + } + if generatedConfig.AuthInfos["cluster-proxy"].Token != "refreshed-token" { + t.Fatalf("expected refreshed token, got %q", generatedConfig.AuthInfos["cluster-proxy"].Token) + } +} + +func TestProvisionerSyncRefreshesWhenSourceKubeconfigChanges(t *testing.T) { + now := time.Date(2026, 5, 19, 1, 2, 3, 0, time.UTC) + sourceKubeconfig := []byte(`apiVersion: v1 +kind: Config +clusters: +- name: managed + cluster: + server: https://changed.example.com:6443 +contexts: +- name: managed + context: + cluster: managed + user: admin +current-context: managed +users: +- name: admin + user: + token: admin-token +`) + hostingClient := fake.NewSimpleClientset( + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: DefaultSourceSecretName, Namespace: "cluster1"}, + Data: map[string][]byte{SecretKubeconfigKey: sourceKubeconfig}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultTargetSecretName, + Namespace: "addon-ns", + Annotations: map[string]string{ + annotationSourceKubeconfigHash: kubeconfigHash(testKubeconfig()), + annotationTokenExpirationTimestamp: now.Add(2 * time.Hour).Format(time.RFC3339), + }, + }, + Data: map[string][]byte{SecretKubeconfigKey: []byte("existing")}, + }, + ) + managedClient := fakeManagedClient(t, "changed-token", now.Add(time.Hour)) + provisioner := NewProvisioner(Options{ + ClusterName: "cluster1", + TargetNamespace: "addon-ns", + RefreshBefore: time.Hour, + TokenExpiration: time.Hour, + }, hostingClient).WithManagedClientFactory(func(kubeconfig []byte) 
(kubernetes.Interface, error) { + if string(kubeconfig) != string(sourceKubeconfig) { + t.Fatalf("expected changed source kubeconfig to be used") + } + return managedClient, nil + }).WithNow(func() time.Time { return now }) + + if err := provisioner.Sync(context.Background()); err != nil { + t.Fatalf("unexpected sync error: %v", err) + } + + secret, err := hostingClient.CoreV1().Secrets("addon-ns").Get(context.Background(), DefaultTargetSecretName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("failed to get target secret: %v", err) + } + generatedConfig, err := clientcmd.Load(secret.Data[SecretKubeconfigKey]) + if err != nil { + t.Fatalf("failed to load generated kubeconfig: %v", err) + } + if generatedConfig.Clusters["managed"].Server != "https://changed.example.com:6443" { + t.Fatalf("expected changed server, got %q", generatedConfig.Clusters["managed"].Server) + } + if generatedConfig.AuthInfos["cluster-proxy"].Token != "changed-token" { + t.Fatalf("expected changed token, got %q", generatedConfig.AuthInfos["cluster-proxy"].Token) + } + if secret.Annotations[annotationSourceKubeconfigHash] != kubeconfigHash(sourceKubeconfig) { + t.Fatalf("expected source hash annotation to be updated") + } +} + +func TestProvisionerCleanupDeletesTargetSecret(t *testing.T) { + hostingClient := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: DefaultTargetSecretName, Namespace: "addon-ns"}, + }) + provisioner := NewProvisioner(Options{ + TargetNamespace: "addon-ns", + Cleanup: true, + }, hostingClient) + + if err := provisioner.Cleanup(context.Background()); err != nil { + t.Fatalf("unexpected cleanup error: %v", err) + } + _, err := hostingClient.CoreV1().Secrets("addon-ns").Get(context.Background(), DefaultTargetSecretName, metav1.GetOptions{}) + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Fatalf("expected target secret to be deleted, got %v", err) + } +} + +func fakeManagedClient(t *testing.T, token string, expiration 
time.Time) *fake.Clientset { + t.Helper() + client := fake.NewSimpleClientset() + client.PrependReactor("create", "serviceaccounts", func(action k8stesting.Action) (bool, runtime.Object, error) { + if action.GetSubresource() != "token" { + return false, nil, nil + } + createAction := action.(k8stesting.CreateAction) + tokenRequest := createAction.GetObject().(*authenticationv1.TokenRequest) + if tokenRequest.Spec.ExpirationSeconds == nil || *tokenRequest.Spec.ExpirationSeconds == 0 { + t.Fatalf("expected TokenRequest expiration to be set") + } + return true, &authenticationv1.TokenRequest{ + Status: authenticationv1.TokenRequestStatus{ + Token: token, + ExpirationTimestamp: metav1.NewTime(expiration), + }, + }, nil + }) + return client +} + +func testKubeconfig() []byte { + return []byte(`apiVersion: v1 +kind: Config +clusters: +- name: managed + cluster: + server: https://managed.example.com:6443 + certificate-authority-data: Y2E= +contexts: +- name: managed + context: + cluster: managed + user: admin +current-context: managed +users: +- name: admin + user: + token: admin-token +`) +} diff --git a/pkg/serviceproxy/service_proxy.go b/pkg/serviceproxy/service_proxy.go index 9ff081685..c5402416e 100644 --- a/pkg/serviceproxy/service_proxy.go +++ b/pkg/serviceproxy/service_proxy.go @@ -1,6 +1,7 @@ package serviceproxy import ( + "bufio" "context" "crypto/tls" "crypto/x509" @@ -8,6 +9,7 @@ import ( "net" "net/http" "net/http/httputil" + "net/url" "os" "strings" "time" @@ -19,12 +21,14 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + clientcmdapi "k8s.io/client-go/tools/clientcmd/api" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/healthz" addonutils "open-cluster-management.io/addon-framework/pkg/utils" "open-cluster-management.io/cluster-proxy/pkg/constant" + addonmetrics "open-cluster-management.io/cluster-proxy/pkg/metrics" "open-cluster-management.io/cluster-proxy/pkg/utils" sdktls 
"open-cluster-management.io/sdk-go/pkg/tls" ) @@ -46,6 +50,10 @@ func NewServiceProxyCommand() *cobra.Command { } const ( + ServiceProxyModeDisabled = "Disabled" + ServiceProxyModeBestEffort = "BestEffort" + ServiceProxyModeRelay = "Relay" + // defaultTokenReviewCacheTTL is the default TTL for cached TokenReview results. // Cached entries expire after this duration, forcing a fresh TokenReview API call. // A short TTL (10s) is sufficient because the primary goal is deduplicating @@ -76,9 +84,14 @@ type serviceProxy struct { kubeClientBurst int hubKubeConfig string + managedKubeConfig string + managedAPIServerURL string hubKubeClient kubernetes.Interface managedClusterKubeClient kubernetes.Interface + hostedServiceProxyMode string + relayURLTemplate *url.URL + enableImpersonation bool managedClusterAuthenticator authenticator.Token @@ -92,9 +105,10 @@ type serviceProxy struct { func newServiceProxy() *serviceProxy { s := &serviceProxy{ - tokenReviewCacheTTL: defaultTokenReviewCacheTTL, - kubeClientQPS: defaultKubeClientQPS, - kubeClientBurst: defaultKubeClientBurst, + tokenReviewCacheTTL: defaultTokenReviewCacheTTL, + kubeClientQPS: defaultKubeClientQPS, + kubeClientBurst: defaultKubeClientBurst, + hostedServiceProxyMode: ServiceProxyModeBestEffort, } s.getImpersonateTokenFunc = s.readImpersonateTokenFromFile return s @@ -109,6 +123,8 @@ func (s *serviceProxy) AddFlags(cmd *cobra.Command) { // hubKubeConfig is the kubeconfig file for connecting to the hub cluster flags.StringVar(&s.hubKubeConfig, "hub-kubeconfig", "", "The kubeconfig file for connecting to the hub cluster") + flags.StringVar(&s.managedKubeConfig, "managed-kubeconfig", "", "The kubeconfig file for connecting to the managed cluster. If empty, in-cluster config is used") + flags.StringVar(&s.hostedServiceProxyMode, "hosted-service-proxy-mode", s.hostedServiceProxyMode, "Hosted service proxy mode. 
One of Disabled, BestEffort, or Relay") // proxy related flags flags.IntVar(&s.maxIdleConns, "max-idle-conns", 100, "The maximum number of idle (keep-alive) connections across all hosts.") @@ -132,7 +148,7 @@ func (s *serviceProxy) Run(ctx context.Context) error { var err error customChecks := []healthz.Checker{} - cc, err := addonutils.NewConfigChecker("cert", s.cert, s.key, rootCAFile, s.hubKubeConfig) + cc, err := addonutils.NewConfigChecker("cert", configCheckerFiles(s.cert, s.key, rootCAFile, s.hubKubeConfig, s.managedKubeConfig)...) if err != nil { return err } @@ -174,18 +190,30 @@ func (s *serviceProxy) Run(ctx context.Context) error { } // init managedClusterKubeClient - // managedClusterKubeClient is the kubeClient of current cluster using in-cluster config - config, err := rest.InClusterConfig() + managedConfig, err := s.managedRESTConfig() if err != nil { - return fmt.Errorf("failed to get in-cluster config: %v", err) + return err } - config.QPS = s.kubeClientQPS - config.Burst = s.kubeClientBurst + managedConfig.QPS = s.kubeClientQPS + managedConfig.Burst = s.kubeClientBurst - s.managedClusterKubeClient, err = kubernetes.NewForConfig(config) + s.managedClusterKubeClient, err = kubernetes.NewForConfig(managedConfig) if err != nil { return err } + if s.managedKubeConfig != "" { + s.managedAPIServerURL = managedConfig.Host + s.getImpersonateTokenFunc = s.readImpersonateTokenFromManagedKubeconfig + if err := appendRESTConfigCA(s.rootCAs, managedConfig); err != nil { + return err + } + } + if s.hostedServiceProxyMode == ServiceProxyModeRelay { + s.relayURLTemplate, err = buildServiceRelayURL(s.managedAPIServerURL, os.Getenv("POD_NAMESPACE")) + if err != nil { + return err + } + } // get hubKubeConfig hubConfig, err := clientcmd.BuildConfigFromFlags("", s.hubKubeConfig) @@ -263,6 +291,11 @@ func (s *serviceProxy) Run(ctx context.Context) error { func (s *serviceProxy) ServeHTTP(wr http.ResponseWriter, req *http.Request) { ctx := req.Context() logger := 
klog.FromContext(ctx) + targetKind := "unknown" + result := "error" + defer func() { + addonmetrics.ObserveServiceProxyRequest(s.hostedServiceProxyMode, targetKind, result) + }() if klog.V(4).Enabled() { dump, err := httputil.DumpRequest(req, true) @@ -273,29 +306,54 @@ func (s *serviceProxy) ServeHTTP(wr http.ResponseWriter, req *http.Request) { klog.V(4).Infof("request:\n %s", string(dump)) } - url, err := utils.GetTargetServiceURLFromRequest(req) + targetURL, err := utils.GetTargetServiceURLFromRequest(req) if err != nil { http.Error(wr, err.Error(), http.StatusBadRequest) logger.Error(err, "failed to get target service url from request") return } + isKubeAPIServer := targetURL.Host == "kubernetes.default.svc" + targetKind = "service" + if isKubeAPIServer { + targetKind = "kube-apiserver" + } + if isKubeAPIServer && s.managedAPIServerURL != "" { + targetURL, err = parseManagedAPIServerURL(s.managedAPIServerURL) + if err != nil { + http.Error(wr, err.Error(), http.StatusBadRequest) + logger.Error(err, "failed to parse managed apiserver url") + return + } + } else if !isKubeAPIServer && s.hostedServiceProxyMode == ServiceProxyModeRelay { + targetURL, err = s.serviceRelayURL() + if err != nil { + http.Error(wr, err.Error(), http.StatusBadRequest) + logger.Error(err, "failed to build service relay url") + return + } + if err := s.prepareRelayRequest(req); err != nil { + http.Error(wr, err.Error(), http.StatusBadRequest) + logger.Error(err, "failed to prepare service relay request") + return + } + } // Enrich logger with request-scoped fields so all downstream logs // are traceable by request without repeating these values. 
logger = logger.WithValues( - "targetHost", url.Host, + "targetHost", targetURL.Host, "method", req.Method, "path", req.URL.Path, ) ctx = klog.NewContext(ctx, logger) logger.V(4).Info("service proxy received request", - "targetScheme", url.Scheme, + "targetScheme", targetURL.Scheme, "enableImpersonation", s.enableImpersonation, - "isKubeAPIServer", url.Host == "kubernetes.default.svc", + "isKubeAPIServer", isKubeAPIServer, ) - if url.Host == "kubernetes.default.svc" { + if isKubeAPIServer { if s.enableImpersonation { if err := s.processAuthentication(ctx, req); err != nil { logger.Error(err, "authentication failed") @@ -306,10 +364,10 @@ func (s *serviceProxy) ServeHTTP(wr http.ResponseWriter, req *http.Request) { } logger.V(6).Info("forwarding request to reverse proxy", - "targetURL", url.String(), + "targetURL", targetURL.String(), ) - proxy := httputil.NewSingleHostReverseProxy(url) + proxy := httputil.NewSingleHostReverseProxy(targetURL) proxy.Transport = &http.Transport{ DialContext: (&net.Dialer{ Timeout: 30 * time.Second, @@ -328,8 +386,14 @@ func (s *serviceProxy) ServeHTTP(wr http.ResponseWriter, req *http.Request) { // set ForceAttemptHTTP2 = false to prevent auto http2 upgration ForceAttemptHTTP2: false, } + proxy.ErrorHandler = func(w http.ResponseWriter, req *http.Request, err error) { + logger.Error(err, "service proxy reverse proxy error") + http.Error(w, err.Error(), http.StatusBadGateway) + } - proxy.ServeHTTP(wr, req) + recorder := &statusRecorder{ResponseWriter: wr, statusCode: http.StatusOK} + proxy.ServeHTTP(recorder, req) + result = resultFromStatus(recorder.statusCode) } func (s *serviceProxy) validate() error { @@ -339,9 +403,157 @@ func (s *serviceProxy) validate() error { if s.key == "" { return fmt.Errorf("key is required") } + switch s.hostedServiceProxyMode { + case ServiceProxyModeDisabled, ServiceProxyModeBestEffort, ServiceProxyModeRelay: + default: + return fmt.Errorf("hosted-service-proxy-mode must be one of Disabled, BestEffort, 
or Relay; got %q", s.hostedServiceProxyMode) + } + if s.hostedServiceProxyMode == ServiceProxyModeRelay && s.managedKubeConfig == "" { + return fmt.Errorf("managed-kubeconfig is required when hosted-service-proxy-mode=Relay") + } + return nil +} + +func (s *serviceProxy) managedRESTConfig() (*rest.Config, error) { + if s.managedKubeConfig == "" { + config, err := rest.InClusterConfig() + if err != nil { + return nil, fmt.Errorf("failed to get in-cluster config: %v", err) + } + return config, nil + } + + config, err := clientcmd.BuildConfigFromFlags("", s.managedKubeConfig) + if err != nil { + return nil, fmt.Errorf("failed to build managed kubeconfig: %v", err) + } + return config, nil +} + +func configCheckerFiles(files ...string) []string { + result := []string{} + for _, file := range files { + if file != "" { + result = append(result, file) + } + } + return result +} + +func appendRESTConfigCA(pool *x509.CertPool, config *rest.Config) error { + if len(config.CAData) > 0 { + if ok := pool.AppendCertsFromPEM(config.CAData); !ok { + return fmt.Errorf("failed to parse managed kubeconfig CA data") + } + return nil + } + if config.CAFile == "" { + return nil + } + caData, err := os.ReadFile(config.CAFile) + if err != nil { + return err + } + if ok := pool.AppendCertsFromPEM(caData); !ok { + return fmt.Errorf("failed to parse managed kubeconfig CA file %s", config.CAFile) + } + return nil +} + +func parseManagedAPIServerURL(rawURL string) (*url.URL, error) { + parsed, err := url.Parse(rawURL) + if err != nil { + return nil, err + } + if parsed.Scheme == "" || parsed.Host == "" { + return nil, fmt.Errorf("managed apiserver URL %q must include scheme and host", rawURL) + } + return parsed, nil +} + +func (s *serviceProxy) serviceRelayURL() (*url.URL, error) { + if s.relayURLTemplate == nil { + return nil, fmt.Errorf("service relay URL is not initialized") + } + clone := *s.relayURLTemplate + return &clone, nil +} + +func buildServiceRelayURL(managedAPIServerURL, 
namespace string) (*url.URL, error) { + if managedAPIServerURL == "" { + return nil, fmt.Errorf("managed apiserver URL is required for Relay mode") + } + if namespace == "" { + return nil, fmt.Errorf("POD_NAMESPACE is required for Relay mode") + } + relayURL, err := parseManagedAPIServerURL(managedAPIServerURL) + if err != nil { + return nil, err + } + relayURL.Path = fmt.Sprintf( + "/api/v1/namespaces/%s/services/http:%s:%d/proxy", + url.PathEscape(namespace), + constant.ServiceRelayName, + constant.ServiceRelayPort, + ) + relayURL.RawQuery = "" + return relayURL, nil +} + +func (s *serviceProxy) prepareRelayRequest(req *http.Request) error { + authorization := req.Header.Get("Authorization") + req.Header.Del(utils.HeaderClusterProxyAuthorization) + if authorization != "" { + req.Header.Set(utils.HeaderClusterProxyAuthorization, authorization) + } + + tokenReader := s.getImpersonateTokenFunc + if tokenReader == nil { + tokenReader = s.readImpersonateTokenFromManagedKubeconfig + } + token, err := tokenReader() + if err != nil { + return fmt.Errorf("failed to get managed kubeconfig token: %v", err) + } + token = strings.TrimSpace(token) + if token == "" { + return fmt.Errorf("managed kubeconfig token is empty") + } + req.Header.Set("Authorization", "Bearer "+token) return nil } +func resultFromStatus(statusCode int) string { + if statusCode >= http.StatusBadRequest { + return "error" + } + return "success" +} + +type statusRecorder struct { + http.ResponseWriter + statusCode int +} + +func (r *statusRecorder) WriteHeader(statusCode int) { + r.statusCode = statusCode + r.ResponseWriter.WriteHeader(statusCode) +} + +func (r *statusRecorder) Flush() { + if flusher, ok := r.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} + +func (r *statusRecorder) Hijack() (net.Conn, *bufio.ReadWriter, error) { + hijacker, ok := r.ResponseWriter.(http.Hijacker) + if !ok { + return nil, nil, fmt.Errorf("response writer does not support hijacking") + } + return 
hijacker.Hijack() +} + func (s *serviceProxy) readImpersonateTokenFromFile() (string, error) { // Read the latest token from the mounted file token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") @@ -351,6 +563,49 @@ func (s *serviceProxy) readImpersonateTokenFromFile() (string, error) { return string(token), nil } +func (s *serviceProxy) readImpersonateTokenFromManagedKubeconfig() (string, error) { + config, err := clientcmd.LoadFromFile(s.managedKubeConfig) + if err != nil { + return "", err + } + + authInfo, err := currentAuthInfo(config) + if err != nil { + return "", err + } + if authInfo.Token != "" { + return authInfo.Token, nil + } + if authInfo.TokenFile != "" { + token, err := os.ReadFile(authInfo.TokenFile) + if err != nil { + return "", err + } + return string(token), nil + } + return "", fmt.Errorf("managed kubeconfig does not contain a bearer token") +} + +func currentAuthInfo(config *clientcmdapi.Config) (*clientcmdapi.AuthInfo, error) { + if config == nil { + return nil, fmt.Errorf("managed kubeconfig is empty") + } + if config.CurrentContext != "" { + if context, ok := config.Contexts[config.CurrentContext]; ok && context.AuthInfo != "" { + if authInfo, ok := config.AuthInfos[context.AuthInfo]; ok { + return authInfo, nil + } + return nil, fmt.Errorf("current context references missing authinfo %q", context.AuthInfo) + } + } + if len(config.AuthInfos) == 1 { + for _, authInfo := range config.AuthInfos { + return authInfo, nil + } + } + return nil, fmt.Errorf("managed kubeconfig must have a current context or exactly one authinfo") +} + // processAuthentication handles the authentication flow for both managed cluster and hub users. // It tries managed cluster TokenReview first; if unauthenticated, falls back to hub TokenReview. 
func (s *serviceProxy) processAuthentication(ctx context.Context, req *http.Request) error { diff --git a/pkg/serviceproxy/token_authenticator.go b/pkg/serviceproxy/token_authenticator.go index dfb9f0c02..73fcb7463 100644 --- a/pkg/serviceproxy/token_authenticator.go +++ b/pkg/serviceproxy/token_authenticator.go @@ -2,7 +2,6 @@ package serviceproxy import ( "context" - "fmt" authenticationv1 "k8s.io/api/authentication/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -42,7 +41,10 @@ func (a *tokenReviewAuthenticator) AuthenticateToken(ctx context.Context, token if !tokenReview.Status.Authenticated { if tokenReview.Status.Error != "" { - return nil, false, fmt.Errorf("%s TokenReview error: %s", a.name, tokenReview.Status.Error) + logger.V(4).Info("TokenReview returned unauthenticated status with error", + "cluster", a.name, + "error", tokenReview.Status.Error, + ) } return nil, false, nil } diff --git a/pkg/serviceproxy/token_authenticator_test.go b/pkg/serviceproxy/token_authenticator_test.go index 140b55582..d4a8b1825 100644 --- a/pkg/serviceproxy/token_authenticator_test.go +++ b/pkg/serviceproxy/token_authenticator_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "os" "strings" "testing" @@ -72,7 +73,7 @@ func TestTokenReviewAuthenticator_Unauthenticated(t *testing.T) { func TestProcessAuthentication_ManagedClusterToken(t *testing.T) { s := &serviceProxy{ - enableImpersonation: true, + enableImpersonation: true, managedClusterAuthenticator: authenticator.TokenFunc(func(ctx context.Context, token string) (*authenticator.Response, bool, error) { return &authenticator.Response{User: &user.DefaultInfo{Name: "mc-user"}}, true, nil }), @@ -238,6 +239,95 @@ func TestNewServiceProxy_DefaultValues(t *testing.T) { } } +func TestManagedKubeconfigConfigAndToken(t *testing.T) { + kubeconfig := `apiVersion: v1 +kind: Config +clusters: +- name: managed + cluster: + server: https://managed.example.com:6443 +contexts: +- name: managed + context: + cluster: 
managed + user: cluster-proxy +current-context: managed +users: +- name: cluster-proxy + user: + token: managed-token +` + path := t.TempDir() + "/kubeconfig" + if err := os.WriteFile(path, []byte(kubeconfig), 0600); err != nil { + t.Fatalf("failed to write kubeconfig: %v", err) + } + + s := &serviceProxy{managedKubeConfig: path} + config, err := s.managedRESTConfig() + if err != nil { + t.Fatalf("unexpected managedRESTConfig error: %v", err) + } + if config.Host != "https://managed.example.com:6443" { + t.Fatalf("unexpected managed host: %s", config.Host) + } + + token, err := s.readImpersonateTokenFromManagedKubeconfig() + if err != nil { + t.Fatalf("unexpected token read error: %v", err) + } + if token != "managed-token" { + t.Fatalf("expected managed-token, got %q", token) + } +} + +func TestParseManagedAPIServerURL(t *testing.T) { + url, err := parseManagedAPIServerURL("https://managed.example.com:6443") + if err != nil { + t.Fatalf("unexpected parse error: %v", err) + } + if url.Host != "managed.example.com:6443" { + t.Fatalf("unexpected host: %s", url.Host) + } + + if _, err := parseManagedAPIServerURL("managed.example.com:6443"); err == nil { + t.Fatal("expected error for URL without scheme") + } +} + +func TestServiceProxyRelayURLAndAuthorizationHeader(t *testing.T) { + relayURLTemplate, err := buildServiceRelayURL("https://managed.example.com:6443", "addon-ns") + if err != nil { + t.Fatalf("unexpected buildServiceRelayURL error: %v", err) + } + s := &serviceProxy{ + managedAPIServerURL: "https://managed.example.com:6443", + hostedServiceProxyMode: ServiceProxyModeRelay, + relayURLTemplate: relayURLTemplate, + getImpersonateTokenFunc: func() (string, error) { return "managed-token", nil }, + } + + relayURL, err := s.serviceRelayURL() + if err != nil { + t.Fatalf("unexpected relay URL error: %v", err) + } + if relayURL.String() != "https://managed.example.com:6443/api/v1/namespaces/addon-ns/services/http:cluster-proxy-service-relay:7444/proxy" { + 
t.Fatalf("unexpected relay URL %s", relayURL.String()) + } + + req, _ := http.NewRequest("GET", "https://example.com/ping", nil) + req.Header.Set("Authorization", "Bearer original-token") + req.Header.Set("Cluster-Proxy-Authorization", "Bearer spoofed-token") + if err := s.prepareRelayRequest(req); err != nil { + t.Fatalf("unexpected prepare relay request error: %v", err) + } + if req.Header.Get("Authorization") != "Bearer managed-token" { + t.Fatalf("expected managed token authorization, got %q", req.Header.Get("Authorization")) + } + if req.Header.Get("Cluster-Proxy-Authorization") != "Bearer original-token" { + t.Fatalf("expected original authorization in internal header, got %q", req.Header.Get("Cluster-Proxy-Authorization")) + } +} + func TestTokenReviewAuthenticator_TokenSentInRequest(t *testing.T) { var capturedToken string client := fake.NewSimpleClientset() @@ -281,7 +371,7 @@ func TestTokenReviewAuthenticator_APIError(t *testing.T) { } } -func TestTokenReviewAuthenticator_StatusError(t *testing.T) { +func TestTokenReviewAuthenticator_StatusErrorUnauthenticated(t *testing.T) { client := fake.NewSimpleClientset() client.PrependReactor("create", "tokenreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { return true, &authenticationv1.TokenReview{ @@ -294,8 +384,8 @@ func TestTokenReviewAuthenticator_StatusError(t *testing.T) { authn := &tokenReviewAuthenticator{client: client, name: "test"} resp, ok, err := authn.AuthenticateToken(context.Background(), "expired-token") - if err == nil { - t.Fatal("expected error when Status.Error is set") + if err != nil { + t.Fatalf("unexpected error when Status.Error is set on unauthenticated TokenReview: %v", err) } if ok { t.Fatal("expected authenticated=false") @@ -303,9 +393,6 @@ func TestTokenReviewAuthenticator_StatusError(t *testing.T) { if resp != nil { t.Fatal("expected nil response") } - if !strings.Contains(err.Error(), "Credentials are expired") { - t.Fatalf("expected Status.Error in error 
message, got: %v", err) - } } func TestProcessAuthentication_GetImpersonateTokenError(t *testing.T) { diff --git a/pkg/servicerelay/service_relay.go b/pkg/servicerelay/service_relay.go new file mode 100644 index 000000000..769ed1be3 --- /dev/null +++ b/pkg/servicerelay/service_relay.go @@ -0,0 +1,202 @@ +package servicerelay + +import ( + "bufio" + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "net" + "net/http" + "net/http/httputil" + "os" + "time" + + "github.com/spf13/cobra" + "k8s.io/klog/v2" + + "open-cluster-management.io/cluster-proxy/pkg/constant" + addonmetrics "open-cluster-management.io/cluster-proxy/pkg/metrics" + "open-cluster-management.io/cluster-proxy/pkg/utils" +) + +type ServiceRelay struct { + Listen string + AdditionalServiceCA string + HealthProbeBindAddress string + rootCAs *x509.CertPool + transport http.RoundTripper +} + +func NewCommand() *cobra.Command { + relay := &ServiceRelay{ + Listen: fmt.Sprintf(":%d", constant.ServiceRelayPort), + HealthProbeBindAddress: ":8000", + } + + cmd := &cobra.Command{ + Use: "service-relay", + Short: "Relay hosted service-proxy requests to managed cluster Services", + RunE: func(cmd *cobra.Command, args []string) error { + return relay.Run(cmd.Context()) + }, + } + + flags := cmd.Flags() + flags.StringVar(&relay.Listen, "listen", relay.Listen, "The HTTP listen address") + flags.StringVar(&relay.AdditionalServiceCA, "additional-service-ca", relay.AdditionalServiceCA, "The path to the additional CA certificate for services") + flags.StringVar(&relay.HealthProbeBindAddress, "health-probe-bind-address", relay.HealthProbeBindAddress, "The address the health probe and metrics endpoint binds to") + + return cmd +} + +func (s *ServiceRelay) Run(ctx context.Context) error { + if s.Listen == "" { + return fmt.Errorf("listen address is required") + } + + s.rootCAs, _ = x509.SystemCertPool() + if s.rootCAs == nil { + s.rootCAs = x509.NewCertPool() + } + + if s.AdditionalServiceCA != "" { + caData, err := 
os.ReadFile(s.AdditionalServiceCA) + if err != nil { + if os.IsNotExist(err) { + klog.Infof("additional-service-ca file not found: %s", s.AdditionalServiceCA) + } else { + return err + } + } else if ok := s.rootCAs.AppendCertsFromPEM(caData); !ok { + return fmt.Errorf("failed to parse additional service CA %s", s.AdditionalServiceCA) + } + } + + if s.transport == nil { + s.transport = &http.Transport{ + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: time.Second, + TLSClientConfig: &tls.Config{ + RootCAs: s.rootCAs, + MinVersion: tls.VersionTLS12, + }, + ForceAttemptHTTP2: false, + } + } + + go func() { + if err := utils.ServeHealthProbes(s.HealthProbeBindAddress, nil); err != nil { + klog.Fatal(err) + } + }() + + server := &http.Server{ + Addr: s.Listen, + Handler: s, + ReadHeaderTimeout: 5 * time.Second, + } + + go func() { + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = server.Shutdown(shutdownCtx) + }() + + klog.Infof("service relay listening on %s", s.Listen) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + return err + } + return ctx.Err() +} + +func (s *ServiceRelay) ServeHTTP(w http.ResponseWriter, req *http.Request) { + target, err := utils.GetTargetServiceURLFromRequest(req) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + addonmetrics.ObserveServiceRelayRequest("unknown", "error") + return + } + if target.Scheme != "http" && target.Scheme != "https" { + http.Error(w, fmt.Sprintf("unsupported target scheme %q", target.Scheme), http.StatusBadRequest) + addonmetrics.ObserveServiceRelayRequest(target.Scheme, "error") + return + } + if target.Host == "kubernetes.default.svc" { + http.Error(w, "service relay does not proxy kube-apiserver requests", 
http.StatusBadRequest) + addonmetrics.ObserveServiceRelayRequest(target.Scheme, "error") + return + } + + restoreAuthorizationHeader(req) + removeClusterProxyHeaders(req) + + proxy := httputil.NewSingleHostReverseProxy(target) + proxy.Transport = s.transport + proxy.ErrorHandler = func(w http.ResponseWriter, req *http.Request, err error) { + klog.Errorf("service relay proxy error: %v", err) + http.Error(w, err.Error(), http.StatusBadGateway) + } + + recorder := &statusRecorder{ResponseWriter: w, statusCode: http.StatusOK} + proxy.ServeHTTP(recorder, req) + addonmetrics.ObserveServiceRelayRequest(target.Scheme, resultFromStatus(recorder.statusCode)) +} + +func restoreAuthorizationHeader(req *http.Request) { + authorization := req.Header.Get(utils.HeaderClusterProxyAuthorization) + req.Header.Del("Authorization") + req.Header.Del(utils.HeaderClusterProxyAuthorization) + if authorization != "" { + req.Header.Set("Authorization", authorization) + } +} + +func removeClusterProxyHeaders(req *http.Request) { + for _, header := range []string{ + utils.HeaderClusterProxyProto, + utils.HeaderClusterProxyNamespace, + utils.HeaderClusterProxyService, + utils.HeaderClusterProxyPort, + } { + req.Header.Del(header) + } +} + +func resultFromStatus(statusCode int) string { + if statusCode >= http.StatusBadRequest { + return "error" + } + return "success" +} + +type statusRecorder struct { + http.ResponseWriter + statusCode int +} + +func (r *statusRecorder) WriteHeader(statusCode int) { + r.statusCode = statusCode + r.ResponseWriter.WriteHeader(statusCode) +} + +func (r *statusRecorder) Flush() { + if flusher, ok := r.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} + +func (r *statusRecorder) Hijack() (net.Conn, *bufio.ReadWriter, error) { + hijacker, ok := r.ResponseWriter.(http.Hijacker) + if !ok { + return nil, nil, fmt.Errorf("response writer does not support hijacking") + } + return hijacker.Hijack() +} diff --git a/pkg/servicerelay/service_relay_test.go 
b/pkg/servicerelay/service_relay_test.go new file mode 100644 index 000000000..436d38ba1 --- /dev/null +++ b/pkg/servicerelay/service_relay_test.go @@ -0,0 +1,89 @@ +package servicerelay + +import ( + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "open-cluster-management.io/cluster-proxy/pkg/utils" +) + +func TestServiceRelayRestoresAuthorizationAndStripsInternalHeaders(t *testing.T) { + var captured *http.Request + relay := &ServiceRelay{ + transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + captured = req.Clone(req.Context()) + captured.Header = req.Header.Clone() + return &http.Response{ + StatusCode: http.StatusCreated, + Header: http.Header{"X-Backend": []string{"ok"}}, + Body: io.NopCloser(strings.NewReader("backend-body")), + Request: req, + }, nil + }), + } + + req := httptest.NewRequest("GET", "http://relay/ping?x=1", nil) + req.Header.Set(utils.HeaderClusterProxyProto, "http") + req.Header.Set(utils.HeaderClusterProxyNamespace, "default") + req.Header.Set(utils.HeaderClusterProxyService, "hello") + req.Header.Set(utils.HeaderClusterProxyPort, "8080") + req.Header.Set("Authorization", "Bearer managed-token") + req.Header.Set(utils.HeaderClusterProxyAuthorization, "Bearer original-token") + + recorder := httptest.NewRecorder() + relay.ServeHTTP(recorder, req) + + if recorder.Code != http.StatusCreated { + t.Fatalf("expected status %d, got %d", http.StatusCreated, recorder.Code) + } + if recorder.Header().Get("X-Backend") != "ok" { + t.Fatalf("expected backend header to be proxied") + } + if strings.TrimSpace(recorder.Body.String()) != "backend-body" { + t.Fatalf("unexpected body %q", recorder.Body.String()) + } + if captured == nil { + t.Fatal("expected backend request to be captured") + } + if captured.URL.Scheme != "http" || captured.URL.Host != "hello.default.svc:8080" || captured.URL.Path != "/ping" { + t.Fatalf("unexpected target URL %s", captured.URL.String()) + } + if captured.Header.Get("Authorization") 
!= "Bearer original-token" { + t.Fatalf("expected original authorization to be restored, got %q", captured.Header.Get("Authorization")) + } + for _, header := range []string{ + utils.HeaderClusterProxyProto, + utils.HeaderClusterProxyNamespace, + utils.HeaderClusterProxyService, + utils.HeaderClusterProxyPort, + utils.HeaderClusterProxyAuthorization, + } { + if captured.Header.Get(header) != "" { + t.Fatalf("expected header %s to be stripped, got %q", header, captured.Header.Get(header)) + } + } +} + +func TestServiceRelayRejectsKubeAPIServerTarget(t *testing.T) { + relay := &ServiceRelay{} + req := httptest.NewRequest("GET", "http://relay/healthz", nil) + req.Header.Set(utils.HeaderClusterProxyProto, "https") + req.Header.Set(utils.HeaderClusterProxyNamespace, "default") + req.Header.Set(utils.HeaderClusterProxyService, "kubernetes") + req.Header.Set(utils.HeaderClusterProxyPort, "443") + + recorder := httptest.NewRecorder() + relay.ServeHTTP(recorder, req) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("expected status %d, got %d", http.StatusBadRequest, recorder.Code) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 646ab7c0f..f3f653bb2 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -9,6 +9,7 @@ import ( "time" utilnet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/component-base/metrics/legacyregistry" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/healthz" ) @@ -19,10 +20,11 @@ const ( HEADERSERVICEKEY = "Service-Client-Key" // Cluster-Proxy custom headers for service proxy - HeaderClusterProxyProto = "Cluster-Proxy-Proto" - HeaderClusterProxyNamespace = "Cluster-Proxy-Namespace" - HeaderClusterProxyService = "Cluster-Proxy-Service" - HeaderClusterProxyPort = "Cluster-Proxy-Port" + HeaderClusterProxyProto = "Cluster-Proxy-Proto" + 
HeaderClusterProxyNamespace = "Cluster-Proxy-Namespace" + HeaderClusterProxyService = "Cluster-Proxy-Service" + HeaderClusterProxyPort = "Cluster-Proxy-Port" + HeaderClusterProxyAuthorization = "Cluster-Proxy-Authorization" ) // TargetServiceConfig is a collection of data extrict from the request URL description the target service we can to access on the managed cluster. @@ -166,6 +168,7 @@ func ServeHealthProbes(healthProbeBindAddress string, tlsConfig *tls.Config, cus } mux.Handle("/healthz", http.StripPrefix("/healthz", &healthz.Handler{Checks: checks})) + mux.Handle("/metrics", legacyregistry.Handler()) server := http.Server{ Handler: mux, ReadHeaderTimeout: 5 * time.Second, diff --git a/test/e2e/connect_test.go b/test/e2e/connect_test.go index 6fecdd603..034544c28 100644 --- a/test/e2e/connect_test.go +++ b/test/e2e/connect_test.go @@ -47,6 +47,10 @@ const ( // getProxyEntrypointAddress returns the address to connect to proxy-entrypoint service. // If running in-cluster, it uses the service DNS name. Otherwise, it uses localhost (for port-forward). 
func getProxyEntrypointAddress() string { + if address := os.Getenv("PROXY_ENTRYPOINT_ADDRESS"); address != "" { + return address + } + // Running in-cluster, use service DNS name namespace := os.Getenv("PROXY_ENTRYPOINT_NAMESPACE") if namespace == "" { @@ -137,7 +141,7 @@ var _ = Describe("Requests through Cluster-Proxy", Label("serviceproxy", "connec Describe("Get pods", Label("pods"), func() { Context("URL is vailid", func() { It("should return pods information", Label("valid-url"), func() { - _, err := clusterProxyKubeClient.CoreV1().Pods(hubInstallNamespace).List(context.Background(), v1.ListOptions{}) + _, err := clusterProxyKubeClient.CoreV1().Pods(targetNamespace).List(context.Background(), v1.ListOptions{}) Expect(err).To(BeNil()) }) }) @@ -168,7 +172,7 @@ var _ = Describe("Requests through Cluster-Proxy", Label("serviceproxy", "connec Describe("Get Logs of a pod", Label("logs"), func() { It("should return logs information", Label("pod-logs"), func() { - req := clusterProxyKubeClient.CoreV1().Pods(hubInstallNamespace).GetLogs(podName, &corev1.PodLogOptions{}) + req := clusterProxyKubeClient.CoreV1().Pods(targetNamespace).GetLogs(podName, &corev1.PodLogOptions{}) podlogs, err := req.Stream(context.Background()) Expect(err).To(BeNil()) podlogs.Close() @@ -177,36 +181,38 @@ var _ = Describe("Requests through Cluster-Proxy", Label("serviceproxy", "connec Describe("Watch ConfigMap create", Label("watch"), func() { It("shoud watch", Label("configmap"), func() { - watch, err := clusterProxyKubeClient.CoreV1().ConfigMaps(hubInstallNamespace).Watch(context.TODO(), v1.ListOptions{}) + watch, err := clusterProxyKubeClient.CoreV1().ConfigMaps(targetNamespace).Watch(context.TODO(), v1.ListOptions{}) Expect(err).To(BeNil()) + defer watch.Stop() - // create a pod - _, err = hubKubeClient.CoreV1().ConfigMaps(hubInstallNamespace).Create(context.Background(), &corev1.ConfigMap{ + _, err = targetKubeClient.CoreV1().ConfigMaps(targetNamespace).Create(context.Background(), 
&corev1.ConfigMap{ ObjectMeta: v1.ObjectMeta{ Name: "cluster-proxy-test", }, }, v1.CreateOptions{}) Expect(err).To(BeNil()) - // check if r is create - select { - case <-watch.ResultChan(): - // this chan shoud not receive any pod event before pod created - err := hubKubeClient.CoreV1().ConfigMaps(hubInstallNamespace).Delete(context.Background(), "cluster-proxy-test", metav1.DeleteOptions{}) - Expect(err).To(BeNil()) - default: - Fail("Failed to received a pod create event") - } + Eventually(func() bool { + select { + case <-watch.ResultChan(): + return true + default: + return false + } + }, timeout, time.Second).Should(BeTrue()) + + err = targetKubeClient.CoreV1().ConfigMaps(targetNamespace).Delete(context.Background(), "cluster-proxy-test", metav1.DeleteOptions{}) + Expect(err).To(BeNil()) }) }) Describe("Execute in a pod", Label("exec"), func() { It("should return hello", Label("pod-exec"), func() { - req := clusterProxyKubeClient.CoreV1().RESTClient().Post().Resource("pods").Name(podName).Namespace(hubInstallNamespace).SubResource("exec").Param("container", "manager") + req := clusterProxyKubeClient.CoreV1().RESTClient().Post().Resource("pods").Name(podName).Namespace(targetNamespace).SubResource("exec").Param("container", podContainerName) req.VersionedParams(&corev1.PodExecOptions{ Command: []string{"/bin/sh", "-c", "echo hello"}, - Container: "manager", + Container: podContainerName, Stdin: false, Stdout: true, Stderr: true, @@ -276,6 +282,13 @@ var _ = Describe("Requests through Cluster-Proxy", Label("serviceproxy", "connec Expect(err).To(BeNil()) fmt.Println("response:", string(body)) + if hostedMode { + Expect(resp.StatusCode).To(Equal(http.StatusOK)) + Expect(strings.Contains(string(body), "Hello from hello-world-https")).To(BeTrue(), + "expected response to contain 'Hello from hello-world-https', got: %s", string(body)) + return + } + // The request should either succeed (200) or fail with Bad Gateway (502). 
// Both cases confirm the request was correctly routed to the HTTPS backend: // - 200: Certificate is trusted, backend responded successfully diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index c182767b8..e969435d3 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -26,6 +26,7 @@ import ( "k8s.io/client-go/kubernetes" k8sscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" addonclient "open-cluster-management.io/api/client/addon/clientset/versioned" @@ -54,19 +55,33 @@ func init() { } var ( - managedClusterName string - hubRESTConfig *rest.Config - hubKubeClient kubernetes.Interface - hubRuntimeClient client.Client - clusterProxyKubeClient kubernetes.Interface - clusterProxyWrongClient kubernetes.Interface - clusterProxyUnAuthClient kubernetes.Interface - clusterProxyHttpClient *http.Client - hubAddOnClient addonclient.Interface - hubClusterClient clusterclient.Interface - clusterProxyCfg *rest.Config - serviceAccountToken string - podName string + managedClusterName string + hubRESTConfig *rest.Config + hostingRESTConfig *rest.Config + managedRESTConfig *rest.Config + hubKubeClient kubernetes.Interface + hostingKubeClient kubernetes.Interface + managedKubeClient kubernetes.Interface + hubRuntimeClient client.Client + hostingRuntimeClient client.Client + managedRuntimeClient client.Client + clusterProxyKubeClient kubernetes.Interface + clusterProxyManagedClient kubernetes.Interface + clusterProxyWrongClient kubernetes.Interface + clusterProxyUnAuthClient kubernetes.Interface + clusterProxyHttpClient *http.Client + hubAddOnClient addonclient.Interface + hubClusterClient clusterclient.Interface + clusterProxyCfg *rest.Config + serviceAccountToken string + managedServiceAccountToken string + podName string + podContainerName string + podPort int + hostedMode bool + targetNamespace string + targetKubeClient kubernetes.Interface + 
targetRuntimeClient client.Client ) const ( @@ -75,6 +90,10 @@ const ( hubInstallNamespace = "open-cluster-management-addon" managedClusterInstallNamespace = "open-cluster-management-cluster-proxy" serviceAccountName = "cluster-proxy-test" + managedServiceAccountName = "cluster-proxy-managed-test" + hostedTestPodName = "hello-world" + hostedTestPodContainerName = "hello-world" + hostedTestPodPort = 8000 ) var _ = BeforeSuite(func() { @@ -87,7 +106,9 @@ var _ = BeforeSuite(func() { By("Init clients") err = func() error { var err error - hubRESTConfig, err = rest.InClusterConfig() + hostedMode = os.Getenv("E2E_HOSTING_KUBECONFIG") != "" || os.Getenv("E2E_MANAGED_KUBECONFIG") != "" + + hubRESTConfig, err = configFromEnvOrInCluster("E2E_HUB_KUBECONFIG") if err != nil { return err } @@ -110,6 +131,47 @@ var _ = BeforeSuite(func() { } hubClusterClient, err = clusterclient.NewForConfig(hubRESTConfig) + if err != nil { + return err + } + + targetNamespace = hubInstallNamespace + targetKubeClient = hubKubeClient + targetRuntimeClient = hubRuntimeClient + podContainerName = "manager" + + if hostedMode { + hostingRESTConfig, err = configFromEnv("E2E_HOSTING_KUBECONFIG") + if err != nil { + return err + } + managedRESTConfig, err = configFromEnv("E2E_MANAGED_KUBECONFIG") + if err != nil { + return err + } + hostingKubeClient, err = kubernetes.NewForConfig(hostingRESTConfig) + if err != nil { + return err + } + managedKubeClient, err = kubernetes.NewForConfig(managedRESTConfig) + if err != nil { + return err + } + hostingRuntimeClient, err = client.New(hostingRESTConfig, client.Options{Scheme: scheme}) + if err != nil { + return err + } + managedRuntimeClient, err = client.New(managedRESTConfig, client.Options{Scheme: scheme}) + if err != nil { + return err + } + targetNamespace = "default" + targetKubeClient = managedKubeClient + targetRuntimeClient = managedRuntimeClient + podName = hostedTestPodName + podContainerName = hostedTestPodContainerName + podPort = 
hostedTestPodPort + } return err }() @@ -124,6 +186,22 @@ var _ = BeforeSuite(func() { prepareClusterProxyClient() }) +func configFromEnv(envName string) (*rest.Config, error) { + kubeconfig := os.Getenv(envName) + if kubeconfig == "" { + return nil, fmt.Errorf("%s is required", envName) + } + return clientcmd.BuildConfigFromFlags("", kubeconfig) +} + +func configFromEnvOrInCluster(envName string) (*rest.Config, error) { + kubeconfig := os.Getenv(envName) + if kubeconfig != "" { + return clientcmd.BuildConfigFromFlags("", kubeconfig) + } + return rest.InClusterConfig() +} + func checkAddonStatus() { var err error @@ -161,29 +239,47 @@ func checkAddonStatus() { } fmt.Fprintf(GinkgoWriter, "[SUCCESS] Service cluster-proxy-addon-user exists\n") - // deployment on managedcluster is running - fmt.Fprintf(GinkgoWriter, "[DEBUG] Checking deployment: cluster-proxy-proxy-agent in namespace: %s\n", managedClusterInstallNamespace) - anpAgent, err := hubKubeClient.AppsV1().Deployments(managedClusterInstallNamespace).Get(context.Background(), "cluster-proxy-proxy-agent", metav1.GetOptions{}) - if err != nil { - fmt.Fprintf(GinkgoWriter, "[ERROR] Failed to get deployment cluster-proxy-proxy-agent: %v\n", err) - return err - } - fmt.Fprintf(GinkgoWriter, "[DEBUG] Deployment cluster-proxy-proxy-agent status - Replicas: %d, Available: %d, Ready: %d, Updated: %d\n", - anpAgent.Status.Replicas, anpAgent.Status.AvailableReplicas, anpAgent.Status.ReadyReplicas, anpAgent.Status.UpdatedReplicas) - if anpAgent.Status.AvailableReplicas < 1 { - errMsg := fmt.Errorf("available replicas for %s should be more than 1, but get %d", "anp-agent", anpAgent.Status.AvailableReplicas) - fmt.Fprintf(GinkgoWriter, "[ERROR] %v\n", errMsg) - return errMsg + if hostedMode { + if err := deploymentAvailable(hostingKubeClient, managedClusterInstallNamespace, "cluster-proxy-proxy-agent"); err != nil { + return err + } + if err := deploymentAvailable(hostingKubeClient, managedClusterInstallNamespace, 
"cluster-proxy-managed-kubeconfig-provisioner"); err != nil { + return err + } + if err := deploymentAvailable(managedKubeClient, managedClusterInstallNamespace, "cluster-proxy-service-relay"); err != nil { + return err + } + } else { + // deployment on managedcluster is running + fmt.Fprintf(GinkgoWriter, "[DEBUG] Checking deployment: cluster-proxy-proxy-agent in namespace: %s\n", managedClusterInstallNamespace) + if err := deploymentAvailable(hubKubeClient, managedClusterInstallNamespace, "cluster-proxy-proxy-agent"); err != nil { + return err + } + fmt.Fprintf(GinkgoWriter, "[SUCCESS] Deployment cluster-proxy-proxy-agent is ready\n") } - fmt.Fprintf(GinkgoWriter, "[SUCCESS] Deployment cluster-proxy-proxy-agent is ready\n") fmt.Fprintf(GinkgoWriter, "[SUCCESS] All resources are running\n") return nil }, eventuallyTimeout, eventuallyInterval).ShouldNot(HaveOccurred()) } +func deploymentAvailable(kubeClient kubernetes.Interface, namespace, name string) error { + fmt.Fprintf(GinkgoWriter, "[DEBUG] Checking deployment: %s in namespace: %s\n", name, namespace) + deploy, err := kubeClient.AppsV1().Deployments(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + fmt.Fprintf(GinkgoWriter, "[ERROR] Failed to get deployment %s: %v\n", name, err) + return err + } + fmt.Fprintf(GinkgoWriter, "[DEBUG] Deployment %s status - Replicas: %d, Available: %d, Ready: %d, Updated: %d\n", + name, deploy.Status.Replicas, deploy.Status.AvailableReplicas, deploy.Status.ReadyReplicas, deploy.Status.UpdatedReplicas) + if deploy.Status.AvailableReplicas < 1 { + return fmt.Errorf("available replicas for %s should >= 1, but get %d", name, deploy.Status.AvailableReplicas) + } + return nil +} + func prepareTestServiceAccount() { - By("Create a serviceaccount on managedcluster") + By("Create a hub serviceaccount for cluster-proxy requests") _, err := hubKubeClient.CoreV1().ServiceAccounts(hubInstallNamespace).Create(context.Background(), &corev1.ServiceAccount{ 
ObjectMeta: metav1.ObjectMeta{ Name: serviceAccountName, @@ -210,6 +306,11 @@ func prepareTestServiceAccount() { Resources: []string{"pods/exec"}, Verbs: []string{"create"}, }, + { + APIGroups: []string{""}, + Resources: []string{"pods/portforward"}, + Verbs: []string{"create"}, + }, { APIGroups: []string{""}, Resources: []string{"configmaps"}, @@ -242,9 +343,100 @@ func prepareTestServiceAccount() { if !apierrors.IsAlreadyExists(err) { Expect(err).To(BeNil()) } + + if hostedMode { + prepareHostedTargetRBAC() + } +} + +func prepareHostedTargetRBAC() { + hubUser := fmt.Sprintf("cluster:hub:system:serviceaccount:%s:%s", hubInstallNamespace, serviceAccountName) + createTargetRoleBinding("cluster-proxy-hub-user", v1.Subject{ + Kind: v1.UserKind, + Name: hubUser, + }) + + By("Create a managed serviceaccount for managed-token authentication") + _, err := managedKubeClient.CoreV1().ServiceAccounts(targetNamespace).Create(context.Background(), &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedServiceAccountName, + Namespace: targetNamespace, + }, + }, metav1.CreateOptions{}) + if !apierrors.IsAlreadyExists(err) { + Expect(err).To(BeNil()) + } + + createTargetRoleBinding("cluster-proxy-managed-user", v1.Subject{ + Kind: v1.ServiceAccountKind, + Name: managedServiceAccountName, + Namespace: targetNamespace, + }) +} + +func createTargetRoleBinding(name string, subject v1.Subject) { + By("Create target role for cluster-proxy access") + _, err := targetKubeClient.RbacV1().Roles(targetNamespace).Create(context.Background(), &v1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: targetNamespace, + }, + Rules: []v1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"pods", "pods/log"}, + Verbs: []string{"get", "list"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"pods/exec", "pods/portforward"}, + Verbs: []string{"create"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"configmaps"}, + Verbs: 
[]string{"watch"}, + }, + }, + }, metav1.CreateOptions{}) + if !apierrors.IsAlreadyExists(err) { + Expect(err).To(BeNil()) + } + + By("Create target rolebinding for cluster-proxy access") + _, err = targetKubeClient.RbacV1().RoleBindings(targetNamespace).Create(context.Background(), &v1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: targetNamespace, + }, + RoleRef: v1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "Role", + Name: name, + }, + Subjects: []v1.Subject{subject}, + }, metav1.CreateOptions{}) + if !apierrors.IsAlreadyExists(err) { + Expect(err).To(BeNil()) + } } func preparePodFortest() { + if hostedMode { + By("Use the hosted hello-world pod for kube-apiserver proxy tests") + Eventually(func() error { + pod, err := managedKubeClient.CoreV1().Pods(targetNamespace).Get(context.Background(), podName, metav1.GetOptions{}) + if err != nil { + return err + } + if pod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod %s/%s is not running: %s", targetNamespace, podName, pod.Status.Phase) + } + return nil + }, eventuallyTimeout, eventuallyInterval).ShouldNot(HaveOccurred()) + return + } + pods, err := hubKubeClient.CoreV1().Pods(hubInstallNamespace).List(context.Background(), metav1.ListOptions{}) Expect(err).To(BeNil()) for _, pod := range pods.Items { @@ -261,11 +453,14 @@ var ( func prepareClusterProxyClient() { var err error - kubeconfig, err := rest.InClusterConfig() + kubeconfig, err := configFromEnvOrInCluster("E2E_HUB_KUBECONFIG") if err != nil { Expect(err).To(BeNil()) } - userServerServiceAddress = "cluster-proxy-addon-user." + hubInstallNamespace + ".svc:9092" + userServerServiceAddress = os.Getenv("CLUSTER_PROXY_USER_SERVER_ADDRESS") + if userServerServiceAddress == "" { + userServerServiceAddress = "cluster-proxy-addon-user." 
+ hubInstallNamespace + ".svc:9092" + } By("Get RootCA of the cluster-proxy") // Get the CA certificate from the proxy-server-ca secret that is used to sign all certificates @@ -298,6 +493,8 @@ func prepareClusterProxyClient() { clusterProxyCfg.TLSClientConfig.CAData = []byte(rootCA) clusterProxyCfg.TLSClientConfig.CertData = nil clusterProxyCfg.TLSClientConfig.KeyData = nil + clusterProxyCfg.TLSClientConfig.CertFile = "" + clusterProxyCfg.TLSClientConfig.KeyFile = "" clusterProxyCfg.BearerToken = serviceAccountToken clusterProxyCfg.BearerTokenFile = "" // Clear the default token file path from InClusterConfig @@ -315,6 +512,8 @@ func prepareClusterProxyClient() { clusterWrongProxyCfg.TLSClientConfig.CAData = []byte(rootCA) clusterWrongProxyCfg.TLSClientConfig.CertData = nil clusterWrongProxyCfg.TLSClientConfig.KeyData = nil + clusterWrongProxyCfg.TLSClientConfig.CertFile = "" + clusterWrongProxyCfg.TLSClientConfig.KeyFile = "" clusterWrongProxyCfg.BearerToken = serviceAccountToken clusterWrongProxyCfg.BearerTokenFile = "" // Clear the default token file path from InClusterConfig @@ -330,6 +529,8 @@ func prepareClusterProxyClient() { clusterUnAuthProxyCfg.TLSClientConfig.CAData = []byte(rootCA) clusterUnAuthProxyCfg.TLSClientConfig.CertData = nil clusterUnAuthProxyCfg.TLSClientConfig.KeyData = nil + clusterUnAuthProxyCfg.TLSClientConfig.CertFile = "" + clusterUnAuthProxyCfg.TLSClientConfig.KeyFile = "" clusterUnAuthProxyCfg.BearerToken = serviceAccountToken + "wrong token" clusterUnAuthProxyCfg.BearerTokenFile = "" // Clear the default token file path from InClusterConfig @@ -338,6 +539,37 @@ func prepareClusterProxyClient() { return err } + if hostedMode { + By("Create managed serviceAccount token using TokenRequest API") + tokenRequest, err := managedKubeClient.CoreV1().ServiceAccounts(targetNamespace).CreateToken( + context.Background(), + managedServiceAccountName, + &authenticationv1.TokenRequest{ + Spec: authenticationv1.TokenRequestSpec{ + ExpirationSeconds: 
func(i int64) *int64 { return &i }(3600), + }, + }, + metav1.CreateOptions{}, + ) + if err != nil { + return err + } + managedServiceAccountToken = tokenRequest.Status.Token + + managedTokenProxyCfg := rest.CopyConfig(clusterProxyCfg) + managedTokenProxyCfg.TLSClientConfig.CAData = []byte(rootCA) + managedTokenProxyCfg.TLSClientConfig.CertData = nil + managedTokenProxyCfg.TLSClientConfig.KeyData = nil + managedTokenProxyCfg.TLSClientConfig.CertFile = "" + managedTokenProxyCfg.TLSClientConfig.KeyFile = "" + managedTokenProxyCfg.BearerToken = managedServiceAccountToken + managedTokenProxyCfg.BearerTokenFile = "" + clusterProxyManagedClient, err = kubernetes.NewForConfig(managedTokenProxyCfg) + if err != nil { + return err + } + } + // clusterProxyHttpClient rootCAPool := x509.NewCertPool() rootCAPool.AppendCertsFromPEM([]byte(rootCA)) diff --git a/test/e2e/env/init-hosted.sh b/test/e2e/env/init-hosted.sh new file mode 100755 index 000000000..0a1da588c --- /dev/null +++ b/test/e2e/env/init-hosted.sh @@ -0,0 +1,472 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +WORK_DIR="${WORK_DIR:-${ROOT_DIR}/_output/e2e-hosted}" +if [[ "${WORK_DIR}" != /* ]]; then + WORK_DIR="${ROOT_DIR}/${WORK_DIR}" +fi +HUB_CLUSTER_NAME="${HUB_CLUSTER_NAME:-cluster-proxy-hosted-hub}" +HOSTING_CLUSTER_NAME="${HOSTING_CLUSTER_NAME:-cluster-proxy-hosted-hosting}" +MANAGED_CLUSTER_NAME="${MANAGED_CLUSTER_NAME:-cluster-proxy-hosted-managed}" +IMAGE_REGISTRY_NAME="${IMAGE_REGISTRY_NAME:-quay.io/open-cluster-management}" +IMAGE_NAME="${IMAGE_NAME:-cluster-proxy}" +IMAGE_TAG="${IMAGE_TAG:-latest}" + +HUB_NAMESPACE="open-cluster-management-addon" +ADDON_NAMESPACE="open-cluster-management-cluster-proxy" +PLACEMENT_NAME="cluster-proxy-hosted-placement" +DEPLOY_CONFIG_NAME="hosted-relay" +EXTERNAL_KUBECONFIG_SECRET="external-managed-kubeconfig" +HTTPS_CA_CONFIGMAP="hello-world-https-ca" +PROXY_ENTRYPOINT_NODE_PORT="30091" +PROXY_SERVER_NODE_PORT="30090" + +HUB_KUBECONFIG="${WORK_DIR}/hub.kubeconfig" +HOSTING_KUBECONFIG="${WORK_DIR}/hosting.kubeconfig" +MANAGED_KUBECONFIG="${WORK_DIR}/managed.kubeconfig" +HUB_CONTAINER_KUBECONFIG="${WORK_DIR}/hub-container.kubeconfig" +MANAGED_CONTAINER_KUBECONFIG="${WORK_DIR}/managed-container.kubeconfig" + +log() { + echo "[$(date '+%H:%M:%S')] $*" +} + +require_cmd() { + if ! 
command -v "$1" >/dev/null 2>&1; then + echo "Required command '$1' not found" >&2 + exit 1 + fi +} + +create_kind_config() { + local cluster_name="$1" + local config_file="$2" + + cat >"${config_file}" </dev/null +} + +rewrite_cluster_data_server() { + local kubeconfig="$1" + local old_server="$2" + local new_server="$3" + + if [[ "${old_server}" == "${new_server}" ]]; then + return + fi + + log "Rewriting kubeconfig server ${old_server} -> ${new_server}" + kubectl --kubeconfig "${kubeconfig}" get secrets -A -o json | \ + jq -r --arg old "${old_server}" --arg new "${new_server}" ' + .items[] as $item + | (($item.data // {}) | to_entries[]) + | (try (.value | @base64d) catch "") as $decoded + | select($decoded | contains($old)) + | [ + $item.metadata.namespace, + $item.metadata.name, + .key, + ($decoded | split($old) | join($new) | @base64) + ] | @tsv + ' | while IFS=$'\t' read -r namespace name key value; do + local patch + patch="$(jq -cn --arg key "${key}" --arg value "${value}" '[{ + "op": "replace", + "path": ("/data/" + ($key | gsub("~"; "~0") | gsub("/"; "~1"))), + "value": $value + }]')" + kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" patch secret "${name}" --type=json -p "${patch}" >/dev/null + done + + kubectl --kubeconfig "${kubeconfig}" get configmaps -A -o json | \ + jq -r --arg old "${old_server}" --arg new "${new_server}" ' + .items[] as $item + | (($item.data // {}) | to_entries[]) + | select(.value | contains($old)) + | [ + $item.metadata.namespace, + $item.metadata.name, + .key, + (.value | split($old) | join($new) | @base64) + ] | @tsv + ' | while IFS=$'\t' read -r namespace name key encoded_value; do + local decoded_value patch + decoded_value="$(printf '%s' "${encoded_value}" | base64 -d)" + patch="$(jq -cn --arg key "${key}" --arg value "${decoded_value}" '[{ + "op": "replace", + "path": ("/data/" + ($key | gsub("~"; "~0") | gsub("/"; "~1"))), + "value": $value + }]')" + kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" patch 
configmap "${name}" --type=json -p "${patch}" >/dev/null + done +} + +restart_ocm_deployments() { + local kubeconfig="$1" + + kubectl --kubeconfig "${kubeconfig}" get deployments -A -o json | \ + jq -r '.items[] + | select(.metadata.namespace | startswith("open-cluster-management")) + | [.metadata.namespace, .metadata.name] | @tsv' | \ + while IFS=$'\t' read -r namespace name; do + kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" rollout restart "deployment/${name}" >/dev/null || true + done +} + +wait_managed_cluster_available() { + local cluster_name="$1" + + log "Waiting for ManagedCluster ${cluster_name} to become available" + kubectl --kubeconfig "${HUB_KUBECONFIG}" wait \ + --for=condition=ManagedClusterConditionAvailable \ + "managedcluster/${cluster_name}" \ + --timeout=600s +} + +wait_deployment() { + local kubeconfig="$1" + local namespace="$2" + local name="$3" + local timeout="${4:-600s}" + + kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" wait \ + --for=condition=available \ + "deployment/${name}" \ + --timeout="${timeout}" +} + +wait_resource() { + local kubeconfig="$1" + local resource="$2" + local timeout="${3:-300}" + + for _ in $(seq 1 "${timeout}"); do + if kubectl --kubeconfig "${kubeconfig}" get ${resource} >/dev/null 2>&1; then + return + fi + sleep 1 + done + + echo "Timed out waiting for ${resource}" >&2 + exit 1 +} + +wait_container_health() { + local kubeconfig="$1" + local namespace="$2" + local workload="$3" + local container="$4" + local port="$5" + local timeout="${6:-180}" + + log "Waiting for ${workload}/${container} healthz on port ${port}" + for _ in $(seq 1 "${timeout}"); do + if kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" exec "${workload}" -c "${container}" -- \ + wget -qO- "http://127.0.0.1:${port}/healthz" 2>/dev/null | grep -q ok; then + return + fi + sleep 1 + done + + kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" logs "${workload}" -c "${container}" --tail=120 || true + echo "Timed out 
waiting for ${workload}/${container} healthz" >&2 + exit 1 +} + +apply_hosted_addon_config() { + log "Applying hosted AddOnDeploymentConfig and ManagedClusterAddOn" + kubectl --kubeconfig "${HUB_KUBECONFIG}" apply -f - </dev/null +} + +prepare_test_services() { + log "Deploying test services on managed cluster" + kubectl --kubeconfig "${MANAGED_KUBECONFIG}" apply -f "${ROOT_DIR}/test/e2e/env/hello-world.yaml" + kubectl --kubeconfig "${MANAGED_KUBECONFIG}" apply -f "${ROOT_DIR}/test/e2e/env/hello-world-https.yaml" + kubectl --kubeconfig "${MANAGED_KUBECONFIG}" wait --for=condition=ready pod/hello-world -n default --timeout=120s + kubectl --kubeconfig "${MANAGED_KUBECONFIG}" wait --for=condition=ready pod/hello-world-https -n default --timeout=180s + + log "Deploying optional BestEffort test service on hosting cluster" + kubectl --kubeconfig "${HOSTING_KUBECONFIG}" apply -f "${ROOT_DIR}/test/e2e/env/hello-world.yaml" + kubectl --kubeconfig "${HOSTING_KUBECONFIG}" wait --for=condition=ready pod/hello-world -n default --timeout=120s + + log "Creating HTTPS service CA ConfigMaps" + kubectl --kubeconfig "${MANAGED_KUBECONFIG}" exec -n default pod/hello-world-https -- \ + cat /certs/server.crt >"${WORK_DIR}/hello-world-https-ca.crt" + for kubeconfig in "${MANAGED_KUBECONFIG}" "${HOSTING_KUBECONFIG}"; do + kubectl --kubeconfig "${kubeconfig}" create namespace "${ADDON_NAMESPACE}" --dry-run=client -o yaml | \ + kubectl --kubeconfig "${kubeconfig}" apply -f - + kubectl --kubeconfig "${kubeconfig}" -n "${ADDON_NAMESPACE}" create configmap "${HTTPS_CA_CONFIGMAP}" \ + --from-file=service-ca.crt="${WORK_DIR}/hello-world-https-ca.crt" \ + --dry-run=client -o yaml | kubectl --kubeconfig "${kubeconfig}" apply -f - + done +} + +prepare_external_managed_kubeconfig() { + log "Creating external managed kubeconfig Secret on hosting cluster" + kubectl --kubeconfig "${HOSTING_KUBECONFIG}" create namespace "${MANAGED_CLUSTER_NAME}" --dry-run=client -o yaml | \ + kubectl --kubeconfig 
"${HOSTING_KUBECONFIG}" apply -f - + kubectl --kubeconfig "${HOSTING_KUBECONFIG}" -n "${MANAGED_CLUSTER_NAME}" create secret generic "${EXTERNAL_KUBECONFIG_SECRET}" \ + --from-file=kubeconfig="${MANAGED_CONTAINER_KUBECONFIG}" \ + --dry-run=client -o yaml | kubectl --kubeconfig "${HOSTING_KUBECONFIG}" apply -f - +} + +wait_hosted_addon_ready() { + log "Waiting for hosted cluster-proxy resources" + wait_deployment "${HUB_KUBECONFIG}" "${HUB_NAMESPACE}" cluster-proxy-addon-manager 600s + wait_deployment "${HUB_KUBECONFIG}" "${HUB_NAMESPACE}" cluster-proxy-addon-user 600s + wait_deployment "${HUB_KUBECONFIG}" "${HUB_NAMESPACE}" cluster-proxy 600s + + wait_resource "${HOSTING_KUBECONFIG}" "deployment/cluster-proxy-managed-kubeconfig-provisioner -n ${ADDON_NAMESPACE}" 300 + wait_resource "${HOSTING_KUBECONFIG}" "deployment/cluster-proxy-proxy-agent -n ${ADDON_NAMESPACE}" 300 + wait_resource "${MANAGED_KUBECONFIG}" "deployment/cluster-proxy-service-relay -n ${ADDON_NAMESPACE}" 300 + + wait_deployment "${HOSTING_KUBECONFIG}" "${ADDON_NAMESPACE}" cluster-proxy-managed-kubeconfig-provisioner 600s + wait_deployment "${HOSTING_KUBECONFIG}" "${ADDON_NAMESPACE}" cluster-proxy-proxy-agent 600s + wait_deployment "${MANAGED_KUBECONFIG}" "${ADDON_NAMESPACE}" cluster-proxy-service-relay 600s + wait_container_health "${HOSTING_KUBECONFIG}" "${ADDON_NAMESPACE}" deployment/cluster-proxy-proxy-agent proxy-agent 8093 + wait_container_health "${HOSTING_KUBECONFIG}" "${ADDON_NAMESPACE}" deployment/cluster-proxy-proxy-agent service-proxy 8000 + wait_container_health "${HOSTING_KUBECONFIG}" "${ADDON_NAMESPACE}" deployment/cluster-proxy-proxy-agent managed-apiserver-proxy 8001 + wait_container_health "${MANAGED_KUBECONFIG}" "${ADDON_NAMESPACE}" deployment/cluster-proxy-service-relay service-relay 8000 + + log "Waiting for generated managed kubeconfig and addon availability" + wait_resource "${HOSTING_KUBECONFIG}" "secret/cluster-proxy-managed-kubeconfig -n ${ADDON_NAMESPACE}" 300 + for _ in 
$(seq 1 120); do + if kubectl --kubeconfig "${HUB_KUBECONFIG}" -n "${MANAGED_CLUSTER_NAME}" get managedclusteraddon cluster-proxy \ + -o jsonpath='{.status.conditions[?(@.type=="Available")].status}' | grep -q True; then + return + fi + sleep 5 + done + + kubectl --kubeconfig "${HUB_KUBECONFIG}" -n "${MANAGED_CLUSTER_NAME}" get managedclusteraddon cluster-proxy -o yaml + echo "Timed out waiting for hosted ManagedClusterAddOn availability" >&2 + exit 1 +} + +write_env_file() { + cat >"${WORK_DIR}/env" <"${HUB_KUBECONFIG}" + kind get kubeconfig --name "${HOSTING_CLUSTER_NAME}" >"${HOSTING_KUBECONFIG}" + kind get kubeconfig --name "${MANAGED_CLUSTER_NAME}" >"${MANAGED_KUBECONFIG}" + + local hub_host_server managed_host_server hub_container_server managed_container_server + hub_host_server="$(kubeconfig_server "${HUB_KUBECONFIG}")" + managed_host_server="$(kubeconfig_server "${MANAGED_KUBECONFIG}")" + hub_container_server="https://${HUB_CLUSTER_NAME}-control-plane:6443" + managed_container_server="https://${MANAGED_CLUSTER_NAME}-control-plane:6443" + rewrite_kubeconfig_server "${HUB_KUBECONFIG}" "${HUB_CONTAINER_KUBECONFIG}" "${hub_container_server}" + rewrite_kubeconfig_server "${MANAGED_KUBECONFIG}" "${MANAGED_CONTAINER_KUBECONFIG}" "${managed_container_server}" + + local image="${IMAGE_REGISTRY_NAME}/${IMAGE_NAME}:${IMAGE_TAG}" + for cluster in "${HUB_CLUSTER_NAME}" "${HOSTING_CLUSTER_NAME}" "${MANAGED_CLUSTER_NAME}"; do + log "Loading ${image} into ${cluster}" + kind load docker-image "${image}" --name "${cluster}" + done + + log "Initializing OCM hub" + KUBECONFIG="${HUB_KUBECONFIG}" clusteradm init \ + --output-join-command-file "${WORK_DIR}/join.sh" \ + --wait + + local join_cmd + join_cmd="$(sed -e 's/ --wait//g' -e 's/ --cluster-name \$1/ --cluster-name/g' "${WORK_DIR}/join.sh")" + + log "Joining hosting cluster ${HOSTING_CLUSTER_NAME}" + KUBECONFIG="${HOSTING_KUBECONFIG}" sh -c "${join_cmd} ${HOSTING_CLUSTER_NAME}" + rewrite_cluster_data_server 
"${HOSTING_KUBECONFIG}" "${hub_host_server}" "${hub_container_server}" + restart_ocm_deployments "${HOSTING_KUBECONFIG}" + KUBECONFIG="${HUB_KUBECONFIG}" clusteradm accept --clusters "${HOSTING_CLUSTER_NAME}" --wait + rewrite_cluster_data_server "${HOSTING_KUBECONFIG}" "${hub_host_server}" "${hub_container_server}" + restart_ocm_deployments "${HOSTING_KUBECONFIG}" + wait_managed_cluster_available "${HOSTING_CLUSTER_NAME}" + + log "Joining managed cluster ${MANAGED_CLUSTER_NAME} in hosted mode" + KUBECONFIG="${HOSTING_KUBECONFIG}" sh -c "${join_cmd} ${MANAGED_CLUSTER_NAME} --mode hosted --managed-cluster-kubeconfig ${MANAGED_KUBECONFIG}" + rewrite_cluster_data_server "${HOSTING_KUBECONFIG}" "${hub_host_server}" "${hub_container_server}" + rewrite_cluster_data_server "${HOSTING_KUBECONFIG}" "${managed_host_server}" "${managed_container_server}" + restart_ocm_deployments "${HOSTING_KUBECONFIG}" + KUBECONFIG="${HUB_KUBECONFIG}" clusteradm accept --clusters "${MANAGED_CLUSTER_NAME}" --wait + rewrite_cluster_data_server "${HOSTING_KUBECONFIG}" "${hub_host_server}" "${hub_container_server}" + rewrite_cluster_data_server "${HOSTING_KUBECONFIG}" "${managed_host_server}" "${managed_container_server}" + restart_ocm_deployments "${HOSTING_KUBECONFIG}" + wait_managed_cluster_available "${MANAGED_CLUSTER_NAME}" + + prepare_test_services + prepare_external_managed_kubeconfig + apply_placement + apply_hosted_addon_config + install_cluster_proxy + wait_hosted_addon_ready + write_env_file + + log "Hosted E2E environment is ready" + log "Environment file: ${WORK_DIR}/env" +} + +main "$@" diff --git a/test/e2e/env/run-hosted.sh b/test/e2e/env/run-hosted.sh new file mode 100755 index 000000000..77f08eeb9 --- /dev/null +++ b/test/e2e/env/run-hosted.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" +WORK_DIR="${WORK_DIR:-${ROOT_DIR}/_output/e2e-hosted}" +if [[ "${WORK_DIR}" != /* ]]; then + WORK_DIR="${ROOT_DIR}/${WORK_DIR}" +fi +ENV_FILE="${WORK_DIR}/env" +PROXY_ENTRYPOINT_LOCAL_PORT="${PROXY_ENTRYPOINT_LOCAL_PORT:-18090}" +USER_SERVER_LOCAL_PORT="${USER_SERVER_LOCAL_PORT:-19092}" +HOSTED_LABEL_FILTER="${HOSTED_LABEL_FILTER:-hosted}" +HUB_NAMESPACE="open-cluster-management-addon" + +if [[ ! -f "${ENV_FILE}" ]]; then + echo "Hosted E2E env file not found: ${ENV_FILE}" >&2 + echo "Run make setup-env-for-e2e-hosted first." >&2 + exit 1 +fi + +# shellcheck disable=SC1090 +source "${ENV_FILE}" +for kubeconfig_env in E2E_HUB_KUBECONFIG E2E_HOSTING_KUBECONFIG E2E_MANAGED_KUBECONFIG; do + kubeconfig_path="${!kubeconfig_env}" + if [[ "${kubeconfig_path}" != /* ]]; then + export "${kubeconfig_env}=${ROOT_DIR}/${kubeconfig_path}" + fi +done + +PIDS=() + +cleanup() { + for pid in "${PIDS[@]}"; do + if kill -0 "${pid}" >/dev/null 2>&1; then + kill "${pid}" >/dev/null 2>&1 || true + wait "${pid}" >/dev/null 2>&1 || true + fi + done +} +trap cleanup EXIT + +wait_for_port() { + local port="$1" + for _ in $(seq 1 120); do + if (echo >/dev/tcp/127.0.0.1/"${port}") >/dev/null 2>&1; then + return + fi + sleep 1 + done + + echo "Timed out waiting for localhost:${port}" >&2 + exit 1 +} + +start_port_forward() { + local name="$1" + local local_port="$2" + local remote_port="$3" + local log_file="${WORK_DIR}/${name}.port-forward.log" + + echo "Starting port-forward ${name}: 127.0.0.1:${local_port} -> ${remote_port}" + kubectl --kubeconfig "${E2E_HUB_KUBECONFIG}" -n "${HUB_NAMESPACE}" port-forward \ + --address 127.0.0.1 \ + "svc/${name}" \ + "${local_port}:${remote_port}" >"${log_file}" 2>&1 & + PIDS+=("$!") + wait_for_port "${local_port}" +} + +start_port_forward proxy-entrypoint "${PROXY_ENTRYPOINT_LOCAL_PORT}" 8090 +start_port_forward cluster-proxy-addon-user "${USER_SERVER_LOCAL_PORT}" 9092 + +export PROXY_ENTRYPOINT_ADDRESS="127.0.0.1:${PROXY_ENTRYPOINT_LOCAL_PORT}" 
+export CLUSTER_PROXY_USER_SERVER_ADDRESS="127.0.0.1:${USER_SERVER_LOCAL_PORT}"
+
+cd "${ROOT_DIR}"
+go test ./test/e2e -count=1 -v -ginkgo.v -ginkgo.label-filter="${HOSTED_LABEL_FILTER}"
diff --git a/test/e2e/hosted_test.go b/test/e2e/hosted_test.go
new file mode 100644
index 000000000..a46817ddd
--- /dev/null
+++ b/test/e2e/hosted_test.go
@@ -0,0 +1,657 @@
+package e2e
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"os"
+	"strconv"
+	"sync/atomic"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/kubernetes"
+	k8sscheme "k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/remotecommand"
+	"k8s.io/client-go/transport/spdy"
+
+	addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
+)
+
+const (
+	// sourceKubeconfigHashAnnotation is the annotation the refresh spec reads from the generated
+	// Secret to detect that it was regenerated after the source kubeconfig changed.
+	sourceKubeconfigHashAnnotation = "proxy.open-cluster-management.io/source-kubeconfig-hash"
+	// managedKubeconfigSecretName is the generated Secret the specs expect on the hosting cluster only.
+	managedKubeconfigSecretName = "cluster-proxy-managed-kubeconfig"
+	// serviceRelayName names the relay Deployment and Service the specs expect on the managed cluster only.
+	serviceRelayName = "cluster-proxy-service-relay"
+)
+
+// The hosted-mode specs run in declaration order (Ordered) and are skipped unless hostedMode is set.
+// The last spec deletes the ManagedClusterAddOn that the earlier specs exercise.
+var _ = Describe("Hosted Mode", Label("hosted"), Ordered, func() {
+	BeforeAll(func() {
+		if !hostedMode {
+			Skip("hosted mode kubeconfigs are not configured")
+		}
+	})
+
+	It("should split hosted resources across hosting and managed clusters", Label("hosted-relay", "deployment"), func() {
+		By("Checking hosting cluster resources")
+		hostingDeploy := getDeployment(hostingKubeClient, managedClusterInstallNamespace, "cluster-proxy-proxy-agent")
+		Expect(containerNames(hostingDeploy)).To(ContainElements(
+			"proxy-agent",
+			"addon-agent",
+			"service-proxy",
+			"managed-apiserver-proxy",
+		))
+		Expect(deploymentHasVolume(hostingDeploy, "managed-kubeconfig")).To(BeTrue())
+		Expect(containerHasVolumeMount(hostingDeploy, "proxy-agent", "/etc/managed")).To(BeFalse())
+		Expect(containerHasVolumeMount(hostingDeploy, "addon-agent", "/etc/managed")).To(BeTrue())
+		Expect(containerHasVolumeMount(hostingDeploy, "service-proxy", "/etc/managed")).To(BeTrue())
+		Expect(containerHasVolumeMount(hostingDeploy, "managed-apiserver-proxy", "/etc/managed")).To(BeTrue())
+
+		getDeployment(hostingKubeClient, managedClusterInstallNamespace, "cluster-proxy-managed-kubeconfig-provisioner")
+		_, err := hostingKubeClient.CoreV1().Secrets(managedClusterInstallNamespace).Get(
+			context.Background(), managedKubeconfigSecretName, metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		_, err = hostingKubeClient.RbacV1().Roles(managedClusterInstallNamespace).Get(
+			context.Background(), "cluster-proxy-addon-agent", metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		_, err = hostingKubeClient.RbacV1().RoleBindings(managedClusterInstallNamespace).Get(
+			context.Background(), "cluster-proxy-addon-agent", metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		_, err = hostingKubeClient.CoreV1().Services(managedClusterInstallNamespace).Get(
+			context.Background(), managedClusterName, metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		expectDeploymentNotFound(hostingKubeClient, managedClusterInstallNamespace, serviceRelayName)
+
+		By("Checking managed cluster resources")
+		managedRelay := getDeployment(managedKubeClient, managedClusterInstallNamespace, serviceRelayName)
+		Expect(containerNames(managedRelay)).To(ContainElement("service-relay"))
+		Expect(managedRelay.Spec.Template.Spec.AutomountServiceAccountToken).ToNot(BeNil())
+		Expect(*managedRelay.Spec.Template.Spec.AutomountServiceAccountToken).To(BeFalse())
+		Expect(deploymentHasVolume(managedRelay, "managed-kubeconfig")).To(BeFalse())
+		_, err = managedKubeClient.CoreV1().ServiceAccounts(managedClusterInstallNamespace).Get(
+			context.Background(), "cluster-proxy", metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		_, err = managedKubeClient.RbacV1().Roles(managedClusterInstallNamespace).Get(
+			context.Background(), "cluster-proxy-service-relay-proxy", metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		expectRoleNotFound(managedKubeClient, managedClusterInstallNamespace, "cluster-proxy-addon-agent")
+		_, err = managedKubeClient.CoreV1().Services(managedClusterInstallNamespace).Get(
+			context.Background(), serviceRelayName, metav1.GetOptions{})
+		Expect(err).ToNot(HaveOccurred())
+		expectDeploymentNotFound(managedKubeClient, managedClusterInstallNamespace, "cluster-proxy-proxy-agent")
+		expectDeploymentNotFound(managedKubeClient, managedClusterInstallNamespace, "cluster-proxy-managed-kubeconfig-provisioner")
+		_, err = managedKubeClient.CoreV1().Secrets(managedClusterInstallNamespace).Get(
+			context.Background(), managedKubeconfigSecretName, metav1.GetOptions{})
+		Expect(apierrors.IsNotFound(err)).To(BeTrue(),
+			"expected secret %s/%s to be absent from the managed cluster, got %v",
+			managedClusterInstallNamespace, managedKubeconfigSecretName, err)
+	})
+
+	It("should provision and refresh the managed kubeconfig", Label("hosted-relay", "managed-kubeconfig"), func() {
+		By("Checking generated managed kubeconfig Secret")
+		generated := getGeneratedManagedKubeconfig()
+		Expect(generated.Data).To(HaveKey("kubeconfig"))
+		Expect(generated.Annotations).To(HaveKey(sourceKubeconfigHashAnnotation))
+		originalHash := generated.Annotations[sourceKubeconfigHashAnnotation]
+		originalResourceVersion := generated.ResourceVersion
+
+		By("Checking ManagedKubeconfigReady condition")
+		addon := &addonapiv1alpha1.ManagedClusterAddOn{}
+		Expect(hubRuntimeClient.Get(context.Background(), types.NamespacedName{
+			Namespace: managedClusterName,
+			Name:      "cluster-proxy",
+		}, addon)).To(Succeed())
+		Expect(meta.IsStatusConditionTrue(addon.Status.Conditions, "ManagedKubeconfigReady")).To(BeTrue())
+
+		By("Changing source kubeconfig data and waiting for refresh")
+		sourceSecretName := envOrDefault("E2E_HOSTED_EXTERNAL_KUBECONFIG_SECRET", "external-managed-kubeconfig")
+		// Re-read and retry so that an update conflict with a concurrent writer does not fail the spec.
+		Eventually(func() error {
+			source, err := hostingKubeClient.CoreV1().Secrets(managedClusterName).Get(
+				context.Background(), sourceSecretName, metav1.GetOptions{})
+			if err != nil {
+				return err
+			}
+			// Also protects the map write below, which would panic on a Secret with nil Data.
+			if len(source.Data["kubeconfig"]) == 0 {
+				return fmt.Errorf("source secret %s/%s has no kubeconfig data", managedClusterName, sourceSecretName)
+			}
+			// The appended line starts with '#', so it should read as a YAML comment; it only changes the bytes.
+			source.Data["kubeconfig"] = append(source.Data["kubeconfig"],
+				[]byte(fmt.Sprintf("\n# e2e-refresh=%d\n", time.Now().UnixNano()))...)
+			_, err = hostingKubeClient.CoreV1().Secrets(managedClusterName).Update(
+				context.Background(), source, metav1.UpdateOptions{})
+			return err
+		}, time.Minute, 5*time.Second).Should(Succeed())
+
+		// Get the Secret here instead of calling getGeneratedManagedKubeconfig: a global Expect failing inside
+		// a polled function aborts the spec immediately rather than letting Eventually retry.
+		Eventually(func() error {
+			refreshed, err := hostingKubeClient.CoreV1().Secrets(managedClusterInstallNamespace).Get(
+				context.Background(), managedKubeconfigSecretName, metav1.GetOptions{})
+			if err != nil {
+				return err
+			}
+			if refreshed.ResourceVersion == originalResourceVersion {
+				return fmt.Errorf("generated secret resourceVersion has not changed")
+			}
+			if refreshed.Annotations[sourceKubeconfigHashAnnotation] == originalHash {
+				return fmt.Errorf("source kubeconfig hash has not changed")
+			}
+			return nil
+		}, time.Minute, 5*time.Second).Should(Succeed())
+	})
+
+	It("should proxy kube-apiserver requests with hub and managed tokens", Label("hosted-relay", "kube-apiserver"), func() {
+		By("Checking raw konnectivity tunnel health")
+		Expect(probeHealth()).To(Succeed())
+
+		By("Checking hub token impersonation")
+		_, err := clusterProxyKubeClient.CoreV1().Pods(targetNamespace).List(context.Background(), metav1.ListOptions{})
+		Expect(err).ToNot(HaveOccurred())
+
+		req := clusterProxyKubeClient.CoreV1().Pods(targetNamespace).GetLogs(podName, &corev1.PodLogOptions{})
+		logs, err := req.Stream(context.Background())
+		Expect(err).ToNot(HaveOccurred())
+		Expect(logs.Close()).To(Succeed())
+
+		stdout := execThroughClusterProxy(clusterProxyCfg)
+		Expect(stdout).To(ContainSubstring("hello"))
+
+		body := portForwardThroughClusterProxy()
+		Expect(body).To(ContainSubstring("Hello from hello-world"))
+
+		By("Checking managed token authentication")
+		_, err = clusterProxyManagedClient.CoreV1().Pods(targetNamespace).List(context.Background(), metav1.ListOptions{})
+		Expect(err).ToNot(HaveOccurred())
+
+		By("Checking RBAC failures")
+		_, err = clusterProxyKubeClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{})
+		Expect(apierrors.IsForbidden(err)).To(BeTrue(), "expected Forbidden with the hub token, got %v", err)
+		_, err = clusterProxyManagedClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{})
+		Expect(apierrors.IsForbidden(err)).To(BeTrue(), "expected Forbidden with the managed token, got %v", err)
+
+		By("Checking invalid tokens are rejected")
+		_, err = clusterProxyUnAuthClient.CoreV1().Pods(targetNamespace).List(context.Background(), metav1.ListOptions{})
+		Expect(apierrors.IsUnauthorized(err)).To(BeTrue(), "expected Unauthorized with an invalid token, got %v", err)
+	})
+
+	It("should proxy HTTP and HTTPS services through Relay and expose metrics", Label("hosted-relay", "serviceproxy"), func() {
+		statusCode, body := requestServiceThroughUserServer("http", "hello-world", 8000)
+		Expect(statusCode).To(Equal(http.StatusOK))
+		Expect(body).To(ContainSubstring("Hello from hello-world"))
+
+		statusCode, body = requestServiceThroughUserServer("https", "hello-world-https", 8443)
+		Expect(statusCode).To(Equal(http.StatusOK))
+		Expect(body).To(ContainSubstring("Hello from hello-world-https"))
+
+		// Poll the error-returning helper: Eventually matches the first return value and requires the error
+		// to be nil, so a failed scrape (for example, no pod yet) is retried instead of aborting the spec.
+		Eventually(func() (string, error) {
+			return fetchMetricsFromPod(hostingRESTConfig, hostingKubeClient, managedClusterInstallNamespace,
+				"proxy.open-cluster-management.io/component-name=proxy-agent")
+		}, time.Minute, 5*time.Second).Should(ContainSubstring("cluster_proxy_service_proxy_requests_total"))
+
+		Eventually(func() (string, error) {
+			return fetchMetricsFromPod(managedRESTConfig, managedKubeClient, managedClusterInstallNamespace,
+				"proxy.open-cluster-management.io/component-name=service-relay")
+		}, time.Minute, 5*time.Second).Should(ContainSubstring("cluster_proxy_service_relay_requests_total"))
+	})
+
+	It("should run BestEffort service proxy when explicitly requested", Label("hosted-besteffort"), func() {
+		if os.Getenv("RUN_HOSTED_BESTEFFORT") != "true" {
+			Skip("RUN_HOSTED_BESTEFFORT=true is required")
+		}
+
+		patchAddOnDeploymentConfigVariable("hostedServiceProxyMode", "BestEffort")
+		waitServiceProxyMode("BestEffort")
+
+		statusCode, body := requestServiceThroughUserServer("http", "hello-world", 8000)
+		Expect(statusCode).To(Equal(http.StatusOK))
+		Expect(body).To(ContainSubstring("Hello from hello-world"))
+	})
+
+	It("should clean generated managed kubeconfig resources when the addon is deleted", Label("hosted-relay", "cleanup"), func() {
+		By("Removing the managed cluster from placement")
+		labelKey := envOrDefault("E2E_HOSTED_PLACEMENT_LABEL_KEY", "cluster-proxy-e2e")
+		Eventually(func() error {
+			cluster, err := hubClusterClient.ClusterV1().ManagedClusters().Get(context.Background(), managedClusterName, metav1.GetOptions{})
+			if err != nil {
+				return err
+			}
+			cluster = cluster.DeepCopy()
+			delete(cluster.Labels, labelKey)
+			_, err = hubClusterClient.ClusterV1().ManagedClusters().Update(context.Background(), cluster, metav1.UpdateOptions{})
+			return err
+		}, time.Minute, 5*time.Second).Should(Succeed())
+
+		By("Deleting the ManagedClusterAddOn")
+		err := hubRuntimeClient.Delete(context.Background(), &addonapiv1alpha1.ManagedClusterAddOn{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: managedClusterName,
+				Name:      "cluster-proxy",
+			},
+		})
+		// A NotFound error means the addon is already gone, which is the state this step wants.
+		if !apierrors.IsNotFound(err) {
+			Expect(err).ToNot(HaveOccurred())
+		}
+
+		Eventually(func() bool {
+			_, err := hostingKubeClient.CoreV1().Secrets(managedClusterInstallNamespace).Get(
+				context.Background(), managedKubeconfigSecretName, metav1.GetOptions{})
+			return apierrors.IsNotFound(err)
+		}, 5*time.Minute, 5*time.Second).Should(BeTrue())
+
+		Eventually(func() bool {
+			_, err := hostingKubeClient.BatchV1().Jobs(managedClusterInstallNamespace).Get(
+				context.Background(), "cluster-proxy-managed-kubeconfig-cleanup", metav1.GetOptions{})
+			return apierrors.IsNotFound(err)
+		}, 5*time.Minute, 5*time.Second).Should(BeTrue())
+
+		Eventually(func() bool {
+			addon := &addonapiv1alpha1.ManagedClusterAddOn{}
+			err := hubRuntimeClient.Get(context.Background(), types.NamespacedName{
+				Namespace: managedClusterName,
+				Name:      "cluster-proxy",
+			}, addon)
+			return apierrors.IsNotFound(err)
+		}, 5*time.Minute, 5*time.Second).Should(BeTrue())
+	})
+})
+
+// getDeployment fetches the named Deployment and fails the current spec on any error.
+// It asserts with the global Expect, so it should not be called from a polled function.
+func getDeployment(kubeClient kubernetes.Interface, namespace, name string) *appsv1.Deployment {
+	deploy, err := kubeClient.AppsV1().Deployments(namespace).Get(context.Background(), name, metav1.GetOptions{})
+	Expect(err).ToNot(HaveOccurred())
+	return deploy
+}
+
+// expectDeploymentNotFound asserts that getting the named Deployment returns a NotFound error.
+func expectDeploymentNotFound(kubeClient kubernetes.Interface, namespace, name string) {
+	_, err := kubeClient.AppsV1().Deployments(namespace).Get(context.Background(), name, metav1.GetOptions{})
+	Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected deployment %s/%s to be absent, got %v", namespace, name, err)
+}
+
+// expectRoleNotFound asserts that getting the named Role returns a NotFound error.
+func expectRoleNotFound(kubeClient kubernetes.Interface, namespace, name string) {
+	_, err := kubeClient.RbacV1().Roles(namespace).Get(context.Background(), name, metav1.GetOptions{})
+	Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected role %s/%s to be absent, got %v", namespace, name, err)
+}
+
+// containerNames returns the names of the Deployment's pod-template containers, in spec order.
+func containerNames(deploy *appsv1.Deployment) []string {
+	names := make([]string, 0, len(deploy.Spec.Template.Spec.Containers))
+	for _, container := range deploy.Spec.Template.Spec.Containers {
+		names = append(names, container.Name)
+	}
+	return names
+}
+
+// containerHasVolumeMount reports whether the named container has a volume mount at mountPath.
+// It returns false when the Deployment has no container with that name.
+func containerHasVolumeMount(deploy *appsv1.Deployment, containerName, mountPath string) bool {
+	for _, container := range deploy.Spec.Template.Spec.Containers {
+		if container.Name != containerName {
+			continue
+		}
+		for _, mount := range container.VolumeMounts {
+			if mount.MountPath == mountPath {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// deploymentHasVolume reports whether the pod template declares a volume with the given name.
+func deploymentHasVolume(deploy *appsv1.Deployment, name string) bool {
+	for _, volume := range deploy.Spec.Template.Spec.Volumes {
+		if volume.Name == name {
+			return true
+		}
+	}
+	return false
+}
+
+// getGeneratedManagedKubeconfig fetches the generated managed kubeconfig Secret through
+// hostingKubeClient and fails the current spec if it cannot be read.
+func getGeneratedManagedKubeconfig() *corev1.Secret {
+	secret, err := hostingKubeClient.CoreV1().Secrets(managedClusterInstallNamespace).Get(
+		context.Background(), managedKubeconfigSecretName, metav1.GetOptions{})
+	Expect(err).ToNot(HaveOccurred())
+	return secret
+}
+
+// execThroughClusterProxy runs "echo hello" in the test pod's container through an SPDY exec
+// session created from config, and returns what the command wrote to stdout.
+func execThroughClusterProxy(config *rest.Config) string {
+	req := clusterProxyKubeClient.CoreV1().RESTClient().Post().
+		Resource("pods").
+		Name(podName).
+		Namespace(targetNamespace).
+		SubResource("exec").
+		Param("container", podContainerName)
+
+	req.VersionedParams(&corev1.PodExecOptions{
+		Command:   []string{"/bin/sh", "-c", "echo hello"},
+		Container: podContainerName,
+		Stdin:     false,
+		Stdout:    true,
+		Stderr:    true,
+		TTY:       false,
+	}, k8sscheme.ParameterCodec)
+
+	executor, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
+	Expect(err).ToNot(HaveOccurred())
+
+	var stdout, stderr bytes.Buffer
+	err = executor.StreamWithContext(context.Background(), remotecommand.StreamOptions{
+		Stdout: &stdout,
+		Stderr: &stderr,
+		Tty:    false,
+	})
+	// Pass stderr as an argument: used as the description itself, any '%' in it would be read as a format verb.
+	Expect(err).ToNot(HaveOccurred(), "exec stderr: %s", stderr.String())
+	return stdout.String()
+}
+
+// portForwardThroughClusterProxy fetches /index.html from podPort of the test pod over a
+// port-forward opened with the cluster-proxy client configuration.
+func portForwardThroughClusterProxy() string {
+	return getViaPortForward(clusterProxyCfg, clusterProxyKubeClient, targetNamespace, podName, podPort, "/index.html")
+}
+
+// requestServiceThroughUserServer GETs /index.html of service (namespace "default") via the user
+// server's proxy-service URL using clusterProxyHttpClient, returning the status code and body.
+func requestServiceThroughUserServer(proto, service string, port int) (int, string) {
+	targetURL := fmt.Sprintf(
+		"https://%s/%s/api/v1/namespaces/default/services/%s:%s:%d/proxy-service/index.html",
+		userServerServiceAddress,
+		managedClusterName,
+		proto,
+		service,
+		port,
+	)
+
+	req, err := http.NewRequest(http.MethodGet, targetURL, nil)
+	Expect(err).ToNot(HaveOccurred())
+	resp, err := clusterProxyHttpClient.Do(req)
+	Expect(err).ToNot(HaveOccurred())
+	defer resp.Body.Close()
+	body, err := io.ReadAll(resp.Body)
+	Expect(err).ToNot(HaveOccurred())
+	return resp.StatusCode, string(body)
+}
+
+// metricsFromPod is fetchMetricsFromPod for straight-line spec code: it fails the current spec on
+// any error. Polled functions should call fetchMetricsFromPod instead.
+func metricsFromPod(restConfig *rest.Config, kubeClient kubernetes.Interface, namespace, selector string) string {
+	metrics, err := fetchMetricsFromPod(restConfig, kubeClient, namespace, selector)
+	Expect(err).ToNot(HaveOccurred())
+	return metrics
+}
+
+// fetchMetricsFromPod returns the /metrics body served on port 8000 by the first pod in namespace
+// that matches selector, or an error when the list fails, no pod matches, or the scrape fails.
+func fetchMetricsFromPod(restConfig *rest.Config, kubeClient kubernetes.Interface, namespace, selector string) (string, error) {
+	pods, err := kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: selector})
+	if err != nil {
+		return "", err
+	}
+	if len(pods.Items) == 0 {
+		return "", fmt.Errorf("no pods match selector %q in namespace %q", selector, namespace)
+	}
+	return fetchViaPortForward(restConfig, kubeClient, namespace, pods.Items[0].Name, 8000, "/metrics")
+}
+
+// getViaPortForward is fetchViaPortForward for straight-line spec code: it fails the current spec
+// on any error instead of returning it.
+func getViaPortForward(restConfig *rest.Config, kubeClient kubernetes.Interface, namespace, pod string, remotePort int, path string) string {
+	body, err := fetchViaPortForward(restConfig, kubeClient, namespace, pod, remotePort, path)
+	Expect(err).ToNot(HaveOccurred())
+	return body
+}
+
+// fetchViaPortForward GETs path from remotePort of the pod by listening on an ephemeral local port and
+// piping each accepted connection through forwardPortForwardConnection. It returns the body of a 200
+// response, or an error; it never asserts, so it is safe to call from Eventually/Consistently.
+func fetchViaPortForward(restConfig *rest.Config, kubeClient kubernetes.Interface, namespace, pod string, remotePort int, path string) (string, error) {
+	listener, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		return "", err
+	}
+	defer listener.Close()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// report records only the first forwarding error and never blocks, so per-connection goroutines
+	// cannot be left stuck on a full channel once this function has returned.
+	errCh := make(chan error, 1)
+	report := func(err error) {
+		select {
+		case errCh <- err:
+		default:
+		}
+	}
+
+	var requestID int32
+	go func() {
+		for {
+			conn, err := listener.Accept()
+			if err != nil {
+				// Accept fails once the deferred listener.Close runs; by then cancel has already fired.
+				if ctx.Err() == nil {
+					report(err)
+				}
+				return
+			}
+			id := atomic.AddInt32(&requestID, 1)
+			go func() {
+				if err := forwardPortForwardConnection(restConfig, kubeClient, namespace, pod, remotePort, int(id), conn); err != nil {
+					report(err)
+				}
+			}()
+		}
+	}()
+
+	localPort := listener.Addr().(*net.TCPAddr).Port
+	// Keep-alives are disabled so the local connection closes right after the response, letting the
+	// forwarding goroutine finish promptly instead of idling until its 30s timeout.
+	httpClient := &http.Client{
+		Timeout:   30 * time.Second,
+		Transport: &http.Transport{DisableKeepAlives: true},
+	}
+	resp, err := httpClient.Get(fmt.Sprintf("http://127.0.0.1:%d%s", localPort, path))
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("GET %s via port-forward to %s/%s:%d returned status %d: %s", path, namespace, pod, remotePort, resp.StatusCode, body)
+	}
+	select {
+	case err := <-errCh:
+		return "", err
+	default:
+	}
+	return string(body), nil
+}
+
+// forwardPortForwardConnection pipes conn to remotePort of the pod over a newly dialed SPDY
+// port-forward session. It returns nil once either copy direction finishes, the error read from the
+// error stream (nil if that stream ends empty), or a timeout error after 30 seconds.
+func forwardPortForwardConnection(
+	restConfig *rest.Config,
+	kubeClient kubernetes.Interface,
+	namespace, pod string,
+	remotePort int,
+	requestID int,
+	conn net.Conn,
+) error {
+	defer conn.Close()
+
+	transport, upgrader, err := spdy.RoundTripperFor(restConfig)
+	if err != nil {
+		return err
+	}
+	req := kubeClient.CoreV1().RESTClient().Post().
+		Resource("pods").
+		Namespace(namespace).
+		Name(pod).
+		SubResource("portforward")
+	dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", req.URL())
+	streamConn, _, err := dialer.Dial("portforward.k8s.io")
+	if err != nil {
+		return err
+	}
+	defer streamConn.Close()
+
+	// The error and data streams are created with the same port and request ID headers; only the stream type differs.
+	headers := http.Header{}
+	headers.Set(corev1.StreamType, corev1.StreamTypeError)
+	headers.Set(corev1.PortHeader, strconv.Itoa(remotePort))
+	headers.Set(corev1.PortForwardRequestIDHeader, strconv.Itoa(requestID))
+	errorStream, err := streamConn.CreateStream(headers)
+	if err != nil {
+		return err
+	}
+	// Nothing is written to the error stream; NOTE(review): assumes Close only ends our write side, as in client-go's port forwarder.
+	errorStream.Close()
+
+	errorCh := make(chan error, 1)
+	go func() {
+		message, err := io.ReadAll(errorStream)
+		switch {
+		case err != nil:
+			errorCh <- err
+		case len(message) > 0:
+			errorCh <- fmt.Errorf("port-forward error: %s", string(message))
+		default:
+			errorCh <- nil
+		}
+	}()
+
+	headers.Set(corev1.StreamType, corev1.StreamTypeData)
+	dataStream, err := streamConn.CreateStream(headers)
+	if err != nil {
+		return err
+	}
+	defer dataStream.Close()
+
+	// Buffered so each copier can deliver its result and exit even after this function has returned.
+	remoteDone := make(chan error, 1)
+	localDone := make(chan error, 1)
+	go func() {
+		_, err := io.Copy(conn, dataStream)
+		remoteDone <- err
+	}()
+	go func() {
+		_, err := io.Copy(dataStream, conn)
+		localDone <- err
+	}()
+
+	select {
+	case err := <-errorCh:
+		return err
+	case <-remoteDone:
+		return nil
+	case <-localDone:
+		return nil
+	case <-time.After(30 * time.Second):
+		return fmt.Errorf("timed out waiting for port-forward data")
+	}
+}
+
+// patchAddOnDeploymentConfigVariable sets the customized variable name to value on the
+// AddOnDeploymentConfig (adding it if absent), retrying the read-modify-update for up to a minute.
+func patchAddOnDeploymentConfigVariable(name, value string) {
+	configName := envOrDefault("E2E_HOSTED_DEPLOY_CONFIG_NAME", "hosted-relay")
+	Eventually(func() error {
+		config := &addonapiv1alpha1.AddOnDeploymentConfig{}
+		if err := hubRuntimeClient.Get(context.Background(), types.NamespacedName{
+			Namespace: managedClusterName,
+			Name:      configName,
+		}, config); err != nil {
+			return err
+		}
+		config = config.DeepCopy()
+		found := false
+		for i := range config.Spec.CustomizedVariables {
+			if config.Spec.CustomizedVariables[i].Name == name {
+				config.Spec.CustomizedVariables[i].Value = value
+				found = true
+				break
+			}
+		}
+		if !found {
+			config.Spec.CustomizedVariables = append(config.Spec.CustomizedVariables, addonapiv1alpha1.CustomizedVariable{
+				Name:  name,
+				Value: value,
+			})
+		}
+		return hubRuntimeClient.Update(context.Background(), config)
+	}, time.Minute, 5*time.Second).Should(Succeed())
+}
+
+// waitServiceProxyMode waits up to two minutes for the service-proxy container of the
+// cluster-proxy-proxy-agent Deployment to carry the --hosted-service-proxy-mode=<mode> argument.
+func waitServiceProxyMode(mode string) {
+	expectedArg := "--hosted-service-proxy-mode=" + mode
+	Eventually(func() error {
+		// Read the Deployment directly rather than via getDeployment so that a failed Get is retried.
+		deploy, err := hostingKubeClient.AppsV1().Deployments(managedClusterInstallNamespace).Get(
+			context.Background(), "cluster-proxy-proxy-agent", metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		for _, container := range deploy.Spec.Template.Spec.Containers {
+			if container.Name == "service-proxy" && stringSliceContains(container.Args, expectedArg) {
+				return nil
+			}
+		}
+		return fmt.Errorf("service-proxy does not contain arg %q", expectedArg)
+	}, 2*time.Minute, 5*time.Second).Should(Succeed())
+}
+
+// stringSliceContains reports whether target is an element of values.
+func stringSliceContains(values []string, target string) bool {
+	for _, value := range values {
+		if value == target {
+			return true
+		}
+	}
+	return false
+}
+
+// envOrDefault returns the value of the environment variable name, or defaultValue when the
+// variable is unset or empty.
+func envOrDefault(name, defaultValue string) string {
+	if value := os.Getenv(name); value != "" {
+		return value
+	}
+	return defaultValue
+}