From 5f946bed82e0dee4b33b618cb0fc917356f4b003 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 12 Apr 2024 10:03:54 -0700 Subject: [PATCH 1/3] make ssh services optional --- sky/clouds/kubernetes.py | 2 ++ sky/provision/kubernetes/network_utils.py | 17 +++++++++++++++++ sky/templates/kubernetes-ray.yml.j2 | 8 ++++++++ 3 files changed, 27 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index e6b3b5bbe9e..cd89f99c5db 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -254,6 +254,7 @@ def make_deploy_resources_variables( kubernetes_utils.get_gpu_label_key_value(acc_type) port_mode = network_utils.get_port_mode(None) + networking_mode = network_utils.get_networking_mode(None) deploy_vars = { 'instance_type': resources.instance_type, @@ -266,6 +267,7 @@ def make_deploy_resources_variables( 'k8s_namespace': kubernetes_utils.get_current_kube_config_context_namespace(), 'k8s_port_mode': port_mode.value, + 'k8s_networking_mode': networking_mode.value, 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, diff --git a/sky/provision/kubernetes/network_utils.py b/sky/provision/kubernetes/network_utils.py index 4102d10c4ac..0fd9af59f6c 100644 --- a/sky/provision/kubernetes/network_utils.py +++ b/sky/provision/kubernetes/network_utils.py @@ -43,6 +43,23 @@ def get_port_mode( return port_mode +def get_networking_mode(mode_str: Optional[str] = None) -> kubernetes_enums.KubernetesNetworkingMode: + """Get the networking mode from the provider config.""" + mode_str = mode_str or skypilot_config.get_nested( + ('kubernetes', 'networking_mode'), + kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD.value) + try: + networking_mode = kubernetes_enums.KubernetesNetworkingMode(mode_str) + except ValueError as e: + with ux_utils.print_exception_no_traceback(): + raise ValueError(str(e) + + ' Cluster was setup with invalid networking mode.' 
+ + ' Please check the networking_mode in provider config.') \ + from None + + return networking_mode + + def fill_loadbalancer_template(namespace: str, service_name: str, ports: List[int], selector_key: str, selector_value: str) -> Dict: diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 893d5c8565a..804ebc36643 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -80,8 +80,11 @@ provider: name: skypilot-service-account-role apiGroup: rbac.authorization.k8s.io + {% if k8s_networking_mode == "nodeport" or num_nodes > 1 %} services: + {% if k8s_networking_mode == "nodeport" %} # Service to expose the head node pod's SSH port. + # Required only when using nodeport for accessing ssh. - apiVersion: v1 kind: Service metadata: @@ -96,7 +99,10 @@ provider: - protocol: TCP port: 22 targetPort: 22 + {% endif %} + {% if num_nodes > 1 %} # Service that maps to the head node of the Ray cluster. + # Required only in multi-node settings. - apiVersion: v1 kind: Service metadata: @@ -120,6 +126,8 @@ provider: protocol: TCP port: 8265 targetPort: 8265 + {% endif %} + {% endif %} # Specify the pod type for the ray head node (as configured below). 
head_node_type: ray_head_default From 607c1e40eae708974725d9d19fa4d1210a7b8927 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 17 Apr 2024 08:02:55 -0700 Subject: [PATCH 2/3] fix --- sky/provision/kubernetes/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 60e9857333b..4d3b115bea3 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -1223,7 +1223,7 @@ def combine_metadata_fields(cluster_yaml_path: str) -> None: yaml_obj['available_node_types']['ray_head_default']['node_config'] ['metadata'], # Services for pods - *[svc['metadata'] for svc in yaml_obj['provider']['services']] + *[svc['metadata'] for svc in yaml_obj['provider'].get('services', [])] ] for destination in combination_destinations: From 57f4ed5fe90b0642a571403104609f27f7e43299 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 17 Apr 2024 08:05:26 -0700 Subject: [PATCH 3/3] lint --- sky/provision/kubernetes/network_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/provision/kubernetes/network_utils.py b/sky/provision/kubernetes/network_utils.py index 0fd9af59f6c..c2125eb09a9 100644 --- a/sky/provision/kubernetes/network_utils.py +++ b/sky/provision/kubernetes/network_utils.py @@ -43,7 +43,9 @@ def get_port_mode( return port_mode -def get_networking_mode(mode_str: Optional[str] = None) -> kubernetes_enums.KubernetesNetworkingMode: +def get_networking_mode( + mode_str: Optional[str] = None +) -> kubernetes_enums.KubernetesNetworkingMode: """Get the networking mode from the provider config.""" mode_str = mode_str or skypilot_config.get_nested( ('kubernetes', 'networking_mode'),