Skip to content

Commit

Permalink
Merge pull request #599 from ozhuraki/operator-select-device-type
Browse files Browse the repository at this point in the history
Make it possible to select supported devices in the operator
  • Loading branch information
bart0sh committed Apr 12, 2021
2 parents 7cbd4d0 + 6fbf7c9 commit 3161459
Show file tree
Hide file tree
Showing 12 changed files with 183 additions and 25 deletions.
18 changes: 18 additions & 0 deletions cmd/operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ NAME DESIRED READY NODE SELECTOR AGE
gpudeviceplugin-sample 1 1 5s
```

In order to limit the deployment to a specific device type,
use one of kustomizations under deployments/operator/device.

For example, to limit the deployment to FPGA, use:

```bash
$ kubectl apply -k deployments/operator/device/fpga
```

Operator also supports deployments with multiple selected device types.
In this case, create a new kustomization with the necessary resources
that passes the desired device types to the operator using `--device`
command line argument multiple times.

## Known issues

When the operator is run with leader election enabled, that is with the option
Expand All @@ -106,3 +120,7 @@ number of pods. Otherwise a heart beat used by the leader election code may trig
a timeout and crash. We are going to use different clients for the controller and
leader election code to alleviate the issue. See more details in
https://github.com/intel/intel-device-plugins-for-kubernetes/issues/476.

In case the deployment is limited to specific device type(s),
the CRDs for other device types are still created, but no controllers
for them are registered.
95 changes: 70 additions & 25 deletions cmd/operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ package main

import (
"flag"
"fmt"
"os"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -54,18 +56,58 @@ func init() {

type devicePluginControllerAndWebhook map[string](func(ctrl.Manager, string, bool) error)

type flagList []string

var supportedDevices = flagList{"dsa", "fpga", "gpu", "qat", "sgx"}
var devices flagList

func (flag *flagList) String() string {
return strings.Join(*flag, ", ")
}

func (flag *flagList) Set(value string) error {
if !contains(supportedDevices, value) {
setupLog.Error(nil, fmt.Sprintf("Unsupported device: %s", value))
os.Exit(1)
}

if contains(devices, value) {
setupLog.Error(nil, fmt.Sprintf("Duplicate device: %s", value))
os.Exit(1)
}

*flag = append(*flag, value)
return nil
}

func contains(arr []string, val string) bool {
for _, s := range arr {
if s == val {
return true
}
}
return false
}

func main() {
var metricsAddr string
var devicePluginNamespace string
var enableLeaderElection bool
var pm *patcher.PatcherManager

ctrl.SetLogger(klogr.New())

flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&devicePluginNamespace, "deviceplugin-namespace", metav1.NamespaceSystem, "The namespace where deviceplugin daemonsets are created")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.Var(&devices, "devices", "Device(s) to set up.")
flag.Parse()

ctrl.SetLogger(klogr.New())
if len(devices) == 0 {
devices = supportedDevices
}

setupControllerAndWebhook := devicePluginControllerAndWebhook{
"dsa": dsa.SetupReconciler,
Expand Down Expand Up @@ -95,40 +137,43 @@ func main() {

withWebhook := true

// TODO(mythi): add a StringVar flag to select which controllers to enable
for _, device := range []string{"dsa", "fpga", "gpu", "qat", "sgx"} {
for _, device := range devices {
if err = setupControllerAndWebhook[device](mgr, ns, withWebhook); err != nil {
setupLog.Error(err, "unable to initialize controller", "controller", device)
os.Exit(1)
}
}

pm := patcher.NewPatcherManager(mgr.GetLogger().WithName("webhooks").WithName("Fpga"))
if contains(devices, "sgx") {
mgr.GetWebhookServer().Register("/pods-sgx", &webhook.Admission{
Handler: &sgxwebhook.SgxMutator{Client: mgr.GetClient()},
})
}

mgr.GetWebhookServer().Register("/pods", &webhook.Admission{
Handler: admission.HandlerFunc(pm.GetPodMutator()),
})
if contains(devices, "fpga") {
pm = patcher.NewPatcherManager(mgr.GetLogger().WithName("webhooks").WithName("Fpga"))

mgr.GetWebhookServer().Register("/pods-sgx", &webhook.Admission{
Handler: &sgxwebhook.SgxMutator{Client: mgr.GetClient()},
})
mgr.GetWebhookServer().Register("/pods", &webhook.Admission{
Handler: admission.HandlerFunc(pm.GetPodMutator()),
})

if err = (&fpgacontroller.AcceleratorFunctionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
PatcherManager: pm,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "AcceleratorFunction")
os.Exit(1)
}
if err = (&fpgacontroller.AcceleratorFunctionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
PatcherManager: pm,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "AcceleratorFunction")
os.Exit(1)
}

if err = (&fpgacontroller.FpgaRegionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
PatcherManager: pm,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "FpgaRegion")
os.Exit(1)
if err = (&fpgacontroller.FpgaRegionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
PatcherManager: pm,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "FpgaRegion")
os.Exit(1)
}
}

setupLog.Info("starting manager")
Expand Down
14 changes: 14 additions & 0 deletions deployments/operator/device/dsa/dsa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: inteldeviceplugins-controller-manager
namespace: inteldeviceplugins-system
spec:
template:
spec:
containers:
- args:
- --metrics-addr=127.0.0.1:8080
- --enable-leader-election
- --device=dsa
name: manager
5 changes: 5 additions & 0 deletions deployments/operator/device/dsa/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../../default

patchesStrategicMerge:
- dsa.yaml
14 changes: 14 additions & 0 deletions deployments/operator/device/fpga/fpga.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: inteldeviceplugins-controller-manager
namespace: inteldeviceplugins-system
spec:
template:
spec:
containers:
- args:
- --metrics-addr=127.0.0.1:8080
- --enable-leader-election
- --device=fpga
name: manager
5 changes: 5 additions & 0 deletions deployments/operator/device/fpga/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../../default

patchesStrategicMerge:
- fpga.yaml
14 changes: 14 additions & 0 deletions deployments/operator/device/gpu/gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: inteldeviceplugins-controller-manager
namespace: inteldeviceplugins-system
spec:
template:
spec:
containers:
- args:
- --metrics-addr=127.0.0.1:8080
- --enable-leader-election
- --device=gpu
name: manager
5 changes: 5 additions & 0 deletions deployments/operator/device/gpu/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../../default

patchesStrategicMerge:
- gpu.yaml
5 changes: 5 additions & 0 deletions deployments/operator/device/qat/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../../default

patchesStrategicMerge:
- qat.yaml
14 changes: 14 additions & 0 deletions deployments/operator/device/qat/qat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: inteldeviceplugins-controller-manager
namespace: inteldeviceplugins-system
spec:
template:
spec:
containers:
- args:
- --metrics-addr=127.0.0.1:8080
- --enable-leader-election
- --device=qat
name: manager
5 changes: 5 additions & 0 deletions deployments/operator/device/sgx/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bases:
- ../../default

patchesStrategicMerge:
- sgx.yaml
14 changes: 14 additions & 0 deletions deployments/operator/device/sgx/sgx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: inteldeviceplugins-controller-manager
namespace: inteldeviceplugins-system
spec:
template:
spec:
containers:
- args:
- --metrics-addr=127.0.0.1:8080
- --enable-leader-election
- --device=sgx
name: manager

0 comments on commit 3161459

Please sign in to comment.