Skip to content

Commit

Permalink
fix(platform): pre allocate svc ip for qgpu scheduler (tkestack#2040)
Browse files Browse the repository at this point in the history
  • Loading branch information
Lis committed Jul 28, 2022
1 parent b259718 commit 0e1412b
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
10 changes: 8 additions & 2 deletions pkg/platform/provider/baremetal/cluster/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ func completeServiceIP(cluster *v1.Cluster) error {
cluster.Annotations = make(map[string]string)
}
for index, name := range map[int]string{
constants.GPUQuotaAdmissionIPIndex: constants.GPUQuotaAdmissionIPAnnotaion,
constants.GPUQuotaAdmissionIPIndex: constants.GPUQuotaAdmissionIPAnnotaion,
constants.QGPUQuotaAdmissionIPIndex: constants.QGPUQuotaAdmissionIPAnnotaion,
} {
ip, err := GetIndexedIP(cluster.Status.ServiceCIDR, index)
if err != nil {
Expand Down Expand Up @@ -756,11 +757,16 @@ func (p *Provider) EnsurePrepareForControlplane(ctx context.Context, c *v1.Clust
oidcCa, _ := ioutil.ReadFile(constants.OIDCConfigFile)
auditPolicyData, _ := ioutil.ReadFile(constants.AuditPolicyConfigFile)
GPUQuotaAdmissionHost := c.Annotations[constants.GPUQuotaAdmissionIPAnnotaion]
QGPUQuotaAdmissionHost := c.Annotations[constants.QGPUQuotaAdmissionIPAnnotaion]
if GPUQuotaAdmissionHost == "" {
GPUQuotaAdmissionHost = "gpu-quota-admission"
}
if QGPUQuotaAdmissionHost == "" {
GPUQuotaAdmissionHost = "qgpu-quota-admission"
}
schedulerPolicyConfig, err := template.ParseString(schedulerPolicyConfig, map[string]interface{}{
"GPUQuotaAdmissionHost": GPUQuotaAdmissionHost,
"GPUQuotaAdmissionHost": GPUQuotaAdmissionHost,
"QGPUQuotaAdmissionHost": QGPUQuotaAdmissionHost,
})
if err != nil {
return errors.Wrap(err, "parse schedulerPolicyConfig error")
Expand Down
17 changes: 17 additions & 0 deletions pkg/platform/provider/baremetal/cluster/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,23 @@ const (
}
],
"nodeCacheCapable" : false
},
{
"urlPrefix": "http://{{.QGPUQuotaAdmissionHost}}:12345/scheduler",
"filterVerb" : "filter",
"prebindVerb": "prebind",
"unreserveVerb": "unreserve",
"prioritizeVerb": "priorities",
"nodeCacheCapable": true,
"weight": 10,
"managedResources" : [
{
"name": "tke.cloud.tencent.com/qgpu-core"
},
{
"name" : "tke.cloud.tencent.com/qgpu-memory"
}
]
}
],
"kind" : "Policy"
Expand Down
10 changes: 6 additions & 4 deletions pkg/platform/provider/baremetal/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,12 @@ const (
MetricsServerManifest = ManifestsDir + "metrics-server/metrics-server.yaml"
CiliumManifest = SrcDir + "cilium/*.yaml"

KUBERNETES = 1
DNSIPIndex = 10
GPUQuotaAdmissionIPIndex = 9
GPUQuotaAdmissionIPAnnotaion = platformv1.GroupName + "/gpu-quota-admission-ip"
KUBERNETES = 1
DNSIPIndex = 10
GPUQuotaAdmissionIPIndex = 9
QGPUQuotaAdmissionIPIndex = 8
GPUQuotaAdmissionIPAnnotaion = platformv1.GroupName + "/gpu-quota-admission-ip"
QGPUQuotaAdmissionIPAnnotaion = platformv1.GroupName + "/qgpu-quota-admission-ip"

// RenewCertsTimeThreshold control how long time left to renew certs
RenewCertsTimeThreshold = 30 * 24 * time.Hour
Expand Down

0 comments on commit 0e1412b

Please sign in to comment.