Skip to content

Commit

Permalink
fix: allow onExit to run if wf exceeds activeDeadlineSeconds. Fixes #…
Browse files Browse the repository at this point in the history
  • Loading branch information
markterm committed Apr 9, 2020
1 parent ffc43ce commit 6c685c5
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 12 deletions.
24 changes: 14 additions & 10 deletions workflow/controller/exec_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,21 @@ func (woc *wfOperationCtx) applyExecutionControl(pod *apiv1.Pod, wfNodesLock *sy
// Check if we are past the workflow deadline. If we are, and the pod is still pending
// then we should simply delete it and mark the pod as Failed
if woc.workflowDeadline != nil && time.Now().UTC().After(*woc.workflowDeadline) {
woc.log.Infof("Deleting Pending pod %s/%s which has exceeded workflow deadline %s", pod.Namespace, pod.Name, woc.workflowDeadline)
err := woc.controller.kubeclientset.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{})
if err == nil {
wfNodesLock.Lock()
defer wfNodesLock.Unlock()
node := woc.wf.Status.Nodes[pod.Name]
woc.markNodePhase(node.Name, wfv1.NodeFailed, fmt.Sprintf("step exceeded workflow deadline %s", *woc.workflowDeadline))
return nil
//pods that are part of an onExit handler aren't subject to the deadline
_, onExitPod := pod.Labels[common.LabelKeyOnExit]
if !onExitPod {
woc.log.Infof("Deleting Pending pod %s/%s which has exceeded workflow deadline %s", pod.Namespace, pod.Name, woc.workflowDeadline)
err := woc.controller.kubeclientset.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{})
if err == nil {
wfNodesLock.Lock()
defer wfNodesLock.Unlock()
node := woc.wf.Status.Nodes[pod.Name]
woc.markNodePhase(node.Name, wfv1.NodeFailed, fmt.Sprintf("step exceeded workflow deadline %s", *woc.workflowDeadline))
return nil
}
// If we fail to delete the pod, fall back to setting the annotation
woc.log.Warnf("Failed to delete %s/%s: %v", pod.Namespace, pod.Name, err)
}
// If we fail to delete the pod, fall back to setting the annotation
woc.log.Warnf("Failed to delete %s/%s: %v", pod.Namespace, pod.Name, err)
}
}

Expand Down
44 changes: 44 additions & 0 deletions workflow/controller/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2306,6 +2306,50 @@ func TestStepsOnExitFailures(t *testing.T) {
assert.Contains(t, woc.globalParams[common.GlobalVarWorkflowFailures], `[{\"displayName\":\"exit-handlers\",\"message\":\"Unexpected pod phase for exit-handlers: \",\"templateName\":\"intentional-fail\",\"phase\":\"Error\",\"podName\":\"exit-handlers\"`)
}

var onExitTimeout = `
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
name: exit-handlers
spec:
entrypoint: intentional-fail
activeDeadlineSeconds: 0
onExit: exit-handler
templates:
- name: intentional-fail
suspend: {}
- name: exit-handler
container:
image: alpine:latest
command: [sh, -c]
args: ["echo send e-mail: {{workflow.name}} {{workflow.status}}."]
`

func TestStepsOnExitTimeout(t *testing.T) {
controller := newController()
wfcset := controller.wfclientset.ArgoprojV1alpha1().Workflows("")

// Test list expansion
wf := unmarshalWF(onExitTimeout)
wf, err := wfcset.Create(wf)
assert.Nil(t, err)
woc := newWorkflowOperationCtx(wf, controller)

woc.operate()
woc.operate()

wf, err = wfcset.Get(wf.ObjectMeta.Name, metav1.GetOptions{})
assert.Nil(t, err)
onExitNodeIsPresent := false
for _, node := range wf.Status.Nodes {
if strings.Contains(node.Name, "onExit") && node.Phase == wfv1.NodePending {
onExitNodeIsPresent = true
break
}
}
assert.True(t, onExitNodeIsPresent)
}

var invalidSpec = `
apiVersion: argoproj.io/v1alpha1
kind: Workflow
Expand Down
4 changes: 2 additions & 2 deletions workflow/controller/workflowpod.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Cont

var activeDeadlineSeconds *int64
wfDeadline := woc.getWorkflowDeadline()
if wfDeadline == nil {
if wfDeadline == nil || opts.onExitPod { //ignore the workflow deadline for exit handler so they still run if the deadline has passed
activeDeadlineSeconds = tmpl.ActiveDeadlineSeconds
} else {
wfActiveDeadlineSeconds := int64((*wfDeadline).Sub(time.Now().UTC()).Seconds())
if wfActiveDeadlineSeconds < 0 {
if wfActiveDeadlineSeconds <= 0 {
return nil, nil
} else if tmpl.ActiveDeadlineSeconds == nil || wfActiveDeadlineSeconds < *tmpl.ActiveDeadlineSeconds {
activeDeadlineSeconds = &wfActiveDeadlineSeconds
Expand Down

0 comments on commit 6c685c5

Please sign in to comment.