From 523584f13cb8e92a2e3b7eb2b8f27cb47a983fef Mon Sep 17 00:00:00 2001 From: xianlubird Date: Fri, 21 Jun 2019 16:14:50 +0800 Subject: [PATCH 1/5] Fix bug: dag will missing some nodes when another branch node fails --- workflow/controller/dag.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index ec3442bf6b7b..d69345b8c249 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -250,6 +250,25 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) { dependenciesSuccessful = false } continue + } else if depNode.Type == wfv1.NodeTypeRetry { + // For retry type node + // Maybe one of children node already success, but the retry node hasn't sync to latest status + // So skip these steps, the next process function will make the status to be right + tmpContinueFlag := false + for _, tmpChildName := range depNode.Children { + tmpChild, tmpOK := woc.wf.Status.Nodes[tmpChildName] + if !tmpOK { + continue + } + if tmpChild.Successful() { + tmpContinueFlag = true + break + } + } + if tmpContinueFlag { + woc.markNodePhase(depNode.Name, wfv1.NodeSucceeded) + continue + } } } dependenciesCompleted = false From ed62d1531b905beaf06f7b919a3ba828505007ad Mon Sep 17 00:00:00 2001 From: xianlubird Date: Fri, 21 Jun 2019 16:26:20 +0800 Subject: [PATCH 2/5] Add test file --- .../functional/dag-noroot-branch-failed.yaml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 test/e2e/functional/dag-noroot-branch-failed.yaml diff --git a/test/e2e/functional/dag-noroot-branch-failed.yaml b/test/e2e/functional/dag-noroot-branch-failed.yaml new file mode 100644 index 000000000000..867d4cc6f8b7 --- /dev/null +++ b/test/e2e/functional/dag-noroot-branch-failed.yaml @@ -0,0 +1,48 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-primay-branch- +spec: + entrypoint: statis + templates: + - name: a + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: b + retryStrategy: + limit: 2 + container: + image: alpine:latest + command: [sh, -c] + args: ["sleep 30; echo haha"] + - name: c + retryStrategy: + limit: 3 + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 2"] + - name: d + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: statis + dag: + tasks: + - name: A + template: a + - name: B + dependencies: [A] + template: b + - name: C + dependencies: [A] + template: c + - name: D + dependencies: [B] + template: d + - name: E + dependencies: [D] + template: d \ No newline at end of file From eab546948d06acb4ec9c7619a494fef62dc463dc Mon Sep 17 00:00:00 2001 From: xianlubird Date: Tue, 25 Jun 2019 16:02:44 +0800 Subject: [PATCH 3/5] New Feature: provide failFast flag, allow a DAG to run all branches of the DAG (either success or failure) --- api/openapi-spec/swagger.json | 4 ++ .../workflow/v1alpha1/openapi_generated.go | 7 +++ pkg/apis/workflow/v1alpha1/types.go | 8 +++ .../v1alpha1/zz_generated.deepcopy.go | 5 ++ test/e2e/functional/dag-disable-failFast.yaml | 49 +++++++++++++++++++ workflow/controller/dag.go | 41 +++++++++------- 6 files changed, 95 insertions(+), 19 deletions(-) create mode 100644 test/e2e/functional/dag-disable-failFast.yaml diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 3617d5d51edb..b56e73ae36ac 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -1160,6 +1160,10 @@ "description": "Entrypoint is a template reference to the starting point of the workflow", "type": "string" }, + "failFast": { + "description": "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + "type": "boolean" + }, "hostAliases": { "description": "HostAliases is an optional list of hosts and IPs that will be injected into the pod spec", "type": "array", diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 392818bd5a32..032702ee9651 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -2288,6 +2288,13 @@ func schema_pkg_apis_workflow_v1alpha1_WorkflowSpec(ref common.ReferenceCallback }, }, }, + "failFast": { + SchemaProps: spec.SchemaProps{ + Description: "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + Type: []string{"boolean"}, + Format: "", + }, + }, }, Required: []string{"templates", "entrypoint"}, }, diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go index f5d35003437a..50ca966e7130 100644 --- a/pkg/apis/workflow/v1alpha1/types.go +++ b/pkg/apis/workflow/v1alpha1/types.go @@ -167,6 +167,14 @@ type WorkflowSpec struct { // HostAliases is an optional list of hosts and IPs that will be injected into the pod spec HostAliases []apiv1.HostAlias `json:"hostAliases,omitempty"` + + // This flag is for DAG logic. The DAG logic has a built-in "fail fast" feature to stop scheduling new steps, + // as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed + // before failing the DAG itself. + // The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to + // completion (either success or failure), regardless of the failed outcomes of branches in the DAG. + // More info and example about this feature at https://github.com/argoproj/argo/issues/1442 + FailFast *bool `json:"failFast,omitempty"` } // Template is a reusable and composable unit of execution in a workflow diff --git a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go index 48f38e70c121..6d61b7cf3b81 100644 --- a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go @@ -1068,6 +1068,11 @@ func (in *WorkflowSpec) DeepCopyInto(out *WorkflowSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailFast != nil { + in, out := &in.FailFast, &out.FailFast + *out = new(bool) + **out = **in + } return } diff --git a/test/e2e/functional/dag-disable-failFast.yaml b/test/e2e/functional/dag-disable-failFast.yaml new file mode 100644 index 000000000000..13b11f2fdaf9 --- /dev/null +++ b/test/e2e/functional/dag-disable-failFast.yaml @@ -0,0 +1,49 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-primay-branch- +spec: + failFast: false + entrypoint: statis + templates: + - name: a + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: b + retryStrategy: + limit: 2 + container: + image: alpine:latest + command: [sh, -c] + args: ["sleep 30; echo haha"] + - name: c + retryStrategy: + limit: 3 + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 2"] + - name: d + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: statis + dag: + tasks: + - name: A + template: a + - name: B + dependencies: [A] + template: b + - name: C + dependencies: [A] + template: c + - name: D + dependencies: [B] + template: d + - name: E + dependencies: [D] + template: d \ No newline at end of file diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index d69345b8c249..8803e8eaf0e2 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -87,7 +87,29 @@ func (d *dagContext) assessDAGPhase(targetTasks []string, nodes map[string]wfv1. retriesExhausted = false } } + if unsuccessfulPhase != "" { + // If failFast set to false, we should return Running to continue this workflow for other DAG branch + if d.wf.Spec.FailFast != nil && !*d.wf.Spec.FailFast { + tmpOverAllFinished := true + // If all the nodes have finished, we should mark the failed node to finish overall workflow + // So we should check all the targetTasks have finished + for _, tmpDepName := range targetTasks { + tmpDepNode := d.getTaskNode(tmpDepName) + if tmpDepNode == nil { + tmpOverAllFinished = false + break + } + if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { + tmpOverAllFinished = false + break + } + } + if !tmpOverAllFinished { + return wfv1.NodeRunning + } + } + // if we were unsuccessful, we can return *only* if all retry nodes have ben exhausted. if retriesExhausted { return unsuccessfulPhase @@ -250,25 +272,6 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) { dependenciesSuccessful = false } continue - } else if depNode.Type == wfv1.NodeTypeRetry { - // For retry type node - // Maybe one of children node already success, but the retry node hasn't sync to latest status - // So skip these steps, the next process function will make the status to be right - tmpContinueFlag := false - for _, tmpChildName := range depNode.Children { - tmpChild, tmpOK := woc.wf.Status.Nodes[tmpChildName] - if !tmpOK { - continue - } - if tmpChild.Successful() { - tmpContinueFlag = true - break - } - } - if tmpContinueFlag { - woc.markNodePhase(depNode.Name, wfv1.NodeSucceeded) - continue - } } } dependenciesCompleted = false From fb7f06cb900096afcaddb103b03b58eaa21faec8 Mon Sep 17 00:00:00 2001 From: xianlubird Date: Wed, 26 Jun 2019 17:17:54 +0800 Subject: [PATCH 4/5] Move failFast flag to DAG template spec --- api/openapi-spec/swagger.json | 8 +- .../workflow/v1alpha1/openapi_generated.go | 14 +- pkg/apis/workflow/v1alpha1/types.go | 16 +- .../v1alpha1/zz_generated.deepcopy.go | 10 +- test/e2e/functional/dag-disable-failFast.yaml | 2 +- workflow/controller/dag.go | 34 +-- workflow/controller/dag_test.go | 8 + .../testdata/dag-disable-fail-fast.yaml | 211 ++++++++++++++++++ 8 files changed, 262 insertions(+), 41 deletions(-) create mode 100644 workflow/controller/testdata/dag-disable-fail-fast.yaml diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index b56e73ae36ac..12367812521b 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -235,6 +235,10 @@ "tasks" ], "properties": { + "failFast": { + "description": "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + "type": "boolean" + }, "target": { "description": "Target are one or more names of targets to execute in a DAG", "type": "string" @@ -1160,10 +1164,6 @@ "description": "Entrypoint is a template reference to the starting point of the workflow", "type": "string" }, - "failFast": { - "description": "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", - "type": "boolean" - }, "hostAliases": { "description": "HostAliases is an optional list of hosts and IPs that will be injected into the pod spec", "type": "array", diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 032702ee9651..85d9389a626b 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -478,6 +478,13 @@ func schema_pkg_apis_workflow_v1alpha1_DAGTemplate(ref common.ReferenceCallback) }, }, }, + "failFast": { + SchemaProps: spec.SchemaProps{ + Description: "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + Type: []string{"boolean"}, + Format: "", + }, + }, }, Required: []string{"tasks"}, }, @@ -2288,13 +2295,6 @@ func schema_pkg_apis_workflow_v1alpha1_WorkflowSpec(ref common.ReferenceCallback }, }, }, - "failFast": { - SchemaProps: spec.SchemaProps{ - Description: "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", - Type: []string{"boolean"}, - Format: "", - }, - }, }, Required: []string{"templates", "entrypoint"}, }, diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go index 50ca966e7130..17f0d0760e76 100644 --- a/pkg/apis/workflow/v1alpha1/types.go +++ b/pkg/apis/workflow/v1alpha1/types.go @@ -167,14 +167,6 @@ type WorkflowSpec struct { // HostAliases is an optional list of hosts and IPs that will be injected into the pod spec HostAliases []apiv1.HostAlias `json:"hostAliases,omitempty"` - - // This flag is for DAG logic. The DAG logic has a built-in "fail fast" feature to stop scheduling new steps, - // as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed - // before failing the DAG itself. - // The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to - // completion (either success or failure), regardless of the failed outcomes of branches in the DAG. - // More info and example about this feature at https://github.com/argoproj/argo/issues/1442 - FailFast *bool `json:"failFast,omitempty"` } // Template is a reusable and composable unit of execution in a workflow @@ -899,6 +891,14 @@ type DAGTemplate struct { // Tasks are a list of DAG tasks Tasks []DAGTask `json:"tasks"` + + // This flag is for DAG logic. The DAG logic has a built-in "fail fast" feature to stop scheduling new steps, + // as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed + // before failing the DAG itself. + // The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to + // completion (either success or failure), regardless of the failed outcomes of branches in the DAG. + // More info and example about this feature at https://github.com/argoproj/argo/issues/1442 + FailFast *bool `json:"failFast,omitempty"` } // DAGTask represents a node in the graph during DAG execution diff --git a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go index 6d61b7cf3b81..7977fd1ec5ed 100644 --- a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go @@ -251,6 +251,11 @@ func (in *DAGTemplate) DeepCopyInto(out *DAGTemplate) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailFast != nil { + in, out := &in.FailFast, &out.FailFast + *out = new(bool) + **out = **in + } return } @@ -1068,11 +1073,6 @@ func (in *WorkflowSpec) DeepCopyInto(out *WorkflowSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.FailFast != nil { - in, out := &in.FailFast, &out.FailFast - *out = new(bool) - **out = **in - } return } diff --git a/test/e2e/functional/dag-disable-failFast.yaml b/test/e2e/functional/dag-disable-failFast.yaml index 13b11f2fdaf9..683c990a1939 100644 --- a/test/e2e/functional/dag-disable-failFast.yaml +++ b/test/e2e/functional/dag-disable-failFast.yaml @@ -3,7 +3,6 @@ kind: Workflow metadata: generateName: dag-primay-branch- spec: - failFast: false entrypoint: statis templates: - name: a @@ -32,6 +31,7 @@ spec: args: ["hello world"] - name: statis dag: + failFast: false tasks: - name: A template: a diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index 8803e8eaf0e2..2131f9d32a60 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -89,25 +89,27 @@ func (d *dagContext) assessDAGPhase(targetTasks []string, nodes map[string]wfv1. } if unsuccessfulPhase != "" { - // If failFast set to false, we should return Running to continue this workflow for other DAG branch - if d.wf.Spec.FailFast != nil && !*d.wf.Spec.FailFast { - tmpOverAllFinished := true - // If all the nodes have finished, we should mark the failed node to finish overall workflow - // So we should check all the targetTasks have finished - for _, tmpDepName := range targetTasks { - tmpDepNode := d.getTaskNode(tmpDepName) - if tmpDepNode == nil { - tmpOverAllFinished = false - break + if d.tmpl != nil && d.tmpl.DAG != nil { + // If failFast set to false, we should return Running to continue this workflow for other DAG branch + if d.tmpl.DAG.FailFast != nil && !*d.tmpl.DAG.FailFast { + tmpOverAllFinished := true + // If all the nodes have finished, we should mark the failed node to finish overall workflow + // So we should check all the targetTasks have finished + for _, tmpDepName := range targetTasks { + tmpDepNode := d.getTaskNode(tmpDepName) + if tmpDepNode == nil { + tmpOverAllFinished = false + break + } + if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { + tmpOverAllFinished = false + break + } } - if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { - tmpOverAllFinished = false - break + if !tmpOverAllFinished { + return wfv1.NodeRunning } } - if !tmpOverAllFinished { - return wfv1.NodeRunning - } } // if we were unsuccessful, we can return *only* if all retry nodes have ben exhausted. diff --git a/workflow/controller/dag_test.go b/workflow/controller/dag_test.go index c9aa2b8ecc53..7f51de45d741 100644 --- a/workflow/controller/dag_test.go +++ b/workflow/controller/dag_test.go @@ -32,3 +32,11 @@ func TestDagRetryExhaustedXfail(t *testing.T) { woc.operate() assert.Equal(t, string(wfv1.NodeFailed), string(woc.wf.Status.Phase)) } + +// TestDagDisableFailFast test disable fail fast function +func TestDagDisableFailFast(t *testing.T) { + wf := test.LoadTestWorkflow("testdata/dag-disable-fail-fast.yaml") + woc := newWoc(*wf) + woc.operate() + assert.Equal(t, string(wfv1.NodeFailed), string(woc.wf.Status.Phase)) +} diff --git a/workflow/controller/testdata/dag-disable-fail-fast.yaml b/workflow/controller/testdata/dag-disable-fail-fast.yaml new file mode 100644 index 000000000000..45e9772bcb64 --- /dev/null +++ b/workflow/controller/testdata/dag-disable-fail-fast.yaml @@ -0,0 +1,211 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + creationTimestamp: 2019-06-26T09:11:58Z + generateName: dag-disable-fail-fast- + generation: 1 + labels: + workflows.argoproj.io/completed: "true" + workflows.argoproj.io/phase: Failed + name: dag-disable-fail-fast-r6xdc + namespace: default + resourceVersion: "15772210" + selfLink: /apis/argoproj.io/v1alpha1/namespaces/default/workflows/dag-disable-fail-fast-r6xdc + uid: 734516b5-97f2-11e9-9fea-00163e00cf4e +spec: + arguments: {} + entrypoint: statis + templates: + - container: + args: + - hello world + command: + - cowsay + image: docker/whalesay:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: a + outputs: {} + - container: + args: + - sleep 30; echo haha + command: + - sh + - -c + image: alpine:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: b + outputs: {} + retryStrategy: + limit: 2 + - container: + args: + - echo intentional failure; exit 2 + command: + - sh + - -c + image: alpine:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: c + outputs: {} + retryStrategy: + limit: 1 + - container: + args: + - hello world + command: + - cowsay + image: docker/whalesay:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: d + outputs: {} + - dag: + failFast: false + tasks: + - arguments: {} + name: A + template: a + - arguments: {} + dependencies: + - A + name: B + template: b + - arguments: {} + dependencies: + - A + name: C + template: c + - arguments: {} + dependencies: + - B + name: D + template: d + - arguments: {} + dependencies: + - D + name: E + template: d + inputs: {} + metadata: {} + name: statis + outputs: {} +status: + finishedAt: 2019-06-26T09:12:46Z + nodes: + dag-disable-fail-fast-r6xdc: + children: + - dag-disable-fail-fast-r6xdc-3928436299 + displayName: dag-disable-fail-fast-r6xdc + finishedAt: 2019-06-26T09:12:46Z + id: dag-disable-fail-fast-r6xdc + name: dag-disable-fail-fast-r6xdc + phase: Failed + startedAt: 2019-06-26T09:11:58Z + templateName: statis + type: DAG + dag-disable-fail-fast-r6xdc-3256495944: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: C(0) + finishedAt: 2019-06-26T09:12:08Z + id: dag-disable-fail-fast-r6xdc-3256495944 + message: failed with exit code 2 + name: dag-disable-fail-fast-r6xdc.C(0) + phase: Failed + startedAt: 2019-06-26T09:12:03Z + templateName: c + type: Pod + dag-disable-fail-fast-r6xdc-3457680277: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: C(1) + finishedAt: 2019-06-26T09:12:12Z + id: dag-disable-fail-fast-r6xdc-3457680277 + message: failed with exit code 2 + name: dag-disable-fail-fast-r6xdc.C(1) + phase: Failed + startedAt: 2019-06-26T09:12:09Z + templateName: c + type: Pod + dag-disable-fail-fast-r6xdc-3928436299: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3945213918 + - dag-disable-fail-fast-r6xdc-3961991537 + displayName: A + finishedAt: 2019-06-26T09:12:02Z + id: dag-disable-fail-fast-r6xdc-3928436299 + name: dag-disable-fail-fast-r6xdc.A + phase: Succeeded + startedAt: 2019-06-26T09:11:58Z + templateName: a + type: Pod + dag-disable-fail-fast-r6xdc-3945213918: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-4286504589 + displayName: B + finishedAt: 2019-06-26T09:12:36Z + id: dag-disable-fail-fast-r6xdc-3945213918 + name: dag-disable-fail-fast-r6xdc.B + phase: Succeeded + startedAt: 2019-06-26T09:12:03Z + type: Retry + dag-disable-fail-fast-r6xdc-3961991537: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3256495944 + - dag-disable-fail-fast-r6xdc-3457680277 + displayName: C + finishedAt: 2019-06-26T09:12:13Z + id: dag-disable-fail-fast-r6xdc-3961991537 + message: No more retries left + name: dag-disable-fail-fast-r6xdc.C + phase: Failed + startedAt: 2019-06-26T09:12:03Z + type: Retry + dag-disable-fail-fast-r6xdc-3978769156: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3995546775 + displayName: D + finishedAt: 2019-06-26T09:12:41Z + id: dag-disable-fail-fast-r6xdc-3978769156 + name: dag-disable-fail-fast-r6xdc.D + phase: Succeeded + startedAt: 2019-06-26T09:12:37Z + templateName: d + type: Pod + dag-disable-fail-fast-r6xdc-3995546775: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: E + finishedAt: 2019-06-26T09:12:45Z + id: dag-disable-fail-fast-r6xdc-3995546775 + name: dag-disable-fail-fast-r6xdc.E + phase: Succeeded + startedAt: 2019-06-26T09:12:42Z + templateName: d + type: Pod + dag-disable-fail-fast-r6xdc-4286504589: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3978769156 + displayName: B(0) + finishedAt: 2019-06-26T09:12:36Z + id: dag-disable-fail-fast-r6xdc-4286504589 + name: dag-disable-fail-fast-r6xdc.B(0) + phase: Succeeded + startedAt: 2019-06-26T09:12:03Z + templateName: b + type: Pod + phase: Failed + startedAt: 2019-06-26T09:11:58Z \ No newline at end of file From 744107b24a3d9d75650c0c955373c5b77682b9e3 Mon Sep 17 00:00:00 2001 From: xianlubird Date: Fri, 28 Jun 2019 19:42:28 +0800 Subject: [PATCH 5/5] * Move test case file to test/e2e/expectedfailures since it is expected to fail * Remove unused check code --- .../dag-disable-failFast.yaml | 0 .../dag-noroot-branch-failed.yaml | 0 workflow/controller/dag.go | 34 +++++++++---------- 3 files changed, 16 insertions(+), 18 deletions(-) rename test/e2e/{functional => expectedfailures}/dag-disable-failFast.yaml (100%) rename test/e2e/{functional => expectedfailures}/dag-noroot-branch-failed.yaml (100%) diff --git a/test/e2e/functional/dag-disable-failFast.yaml b/test/e2e/expectedfailures/dag-disable-failFast.yaml similarity index 100% rename from test/e2e/functional/dag-disable-failFast.yaml rename to test/e2e/expectedfailures/dag-disable-failFast.yaml diff --git a/test/e2e/functional/dag-noroot-branch-failed.yaml b/test/e2e/expectedfailures/dag-noroot-branch-failed.yaml similarity index 100% rename from test/e2e/functional/dag-noroot-branch-failed.yaml rename to test/e2e/expectedfailures/dag-noroot-branch-failed.yaml diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index 2131f9d32a60..ed9e90a25e65 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -89,27 +89,25 @@ func (d *dagContext) assessDAGPhase(targetTasks []string, nodes map[string]wfv1. } if unsuccessfulPhase != "" { - if d.tmpl != nil && d.tmpl.DAG != nil { - // If failFast set to false, we should return Running to continue this workflow for other DAG branch - if d.tmpl.DAG.FailFast != nil && !*d.tmpl.DAG.FailFast { - tmpOverAllFinished := true - // If all the nodes have finished, we should mark the failed node to finish overall workflow - // So we should check all the targetTasks have finished - for _, tmpDepName := range targetTasks { - tmpDepNode := d.getTaskNode(tmpDepName) - if tmpDepNode == nil { - tmpOverAllFinished = false - break - } - if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { - tmpOverAllFinished = false - break - } + // If failFast set to false, we should return Running to continue this workflow for other DAG branch + if d.tmpl.DAG.FailFast != nil && !*d.tmpl.DAG.FailFast { + tmpOverAllFinished := true + // If all the nodes have finished, we should mark the failed node to finish overall workflow + // So we should check all the targetTasks have finished + for _, tmpDepName := range targetTasks { + tmpDepNode := d.getTaskNode(tmpDepName) + if tmpDepNode == nil { + tmpOverAllFinished = false + break } - if !tmpOverAllFinished { - return wfv1.NodeRunning + if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { + tmpOverAllFinished = false + break } } + if !tmpOverAllFinished { + return wfv1.NodeRunning + } } // if we were unsuccessful, we can return *only* if all retry nodes have ben exhausted.