From 9597f82cd7a8b65cb03e4dfaa3023dcf20619b9d Mon Sep 17 00:00:00 2001
From: Jesse Suen <jessesuen@users.noreply.github.com>
Date: Thu, 18 Jan 2018 16:38:27 -0800
Subject: [PATCH 1/8] Initial support for DAG based workflows (#693)

* Initial support for DAG workflows
* Make DAG a new template type instead of a different mode of execution
* Capture outbound nodes to determine how to connect nested templates
* Switch to dagre-d3 to visualize graph (from visjs)
* Support for referencing dependency outputs in DAG templates. Add DAG validataion
---
 examples/dag-coinflip.yaml                    |  44 +++
 examples/dag-diamond-steps.yaml               |  69 +++++
 examples/dag-diamond.yaml                     |  43 +++
 examples/dag-multiroot.yaml                   |  41 +++
 examples/dag-nested.yaml                      |  61 ++++
 hack/wfgraph.py                               | 134 ++++++++
 pkg/apis/workflow/v1alpha1/types.go           |  50 ++-
 .../v1alpha1/zz_generated.deepcopy.go         |  59 ++++
 test/e2e/expectedfailures/dag-fail.yaml       |  44 +++
 test/e2e/functional/dag-argument-passing.yaml |  34 +++
 workflow/common/validate.go                   | 170 +++++++++--
 workflow/common/validate_dag_test.go          | 174 +++++++++++
 workflow/controller/dag.go                    | 289 ++++++++++++++++++
 workflow/controller/dag_test.go               |   1 +
 workflow/controller/operator.go               | 155 +++++++---
 workflow/controller/operator_test.go          |  22 +-
 workflow/controller/steps.go                  |  76 +++--
 17 files changed, 1355 insertions(+), 111 deletions(-)
 create mode 100644 examples/dag-coinflip.yaml
 create mode 100644 examples/dag-diamond-steps.yaml
 create mode 100644 examples/dag-diamond.yaml
 create mode 100644 examples/dag-multiroot.yaml
 create mode 100644 examples/dag-nested.yaml
 create mode 100755 hack/wfgraph.py
 create mode 100644 test/e2e/expectedfailures/dag-fail.yaml
 create mode 100644 test/e2e/functional/dag-argument-passing.yaml
 create mode 100644 workflow/common/validate_dag_test.go
 create mode 100644 workflow/controller/dag.go
 create mode 100644 workflow/controller/dag_test.go

diff --git a/examples/dag-coinflip.yaml b/examples/dag-coinflip.yaml
new file mode 100644
index 000000000000..ab0ff76a4fce
--- /dev/null
+++ b/examples/dag-coinflip.yaml
@@ -0,0 +1,44 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-diamond-coinflip-
+spec:
+  entrypoint: diamond
+  templates:
+  - name: coinflip
+    steps:
+    - - name: flip-coin
+        template: flip-coin
+    - - name: heads
+        template: heads
+        when: "{{steps.flip-coin.outputs.result}} == heads"
+      - name: tails
+        template: coinflip
+        when: "{{steps.flip-coin.outputs.result}} == tails"
+  - name: flip-coin
+    script:
+      image: python:3.6
+      command: [python]
+      source: |
+        import random
+        result = "heads" if random.randint(0,1) == 0 else "tails"
+        print(result)
+  - name: heads
+    container:
+      image: alpine:3.6
+      command: [sh, -c]
+      args: ["echo \"it was heads\""]
+  - name: diamond
+    dag:
+      tasks:
+      - name: A
+        template: coinflip
+      - name: B
+        dependencies: [A]
+        template: coinflip
+      - name: C
+        dependencies: [A]
+        template: coinflip
+      - name: D
+        dependencies: [B, C]
+        template: coinflip
diff --git a/examples/dag-diamond-steps.yaml b/examples/dag-diamond-steps.yaml
new file mode 100644
index 000000000000..3a91bec17970
--- /dev/null
+++ b/examples/dag-diamond-steps.yaml
@@ -0,0 +1,69 @@
+# The following workflow executes a diamond workflow, with each
+# node comprising of three parallel fan-in fan-out steps.
+# 
+#           *
+#         / | \
+#       A1 A2  A3
+#         \ | /
+#           *
+#         /   \ 
+#       /       \
+#      *         *
+#    / | \     / | \
+#  B1  B2 B3  C1 C2 C3
+#    \ | /     \ | /
+#      *         *
+#       \       /
+#         \   /
+#           *
+#         / | \
+#       D1 D2  D3
+#         \ | /
+#           *
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-diamond-steps-
+spec:
+  entrypoint: diamond
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+  - name: echo-thrice
+    inputs:
+      parameters:
+      - name: message
+    steps:
+    - - name: echo
+        template: echo
+        arguments:
+          parameters:
+          - {name: message, value: "{{inputs.parameters.message}}{{item}}"}
+        withItems: [1,2,3]
+  - name: diamond
+    dag:
+      tasks:
+      - name: A
+        template: echo-thrice
+        arguments:
+          parameters: [{name: message, value: A}]
+      - name: B
+        dependencies: [A]
+        template: echo-thrice
+        arguments:
+          parameters: [{name: message, value: B}]
+      - name: C
+        dependencies: [A]
+        template: echo-thrice
+        arguments:
+          parameters: [{name: message, value: C}]
+      - name: D
+        dependencies: [B, C]
+        template: echo-thrice
+        arguments:
+          parameters: [{name: message, value: D}]
diff --git a/examples/dag-diamond.yaml b/examples/dag-diamond.yaml
new file mode 100644
index 000000000000..bdc644c6dd23
--- /dev/null
+++ b/examples/dag-diamond.yaml
@@ -0,0 +1,43 @@
+# The following workflow executes a diamond workflow
+# 
+#   A
+#  / \
+# B   C
+#  \ /
+#   D
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-diamond-
+spec:
+  entrypoint: diamond
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+  - name: diamond
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: [{name: message, value: A}]
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: B}]
+      - name: C
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: C}]
+      - name: D
+        dependencies: [B, C]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: D}]
diff --git a/examples/dag-multiroot.yaml b/examples/dag-multiroot.yaml
new file mode 100644
index 000000000000..f5e553af5507
--- /dev/null
+++ b/examples/dag-multiroot.yaml
@@ -0,0 +1,41 @@
+# The following workflow executes a multi-root workflow
+# 
+#   A   B
+#  / \ /
+# C   D
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-multiroot-
+spec:
+  entrypoint: multiroot
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+  - name: multiroot
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: [{name: message, value: A}]
+      - name: B
+        dependencies:
+        template: echo
+        arguments:
+          parameters: [{name: message, value: B}]
+      - name: C
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: C}]
+      - name: D
+        dependencies: [A, B]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: D}]
diff --git a/examples/dag-nested.yaml b/examples/dag-nested.yaml
new file mode 100644
index 000000000000..ab755bd15daa
--- /dev/null
+++ b/examples/dag-nested.yaml
@@ -0,0 +1,61 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-nested-
+spec:
+  entrypoint: diamond
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+  - name: diamond
+    dag:
+      tasks:
+      - name: A
+        template: nested-diamond
+        arguments:
+          parameters: [{name: message, value: A}]
+      - name: B
+        dependencies: [A]
+        template: nested-diamond
+        arguments:
+          parameters: [{name: message, value: B}]
+      - name: C
+        dependencies: [A]
+        template: nested-diamond
+        arguments:
+          parameters: [{name: message, value: C}]
+      - name: D
+        dependencies: [B, C]
+        template: nested-diamond
+        arguments:
+          parameters: [{name: message, value: D}]
+  - name: nested-diamond
+    inputs:
+      parameters:
+      - name: message
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: [{name: message, value: "{{inputs.parameters.message}}A"}]
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: "{{inputs.parameters.message}}B"}]
+      - name: C
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: "{{inputs.parameters.message}}C"}]
+      - name: D
+        dependencies: [B, C]
+        template: echo
+        arguments:
+          parameters: [{name: message, value: "{{inputs.parameters.message}}D"}]
diff --git a/hack/wfgraph.py b/hack/wfgraph.py
new file mode 100755
index 000000000000..96074ca9c54d
--- /dev/null
+++ b/hack/wfgraph.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import subprocess
+import tempfile
+
+from subprocess import run
+
+template = '''
+<!doctype html>
+
+<meta charset="utf-8">
+<title>%s</title>
+
+<link rel="stylesheet" href="demo.css">
+<script src="http://d3js.org/d3.v3.min.js" charset="utf-8"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/dagre-d3/0.4.17/dagre-d3.js"></script>
+
+<style id="css">
+body {
+  font: 300 14px 'Helvetica Neue', Helvetica;
+}
+
+.node rect,
+.node circle,
+.node ellipse {
+  stroke: #333;
+  fill: #fff;
+  stroke-width: 1px;
+}
+
+.edgePath path {
+  stroke: #333;
+  fill: #333;
+  stroke-width: 1.5px;
+}
+</style>
+
+<h2>%s</h2>
+
+<svg width=960 height=600><g/></svg>
+
+<script id="js">
+// Create a new directed graph
+var g = new dagreD3.graphlib.Graph().setGraph({});
+
+var nodes = 
+  %s
+;
+
+var edges = 
+  %s
+;
+
+nodes.forEach(function(node) {
+  g.setNode(node.id, { 
+    label: node.label,
+    style: node.color,
+  });
+});
+
+edges.forEach(function(edge) {
+  g.setEdge(edge.from, edge.to, {
+    arrowhead: "normal",
+    lineInterpolate: "basis",
+  });
+});
+
+var svg = d3.select("svg"),
+    inner = svg.select("g");
+
+// Set up zoom support
+var zoom = d3.behavior.zoom().on("zoom", function() {
+      inner.attr("transform", "translate(" + d3.event.translate + ")" +
+                                  "scale(" + d3.event.scale + ")");
+    });
+svg.call(zoom);
+
+// Create the renderer
+var render = new dagreD3.render();
+
+// Run the renderer. This is what draws the final graph.
+render(inner, g);
+
+// Center the graph
+var initialScale = 0.75;
+zoom
+  .translate([(svg.attr("width") - g.graph().width * initialScale) / 2, 20])
+  .scale(initialScale)
+  .event(svg);
+svg.attr('height', g.graph().height * initialScale + 40);
+</script>
+
+
+'''
+
+def main():
+    parser = argparse.ArgumentParser(description='Visualize graph of a workflow')
+    parser.add_argument('workflow', type=str, help='workflow name')
+    args = parser.parse_args()
+    res = run(["kubectl", "get", "workflow", "-o", "json", args.workflow  ], stdout=subprocess.PIPE)
+    wf = json.loads(res.stdout.decode("utf-8"))
+    nodes = []
+    edges = []
+    colors = {
+      'Pending': 'fill: #D0D0D0',
+      'Running': 'fill: #A0FFFF',
+      'Failed': 'fill: #f77',
+      'Succeeded': 'fill: #afa',
+      'Skipped': 'fill: #D0D0D0',
+      'Error': 'fill: #f77',
+    }
+    wf_name = wf['metadata']['name']
+    for node_id, node_status in wf['status']['nodes'].items():
+        if node_status['name'] == wf_name:
+            label = node_status['name']
+        else:
+            label = node_status['name'].replace(wf_name, "")
+        node = {'id': node_id, 'label': label, 'color': colors[node_status['phase']]}
+        nodes.append(node)
+        if 'children' in node_status:
+            for child_id in node_status['children']:
+              edge = {'from': node_id, 'to': child_id, 'arrows': 'to'}
+              edges.append(edge)
+    html = template % (wf_name, wf_name, json.dumps(nodes), json.dumps(edges))
+    tmpfile = tempfile.NamedTemporaryFile(suffix='.html', delete=False)
+    tmpfile.write(html.encode())
+    tmpfile.flush()
+    run(["open", tmpfile.name])
+    
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go
index 17ea15815e8a..61680f4131f7 100644
--- a/pkg/apis/workflow/v1alpha1/types.go
+++ b/pkg/apis/workflow/v1alpha1/types.go
@@ -18,6 +18,7 @@ const (
 	TemplateTypeSteps     TemplateType = "Steps"
 	TemplateTypeScript    TemplateType = "Script"
 	TemplateTypeResource  TemplateType = "Resource"
+	TemplateTypeDAG       TemplateType = "DAG"
 )
 
 // NodePhase is a label for the condition of a node at the current time.
@@ -115,9 +116,12 @@ type Template struct {
 	// Sidecar containers
 	Sidecars []Sidecar `json:"sidecars,omitempty"`
 
-	// Resource is the resource template type
+	// Resource template subtype which can run k8s resources
 	Resource *ResourceTemplate `json:"resource,omitempty"`
 
+	// DAG template subtype which runs a DAG
+	DAG *DAG `json:"dag,omitempty"`
+
 	// Location in which all files related to the step will be stored (logs, artifacts, etc...).
 	// Can be overridden by individual items in Outputs. If omitted, will use the default
 	// artifact repository location configured in the controller, appended with the
@@ -312,6 +316,9 @@ type NodeStatus struct {
 	// Time at which this node completed
 	FinishedAt metav1.Time `json:"finishedAt,omitempty"`
 
+	// IsPod indicates if this node is a pod or not
+	IsPod bool `json:"isPod,omitempty"`
+
 	// PodIP captures the IP of the pod for daemoned steps
 	PodIP string `json:"podIP,omitempty"`
 
@@ -325,6 +332,20 @@ type NodeStatus struct {
 
 	// Children is a list of child node IDs
 	Children []string `json:"children,omitempty"`
+
+	// OutboundNodes tracks the node IDs which are considered "outbound" nodes to a template invocation.
+	// For every invocation of a template, there are nodes which we considered as "outbound". Essentially,
+	// these are last nodes in the execution sequence to run, before the template is considered completed.
+	// These nodes are then connected as parents to a following step.
+	//
+	// In the case of single pod steps (i.e. container, script, resource templates), this list will be nil
+	// since the pod itself is already considered the "outbound" node.
+	// In the case of DAGs, outbound nodes are the "target" tasks (tasks with no children).
+	// In the case of steps, outbound nodes are all the containers involved in the last step group.
+	// NOTE: since templates are composable, the list of outbound nodes are carried upwards when
+	// a DAG/steps template invokes another DAG/steps template. In other words, the outbound nodes of
+	// a template, will be a superset of the outbound nodes of its last children.
+	OutboundNodes []string `json:"outboundNodes,omitempty"`
 }
 
 func (n NodeStatus) String() string {
@@ -429,6 +450,9 @@ func (tmpl *Template) GetType() TemplateType {
 	if tmpl.Steps != nil {
 		return TemplateTypeSteps
 	}
+	if tmpl.DAG != nil {
+		return TemplateTypeDAG
+	}
 	if tmpl.Script != nil {
 		return TemplateTypeScript
 	}
@@ -438,6 +462,30 @@ func (tmpl *Template) GetType() TemplateType {
 	return "Unknown"
 }
 
+// DAG is a template subtype for directed acyclic graph templates
+type DAG struct {
+	// Target are one or more names of targets to execute in a DAG
+	Targets string `json:"target,omitempty"`
+
+	// Tasks are a list of DAG tasks
+	Tasks []DAGTask `json:"tasks"`
+}
+
+// DAGTask represents a node in the graph during DAG execution
+type DAGTask struct {
+	// Name is the name of the target
+	Name string `json:"name"`
+
+	// Name of template to execute
+	Template string `json:"template"`
+
+	// Arguments are the parameter and artifact arguments to the template
+	Arguments Arguments `json:"arguments,omitempty"`
+
+	// Dependencies are name of other targets which this depends on
+	Dependencies []string `json:"dependencies,omitempty"`
+}
+
 func (in *Inputs) GetArtifactByName(name string) *Artifact {
 	for _, art := range in.Artifacts {
 		if art.Name == name {
diff --git a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go
index f923712c6c65..577bb35c3e1b 100644
--- a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go
@@ -177,6 +177,51 @@ func (in *ArtifactoryAuth) DeepCopy() *ArtifactoryAuth {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DAG) DeepCopyInto(out *DAG) {
+	*out = *in
+	if in.Tasks != nil {
+		in, out := &in.Tasks, &out.Tasks
+		*out = make([]DAGTask, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DAG.
+func (in *DAG) DeepCopy() *DAG {
+	if in == nil {
+		return nil
+	}
+	out := new(DAG)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DAGTask) DeepCopyInto(out *DAGTask) {
+	*out = *in
+	in.Arguments.DeepCopyInto(&out.Arguments)
+	if in.Dependencies != nil {
+		in, out := &in.Dependencies, &out.Dependencies
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DAGTask.
+func (in *DAGTask) DeepCopy() *DAGTask {
+	if in == nil {
+		return nil
+	}
+	out := new(DAGTask)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *GitArtifact) DeepCopyInto(out *GitArtifact) {
 	*out = *in
@@ -294,6 +339,11 @@ func (in *NodeStatus) DeepCopyInto(out *NodeStatus) {
 		*out = make([]string, len(*in))
 		copy(*out, *in)
 	}
+	if in.OutboundNodes != nil {
+		in, out := &in.OutboundNodes, &out.OutboundNodes
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
 	return
 }
 
@@ -622,6 +672,15 @@ func (in *Template) DeepCopyInto(out *Template) {
 			**out = **in
 		}
 	}
+	if in.DAG != nil {
+		in, out := &in.DAG, &out.DAG
+		if *in == nil {
+			*out = nil
+		} else {
+			*out = new(DAG)
+			(*in).DeepCopyInto(*out)
+		}
+	}
 	if in.ArchiveLocation != nil {
 		in, out := &in.ArchiveLocation, &out.ArchiveLocation
 		if *in == nil {
diff --git a/test/e2e/expectedfailures/dag-fail.yaml b/test/e2e/expectedfailures/dag-fail.yaml
new file mode 100644
index 000000000000..21d085ba804b
--- /dev/null
+++ b/test/e2e/expectedfailures/dag-fail.yaml
@@ -0,0 +1,44 @@
+# The following workflow executes a diamond workflow where C fails
+# 
+#   A
+#  / \
+# B   C
+#  \ /
+#   D
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-fail-
+spec:
+  entrypoint: diamond
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: cmd
+    container:
+      image: alpine:3.7
+      command: [sh, -c]
+      args: ["{{inputs.parameters.cmd}}"]
+  - name: diamond
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: [{name: cmd, value: echo A}]
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: cmd, value: echo B}]
+      - name: C
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters: [{name: cmd, value: echo C; exit 1}]
+      - name: D
+        dependencies: [B, C]
+        template: echo
+        arguments:
+          parameters:  [{name: cmd, value: echo D}]
diff --git a/test/e2e/functional/dag-argument-passing.yaml b/test/e2e/functional/dag-argument-passing.yaml
new file mode 100644
index 000000000000..21335a695944
--- /dev/null
+++ b/test/e2e/functional/dag-argument-passing.yaml
@@ -0,0 +1,34 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-arg-passing-
+spec:
+  entrypoint: dag-arg-passing
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+    outputs:
+      parameters:
+      - name: hosts
+        path: /etc/hosts
+  - name: dag-arg-passing
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: 
+          - name: message
+            value: val
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters:
+          - name: message
+            value: "{{dependencies.A.outputs.parameters.hosts}}"
\ No newline at end of file
diff --git a/workflow/common/validate.go b/workflow/common/validate.go
index 9c04925d63ae..e2a48de29400 100644
--- a/workflow/common/validate.go
+++ b/workflow/common/validate.go
@@ -53,7 +53,6 @@ func ValidateWorkflow(wf *wfv1.Workflow) error {
 	for _, param := range ctx.wf.Spec.Arguments.Parameters {
 		ctx.globalParams["workflow.parameters."+param.Name] = placeholderValue
 	}
-
 	if ctx.wf.Spec.Entrypoint == "" {
 		return errors.New(errors.CodeBadRequest, "spec.entrypoint is required")
 	}
@@ -61,7 +60,6 @@ func ValidateWorkflow(wf *wfv1.Workflow) error {
 	if entryTmpl == nil {
 		return errors.Errorf(errors.CodeBadRequest, "spec.entrypoint template '%s' undefined", ctx.wf.Spec.Entrypoint)
 	}
-
 	err = ctx.validateTemplate(entryTmpl, ctx.wf.Spec.Arguments)
 	if err != nil {
 		return err
@@ -113,17 +111,23 @@ func (ctx *wfValidationCtx) validateTemplate(tmpl *wfv1.Template, args wfv1.Argu
 	if tmpl.Resource != nil {
 		tmplTypes++
 	}
+	if tmpl.DAG != nil {
+		tmplTypes++
+	}
 	switch tmplTypes {
 	case 0:
-		return errors.New(errors.CodeBadRequest, "template type unspecified. choose one of: container, steps, script, resource")
+		return errors.New(errors.CodeBadRequest, "template type unspecified. choose one of: container, steps, script, resource, dag")
 	case 1:
 	default:
-		return errors.New(errors.CodeBadRequest, "multiple template types specified. choose one of: container, steps, script, resource")
+		return errors.New(errors.CodeBadRequest, "multiple template types specified. choose one of: container, steps, script, resource, dag")
 	}
-	if tmpl.Steps == nil {
-		err = validateLeaf(scope, tmpl)
-	} else {
+	switch tmpl.GetType() {
+	case wfv1.TemplateTypeSteps:
 		err = ctx.validateSteps(scope, tmpl)
+	case wfv1.TemplateTypeDAG:
+		err = ctx.validateDAG(scope, tmpl)
+	default:
+		err = validateLeaf(scope, tmpl)
 	}
 	if err != nil {
 		return err
@@ -138,11 +142,11 @@ func (ctx *wfValidationCtx) validateTemplate(tmpl *wfv1.Template, args wfv1.Argu
 func validateInputs(tmpl *wfv1.Template) (map[string]interface{}, error) {
 	err := validateWorkflowFieldNames(tmpl.Inputs.Parameters)
 	if err != nil {
-		return nil, errors.Errorf(errors.CodeBadRequest, "template '%s' inputs.parameters%s", tmpl.Name, err.Error())
+		return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.inputs.parameters%s", tmpl.Name, err.Error())
 	}
 	err = validateWorkflowFieldNames(tmpl.Inputs.Artifacts)
 	if err != nil {
-		return nil, errors.Errorf(errors.CodeBadRequest, "template '%s' inputs.artifacts%s", tmpl.Name, err.Error())
+		return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.inputs.artifacts%s", tmpl.Name, err.Error())
 	}
 	scope := make(map[string]interface{})
 	for _, param := range tmpl.Inputs.Parameters {
@@ -154,17 +158,17 @@ func validateInputs(tmpl *wfv1.Template) (map[string]interface{}, error) {
 		scope[artRef] = true
 		if isLeaf {
 			if art.Path == "" {
-				return nil, errors.Errorf(errors.CodeBadRequest, "template '%s' %s.path not specified", tmpl.Name, artRef)
+				return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path not specified", tmpl.Name, artRef)
 			}
 		} else {
 			if art.Path != "" {
-				return nil, errors.Errorf(errors.CodeBadRequest, "template '%s' %s.path only valid in container/script templates", tmpl.Name, artRef)
+				return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path only valid in container/script templates", tmpl.Name, artRef)
 			}
 		}
 		if art.From != "" {
-			return nil, errors.Errorf(errors.CodeBadRequest, "template '%s' %s.from not valid in inputs", tmpl.Name, artRef)
+			return nil, errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.from not valid in inputs", tmpl.Name, artRef)
 		}
-		errPrefix := fmt.Sprintf("template '%s' %s", tmpl.Name, artRef)
+		errPrefix := fmt.Sprintf("templates.%s.%s", tmpl.Name, artRef)
 		err = validateArtifactLocation(errPrefix, art)
 		if err != nil {
 			return nil, err
@@ -211,7 +215,7 @@ func validateLeaf(scope map[string]interface{}, tmpl *wfv1.Template) error {
 	}
 	err = resolveAllVariables(scope, string(tmplBytes))
 	if err != nil {
-		return errors.Errorf(errors.CodeBadRequest, "template '%s' %s", tmpl.Name, err.Error())
+		return errors.Errorf(errors.CodeBadRequest, "template.%s: %s", tmpl.Name, err.Error())
 	}
 	if tmpl.Container != nil {
 		// Ensure there are no collisions with volume mountPaths and artifact load paths
@@ -256,19 +260,19 @@ func (ctx *wfValidationCtx) validateSteps(scope map[string]interface{}, tmpl *wf
 	for i, stepGroup := range tmpl.Steps {
 		for _, step := range stepGroup {
 			if step.Name == "" {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].name is required", tmpl.Name, i)
+				return errors.Errorf(errors.CodeBadRequest, "template.%s.steps[%d].name is required", tmpl.Name, i)
 			}
 			_, ok := stepNames[step.Name]
 			if ok {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].name '%s' is not unique", tmpl.Name, i, step.Name)
+				return errors.Errorf(errors.CodeBadRequest, "template.%s.steps[%d].name '%s' is not unique", tmpl.Name, i, step.Name)
 			}
 			if errs := IsValidWorkflowFieldName(step.Name); len(errs) != 0 {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].name '%s' is invalid: %s", tmpl.Name, i, step.Name, strings.Join(errs, ";"))
+				return errors.Errorf(errors.CodeBadRequest, "template.%s.steps[%d].name '%s' is invalid: %s", tmpl.Name, i, step.Name, strings.Join(errs, ";"))
 			}
 			stepNames[step.Name] = true
 			err := addItemsToScope(&step, scope)
 			if err != nil {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].%s %s", tmpl.Name, i, step.Name, err.Error())
+				return errors.Errorf(errors.CodeBadRequest, "template.%s.steps[%d].%s %s", tmpl.Name, i, step.Name, err.Error())
 			}
 			stepBytes, err := json.Marshal(stepGroup)
 			if err != nil {
@@ -276,13 +280,13 @@ func (ctx *wfValidationCtx) validateSteps(scope map[string]interface{}, tmpl *wf
 			}
 			err = resolveAllVariables(scope, string(stepBytes))
 			if err != nil {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].%s %s", tmpl.Name, i, step.Name, err.Error())
+				return errors.Errorf(errors.CodeBadRequest, "template.%s.steps[%d].%s %s", tmpl.Name, i, step.Name, err.Error())
 			}
 			childTmpl := ctx.wf.GetTemplate(step.Template)
 			if childTmpl == nil {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' steps[%d].%s.template '%s' undefined", tmpl.Name, i, step.Name, step.Template)
+				return errors.Errorf(errors.CodeBadRequest, "templates.%s.steps[%d].%s.template '%s' undefined", tmpl.Name, i, step.Name, step.Template)
 			}
-			err = validateArguments(fmt.Sprintf("template '%s' steps[%d].%s.arguments.", tmpl.Name, i, step.Name), step.Arguments)
+			err = validateArguments(fmt.Sprintf("templates.%s.steps[%d].%s.arguments.", tmpl.Name, i, step.Name), step.Arguments)
 			if err != nil {
 				return err
 			}
@@ -292,7 +296,7 @@ func (ctx *wfValidationCtx) validateSteps(scope map[string]interface{}, tmpl *wf
 			}
 		}
 		for _, step := range stepGroup {
-			ctx.addOutputsToScope(step.Template, step.Name, scope)
+			ctx.addOutputsToScope(step.Template, fmt.Sprintf("steps.%s", step.Name), scope)
 		}
 	}
 	return nil
@@ -320,30 +324,30 @@ func addItemsToScope(step *wfv1.WorkflowStep, scope map[string]interface{}) erro
 	return nil
 }
 
-func (ctx *wfValidationCtx) addOutputsToScope(templateName string, stepName string, scope map[string]interface{}) {
+func (ctx *wfValidationCtx) addOutputsToScope(templateName string, prefix string, scope map[string]interface{}) {
 	tmpl := ctx.wf.GetTemplate(templateName)
 	if tmpl.Daemon != nil && *tmpl.Daemon {
-		scope[fmt.Sprintf("steps.%s.ip", stepName)] = true
+		scope[fmt.Sprintf("%s.ip", prefix)] = true
 	}
 	if tmpl.Script != nil {
-		scope[fmt.Sprintf("steps.%s.outputs.result", stepName)] = true
+		scope[fmt.Sprintf("%s.outputs.result", prefix)] = true
 	}
 	for _, param := range tmpl.Outputs.Parameters {
-		scope[fmt.Sprintf("steps.%s.outputs.parameters.%s", stepName, param.Name)] = true
+		scope[fmt.Sprintf("%s.outputs.parameters.%s", prefix, param.Name)] = true
 	}
 	for _, art := range tmpl.Outputs.Artifacts {
-		scope[fmt.Sprintf("steps.%s.outputs.artifacts.%s", stepName, art.Name)] = true
+		scope[fmt.Sprintf("%s.outputs.artifacts.%s", prefix, art.Name)] = true
 	}
 }
 
 func validateOutputs(scope map[string]interface{}, tmpl *wfv1.Template) error {
 	err := validateWorkflowFieldNames(tmpl.Outputs.Parameters)
 	if err != nil {
-		return errors.Errorf(errors.CodeBadRequest, "template '%s' outputs.parameters%s", tmpl.Name, err.Error())
+		return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.parameters%s", tmpl.Name, err.Error())
 	}
 	err = validateWorkflowFieldNames(tmpl.Outputs.Artifacts)
 	if err != nil {
-		return errors.Errorf(errors.CodeBadRequest, "template '%s' outputs.artifacts%s", tmpl.Name, err.Error())
+		return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.artifacts%s", tmpl.Name, err.Error())
 	}
 	outputBytes, err := json.Marshal(tmpl.Outputs)
 	if err != nil {
@@ -359,11 +363,11 @@ func validateOutputs(scope map[string]interface{}, tmpl *wfv1.Template) error {
 		artRef := fmt.Sprintf("outputs.artifacts.%s", art.Name)
 		if isLeaf {
 			if art.Path == "" {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' %s.path not specified", tmpl.Name, artRef)
+				return errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path not specified", tmpl.Name, artRef)
 			}
 		} else {
 			if art.Path != "" {
-				return errors.Errorf(errors.CodeBadRequest, "template '%s' %s.path only valid in container/script templates", tmpl.Name, artRef)
+				return errors.Errorf(errors.CodeBadRequest, "templates.%s.%s.path only valid in container/script templates", tmpl.Name, artRef)
 			}
 		}
 	}
@@ -415,3 +419,107 @@ func validateWorkflowFieldNames(slice interface{}) error {
 	}
 	return nil
 }
+
+func (ctx *wfValidationCtx) validateDAG(scope map[string]interface{}, tmpl *wfv1.Template) error {
+	err := validateWorkflowFieldNames(tmpl.DAG.Tasks)
+	if err != nil {
+		return errors.Errorf(errors.CodeBadRequest, "templates.%s.tasks%s", tmpl.Name, err.Error())
+	}
+	nameToTask := make(map[string]wfv1.DAGTask)
+	for _, task := range tmpl.DAG.Tasks {
+		nameToTask[task.Name] = task
+	}
+
+	// Verify dependencies for all tasks can be resolved as well as template names
+	for _, task := range tmpl.DAG.Tasks {
+		if task.Template == "" {
+			return errors.Errorf(errors.CodeBadRequest, "templates.%s.tasks.%s.template is required", tmpl.Name, task.Name)
+		}
+		taskTmpl := ctx.wf.GetTemplate(task.Template)
+		if taskTmpl == nil {
+			return errors.Errorf(errors.CodeBadRequest, "templates.%s.tasks%s.template '%s' undefined", tmpl.Name, task.Name, task.Template)
+		}
+		dupDependencies := make(map[string]bool)
+		for j, depName := range task.Dependencies {
+			if _, ok := dupDependencies[depName]; ok {
+				return errors.Errorf(errors.CodeBadRequest,
+					"templates.%s.tasks.%s.dependencies[%d] dependency '%s' duplicated",
+					tmpl.Name, task.Name, j, depName)
+			}
+			dupDependencies[depName] = true
+			if _, ok := nameToTask[depName]; !ok {
+				return errors.Errorf(errors.CodeBadRequest,
+					"templates.%s.tasks.%s.dependencies[%d] dependency '%s' not defined",
+					tmpl.Name, task.Name, j, depName)
+			}
+		}
+	}
+
+	err = verifyNoCycles(tmpl, nameToTask)
+	if err != nil {
+		return err
+	}
+
+	for _, task := range tmpl.DAG.Tasks {
+		taskBytes, err := json.Marshal(task)
+		if err != nil {
+			return errors.InternalWrapError(err)
+		}
+		// add outputs of all our dependencies to scope
+		taskScope := make(map[string]interface{})
+		for k, v := range scope {
+			taskScope[k] = v
+		}
+		for _, depName := range task.Dependencies {
+			ctx.addOutputsToScope(nameToTask[depName].Template, fmt.Sprintf("dependencies.%s", depName), taskScope)
+		}
+		err = resolveAllVariables(taskScope, string(taskBytes))
+		if err != nil {
+			return errors.Errorf(errors.CodeBadRequest, "template.%s.tasks.%s %s", tmpl.Name, task.Name, err.Error())
+		}
+		taskTmpl := ctx.wf.GetTemplate(task.Template)
+		err = ctx.validateTemplate(taskTmpl, task.Arguments)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// verifyNoCycles verifies there are no cycles in the DAG graph
+func verifyNoCycles(tmpl *wfv1.Template, nameToTask map[string]wfv1.DAGTask) error {
+	visited := make(map[string]bool)
+	var noCyclesHelper func(taskName string, cycle []string) error
+	noCyclesHelper = func(taskName string, cycle []string) error {
+		if _, ok := visited[taskName]; ok {
+			return nil
+		}
+		task := nameToTask[taskName]
+		for _, depName := range task.Dependencies {
+			for _, name := range cycle {
+				if name == depName {
+					return errors.Errorf(errors.CodeBadRequest,
+						"templates.%s.tasks dependency cycle detected: %s->%s",
+						tmpl.Name, strings.Join(cycle, "->"), name)
+				}
+			}
+			cycle = append(cycle, depName)
+			err := noCyclesHelper(depName, cycle)
+			if err != nil {
+				return err
+			}
+			cycle = cycle[0 : len(cycle)-1]
+		}
+		visited[taskName] = true
+		return nil
+	}
+
+	for _, task := range tmpl.DAG.Tasks {
+		err := noCyclesHelper(task.Name, []string{})
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/workflow/common/validate_dag_test.go b/workflow/common/validate_dag_test.go
new file mode 100644
index 000000000000..55a058c9f1a4
--- /dev/null
+++ b/workflow/common/validate_dag_test.go
@@ -0,0 +1,174 @@
+package common
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+var dagCycle = `
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-cycle-
+spec:
+  entrypoint: cycle
+  templates:
+  - name: echo
+    container:
+      image: alpine:3.7
+      command: [echo, hello]
+  - name: cycle
+    dag:
+      tasks:
+      - name: A
+        dependencies: [C]
+        template: echo
+      - name: B
+        dependencies: [A]
+        template: echo
+      - name: C
+        dependencies: [A]
+        template: echo
+`
+
+func TestDAGCycle(t *testing.T) {
+	err := validate(dagCycle)
+	if assert.NotNil(t, err) {
+		assert.Contains(t, err.Error(), "cycle")
+	}
+}
+
+var duplicateDependencies = `
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-dup-depends-
+spec:
+  entrypoint: cycle
+  templates:
+  - name: echo
+    container:
+      image: alpine:3.7
+      command: [echo, hello]
+  - name: cycle
+    dag:
+      tasks:
+      - name: A
+        template: echo
+      - name: B
+        dependencies: [A, A]
+        template: echo
+`
+
+func TestDuplicateDependencies(t *testing.T) {
+	err := validate(duplicateDependencies)
+	if assert.NotNil(t, err) {
+		assert.Contains(t, err.Error(), "duplicate")
+	}
+}
+
+var dagUndefinedTemplate = `
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-undefined-
+spec:
+  entrypoint: undef
+  templates:
+  - name: undef
+    dag:
+      tasks:
+      - name: A
+        template: echo
+`
+
+func TestDAGUndefinedTemplate(t *testing.T) {
+	err := validate(dagUndefinedTemplate)
+	if assert.NotNil(t, err) {
+		assert.Contains(t, err.Error(), "undefined")
+	}
+}
+
+var dagUnresolvedVar = `
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-cycle-
+spec:
+  entrypoint: unresolved
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+    outputs:
+      parameters:
+      - name: hosts
+        path: /etc/hosts
+  - name: unresolved
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: 
+          - name: message
+            value: val
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters:
+          - name: message
+            value: "{{dependencies.A.outputs.parameters.unresolvable}}"
+`
+
+var dagResolvedVar = `
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: dag-cycle-
+spec:
+  entrypoint: unresolved
+  templates:
+  - name: echo
+    inputs:
+      parameters:
+      - name: message
+    container:
+      image: alpine:3.7
+      command: [echo, "{{inputs.parameters.message}}"]
+    outputs:
+      parameters:
+      - name: hosts
+        path: /etc/hosts
+  - name: unresolved
+    dag:
+      tasks:
+      - name: A
+        template: echo
+        arguments:
+          parameters: 
+          - name: message
+            value: val
+      - name: B
+        dependencies: [A]
+        template: echo
+        arguments:
+          parameters:
+          - name: message
+            value: "{{dependencies.A.outputs.parameters.hosts}}"
+`
+
+func TestDAGVariableResolution(t *testing.T) {
+	err := validate(dagUnresolvedVar)
+	if assert.NotNil(t, err) {
+		assert.Contains(t, err.Error(), "failed to resolve {{dependencies.A.outputs.parameters.unresolvable}}")
+	}
+	err = validate(dagResolvedVar)
+	assert.Nil(t, err)
+}
diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go
new file mode 100644
index 000000000000..0526e474ab7f
--- /dev/null
+++ b/workflow/controller/dag.go
@@ -0,0 +1,289 @@
+package controller
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/argoproj/argo/errors"
+	wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1"
+	"github.com/argoproj/argo/workflow/common"
+	"github.com/valyala/fasttemplate"
+)
+
+// dagContext holds context information about this context's DAG
+type dagContext struct {
+	// encompasser is the node name of the encompassing node to this DAG.
+	// This is used to incorporate into each of the task's node names.
+	encompasser string
+
+	// tasks are all the tasks in the template
+	tasks []wfv1.DAGTask
+
+	// visited keeps track of tasks we have already visited during an invocation of executeDAG
+	// in order to avoid duplicating work
+	visited map[string]bool
+
+	// tmpl is the template spec. it is needed to resolve hard-wired artifacts
+	tmpl *wfv1.Template
+
+	// wf is stored to formulate nodeIDs
+	wf *wfv1.Workflow
+}
+
+func (d *dagContext) getTask(taskName string) *wfv1.DAGTask {
+	for _, task := range d.tasks {
+		if task.Name == taskName {
+			return &task
+		}
+	}
+	panic("target " + taskName + " does not exist")
+}
+
+// taskNodeName formulates the nodeName for a dag task
+func (d *dagContext) taskNodeName(taskName string) string {
+	return fmt.Sprintf("%s.%s", d.encompasser, taskName)
+}
+
+// taskNodeID formulates the node ID for a dag task
+func (d *dagContext) taskNodeID(taskName string) string {
+	nodeName := d.taskNodeName(taskName)
+	return d.wf.NodeID(nodeName)
+}
+
+func (d *dagContext) getTaskNode(taskName string) *wfv1.NodeStatus {
+	nodeID := d.taskNodeID(taskName)
+	node, ok := d.wf.Status.Nodes[nodeID]
+	if !ok {
+		return nil
+	}
+	return &node
+}
+
+func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv1.NodeStatus {
+	nodeID := woc.wf.NodeID(nodeName)
+	node, nodeInitialized := woc.wf.Status.Nodes[nodeID]
+	if nodeInitialized && node.Completed() {
+		return &node
+	}
+	dagCtx := &dagContext{
+		encompasser: nodeName,
+		tasks:       tmpl.DAG.Tasks,
+		visited:     make(map[string]bool),
+		tmpl:        tmpl,
+		wf:          woc.wf,
+	}
+	var targetTasks []string
+	if tmpl.DAG.Targets == "" {
+		targetTasks = findLeafTaskNames(tmpl.DAG.Tasks)
+	} else {
+		targetTasks = strings.Split(tmpl.DAG.Targets, " ")
+	}
+
+	if !nodeInitialized {
+		node = *woc.markNodePhase(nodeName, wfv1.NodeRunning)
+		rootTasks := findRootTaskNames(dagCtx, targetTasks)
+		woc.log.Infof("Root tasks of %s identified as %s", nodeName, rootTasks)
+		for _, rootTaskName := range rootTasks {
+			woc.addChildNode(node.Name, dagCtx.taskNodeName(rootTaskName))
+		}
+	}
+	// kick off execution of each target task asynchronously
+	for _, taskNames := range targetTasks {
+		woc.executeDAGTask(dagCtx, taskNames)
+	}
+	// return early if we have yet to complete execution of any one of our dependencies
+	for _, depName := range targetTasks {
+		depNode := dagCtx.getTaskNode(depName)
+		if depNode == nil || !depNode.Completed() {
+			return &node
+		}
+	}
+	// all desired tasks completed. now it is time to assess state
+	for _, depName := range targetTasks {
+		depNode := dagCtx.getTaskNode(depName)
+		if !depNode.Successful() {
+			// TODO: consider creating a virtual fan-in node
+			return woc.markNodePhase(nodeName, depNode.Phase)
+		}
+	}
+
+	// set the outbound nodes from the target tasks
+	node = *woc.getNodeByName(nodeName)
+	outbound := make([]string, 0)
+	for _, depName := range targetTasks {
+		depNode := dagCtx.getTaskNode(depName)
+		outboundNodeIDs := woc.getOutboundNodes(depNode.ID)
+		for _, outNodeID := range outboundNodeIDs {
+			outbound = append(outbound, outNodeID)
+		}
+	}
+	woc.log.Infof("Outbound nodes of %s set to %s", node.ID, outbound)
+	node.OutboundNodes = outbound
+	woc.wf.Status.Nodes[node.ID] = node
+
+	return woc.markNodePhase(nodeName, wfv1.NodeSucceeded)
+}
+
+// findRootTaskNames finds the names of all tasks which have no dependencies.
+// Once identified, these root tasks are marked as children to the encompassing node.
+func findRootTaskNames(dagCtx *dagContext, targetTasks []string) []string {
+	//rootTaskNames := make(map[string]bool)
+	rootTaskNames := make([]string, 0)
+	visited := make(map[string]bool)
+	var findRootHelper func(s string)
+	findRootHelper = func(taskName string) {
+		if _, ok := visited[taskName]; ok {
+			return
+		}
+		visited[taskName] = true
+		task := dagCtx.getTask(taskName)
+		if len(task.Dependencies) == 0 {
+			rootTaskNames = append(rootTaskNames, taskName)
+			return
+		}
+		for _, depName := range task.Dependencies {
+			findRootHelper(depName)
+		}
+	}
+	for _, targetTaskName := range targetTasks {
+		findRootHelper(targetTaskName)
+	}
+	return rootTaskNames
+}
+
+// executeDAGTask traverses and executes the upward chain of dependencies of a task
+func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
+	if _, ok := dagCtx.visited[taskName]; ok {
+		return
+	}
+	dagCtx.visited[taskName] = true
+
+	node := dagCtx.getTaskNode(taskName)
+	if node != nil && node.Completed() {
+		return
+	}
+	// Check if our dependencies completed. If not, recurse our parents executing them if necessary
+	task := dagCtx.getTask(taskName)
+	dependenciesCompleted := true
+	dependenciesSuccessful := true
+	nodeName := dagCtx.taskNodeName(taskName)
+	for _, depName := range task.Dependencies {
+		depNode := dagCtx.getTaskNode(depName)
+		if depNode != nil {
+			if depNode.Completed() {
+				if !depNode.Successful() {
+					dependenciesSuccessful = false
+				}
+				continue
+			}
+		}
+		dependenciesCompleted = false
+		dependenciesSuccessful = false
+		// recurse our dependency
+		woc.executeDAGTask(dagCtx, depName)
+	}
+	if !dependenciesCompleted {
+		return
+	}
+
+	// All our dependencies completed. Now add the child relationship from our dependency's
+	// outbound nodes to this node.
+	node = dagCtx.getTaskNode(taskName)
+	if node == nil {
+		woc.log.Infof("All of node %s dependencies %s completed", nodeName, task.Dependencies)
+		// Add all outbound nodes of our dependencies as parents to this node
+		for _, depName := range task.Dependencies {
+			depNode := dagCtx.getTaskNode(depName)
+			woc.log.Infof("node %s outbound nodes: %s", depNode, depNode.OutboundNodes)
+			if depNode.IsPod {
+				woc.addChildNode(depNode.Name, nodeName)
+			} else {
+				for _, outNodeID := range depNode.OutboundNodes {
+					woc.addChildNode(woc.wf.Status.Nodes[outNodeID].Name, nodeName)
+				}
+			}
+		}
+	}
+
+	if !dependenciesSuccessful {
+		woc.log.Infof("Task %s being marked %s due to dependency failure", taskName, wfv1.NodeSkipped)
+		_ = woc.markNodePhase(nodeName, wfv1.NodeSkipped)
+		return
+	}
+
+	// All our dependencies were satisfied and successful. It's our turn to run
+	// Substitute params/artifacts from our dependencies and execute the template
+	newTask, err := woc.resolveDependencyReferences(dagCtx, task)
+	if err != nil {
+		woc.markNodeError(nodeName, err)
+		return
+	}
+	_ = woc.executeTemplate(newTask.Template, newTask.Arguments, nodeName)
+}
+
+// resolveDependencyReferences replaces any references to outputs of task dependencies, or artifacts in the inputs
+// NOTE: by now, input parameters should have been substituted throughout the template
+func (woc *wfOperationCtx) resolveDependencyReferences(dagCtx *dagContext, task *wfv1.DAGTask) (*wfv1.DAGTask, error) {
+	// build up the scope
+	scope := wfScope{
+		tmpl:  dagCtx.tmpl,
+		scope: make(map[string]interface{}),
+	}
+	for _, depName := range task.Dependencies {
+		depNode := dagCtx.getTaskNode(depName)
+		prefix := fmt.Sprintf("dependencies.%s", depName)
+		scope.addNodeOutputsToScope(prefix, depNode)
+	}
+
+	// Perform replacement
+	taskBytes, err := json.Marshal(task)
+	if err != nil {
+		return nil, errors.InternalWrapError(err)
+	}
+	fstTmpl := fasttemplate.New(string(taskBytes), "{{", "}}")
+	newTaskStr, err := common.Replace(fstTmpl, scope.replaceMap(), false, "")
+	if err != nil {
+		return nil, err
+	}
+	var newTask wfv1.DAGTask
+	err = json.Unmarshal([]byte(newTaskStr), &newTask)
+	if err != nil {
+		return nil, errors.InternalWrapError(err)
+	}
+
+	// replace all artifact references
+	for j, art := range newTask.Arguments.Artifacts {
+		if art.From == "" {
+			continue
+		}
+		resolvedArt, err := scope.resolveArtifact(art.From)
+		if err != nil {
+			return nil, err
+		}
+		resolvedArt.Name = art.Name
+		newTask.Arguments.Artifacts[j] = *resolvedArt
+	}
+	return &newTask, nil
+}
+
+// findLeafTaskNames finds the names of all tasks whom no other nodes depend on.
+// This list of tasks is used as the the default list of targets when dag.targets is omitted.
+func findLeafTaskNames(tasks []wfv1.DAGTask) []string {
+	taskIsLeaf := make(map[string]bool)
+	for _, task := range tasks {
+		if _, ok := taskIsLeaf[task.Name]; !ok {
+			taskIsLeaf[task.Name] = true
+		}
+		for _, dependency := range task.Dependencies {
+			taskIsLeaf[dependency] = false
+		}
+	}
+	leafTaskNames := make([]string, 0)
+	for taskName, isLeaf := range taskIsLeaf {
+		if isLeaf {
+			leafTaskNames = append(leafTaskNames, taskName)
+		}
+	}
+	return leafTaskNames
+}
diff --git a/workflow/controller/dag_test.go b/workflow/controller/dag_test.go
new file mode 100644
index 000000000000..b0b429f89997
--- /dev/null
+++ b/workflow/controller/dag_test.go
@@ -0,0 +1 @@
+package controller
diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go
index b33fcece1161..7d2b06569888 100644
--- a/workflow/controller/operator.go
+++ b/workflow/controller/operator.go
@@ -133,6 +133,8 @@ func (wfc *WorkflowController) operateWorkflow(wf *wfv1.Workflow) {
 		woc.markWorkflowError(err, true)
 		return
 	}
+	var workflowStatus wfv1.NodePhase
+	var workflowMessage string
 	err = woc.executeTemplate(wf.Spec.Entrypoint, wf.Spec.Arguments, wf.ObjectMeta.Name)
 	if err != nil {
 		if errors.IsCode(errors.CodeTimeout, err) {
@@ -144,18 +146,20 @@ func (wfc *WorkflowController) operateWorkflow(wf *wfv1.Workflow) {
 		}
 		woc.log.Errorf("%s error: %+v", wf.ObjectMeta.Name, err)
 	}
-	node := woc.wf.Status.Nodes[woc.wf.NodeID(wf.ObjectMeta.Name)]
+	node := woc.getNodeByName(wf.ObjectMeta.Name)
 	if !node.Completed() {
 		return
 	}
+	workflowStatus = node.Phase
+	workflowMessage = node.Message
 
 	var onExitNode *wfv1.NodeStatus
 	if wf.Spec.OnExit != "" {
-		if node.Phase == wfv1.NodeSkipped {
+		if workflowStatus == wfv1.NodeSkipped {
 			// treat skipped the same as Succeeded for workflow.status
 			woc.globalParams[common.GlobalVarWorkflowStatus] = string(wfv1.NodeSucceeded)
 		} else {
-			woc.globalParams[common.GlobalVarWorkflowStatus] = string(node.Phase)
+			woc.globalParams[common.GlobalVarWorkflowStatus] = string(workflowStatus)
 		}
 		woc.log.Infof("Running OnExit handler: %s", wf.Spec.OnExit)
 		onExitNodeName := wf.ObjectMeta.Name + ".onExit"
@@ -187,7 +191,7 @@ func (wfc *WorkflowController) operateWorkflow(wf *wfv1.Workflow) {
 	// If we get here, the workflow completed, all PVCs were deleted successfully, and
 	// exit handlers were executed. We now need to infer the workflow phase from the
 	// node phase.
-	switch node.Phase {
+	switch workflowStatus {
 	case wfv1.NodeSucceeded, wfv1.NodeSkipped:
 		if onExitNode != nil && !onExitNode.Successful() {
 			// if main workflow succeeded, but the exit node was unsuccessful
@@ -197,9 +201,9 @@ func (wfc *WorkflowController) operateWorkflow(wf *wfv1.Workflow) {
 			woc.markWorkflowSuccess()
 		}
 	case wfv1.NodeFailed:
-		woc.markWorkflowFailed(node.Message)
+		woc.markWorkflowFailed(workflowMessage)
 	case wfv1.NodeError:
-		woc.markWorkflowPhase(wfv1.NodeError, true, node.Message)
+		woc.markWorkflowPhase(wfv1.NodeError, true, workflowMessage)
 	default:
 		// NOTE: we should never make it here because if the the node was 'Running'
 		// we should have returned earlier.
@@ -208,7 +212,16 @@ func (wfc *WorkflowController) operateWorkflow(wf *wfv1.Workflow) {
 	}
 }
 
-// persistUpdates will update a workflow with any updates made during workflow operation.
+func (woc *wfOperationCtx) getNodeByName(nodeName string) *wfv1.NodeStatus {
+	nodeID := woc.wf.NodeID(nodeName)
+	node, ok := woc.wf.Status.Nodes[nodeID]
+	if !ok {
+		return nil
+	}
+	return &node
+}
+
+// persistUpdates will PATCH a workflow with any updates made during workflow operation.
 // It also labels any pods as completed if we have extracted everything we need from it.
 func (woc *wfOperationCtx) persistUpdates() {
 	if !woc.updated {
@@ -372,7 +385,7 @@ func (woc *wfOperationCtx) podReconciliation() error {
 	// is now impossible to infer status. The only thing we can do at this point is
 	// to mark the node with Error.
 	for nodeID, node := range woc.wf.Status.Nodes {
-		if len(node.Children) > 0 || node.Completed() {
+		if !node.IsPod || node.Completed() {
 			// node is not a pod, or it is already complete
 			continue
 		}
@@ -728,21 +741,10 @@ func (woc *wfOperationCtx) getLastChildNode(node *wfv1.NodeStatus) (*wfv1.NodeSt
 	return &lastChildNode, nil
 }
 
-func (woc *wfOperationCtx) getNode(nodeName string) wfv1.NodeStatus {
-	nodeID := woc.wf.NodeID(nodeName)
-	node, ok := woc.wf.Status.Nodes[nodeID]
-	if !ok {
-		panic("Failed to find node " + nodeName)
-	}
-
-	return node
-}
-
 func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Arguments, nodeName string) error {
 	woc.log.Debugf("Evaluating node %s: template: %s", nodeName, templateName)
-	nodeID := woc.wf.NodeID(nodeName)
-	node, ok := woc.wf.Status.Nodes[nodeID]
-	if ok && node.Completed() {
+	node := woc.getNodeByName(nodeName)
+	if node != nil && node.Completed() {
 		woc.log.Debugf("Node %s already completed", nodeName)
 		return nil
 	}
@@ -761,19 +763,18 @@ func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Argume
 
 	switch tmpl.GetType() {
 	case wfv1.TemplateTypeContainer:
-		if ok {
+		if node != nil {
 			if node.RetryStrategy != nil {
-				if err = woc.processNodeRetries(&node); err != nil {
+				if err = woc.processNodeRetries(node); err != nil {
 					return err
 				}
-
 				// The updated node status could've changed. Get the latest copy of the node.
-				node = woc.getNode(node.Name)
-				log.Infof("Node %s: Status: %s", node.Name, node.Phase)
+				node = woc.getNodeByName(node.Name)
+				fmt.Printf("Node %s: Status: %s\n", node.Name, node.Phase)
 				if node.Completed() {
 					return nil
 				}
-				lastChildNode, err := woc.getLastChildNode(&node)
+				lastChildNode, err := woc.getLastChildNode(node)
 				if err != nil {
 					return err
 				}
@@ -803,7 +804,7 @@ func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Argume
 			retries := wfv1.RetryStrategy{}
 			node.RetryStrategy = &retries
 			node.RetryStrategy.Limit = tmpl.RetryStrategy.Limit
-			woc.wf.Status.Nodes[nodeID] = *node
+			woc.wf.Status.Nodes[node.ID] = *node
 
 			// Create new node as child of 'node'
 			newContainerName := fmt.Sprintf("%s(%d)", nodeName, len(node.Children))
@@ -815,21 +816,19 @@ func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Argume
 		// We have not yet created the pod
 		err = woc.executeContainer(nodeToExecute, tmpl)
 	case wfv1.TemplateTypeSteps:
-		if !ok {
-			node = *woc.markNodePhase(nodeName, wfv1.NodeRunning)
-			woc.log.Infof("Initialized workflow node %v", node)
-		}
 		err = woc.executeSteps(nodeName, tmpl)
 	case wfv1.TemplateTypeScript:
-		if !ok {
+		if node == nil {
 			err = woc.executeScript(nodeName, tmpl)
 		}
 	case wfv1.TemplateTypeResource:
-		if !ok {
+		if node == nil {
 			err = woc.executeResource(nodeName, tmpl)
 		}
+	case wfv1.TemplateTypeDAG:
+		_ = woc.executeDAG(nodeName, tmpl)
 	default:
-		err = errors.Errorf("Template '%s' missing specification", tmpl.Name)
+		err = errors.Errorf(errors.CodeBadRequest, "Template '%s' missing specification", tmpl.Name)
 		woc.markNodeError(nodeName, err)
 	}
 	if err != nil {
@@ -905,18 +904,28 @@ func (woc *wfOperationCtx) markNodePhase(nodeName string, phase wfv1.NodePhase,
 			Phase:     phase,
 			StartedAt: metav1.Time{Time: time.Now().UTC()},
 		}
+		woc.log.Infof("node %s initialized %s", node, node.Phase)
+		woc.updated = true
 	} else {
-		node.Phase = phase
+		if node.Phase != phase {
+			woc.log.Infof("node %s phase %s -> %s", node, node.Phase, phase)
+			node.Phase = phase
+			woc.updated = true
+		}
 	}
 	if len(message) > 0 {
-		node.Message = message[0]
+		if message[0] != node.Message {
+			woc.log.Infof("node %s message: %s", node, message[0])
+			node.Message = message[0]
+			woc.updated = true
+		}
 	}
 	if node.Completed() && node.FinishedAt.IsZero() {
 		node.FinishedAt = metav1.Time{Time: time.Now().UTC()}
+		woc.log.Infof("node %s finished: %s", node, node.FinishedAt)
+		woc.updated = true
 	}
 	woc.wf.Status.Nodes[nodeID] = node
-	woc.updated = true
-	woc.log.Debugf("Marked node %s %s", nodeName, phase)
 	return &node
 }
 
@@ -926,17 +935,44 @@ func (woc *wfOperationCtx) markNodeError(nodeName string, err error) *wfv1.NodeS
 }
 
 func (woc *wfOperationCtx) executeContainer(nodeName string, tmpl *wfv1.Template) error {
+	node := woc.getNodeByName(nodeName)
+	if node != nil && node.Phase == wfv1.NodeRunning {
+		// we already marked the node running. pod should have already been created
+		return nil
+	}
 	woc.log.Debugf("Executing node %s with container template: %v\n", nodeName, tmpl)
 	_, err := woc.createWorkflowPod(nodeName, *tmpl.Container, tmpl)
 	if err != nil {
 		woc.markNodeError(nodeName, err)
 		return err
 	}
-	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	node = woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	node.IsPod = true
+	woc.wf.Status.Nodes[node.ID] = *node
 	woc.log.Infof("Initialized container node %v", node)
 	return nil
 }
 
+func (woc *wfOperationCtx) getOutboundNodes(nodeID string) []string {
+	node := woc.wf.Status.Nodes[nodeID]
+	if node.IsPod {
+		return []string{node.ID}
+	}
+	outbound := make([]string, 0)
+	for _, outboundNodeID := range node.OutboundNodes {
+		outNode := woc.wf.Status.Nodes[outboundNodeID]
+		if outNode.IsPod {
+			outbound = append(outbound, outboundNodeID)
+		} else {
+			subOutIDs := woc.getOutboundNodes(outboundNodeID)
+			for _, subOutID := range subOutIDs {
+				outbound = append(outbound, subOutID)
+			}
+		}
+	}
+	return outbound
+}
+
 // getTemplateOutputsFromScope resolves a template's outputs from the scope of the template
 func getTemplateOutputsFromScope(tmpl *wfv1.Template, scope *wfScope) (*wfv1.Outputs, error) {
 	if !tmpl.Outputs.HasOutputs() {
@@ -981,10 +1017,46 @@ func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template) e
 		return err
 	}
 	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	node.IsPod = true
+	woc.wf.Status.Nodes[node.ID] = *node
 	woc.log.Infof("Initialized script node %v", node)
 	return nil
 }
 
+// addNodeOutputsToScope adds all of a nodes outputs to the scope with the given prefix
+func (wfs *wfScope) addNodeOutputsToScope(prefix string, node *wfv1.NodeStatus) {
+	if node.PodIP != "" {
+		key := fmt.Sprintf("%s.ip", prefix)
+		wfs.addParamToScope(key, node.PodIP)
+	}
+	if node.Outputs != nil {
+		if node.Outputs.Result != nil {
+			key := fmt.Sprintf("%s.outputs.result", prefix)
+			wfs.addParamToScope(key, *node.Outputs.Result)
+		}
+		for _, outParam := range node.Outputs.Parameters {
+			key := fmt.Sprintf("%s.outputs.parameters.%s", prefix, outParam.Name)
+			wfs.addParamToScope(key, *outParam.Value)
+		}
+		for _, outArt := range node.Outputs.Artifacts {
+			key := fmt.Sprintf("%s.outputs.artifacts.%s", prefix, outArt.Name)
+			wfs.addArtifactToScope(key, outArt)
+		}
+	}
+}
+
+// replaceMap returns a replacement map of strings intended to be used simple string substitution
+func (wfs *wfScope) replaceMap() map[string]string {
+	replaceMap := make(map[string]string)
+	for key, val := range wfs.scope {
+		valStr, ok := val.(string)
+		if ok {
+			replaceMap[key] = valStr
+		}
+	}
+	return replaceMap
+}
+
 func (wfs *wfScope) addParamToScope(key, val string) {
 	wfs.scope[key] = val
 }
@@ -1037,6 +1109,7 @@ func (wfs *wfScope) resolveArtifact(v string) (*wfv1.Artifact, error) {
 }
 
 // addChildNode adds a nodeID as a child to a parent
+// parent and child are both node names
 func (woc *wfOperationCtx) addChildNode(parent string, child string) {
 	parentID := woc.wf.NodeID(parent)
 	childID := woc.wf.NodeID(child)
@@ -1075,6 +1148,8 @@ func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template)
 		return err
 	}
 	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	node.IsPod = true
+	woc.wf.Status.Nodes[node.ID] = *node
 	woc.log.Infof("Initialized resource node %v", node)
 	return nil
 }
diff --git a/workflow/controller/operator_test.go b/workflow/controller/operator_test.go
index 35321033c7cc..028b980494fe 100644
--- a/workflow/controller/operator_test.go
+++ b/workflow/controller/operator_test.go
@@ -47,40 +47,40 @@ func TestProcessNodesWithRetries(t *testing.T) {
 		woc.addChildNode(nodeName, childNode)
 	}
 
-	n := woc.getNode(nodeName)
-	lastChild, err = woc.getLastChildNode(&n)
+	n := woc.getNodeByName(nodeName)
+	lastChild, err = woc.getLastChildNode(n)
 	assert.Nil(t, err)
 	assert.NotNil(t, lastChild)
 
 	// Last child is still running. processNodesWithRetries() should return false since
 	// there should be no retries at this point.
-	err = woc.processNodeRetries(&n)
+	err = woc.processNodeRetries(n)
 	assert.Nil(t, err)
-	n = woc.getNode(nodeName)
+	n = woc.getNodeByName(nodeName)
 	assert.Equal(t, n.Phase, wfv1.NodeRunning)
 
 	// Mark lastChild as successful.
 	woc.markNodePhase(lastChild.Name, wfv1.NodeSucceeded)
-	err = woc.processNodeRetries(&n)
+	err = woc.processNodeRetries(n)
 	assert.Nil(t, err)
 	// The parent node also gets marked as Succeeded.
-	n = woc.getNode(nodeName)
+	n = woc.getNodeByName(nodeName)
 	assert.Equal(t, n.Phase, wfv1.NodeSucceeded)
 
 	// Mark the parent node as running again and the lastChild as failed.
 	woc.markNodePhase(n.Name, wfv1.NodeRunning)
 	woc.markNodePhase(lastChild.Name, wfv1.NodeFailed)
-	woc.processNodeRetries(&n)
-	n = woc.getNode(nodeName)
+	woc.processNodeRetries(n)
+	n = woc.getNodeByName(nodeName)
 	assert.Equal(t, n.Phase, wfv1.NodeRunning)
 
 	// Add a third node that has failed.
 	childNode := "child-node-3"
 	woc.markNodePhase(childNode, wfv1.NodeFailed)
 	woc.addChildNode(nodeName, childNode)
-	n = woc.getNode(nodeName)
-	err = woc.processNodeRetries(&n)
+	n = woc.getNodeByName(nodeName)
+	err = woc.processNodeRetries(n)
 	assert.Nil(t, err)
-	n = woc.getNode(nodeName)
+	n = woc.getNodeByName(nodeName)
 	assert.Equal(t, n.Phase, wfv1.NodeFailed)
 }
diff --git a/workflow/controller/steps.go b/workflow/controller/steps.go
index 60abff4021be..be41e4c9cb44 100644
--- a/workflow/controller/steps.go
+++ b/workflow/controller/steps.go
@@ -16,7 +16,11 @@ import (
 )
 
 func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) error {
+	node := woc.getNodeByName(nodeName)
 	nodeID := woc.wf.NodeID(nodeName)
+	if node == nil {
+		node = woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	}
 	defer func() {
 		if woc.wf.Status.Nodes[nodeID].Completed() {
 			_ = woc.killDeamonedChildren(nodeID)
@@ -28,29 +32,45 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 	}
 	for i, stepGroup := range tmpl.Steps {
 		sgNodeName := fmt.Sprintf("%s[%d]", nodeName, i)
-		woc.addChildNode(nodeName, sgNodeName)
+		sgNode := woc.getNodeByName(sgNodeName)
+		if sgNode == nil {
+			// initialize the step group
+			sgNode = woc.markNodePhase(sgNodeName, wfv1.NodeRunning)
+			if i == 0 {
+				woc.addChildNode(nodeName, sgNodeName)
+			} else {
+				// This logic will connect all the outbound nodes of the previous
+				// step group as parents to the current step group node
+				prevStepGroupName := fmt.Sprintf("%s[%d]", nodeName, i-1)
+				prevStepGroupNode := woc.getNodeByName(prevStepGroupName)
+				for _, childID := range prevStepGroupNode.Children {
+					outboundNodeIDs := woc.getOutboundNodes(childID)
+					woc.log.Infof("SG Outbound nodes of %s are %s", childID, outboundNodeIDs)
+					for _, outNodeID := range outboundNodeIDs {
+						woc.addChildNode(woc.wf.Status.Nodes[outNodeID].Name, sgNodeName)
+					}
+				}
+			}
+		}
 		err := woc.executeStepGroup(stepGroup, sgNodeName, &scope)
 		if err != nil {
-			if errors.IsCode(errors.CodeTimeout, err) {
-				return err
+			if !errors.IsCode(errors.CodeTimeout, err) {
+				woc.markNodeError(nodeName, err)
 			}
-			woc.markNodeError(nodeName, err)
 			return err
 		}
-		sgNodeID := woc.wf.NodeID(sgNodeName)
-		if !woc.wf.Status.Nodes[sgNodeID].Completed() {
-			woc.log.Infof("Workflow step group node %v not yet completed", woc.wf.Status.Nodes[sgNodeID])
+		if !sgNode.Completed() {
+			woc.log.Infof("Workflow step group node %v not yet completed", sgNode)
 			return nil
 		}
 
-		if !woc.wf.Status.Nodes[sgNodeID].Successful() {
-			failMessage := fmt.Sprintf("step group %s was unsuccessful", sgNodeName)
+		if !sgNode.Successful() {
+			failMessage := fmt.Sprintf("step group %s was unsuccessful", sgNode)
 			woc.log.Info(failMessage)
 			woc.markNodePhase(nodeName, wfv1.NodeFailed, failMessage)
 			return nil
 		}
 
-		// HACK: need better way to add children to scope
 		for _, step := range stepGroup {
 			childNodeName := fmt.Sprintf("%s.%s", sgNodeName, step.Name)
 			childNodeID := woc.wf.NodeID(childNodeName)
@@ -61,26 +81,11 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 				// are not easily referenceable by user.
 				continue
 			}
-			if childNode.PodIP != "" {
-				key := fmt.Sprintf("steps.%s.ip", step.Name)
-				scope.addParamToScope(key, childNode.PodIP)
-			}
-			if childNode.Outputs != nil {
-				if childNode.Outputs.Result != nil {
-					key := fmt.Sprintf("steps.%s.outputs.result", step.Name)
-					scope.addParamToScope(key, *childNode.Outputs.Result)
-				}
-				for _, outParam := range childNode.Outputs.Parameters {
-					key := fmt.Sprintf("steps.%s.outputs.parameters.%s", step.Name, outParam.Name)
-					scope.addParamToScope(key, *outParam.Value)
-				}
-				for _, outArt := range childNode.Outputs.Artifacts {
-					key := fmt.Sprintf("steps.%s.outputs.artifacts.%s", step.Name, outArt.Name)
-					scope.addArtifactToScope(key, outArt)
-				}
-			}
+			prefix := fmt.Sprintf("steps.%s", step.Name)
+			scope.addNodeOutputsToScope(prefix, &childNode)
 		}
 	}
+	// If this template has outputs from any of its steps, copy them to this node here
 	outputs, err := getTemplateOutputsFromScope(tmpl, &scope)
 	if err != nil {
 		woc.markNodeError(nodeName, err)
@@ -91,6 +96,21 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 		node.Outputs = outputs
 		woc.wf.Status.Nodes[nodeID] = node
 	}
+	// Now that we have completed, set the outbound nodes from the last step group
+	outbound := make([]string, 0)
+	lastSGNode := woc.getNodeByName(fmt.Sprintf("%s[%d]", nodeName, len(tmpl.Steps)-1))
+	for _, childID := range lastSGNode.Children {
+		outboundNodeIDs := woc.getOutboundNodes(childID)
+		woc.log.Infof("Outbound nodes of %s is %s", childID, outboundNodeIDs)
+		for _, outNodeID := range outboundNodeIDs {
+			outbound = append(outbound, outNodeID)
+		}
+	}
+	node = woc.getNodeByName(nodeName)
+	woc.log.Infof("Outbound nodes of %s is %s", node.ID, outbound)
+	node.OutboundNodes = outbound
+	woc.wf.Status.Nodes[node.ID] = *node
+
 	woc.markNodePhase(nodeName, wfv1.NodeSucceeded)
 	return nil
 }

From 639ad1e15312da5efa88fd62a0f3aced2ac17c52 Mon Sep 17 00:00:00 2001
From: Jesse Suen <Jesse_Suen@intuit.com>
Date: Thu, 25 Jan 2018 22:34:05 -0800
Subject: [PATCH 2/8] Introduce Type field in NodeStatus to to assist with
 visualization

---
 pkg/apis/workflow/v1alpha1/types.go |  20 ++-
 workflow/controller/dag.go          |  26 ++--
 workflow/controller/operator.go     | 189 ++++++++++++++--------------
 workflow/controller/steps.go        |  33 ++---
 4 files changed, 136 insertions(+), 132 deletions(-)

diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go
index 61680f4131f7..98a573d64cc3 100644
--- a/pkg/apis/workflow/v1alpha1/types.go
+++ b/pkg/apis/workflow/v1alpha1/types.go
@@ -33,6 +33,19 @@ const (
 	NodeError     NodePhase = "Error"
 )
 
+// NodeType is the type of a node
+type NodeType string
+
+// Node types
+const (
+	NodeTypePod       NodeType = "Pod"
+	NodeTypeSteps     NodeType = "Steps"
+	NodeTypeStepGroup NodeType = "StepGroup"
+	NodeTypeDAG       NodeType = "DAG"
+	NodeTypeRetry     NodeType = "Retry"
+	NodeTypeSkipped   NodeType = "Skipped"
+)
+
 // +genclient
 // +genclient:noStatus
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -303,6 +316,9 @@ type NodeStatus struct {
 	// It can represent a container, step group, or the entire workflow
 	Name string `json:"name"`
 
+	// Type indicates type of node
+	Type NodeType `json:"type"`
+
 	// Phase a simple, high-level summary of where the node is in its lifecycle.
 	// Can be used as a state machine.
 	Phase NodePhase `json:"phase"`
@@ -316,15 +332,13 @@ type NodeStatus struct {
 	// Time at which this node completed
 	FinishedAt metav1.Time `json:"finishedAt,omitempty"`
 
-	// IsPod indicates if this node is a pod or not
-	IsPod bool `json:"isPod,omitempty"`
-
 	// PodIP captures the IP of the pod for daemoned steps
 	PodIP string `json:"podIP,omitempty"`
 
 	// Daemoned tracks whether or not this node was daemoned and need to be terminated
 	Daemoned *bool `json:"daemoned,omitempty"`
 
+	// RetryStrategy controls how to retry a step
 	RetryStrategy *RetryStrategy `json:"retryStrategy,omitempty"`
 
 	// Outputs captures output parameter values and artifact locations
diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go
index 0526e474ab7f..0298ece4ff8d 100644
--- a/workflow/controller/dag.go
+++ b/workflow/controller/dag.go
@@ -61,10 +61,9 @@ func (d *dagContext) getTaskNode(taskName string) *wfv1.NodeStatus {
 }
 
 func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv1.NodeStatus {
-	nodeID := woc.wf.NodeID(nodeName)
-	node, nodeInitialized := woc.wf.Status.Nodes[nodeID]
-	if nodeInitialized && node.Completed() {
-		return &node
+	node := woc.getNodeByName(nodeName)
+	if node != nil && node.Completed() {
+		return node
 	}
 	dagCtx := &dagContext{
 		encompasser: nodeName,
@@ -80,8 +79,8 @@ func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv
 		targetTasks = strings.Split(tmpl.DAG.Targets, " ")
 	}
 
-	if !nodeInitialized {
-		node = *woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	if node == nil {
+		node = woc.initializeNode(nodeName, wfv1.NodeTypeDAG, wfv1.NodeRunning)
 		rootTasks := findRootTaskNames(dagCtx, targetTasks)
 		woc.log.Infof("Root tasks of %s identified as %s", nodeName, rootTasks)
 		for _, rootTaskName := range rootTasks {
@@ -96,20 +95,21 @@ func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv
 	for _, depName := range targetTasks {
 		depNode := dagCtx.getTaskNode(depName)
 		if depNode == nil || !depNode.Completed() {
-			return &node
+			return node
 		}
 	}
 	// all desired tasks completed. now it is time to assess state
 	for _, depName := range targetTasks {
 		depNode := dagCtx.getTaskNode(depName)
 		if !depNode.Successful() {
+			// One of our dependencies failed/errored. Mark this failed
 			// TODO: consider creating a virtual fan-in node
 			return woc.markNodePhase(nodeName, depNode.Phase)
 		}
 	}
 
 	// set the outbound nodes from the target tasks
-	node = *woc.getNodeByName(nodeName)
+	node = woc.getNodeByName(nodeName)
 	outbound := make([]string, 0)
 	for _, depName := range targetTasks {
 		depNode := dagCtx.getTaskNode(depName)
@@ -120,7 +120,7 @@ func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv
 	}
 	woc.log.Infof("Outbound nodes of %s set to %s", node.ID, outbound)
 	node.OutboundNodes = outbound
-	woc.wf.Status.Nodes[node.ID] = node
+	woc.wf.Status.Nodes[node.ID] = *node
 
 	return woc.markNodePhase(nodeName, wfv1.NodeSucceeded)
 }
@@ -196,7 +196,7 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
 		for _, depName := range task.Dependencies {
 			depNode := dagCtx.getTaskNode(depName)
 			woc.log.Infof("node %s outbound nodes: %s", depNode, depNode.OutboundNodes)
-			if depNode.IsPod {
+			if depNode.Type == wfv1.NodeTypePod {
 				woc.addChildNode(depNode.Name, nodeName)
 			} else {
 				for _, outNodeID := range depNode.OutboundNodes {
@@ -207,8 +207,8 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
 	}
 
 	if !dependenciesSuccessful {
-		woc.log.Infof("Task %s being marked %s due to dependency failure", taskName, wfv1.NodeSkipped)
-		_ = woc.markNodePhase(nodeName, wfv1.NodeSkipped)
+		woc.log.Infof("Task %s being marked %s due to dependency failure", taskName, wfv1.NodeFailed)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeFailed)
 		return
 	}
 
@@ -216,7 +216,7 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
 	// Substitute params/artifacts from our dependencies and execute the template
 	newTask, err := woc.resolveDependencyReferences(dagCtx, task)
 	if err != nil {
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
 		return
 	}
 	_ = woc.executeTemplate(newTask.Template, newTask.Arguments, nodeName)
diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go
index 53db995b950e..63dc7eb44671 100644
--- a/workflow/controller/operator.go
+++ b/workflow/controller/operator.go
@@ -122,7 +122,7 @@ func (woc *wfOperationCtx) operate() {
 
 	err := woc.createPVCs()
 	if err != nil {
-		woc.log.Errorf("%s error: %+v", woc.wf.ObjectMeta.Name, err)
+		woc.log.Errorf("%s pvc create error: %+v", woc.wf.ObjectMeta.Name, err)
 		woc.markWorkflowError(err, true)
 		return
 	}
@@ -379,7 +379,7 @@ func (woc *wfOperationCtx) podReconciliation() error {
 	// is now impossible to infer status. The only thing we can do at this point is
 	// to mark the node with Error.
 	for nodeID, node := range woc.wf.Status.Nodes {
-		if !node.IsPod || node.Completed() {
+		if node.Type != wfv1.NodeTypePod || node.Completed() {
 			// node is not a pod, or it is already complete
 			continue
 		}
@@ -676,7 +676,6 @@ func (woc *wfOperationCtx) createPVCs() error {
 		}
 		pvc, err := pvcClient.Create(&pvcTmpl)
 		if err != nil {
-			woc.markNodeError(woc.wf.ObjectMeta.Name, err)
 			return err
 		}
 		vol := apiv1.Volume{
@@ -749,85 +748,34 @@ func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Argume
 	tmpl := woc.wf.GetTemplate(templateName)
 	if tmpl == nil {
 		err := errors.Errorf(errors.CodeBadRequest, "Node %v error: template '%s' undefined", node, templateName)
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
 		return err
 	}
 
 	tmpl, err := common.ProcessArgs(tmpl, args, woc.globalParams, false)
 	if err != nil {
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
 		return err
 	}
 
 	switch tmpl.GetType() {
 	case wfv1.TemplateTypeContainer:
-		if node != nil {
-			if node.RetryStrategy != nil {
-				if err = woc.processNodeRetries(node); err != nil {
-					return err
-				}
-				// The updated node status could've changed. Get the latest copy of the node.
-				node = woc.getNodeByName(node.Name)
-				fmt.Printf("Node %s: Status: %s\n", node.Name, node.Phase)
-				if node.Completed() {
-					return nil
-				}
-				lastChildNode, err := woc.getLastChildNode(node)
-				if err != nil {
-					return err
-				}
-				if !lastChildNode.Completed() {
-					// last child node is still running.
-					return nil
-				}
-			} else {
-				// There are no retries configured and there's already a node entry for the container.
-				// This means the container was already scheduled (or had a create pod error). Nothing
-				// to more to do with this node.
-				return nil
-			}
-		}
-
-		// If the user has specified retries, a special "retries" non-leaf node
-		// is created. This node acts as the parent of all retries that will be
-		// done for the container. The status of this node should be "Success"
-		// if any of the retries succeed. Otherwise, it is "Failed".
-
-		// TODO(shri): Mark the current node as a "retry" node
-		// Create a new child node as the first attempt node and
-		// run the template in that node.
-		nodeToExecute := nodeName
 		if tmpl.RetryStrategy != nil {
-			node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
-			retries := wfv1.RetryStrategy{}
-			node.RetryStrategy = &retries
-			node.RetryStrategy.Limit = tmpl.RetryStrategy.Limit
-			woc.wf.Status.Nodes[node.ID] = *node
-
-			// Create new node as child of 'node'
-			newContainerName := fmt.Sprintf("%s(%d)", nodeName, len(node.Children))
-			woc.markNodePhase(newContainerName, wfv1.NodeRunning)
-			woc.addChildNode(nodeName, newContainerName)
-			nodeToExecute = newContainerName
+			err = woc.executeRetryContainer(nodeName, tmpl)
+		} else {
+			err = woc.executeContainer(nodeName, tmpl)
 		}
-
-		// We have not yet created the pod
-		err = woc.executeContainer(nodeToExecute, tmpl)
 	case wfv1.TemplateTypeSteps:
 		err = woc.executeSteps(nodeName, tmpl)
 	case wfv1.TemplateTypeScript:
-		if node == nil {
-			err = woc.executeScript(nodeName, tmpl)
-		}
+		err = woc.executeScript(nodeName, tmpl)
 	case wfv1.TemplateTypeResource:
-		if node == nil {
-			err = woc.executeResource(nodeName, tmpl)
-		}
+		err = woc.executeResource(nodeName, tmpl)
 	case wfv1.TemplateTypeDAG:
 		_ = woc.executeDAG(nodeName, tmpl)
 	default:
 		err = errors.Errorf(errors.CodeBadRequest, "Template '%s' missing specification", tmpl.Name)
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
 	}
 	if err != nil {
 		return err
@@ -891,25 +839,38 @@ func (woc *wfOperationCtx) markWorkflowError(err error, markCompleted bool) {
 	woc.markWorkflowPhase(wfv1.NodeError, markCompleted, err.Error())
 }
 
+func (woc *wfOperationCtx) initializeNode(nodeName string, nodeType wfv1.NodeType, phase wfv1.NodePhase, message ...string) *wfv1.NodeStatus {
+	nodeID := woc.wf.NodeID(nodeName)
+	_, ok := woc.wf.Status.Nodes[nodeID]
+	if ok {
+		panic(fmt.Sprintf("node %s already initialized", nodeName))
+	}
+	node := wfv1.NodeStatus{
+		ID:        nodeID,
+		Name:      nodeName,
+		Type:      nodeType,
+		Phase:     phase,
+		StartedAt: metav1.Time{Time: time.Now().UTC()},
+	}
+	if node.Completed() && node.FinishedAt.IsZero() {
+		node.FinishedAt = node.StartedAt
+	}
+	woc.wf.Status.Nodes[nodeID] = node
+	woc.log.Infof("node %s initialized %s", node, node.Phase)
+	woc.updated = true
+	return &node
+}
+
 // markNodePhase marks a node with the given phase, creating the node if necessary and handles timestamps
 func (woc *wfOperationCtx) markNodePhase(nodeName string, phase wfv1.NodePhase, message ...string) *wfv1.NodeStatus {
-	nodeID := woc.wf.NodeID(nodeName)
-	node, ok := woc.wf.Status.Nodes[nodeID]
-	if !ok {
-		node = wfv1.NodeStatus{
-			ID:        nodeID,
-			Name:      nodeName,
-			Phase:     phase,
-			StartedAt: metav1.Time{Time: time.Now().UTC()},
-		}
-		woc.log.Infof("node %s initialized %s", node, node.Phase)
+	node := woc.getNodeByName(nodeName)
+	if node == nil {
+		panic(fmt.Sprintf("node %s uninitialized", nodeName))
+	}
+	if node.Phase != phase {
+		woc.log.Infof("node %s phase %s -> %s", node, node.Phase, phase)
+		node.Phase = phase
 		woc.updated = true
-	} else {
-		if node.Phase != phase {
-			woc.log.Infof("node %s phase %s -> %s", node, node.Phase, phase)
-			node.Phase = phase
-			woc.updated = true
-		}
 	}
 	if len(message) > 0 {
 		if message[0] != node.Message {
@@ -923,8 +884,8 @@ func (woc *wfOperationCtx) markNodePhase(nodeName string, phase wfv1.NodePhase,
 		woc.log.Infof("node %s finished: %s", node, node.FinishedAt)
 		woc.updated = true
 	}
-	woc.wf.Status.Nodes[nodeID] = node
-	return &node
+	woc.wf.Status.Nodes[node.ID] = *node
+	return node
 }
 
 // markNodeError is a convenience method to mark a node with an error and set the message from the error
@@ -932,34 +893,66 @@ func (woc *wfOperationCtx) markNodeError(nodeName string, err error) *wfv1.NodeS
 	return woc.markNodePhase(nodeName, wfv1.NodeError, err.Error())
 }
 
+// If the user has specified retries, a special "retries" non-leaf node
+// is created. This node acts as the parent of all retries that will be
+// done for the container. The status of this node should be "Success"
+// if any of the retries succeed. Otherwise, it is "Failed".
+func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Template) error {
+	node := woc.getNodeByName(nodeName)
+	if node == nil {
+		node := woc.initializeNode(nodeName, wfv1.NodeTypeRetry, wfv1.NodeRunning)
+		node.RetryStrategy = tmpl.RetryStrategy
+		woc.wf.Status.Nodes[node.ID] = *node
+	}
+	if err := woc.processNodeRetries(node); err != nil {
+		return err
+	}
+	// The updated node status could've changed. Get the latest copy of the node.
+	node = woc.getNodeByName(node.Name)
+	woc.log.Infof("Node %s: Status: %s", node.Name, node.Phase)
+	if node.Completed() {
+		return nil
+	}
+	lastChildNode, err := woc.getLastChildNode(node)
+	if err != nil {
+		return err
+	}
+	if !lastChildNode.Completed() {
+		// last child node is still running.
+		return nil
+	}
+	// Create new node as child of 'node'
+	newContainerName := fmt.Sprintf("%s(%d)", nodeName, len(node.Children))
+	err = woc.executeContainer(newContainerName, tmpl)
+	woc.addChildNode(nodeName, newContainerName)
+	return err
+}
+
 func (woc *wfOperationCtx) executeContainer(nodeName string, tmpl *wfv1.Template) error {
 	node := woc.getNodeByName(nodeName)
-	if node != nil && node.Phase == wfv1.NodeRunning {
-		// we already marked the node running. pod should have already been created
+	if node != nil {
 		return nil
 	}
 	woc.log.Debugf("Executing node %s with container template: %v\n", nodeName, tmpl)
 	_, err := woc.createWorkflowPod(nodeName, *tmpl.Container, tmpl)
 	if err != nil {
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
 		return err
 	}
-	node = woc.markNodePhase(nodeName, wfv1.NodeRunning)
-	node.IsPod = true
-	woc.wf.Status.Nodes[node.ID] = *node
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
 	woc.log.Infof("Initialized container node %v", node)
 	return nil
 }
 
 func (woc *wfOperationCtx) getOutboundNodes(nodeID string) []string {
 	node := woc.wf.Status.Nodes[nodeID]
-	if node.IsPod {
+	if node.Type == wfv1.NodeTypePod {
 		return []string{node.ID}
 	}
 	outbound := make([]string, 0)
 	for _, outboundNodeID := range node.OutboundNodes {
 		outNode := woc.wf.Status.Nodes[outboundNodeID]
-		if outNode.IsPod {
+		if outNode.Type == wfv1.NodeTypePod {
 			outbound = append(outbound, outboundNodeID)
 		} else {
 			subOutIDs := woc.getOutboundNodes(outboundNodeID)
@@ -1004,6 +997,10 @@ func getTemplateOutputsFromScope(tmpl *wfv1.Template, scope *wfScope) (*wfv1.Out
 }
 
 func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template) error {
+	node := woc.getNodeByName(nodeName)
+	if node != nil {
+		return nil
+	}
 	mainCtr := apiv1.Container{
 		Image:   tmpl.Script.Image,
 		Command: tmpl.Script.Command,
@@ -1011,12 +1008,10 @@ func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template) e
 	}
 	_, err := woc.createWorkflowPod(nodeName, mainCtr, tmpl)
 	if err != nil {
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
 		return err
 	}
-	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
-	node.IsPod = true
-	woc.wf.Status.Nodes[node.ID] = *node
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
 	woc.log.Infof("Initialized script node %v", node)
 	return nil
 }
@@ -1131,6 +1126,10 @@ func (woc *wfOperationCtx) addChildNode(parent string, child string) {
 
 // executeResource is runs a kubectl command against a manifest
 func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template) error {
+	node := woc.getNodeByName(nodeName)
+	if node != nil {
+		return nil
+	}
 	mainCtr := apiv1.Container{
 		Image:   woc.controller.Config.ExecutorImage,
 		Command: []string{"argoexec"},
@@ -1142,12 +1141,10 @@ func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template)
 	}
 	_, err := woc.createWorkflowPod(nodeName, mainCtr, tmpl)
 	if err != nil {
-		woc.markNodeError(nodeName, err)
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
 		return err
 	}
-	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
-	node.IsPod = true
-	woc.wf.Status.Nodes[node.ID] = *node
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
 	woc.log.Infof("Initialized resource node %v", node)
 	return nil
 }
diff --git a/workflow/controller/steps.go b/workflow/controller/steps.go
index be41e4c9cb44..1f78bd14e2e1 100644
--- a/workflow/controller/steps.go
+++ b/workflow/controller/steps.go
@@ -17,13 +17,12 @@ import (
 
 func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) error {
 	node := woc.getNodeByName(nodeName)
-	nodeID := woc.wf.NodeID(nodeName)
 	if node == nil {
-		node = woc.markNodePhase(nodeName, wfv1.NodeRunning)
+		node = woc.initializeNode(nodeName, wfv1.NodeTypeSteps, wfv1.NodeRunning)
 	}
 	defer func() {
-		if woc.wf.Status.Nodes[nodeID].Completed() {
-			_ = woc.killDeamonedChildren(nodeID)
+		if woc.wf.Status.Nodes[node.ID].Completed() {
+			_ = woc.killDeamonedChildren(node.ID)
 		}
 	}()
 	scope := wfScope{
@@ -35,7 +34,7 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 		sgNode := woc.getNodeByName(sgNodeName)
 		if sgNode == nil {
 			// initialize the step group
-			sgNode = woc.markNodePhase(sgNodeName, wfv1.NodeRunning)
+			sgNode = woc.initializeNode(sgNodeName, wfv1.NodeTypeStepGroup, wfv1.NodeRunning)
 			if i == 0 {
 				woc.addChildNode(nodeName, sgNodeName)
 			} else {
@@ -92,9 +91,9 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 		return err
 	}
 	if outputs != nil {
-		node := woc.wf.Status.Nodes[nodeID]
+		node = woc.getNodeByName(nodeName)
 		node.Outputs = outputs
-		woc.wf.Status.Nodes[nodeID] = node
+		woc.wf.Status.Nodes[node.ID] = *node
 	}
 	// Now that we have completed, set the outbound nodes from the last step group
 	outbound := make([]string, 0)
@@ -111,24 +110,18 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 	node.OutboundNodes = outbound
 	woc.wf.Status.Nodes[node.ID] = *node
 
-	woc.markNodePhase(nodeName, wfv1.NodeSucceeded)
+	node = woc.markNodePhase(nodeName, wfv1.NodeSucceeded)
 	return nil
 }
 
 // executeStepGroup examines a map of parallel steps and executes them in parallel.
 // Handles referencing of variables in scope, expands `withItem` clauses, and evaluates `when` expressions
 func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNodeName string, scope *wfScope) error {
-	nodeID := woc.wf.NodeID(sgNodeName)
-	node, ok := woc.wf.Status.Nodes[nodeID]
-	if ok && node.Completed() {
+	node := woc.getNodeByName(sgNodeName)
+	if node.Completed() {
 		woc.log.Debugf("Step group node %v already marked completed", node)
 		return nil
 	}
-	if !ok {
-		node = *woc.markNodePhase(sgNodeName, wfv1.NodeRunning)
-		woc.log.Infof("Initializing step group node %v", node)
-	}
-
 	// First, resolve any references to outputs from previous steps, and perform substitution
 	stepGroup, err := woc.resolveReferences(stepGroup, scope)
 	if err != nil {
@@ -158,7 +151,7 @@ func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNod
 		if !proceed {
 			skipReason := fmt.Sprintf("when '%s' evaluated false", step.When)
 			woc.log.Infof("Skipping %s: %s", childNodeName, skipReason)
-			woc.markNodePhase(childNodeName, wfv1.NodeSkipped, skipReason)
+			woc.initializeNode(childNodeName, wfv1.NodeTypeSkipped, wfv1.NodeSkipped, skipReason)
 			continue
 		}
 		err = woc.executeTemplate(step.Template, step.Arguments, childNodeName)
@@ -171,7 +164,7 @@ func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNod
 		}
 	}
 
-	node = woc.wf.Status.Nodes[nodeID]
+	node = woc.getNodeByName(sgNodeName)
 	// Return if not all children completed
 	for _, childNodeID := range node.Children {
 		if !woc.wf.Status.Nodes[childNodeID].Completed() {
@@ -188,8 +181,8 @@ func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNod
 			return nil
 		}
 	}
-	woc.markNodePhase(node.Name, wfv1.NodeSucceeded)
-	woc.log.Infof("Step group node %v successful", woc.wf.Status.Nodes[nodeID])
+	node = woc.markNodePhase(node.Name, wfv1.NodeSucceeded)
+	woc.log.Infof("Step group node %v successful", node)
 	return nil
 }
 

From d38324b46100e6ba07ad1c8ffc957c257aac41d7 Mon Sep 17 00:00:00 2001
From: Jesse Suen <Jesse_Suen@intuit.com>
Date: Fri, 26 Jan 2018 00:22:06 -0800
Subject: [PATCH 3/8] Add boundaryID field in NodeStatus to group nodes by
 template boundaries

---
 pkg/apis/workflow/v1alpha1/types.go |  3 ++
 workflow/controller/dag.go          | 28 +++++++------
 workflow/controller/operator.go     | 61 +++++++++++++++--------------
 workflow/controller/steps.go        | 38 ++++++++++++------
 4 files changed, 74 insertions(+), 56 deletions(-)

diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go
index 98a573d64cc3..1a4e8dc74ece 100644
--- a/pkg/apis/workflow/v1alpha1/types.go
+++ b/pkg/apis/workflow/v1alpha1/types.go
@@ -323,6 +323,9 @@ type NodeStatus struct {
 	// Can be used as a state machine.
 	Phase NodePhase `json:"phase"`
 
+	// BoundaryID indicates the node ID of the associated template root node in which this node belongs to
+	BoundaryID string `json:"boundaryID,omitempty"`
+
 	// A human readable message indicating details about why the node is in this condition.
 	Message string `json:"message,omitempty"`
 
diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go
index 0298ece4ff8d..e7f4da3bd5d7 100644
--- a/workflow/controller/dag.go
+++ b/workflow/controller/dag.go
@@ -13,9 +13,10 @@ import (
 
 // dagContext holds context information about this context's DAG
 type dagContext struct {
-	// encompasser is the node name of the encompassing node to this DAG.
+	// boundaryName is the node name of the boundary node to this DAG.
 	// This is used to incorporate into each of the task's node names.
-	encompasser string
+	boundaryName string
+	boundaryID   string
 
 	// tasks are all the tasks in the template
 	tasks []wfv1.DAGTask
@@ -42,7 +43,7 @@ func (d *dagContext) getTask(taskName string) *wfv1.DAGTask {
 
 // taskNodeName formulates the nodeName for a dag task
 func (d *dagContext) taskNodeName(taskName string) string {
-	return fmt.Sprintf("%s.%s", d.encompasser, taskName)
+	return fmt.Sprintf("%s.%s", d.boundaryName, taskName)
 }
 
 // taskNodeID formulates the node ID for a dag task
@@ -60,17 +61,18 @@ func (d *dagContext) getTaskNode(taskName string) *wfv1.NodeStatus {
 	return &node
 }
 
-func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv1.NodeStatus {
+func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template, boundaryID string) *wfv1.NodeStatus {
 	node := woc.getNodeByName(nodeName)
 	if node != nil && node.Completed() {
 		return node
 	}
 	dagCtx := &dagContext{
-		encompasser: nodeName,
-		tasks:       tmpl.DAG.Tasks,
-		visited:     make(map[string]bool),
-		tmpl:        tmpl,
-		wf:          woc.wf,
+		boundaryName: nodeName,
+		boundaryID:   woc.wf.NodeID(nodeName),
+		tasks:        tmpl.DAG.Tasks,
+		visited:      make(map[string]bool),
+		tmpl:         tmpl,
+		wf:           woc.wf,
 	}
 	var targetTasks []string
 	if tmpl.DAG.Targets == "" {
@@ -80,7 +82,7 @@ func (woc *wfOperationCtx) executeDAG(nodeName string, tmpl *wfv1.Template) *wfv
 	}
 
 	if node == nil {
-		node = woc.initializeNode(nodeName, wfv1.NodeTypeDAG, wfv1.NodeRunning)
+		node = woc.initializeNode(nodeName, wfv1.NodeTypeDAG, boundaryID, wfv1.NodeRunning)
 		rootTasks := findRootTaskNames(dagCtx, targetTasks)
 		woc.log.Infof("Root tasks of %s identified as %s", nodeName, rootTasks)
 		for _, rootTaskName := range rootTasks {
@@ -208,7 +210,7 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
 
 	if !dependenciesSuccessful {
 		woc.log.Infof("Task %s being marked %s due to dependency failure", taskName, wfv1.NodeFailed)
-		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeFailed)
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagCtx.boundaryID, wfv1.NodeFailed)
 		return
 	}
 
@@ -216,10 +218,10 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
 	// Substitute params/artifacts from our dependencies and execute the template
 	newTask, err := woc.resolveDependencyReferences(dagCtx, task)
 	if err != nil {
-		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, dagCtx.boundaryID, wfv1.NodeError, err.Error())
 		return
 	}
-	_ = woc.executeTemplate(newTask.Template, newTask.Arguments, nodeName)
+	_ = woc.executeTemplate(newTask.Template, newTask.Arguments, nodeName, dagCtx.boundaryID)
 }
 
 // resolveDependencyReferences replaces any references to outputs of task dependencies, or artifacts in the inputs
diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go
index 63dc7eb44671..f8c2a8e2072a 100644
--- a/workflow/controller/operator.go
+++ b/workflow/controller/operator.go
@@ -128,7 +128,7 @@ func (woc *wfOperationCtx) operate() {
 	}
 	var workflowStatus wfv1.NodePhase
 	var workflowMessage string
-	err = woc.executeTemplate(woc.wf.Spec.Entrypoint, woc.wf.Spec.Arguments, woc.wf.ObjectMeta.Name)
+	err = woc.executeTemplate(woc.wf.Spec.Entrypoint, woc.wf.Spec.Arguments, woc.wf.ObjectMeta.Name, "")
 	if err != nil {
 		if errors.IsCode(errors.CodeTimeout, err) {
 			// A timeout indicates we took too long operating on the workflow.
@@ -156,7 +156,7 @@ func (woc *wfOperationCtx) operate() {
 		}
 		woc.log.Infof("Running OnExit handler: %s", woc.wf.Spec.OnExit)
 		onExitNodeName := woc.wf.ObjectMeta.Name + ".onExit"
-		err = woc.executeTemplate(woc.wf.Spec.OnExit, woc.wf.Spec.Arguments, onExitNodeName)
+		err = woc.executeTemplate(woc.wf.Spec.OnExit, woc.wf.Spec.Arguments, onExitNodeName, "")
 		if err != nil {
 			if errors.IsCode(errors.CodeTimeout, err) {
 				woc.requeue()
@@ -738,7 +738,7 @@ func (woc *wfOperationCtx) getLastChildNode(node *wfv1.NodeStatus) (*wfv1.NodeSt
 	return &lastChildNode, nil
 }
 
-func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Arguments, nodeName string) error {
+func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Arguments, nodeName string, boundaryID string) error {
 	woc.log.Debugf("Evaluating node %s: template: %s", nodeName, templateName)
 	node := woc.getNodeByName(nodeName)
 	if node != nil && node.Completed() {
@@ -748,34 +748,34 @@ func (woc *wfOperationCtx) executeTemplate(templateName string, args wfv1.Argume
 	tmpl := woc.wf.GetTemplate(templateName)
 	if tmpl == nil {
 		err := errors.Errorf(errors.CodeBadRequest, "Node %v error: template '%s' undefined", node, templateName)
-		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, boundaryID, wfv1.NodeError, err.Error())
 		return err
 	}
 
 	tmpl, err := common.ProcessArgs(tmpl, args, woc.globalParams, false)
 	if err != nil {
-		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, boundaryID, wfv1.NodeError, err.Error())
 		return err
 	}
 
 	switch tmpl.GetType() {
 	case wfv1.TemplateTypeContainer:
 		if tmpl.RetryStrategy != nil {
-			err = woc.executeRetryContainer(nodeName, tmpl)
+			err = woc.executeRetryContainer(nodeName, tmpl, boundaryID)
 		} else {
-			err = woc.executeContainer(nodeName, tmpl)
+			err = woc.executeContainer(nodeName, tmpl, boundaryID)
 		}
 	case wfv1.TemplateTypeSteps:
-		err = woc.executeSteps(nodeName, tmpl)
+		err = woc.executeSteps(nodeName, tmpl, boundaryID)
 	case wfv1.TemplateTypeScript:
-		err = woc.executeScript(nodeName, tmpl)
+		err = woc.executeScript(nodeName, tmpl, boundaryID)
 	case wfv1.TemplateTypeResource:
-		err = woc.executeResource(nodeName, tmpl)
+		err = woc.executeResource(nodeName, tmpl, boundaryID)
 	case wfv1.TemplateTypeDAG:
-		_ = woc.executeDAG(nodeName, tmpl)
+		_ = woc.executeDAG(nodeName, tmpl, boundaryID)
 	default:
 		err = errors.Errorf(errors.CodeBadRequest, "Template '%s' missing specification", tmpl.Name)
-		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypeSkipped, boundaryID, wfv1.NodeError, err.Error())
 	}
 	if err != nil {
 		return err
@@ -839,18 +839,19 @@ func (woc *wfOperationCtx) markWorkflowError(err error, markCompleted bool) {
 	woc.markWorkflowPhase(wfv1.NodeError, markCompleted, err.Error())
 }
 
-func (woc *wfOperationCtx) initializeNode(nodeName string, nodeType wfv1.NodeType, phase wfv1.NodePhase, message ...string) *wfv1.NodeStatus {
+func (woc *wfOperationCtx) initializeNode(nodeName string, nodeType wfv1.NodeType, boundaryID string, phase wfv1.NodePhase, message ...string) *wfv1.NodeStatus {
 	nodeID := woc.wf.NodeID(nodeName)
 	_, ok := woc.wf.Status.Nodes[nodeID]
 	if ok {
 		panic(fmt.Sprintf("node %s already initialized", nodeName))
 	}
 	node := wfv1.NodeStatus{
-		ID:        nodeID,
-		Name:      nodeName,
-		Type:      nodeType,
-		Phase:     phase,
-		StartedAt: metav1.Time{Time: time.Now().UTC()},
+		ID:         nodeID,
+		Name:       nodeName,
+		Type:       nodeType,
+		BoundaryID: boundaryID,
+		Phase:      phase,
+		StartedAt:  metav1.Time{Time: time.Now().UTC()},
 	}
 	if node.Completed() && node.FinishedAt.IsZero() {
 		node.FinishedAt = node.StartedAt
@@ -897,10 +898,10 @@ func (woc *wfOperationCtx) markNodeError(nodeName string, err error) *wfv1.NodeS
 // is created. This node acts as the parent of all retries that will be
 // done for the container. The status of this node should be "Success"
 // if any of the retries succeed. Otherwise, it is "Failed".
-func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Template) error {
+func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node == nil {
-		node := woc.initializeNode(nodeName, wfv1.NodeTypeRetry, wfv1.NodeRunning)
+		node := woc.initializeNode(nodeName, wfv1.NodeTypeRetry, boundaryID, wfv1.NodeRunning)
 		node.RetryStrategy = tmpl.RetryStrategy
 		woc.wf.Status.Nodes[node.ID] = *node
 	}
@@ -923,12 +924,12 @@ func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Tem
 	}
 	// Create new node as child of 'node'
 	newContainerName := fmt.Sprintf("%s(%d)", nodeName, len(node.Children))
-	err = woc.executeContainer(newContainerName, tmpl)
+	err = woc.executeContainer(newContainerName, tmpl, boundaryID)
 	woc.addChildNode(nodeName, newContainerName)
 	return err
 }
 
-func (woc *wfOperationCtx) executeContainer(nodeName string, tmpl *wfv1.Template) error {
+func (woc *wfOperationCtx) executeContainer(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node != nil {
 		return nil
@@ -936,10 +937,10 @@ func (woc *wfOperationCtx) executeContainer(nodeName string, tmpl *wfv1.Template
 	woc.log.Debugf("Executing node %s with container template: %v\n", nodeName, tmpl)
 	_, err := woc.createWorkflowPod(nodeName, *tmpl.Container, tmpl)
 	if err != nil {
-		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeError, err.Error())
 		return err
 	}
-	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeRunning)
 	woc.log.Infof("Initialized container node %v", node)
 	return nil
 }
@@ -996,7 +997,7 @@ func getTemplateOutputsFromScope(tmpl *wfv1.Template, scope *wfScope) (*wfv1.Out
 	return &outputs, nil
 }
 
-func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template) error {
+func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node != nil {
 		return nil
@@ -1008,10 +1009,10 @@ func (woc *wfOperationCtx) executeScript(nodeName string, tmpl *wfv1.Template) e
 	}
 	_, err := woc.createWorkflowPod(nodeName, mainCtr, tmpl)
 	if err != nil {
-		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeError, err.Error())
 		return err
 	}
-	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeRunning)
 	woc.log.Infof("Initialized script node %v", node)
 	return nil
 }
@@ -1125,7 +1126,7 @@ func (woc *wfOperationCtx) addChildNode(parent string, child string) {
 }
 
 // executeResource is runs a kubectl command against a manifest
-func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template) error {
+func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node != nil {
 		return nil
@@ -1141,10 +1142,10 @@ func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template)
 	}
 	_, err := woc.createWorkflowPod(nodeName, mainCtr, tmpl)
 	if err != nil {
-		woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeError, err.Error())
+		woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeError, err.Error())
 		return err
 	}
-	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, wfv1.NodeRunning)
+	node = woc.initializeNode(nodeName, wfv1.NodeTypePod, boundaryID, wfv1.NodeRunning)
 	woc.log.Infof("Initialized resource node %v", node)
 	return nil
 }
diff --git a/workflow/controller/steps.go b/workflow/controller/steps.go
index 1f78bd14e2e1..634db109dd74 100644
--- a/workflow/controller/steps.go
+++ b/workflow/controller/steps.go
@@ -15,26 +15,38 @@ import (
 	"github.com/valyala/fasttemplate"
 )
 
-func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) error {
+// stepsContext holds context information about this context's steps
+type stepsContext struct {
+	// boundaryID is the node ID of the boundary which all immediate child steps are bound to
+	boundaryID string
+
+	// scope holds parameter and artifacts which are referenceable in scope during execution
+	scope *wfScope
+}
+
+func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node == nil {
-		node = woc.initializeNode(nodeName, wfv1.NodeTypeSteps, wfv1.NodeRunning)
+		node = woc.initializeNode(nodeName, wfv1.NodeTypeSteps, boundaryID, wfv1.NodeRunning)
 	}
 	defer func() {
 		if woc.wf.Status.Nodes[node.ID].Completed() {
 			_ = woc.killDeamonedChildren(node.ID)
 		}
 	}()
-	scope := wfScope{
-		tmpl:  tmpl,
-		scope: make(map[string]interface{}),
+	stepsCtx := stepsContext{
+		boundaryID: node.ID,
+		scope: &wfScope{
+			tmpl:  tmpl,
+			scope: make(map[string]interface{}),
+		},
 	}
 	for i, stepGroup := range tmpl.Steps {
 		sgNodeName := fmt.Sprintf("%s[%d]", nodeName, i)
 		sgNode := woc.getNodeByName(sgNodeName)
 		if sgNode == nil {
 			// initialize the step group
-			sgNode = woc.initializeNode(sgNodeName, wfv1.NodeTypeStepGroup, wfv1.NodeRunning)
+			sgNode = woc.initializeNode(sgNodeName, wfv1.NodeTypeStepGroup, stepsCtx.boundaryID, wfv1.NodeRunning)
 			if i == 0 {
 				woc.addChildNode(nodeName, sgNodeName)
 			} else {
@@ -51,7 +63,7 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 				}
 			}
 		}
-		err := woc.executeStepGroup(stepGroup, sgNodeName, &scope)
+		err := woc.executeStepGroup(stepGroup, sgNodeName, &stepsCtx)
 		if err != nil {
 			if !errors.IsCode(errors.CodeTimeout, err) {
 				woc.markNodeError(nodeName, err)
@@ -81,11 +93,11 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 				continue
 			}
 			prefix := fmt.Sprintf("steps.%s", step.Name)
-			scope.addNodeOutputsToScope(prefix, &childNode)
+			stepsCtx.scope.addNodeOutputsToScope(prefix, &childNode)
 		}
 	}
 	// If this template has outputs from any of its steps, copy them to this node here
-	outputs, err := getTemplateOutputsFromScope(tmpl, &scope)
+	outputs, err := getTemplateOutputsFromScope(tmpl, stepsCtx.scope)
 	if err != nil {
 		woc.markNodeError(nodeName, err)
 		return err
@@ -116,14 +128,14 @@ func (woc *wfOperationCtx) executeSteps(nodeName string, tmpl *wfv1.Template) er
 
 // executeStepGroup examines a map of parallel steps and executes them in parallel.
 // Handles referencing of variables in scope, expands `withItem` clauses, and evaluates `when` expressions
-func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNodeName string, scope *wfScope) error {
+func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNodeName string, stepsCtx *stepsContext) error {
 	node := woc.getNodeByName(sgNodeName)
 	if node.Completed() {
 		woc.log.Debugf("Step group node %v already marked completed", node)
 		return nil
 	}
 	// First, resolve any references to outputs from previous steps, and perform substitution
-	stepGroup, err := woc.resolveReferences(stepGroup, scope)
+	stepGroup, err := woc.resolveReferences(stepGroup, stepsCtx.scope)
 	if err != nil {
 		woc.markNodeError(sgNodeName, err)
 		return err
@@ -151,10 +163,10 @@ func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNod
 		if !proceed {
 			skipReason := fmt.Sprintf("when '%s' evaluated false", step.When)
 			woc.log.Infof("Skipping %s: %s", childNodeName, skipReason)
-			woc.initializeNode(childNodeName, wfv1.NodeTypeSkipped, wfv1.NodeSkipped, skipReason)
+			woc.initializeNode(childNodeName, wfv1.NodeTypeSkipped, stepsCtx.boundaryID, wfv1.NodeSkipped, skipReason)
 			continue
 		}
-		err = woc.executeTemplate(step.Template, step.Arguments, childNodeName)
+		err = woc.executeTemplate(step.Template, step.Arguments, childNodeName, stepsCtx.boundaryID)
 		if err != nil {
 			if !errors.IsCode(errors.CodeTimeout, err) {
 				woc.markNodeError(childNodeName, err)

From c2dd9b635657273c3974fc358fcdf797c821ac92 Mon Sep 17 00:00:00 2001
From: Jesse Suen <Jesse_Suen@intuit.com>
Date: Fri, 26 Jan 2018 00:35:16 -0800
Subject: [PATCH 4/8] Fix unit test breakages

---
 workflow/controller/operator_test.go    | 6 +++---
 workflow/controller/workflowpod_test.go | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/workflow/controller/operator_test.go b/workflow/controller/operator_test.go
index f1c39de68961..51020bd7dced 100644
--- a/workflow/controller/operator_test.go
+++ b/workflow/controller/operator_test.go
@@ -115,7 +115,7 @@ func TestProcessNodesWithRetries(t *testing.T) {
 	// Add the parent node for retries.
 	nodeName := "test-node"
 	nodeID := woc.wf.NodeID(nodeName)
-	node := woc.markNodePhase(nodeName, wfv1.NodeRunning)
+	node := woc.initializeNode(nodeName, wfv1.NodeTypeRetry, "", wfv1.NodeRunning)
 	retries := wfv1.RetryStrategy{}
 	var retryLimit int32
 	retryLimit = 2
@@ -135,7 +135,7 @@ func TestProcessNodesWithRetries(t *testing.T) {
 	// Add child nodes.
 	for i := 0; i < 2; i++ {
 		childNode := fmt.Sprintf("child-node-%d", i)
-		woc.markNodePhase(childNode, wfv1.NodeRunning)
+		woc.initializeNode(childNode, wfv1.NodeTypePod, "", wfv1.NodeRunning)
 		woc.addChildNode(nodeName, childNode)
 	}
 
@@ -168,7 +168,7 @@ func TestProcessNodesWithRetries(t *testing.T) {
 
 	// Add a third node that has failed.
 	childNode := "child-node-3"
-	woc.markNodePhase(childNode, wfv1.NodeFailed)
+	woc.initializeNode(childNode, wfv1.NodeTypePod, "", wfv1.NodeFailed)
 	woc.addChildNode(nodeName, childNode)
 	n = woc.getNodeByName(nodeName)
 	err = woc.processNodeRetries(n)
diff --git a/workflow/controller/workflowpod_test.go b/workflow/controller/workflowpod_test.go
index 2b68f546eacc..84bd227ca01d 100644
--- a/workflow/controller/workflowpod_test.go
+++ b/workflow/controller/workflowpod_test.go
@@ -74,7 +74,7 @@ script:
 // TestScriptTemplateWithVolume ensure we can a script pod with input artifacts
 func TestScriptTemplateWithVolume(t *testing.T) {
 	tmpl := unmarshalTemplate(scriptTemplateWithInputArtifact)
-	err := newWoc().executeScript(tmpl.Name, tmpl)
+	err := newWoc().executeScript(tmpl.Name, tmpl, "")
 	assert.Nil(t, err)
 }
 
@@ -83,7 +83,7 @@ func TestScriptTemplateWithVolume(t *testing.T) {
 func TestServiceAccount(t *testing.T) {
 	woc := newWoc()
 	woc.wf.Spec.ServiceAccountName = "foo"
-	err := woc.executeContainer(woc.wf.Spec.Entrypoint, &woc.wf.Spec.Templates[0])
+	err := woc.executeContainer(woc.wf.Spec.Entrypoint, &woc.wf.Spec.Templates[0], "")
 	assert.Nil(t, err)
 	podName := getPodName(woc.wf)
 	pod, err := woc.controller.kubeclientset.CoreV1().Pods("").Get(podName, metav1.GetOptions{})

From 4272691035e0588bbd301449c122ee2851e3c87f Mon Sep 17 00:00:00 2001
From: gaganapplatix <33636454+gaganapplatix@users.noreply.github.com>
Date: Thu, 1 Feb 2018 14:09:11 -0800
Subject: [PATCH 5/8] Fix retry in dag branch (#709)

---
 workflow/controller/operator.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go
index f8c2a8e2072a..96e893767a9e 100644
--- a/workflow/controller/operator.go
+++ b/workflow/controller/operator.go
@@ -901,7 +901,7 @@ func (woc *wfOperationCtx) markNodeError(nodeName string, err error) *wfv1.NodeS
 func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Template, boundaryID string) error {
 	node := woc.getNodeByName(nodeName)
 	if node == nil {
-		node := woc.initializeNode(nodeName, wfv1.NodeTypeRetry, boundaryID, wfv1.NodeRunning)
+		node = woc.initializeNode(nodeName, wfv1.NodeTypeRetry, boundaryID, wfv1.NodeRunning)
 		node.RetryStrategy = tmpl.RetryStrategy
 		woc.wf.Status.Nodes[node.ID] = *node
 	}
@@ -918,7 +918,7 @@ func (woc *wfOperationCtx) executeRetryContainer(nodeName string, tmpl *wfv1.Tem
 	if err != nil {
 		return err
 	}
-	if !lastChildNode.Completed() {
+	if lastChildNode != nil && !lastChildNode.Completed() {
 		// last child node is still running.
 		return nil
 	}

From 491ed08ffe2f8430fcf35bf36e6dd16707eb5a0a Mon Sep 17 00:00:00 2001
From: Jesse Suen <Jesse_Suen@intuit.com>
Date: Fri, 2 Feb 2018 10:57:27 -0800
Subject: [PATCH 6/8] Check-in the OpenAPI spec. Automate generation as part of
 `make update-codegen`

---
 Makefile                      |   1 +
 api/openapi-spec/swagger.json | 806 ++++++++++++++++++++++++++++++++++
 hack/gen-openapi-spec/main.go |   4 +-
 3 files changed, 809 insertions(+), 2 deletions(-)
 create mode 100644 api/openapi-spec/swagger.json

diff --git a/Makefile b/Makefile
index c1fef1209e89..ea6370525125 100644
--- a/Makefile
+++ b/Makefile
@@ -94,6 +94,7 @@ test:
 update-codegen:
 	./hack/update-codegen.sh
 	./hack/update-openapigen.sh
+	go run ./hack/gen-openapi-spec/main.go ${VERSION} > ${CURRENT_DIR}/api/openapi-spec/swagger.json
 
 verify-codegen:
 	./hack/verify-codegen.sh
diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json
new file mode 100644
index 000000000000..3242b077561d
--- /dev/null
+++ b/api/openapi-spec/swagger.json
@@ -0,0 +1,806 @@
+{
+  "swagger": "2.0",
+  "info": {
+    "title": "Argo",
+    "version": "v2.0.0"
+  },
+  "paths": {},
+  "definitions": {
+    "io.argoproj.workflow.v1alpha1.Arguments": {
+      "description": "Arguments to a template",
+      "properties": {
+        "artifacts": {
+          "description": "Artifacts is the list of artifacts to pass to the template or workflow",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Artifact"
+          }
+        },
+        "parameters": {
+          "description": "Parameters is the list of parameters to pass to the template or workflow",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Parameter"
+          }
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Artifact": {
+      "description": "Artifact indicates an artifact to place at a specified path",
+      "required": [
+        "name"
+      ],
+      "properties": {
+        "artifactory": {
+          "description": "Artifactory contains artifactory artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ArtifactoryArtifact"
+        },
+        "from": {
+          "description": "From allows an artifact to reference an artifact from a previous step",
+          "type": "string"
+        },
+        "git": {
+          "description": "Git contains git artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.GitArtifact"
+        },
+        "http": {
+          "description": "HTTP contains HTTP artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.HTTPArtifact"
+        },
+        "mode": {
+          "description": "mode bits to use on this file, must be a value between 0 and 0777 set when loading input artifacts.",
+          "type": "integer",
+          "format": "int32"
+        },
+        "name": {
+          "description": "name of the artifact. must be unique within a template's inputs/outputs.",
+          "type": "string"
+        },
+        "path": {
+          "description": "Path is the container path to the artifact",
+          "type": "string"
+        },
+        "raw": {
+          "description": "Raw contains raw artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.RawArtifact"
+        },
+        "s3": {
+          "description": "S3 contains S3 artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.S3Artifact"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.ArtifactLocation": {
+      "description": "ArtifactLocation describes a location for a single or multiple artifacts. It is used as single artifact in the context of inputs/outputs (e.g. outputs.artifacts.artname). It is also used to describe the location of multiple artifacts such as the archive location of a single workflow step, which the executor will use as a default location to store its files.",
+      "properties": {
+        "artifactory": {
+          "description": "Artifactory contains artifactory artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ArtifactoryArtifact"
+        },
+        "git": {
+          "description": "Git contains git artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.GitArtifact"
+        },
+        "http": {
+          "description": "HTTP contains HTTP artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.HTTPArtifact"
+        },
+        "raw": {
+          "description": "Raw contains raw artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.RawArtifact"
+        },
+        "s3": {
+          "description": "S3 contains S3 artifact location details",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.S3Artifact"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.ArtifactoryArtifact": {
+      "description": "ArtifactoryArtifact is the location of an artifactory artifact",
+      "required": [
+        "url"
+      ],
+      "properties": {
+        "passwordSecret": {
+          "description": "PasswordSecret is the secret selector to the repository password",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        },
+        "url": {
+          "description": "URL of the artifact",
+          "type": "string"
+        },
+        "usernameSecret": {
+          "description": "UsernameSecret is the secret selector to the repository username",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.ArtifactoryAuth": {
+      "description": "ArtifactoryAuth describes the secret selectors required for authenticating to artifactory",
+      "properties": {
+        "passwordSecret": {
+          "description": "PasswordSecret is the secret selector to the repository password",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        },
+        "usernameSecret": {
+          "description": "UsernameSecret is the secret selector to the repository username",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.DAG": {
+      "description": "DAG is a template subtype for directed acyclic graph templates",
+      "required": [
+        "tasks"
+      ],
+      "properties": {
+        "target": {
+          "description": "Target are one or more names of targets to execute in a DAG",
+          "type": "string"
+        },
+        "tasks": {
+          "description": "Tasks are a list of DAG tasks",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.DAGTask"
+          }
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.DAGTask": {
+      "description": "DAGTask represents a node in the graph during DAG execution",
+      "required": [
+        "name",
+        "template"
+      ],
+      "properties": {
+        "arguments": {
+          "description": "Arguments are the parameter and artifact arguments to the template",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Arguments"
+        },
+        "dependencies": {
+          "description": "Dependencies are name of other targets which this depends on",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "name": {
+          "description": "Name is the name of the target",
+          "type": "string"
+        },
+        "template": {
+          "description": "Name of template to execute",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.GitArtifact": {
+      "description": "GitArtifact is the location of an git artifact",
+      "required": [
+        "repo"
+      ],
+      "properties": {
+        "passwordSecret": {
+          "description": "PasswordSecret is the secret selector to the repository password",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        },
+        "repo": {
+          "description": "Repo is the git repository",
+          "type": "string"
+        },
+        "revision": {
+          "description": "Revision is the git commit, tag, branch to checkout",
+          "type": "string"
+        },
+        "usernameSecret": {
+          "description": "UsernameSecret is the secret selector to the repository username",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.HTTPArtifact": {
+      "description": "HTTPArtifact allows an file served on HTTP to be placed as an input artifact in a container",
+      "required": [
+        "url"
+      ],
+      "properties": {
+        "url": {
+          "description": "URL of the artifact",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Inputs": {
+      "description": "Inputs are the mechanism for passing parameters, artifacts, volumes from one template to another",
+      "properties": {
+        "artifacts": {
+          "description": "Artifact are a list of artifacts passed as inputs",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Artifact"
+          }
+        },
+        "parameters": {
+          "description": "Parameters are a list of parameters passed as inputs",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Parameter"
+          }
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Outputs": {
+      "description": "Outputs hold parameters, artifacts, and results from a step",
+      "properties": {
+        "artifacts": {
+          "description": "Artifacts holds the list of output artifacts produced by a step",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Artifact"
+          }
+        },
+        "parameters": {
+          "description": "Parameters holds the list of output parameters produced by a step",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Parameter"
+          }
+        },
+        "result": {
+          "description": "Result holds the result (stdout) of a script template",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Parameter": {
+      "description": "Parameter indicate a passed string parameter to a service template with an optional default value",
+      "required": [
+        "name"
+      ],
+      "properties": {
+        "default": {
+          "description": "Default is the default value to use for an input parameter if a value was not supplied",
+          "type": "string"
+        },
+        "name": {
+          "description": "Name is the parameter name",
+          "type": "string"
+        },
+        "value": {
+          "description": "Value is the literal value to use for the parameter. If specified in the context of an input parameter, the value takes precedence over any passed values",
+          "type": "string"
+        },
+        "valueFrom": {
+          "description": "ValueFrom is the source for the output parameter's value",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ValueFrom"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.RawArtifact": {
+      "description": "RawArtifact allows raw string content to be placed as an artifact in a container",
+      "required": [
+        "data"
+      ],
+      "properties": {
+        "data": {
+          "description": "Data is the string contents of the artifact",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.ResourceTemplate": {
+      "description": "ResourceTemplate is a template subtype to manipulate kubernetes resources",
+      "required": [
+        "action",
+        "manifest"
+      ],
+      "properties": {
+        "action": {
+          "description": "Action is the action to perform to the resource. Must be one of: get, create, apply, delete, replace",
+          "type": "string"
+        },
+        "failureCondition": {
+          "description": "FailureCondition is a label selector expression which describes the conditions of the k8s resource in which the step was considered failed",
+          "type": "string"
+        },
+        "manifest": {
+          "description": "Manifest contains the kubernetes manifest",
+          "type": "string"
+        },
+        "successCondition": {
+          "description": "SuccessCondition is a label selector expression which describes the conditions of the k8s resource in which it is acceptable to proceed to the following step",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.RetryStrategy": {
+      "description": "RetryStrategy provides controls on how to retry a workflow step",
+      "properties": {
+        "limit": {
+          "description": "Limit is the maximum number of attempts when retrying a container",
+          "type": "integer",
+          "format": "int32"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.S3Artifact": {
+      "description": "S3Artifact is the location of an S3 artifact",
+      "required": [
+        "endpoint",
+        "bucket",
+        "accessKeySecret",
+        "secretKeySecret",
+        "key"
+      ],
+      "properties": {
+        "accessKeySecret": {
+          "description": "AccessKeySecret is the secret selector to the bucket's access key",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        },
+        "bucket": {
+          "description": "Bucket is the name of the bucket",
+          "type": "string"
+        },
+        "endpoint": {
+          "description": "Endpoint is the hostname of the bucket endpoint",
+          "type": "string"
+        },
+        "insecure": {
+          "description": "Insecure will connect to the service with TLS",
+          "type": "boolean"
+        },
+        "key": {
+          "description": "Key is the key in the bucket where the artifact resides",
+          "type": "string"
+        },
+        "region": {
+          "description": "Region contains the optional bucket region",
+          "type": "string"
+        },
+        "secretKeySecret": {
+          "description": "SecretKeySecret is the secret selector to the bucket's secret key",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.S3Bucket": {
+      "description": "S3Bucket contains the access information required for interfacing with an S3 bucket",
+      "required": [
+        "endpoint",
+        "bucket",
+        "accessKeySecret",
+        "secretKeySecret"
+      ],
+      "properties": {
+        "accessKeySecret": {
+          "description": "AccessKeySecret is the secret selector to the bucket's access key",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        },
+        "bucket": {
+          "description": "Bucket is the name of the bucket",
+          "type": "string"
+        },
+        "endpoint": {
+          "description": "Endpoint is the hostname of the bucket endpoint",
+          "type": "string"
+        },
+        "insecure": {
+          "description": "Insecure will connect to the service with TLS",
+          "type": "boolean"
+        },
+        "region": {
+          "description": "Region contains the optional bucket region",
+          "type": "string"
+        },
+        "secretKeySecret": {
+          "description": "SecretKeySecret is the secret selector to the bucket's secret key",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecretKeySelector"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Script": {
+      "description": "Script is a template subtype to enable scripting through code steps",
+      "required": [
+        "image",
+        "command",
+        "source"
+      ],
+      "properties": {
+        "command": {
+          "description": "Command is the interpreter coommand to run (e.g. [python])",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "image": {
+          "description": "Image is the container image to run",
+          "type": "string"
+        },
+        "source": {
+          "description": "Source contains the source code of the script to execute",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Sidecar": {
+      "description": "Sidecar is a container which runs alongside the main container",
+      "required": [
+        "name"
+      ],
+      "properties": {
+        "args": {
+          "description": "Arguments to the entrypoint. The docker image's CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container's environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "command": {
+          "description": "Entrypoint array. Not executed within a shell. The docker image's ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container's environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
+        "env": {
+          "description": "List of environment variables to set in the container. Cannot be updated.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.EnvVar"
+          },
+          "x-kubernetes-patch-merge-key": "name",
+          "x-kubernetes-patch-strategy": "merge"
+        },
+        "envFrom": {
+          "description": "List of sources to populate environment variables in the container. The keys defined within a source must be a C_IDENTIFIER. All invalid keys will be reported as an event when the container is starting. When a key exists in multiple sources, the value associated with the last source will take precedence. Values defined by an Env with a duplicate key will take precedence. Cannot be updated.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.EnvFromSource"
+          }
+        },
+        "image": {
+          "description": "Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.",
+          "type": "string"
+        },
+        "imagePullPolicy": {
+          "description": "Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images",
+          "type": "string"
+        },
+        "lifecycle": {
+          "description": "Actions that the management system should take in response to container lifecycle events. Cannot be updated.",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Lifecycle"
+        },
+        "livenessProbe": {
+          "description": "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Probe"
+        },
+        "mirrorVolumeMounts": {
+          "description": "MirrorVolumeMounts will mount the same volumes specified in the main container to the sidecar (including artifacts), at the same mountPaths. This enables dind daemon to partially see the same filesystem as the main container in order to use features such as docker volume binding",
+          "type": "boolean"
+        },
+        "name": {
+          "description": "Name of the container specified as a DNS_LABEL. Each container in a pod must have a unique name (DNS_LABEL). Cannot be updated.",
+          "type": "string"
+        },
+        "ports": {
+          "description": "List of ports to expose from the container. Exposing a port here gives the system additional information about the network connections a container uses, but is primarily informational. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default \"0.0.0.0\" address inside a container will be accessible from the network. Cannot be updated.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.ContainerPort"
+          },
+          "x-kubernetes-patch-merge-key": "containerPort",
+          "x-kubernetes-patch-strategy": "merge"
+        },
+        "readinessProbe": {
+          "description": "Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Probe"
+        },
+        "resources": {
+          "description": "Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources",
+          "$ref": "#/definitions/io.k8s.api.core.v1.ResourceRequirements"
+        },
+        "securityContext": {
+          "description": "Security options the pod should run with. More info: https://kubernetes.io/docs/concepts/policy/security-context/ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/",
+          "$ref": "#/definitions/io.k8s.api.core.v1.SecurityContext"
+        },
+        "stdin": {
+          "description": "Whether this container should allocate a buffer for stdin in the container runtime. If this is not set, reads from stdin in the container will always result in EOF. Default is false.",
+          "type": "boolean"
+        },
+        "stdinOnce": {
+          "description": "Whether the container runtime should close the stdin channel after it has been opened by a single attach. When stdin is true the stdin stream will remain open across multiple attach sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the first client attaches to stdin, and then remains open and accepts data until the client disconnects, at which time stdin is closed and remains closed until the container is restarted. If this flag is false, a container processes that reads from stdin will never receive an EOF. Default is false",
+          "type": "boolean"
+        },
+        "terminationMessagePath": {
+          "description": "Optional: Path at which the file to which the container's termination message will be written is mounted into the container's filesystem. Message written is intended to be brief final status, such as an assertion failure message. Will be truncated by the node if greater than 4096 bytes. The total message length across all containers will be limited to 12kb. Defaults to /dev/termination-log. Cannot be updated.",
+          "type": "string"
+        },
+        "terminationMessagePolicy": {
+          "description": "Indicate how the termination message should be populated. File will use the contents of terminationMessagePath to populate the container status message on both success and failure. FallbackToLogsOnError will use the last chunk of container log output if the termination message file is empty and the container exited with an error. The log output is limited to 2048 bytes or 80 lines, whichever is smaller. Defaults to File. Cannot be updated.",
+          "type": "string"
+        },
+        "tty": {
+          "description": "Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. Default is false.",
+          "type": "boolean"
+        },
+        "volumeDevices": {
+          "description": "volumeDevices is the list of block devices to be used by the container. This is an alpha feature and may change in the future.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.VolumeDevice"
+          },
+          "x-kubernetes-patch-merge-key": "devicePath",
+          "x-kubernetes-patch-strategy": "merge"
+        },
+        "volumeMounts": {
+          "description": "Pod volumes to mount into the container's filesystem. Cannot be updated.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.VolumeMount"
+          },
+          "x-kubernetes-patch-merge-key": "mountPath",
+          "x-kubernetes-patch-strategy": "merge"
+        },
+        "workingDir": {
+          "description": "Container's working directory. If not specified, the container runtime's default will be used, which might be configured in the container image. Cannot be updated.",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.SidecarOptions": {
+      "description": "SidecarOptions provide a way to customize the behavior of a sidecar and how it affects the main container.",
+      "properties": {
+        "mirrorVolumeMounts": {
+          "description": "MirrorVolumeMounts will mount the same volumes specified in the main container to the sidecar (including artifacts), at the same mountPaths. This enables dind daemon to partially see the same filesystem as the main container in order to use features such as docker volume binding",
+          "type": "boolean"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Template": {
+      "description": "Template is a reusable and composable unit of execution in a workflow",
+      "required": [
+        "name"
+      ],
+      "properties": {
+        "activeDeadlineSeconds": {
+          "description": "Optional duration in seconds relative to the StartTime that the pod may be active on a node before the system actively tries to terminate the pod; value must be positive integer This field is only applicable to container and script templates.",
+          "type": "integer",
+          "format": "int64"
+        },
+        "affinity": {
+          "description": "Affinity sets the pod's scheduling constraints Overrides the affinity set at the workflow level (if any)",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Affinity"
+        },
+        "archiveLocation": {
+          "description": "Location in which all files related to the step will be stored (logs, artifacts, etc...). Can be overridden by individual items in Outputs. If omitted, will use the default artifact repository location configured in the controller, appended with the \u003cworkflowname\u003e/\u003cnodename\u003e in the key.",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ArtifactLocation"
+        },
+        "container": {
+          "description": "Container is the main container image to run in the pod",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Container"
+        },
+        "daemon": {
+          "description": "Deamon will allow a workflow to proceed to the next step so long as the container reaches readiness",
+          "type": "boolean"
+        },
+        "dag": {
+          "description": "DAG template subtype which runs a DAG",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.DAG"
+        },
+        "inputs": {
+          "description": "Inputs describe what inputs parameters and artifacts are supplied to this template",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Inputs"
+        },
+        "name": {
+          "description": "Name is the name of the template",
+          "type": "string"
+        },
+        "nodeSelector": {
+          "description": "NodeSelector is a selector to schedule this step of the workflow to be run on the selected node(s). Overrides the selector set at the workflow level.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "outputs": {
+          "description": "Outputs describe the parameters and artifacts that this template produces",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Outputs"
+        },
+        "resource": {
+          "description": "Resource template subtype which can run k8s resources",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.ResourceTemplate"
+        },
+        "retryStrategy": {
+          "description": "RetryStrategy describes how to retry a template when it fails",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.RetryStrategy"
+        },
+        "script": {
+          "description": "Script runs a portion of code against an interpreter",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Script"
+        },
+        "sidecars": {
+          "description": "Sidecars is a list of containers which run alongside the main container Sidecars are automatically killed when the main container completes",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Sidecar"
+          }
+        },
+        "steps": {
+          "description": "Steps define a series of sequential/parallel workflow steps",
+          "type": "array",
+          "items": {
+            "type": "array",
+            "items": {
+              "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.WorkflowStep"
+            }
+          }
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.ValueFrom": {
+      "description": "ValueFrom describes a location in which to obtain the value to a parameter",
+      "properties": {
+        "jqFilter": {
+          "description": "JQFilter expression against the resource object in resource templates",
+          "type": "string"
+        },
+        "jsonPath": {
+          "description": "JSONPath of a resource to retrieve an output parameter value from in resource templates",
+          "type": "string"
+        },
+        "parameter": {
+          "description": "Parameter reference to a step or dag task in which to retrieve an output parameter value from (e.g. '{{steps.mystep.outputs.myparam}}')",
+          "type": "string"
+        },
+        "path": {
+          "description": "Path in the container to retrieve an output parameter value from in container templates",
+          "type": "string"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.Workflow": {
+      "description": "Workflow is the definition of a workflow resource",
+      "required": [
+        "metadata",
+        "spec",
+        "status"
+      ],
+      "properties": {
+        "apiVersion": {
+          "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources",
+          "type": "string"
+        },
+        "kind": {
+          "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds",
+          "type": "string"
+        },
+        "metadata": {
+          "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.ObjectMeta"
+        },
+        "spec": {
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.WorkflowSpec"
+        },
+        "status": {
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.WorkflowStatus"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.WorkflowList": {
+      "description": "WorkflowList is list of Workflow resources",
+      "required": [
+        "metadata",
+        "items"
+      ],
+      "properties": {
+        "apiVersion": {
+          "description": "APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources",
+          "type": "string"
+        },
+        "items": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Workflow"
+          }
+        },
+        "kind": {
+          "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds",
+          "type": "string"
+        },
+        "metadata": {
+          "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.ListMeta"
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.WorkflowSpec": {
+      "description": "WorkflowSpec is the specification of a Workflow.",
+      "required": [
+        "templates",
+        "entrypoint"
+      ],
+      "properties": {
+        "affinity": {
+          "description": "Affinity sets the scheduling constraints for all pods in the workflow. Can be overridden by an affinity specified in the template",
+          "$ref": "#/definitions/io.k8s.api.core.v1.Affinity"
+        },
+        "arguments": {
+          "description": "Arguments contain the parameters and artifacts sent to the workflow entrypoint Parameters are referencable globally using the 'workflow' variable prefix. e.g. {{workflow.parameters.myparam}}",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Arguments"
+        },
+        "entrypoint": {
+          "description": "Entrypoint is a template reference to the starting point of the workflow",
+          "type": "string"
+        },
+        "imagePullSecrets": {
+          "description": "ImagePullSecrets is a list of references to secrets in the same namespace to use for pulling any images in pods that reference this ServiceAccount. ImagePullSecrets are distinct from Secrets because Secrets can be mounted in the pod, but ImagePullSecrets are only accessed by the kubelet. More info: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.LocalObjectReference"
+          }
+        },
+        "nodeSelector": {
+          "description": "NodeSelector is a selector which will result in all pods of the workflow to be scheduled on the selected node(s). This is able to be overridden by a nodeSelector specified in the template.",
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "onExit": {
+          "description": "OnExit is a template reference which is invoked at the end of the workflow, irrespective of the success, failure, or error of the primary workflow.",
+          "type": "string"
+        },
+        "serviceAccountName": {
+          "description": "ServiceAccountName is the name of the ServiceAccount to run all pods of the workflow as.",
+          "type": "string"
+        },
+        "templates": {
+          "description": "Templates is a list of workflow templates used in a workflow",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Template"
+          }
+        },
+        "volumeClaimTemplates": {
+          "description": "VolumeClaimTemplates is a list of claims that containers are allowed to reference. The Workflow controller will create the claims at the beginning of the workflow and delete the claims upon completion of the workflow",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.PersistentVolumeClaim"
+          }
+        },
+        "volumes": {
+          "description": "Volumes is a list of volumes that can be mounted by containers in a workflow.",
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/io.k8s.api.core.v1.Volume"
+          }
+        }
+      }
+    },
+    "io.argoproj.workflow.v1alpha1.WorkflowStep": {
+      "description": "WorkflowStep is a reference to a template to execute in a series of step",
+      "properties": {
+        "arguments": {
+          "description": "Arguments hold arguments to the template",
+          "$ref": "#/definitions/io.argoproj.workflow.v1alpha1.Arguments"
+        },
+        "name": {
+          "description": "Name of the step",
+          "type": "string"
+        },
+        "template": {
+          "description": "Template is a reference to the template to execute as the step",
+          "type": "string"
+        },
+        "when": {
+          "description": "When is an expression in which the step should conditionally execute",
+          "type": "string"
+        },
+        "withParam": {
+          "description": "WithParam expands a step into from the value in the parameter",
+          "type": "string"
+        }
+      }
+    }
+  }
+}
diff --git a/hack/gen-openapi-spec/main.go b/hack/gen-openapi-spec/main.go
index a7f1d45c7767..8c728cbe36cb 100644
--- a/hack/gen-openapi-spec/main.go
+++ b/hack/gen-openapi-spec/main.go
@@ -26,7 +26,7 @@ func main() {
 	})
 	defs := spec.Definitions{}
 	for defName, val := range oAPIDefs {
-		defs[defName] = val.Schema
+		defs[swaggify(defName)] = val.Schema
 	}
 	swagger := spec.Swagger{
 		SwaggerProps: spec.SwaggerProps{
@@ -41,7 +41,7 @@ func main() {
 			},
 		},
 	}
-	jsonBytes, err := json.MarshalIndent(swagger, "", "    ")
+	jsonBytes, err := json.MarshalIndent(swagger, "", "  ")
 	if err != nil {
 		log.Fatal(err.Error())
 	}

From 8420deb30a48839a097d3f5cd089e4b493b5e751 Mon Sep 17 00:00:00 2001
From: Jesse Suen <Jesse_Suen@intuit.com>
Date: Fri, 2 Feb 2018 17:05:49 -0800
Subject: [PATCH 7/8] Skipped steps were being re-initialized causing a
 controller panic

---
 workflow/controller/steps.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/workflow/controller/steps.go b/workflow/controller/steps.go
index 634db109dd74..1b5c5f8c1a04 100644
--- a/workflow/controller/steps.go
+++ b/workflow/controller/steps.go
@@ -161,9 +161,11 @@ func (woc *wfOperationCtx) executeStepGroup(stepGroup []wfv1.WorkflowStep, sgNod
 			return err
 		}
 		if !proceed {
-			skipReason := fmt.Sprintf("when '%s' evaluated false", step.When)
-			woc.log.Infof("Skipping %s: %s", childNodeName, skipReason)
-			woc.initializeNode(childNodeName, wfv1.NodeTypeSkipped, stepsCtx.boundaryID, wfv1.NodeSkipped, skipReason)
+			if woc.getNodeByName(childNodeName) == nil {
+				skipReason := fmt.Sprintf("when '%s' evaluated false", step.When)
+				woc.log.Infof("Skipping %s: %s", childNodeName, skipReason)
+				woc.initializeNode(childNodeName, wfv1.NodeTypeSkipped, stepsCtx.boundaryID, wfv1.NodeSkipped, skipReason)
+			}
 			continue
 		}
 		err = woc.executeTemplate(step.Template, step.Arguments, childNodeName, stepsCtx.boundaryID)

From 75caa877bc08184cad6dd34366b2b9f8b3dccc38 Mon Sep 17 00:00:00 2001
From: gaganapplatix <33636454+gaganapplatix@users.noreply.github.com>
Date: Mon, 5 Feb 2018 11:17:24 -0800
Subject: [PATCH 8/8] Initial work for dag based cli for everything. get now
 works (#714)

* Initial work for dag based cli for everything. get now works

* Fix bugs around ordering and also display related

* Fix metalinter issue

* Clean up code based on review suggestions
---
 cmd/argo/commands/get.go | 401 +++++++++++++++++++++++++++++++--------
 1 file changed, 319 insertions(+), 82 deletions(-)

diff --git a/cmd/argo/commands/get.go b/cmd/argo/commands/get.go
index db98bb86b442..4baf7e445c0f 100644
--- a/cmd/argo/commands/get.go
+++ b/cmd/argo/commands/get.go
@@ -17,6 +17,8 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+const onExitSuffix = "onExit"
+
 func init() {
 	RootCmd.AddCommand(getCmd)
 	getCmd.Flags().StringVarP(&getArgs.output, "output", "o", "", "Output format. One of: json|yaml|wide")
@@ -116,25 +118,284 @@ func printWorkflowHelper(wf *wfv1.Workflow) {
 		} else {
 			fmt.Fprintf(w, "%s\tPODNAME\tDURATION\tMESSAGE\n", ansiFormat("STEP", FgDefault))
 		}
-		node, ok := wf.Status.Nodes[wf.ObjectMeta.Name]
-		if ok {
-			printNodeTree(w, wf, node, 0, " ", " ")
-		}
-		onExitNode, ok := wf.Status.Nodes[wf.NodeID(wf.ObjectMeta.Name+".onExit")]
-		if ok {
-			fmt.Fprintf(w, "\t\t\t\t\n")
-			onExitNode.Name = "onExit"
-			printNodeTree(w, wf, onExitNode, 0, " ", " ")
+
+		// Convert Nodes to Render Trees
+		roots := convertToRenderTrees(wf)
+
+		// Print main and onExit Trees
+		rootNodeIDs := [2]string{wf.NodeID(wf.ObjectMeta.Name), wf.NodeID(wf.ObjectMeta.Name + "." + onExitSuffix)}
+		for _, id := range rootNodeIDs {
+			if node, ok := wf.Status.Nodes[id]; ok {
+				if root, ok := roots[node.ID]; ok {
+					root.renderNodes(w, wf, 0, " ", " ")
+				}
+			}
 		}
+
 		_ = w.Flush()
 	}
 }
 
-func printNodeTree(w *tabwriter.Writer, wf *wfv1.Workflow, node wfv1.NodeStatus, depth int, nodePrefix string, childPrefix string) {
-	nodeName := fmt.Sprintf("%s %s", jobStatusIconMap[node.Phase], node.Name)
+type nodeInfoInterface interface {
+	getID() string
+	getNodeStatus(wf *wfv1.Workflow) wfv1.NodeStatus
+	getStartTime(wf *wfv1.Workflow) metav1.Time
+}
+
+type nodeInfo struct {
+	id string
+}
+
+func (n *nodeInfo) getID() string {
+	return n.id
+}
+
+func (n *nodeInfo) getNodeStatus(wf *wfv1.Workflow) wfv1.NodeStatus {
+	return wf.Status.Nodes[n.id]
+}
+
+func (n *nodeInfo) getStartTime(wf *wfv1.Workflow) metav1.Time {
+	return wf.Status.Nodes[n.id].StartedAt
+}
+
+// Interface to represent Nodes in render form types
+type renderNode interface {
+	// Render this renderNode and its children
+	renderNodes(w *tabwriter.Writer, wf *wfv1.Workflow, depth int,
+		nodePrefix string, childPrefix string)
+	nodeInfoInterface
+}
+
+// Currently this is Pod or Resource Nodes
+type executionNode struct {
+	nodeInfo
+}
+
+// Currently this is the step groups or retry nodes
+type nonBoundaryParentNode struct {
+	nodeInfo
+	children []renderNode // Can be boundaryNode or executionNode
+}
+
+// Currently this is the virtual Template node
+type boundaryNode struct {
+	nodeInfo
+	boundaryContained []renderNode // Can be nonBoundaryParent or executionNode or boundaryNode
+}
+
+func isBoundaryNode(node wfv1.NodeType) bool {
+	return (node == wfv1.NodeTypeDAG) || (node == wfv1.NodeTypeSteps)
+}
+
+func isNonBoundaryParentNode(node wfv1.NodeType) bool {
+	return (node == wfv1.NodeTypeStepGroup) || (node == wfv1.NodeTypeRetry)
+}
+
+func isExecutionNode(node wfv1.NodeType) bool {
+	return (node == wfv1.NodeTypePod) || (node == wfv1.NodeTypeSkipped)
+}
+
+func insertSorted(wf *wfv1.Workflow, sortedArray []renderNode, item renderNode) []renderNode {
+	insertTime := item.getStartTime(wf)
+	var index int
+	for index = 0; index < len(sortedArray); index++ {
+		existingItem := sortedArray[index]
+		t := existingItem.getStartTime(wf)
+		if insertTime.Before(&t) {
+			break
+		} else if insertTime.Equal(&t) {
+			// If they are equal apply alphabetical order so we
+			// get some consistent printing
+			insertName := humanizeNodeName(wf, item.getNodeStatus(wf))
+			equalName := humanizeNodeName(wf, existingItem.getNodeStatus(wf))
+			if insertName < equalName {
+				break
+			}
+		}
+	}
+	sortedArray = append(sortedArray, nil)
+	copy(sortedArray[index+1:], sortedArray[index:])
+	sortedArray[index] = item
+	return sortedArray
+}
+
+// Attach render node n to its parent based on what has been parsed previously
+// In some cases add it to list of things that still needs to be attached to parent
+// Return if I am a possible root
+func attachToParent(wf *wfv1.Workflow, n renderNode,
+	nonBoundaryParentChildrenMap map[string]*nonBoundaryParentNode, boundaryID string,
+	boundaryNodeMap map[string]*boundaryNode, parentBoundaryMap map[string][]renderNode) bool {
+
+	// Check first if I am a child of a nonBoundaryParent
+	// that implies I attach to that instead of my boundary. This was already
+	// figured out in Pass 1
+	if nonBoundaryParent, ok := nonBoundaryParentChildrenMap[n.getID()]; ok {
+		nonBoundaryParent.children = insertSorted(wf, nonBoundaryParent.children, n)
+		return false
+	}
+
+	// If I am not attached to a nonBoundaryParent and I have no Boundary ID then
+	// I am a possible root
+	if boundaryID == "" {
+		return true
+	}
+	if parentBoundary, ok := boundaryNodeMap[boundaryID]; ok {
+		parentBoundary.boundaryContained = insertSorted(wf, parentBoundary.boundaryContained, n)
+	} else {
+		// put ourselves to be added by the parent when we get to it later
+		if _, ok := parentBoundaryMap[boundaryID]; !ok {
+			parentBoundaryMap[boundaryID] = make([]renderNode, 0)
+		}
+		parentBoundaryMap[boundaryID] = append(parentBoundaryMap[boundaryID], n)
+	}
+	return false
+}
+
+// This takes the map of NodeStatus and converts them into a forrest
+// of trees of renderNodes and returns the set of roots for each tree
+func convertToRenderTrees(wf *wfv1.Workflow) map[string]renderNode {
+
+	renderTreeRoots := make(map[string]renderNode)
+
+	// Used to store all boundary nodes so future render children can attach
+	// Maps node Name -> *boundaryNode
+	boundaryNodeMap := make(map[string]*boundaryNode)
+	// Used to store children of a boundary node that has not been parsed yet
+	// Maps boundary Node name -> array of render Children
+	parentBoundaryMap := make(map[string][]renderNode)
+
+	// Used to store Non Boundary Parent nodes so render children can attach
+	// Maps non Boundary Parent Node name -> *nonBoundaryParentNode
+	nonBoundaryParentMap := make(map[string]*nonBoundaryParentNode)
+	// Used to store children which have a Non Boundary Parent from rendering perspective
+	// Maps non Boundary render Children name -> *nonBoundaryParentNode
+	nonBoundaryParentChildrenMap := make(map[string]*nonBoundaryParentNode)
+
+	// We have to do a 2 pass approach because anything that is a child
+	// of a nonBoundaryParent and also has a boundaryID we may not know which
+	// parent to attach to if we didn't see the nonBoundaryParent earlier
+	// in a 1 pass strategy
+
+	// 1st Pass Process enough of nonBoundaryParent nodes to know all their children
+	for id, status := range wf.Status.Nodes {
+		if isNonBoundaryParentNode(status.Type) {
+			n := nonBoundaryParentNode{nodeInfo: nodeInfo{id: id}}
+			nonBoundaryParentMap[id] = &n
+
+			for _, child := range status.Children {
+				nonBoundaryParentChildrenMap[child] = &n
+			}
+		}
+	}
+
+	// 2nd Pass process everything
+	for id, status := range wf.Status.Nodes {
+		switch {
+		case isBoundaryNode(status.Type):
+			n := boundaryNode{nodeInfo: nodeInfo{id: id}}
+			boundaryNodeMap[id] = &n
+			// Attach to my parent if needed
+			if attachToParent(wf, &n, nonBoundaryParentChildrenMap,
+				status.BoundaryID, boundaryNodeMap, parentBoundaryMap) {
+				renderTreeRoots[n.getID()] = &n
+			}
+			// Attach nodes who are in my boundary already seen before me to me
+			for _, val := range parentBoundaryMap[id] {
+				n.boundaryContained = insertSorted(wf, n.boundaryContained, val)
+			}
+		case isNonBoundaryParentNode(status.Type):
+			nPtr, ok := nonBoundaryParentMap[id]
+			if !ok {
+				log.Fatal("Unable to lookup node " + id)
+				return nil
+			}
+			// Attach to my parent if needed
+			if attachToParent(wf, nPtr, nonBoundaryParentChildrenMap,
+				status.BoundaryID, boundaryNodeMap, parentBoundaryMap) {
+				renderTreeRoots[nPtr.getID()] = nPtr
+			}
+			// All children attach directly to the nonBoundaryParents since they are already created
+			// in pass 1 so no need to do that here
+		case isExecutionNode(status.Type):
+			n := executionNode{nodeInfo: nodeInfo{id: id}}
+			// Attach to my parent if needed
+			if attachToParent(wf, &n, nonBoundaryParentChildrenMap,
+				status.BoundaryID, boundaryNodeMap, parentBoundaryMap) {
+				renderTreeRoots[n.getID()] = &n
+			}
+			// Execution nodes don't have other render nodes as children
+		}
+	}
+
+	return renderTreeRoots
+}
+
+// This function decides if a Node will be filtered from rendering and returns
+// two things. First argument tells if the node is filtered and second argument
+// tells whether the children need special indentation due to filtering
+// Return Values: (is node filtered, do children need special indent)
+func filterNode(node wfv1.NodeStatus) (bool, bool) {
+	if node.Type == wfv1.NodeTypeRetry && len(node.Children) == 1 {
+		return true, false
+	} else if node.Type == wfv1.NodeTypeStepGroup {
+		return true, true
+	}
+	return false, false
+}
+
+// Render the child of a given node based on information about the parent such as:
+// whether it was filtered and does this child need special indent
+func renderChild(w *tabwriter.Writer, wf *wfv1.Workflow, nInfo renderNode, depth int,
+	nodePrefix string, childPrefix string, parentFiltered bool,
+	childIndex int, maxIndex int, childIndent bool) {
+	var part, subp string
+	if parentFiltered && childIndent {
+		if maxIndex == 0 {
+			part = "--"
+			subp = "  "
+		} else if childIndex == 0 {
+			part = "·-"
+			subp = "  "
+		} else if childIndex == maxIndex {
+			part = "└-"
+			subp = "  "
+		} else {
+			part = "├-"
+			subp = "  "
+		}
+	} else if !parentFiltered {
+		if childIndex == maxIndex {
+			part = "└-"
+			subp = "  "
+		} else {
+			part = "├-"
+			subp = "| "
+		}
+	}
+	var childNodePrefix, childChldPrefix string
+	if !parentFiltered {
+		depth = depth + 1
+		childNodePrefix = childPrefix + part
+		childChldPrefix = childPrefix + subp
+	} else {
+		if childIndex == 0 {
+			childNodePrefix = nodePrefix + part
+		} else {
+			childNodePrefix = childPrefix + part
+		}
+		childChldPrefix = childPrefix + subp
+	}
+	nInfo.renderNodes(w, wf, depth, childNodePrefix, childChldPrefix)
+}
+
+// Main method to print information of node in get
+func printNode(w *tabwriter.Writer, wf *wfv1.Workflow, node wfv1.NodeStatus, depth int,
+	nodePrefix string, childPrefix string) {
+
+	nodeName := fmt.Sprintf("%s %s", jobStatusIconMap[node.Phase], humanizeNodeName(wf, node))
 	var args []interface{}
 	duration := humanizeDurationShort(node.StartedAt, node.FinishedAt)
-	if len(node.Children) == 0 && node.Phase != wfv1.NodeSkipped {
+	if isExecutionNode(node.Type) && node.Phase != wfv1.NodeSkipped {
 		args = []interface{}{nodePrefix, nodeName, node.ID, duration, node.Message}
 	} else {
 		args = []interface{}{nodePrefix, nodeName, "", "", ""}
@@ -147,78 +408,44 @@ func printNodeTree(w *tabwriter.Writer, wf *wfv1.Workflow, node wfv1.NodeStatus,
 	} else {
 		fmt.Fprintf(w, "%s%s\t%s\t%s\t%s\n", args...)
 	}
+}
 
-	if node.RetryStrategy != nil {
-		for i, childNodeID := range node.Children {
-			var part1, subp1 string
-			subp1 = "  "
+// renderNodes for each renderNode Type
+// boundaryNode
+func (nodeInfo *boundaryNode) renderNodes(w *tabwriter.Writer, wf *wfv1.Workflow, depth int,
+	nodePrefix string, childPrefix string) {
 
-			childNode := wf.Status.Nodes[childNodeID]
-			if i > 0 && i < len(node.Children)-1 {
-				part1 = "├-"
-			} else {
-				part1 = "└-"
-			}
-			var part2, subp2 string
-			part2 = "--"
-			childNodePrefix := childPrefix + part1 + part2
-			childChldPrefix := childPrefix + subp1 + subp2
-			printNodeTree(w, wf, childNode, depth+1, childNodePrefix, childChldPrefix)
-		}
-	} else {
-		// If the node has children, the node is a workflow template and
-		// node.Children prepresent a list of parallel steps. We skip
-		// a generation when recursing since the children nodes of workflow
-		// templates represent a virtual step group, which are not worh printing.
-		for i, stepGroupNodeID := range node.Children {
-			lastStepGroup := bool(i == len(node.Children)-1)
-			var part1, subp1 string
-			if lastStepGroup {
-				part1 = "└-"
-				subp1 = "  "
-			} else {
-				part1 = "├-"
-				subp1 = "| "
-			}
-			stepGroupNode := wf.Status.Nodes[stepGroupNodeID]
-			for j, childNodeID := range stepGroupNode.Children {
-				childNode := wf.Status.Nodes[childNodeID]
-				if j > 0 {
-					if lastStepGroup {
-						part1 = "  "
-					} else {
-						part1 = "| "
-					}
-				}
-				firstParallel := bool(j == 0)
-				lastParallel := bool(j == len(stepGroupNode.Children)-1)
-				var part2, subp2 string
-				if firstParallel {
-					if len(stepGroupNode.Children) == 1 {
-						part2 = "--"
-					} else {
-						part2 = "·-"
-					}
-					if !lastParallel {
-						subp2 = "| "
-					} else {
-						subp2 = "  "
-					}
-
-				} else if lastParallel {
-					part2 = "└-"
-					subp2 = "  "
-				} else {
-					part2 = "├-"
-					subp2 = "| "
-				}
-				childNodePrefix := childPrefix + part1 + part2
-				childChldPrefix := childPrefix + subp1 + subp2
-				// Remove stepgroup name from being displayed
-				childNode.Name = strings.TrimPrefix(childNode.Name, stepGroupNode.Name+".")
-				printNodeTree(w, wf, childNode, depth+1, childNodePrefix, childChldPrefix)
-			}
-		}
+	filtered, childIndent := filterNode(nodeInfo.getNodeStatus(wf))
+	if !filtered {
+		printNode(w, wf, nodeInfo.getNodeStatus(wf), depth, nodePrefix, childPrefix)
+	}
+
+	for i, nInfo := range nodeInfo.boundaryContained {
+		renderChild(w, wf, nInfo, depth, nodePrefix, childPrefix, filtered, i,
+			len(nodeInfo.boundaryContained)-1, childIndent)
+	}
+}
+
+// nonBoundaryParentNode
+func (nodeInfo *nonBoundaryParentNode) renderNodes(w *tabwriter.Writer, wf *wfv1.Workflow, depth int,
+	nodePrefix string, childPrefix string) {
+	filtered, childIndent := filterNode(nodeInfo.getNodeStatus(wf))
+	if !filtered {
+		printNode(w, wf, nodeInfo.getNodeStatus(wf), depth, nodePrefix, childPrefix)
+	}
+
+	for i, nInfo := range nodeInfo.children {
+		renderChild(w, wf, nInfo, depth, nodePrefix, childPrefix, filtered, i,
+			len(nodeInfo.children)-1, childIndent)
+	}
+}
+
+// executionNode
+func (nodeInfo *executionNode) renderNodes(w *tabwriter.Writer, wf *wfv1.Workflow, depth int,
+	nodePrefix string, childPrefix string) {
+	filtered, _ := filterNode(nodeInfo.getNodeStatus(wf))
+	if !filtered {
+		printNode(w, wf, nodeInfo.getNodeStatus(wf), depth, nodePrefix, childPrefix)
 	}
 }
 
@@ -233,6 +460,16 @@ func getArtifactsString(node wfv1.NodeStatus) string {
 	return strings.Join(artNames, ",")
 }
 
+// Will take the printed name for a node to be the last part after a '.'
+// Will also special case wfName.onExit nodes to onExit
+func humanizeNodeName(wf *wfv1.Workflow, node wfv1.NodeStatus) string {
+	if node.Name == (wf.ObjectMeta.Name + onExitSuffix) {
+		return onExitSuffix
+	}
+	parts := strings.Split(node.Name, ".")
+	return parts[len(parts)-1]
+}
+
 func humanizeTimestamp(epoch int64) string {
 	ts := time.Unix(epoch, 0)
 	return fmt.Sprintf("%s (%s)", ts.Format("Mon Jan 02 15:04:05 -0700"), humanize.Time(ts))