internal/core/dep: first stab at precise dependency analyzer

Change-Id: I0bf5de9a1b2421ddffa4108c6dbffd38dae2cfe0
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/7604
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/core/adt/context.go b/internal/core/adt/context.go
index 411a7ad..5b52a18 100644
--- a/internal/core/adt/context.go
+++ b/internal/core/adt/context.go
@@ -340,6 +340,14 @@
 		return nil, arc.ChildErrors
 	}
 
+	for {
+		x, ok := arc.Value.(*Vertex)
+		if !ok {
+			break
+		}
+		arc = x
+	}
+
 	return arc, err
 }
 
diff --git a/internal/core/dep/dep.go b/internal/core/dep/dep.go
new file mode 100644
index 0000000..71ab3e4
--- /dev/null
+++ b/internal/core/dep/dep.go
@@ -0,0 +1,311 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package dep analyzes dependencies between values.
+package dep
+
+import (
+	"errors"
+
+	"cuelang.org/go/internal/core/adt"
+)
+
+// A Dependency is a reference and the node that reference resolves to.
+type Dependency struct {
+	// Node is the referenced node.
+	Node *adt.Vertex
+
+	// Reference is the expression that referenced the node.
+	Reference adt.Resolver
+
+	top bool
+}
+
+// Import returns the import reference or nil if the reference was within
+// the same package as the visited Vertex.
+func (d *Dependency) Import() *adt.ImportReference {
+	x, _ := d.Reference.(adt.Expr)
+	return importRef(x)
+}
+
+// IsRoot reports whether the dependency is referenced by the root of the
+// original Vertex passed to any of the Visit* functions, and not one of its
+// descendent arcs. This always returns true for Visit().
+func (d *Dependency) IsRoot() bool {
+	return d.top
+}
+
+func (d *Dependency) Path() []adt.Feature {
+	return nil
+}
+
+func importRef(r adt.Expr) *adt.ImportReference {
+	switch x := r.(type) {
+	case *adt.ImportReference:
+		return x
+	case *adt.SelectorExpr:
+		return importRef(x.X)
+	case *adt.IndexExpr:
+		return importRef(x.X)
+	}
+	return nil
+}
+
+// VisitFunc is used for reporting dependencies.
+type VisitFunc func(Dependency) error
+
+// Visit calls f for all vertices referenced by the conjuncts of n without
+// descending into the elements of list or fields of structs. Only references
+// that do not refer to the conjuncts of n itself are reported.
+func Visit(c *adt.OpContext, n *adt.Vertex, f VisitFunc) error {
+	return visit(c, n, f, false, true)
+}
+
+// VisitAll calls f for all vertices referenced by the conjuncts of n including
+// those of descendant fields and elements. Only references that do not refer to
+// the conjuncts of n itself are reported.
+func VisitAll(c *adt.OpContext, n *adt.Vertex, f VisitFunc) error {
+	return visit(c, n, f, true, true)
+}
+
+var empty *adt.Vertex
+
+func init() {
+	empty = &adt.Vertex{}
+	empty.UpdateStatus(adt.Finalized)
+}
+
+func visit(c *adt.OpContext, n *adt.Vertex, f VisitFunc, all, top bool) (err error) {
+	if c == nil {
+		panic("nil context")
+	}
+	v := visitor{
+		ctxt:  c,
+		visit: f,
+		node:  n,
+		all:   all,
+		top:   top,
+	}
+
+	defer func() {
+		switch x := recover(); x {
+		case nil:
+		case aborted:
+			err = v.err
+		default:
+			panic(x)
+		}
+	}()
+
+	for _, x := range n.Conjuncts {
+		v.markExpr(x.Env, x.Expr())
+	}
+
+	return nil
+}
+
+var aborted = errors.New("aborted")
+
+type visitor struct {
+	ctxt  *adt.OpContext
+	visit VisitFunc
+	node  *adt.Vertex
+	err   error
+	all   bool
+	top   bool
+}
+
+// TODO: factor out the below logic as either a low-level dependency analyzer or
+// some walk functionality.
+
+// markExpr visits all nodes in an expression to mark dependencies.
+func (c *visitor) markExpr(env *adt.Environment, expr adt.Expr) {
+	switch x := expr.(type) {
+	case nil:
+	case adt.Resolver:
+		c.markResolver(env, x)
+
+	case *adt.BinaryExpr:
+		c.markExpr(env, x.X)
+		c.markExpr(env, x.Y)
+
+	case *adt.UnaryExpr:
+		c.markExpr(env, x.X)
+
+	case *adt.Interpolation:
+		for i := 1; i < len(x.Parts); i += 2 {
+			c.markExpr(env, x.Parts[i])
+		}
+
+	case *adt.BoundExpr:
+		c.markExpr(env, x.Expr)
+
+	case *adt.CallExpr:
+		c.markExpr(env, x.Fun)
+		saved := c.all
+		c.all = true
+		for _, a := range x.Args {
+			c.markExpr(env, a)
+		}
+		c.all = saved
+
+	case *adt.DisjunctionExpr:
+		for _, d := range x.Values {
+			c.markExpr(env, d.Val)
+		}
+
+	case *adt.SliceExpr:
+		c.markExpr(env, x.X)
+		c.markExpr(env, x.Lo)
+		c.markExpr(env, x.Hi)
+		c.markExpr(env, x.Stride)
+
+	case *adt.ListLit:
+		for _, e := range x.Elems {
+			switch x := e.(type) {
+			case adt.Yielder:
+				c.markYielder(env, x)
+
+			case adt.Expr:
+				c.markSubExpr(env, x)
+
+			case *adt.Ellipsis:
+				if x.Value != nil {
+					c.markSubExpr(env, x.Value)
+				}
+			}
+		}
+
+	case *adt.StructLit:
+		env := &adt.Environment{Up: env, Vertex: empty}
+		for _, e := range x.Decls {
+			c.markDecl(env, e)
+		}
+	}
+}
+
+// markResolve resolves dependencies.
+func (c *visitor) markResolver(env *adt.Environment, r adt.Resolver) {
+	switch x := r.(type) {
+	case nil:
+	case *adt.LetReference:
+		saved := c.ctxt.PushState(env, nil)
+		env := c.ctxt.Env(x.UpCount)
+		c.markExpr(env, x.X)
+		c.ctxt.PopState(saved)
+		return
+	}
+
+	if ref, _ := c.ctxt.Resolve(env, r); ref != nil {
+		if ref != c.node {
+			d := Dependency{
+				Node:      ref,
+				Reference: r,
+				top:       c.top,
+			}
+			if err := c.visit(d); err != nil {
+				c.err = err
+				panic(aborted)
+			}
+		}
+
+		return
+	}
+
+	// It is possible that a reference cannot be resolved because it is
+	// incomplete. In this case, we should check whether subexpressions of the
+	// reference can be resolved to mark those dependencies. For instance,
+	// prefix paths of selectors and the value or index of an index experssion
+	// may independently resolve to a valid dependency.
+
+	switch x := r.(type) {
+	case *adt.NodeLink:
+		panic("unreachable")
+
+	case *adt.IndexExpr:
+		c.markExpr(env, x.X)
+		c.markExpr(env, x.Index)
+
+	case *adt.SelectorExpr:
+		c.markExpr(env, x.X)
+	}
+}
+
+func (c *visitor) markSubExpr(env *adt.Environment, x adt.Expr) {
+	if c.all {
+		saved := c.top
+		c.top = false
+		c.markExpr(env, x)
+		c.top = saved
+	}
+}
+
+func (c *visitor) markDecl(env *adt.Environment, d adt.Decl) {
+	switch x := d.(type) {
+	case *adt.Field:
+		c.markSubExpr(env, x.Value)
+
+	case *adt.OptionalField:
+		// when dynamic, only continue if there is evidence of
+		// the field in the parallel actual evaluation.
+		c.markSubExpr(env, x.Value)
+
+	case *adt.BulkOptionalField:
+		c.markExpr(env, x.Filter)
+		// when dynamic, only continue if there is evidence of
+		// the field in the parallel actual evaluation.
+		c.markSubExpr(env, x.Value)
+
+	case *adt.DynamicField:
+		c.markExpr(env, x.Key)
+		// when dynamic, only continue if there is evidence of
+		// a matching field in the parallel actual evaluation.
+		c.markSubExpr(env, x.Value)
+
+	case adt.Yielder:
+		c.markYielder(env, x)
+
+	case adt.Expr:
+		c.markExpr(env, x)
+
+	case *adt.Ellipsis:
+		if x.Value != nil {
+			c.markSubExpr(env, x.Value)
+		}
+	}
+}
+
+func (c *visitor) markYielder(env *adt.Environment, y adt.Yielder) {
+	switch x := y.(type) {
+	case *adt.ForClause:
+		c.markExpr(env, x.Src)
+		env := &adt.Environment{Up: env, Vertex: empty}
+		c.markYielder(env, x.Dst)
+		// In dynamic mode, iterate over all actual value and
+		// evaluate.
+
+	case *adt.LetClause:
+		c.markExpr(env, x.Expr)
+		env := &adt.Environment{Up: env, Vertex: empty}
+		c.markYielder(env, x.Dst)
+
+	case *adt.IfClause:
+		c.markExpr(env, x.Condition)
+		// In dynamic mode, only continue if condition is true.
+		c.markYielder(env, x.Dst)
+
+	case *adt.ValueClause:
+		c.markExpr(env, x.StructLit)
+	}
+}
diff --git a/internal/core/dep/dep_test.go b/internal/core/dep/dep_test.go
new file mode 100644
index 0000000..3ec0c00
--- /dev/null
+++ b/internal/core/dep/dep_test.go
@@ -0,0 +1,131 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package dep_test
+
+import (
+	"flag"
+	"fmt"
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/internal"
+	"cuelang.org/go/internal/core/adt"
+	"cuelang.org/go/internal/core/debug"
+	"cuelang.org/go/internal/core/dep"
+	"cuelang.org/go/internal/core/eval"
+	"cuelang.org/go/internal/core/runtime"
+	"cuelang.org/go/internal/cuetxtar"
+)
+
+var update = flag.Bool("update", false, "update the test files")
+
+func TestVisit(t *testing.T) {
+	test := cuetxtar.TxTarTest{
+		Root:   "./testdata",
+		Name:   "dependencies",
+		Update: *update,
+	}
+
+	test.Run(t, func(t *cuetxtar.Test) {
+		a := t.ValidInstances()
+
+		inst := cue.Build(a)[0].Value()
+		if inst.Err() != nil {
+			t.Fatal(inst.Err())
+		}
+
+		rx, nx := internal.CoreValue(inst)
+		ctxt := eval.NewContext(rx.(*runtime.Runtime), nx.(*adt.Vertex))
+
+		testCases := []struct {
+			name string
+			root string
+			fn   func(*adt.OpContext, *adt.Vertex, dep.VisitFunc) error
+		}{{
+			name: "field",
+			root: "a.b",
+			fn:   dep.Visit,
+		}, {
+			name: "all",
+			root: "a",
+			fn:   dep.VisitAll,
+		}}
+
+		for _, tc := range testCases {
+			v := inst.LookupPath(cue.ParsePath(tc.root))
+
+			_, nx = internal.CoreValue(v)
+			n := nx.(*adt.Vertex)
+			w := t.Writer(tc.name)
+
+			t.Run(tc.name, func(sub *testing.T) {
+				tc.fn(ctxt, n, func(d dep.Dependency) error {
+					str := cue.MakeValue(ctxt, d.Node).Path().String()
+					if i := d.Import(); i != nil {
+						path := i.ImportPath.StringValue(ctxt)
+						str = fmt.Sprintf("%q.%s", path, str)
+					}
+					fmt.Fprintln(w, str)
+					return nil
+				})
+			})
+		}
+	})
+}
+
+// DO NOT REMOVE: for Testing purposes.
+func TestX(t *testing.T) {
+	// a and a.b are the fields for which to determine the references.
+	in := `
+	`
+
+	if strings.TrimSpace(in) == "" {
+		t.Skip()
+	}
+
+	rt := cue.Runtime{}
+	inst, err := rt.Compile("", in)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	v := inst.Lookup("a")
+
+	rx, nx := internal.CoreValue(v)
+	r := rx.(*runtime.Runtime)
+	n := nx.(*adt.Vertex)
+
+	ctxt := eval.NewContext(r, n)
+
+	for _, c := range n.Conjuncts {
+		str := debug.NodeString(ctxt, c.Expr(), nil)
+		t.Log(str)
+	}
+
+	deps := []string{}
+
+	_ = dep.VisitAll(ctxt, n, func(d dep.Dependency) error {
+		str := cue.MakeValue(ctxt, d.Node).Path().String()
+		if i := d.Import(); i != nil {
+			path := i.ImportPath.StringValue(ctxt)
+			str = fmt.Sprintf("%q.%s", path, str)
+		}
+		deps = append(deps, str)
+		return nil
+	})
+
+	t.Error(deps)
+}
diff --git a/internal/core/dep/testdata/alias.txtar b/internal/core/dep/testdata/alias.txtar
new file mode 100644
index 0000000..51f3efa
--- /dev/null
+++ b/internal/core/dep/testdata/alias.txtar
@@ -0,0 +1,14 @@
+-- in.cue --
+let X = Y
+let Y = c + d
+
+a: b: X
+
+c: 5
+d: 6
+-- out/dependencies/field --
+c
+d
+-- out/dependencies/all --
+c
+d
diff --git a/internal/core/dep/testdata/call.txtar b/internal/core/dep/testdata/call.txtar
new file mode 100644
index 0000000..5336504
--- /dev/null
+++ b/internal/core/dep/testdata/call.txtar
@@ -0,0 +1,13 @@
+-- in.cue --
+import "encoding/json"
+
+// The reference to string needs to be included even for Visit.
+a: b: json.Marshal({ #a: str })
+
+str: "x:y:z"
+-- out/dependencies/field --
+"encoding/json".Marshal
+str
+-- out/dependencies/all --
+"encoding/json".Marshal
+str
diff --git a/internal/core/dep/testdata/dynamic.txtar b/internal/core/dep/testdata/dynamic.txtar
new file mode 100644
index 0000000..3c42ae7
--- /dev/null
+++ b/internal/core/dep/testdata/dynamic.txtar
@@ -0,0 +1,32 @@
+-- in.cue --
+first: {
+    out: [1, 2]
+}
+ignore: {
+    x: 1
+    y: a.c
+}
+middle: {
+    for x in first.out {
+        ignore.y
+
+        "la\(x)": ignore & {
+            seq: x+1
+            val: "foo\(x)"
+            out: ignore.x
+        }
+    }
+}
+
+a: {
+    b: [ for x in middle { x } ]
+    c: {}
+}
+-- out/dependencies/field --
+middle
+-- out/dependencies/all --
+middle
+-- out/dependencies/dynamic --
+middle
+middle.la1
+middle.la2
diff --git a/internal/core/dep/testdata/expr.txtar b/internal/core/dep/testdata/expr.txtar
new file mode 100644
index 0000000..88c48ea
--- /dev/null
+++ b/internal/core/dep/testdata/expr.txtar
@@ -0,0 +1,22 @@
+-- in.cue --
+// Note: in dynamic mode, [d] does not get picked up
+// because the disjunction is not resolved.
+a: b: "\(d)" | -d | c[:1] | c[0] | <e | [d, ...] | [...e]
+
+c: [1, 2]
+d: 2
+e: 3
+-- out/dependencies/field --
+d
+d
+c
+c[0]
+e
+-- out/dependencies/all --
+d
+d
+c
+c[0]
+e
+d
+e
diff --git a/internal/core/dep/testdata/field.txtar b/internal/core/dep/testdata/field.txtar
new file mode 100644
index 0000000..e993bc3
--- /dev/null
+++ b/internal/core/dep/testdata/field.txtar
@@ -0,0 +1,23 @@
+-- in.cue --
+a: b: {
+    { [pattern]:  c }
+    { "\(name)": c }
+    regular:    c
+    optional?:  c
+    ...
+}
+
+pattern: =~"^Foo"
+value:   c
+name:    "name"
+c:       "descendants"
+-- out/dependencies/field --
+pattern
+name
+-- out/dependencies/all --
+pattern
+c
+name
+c
+c
+c
diff --git a/internal/core/dep/testdata/incomplete.txtar b/internal/core/dep/testdata/incomplete.txtar
new file mode 100644
index 0000000..fdf83ad
--- /dev/null
+++ b/internal/core/dep/testdata/incomplete.txtar
@@ -0,0 +1,20 @@
+-- in.cue --
+a: b: c.d | c.e | c[e] | c["d"] | c[d]
+
+c: d: 3
+d: "d"
+e: "e"
+-- out/dependencies/field --
+c.d
+c
+c
+e
+c.d
+c.d
+-- out/dependencies/all --
+c.d
+c
+c
+e
+c.d
+c.d
diff --git a/internal/core/dep/testdata/list.txtar b/internal/core/dep/testdata/list.txtar
new file mode 100644
index 0000000..217194a
--- /dev/null
+++ b/internal/core/dep/testdata/list.txtar
@@ -0,0 +1,11 @@
+-- in.cue --
+// Note: in dynamic mode, [d] does not get picked up
+// because the disjunction is not resolved.
+a: b: [ d, ...e ] & [ 1, 2, ... ]
+
+d: int
+e: int
+-- out/dependencies/field --
+-- out/dependencies/all --
+d
+e
diff --git a/internal/core/dep/testdata/listcomprehension.txtar b/internal/core/dep/testdata/listcomprehension.txtar
new file mode 100644
index 0000000..d2a458f
--- /dev/null
+++ b/internal/core/dep/testdata/listcomprehension.txtar
@@ -0,0 +1,11 @@
+-- in.cue --
+a: b: [ for x in c if x.a > 0 { x.a + d } ]
+
+c: [{a: 1}, {a: 3}]
+d: 2
+-- out/dependencies/field --
+c
+d
+-- out/dependencies/all --
+c
+d
diff --git a/internal/core/dep/testdata/self.txtar b/internal/core/dep/testdata/self.txtar
new file mode 100644
index 0000000..70bf1f5
--- /dev/null
+++ b/internal/core/dep/testdata/self.txtar
@@ -0,0 +1,18 @@
+-- in.cue --
+a: b: {
+    for x in b {}
+
+    x: {
+        c: m
+        d: y
+        e: f
+        f: 1
+        g: b.x
+    }
+    y: 3
+}
+
+m: 3
+-- out/dependencies/field --
+-- out/dependencies/all --
+m
diff --git a/internal/core/dep/testdata/structcomprehension.txtar b/internal/core/dep/testdata/structcomprehension.txtar
new file mode 100644
index 0000000..62ffd8f
--- /dev/null
+++ b/internal/core/dep/testdata/structcomprehension.txtar
@@ -0,0 +1,19 @@
+-- in.cue --
+a: b: {
+    for i, x in c
+    let y = x
+    if y > 0 {
+        "\(e)\(i)": x + d
+    }
+}
+
+c: [1, 2]
+d: 2
+e: "t"
+-- out/dependencies/field --
+c
+e
+-- out/dependencies/all --
+c
+e
+d
diff --git a/internal/core/eval/eval.go b/internal/core/eval/eval.go
index 079550c..dd9f6a6 100644
--- a/internal/core/eval/eval.go
+++ b/internal/core/eval/eval.go
@@ -1148,13 +1148,6 @@
 			n.exprs = append(n.exprs, v)
 			break
 		}
-		for {
-			x, ok := arc.Value.(*adt.Vertex)
-			if !ok {
-				break
-			}
-			arc = x
-		}
 
 		// We need to ensure that each arc is only unified once (or at least) a
 		// bounded time, witch each conjunct. Comprehensions, for instance, may