internal/core/eval: handle structural cycles

Issue #29
Issue #42

Change-Id: I86191d7bf4fdcf8705d740171aedd81e04819dd5
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/6522
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/core/adt/composite.go b/internal/core/adt/composite.go
index d7c51ca..5e2f611 100644
--- a/internal/core/adt/composite.go
+++ b/internal/core/adt/composite.go
@@ -22,6 +22,61 @@
 	"cuelang.org/go/cue/token"
 )
 
+// TODO: unanswered questions about structural cycles:
+//
+// 1. When detecting a structural cycle, should we consider this as:
+//    a) an unevaluated value,
+//    b) an incomplete error (which does not affect parent validity), or
+//    c) a special value.
+//
+// Making it an error is the simplest way to ensure reentrancy is disallowed:
+// without an error it would require an additional mechanism to stop reentrancy
+// from continuing to process. Even worse, in some cases it may only partially
+// evaluate, resulting in unexpected results. For this reason, we are taking
+// approach `b` for now.
+//
+// This has some consequences of how disjunctions are treated though. Consider
+//
+//     list: {
+//        head: _
+//        tail: list | null
+//     }
+//
+// When making it an error, evaluating the above will result in
+//
+//     list: {
+//        head: _
+//        tail: null
+//     }
+//
+// because list will result in a structural cycle, and thus an error, it will be
+// stripped from the disjunction. This may or may not be a desirable property. A
+// nice thing is that it is not required to write `list | *null`. A disadvantage
+// is that this is perhaps somewhat inexplicit.
+//
+// When not making it an error (and simply cease evaluating child arcs upon
+// cycle detection), the result would be:
+//
+//     list: {
+//        head: _
+//        tail: list | null
+//     }
+//
+// In other words, an evaluation would result in a cycle and thus an error.
+// Implementations can recognize such cases by having unevaluated arcs. An
+// explicit structure cycle marker would probably be less error prone.
+//
+// Note that in both cases, a reference to list will still use the original
+// conjuncts, so the result will be the same for either method in this case.
+//
+//
+// 2. Structural cycle allowance.
+//
+// Structural cycle detection disallows reentrancy as well. This means one
+// cannot use structs for recursive computation. This will probably preclude
+// evaluation of some configuration. Given that there is no real alternative
+// yet, we could allow structural cycle detection to be optionally disabled.
+
 // An Environment links the parent scopes for identifier lookup to a composite
 // node. Each conjunct that make up node in the tree can be associated with
 // a different environment (although some conjuncts may share an Environment).
@@ -33,10 +88,44 @@
 	// constraint. It is used to resolve label references.
 	DynamicLabel Feature
 
+	// TODO(perf): make the following public fields a shareable struct as it
+	// mostly is going to be the same for child nodes.
+
 	// CloseID is a unique number that tracks a group of conjuncts that need
 	// belong to a single originating definition.
 	CloseID uint32
 
+	// Cyclic indicates a structural cycle was detected for this conjunct or one
+	// of its ancestors.
+	Cyclic bool
+
+	// Deref keeps track of nodes that should dereference to Vertex. It is used
+	// for detecting structural cycle.
+	//
+	// The detection algorithm is based on Tomabechi's quasi-destructive graph
+	// unification. This detection requires dependencies to be resolved into
+	// fully dereferenced vertices. This is not the case in our algorithm:
+	// the result of evaluating conjuncts is placed into dereferenced vertices
+	// _after_ they are evaluated, but the Environment still points to the
+	// non-dereferenced context.
+	//
+	// In order to be able to detect structural cycles, we need to ensure that
+	// at least one node that is part of a cycle in the context in which
+	// conjunctions are evaluated dereferences correctly.
+	//
+	// The only field necessary to detect a structural cycle, however, is
+	// the Status field of the Vertex. So rather than dereferencing a node
+	// proper, it is sufficient to copy the Status of the dereferenced nodes
+	// to these nodes (will always be EvaluatingArcs).
+	Deref []*Vertex
+
+	// Cycles contains vertices for which cycles are detected. It is used
+	// for tracking self-references within structural cycles.
+	//
+	// Unlike Deref, Cycles is not incremented with child nodes.
+	// TODO: Cycles is always a tail end of Deref, so this can be optimized.
+	Cycles []*Vertex
+
 	cache map[Expr]Value
 }
 
@@ -71,6 +160,8 @@
 	// Label is the feature leading to this vertex.
 	Label Feature
 
+	// TODO: move the following status fields to a separate struct.
+
 	// status indicates the evaluation progress of this vertex.
 	status VertexStatus
 
@@ -79,6 +170,13 @@
 	// ignored.
 	isData bool
 
+	// EvalCount keeps track of temporary dereferencing during evaluation.
+	// If EvalCount > 0, status should be considered to be EvaluatingArcs.
+	EvalCount int
+
+	// SelfCount is used for tracking self-references.
+	SelfCount int
+
 	// Value is the value associated with this vertex. For lists and structs
 	// this is a sentinel value indicating its kind.
 	Value Value
@@ -140,6 +238,9 @@
 )
 
 func (v *Vertex) Status() VertexStatus {
+	if v.EvalCount > 0 {
+		return EvaluatingArcs
+	}
 	return v.status
 }
 
diff --git a/internal/core/adt/errors.go b/internal/core/adt/errors.go
index 58ddac5..ecfe359 100644
--- a/internal/core/adt/errors.go
+++ b/internal/core/adt/errors.go
@@ -51,6 +51,12 @@
 	// Mostly used for legacy reasons.
 	NotExistError
 
+	// StructuralCycleError means a structural cycle was found. Structural
+	// cycles are permanent errors, but they are not passed up recursively,
+	// as a unification of a value with a structural cycle with one that
+	// doesn't may still give a useful result.
+	StructuralCycleError
+
 	// IncompleteError means an evaluation could not complete because of
 	// insufficient information that may still be added later.
 	IncompleteError
@@ -67,6 +73,8 @@
 		return "eval"
 	case UserError:
 		return "user"
+	case StructuralCycleError:
+		return "structural cycle"
 	case IncompleteError:
 		return "incomplete"
 	case CycleError:
diff --git a/internal/core/eval/eval.go b/internal/core/eval/eval.go
index 596a62d..950f3aa 100644
--- a/internal/core/eval/eval.go
+++ b/internal/core/eval/eval.go
@@ -545,6 +545,8 @@
 		n.node.Closed = m
 	}
 
+	cyclic := n.hasCycle && !n.hasNonCycle
+
 	// Visit arcs recursively to validate and compute error.
 	for _, a := range n.node.Arcs {
 		if updated != nil {
@@ -560,13 +562,24 @@
 		}
 		// Call UpdateStatus here to be absolutely sure the status is set
 		// correctly and that we are not regressing.
-		n.node.UpdateStatus(adt.EvaluatingArcs)
-		n.eval.Unify(ctx, a, adt.Finalized)
-		if err, _ := a.Value.(*adt.Bottom); err != nil {
-			n.node.AddChildError(err)
+		if !cyclic {
+			n.node.UpdateStatus(adt.EvaluatingArcs)
+			n.eval.Unify(ctx, a, adt.Finalized)
+			if err, _ := a.Value.(*adt.Bottom); err != nil {
+				n.node.AddChildError(err)
+			}
 		}
 	}
 
+	if cyclic {
+		n.node.Value = adt.CombineErrors(nil, n.node.Value, &adt.Bottom{
+			Code:  adt.StructuralCycleError,
+			Err:   errors.Newf(token.NoPos, "structural cycle"),
+			Value: n.node.Value,
+			// TODO: probably, this should have the referenced arc.
+		})
+	}
+
 	n.node.UpdateStatus(adt.Finalized)
 }
 
@@ -682,6 +695,9 @@
 	vLists []*adt.Vertex
 	exprs  []conjunct
 
+	hasCycle    bool // has conjunct with structural cycle
+	hasNonCycle bool // has conjunct without structural cycle
+
 	// Disjunction handling
 	disjunctions []envDisjunct
 	defaultMode  defaultMode
@@ -867,6 +883,23 @@
 	// Require an Environment.
 	ctx := n.ctx
 
+	// TODO: see if we can do without these counters.
+	for _, d := range v.Env.Deref {
+		d.EvalCount++
+	}
+	for _, d := range v.Env.Cycles {
+		d.SelfCount++
+	}
+	defer func() {
+		for _, d := range v.Env.Deref {
+			d.EvalCount--
+		}
+		for _, d := range v.Env.Cycles {
+			d.SelfCount++
+		}
+	}()
+
+outer:
 	switch x := v.Expr().(type) {
 	case adt.Resolver:
 		arc, err := ctx.Resolve(v.Env, x)
@@ -883,30 +916,81 @@
 			break
 		}
 
-		// If this is a cycle error, we have reached a fixed point and adding
-		// conjuncts at this point will not change the value. Also, continuing
-		// to pursue this value will result in an infinite loop.
-		//
-		// TODO: add a mechanism so that the computation will only have to be
-		// one once?
-		if isEvaluating(arc) {
-			break
+		// Pass detection of structural cycles from parent to children.
+		cyclic := false
+		if v.Env != nil {
+			// If a reference is in a tainted set, so is the value it refers to.
+			cyclic = v.Env.Cyclic
 		}
 
-		// TODO: detect structural cycles here. A structural cycle can occur
-		// if it is not a reference cycle, but refers to a parent. node.
-		// This should only be allowed if it is unified with a finite structure.
+		status := arc.Status()
+		for _, d := range v.Env.Deref {
+			if d == arc {
+				status = adt.EvaluatingArcs
+			}
+		}
+
+		switch status {
+		case adt.Evaluating:
+			// Reference cycle detected. We have reached a fixed point and
+			// adding conjuncts at this point will not change the value. Also,
+			// continuing to pursue this value will result in an infinite loop.
+
+			// TODO: add a mechanism so that the computation will only have to
+			// be done once?
+
+			if arc == n.node {
+				// TODO: we could use node sharing here. This may avoid an
+				// exponential blowup during evaluation, like is possible with
+				// YAML.
+				break outer
+			}
+
+		case adt.EvaluatingArcs:
+			// Structural cycle detected. Continue evaluation as usual, but
+			// keep track of whether any other conjuncts without structural
+			// cycles are added. If not, evaluation of child arcs will end
+			// with this node.
+
+			// For the purpose of determining whether at least one non-cyclic
+			// conjuncts exists, we consider all conjuncts of a cyclic conjuncts
+			// also cyclic.
+
+			cyclic = true
+			n.hasCycle = true
+
+			// As the adt.EvaluatingArcs mechanism bypasses the self-reference
+			// mechanism, we need to separately keep track of it here.
+			// If this (originally) is a self-reference node, adding them
+			// will result in recursively adding the same reference. For this
+			// we also mark the node as evaluating.
+			if arc.SelfCount > 0 {
+				break outer
+			}
+
+			// This count is added for values that are directly added below.
+			// The count is handled separately for delayed values.
+			arc.SelfCount++
+			defer func() { arc.SelfCount-- }()
+		}
+
+		// TODO: uncommenting the following almost works, but causes some
+		// faulty results in complex cycle handling between disjunctions.
+		// The reason is that disjunctions must be eliminated if checks in
+		// values on which they depend fail.
+		ctx.Unify(ctx, arc, adt.Finalized)
 
 		if arc.Label.IsDef() {
-			n.insertClosed(arc)
+			n.insertClosed(arc, cyclic, arc)
 		} else {
 			for _, a := range arc.Conjuncts {
-				n.addExprConjunct(a, closeID, top)
+				n.addExprConjunct(updateCyclic(a, cyclic, arc), closeID, top)
 			}
 		}
 
 	case adt.Evaluator:
 		// adt.Interpolation, adt.UnaryExpr, adt.BinaryExpr, adt.CallExpr
+		// Could be unify?
 		val, complete := ctx.Evaluate(v.Env, v.Expr())
 		if !complete {
 			n.exprs = append(n.exprs, conjunct{v, closeID, top})
@@ -933,7 +1017,40 @@
 	}
 }
 
-func (n *nodeContext) insertClosed(arc *adt.Vertex) {
+// updateCyclicStatus looks for proof of non-cyclic conjuncts to override
+// a structural cycle.
+func (n *nodeContext) updateCyclicStatus(env *adt.Environment) {
+	if env == nil || !env.Cyclic {
+		// fmt.Printf("%p -- %d hasNonCycle\n", n.node, n.node.Label)
+		n.hasNonCycle = true
+	}
+}
+
+func updateCyclic(c adt.Conjunct, cyclic bool, deref *adt.Vertex) adt.Conjunct {
+	env := c.Env
+	switch {
+	case env == nil:
+		if !cyclic && deref == nil {
+			return c
+		}
+		env = &adt.Environment{Cyclic: cyclic}
+	case deref == nil && env.Cyclic == cyclic:
+		return c
+	default:
+		// The conjunct may still be in use in other fields, so we should
+		// make a new copy to mark Cyclic only for this case.
+		e := *env
+		e.Cyclic = e.Cyclic || cyclic
+		env = &e
+	}
+	if deref != nil {
+		env.Deref = append(env.Deref, deref)
+		env.Cycles = append(env.Cycles, deref)
+	}
+	return adt.MakeConjunct(env, c.Expr())
+}
+
+func (n *nodeContext) insertClosed(arc *adt.Vertex, cyclic bool, deref *adt.Vertex) {
 	id := n.eval.nextID()
 	n.needClose = true
 
@@ -941,7 +1058,7 @@
 	n.newClose = nil
 
 	for _, a := range arc.Conjuncts {
-		n.addExprConjunct(a, id, false)
+		n.addExprConjunct(updateCyclic(a, cyclic, deref), id, false)
 	}
 
 	current, n.newClose = n.newClose, current
@@ -953,6 +1070,8 @@
 }
 
 func (n *nodeContext) addValueConjunct(env *adt.Environment, v adt.Value) {
+	n.updateCyclicStatus(env)
+
 	if x, ok := v.(*adt.Vertex); ok {
 		needClose := false
 		if isStruct(x) {
@@ -969,13 +1088,14 @@
 			n.node.AddStructs(x.Structs...)
 		}
 
-		if len(x.Conjuncts) > 0 {
+		if !x.IsData() && len(x.Conjuncts) > 0 {
+			cyclic := env != nil && env.Cyclic
 			if needClose {
-				n.insertClosed(x)
+				n.insertClosed(x, cyclic, nil)
 				return
 			}
 			for _, c := range x.Conjuncts {
-				n.addExprConjunct(c, 0, false) // Pass from eval
+				n.addExprConjunct(updateCyclic(c, cyclic, nil), 0, false) // TODO: Pass from eval
 			}
 			return
 		}
@@ -1112,6 +1232,8 @@
 	newDef uint32,
 	top bool) {
 
+	n.updateCyclicStatus(env) // to handle empty structs.
+
 	ctx := n.ctx
 	n.node.AddStructs(s)
 
@@ -1138,6 +1260,10 @@
 		Vertex:  n.node,
 		CloseID: closeID,
 	}
+	if env != nil {
+		childEnv.Cyclic = env.Cyclic
+		childEnv.Deref = env.Deref
+	}
 
 	var hasOther, hasBulk adt.Node
 
@@ -1422,6 +1548,8 @@
 
 outer:
 	for i, l := range n.lists {
+		n.updateCyclicStatus(l.env)
+
 		index := int64(0)
 		hasComprehension := false
 		for j, elem := range l.list.Elems {
diff --git a/internal/core/eval/eval_test.go b/internal/core/eval/eval_test.go
index 604def6..96197b1 100644
--- a/internal/core/eval/eval_test.go
+++ b/internal/core/eval/eval_test.go
@@ -17,6 +17,7 @@
 import (
 	"flag"
 	"fmt"
+	"strings"
 	"testing"
 
 	"cuelang.org/go/internal/core/debug"
@@ -24,7 +25,6 @@
 	"cuelang.org/go/internal/core/validate"
 	"cuelang.org/go/internal/cuetxtar"
 	"cuelang.org/go/internal/legacy/cue"
-	"cuelang.org/go/pkg/strings"
 	"github.com/rogpeppe/go-internal/txtar"
 )
 
@@ -85,12 +85,7 @@
 }
 
 var needFix = map[string]string{
-	"export/027": "cycle",
-	"export/028": "cycle",
-	"export/030": "cycle",
-
-	"cycle/025_cannot_resolve_references_that_would_be_ambiguous": "cycle",
-	"compile/scope": "cycle",
+	"DIR/NAME": "reason",
 }
 
 // TestX is for debugging. Do not delete.