internal/core/eval: handle structural cycles
Issue #29
Issue #42
Change-Id: I86191d7bf4fdcf8705d740171aedd81e04819dd5
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/6522
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/core/adt/composite.go b/internal/core/adt/composite.go
index d7c51ca..5e2f611 100644
--- a/internal/core/adt/composite.go
+++ b/internal/core/adt/composite.go
@@ -22,6 +22,61 @@
"cuelang.org/go/cue/token"
)
+// TODO: unanswered questions about structural cycles:
+//
+// 1. When detecting a structural cycle, should we consider this as:
+// a) an unevaluated value,
+// b) an incomplete error (which does not affect parent validity), or
+// c) a special value.
+//
+// Making it an error is the simplest way to ensure reentrancy is disallowed:
+// without an error it would require an additional mechanism to stop reentrancy
+// from continuing to process. Even worse, in some cases it may only partially
+// evaluate, resulting in unexpected results. For this reason, we are taking
+// approach `b` for now.
+//
+// This has some consequences of how disjunctions are treated though. Consider
+//
+// list: {
+// head: _
+// tail: list | null
+// }
+//
+// When making it an error, evaluating the above will result in
+//
+// list: {
+// head: _
+// tail: null
+// }
+//
+// because list will result in a structural cycle, and thus an error, it will be
+// stripped from the disjunction. This may or may not be a desirable property. A
+// nice thing is that it is not required to write `list | *null`. A disadvantage
+// is that this is perhaps somewhat inexplicit.
+//
+// When not making it an error (and simply cease evaluating child arcs upon
+// cycle detection), the result would be:
+//
+// list: {
+// head: _
+// tail: list | null
+// }
+//
+// In other words, an evaluation would result in a cycle and thus an error.
+// Implementations can recognize such cases by having unevaluated arcs. An
+// explicit structure cycle marker would probably be less error prone.
+//
+// Note that in both cases, a reference to list will still use the original
+// conjuncts, so the result will be the same for either method in this case.
+//
+//
+// 2. Structural cycle allowance.
+//
+// Structural cycle detection disallows reentrancy as well. This means one
+// cannot use structs for recursive computation. This will probably preclude
+// evaluation of some configuration. Given that there is no real alternative
+// yet, we could allow structural cycle detection to be optionally disabled.
+
// An Environment links the parent scopes for identifier lookup to a composite
// node. Each conjunct that make up node in the tree can be associated with
// a different environment (although some conjuncts may share an Environment).
@@ -33,10 +88,44 @@
// constraint. It is used to resolve label references.
DynamicLabel Feature
+ // TODO(perf): make the following public fields a shareable struct as it
+ // mostly is going to be the same for child nodes.
+
// CloseID is a unique number that tracks a group of conjuncts that need
// belong to a single originating definition.
CloseID uint32
+ // Cyclic indicates a structural cycle was detected for this conjunct or one
+ // of its ancestors.
+ Cyclic bool
+
+ // Deref keeps track of nodes that should dereference to Vertex. It is used
+ // for detecting structural cycle.
+ //
+ // The detection algorithm is based on Tomabechi's quasi-destructive graph
+ // unification. This detection requires dependencies to be resolved into
+ // fully dereferenced vertices. This is not the case in our algorithm:
+ // the result of evaluating conjuncts is placed into dereferenced vertices
+ // _after_ they are evaluated, but the Environment still points to the
+ // non-dereferenced context.
+ //
+ // In order to be able to detect structural cycles, we need to ensure that
+ // at least one node that is part of a cycle in the context in which
+ // conjunctions are evaluated dereferences correctly.
+ //
+ // The only field necessary to detect a structural cycle, however, is
+ // the Status field of the Vertex. So rather than dereferencing a node
+ // proper, it is sufficient to copy the Status of the dereferenced nodes
+ // to these nodes (will always be EvaluatingArcs).
+ Deref []*Vertex
+
+ // Cycles contains vertices for which cycles are detected. It is used
+ // for tracking self-references within structural cycles.
+ //
+ // Unlike Deref, Cycles is not incremented with child nodes.
+ // TODO: Cycles is always a tail end of Deref, so this can be optimized.
+ Cycles []*Vertex
+
cache map[Expr]Value
}
@@ -71,6 +160,8 @@
// Label is the feature leading to this vertex.
Label Feature
+ // TODO: move the following status fields to a separate struct.
+
// status indicates the evaluation progress of this vertex.
status VertexStatus
@@ -79,6 +170,13 @@
// ignored.
isData bool
+ // EvalCount keeps track of temporary dereferencing during evaluation.
+ // If EvalCount > 0, status should be considered to be EvaluatingArcs.
+ EvalCount int
+
+ // SelfCount is used for tracking self-references.
+ SelfCount int
+
// Value is the value associated with this vertex. For lists and structs
// this is a sentinel value indicating its kind.
Value Value
@@ -140,6 +238,9 @@
)
func (v *Vertex) Status() VertexStatus {
+ if v.EvalCount > 0 {
+ return EvaluatingArcs
+ }
return v.status
}
diff --git a/internal/core/adt/errors.go b/internal/core/adt/errors.go
index 58ddac5..ecfe359 100644
--- a/internal/core/adt/errors.go
+++ b/internal/core/adt/errors.go
@@ -51,6 +51,12 @@
// Mostly used for legacy reasons.
NotExistError
+ // StructuralCycleError means a structural cycle was found. Structural
+ // cycles are permanent errors, but they are not passed up recursively,
+ // as a unification of a value with a structural cycle with one that
+ // doesn't may still give a useful result.
+ StructuralCycleError
+
// IncompleteError means an evaluation could not complete because of
// insufficient information that may still be added later.
IncompleteError
@@ -67,6 +73,8 @@
return "eval"
case UserError:
return "user"
+ case StructuralCycleError:
+ return "structural cycle"
case IncompleteError:
return "incomplete"
case CycleError:
diff --git a/internal/core/eval/eval.go b/internal/core/eval/eval.go
index 596a62d..950f3aa 100644
--- a/internal/core/eval/eval.go
+++ b/internal/core/eval/eval.go
@@ -545,6 +545,8 @@
n.node.Closed = m
}
+ cyclic := n.hasCycle && !n.hasNonCycle
+
// Visit arcs recursively to validate and compute error.
for _, a := range n.node.Arcs {
if updated != nil {
@@ -560,13 +562,24 @@
}
// Call UpdateStatus here to be absolutely sure the status is set
// correctly and that we are not regressing.
- n.node.UpdateStatus(adt.EvaluatingArcs)
- n.eval.Unify(ctx, a, adt.Finalized)
- if err, _ := a.Value.(*adt.Bottom); err != nil {
- n.node.AddChildError(err)
+ if !cyclic {
+ n.node.UpdateStatus(adt.EvaluatingArcs)
+ n.eval.Unify(ctx, a, adt.Finalized)
+ if err, _ := a.Value.(*adt.Bottom); err != nil {
+ n.node.AddChildError(err)
+ }
}
}
+ if cyclic {
+ n.node.Value = adt.CombineErrors(nil, n.node.Value, &adt.Bottom{
+ Code: adt.StructuralCycleError,
+ Err: errors.Newf(token.NoPos, "structural cycle"),
+ Value: n.node.Value,
+ // TODO: probably, this should have the referenced arc.
+ })
+ }
+
n.node.UpdateStatus(adt.Finalized)
}
@@ -682,6 +695,9 @@
vLists []*adt.Vertex
exprs []conjunct
+ hasCycle bool // has conjunct with structural cycle
+ hasNonCycle bool // has conjunct without structural cycle
+
// Disjunction handling
disjunctions []envDisjunct
defaultMode defaultMode
@@ -867,6 +883,23 @@
// Require an Environment.
ctx := n.ctx
+ // TODO: see if we can do without these counters.
+ for _, d := range v.Env.Deref {
+ d.EvalCount++
+ }
+ for _, d := range v.Env.Cycles {
+ d.SelfCount++
+ }
+ defer func() {
+ for _, d := range v.Env.Deref {
+ d.EvalCount--
+ }
+ for _, d := range v.Env.Cycles {
+ d.SelfCount++
+ }
+ }()
+
+outer:
switch x := v.Expr().(type) {
case adt.Resolver:
arc, err := ctx.Resolve(v.Env, x)
@@ -883,30 +916,81 @@
break
}
- // If this is a cycle error, we have reached a fixed point and adding
- // conjuncts at this point will not change the value. Also, continuing
- // to pursue this value will result in an infinite loop.
- //
- // TODO: add a mechanism so that the computation will only have to be
- // one once?
- if isEvaluating(arc) {
- break
+ // Pass detection of structural cycles from parent to children.
+ cyclic := false
+ if v.Env != nil {
+ // If a reference is in a tainted set, so is the value it refers to.
+ cyclic = v.Env.Cyclic
}
- // TODO: detect structural cycles here. A structural cycle can occur
- // if it is not a reference cycle, but refers to a parent. node.
- // This should only be allowed if it is unified with a finite structure.
+ status := arc.Status()
+ for _, d := range v.Env.Deref {
+ if d == arc {
+ status = adt.EvaluatingArcs
+ }
+ }
+
+ switch status {
+ case adt.Evaluating:
+ // Reference cycle detected. We have reached a fixed point and
+ // adding conjuncts at this point will not change the value. Also,
+ // continuing to pursue this value will result in an infinite loop.
+
+ // TODO: add a mechanism so that the computation will only have to
+ // be done once?
+
+ if arc == n.node {
+ // TODO: we could use node sharing here. This may avoid an
+ // exponential blowup during evaluation, like is possible with
+ // YAML.
+ break outer
+ }
+
+ case adt.EvaluatingArcs:
+ // Structural cycle detected. Continue evaluation as usual, but
+ // keep track of whether any other conjuncts without structural
+ // cycles are added. If not, evaluation of child arcs will end
+ // with this node.
+
+ // For the purpose of determining whether at least one non-cyclic
+ // conjuncts exists, we consider all conjuncts of a cyclic conjuncts
+ // also cyclic.
+
+ cyclic = true
+ n.hasCycle = true
+
+ // As the adt.EvaluatingArcs mechanism bypasses the self-reference
+ // mechanism, we need to separately keep track of it here.
+ // If this (originally) is a self-reference node, adding them
+ // will result in recursively adding the same reference. For this
+ // we also mark the node as evaluating.
+ if arc.SelfCount > 0 {
+ break outer
+ }
+
+ // This count is added for values that are directly added below.
+ // The count is handled separately for delayed values.
+ arc.SelfCount++
+ defer func() { arc.SelfCount-- }()
+ }
+
+ // TODO: uncommenting the following almost works, but causes some
+ // faulty results in complex cycle handling between disjunctions.
+ // The reason is that disjunctions must be eliminated if checks in
+ // values on which they depend fail.
+ ctx.Unify(ctx, arc, adt.Finalized)
if arc.Label.IsDef() {
- n.insertClosed(arc)
+ n.insertClosed(arc, cyclic, arc)
} else {
for _, a := range arc.Conjuncts {
- n.addExprConjunct(a, closeID, top)
+ n.addExprConjunct(updateCyclic(a, cyclic, arc), closeID, top)
}
}
case adt.Evaluator:
// adt.Interpolation, adt.UnaryExpr, adt.BinaryExpr, adt.CallExpr
+ // Could be unify?
val, complete := ctx.Evaluate(v.Env, v.Expr())
if !complete {
n.exprs = append(n.exprs, conjunct{v, closeID, top})
@@ -933,7 +1017,40 @@
}
}
-func (n *nodeContext) insertClosed(arc *adt.Vertex) {
+// updateCyclicStatus looks for proof of non-cyclic conjuncts to override
+// a structural cycle.
+func (n *nodeContext) updateCyclicStatus(env *adt.Environment) {
+ if env == nil || !env.Cyclic {
+ // fmt.Printf("%p -- %d hasNonCycle\n", n.node, n.node.Label)
+ n.hasNonCycle = true
+ }
+}
+
+func updateCyclic(c adt.Conjunct, cyclic bool, deref *adt.Vertex) adt.Conjunct {
+ env := c.Env
+ switch {
+ case env == nil:
+ if !cyclic && deref == nil {
+ return c
+ }
+ env = &adt.Environment{Cyclic: cyclic}
+ case deref == nil && env.Cyclic == cyclic:
+ return c
+ default:
+ // The conjunct may still be in use in other fields, so we should
+ // make a new copy to mark Cyclic only for this case.
+ e := *env
+ e.Cyclic = e.Cyclic || cyclic
+ env = &e
+ }
+ if deref != nil {
+ env.Deref = append(env.Deref, deref)
+ env.Cycles = append(env.Cycles, deref)
+ }
+ return adt.MakeConjunct(env, c.Expr())
+}
+
+func (n *nodeContext) insertClosed(arc *adt.Vertex, cyclic bool, deref *adt.Vertex) {
id := n.eval.nextID()
n.needClose = true
@@ -941,7 +1058,7 @@
n.newClose = nil
for _, a := range arc.Conjuncts {
- n.addExprConjunct(a, id, false)
+ n.addExprConjunct(updateCyclic(a, cyclic, deref), id, false)
}
current, n.newClose = n.newClose, current
@@ -953,6 +1070,8 @@
}
func (n *nodeContext) addValueConjunct(env *adt.Environment, v adt.Value) {
+ n.updateCyclicStatus(env)
+
if x, ok := v.(*adt.Vertex); ok {
needClose := false
if isStruct(x) {
@@ -969,13 +1088,14 @@
n.node.AddStructs(x.Structs...)
}
- if len(x.Conjuncts) > 0 {
+ if !x.IsData() && len(x.Conjuncts) > 0 {
+ cyclic := env != nil && env.Cyclic
if needClose {
- n.insertClosed(x)
+ n.insertClosed(x, cyclic, nil)
return
}
for _, c := range x.Conjuncts {
- n.addExprConjunct(c, 0, false) // Pass from eval
+ n.addExprConjunct(updateCyclic(c, cyclic, nil), 0, false) // TODO: Pass from eval
}
return
}
@@ -1112,6 +1232,8 @@
newDef uint32,
top bool) {
+ n.updateCyclicStatus(env) // to handle empty structs.
+
ctx := n.ctx
n.node.AddStructs(s)
@@ -1138,6 +1260,10 @@
Vertex: n.node,
CloseID: closeID,
}
+ if env != nil {
+ childEnv.Cyclic = env.Cyclic
+ childEnv.Deref = env.Deref
+ }
var hasOther, hasBulk adt.Node
@@ -1422,6 +1548,8 @@
outer:
for i, l := range n.lists {
+ n.updateCyclicStatus(l.env)
+
index := int64(0)
hasComprehension := false
for j, elem := range l.list.Elems {
diff --git a/internal/core/eval/eval_test.go b/internal/core/eval/eval_test.go
index 604def6..96197b1 100644
--- a/internal/core/eval/eval_test.go
+++ b/internal/core/eval/eval_test.go
@@ -17,6 +17,7 @@
import (
"flag"
"fmt"
+ "strings"
"testing"
"cuelang.org/go/internal/core/debug"
@@ -24,7 +25,6 @@
"cuelang.org/go/internal/core/validate"
"cuelang.org/go/internal/cuetxtar"
"cuelang.org/go/internal/legacy/cue"
- "cuelang.org/go/pkg/strings"
"github.com/rogpeppe/go-internal/txtar"
)
@@ -85,12 +85,7 @@
}
var needFix = map[string]string{
- "export/027": "cycle",
- "export/028": "cycle",
- "export/030": "cycle",
-
- "cycle/025_cannot_resolve_references_that_would_be_ambiguous": "cycle",
- "compile/scope": "cycle",
+ "DIR/NAME": "reason",
}
// TestX is for debugging. Do not delete.