blob: bb31c03ccb6758172da9e9855fc82f2265e79d9a [file] [log] [blame]
// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adt
import (
"cuelang.org/go/cue/errors"
"cuelang.org/go/cue/token"
)
// Nodes man not reenter a disjunction.
//
// Copy one layer deep; throw away items on failure.
// DISJUNCTION ALGORITHM
//
// The basic concept of the algorithm is to use backtracking to find valid
// disjunctions. The algorithm can stop if two matching disjuncts are found
// where one does not subsume the other.
//
// At a later point, we can introduce a filter step to filter out possible
// disjuncts based on, say, discriminator fields or field exclusivity (oneOf
// fields in Protobuf).
//
// To understand the details of the algorithm, it is important to understand
// some properties of disjunction.
//
//
// EVALUATION OF A DISJUNCTION IS SELF CONTAINED
//
// In other words, fields outside of a disjunction cannot bind to values within
// a disjunction whilst evaluating that disjunction. This allows the computation
// of disjunctions to be isolated from side effects.
//
// The intuition behind this is as follows: as a disjunction is not a concrete
// value, it is not possible to lookup a field within a disjunction if it has
// not yet been evaluated. So if a reference within a disjunction that is needed
// to disambiguate that disjunction refers to a field outside the scope of the
// disjunction which, in turn, refers to a field within the disjunction, this
// results in a cycle error. We achieve this by not removing the cycle marker of
// the Vertex of the disjunction until the disjunction is resolved.
//
// Note that the following disjunct is still allowed:
//
// a: 1
// b: a
//
// Even though `a` refers to the root of the disjunction, it does not _select
// into_ the disjunction. Implementation-wise, it also doesn't have to, as the
// respective vertex is available within the Environment. Referencing a node
// outside the disjunction that in turn selects the disjunction root, however,
// will result in a detected cycle.
//
// As usual, cycle detection should be interpreted marked as incomplete, so that
// the referring node will not be fixed to an error prematurely.
//
//
// SUBSUMPTION OF AMBIGUOUS DISJUNCTS
//
// A disjunction can be evaluated to a concrete value if only one disjunct
// remains. Aside from disambiguating through unification failure, disjuncts
// may also be disambiguated by taking the least specific of two disjuncts.
// For instance, if a subsumes b, then the result of disjunction may be a.
//
// NEW ALGORITHM NO LONGER VERIFIES SUBSUMPTION. SUBSUMPTION IS INHERENTLY
// IMPRECISE (DUE TO BULK OPTIONAL FIELDS). OTHER THAN THAT, FOR SCALAR VALUES
// IT JUST MEANS THERE IS AMBIGUITY, AND FOR STRUCTS IT CAN LEAD TO STRANGE
// CONSEQUENCES.
//
// USE EQUALITY INSTEAD:
// - Undefined == error for optional fields.
// - So only need to check exact labels for vertices.
type envDisjunct struct {
env *Environment
cloneID CloseInfo
expr *DisjunctionExpr
value *Disjunction
hasDefaults bool
}
func (n *nodeContext) addDisjunction(env *Environment, x *DisjunctionExpr, cloneID CloseInfo) {
// TODO: precompute
numDefaults := 0
for _, v := range x.Values {
isDef := v.Default // || n.hasDefaults(env, v.Val)
if isDef {
numDefaults++
}
}
n.disjunctions = append(n.disjunctions,
envDisjunct{env, cloneID, x, nil, numDefaults > 0})
}
func (n *nodeContext) addDisjunctionValue(env *Environment, x *Disjunction, cloneID CloseInfo) {
n.disjunctions = append(n.disjunctions,
envDisjunct{env, cloneID, nil, x, x.HasDefaults})
}
func (n *nodeContext) expandDisjuncts(
state VertexStatus,
parent *nodeContext,
m defaultMode,
recursive bool) {
n.ctx.stats.DisjunctCount++
node := n.node
defer func() {
n.node = node
}()
for n.expandOne() {
}
// save node to snapShot in nodeContex
// save nodeContext.
if recursive || len(n.disjunctions) > 0 {
n.snapshot = clone(*n.node)
} else {
n.snapshot = *n.node
}
switch {
default: // len(n.disjunctions) == 0
m := *n
n.postDisjunct(state)
switch {
case n.hasErr():
// TODO: consider finalizing the node thusly:
// if recursive {
// n.node.Finalize(n.ctx)
// }
x := n.node
err, ok := x.BaseValue.(*Bottom)
if !ok {
err = n.getErr()
}
if err == nil {
// TODO(disjuncts): Is this always correct? Especially for partial
// evaluation it is okay for child errors to have incomplete errors.
// Perhaps introduce an Err() method.
err = x.ChildErrors
}
if err.IsIncomplete() {
break
}
if err != nil {
parent.disjunctErrs = append(parent.disjunctErrs, err)
}
if recursive {
n.free()
}
return
}
if recursive {
*n = m
n.result = *n.node // XXX: n.result = snapshotVertex(n.node)?
n.node = &n.result
n.disjuncts = append(n.disjuncts, n)
}
if n.node.BaseValue == nil {
n.node.BaseValue = n.getValidators()
}
case len(n.disjunctions) > 0:
// Process full disjuncts to ensure that erroneous disjuncts are
// eliminated as early as possible.
state = Finalized
n.disjuncts = append(n.disjuncts, n)
// HACK: this is an AWFUL, AWFUL HACK to work around a limitation of
// using defaults for marking oneOfs in protobuffers. Previously this
// was worked around by another hack that (deliberately erroneously)
// would move the default status of a disjunct of which only one
// disjunct remained from "not default" to "maybe default". For
// protobuf oneOfs this would mean that only the "intended" struct would
// get the default value. It also worked around various other
// limitations.
//
// With the latest performance enhancements this old hack still worked,
// but only because it introduced a bug that this hack relied on. Fixing
// this bug now causes this hack to no longer work.
//
// Ultimately, the correct way to address the issue for the protobuf
// representation is not to use defaults at all. Instead we should use
// the required annotator (which fixes a whole load of other issues):
//
// {} | {a!: int} | {b!: int}
//
// This would force that only one of these can be true independent of
// default magic. Aside from fixing this issue, it also moves to a model
// that is consistent with the recommendation to not use defaults in
// a top-level API specification.
//
// The hack we use now recognizes the oneOf patterns and then sets the
// default for the "smallest" element.
protoForm := true
for i, d := range n.disjunctions {
a := n.disjuncts
n.disjuncts = n.buffer[:0]
n.buffer = a[:0]
skipNonMonotonicChecks := i+1 < len(n.disjunctions)
if skipNonMonotonicChecks {
n.ctx.inDisjunct++
}
// HACK: see above
defaultCount := 0
override := true
if d.expr == nil {
override = false
} else {
for _, v := range d.expr.Values {
if !d.hasDefaults || v.Default {
defaultCount++
if s, ok := v.Val.(*StructLit); !ok || len(s.Decls) > 0 {
protoForm = false
override = false
}
}
}
}
for _, dn := range a {
switch {
case d.expr != nil:
for _, v := range d.expr.Values {
cn := dn.clone()
*cn.node = clone(dn.snapshot)
cn.node.state = cn
c := MakeConjunct(d.env, v.Val, d.cloneID)
cn.addExprConjunct(c)
if override {
if s, ok := v.Val.(*StructLit); ok && len(s.Decls) == 0 {
cn.protoCount++
}
}
newMode := mode(d.hasDefaults, v.Default)
cn.defaultMode = combineDefault(dn.defaultMode, newMode)
cn.expandDisjuncts(state, n, newMode, true)
}
case d.value != nil:
for i, v := range d.value.Values {
cn := dn.clone()
*cn.node = clone(dn.snapshot)
cn.node.state = cn
cn.addValueConjunct(d.env, v, d.cloneID)
newMode := mode(d.hasDefaults, i < d.value.NumDefaults)
cn.defaultMode = combineDefault(dn.defaultMode, newMode)
cn.expandDisjuncts(state, n, newMode, true)
}
}
}
if skipNonMonotonicChecks {
n.ctx.inDisjunct--
}
if len(n.disjuncts) == 0 {
n.makeError()
}
if recursive || i > 0 {
for _, x := range a {
x.free()
}
}
if len(n.disjuncts) == 0 {
break
}
}
// HACK: see above
if protoForm {
min := int32(0)
minPos := 0
for i, d := range n.disjuncts {
if d.defaultMode == isDefault {
min = 0
break
}
if d.protoCount > min {
min = d.protoCount
minPos = i
}
}
if min > 0 {
n.disjuncts[minPos].defaultMode = isDefault
}
}
// HACK alert: this replaces the hack of the previous algorithm with a
// slightly less worse hack: instead of dropping the default info when
// the value was scalar before, we drop this information when there
// is only one disjunct, while not discarding hard defaults.
// TODO: a more principled approach would be to recognize that there
// is only one default at a point where this does not break
// commutativity.
if len(n.disjuncts) == 1 && n.disjuncts[0].defaultMode != isDefault {
n.disjuncts[0].defaultMode = maybeDefault
}
}
// Compare to root, but add to this one.
// TODO: if only one value is left, set to maybeDefault.
switch p := parent; {
case p != n:
p.disjunctErrs = append(p.disjunctErrs, n.disjunctErrs...)
n.disjunctErrs = n.disjunctErrs[:0]
outer:
for _, d := range n.disjuncts {
for _, v := range p.disjuncts {
if Equal(n.ctx, &v.result, &d.result, true) {
if d.defaultMode == isDefault {
v.defaultMode = isDefault
}
d.free()
continue outer
}
}
d.defaultMode = combineDefault(m, d.defaultMode)
p.disjuncts = append(p.disjuncts, d)
}
n.disjuncts = n.disjuncts[:0]
}
}
func (n *nodeContext) makeError() {
code := IncompleteError
if len(n.disjunctErrs) > 0 {
code = EvalError
for _, c := range n.disjunctErrs {
if c.Code > code {
code = c.Code
}
}
}
b := &Bottom{
Code: code,
Err: n.disjunctError(),
}
n.node.SetValue(n.ctx, Finalized, b)
}
func mode(hasDefault, marked bool) defaultMode {
var mode defaultMode
switch {
case !hasDefault:
mode = maybeDefault
case marked:
mode = isDefault
default:
mode = notDefault
}
return mode
}
// clone makes a shallow copy of a Vertex. The purpose is to create different
// disjuncts from the same Vertex under computation. This allows the conjuncts
// of an arc to be reset to a previous position and the reuse of earlier
// computations.
//
// Notes: only Arcs need to be copied recursively. Either the arc is finalized
// and can be used as is, or Structs is assumed to not yet be computed at the
// time that a clone is needed and must be nil. Conjuncts no longer needed and
// can become nil. All other fields can be copied shallowly.
func clone(v Vertex) Vertex {
v.state = nil
if a := v.Arcs; len(a) > 0 {
v.Arcs = make([]*Vertex, len(a))
for i, arc := range a {
switch arc.status {
case Finalized:
v.Arcs[i] = arc
case 0:
a := *arc
v.Arcs[i] = &a
a.Conjuncts = make([]Conjunct, len(arc.Conjuncts))
copy(a.Conjuncts, arc.Conjuncts)
default:
// This should never happen and would be a performance issue.
// But try to mend the situation by doing something sensible in
// case the user is not running with strict mode enabled.
Assertf(false, "invalid state for disjunct")
a := *arc
a.state = arc.state.clone()
a.state.node = &a
a.state.snapshot = clone(a)
v.Arcs[i] = &a
}
}
}
if a := v.Structs; len(a) > 0 {
v.Structs = make([]*StructInfo, len(a))
copy(v.Structs, a)
}
return v
}
// Default rules from spec:
//
// U1: (v1, d1) & v2 => (v1&v2, d1&v2)
// U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2)
//
// D1: (v1, d1) | v2 => (v1|v2, d1)
// D2: (v1, d1) | (v2, d2) => (v1|v2, d1|d2)
//
// M1: *v => (v, v)
// M2: *(v1, d1) => (v1, d1)
//
// NOTE: M2 cannot be *(v1, d1) => (v1, v1), as this has the weird property
// of making a value less specific. This causes issues, for instance, when
// trimming.
//
// The old implementation does something similar though. It will discard
// default information after first determining if more than one conjunct
// has survived.
//
// def + maybe -> def
// not + maybe -> def
// not + def -> def
type defaultMode int
const (
maybeDefault defaultMode = iota
isDefault
notDefault
)
// combineDefaults combines default modes for unifying conjuncts.
//
// Default rules from spec:
//
// U1: (v1, d1) & v2 => (v1&v2, d1&v2)
// U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2)
func combineDefault(a, b defaultMode) defaultMode {
if a > b {
return a
}
return b
}
// disjunctError returns a compound error for a failed disjunction.
//
// TODO(perf): the set of errors is now computed during evaluation. Eventually,
// this could be done lazily.
func (n *nodeContext) disjunctError() (errs errors.Error) {
ctx := n.ctx
disjuncts := selectErrors(n.disjunctErrs)
if disjuncts == nil {
errs = ctx.Newf("empty disjunction") // XXX: add space to sort first
} else {
disjuncts = errors.Sanitize(disjuncts)
k := len(errors.Errors(disjuncts))
// prefix '-' to sort to top
errs = ctx.Newf("%d errors in empty disjunction:", k)
}
errs = errors.Append(errs, disjuncts)
return errs
}
func selectErrors(a []*Bottom) (errs errors.Error) {
// return all errors if less than a certain number.
if len(a) <= 2 {
for _, b := range a {
errs = errors.Append(errs, b.Err)
}
return errs
}
// First select only relevant errors.
isIncomplete := false
k := 0
for _, b := range a {
if !isIncomplete && b.Code >= IncompleteError {
k = 0
isIncomplete = true
}
a[k] = b
k++
}
a = a[:k]
// filter errors
positions := map[token.Pos]bool{}
add := func(b *Bottom, p token.Pos) bool {
if positions[p] {
return false
}
positions[p] = true
errs = errors.Append(errs, b.Err)
return true
}
for _, b := range a {
// TODO: Should we also distinguish by message type?
if add(b, b.Err.Position()) {
continue
}
for _, p := range b.Err.InputPositions() {
if add(b, p) {
break
}
}
}
return errs
}