| // Copyright 2020 CUE Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package adt |
| |
| import ( |
| "cuelang.org/go/cue/errors" |
| "cuelang.org/go/cue/token" |
| ) |
| |
| // Nodes man not reenter a disjunction. |
| // |
| // Copy one layer deep; throw away items on failure. |
| |
| // DISJUNCTION ALGORITHM |
| // |
| // The basic concept of the algorithm is to use backtracking to find valid |
| // disjunctions. The algorithm can stop if two matching disjuncts are found |
| // where one does not subsume the other. |
| // |
| // At a later point, we can introduce a filter step to filter out possible |
| // disjuncts based on, say, discriminator fields or field exclusivity (oneOf |
| // fields in Protobuf). |
| // |
| // To understand the details of the algorithm, it is important to understand |
| // some properties of disjunction. |
| // |
| // |
| // EVALUATION OF A DISJUNCTION IS SELF CONTAINED |
| // |
| // In other words, fields outside of a disjunction cannot bind to values within |
| // a disjunction whilst evaluating that disjunction. This allows the computation |
| // of disjunctions to be isolated from side effects. |
| // |
| // The intuition behind this is as follows: as a disjunction is not a concrete |
| // value, it is not possible to lookup a field within a disjunction if it has |
| // not yet been evaluated. So if a reference within a disjunction that is needed |
| // to disambiguate that disjunction refers to a field outside the scope of the |
| // disjunction which, in turn, refers to a field within the disjunction, this |
| // results in a cycle error. We achieve this by not removing the cycle marker of |
| // the Vertex of the disjunction until the disjunction is resolved. |
| // |
| // Note that the following disjunct is still allowed: |
| // |
| // a: 1 |
| // b: a |
| // |
| // Even though `a` refers to the root of the disjunction, it does not _select |
| // into_ the disjunction. Implementation-wise, it also doesn't have to, as the |
| // respective vertex is available within the Environment. Referencing a node |
| // outside the disjunction that in turn selects the disjunction root, however, |
| // will result in a detected cycle. |
| // |
| // As usual, cycle detection should be interpreted marked as incomplete, so that |
| // the referring node will not be fixed to an error prematurely. |
| // |
| // |
| // SUBSUMPTION OF AMBIGUOUS DISJUNCTS |
| // |
| // A disjunction can be evaluated to a concrete value if only one disjunct |
| // remains. Aside from disambiguating through unification failure, disjuncts |
| // may also be disambiguated by taking the least specific of two disjuncts. |
| // For instance, if a subsumes b, then the result of disjunction may be a. |
| // |
| // NEW ALGORITHM NO LONGER VERIFIES SUBSUMPTION. SUBSUMPTION IS INHERENTLY |
| // IMPRECISE (DUE TO BULK OPTIONAL FIELDS). OTHER THAN THAT, FOR SCALAR VALUES |
| // IT JUST MEANS THERE IS AMBIGUITY, AND FOR STRUCTS IT CAN LEAD TO STRANGE |
| // CONSEQUENCES. |
| // |
| // USE EQUALITY INSTEAD: |
| // - Undefined == error for optional fields. |
| // - So only need to check exact labels for vertices. |
| |
| type envDisjunct struct { |
| env *Environment |
| cloneID CloseInfo |
| expr *DisjunctionExpr |
| value *Disjunction |
| hasDefaults bool |
| |
| // These are used for book keeping, tracking whether any of the |
| // disjuncts marked with a default marker remains after unification. |
| // If no default is used, all other elements are treated as "maybeDefault". |
| // Otherwise, elements are treated as is. |
| parentDefaultUsed bool |
| childDefaultUsed bool |
| } |
| |
| func (n *nodeContext) addDisjunction(env *Environment, x *DisjunctionExpr, cloneID CloseInfo) { |
| |
| // TODO: precompute |
| numDefaults := 0 |
| for _, v := range x.Values { |
| isDef := v.Default // || n.hasDefaults(env, v.Val) |
| if isDef { |
| numDefaults++ |
| } |
| } |
| |
| n.disjunctions = append(n.disjunctions, |
| envDisjunct{env, cloneID, x, nil, numDefaults > 0, false, false}) |
| } |
| |
| func (n *nodeContext) addDisjunctionValue(env *Environment, x *Disjunction, cloneID CloseInfo) { |
| n.disjunctions = append(n.disjunctions, |
| envDisjunct{env, cloneID, nil, x, x.HasDefaults, false, false}) |
| |
| } |
| |
| func (n *nodeContext) expandDisjuncts( |
| state VertexStatus, |
| parent *nodeContext, |
| parentMode defaultMode, // default mode of this disjunct |
| recursive bool) { |
| |
| n.ctx.stats.DisjunctCount++ |
| |
| node := n.node |
| defer func() { |
| n.node = node |
| }() |
| |
| for n.expandOne() { |
| } |
| |
| // save node to snapShot in nodeContex |
| // save nodeContext. |
| |
| if recursive || len(n.disjunctions) > 0 { |
| n.snapshot = clone(*n.node) |
| } else { |
| n.snapshot = *n.node |
| } |
| |
| defaultOffset := len(n.usedDefault) |
| |
| switch { |
| default: // len(n.disjunctions) == 0 |
| m := *n |
| n.postDisjunct(state) |
| |
| switch { |
| case n.hasErr(): |
| // TODO: consider finalizing the node thusly: |
| // if recursive { |
| // n.node.Finalize(n.ctx) |
| // } |
| x := n.node |
| err, ok := x.BaseValue.(*Bottom) |
| if !ok { |
| err = n.getErr() |
| } |
| if err == nil { |
| // TODO(disjuncts): Is this always correct? Especially for partial |
| // evaluation it is okay for child errors to have incomplete errors. |
| // Perhaps introduce an Err() method. |
| err = x.ChildErrors |
| } |
| if err.IsIncomplete() { |
| break |
| } |
| if err != nil { |
| parent.disjunctErrs = append(parent.disjunctErrs, err) |
| } |
| if recursive { |
| n.free() |
| } |
| return |
| } |
| |
| if recursive { |
| *n = m |
| n.result = *n.node // XXX: n.result = snapshotVertex(n.node)? |
| n.node = &n.result |
| n.disjuncts = append(n.disjuncts, n) |
| } |
| if n.node.BaseValue == nil { |
| n.node.BaseValue = n.getValidators() |
| } |
| |
| n.usedDefault = append(n.usedDefault, defaultInfo{ |
| parentMode: parentMode, |
| nestedMode: parentMode, |
| origMode: parentMode, |
| }) |
| |
| case len(n.disjunctions) > 0: |
| // Process full disjuncts to ensure that erroneous disjuncts are |
| // eliminated as early as possible. |
| state = Finalized |
| |
| n.disjuncts = append(n.disjuncts, n) |
| |
| n.refCount++ |
| defer n.free() |
| |
| for i, d := range n.disjunctions { |
| a := n.disjuncts |
| n.disjuncts = n.buffer[:0] |
| n.buffer = a[:0] |
| |
| skipNonMonotonicChecks := i+1 < len(n.disjunctions) |
| if skipNonMonotonicChecks { |
| n.ctx.inDisjunct++ |
| } |
| |
| for _, dn := range a { |
| switch { |
| case d.expr != nil: |
| for _, v := range d.expr.Values { |
| cn := dn.clone() |
| *cn.node = clone(dn.snapshot) |
| cn.node.state = cn |
| |
| c := MakeConjunct(d.env, v.Val, d.cloneID) |
| cn.addExprConjunct(c) |
| |
| newMode := mode(d.hasDefaults, v.Default) |
| |
| cn.expandDisjuncts(state, n, newMode, true) |
| } |
| |
| case d.value != nil: |
| for i, v := range d.value.Values { |
| cn := dn.clone() |
| *cn.node = clone(dn.snapshot) |
| cn.node.state = cn |
| |
| cn.addValueConjunct(d.env, v, d.cloneID) |
| |
| newMode := mode(d.hasDefaults, i < d.value.NumDefaults) |
| |
| cn.expandDisjuncts(state, n, newMode, true) |
| } |
| } |
| } |
| |
| if skipNonMonotonicChecks { |
| n.ctx.inDisjunct-- |
| } |
| |
| if len(n.disjuncts) == 0 { |
| n.makeError() |
| } |
| |
| if recursive || i > 0 { |
| for _, x := range a { |
| x.free() |
| } |
| } |
| |
| if len(n.disjuncts) == 0 { |
| break |
| } |
| } |
| |
| // Annotate disjunctions with whether any of the default disjunctions |
| // was used. |
| for _, d := range n.disjuncts { |
| for i, info := range d.usedDefault[defaultOffset:] { |
| if info.parentMode == isDefault { |
| n.disjunctions[i].parentDefaultUsed = true |
| } |
| if info.origMode == isDefault { |
| n.disjunctions[i].childDefaultUsed = true |
| } |
| } |
| } |
| |
| // Combine parent and child default markers, considering that a parent |
| // "notDefault" is treated as "maybeDefault" if none of the disjuncts |
| // marked as default remain. |
| // |
| // NOTE for a parent marked as "notDefault", a child is *never* |
| // considered as default. It may either be "not" or "maybe" default. |
| // |
| // The result for each disjunction is conjoined into a single value. |
| for _, d := range n.disjuncts { |
| m := maybeDefault |
| orig := maybeDefault |
| for i, info := range d.usedDefault[defaultOffset:] { |
| parent := info.parentMode |
| |
| used := n.disjunctions[i].parentDefaultUsed |
| childUsed := n.disjunctions[i].childDefaultUsed |
| hasDefaults := n.disjunctions[i].hasDefaults |
| |
| orig = combineDefault(orig, info.parentMode) |
| orig = combineDefault(orig, info.nestedMode) |
| |
| switch { |
| case childUsed: |
| // One of the children used a default. This is "normal" |
| // mode. This may also happen when we are in |
| // hasDefaults/notUsed mode. Consider |
| // |
| // ("a" | "b") & (*(*"a" | string) | string) |
| // |
| // Here the doubly nested default is called twice, once |
| // for "a" and then for "b", where the second resolves to |
| // not using a default. The first does, however, and on that |
| // basis the "ot default marker cannot be overridden. |
| m = combineDefault(m, info.parentMode) |
| m = combineDefault(m, info.origMode) |
| |
| case !hasDefaults, used: |
| m = combineDefault(m, info.parentMode) |
| m = combineDefault(m, info.nestedMode) |
| |
| case hasDefaults && !used: |
| Assertf(parent == notDefault, "unexpected default mode") |
| } |
| } |
| d.defaultMode = m |
| |
| d.usedDefault = d.usedDefault[:defaultOffset] |
| d.usedDefault = append(d.usedDefault, defaultInfo{ |
| parentMode: parentMode, |
| nestedMode: m, |
| origMode: orig, |
| }) |
| |
| } |
| |
| // TODO: this is an old trick that seems no longer necessary for the new |
| // implementation. Keep around until we finalize the semantics for |
| // defaults, though. The recursion of nested defaults is not entirely |
| // proper yet. |
| // |
| // A better approach, that avoids the need for recursion (semantically), |
| // would be to only consider default usage for one level, but then to |
| // also allow a default to be passed if only one value is remaining. |
| // This means that a nested subsumption would first have to be evaluated |
| // in isolation, however, to determine that it is not previous |
| // disjunctions that cause the disambiguation. |
| // |
| // HACK alert: this replaces the hack of the previous algorithm with a |
| // slightly less worse hack: instead of dropping the default info when |
| // the value was scalar before, we drop this information when there is |
| // only one disjunct, while not discarding hard defaults. TODO: a more |
| // principled approach would be to recognize that there is only one |
| // default at a point where this does not break commutativity. if |
| // if len(n.disjuncts) == 1 && n.disjuncts[0].defaultMode != isDefault { |
| // n.disjuncts[0].defaultMode = maybeDefault |
| // } |
| } |
| |
| // Compare to root, but add to this one. |
| switch p := parent; { |
| case p != n: |
| p.disjunctErrs = append(p.disjunctErrs, n.disjunctErrs...) |
| n.disjunctErrs = n.disjunctErrs[:0] |
| |
| outer: |
| for _, d := range n.disjuncts { |
| for k, v := range p.disjuncts { |
| if Equal(n.ctx, &v.result, &d.result, true) { |
| m := maybeDefault |
| for _, u := range d.usedDefault { |
| m = combineDefault(m, u.nestedMode) |
| } |
| if m == isDefault { |
| p.disjuncts[k] = d |
| v.free() |
| } else { |
| d.free() |
| } |
| continue outer |
| } |
| } |
| |
| p.disjuncts = append(p.disjuncts, d) |
| } |
| |
| n.disjuncts = n.disjuncts[:0] |
| } |
| } |
| |
| func (n *nodeContext) makeError() { |
| code := IncompleteError |
| |
| if len(n.disjunctErrs) > 0 { |
| code = EvalError |
| for _, c := range n.disjunctErrs { |
| if c.Code > code { |
| code = c.Code |
| } |
| } |
| } |
| |
| b := &Bottom{ |
| Code: code, |
| Err: n.disjunctError(), |
| } |
| n.node.SetValue(n.ctx, Finalized, b) |
| } |
| |
| func mode(hasDefault, marked bool) defaultMode { |
| var mode defaultMode |
| switch { |
| case !hasDefault: |
| mode = maybeDefault |
| case marked: |
| mode = isDefault |
| default: |
| mode = notDefault |
| } |
| return mode |
| } |
| |
| // clone makes a shallow copy of a Vertex. The purpose is to create different |
| // disjuncts from the same Vertex under computation. This allows the conjuncts |
| // of an arc to be reset to a previous position and the reuse of earlier |
| // computations. |
| // |
| // Notes: only Arcs need to be copied recursively. Either the arc is finalized |
| // and can be used as is, or Structs is assumed to not yet be computed at the |
| // time that a clone is needed and must be nil. Conjuncts no longer needed and |
| // can become nil. All other fields can be copied shallowly. |
| func clone(v Vertex) Vertex { |
| v.state = nil |
| if a := v.Arcs; len(a) > 0 { |
| v.Arcs = make([]*Vertex, len(a)) |
| for i, arc := range a { |
| switch arc.status { |
| case Finalized: |
| v.Arcs[i] = arc |
| |
| case 0: |
| a := *arc |
| v.Arcs[i] = &a |
| |
| a.Conjuncts = make([]Conjunct, len(arc.Conjuncts)) |
| copy(a.Conjuncts, arc.Conjuncts) |
| |
| default: |
| a := *arc |
| a.state = arc.state.clone() |
| a.state.node = &a |
| a.state.snapshot = clone(a) |
| v.Arcs[i] = &a |
| } |
| } |
| } |
| |
| if a := v.Structs; len(a) > 0 { |
| v.Structs = make([]*StructInfo, len(a)) |
| copy(v.Structs, a) |
| } |
| |
| return v |
| } |
| |
| // Default rules from spec: |
| // |
| // U1: (v1, d1) & v2 => (v1&v2, d1&v2) |
| // U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2) |
| // |
| // D1: (v1, d1) | v2 => (v1|v2, d1) |
| // D2: (v1, d1) | (v2, d2) => (v1|v2, d1|d2) |
| // |
| // M1: *v => (v, v) |
| // M2: *(v1, d1) => (v1, d1) |
| // |
| // NOTE: M2 cannot be *(v1, d1) => (v1, v1), as this has the weird property |
| // of making a value less specific. This causes issues, for instance, when |
| // trimming. |
| // |
| // The old implementation does something similar though. It will discard |
| // default information after first determining if more than one conjunct |
| // has survived. |
| // |
| // def + maybe -> def |
| // not + maybe -> def |
| // not + def -> def |
| |
| type defaultMode int |
| |
| const ( |
| maybeDefault defaultMode = iota |
| isDefault |
| notDefault |
| ) |
| |
| // combineDefaults combines default modes for unifying conjuncts. |
| // |
| // Default rules from spec: |
| // |
| // U1: (v1, d1) & v2 => (v1&v2, d1&v2) |
| // U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2) |
| func combineDefault(a, b defaultMode) defaultMode { |
| if a > b { |
| return a |
| } |
| return b |
| } |
| |
| // disjunctError returns a compound error for a failed disjunction. |
| // |
| // TODO(perf): the set of errors is now computed during evaluation. Eventually, |
| // this could be done lazily. |
| func (n *nodeContext) disjunctError() (errs errors.Error) { |
| ctx := n.ctx |
| |
| disjuncts := selectErrors(n.disjunctErrs) |
| |
| if disjuncts == nil { |
| errs = ctx.Newf("empty disjunction") // XXX: add space to sort first |
| } else { |
| disjuncts = errors.Sanitize(disjuncts) |
| k := len(errors.Errors(disjuncts)) |
| // prefix '-' to sort to top |
| errs = ctx.Newf("%d errors in empty disjunction:", k) |
| } |
| |
| errs = errors.Append(errs, disjuncts) |
| |
| return errs |
| } |
| |
| func selectErrors(a []*Bottom) (errs errors.Error) { |
| // return all errors if less than a certain number. |
| if len(a) <= 2 { |
| for _, b := range a { |
| errs = errors.Append(errs, b.Err) |
| |
| } |
| return errs |
| } |
| |
| // First select only relevant errors. |
| isIncomplete := false |
| k := 0 |
| for _, b := range a { |
| if !isIncomplete && b.Code >= IncompleteError { |
| k = 0 |
| isIncomplete = true |
| } |
| a[k] = b |
| k++ |
| } |
| a = a[:k] |
| |
| // filter errors |
| positions := map[token.Pos]bool{} |
| |
| add := func(b *Bottom, p token.Pos) bool { |
| if positions[p] { |
| return false |
| } |
| positions[p] = true |
| errs = errors.Append(errs, b.Err) |
| return true |
| } |
| |
| for _, b := range a { |
| // TODO: Should we also distinguish by message type? |
| if add(b, b.Err.Position()) { |
| continue |
| } |
| for _, p := range b.Err.InputPositions() { |
| if add(b, p) { |
| break |
| } |
| } |
| } |
| |
| return errs |
| } |