Marcel van Lohuizen | 0f935f8 | 2020-06-09 09:13:01 +0200 | [diff] [blame] | 1 | // Copyright 2020 CUE Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package eval |
| 16 | |
| 17 | import ( |
| 18 | "sort" |
| 19 | |
| 20 | "cuelang.org/go/cue/errors" |
| 21 | "cuelang.org/go/cue/token" |
| 22 | "cuelang.org/go/internal/core/adt" |
| 23 | ) |
| 24 | |
| 25 | // Nodes man not reenter a disjunction. |
| 26 | // |
| 27 | // Copy one layer deep; throw away items on failure. |
| 28 | |
| 29 | // DISJUNCTION ALGORITHM |
| 30 | // |
| 31 | // The basic concept of the algorithm is to use backtracking to find valid |
| 32 | // disjunctions. The algorithm can stop if two matching disjuncts are found |
| 33 | // where one does not subsume the other. |
| 34 | // |
| 35 | // At a later point, we can introduce a filter step to filter out possible |
| 36 | // disjuncts based on, say, discriminator fields or field exclusivity (oneOf |
| 37 | // fields in Protobuf). |
| 38 | // |
| 39 | // To understand the details of the algorithm, it is important to understand |
| 40 | // some properties of disjunction. |
| 41 | // |
| 42 | // |
| 43 | // EVALUATION OF A DISJUNCTION IS SELF CONTAINED |
| 44 | // |
| 45 | // In other words, fields outside of a disjunction cannot bind to values within |
| 46 | // a disjunction whilst evaluating that disjunction. This allows the computation |
| 47 | // of disjunctions to be isolated from side effects. |
| 48 | // |
| 49 | // The intuition behind this is as follows: as a disjunction is not a concrete |
| 50 | // value, it is not possible to lookup a field within a disjunction if it has |
| 51 | // not yet been evaluated. So if a reference within a disjunction that is needed |
| 52 | // to disambiguate that disjunction refers to a field outside the scope of the |
| 53 | // disjunction which, in turn, refers to a field within the disjunction, this |
| 54 | // results in a cycle error. We achieve this by not removing the cycle marker of |
| 55 | // the Vertex of the disjunction until the disjunction is resolved. |
| 56 | // |
| 57 | // Note that the following disjunct is still allowed: |
| 58 | // |
| 59 | // a: 1 |
| 60 | // b: a |
| 61 | // |
| 62 | // Even though `a` refers to the root of the disjunction, it does not _select |
| 63 | // into_ the disjunction. Implementation-wise, it also doesn't have to, as the |
| 64 | // respective vertex is available within the Environment. Referencing a node |
| 65 | // outside the disjunction that in turn selects the disjunction root, however, |
| 66 | // will result in a detected cycle. |
| 67 | // |
| 68 | // As usual, cycle detection should be interpreted marked as incomplete, so that |
| 69 | // the referring node will not be fixed to an error prematurely. |
| 70 | // |
| 71 | // |
| 72 | // SUBSUMPTION OF AMBIGUOUS DISJUNCTS |
| 73 | // |
| 74 | // A disjunction can be evaluated to a concrete value if only one disjunct |
| 75 | // remains. Aside from disambiguating through unification failure, disjuncts |
| 76 | // may also be disambiguated by taking the least specific of two disjuncts. |
| 77 | // For instance, if a subsumes b, then the result of disjunction may be a. |
| 78 | // |
| 79 | // NEW ALGORITHM NO LONGER VERIFIES SUBSUMPTION. SUBSUMPTION IS INHERENTLY |
| 80 | // IMPRECISE (DUE TO BULK OPTIONAL FIELDS). OTHER THAN THAT, FOR SCALAR VALUES |
| 81 | // IT JUST MEANS THERE IS AMBIGUITY, AND FOR STRUCTS IT CAN LEAD TO STRANGE |
| 82 | // CONSEQUENCES. |
| 83 | // |
| 84 | // USE EQUALITY INSTEAD: |
| 85 | // - Undefined == error for optional fields. |
| 86 | // - So only need to check exact labels for vertices. |
| 87 | |
| 88 | type envDisjunct struct { |
| 89 | env *adt.Environment |
| 90 | values []disjunct |
| 91 | numDefaults int |
| 92 | cloneID uint32 |
| 93 | isEmbed bool |
| 94 | } |
| 95 | |
| 96 | type disjunct struct { |
| 97 | expr adt.Expr |
| 98 | isDefault bool |
| 99 | } |
| 100 | |
| 101 | func (n *nodeContext) addDisjunction(env *adt.Environment, x *adt.DisjunctionExpr, cloneID uint32, isEmbed bool) { |
| 102 | a := []disjunct{} |
| 103 | |
| 104 | numDefaults := 0 |
| 105 | for _, v := range x.Values { |
| 106 | isDef := v.Default // || n.hasDefaults(env, v.Val) |
| 107 | if isDef { |
| 108 | numDefaults++ |
| 109 | } |
| 110 | a = append(a, disjunct{v.Val, isDef}) |
| 111 | } |
| 112 | |
| 113 | sort.SliceStable(a, func(i, j int) bool { |
| 114 | return !a[j].isDefault && a[i].isDefault != a[j].isDefault |
| 115 | }) |
| 116 | |
| 117 | n.disjunctions = append(n.disjunctions, |
| 118 | envDisjunct{env, a, numDefaults, cloneID, isEmbed}) |
| 119 | } |
| 120 | |
| 121 | func (n *nodeContext) addDisjunctionValue(env *adt.Environment, x *adt.Disjunction, cloneID uint32, isEmbed bool) { |
| 122 | a := []disjunct{} |
| 123 | |
| 124 | for i, v := range x.Values { |
| 125 | a = append(a, disjunct{v, i < x.NumDefaults}) |
| 126 | } |
| 127 | |
| 128 | n.disjunctions = append(n.disjunctions, |
| 129 | envDisjunct{env, a, x.NumDefaults, cloneID, isEmbed}) |
| 130 | } |
| 131 | |
| 132 | func (n *nodeContext) updateResult() (isFinal bool) { |
| 133 | n.postDisjunct() |
| 134 | |
| 135 | if n.hasErr() { |
| 136 | return n.isFinal |
| 137 | } |
| 138 | |
| 139 | d := n.nodeShared.disjunct |
| 140 | if d == nil { |
| 141 | d = &adt.Disjunction{} |
| 142 | n.nodeShared.disjunct = d |
| 143 | } |
| 144 | |
| 145 | result := *n.node |
| 146 | if result.Value == nil { |
| 147 | result.Value = n.getValidators() |
| 148 | } |
| 149 | |
| 150 | for _, v := range d.Values { |
| 151 | if Equal(n.ctx, v, &result) { |
| 152 | return isFinal |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | p := &result |
| 157 | d.Values = append(d.Values, p) |
| 158 | if n.defaultMode == isDefault { |
| 159 | // Keep defaults sorted first. |
| 160 | i := d.NumDefaults |
| 161 | j := i + 1 |
| 162 | copy(d.Values[j:], d.Values[i:]) |
| 163 | d.Values[i] = p |
| 164 | d.NumDefaults = j |
| 165 | } |
| 166 | |
| 167 | // return n.isFinal |
| 168 | |
| 169 | switch { |
| 170 | case !n.nodeShared.hasResult(): |
| 171 | |
| 172 | case n.nodeShared.isDefault() && n.defaultMode != isDefault: |
| 173 | return n.isFinal |
| 174 | |
| 175 | case !n.nodeShared.isDefault() && n.defaultMode == isDefault: |
| 176 | |
| 177 | default: |
| 178 | if Equal(n.ctx, n.node, n.result()) { |
| 179 | return n.isFinal |
| 180 | } |
| 181 | |
| 182 | // TODO: Compute fancy error message. |
| 183 | n.nodeShared.resultNode = n |
| 184 | // n.nodeShared.result.AddErr(n.ctx, &adt.Bottom{ |
| 185 | // Code: adt.IncompleteError, |
| 186 | // Err: errors.Newf(n.ctx.Pos(), "ambiguous disjunction"), |
| 187 | // }) |
| 188 | n.nodeShared.result_.Arcs = nil |
| 189 | n.nodeShared.result_.Structs = nil |
| 190 | return n.isFinal // n.defaultMode == isDefault |
| 191 | } |
| 192 | |
| 193 | n.nodeShared.resultNode = n |
| 194 | n.nodeShared.setResult(n.node) |
| 195 | |
| 196 | return n.isFinal |
| 197 | } |
| 198 | |
| 199 | func (n *nodeContext) tryDisjuncts() (finished bool) { |
| 200 | if !n.insertDisjuncts() || !n.updateResult() { |
| 201 | if !n.isFinal { |
| 202 | return false // More iterations to do. |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | if n.nodeShared.hasResult() { |
| 207 | return true // found something |
| 208 | } |
| 209 | |
| 210 | if len(n.disjunctions) > 0 { |
| 211 | b := &adt.Bottom{ |
| 212 | // TODO(errors): we should not make this error worse by discarding |
| 213 | // the type or error. Using IncompleteError is a compromise. But |
| 214 | // really we should keep track of the errors and return a more |
| 215 | // accurate result here. |
| 216 | Code: adt.IncompleteError, |
| 217 | Err: errors.Newf(token.NoPos, "empty disjunction"), |
| 218 | } |
| 219 | n.node.AddErr(n.ctx, b) |
| 220 | } |
| 221 | return true |
| 222 | } |
| 223 | |
| 224 | // TODO: add proper conjuncts for the ones used by the disjunctions to replace |
| 225 | // the original source. |
| 226 | // |
| 227 | func (n *nodeContext) insertDisjuncts() (inserted bool) { |
| 228 | p := 0 |
| 229 | inserted = true |
| 230 | |
| 231 | disjunctions := []envDisjunct{} |
| 232 | |
| 233 | // fmt.Println("----", debug.NodeString(n.ctx, n.node, nil)) |
| 234 | for _, d := range n.disjunctions { |
| 235 | disjunctions = append(disjunctions, d) |
| 236 | |
| 237 | sub := len(n.disjunctions) |
| 238 | defMode, ok := n.insertSingleDisjunct(p, d, false) |
| 239 | p++ |
| 240 | if !ok { |
| 241 | inserted = false |
| 242 | break |
| 243 | } |
| 244 | |
| 245 | subMode := []defaultMode{} |
| 246 | for ; sub < len(n.disjunctions); sub++ { |
| 247 | d := n.disjunctions[sub] |
| 248 | disjunctions = append(disjunctions, d) |
| 249 | mode, ok := n.insertSingleDisjunct(p, d, true) |
| 250 | p++ |
| 251 | if !ok { |
| 252 | inserted = false |
| 253 | break |
| 254 | } |
| 255 | subMode = append(subMode, mode) |
| 256 | } |
| 257 | for i := len(subMode) - 1; i >= 0; i-- { |
| 258 | defMode = combineSubDefault(defMode, subMode[i]) |
| 259 | } |
| 260 | |
| 261 | // fmt.Println("RESMODE", defMode, combineDefault(n.defaultMode, defMode)) |
| 262 | |
| 263 | n.defaultMode = combineDefault(n.defaultMode, defMode) |
| 264 | } |
| 265 | |
| 266 | // Find last disjunction at which there is no overflow. |
| 267 | for ; p > 0 && n.stack[p-1]+1 >= len(disjunctions[p-1].values); p-- { |
| 268 | } |
| 269 | if p > 0 { |
| 270 | // Increment a valid position and set all subsequent entries to 0. |
| 271 | n.stack[p-1]++ |
| 272 | n.stack = n.stack[:p] |
| 273 | } |
| 274 | return inserted |
| 275 | } |
| 276 | |
| 277 | func (n *nodeContext) insertSingleDisjunct(p int, d envDisjunct, isSub bool) (mode defaultMode, ok bool) { |
| 278 | if p >= len(n.stack) { |
| 279 | n.stack = append(n.stack, 0) |
| 280 | } |
| 281 | |
| 282 | k := n.stack[p] |
| 283 | v := d.values[k] |
| 284 | n.isFinal = n.isFinal && k == len(d.values)-1 |
| 285 | c := adt.MakeConjunct(d.env, v.expr) |
| 286 | n.addExprConjunct(c, d.cloneID, d.isEmbed) |
| 287 | |
| 288 | for n.expandOne() { |
| 289 | } |
| 290 | |
| 291 | switch { |
| 292 | case d.numDefaults == 0: |
| 293 | mode = maybeDefault |
| 294 | case v.isDefault: |
| 295 | mode = isDefault |
| 296 | default: |
| 297 | mode = notDefault |
| 298 | } |
| 299 | |
| 300 | return mode, !n.hasErr() |
| 301 | } |
| 302 | |
| 303 | // Default rules from spec: |
| 304 | // |
| 305 | // U1: (v1, d1) & v2 => (v1&v2, d1&v2) |
| 306 | // U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2) |
| 307 | // |
| 308 | // D1: (v1, d1) | v2 => (v1|v2, d1) |
| 309 | // D2: (v1, d1) | (v2, d2) => (v1|v2, d1|d2) |
| 310 | // |
| 311 | // M1: *v => (v, v) |
| 312 | // M2: *(v1, d1) => (v1, d1) |
| 313 | // or |
| 314 | // M2: *(v1, d1) => (v1, v1) |
| 315 | // or |
| 316 | // M2: *(v1, d1) => v1 if d1 == _|_ |
| 317 | // M2: d1 otherwise |
| 318 | // |
| 319 | // def + maybe -> def |
| 320 | // not + maybe -> def |
| 321 | // not + def -> def |
| 322 | |
| 323 | type defaultMode int |
| 324 | |
| 325 | const ( |
| 326 | maybeDefault defaultMode = iota |
| 327 | notDefault |
| 328 | isDefault |
| 329 | ) |
| 330 | |
| 331 | func combineSubDefault(a, b defaultMode) defaultMode { |
| 332 | switch { |
| 333 | case a == maybeDefault && b == maybeDefault: |
| 334 | return maybeDefault |
| 335 | case a == maybeDefault && b == notDefault: |
| 336 | return notDefault |
| 337 | case a == maybeDefault && b == isDefault: |
| 338 | return isDefault |
| 339 | case a == notDefault && b == maybeDefault: |
| 340 | return notDefault |
| 341 | case a == notDefault && b == notDefault: |
| 342 | return notDefault |
| 343 | case a == notDefault && b == isDefault: |
| 344 | return isDefault |
| 345 | case a == isDefault && b == maybeDefault: |
| 346 | return isDefault |
| 347 | case a == isDefault && b == notDefault: |
| 348 | return notDefault |
| 349 | case a == isDefault && b == isDefault: |
| 350 | return isDefault |
| 351 | default: |
| 352 | panic("unreachable") |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | func combineDefault(a, b defaultMode) defaultMode { |
| 357 | if a > b { |
| 358 | a, b = b, a |
| 359 | } |
| 360 | switch { |
| 361 | case a == maybeDefault && b == maybeDefault: |
| 362 | return maybeDefault |
| 363 | case a == maybeDefault && b == notDefault: |
| 364 | return notDefault |
| 365 | case a == maybeDefault && b == isDefault: |
| 366 | return isDefault |
| 367 | case a == notDefault && b == notDefault: |
| 368 | return notDefault |
| 369 | case a == notDefault && b == isDefault: |
| 370 | return notDefault |
| 371 | case a == isDefault && b == isDefault: |
| 372 | return isDefault |
| 373 | default: |
| 374 | panic("unreachable") |
| 375 | } |
| 376 | } |