internal/core/eval: tweak to disjunction

Except for a small bug, that is also fixed in this CL, the
new evaluator implemented disjunctions according to
spec before this CL. The old evaluator, however, didn't...

It turns out the semantics of the spec is somewhat awkward,
though theoretically nicer. It seems like we do need to change
the definition somewhat to make it less awkward, or at least
come up with a good workaround before adopting the spec.

This CL introduces a hack that mimic the old behavior for scalar
values, where this is the most relevant.

Change-Id: If58d27b4f4e9aa1a5ae8316cb774cf6b26c70796
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/6655
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/testdata/choosedefault/002_associativity_of_defaults.txtar b/cue/testdata/choosedefault/002_associativity_of_defaults.txtar
index 0b8611b..c452972 100644
--- a/cue/testdata/choosedefault/002_associativity_of_defaults.txtar
+++ b/cue/testdata/choosedefault/002_associativity_of_defaults.txtar
@@ -8,6 +8,13 @@
 c: *"a" | (*"b" | "c")
 x: a & b
 y: b & c
+
+s1: *1 | ((*2|3) & (2|*3))
+s2: *1 | ((*2|3) & (*2|3))
+s3: *1 | ((*2|3) & 3)
+s4: *1 | ((*2|3) & 2)
+s5: *1 | *(*2 | 3)
+
 -- out/def --
 x: a & b
 y: b & c
@@ -24,6 +31,11 @@
   c: (*"a"|(*"b"|"c"))
   x: (〈0;a〉 & 〈0;b〉)
   y: (〈0;b〉 & 〈0;c〉)
+  s1: (*1|((*2|3) & (2|*3)))
+  s2: (*1|((*2|3) & (*2|3)))
+  s3: (*1|((*2|3) & 3))
+  s4: (*1|((*2|3) & 2))
+  s5: (*1|*(*2|3))
 }
 -- out/eval --
 (struct){
@@ -32,4 +44,9 @@
   c: (string){ |(*(string){ "a" }, *(string){ "b" }, (string){ "c" }) }
   x: (string){ |(*(string){ "a" }, (string){ "b" }, (string){ "c" }) }
   y: (string){ |(*(string){ "a" }, (string){ "b" }, (string){ "c" }) }
+  s1: (int){ |(*(int){ 1 }, *(int){ 2 }, (int){ 3 }) }
+  s2: (int){ |(*(int){ 1 }, *(int){ 2 }, (int){ 3 }) }
+  s3: (int){ |(*(int){ 1 }, (int){ 3 }) }
+  s4: (int){ |(*(int){ 1 }, (int){ 2 }) }
+  s5: (int){ |(*(int){ 1 }, *(int){ 2 }, (int){ 3 }) }
 }
diff --git a/cue/testdata/cycle/issue429.txtar b/cue/testdata/cycle/issue429.txtar
index 84a4484..e7b5983 100644
--- a/cue/testdata/cycle/issue429.txtar
+++ b/cue/testdata/cycle/issue429.txtar
@@ -73,7 +73,7 @@
   s1: (#struct){
     res: (int){ |(*(int){ 0 }, (int){ &(>=0, int) }) }
     min: (int){ 5 }
-    max: (number){ |((int){ 5 }, (number){ >5 }) }
+    max: (number){ |(*(int){ 5 }, (number){ >5 }) }
   }
   s2: (#struct){
     res: (int){ |(*(int){ 0 }, (int){ &(>=0, int) }) }
diff --git a/cue/testdata/disjunctions/specdeviation.txtar b/cue/testdata/disjunctions/specdeviation.txtar
new file mode 100644
index 0000000..180bf3b
--- /dev/null
+++ b/cue/testdata/disjunctions/specdeviation.txtar
@@ -0,0 +1,68 @@
+It turns out the semantics of the spec is somewhat awkward,
+though theoretically nicer. It seems like we do need to change
+the definition somewhat to make it less awkward, or at least
+come up with a good workaround before adopting the spec.
+
+We have introduce a small hack to mimic the old behavior for scalar
+values.
+
+Note that the value of p below is now 2 (default), but should
+be the non-concrete 2 | int.
+
+Proof:
+    p: *((*1 | int) & 2)  | int   // substitution of both P conjuncts in p
+    p: *(*_|_ | 2)  | int         // U1: distribute conjuncts
+    p: *_|_ | 2  | int            // M2: remove mark
+    p: 2  | int                   // value after removing default.
+
+-- in.cue --
+
+Q: *1 | int
+q: *Q | int // 1 as expected
+
+P: *1 | int
+P: 2
+p: *P | int // now 2, but should be (2 | int), according to the spec:
+
+
+s1: #Size & { min: 5 }
+
+#Size : {
+    max: >min | *min
+    res: uint | * 0
+    min: >res | *(1 + res)
+}
+-- out/eval --
+(struct){
+  Q: (int){ |(*(int){ 1 }, (int){ int }) }
+  q: (int){ |(*(int){ 1 }, (int){ int }) }
+  P: (int){ 2 }
+  p: (int){ |(*(int){ 2 }, (int){ int }) }
+  s1: (#struct){
+    max: (number){ |(*(int){ 5 }, (number){ >5 }) }
+    res: (int){ 0 }
+    min: (int){ 5 }
+  }
+  #Size: (#struct){
+    max: (number){ |(*(int){ 1 }, (number){ >0 }, (number){ >1 }) }
+    res: (int){ 0 }
+    min: (number){ |(*(int){ 1 }, (number){ >0 }) }
+  }
+}
+-- out/compile --
+--- in.cue
+{
+  Q: (*1|int)
+  q: (*〈0;Q〉|int)
+  P: (*1|int)
+  P: 2
+  p: (*〈0;P〉|int)
+  s1: (〈0;#Size〉 & {
+    min: 5
+  })
+  #Size: {
+    max: (>〈0;min〉|*〈0;min〉)
+    res: (&(int, >=0)|*0)
+    min: (>〈0;res〉|*(1 + 〈0;res〉))
+  }
+}
diff --git a/internal/core/eval/disjunct.go b/internal/core/eval/disjunct.go
index 973afcc..c6c499d 100644
--- a/internal/core/eval/disjunct.go
+++ b/internal/core/eval/disjunct.go
@@ -240,9 +240,27 @@
 			break
 		}
 
-		subMode := []defaultMode{}
+		subMode := maybeDefault
 		for ; sub < len(n.disjunctions); sub++ {
 			d := n.disjunctions[sub]
+
+			// TODO: HACK ALERT: we ignore the default tags of the subexpression
+			// if we already have a scalar value and can no longer change the
+			// outcome.
+			// This is not conform the spec, but mimics the old implementation.
+			// It also results in nicer default semantics. Changing this will
+			// break existing CUE code in awkward ways.
+			// We probably should address this when we figure out how to change
+			// the spec to accommodate for this. For instance, we could say
+			// that if a disjunction only contributes a single disjunct to an
+			// end result, default information is ignored. Not the greatest
+			// definition, though.
+			// Another alternative might be to have a special builtin that
+			// mimics the good behavior.
+			// Note that the same result can be obtained in CUE by adding
+			// 0 to a referenced number (forces the default to be discarded).
+			wasScalar := n.scalar != nil // Hack line 1
+
 			disjunctions = append(disjunctions, d)
 			mode, ok := n.insertSingleDisjunct(p, d, true)
 			p++
@@ -250,13 +268,12 @@
 				inserted = false
 				break
 			}
-			subMode = append(subMode, mode)
-		}
-		for i := len(subMode) - 1; i >= 0; i-- {
-			defMode = combineSubDefault(defMode, subMode[i])
-		}
 
-		// fmt.Println("RESMODE", defMode, combineDefault(n.defaultMode, defMode))
+			if !wasScalar { // Hack line 2.
+				subMode = combineDefault(subMode, mode)
+			}
+		}
+		defMode = combineSubDefault(defMode, subMode)
 
 		n.defaultMode = combineDefault(n.defaultMode, defMode)
 	}
@@ -308,11 +325,14 @@
 //
 // M1: *v        => (v, v)
 // M2: *(v1, d1) => (v1, d1)
-// or
-// M2: *(v1, d1) => (v1, v1)
-// or
-// M2: *(v1, d1) => v1 if d1 == _|_
-// M2:              d1 otherwise
+//
+// NOTE: M2 cannot be *(v1, d1) => (v1, v1), as this has the weird property
+// of making a value less specific. This causes issues, for instance, when
+// trimming.
+//
+// The old implementation does something similar though. It will discard
+// default information after first determining if more than one conjunct
+// has survived.
 //
 // def + maybe -> def
 // not + maybe -> def
@@ -326,31 +346,49 @@
 	isDefault
 )
 
+// combineSubDefault combines default modes where b is a subexpression in
+// a disjunctions.
+//
+// Default rules from spec:
+//
+// D1: (v1, d1) | v2       => (v1|v2, d1)
+// D2: (v1, d1) | (v2, d2) => (v1|v2, d1|d2)
+//
+// Spec:
+// M1: *v        => (v, v)
+// M2: *(v1, d1) => (v1, d1)
+//
 func combineSubDefault(a, b defaultMode) defaultMode {
 	switch {
-	case a == maybeDefault && b == maybeDefault:
+	case a == maybeDefault && b == maybeDefault: // D1
 		return maybeDefault
-	case a == maybeDefault && b == notDefault:
+	case a == maybeDefault && b == notDefault: // D1
 		return notDefault
-	case a == maybeDefault && b == isDefault:
+	case a == maybeDefault && b == isDefault: // D1
 		return isDefault
-	case a == notDefault && b == maybeDefault:
+	case a == notDefault && b == maybeDefault: // D1
 		return notDefault
-	case a == notDefault && b == notDefault:
+	case a == notDefault && b == notDefault: // D2
 		return notDefault
-	case a == notDefault && b == isDefault:
+	case a == notDefault && b == isDefault: // D2
 		return isDefault
-	case a == isDefault && b == maybeDefault:
+	case a == isDefault && b == maybeDefault: // D1
 		return isDefault
-	case a == isDefault && b == notDefault:
+	case a == isDefault && b == notDefault: // M2
 		return notDefault
-	case a == isDefault && b == isDefault:
+	case a == isDefault && b == isDefault: // D2
 		return isDefault
 	default:
 		panic("unreachable")
 	}
 }
 
+// combineDefaults combines default modes for unifying conjuncts.
+//
+// Default rules from spec:
+//
+// U1: (v1, d1) & v2       => (v1&v2, d1&v2)
+// U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2)
 func combineDefault(a, b defaultMode) defaultMode {
 	if a > b {
 		a, b = b, a