internal/core/eval: performance: unify an arc only once per node

This can avoid an exponential blowup.

Note that this still does redundant work. Removing this redundancy
will significantly reduce the running time of tests and even improve
the running time of the benchmarks added in this test.

Change-Id: Icbb19e02720e9f904f7326c92c9f404c817a4792
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/7163
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/testdata/benchmarks/bench_test.go b/cue/testdata/benchmarks/bench_test.go
new file mode 100644
index 0000000..cde34d2
--- /dev/null
+++ b/cue/testdata/benchmarks/bench_test.go
@@ -0,0 +1,60 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package benchmarks
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"testing"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/internal/cuetxtar"
+	"github.com/rogpeppe/go-internal/txtar"
+)
+
+func Benchmark(b *testing.B) {
+	files, err := ioutil.ReadDir(".")
+	if err != nil {
+		b.Fatal(err)
+	}
+
+	for _, fi := range files {
+		name := fi.Name()
+		if fi.IsDir() || filepath.Ext(name) != ".txtar" {
+			continue
+		}
+
+		a, err := txtar.ParseFile(name)
+		if err != nil {
+			b.Fatal(err)
+		}
+
+		inst := cue.Build(cuetxtar.Load(a, "/cuetest"))[0]
+		if inst.Err != nil {
+			b.Fatal(inst.Err)
+		}
+
+		b.Run(name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				inst := cue.Build(cuetxtar.Load(a, "/cuetest"))[0]
+				if inst.Err != nil {
+					b.Fatal(inst.Err)
+				}
+
+				inst.Value().Validate()
+			}
+		})
+	}
+}
diff --git a/cue/testdata/benchmarks/deduparc.txtar b/cue/testdata/benchmarks/deduparc.txtar
new file mode 100644
index 0000000..f0346e6
--- /dev/null
+++ b/cue/testdata/benchmarks/deduparc.txtar
@@ -0,0 +1,59 @@
+-- in.cue --
+package bench1
+
+#Value: {type: "float"} | {type: "string"}
+
+foo: {type: "string"}
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+foo: #Value
+-- out/eval --
+(struct){
+  #Value: (struct){ |((#struct){
+      type: (string){ "float" }
+    }, (#struct){
+      type: (string){ "string" }
+    }) }
+  foo: (#struct){
+    type: (string){ "string" }
+  }
+}
+-- out/compile --
+--- in.cue
+{
+  #Value: ({
+    type: "float"
+  }|{
+    type: "string"
+  })
+  foo: {
+    type: "string"
+  }
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+  foo: 〈0;#Value〉
+}
diff --git a/cue/testdata/benchmarks/dedupelem.txtar b/cue/testdata/benchmarks/dedupelem.txtar
new file mode 100644
index 0000000..de9ad65
--- /dev/null
+++ b/cue/testdata/benchmarks/dedupelem.txtar
@@ -0,0 +1,120 @@
+-- in.cue --
+package lpcorpus
+
+#Value: {type: "int"} | {type: "float"} | {type: "string"}
+
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "string"}] & [...#Value]
+foo: [{type: "string"}] & [...#Value]
+foo: [{type: "string"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+foo: [{type: "float"}] & [...#Value]
+-- out/eval --
+Errors:
+foo.0.type: incompatible values "string" and "float"
+
+Result:
+(_|_){
+  // [eval]
+  #Value: (struct){ |((#struct){
+      type: (string){ "int" }
+    }, (#struct){
+      type: (string){ "float" }
+    }, (#struct){
+      type: (string){ "string" }
+    }) }
+  foo: (_|_){
+    // [eval]
+    0: (_|_){
+      // [eval]
+      type: (_|_){
+        // [eval] foo.0.type: incompatible values "string" and "float"
+      }
+    }
+  }
+}
+-- out/compile --
+--- in.cue
+{
+  #Value: ({
+    type: "int"
+  }|{
+    type: "float"
+  }|{
+    type: "string"
+  })
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "string"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "string"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "string"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+  foo: ([
+    {
+      type: "float"
+    },
+  ] & [
+    ...〈0;#Value〉,
+  ])
+}
diff --git a/cue/testdata/definitions/embed.txtar b/cue/testdata/definitions/embed.txtar
index fb3bc34..1b20c1f 100644
--- a/cue/testdata/definitions/embed.txtar
+++ b/cue/testdata/definitions/embed.txtar
@@ -46,7 +46,27 @@
   z: d: 3 // allow this
 }
 
-
+reclose3: {
+  #Step: (#A | #B) & {
+    #Common
+  }
+  #Common: {
+    Name: string
+  }
+  #A: {
+    #Common
+    Something: int
+  }
+  #B: {
+    #Common
+    Else: int
+  }
+  x: #Step
+  x: #A & {
+    Name:      "a"
+    Something: 4
+  }
+}
 
 -- out/eval --
 Errors:
@@ -116,6 +136,30 @@
       d: (int){ 3 }
     }
   }
+  reclose3: (struct){
+    #Step: (struct){ |((#struct){
+        Name: (string){ string }
+        Something: (int){ int }
+      }, (#struct){
+        Name: (string){ string }
+        Else: (int){ int }
+      }) }
+    #Common: (#struct){
+      Name: (string){ string }
+    }
+    #A: (#struct){
+      Name: (string){ string }
+      Something: (int){ int }
+    }
+    #B: (#struct){
+      Name: (string){ string }
+      Else: (int){ int }
+    }
+    x: (#struct){
+      Name: (string){ "a" }
+      Something: (int){ 4 }
+    }
+  }
 }
 -- out/compile --
 --- in.cue
@@ -170,4 +214,25 @@
       d: 3
     }
   }
+  reclose3: {
+    #Step: ((〈0;#A〉|〈0;#B〉) & {
+      〈1;#Common〉
+    })
+    #Common: {
+      Name: string
+    }
+    #A: {
+      〈1;#Common〉
+      Something: int
+    }
+    #B: {
+      〈1;#Common〉
+      Else: int
+    }
+    x: 〈0;#Step〉
+    x: (〈0;#A〉 & {
+      Name: "a"
+      Something: 4
+    })
+  }
 }
diff --git a/internal/core/adt/composite.go b/internal/core/adt/composite.go
index c68c5b5..e5aeba4 100644
--- a/internal/core/adt/composite.go
+++ b/internal/core/adt/composite.go
@@ -532,15 +532,12 @@
 // AddConjunct adds the given Conjuncts to v if it doesn't already exist.
 func (v *Vertex) AddConjunct(c Conjunct) *Bottom {
 	if v.Value != nil {
+		// TODO: investigate why this happens at all. Removing it seems to
+		// change the order of fields in some cases.
+		//
 		// This is likely a bug in the evaluator and should not happen.
 		return &Bottom{Err: errors.Newf(token.NoPos, "cannot add conjunct")}
 	}
-	for _, x := range v.Conjuncts {
-		if x == c {
-			return nil
-		}
-	}
-
 	v.Conjuncts = append(v.Conjuncts, c)
 	return nil
 }
diff --git a/internal/core/eval/eval.go b/internal/core/eval/eval.go
index f7cc12c..5ab7c60 100644
--- a/internal/core/eval/eval.go
+++ b/internal/core/eval/eval.go
@@ -388,6 +388,7 @@
 		// }
 
 		n := &nodeContext{
+			arcMap:     map[arcKey]bool{},
 			kind:       adt.TopKind,
 			nodeShared: shared,
 			needClose:  needClose,
@@ -704,6 +705,11 @@
 	return n.resultNode.defaultMode == isDefault
 }
 
+type arcKey struct {
+	arc *adt.Vertex
+	id  adt.ID
+}
+
 // A nodeContext is used to collate all conjuncts of a value to facilitate
 // unification. Conceptually order of unification does not matter. However,
 // order has relevance when performing checks of non-monotic properities. Such
@@ -718,6 +724,8 @@
 	// should already ensure a quick-fail for struct disjunctions with
 	// discriminators.
 
+	arcMap map[arcKey]bool
+
 	// Current value (may be under construction)
 	scalar adt.Value // TODO: use Value in node.
 
@@ -990,6 +998,29 @@
 			break
 		}
 
+		// We need to ensure that each arc is only unified once (or at least) a
+		// bounded time, witch each conjunct. Comprehensions, for instance, may
+		// distribute a value across many values that get unified back into the
+		// same value. If such a value is a disjunction, than a disjunction of N
+		// disjuncts will result in a factor N more unifications for each
+		// occurrence of such value, resulting in exponential running time. This
+		// is especially common values that are used as a type.
+		//
+		// However, unification is idempotent, so each such conjunct only needs
+		// to be unified once. This cache checks for this and prevents an
+		// exponential blowup in such case.
+		//
+		// TODO(perf): this cache ensures the conjuncts of an arc at most once
+		// per ID. However, we really need to add the conjuncts of an arc only
+		// once total, and then add the close information once per close ID
+		// (pointer can probably be shared). Aside from being more performant,
+		// this is probably the best way to guarantee that conjunctions are
+		// linear in this case.
+		if n.arcMap[arcKey{arc, v.CloseID}] {
+			return
+		}
+		n.arcMap[arcKey{arc, v.CloseID}] = true
+
 		// Pass detection of structural cycles from parent to children.
 		cyclic := false
 		if v.Env != nil {
diff --git a/internal/core/export/export_test.go b/internal/core/export/export_test.go
index 2803c01..d384681 100644
--- a/internal/core/export/export_test.go
+++ b/internal/core/export/export_test.go
@@ -23,6 +23,7 @@
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/format"
 	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/encoding/gocode/gocodec"
 	"cuelang.org/go/internal"
 	"cuelang.org/go/internal/core/adt"
 	"cuelang.org/go/internal/core/compile"
@@ -235,3 +236,32 @@
 
 	t.Error(string(formatNode(t, file)))
 }
+
+func TestFromGo(t *testing.T) {
+	type Struct struct {
+		A string
+		B string
+	}
+
+	m := make(map[string]Struct)
+	m["hello"] = Struct{
+		A: "a",
+		B: "b",
+	}
+	var r cue.Runtime
+	codec := gocodec.New(&r, nil)
+	v, err := codec.Decode(m)
+	if err != nil {
+		panic(err)
+	}
+
+	syn, _ := format.Node(v.Syntax())
+	if got := string(syn); got != `{
+	hello: {
+		A: "a"
+		B: "b"
+	}
+}` {
+		t.Errorf("incorrect ordering: %s\n", got)
+	}
+}