internal/diff: first stab add diffing functionality
for supporting various commands:
- cue test
- cue diff/snapshot
Change-Id: I9ddccb0622530db9759cd25768656b712268d3c5
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/3981
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/diff/diff.go b/internal/diff/diff.go
new file mode 100644
index 0000000..e219cce
--- /dev/null
+++ b/internal/diff/diff.go
@@ -0,0 +1,307 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package diff
+
+import (
+ "strconv"
+
+ "cuelang.org/go/cue"
+ "cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/errors"
+)
+
+// Diff returns an edit script representing the difference between x and y.
+func Diff(x, y cue.Value) (Kind, *EditScript) {
+ d := differ{}
+ return d.diffValue(x, y)
+}
+
+// Kind identifies the kind of operation of an edit script.
+type Kind uint8
+
+const (
+ // Identity indicates that a value pair is identical in both list X and Y.
+ Identity Kind = iota
+ // UniqueX indicates that a value only exists in X and not Y.
+ UniqueX
+ // UniqueY indicates that a value only exists in Y and not X.
+ UniqueY
+ // Modified indicates that a value pair is a modification of each other.
+ Modified
+)
+
+// EditScript represents the series of differences between two CUE values.
+// x and y must be either both list or struct.
+type EditScript struct {
+ x, y cue.Value
+ edits []Edit
+}
+
+// Len returns the number of edits.
+func (es *EditScript) Len() int {
+ return len(es.edits)
+}
+
+// Label returns a string representation of the label.
+//
+func (es *EditScript) LabelX(i int) string {
+ e := es.edits[i]
+ p := e.XPos()
+ if p < 0 {
+ return ""
+ }
+ return label(es.x, p)
+}
+
+func (es *EditScript) LabelY(i int) string {
+ e := es.edits[i]
+ p := e.YPos()
+ if p < 0 {
+ return ""
+ }
+ return label(es.y, p)
+}
+
+// TODO: support label expressions.
+func label(v cue.Value, i int) string {
+ st, err := v.Struct()
+ if err != nil {
+ return ""
+ }
+
+ // TODO: return formatted expression for optionals.
+ f := st.Field(i)
+ str := f.Name
+ if !ast.IsValidIdent(str) {
+ str = strconv.Quote(str)
+ }
+ if f.IsOptional {
+ str += "?"
+ }
+ if f.IsDefinition {
+ str += " ::"
+ } else {
+ str += ":"
+ }
+ return str
+}
+
+// ValueX returns the value of X involved at step i.
+func (es *EditScript) ValueX(i int) (v cue.Value) {
+ p := es.edits[i].XPos()
+ if p < 0 {
+ return v
+ }
+ st, err := es.x.Struct()
+ if err != nil {
+ return v
+ }
+ return st.Field(p).Value
+}
+
+// ValueY returns the value of Y involved at step i.
+func (es *EditScript) ValueY(i int) (v cue.Value) {
+ p := es.edits[i].YPos()
+ if p < 0 {
+ return v
+ }
+ st, err := es.y.Struct()
+ if err != nil {
+ return v
+ }
+ return st.Field(p).Value
+}
+
+// Edit represents a single operation within an edit-script.
+type Edit struct {
+ kind Kind
+ xPos int32 // 0 if UniqueY
+ yPos int32 // 0 if UniqueX
+ sub *EditScript // non-nil if Modified
+}
+
+func (e Edit) Kind() Kind { return e.kind }
+func (e Edit) XPos() int { return int(e.xPos - 1) }
+func (e Edit) YPos() int { return int(e.yPos - 1) }
+
+type differ struct {
+ options []cue.Option
+ errs errors.Error
+}
+
+func (d *differ) diffValue(x, y cue.Value) (Kind, *EditScript) {
+ if x.IncompleteKind() != y.IncompleteKind() {
+ return Modified, nil
+ }
+
+ switch xc, yc := x.IsConcrete(), y.IsConcrete(); {
+ case xc != yc:
+ return Modified, nil
+
+ case xc && yc:
+ switch k := x.Kind(); k {
+ case cue.StructKind:
+ return d.diffStruct(x, y)
+
+ case cue.ListKind:
+ return d.diffList(x, y)
+ }
+ fallthrough
+
+ default:
+ if !x.Equals(y) {
+ return Modified, nil
+ }
+ }
+
+ return Identity, nil
+}
+
+func (d *differ) diffStruct(x, y cue.Value) (Kind, *EditScript) {
+ sx, _ := x.Struct()
+ sy, _ := y.Struct()
+
+ // Best-effort topological sort, prioritizing x over y, using a variant of
+ // Kahn's algorithm (see, for instance
+ // https://www.geeksforgeeks.org/topological-sorting-indegree-based-solution/).
+ // We assume that the order of the elements of each value indicate an edge
+ // in the graph. This means that only the next unprocessed nodes can be
+ // those with no incoming edges.
+ xMap := make(map[string]int32, sx.Len())
+ yMap := make(map[string]int32, sy.Len())
+ for i := 0; i < sx.Len(); i++ {
+ xMap[sx.Field(i).Name] = int32(i + 1)
+ }
+ for i := 0; i < sy.Len(); i++ {
+ yMap[sy.Field(i).Name] = int32(i + 1)
+ }
+
+ edits := []Edit{}
+ differs := false
+
+ var xi, yi int
+ var xf, yf cue.FieldInfo
+ for xi < sx.Len() || yi < sx.Len() {
+ // Process zero nodes
+ for ; xi < sx.Len(); xi++ {
+ xf = sx.Field(xi)
+ yp := yMap[xf.Name]
+ if yp > 0 {
+ break
+ }
+ edits = append(edits, Edit{UniqueX, int32(xi + 1), 0, nil})
+ differs = true
+ }
+ for ; yi < sy.Len(); yi++ {
+ yf = sy.Field(yi)
+ if yMap[yf.Name] == 0 {
+ // already done
+ continue
+ }
+ xp := xMap[yf.Name]
+ if xp > 0 {
+ break
+ }
+ yMap[yf.Name] = 0
+ edits = append(edits, Edit{UniqueY, 0, int32(yi + 1), nil})
+ differs = true
+ }
+
+ // Compare nodes
+ for ; xi < sx.Len(); xi++ {
+ xf = sx.Field(xi)
+
+ yp := yMap[xf.Name]
+ if yp == 0 {
+ break
+ }
+ // If yp != xi+1, the topological sort was not possible.
+ yMap[xf.Name] = 0
+
+ yf := sy.Field(int(yp - 1))
+
+ kind := Identity
+ var script *EditScript
+ switch {
+ case xf.IsDefinition != yf.IsDefinition,
+ xf.IsOptional != yf.IsOptional:
+ kind = Modified
+
+ default:
+ xv := xf.Value
+ yv := yf.Value
+ // TODO(perf): consider evaluating lazily.
+ kind, script = d.diffValue(xv, yv)
+ }
+
+ edits = append(edits, Edit{kind, int32(xi + 1), yp, script})
+ if kind != Identity {
+ differs = true
+ }
+ }
+ }
+ if !differs {
+ return Identity, nil
+ }
+ return Modified, &EditScript{x: x, y: y, edits: edits}
+}
+
+// TODO: right now we do a simple element-by-element comparison. Instead,
+// use an algorithm that approximates a minimal Levenshtein distance, like the
+// one in github.com/google/go-cmp/internal/diff.
+func (d *differ) diffList(x, y cue.Value) (Kind, *EditScript) {
+ ix, _ := x.List()
+ iy, _ := y.List()
+
+ edits := []Edit{}
+ differs := false
+ i := int32(1)
+
+ for {
+ // TODO: This would be much easier with a Next/Done API.
+ hasX := ix.Next()
+ hasY := iy.Next()
+ if !hasX {
+ for hasY {
+ differs = true
+ edits = append(edits, Edit{UniqueY, 0, i, nil})
+ hasY = iy.Next()
+ i++
+ }
+ break
+ }
+ if !hasY {
+ for hasX {
+ differs = true
+ edits = append(edits, Edit{UniqueX, i, 0, nil})
+ hasX = ix.Next()
+ i++
+ }
+ break
+ }
+
+ // Both x and y have a value.
+ kind, script := d.diffValue(ix.Value(), iy.Value())
+ if kind != Identity {
+ differs = true
+ }
+ edits = append(edits, Edit{kind, i, i, script})
+ i++
+ }
+ if !differs {
+ return Identity, nil
+ }
+ return Modified, &EditScript{x: x, y: y, edits: edits}
+}
diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go
new file mode 100644
index 0000000..e33093a
--- /dev/null
+++ b/internal/diff/diff_test.go
@@ -0,0 +1,359 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package diff
+
+import (
+ "bytes"
+ "testing"
+
+ "cuelang.org/go/cue"
+)
+
+func TestDiff(t *testing.T) {
+ testCases := []struct {
+ name string
+ x, y string
+ kind Kind
+ diff string
+ }{{
+ name: "identity struct",
+ x: `{
+ a: {
+ b: 1
+ c: 2
+ }
+ l: {
+ d: 1
+ }
+ }`,
+ y: `{
+ a: {
+ c: 2
+ b: 1
+ }
+ l: {
+ d: 1
+ }
+ }`,
+ }, {
+ name: "identity list",
+ x: `[1, 2, 3]`,
+ y: `[1, 2, 3]`,
+ }, {
+ name: "identity value",
+ x: `"foo"`,
+ y: `"foo"`,
+ }, {
+ name: "modified value",
+ x: `"foo"`,
+ y: `"bar"`,
+ kind: Modified,
+ }, {
+ name: "basics",
+ x: `{
+ a: int
+ b: 2
+ s: 4
+ d: 1
+ }
+ `,
+ y: `
+ {
+ a: string
+ c: 3
+ s: 4
+ d: int
+ }
+ `,
+ kind: Modified,
+ diff: ` {
+- a: int
++ a: string
+- b: 2
+ s: 4
+- d: 1
++ d: int
++ c: 3
+ }
+`,
+ }, {
+ name: "basics 2",
+ x: `{
+ ls: [2, 3, 4]
+ "foo-bar": 2
+ s: 4
+ lm1: [2, 3, 5]
+ lm2: [6]
+ }
+ `,
+ y: `
+ {
+ ls: [2, 3, 4]
+ "foo-bar": 3
+ s: 4
+ lm1: [2, 3, 4, 6]
+ lm2: []
+ la: [2, 3, 4]
+ }
+ `,
+ kind: Modified,
+ diff: ` {
+ ls: [2, 3, 4]
+- "foo-bar": 2
++ "foo-bar": 3
+ s: 4
+ lm1: [
+ 2,
+ 3,
+- 5,
++ 4,
++ 6,
+ ]
+ lm2: [
+- 6,
+ ]
++ la: [2, 3, 4]
+ }
+`,
+ }, {
+ name: "interupted run 1",
+ x: `{
+ a: 1
+ b: 2
+ c: 3
+ d: 4
+ e: 10
+ f: 6
+ g: 7
+ h: 8
+ i: 9
+ j: 10
+}
+`,
+ y: `
+{
+ a: 1
+ b: 2
+ c: 3
+ d: 4
+ e: 5
+ f: 6
+ g: 7
+ h: 8
+ i: 9
+ j: 10
+}
+`,
+ kind: Modified,
+ diff: ` {
+ ... // 2 identical elements
+ c: 3
+ d: 4
+- e: 10
++ e: 5
+ f: 6
+ g: 7
+ ... // 3 identical elements
+ }
+`,
+ }, {
+ name: "interupted run 2",
+ x: `{
+ a: -1
+ b: 2
+ c: 3
+ d: 4
+ e: 5
+ f: 6
+ g: 7
+ h: 8
+ i: 9
+ j: -10
+ }`,
+ y: `{
+ a: 1
+ b: 2
+ c: 3
+ d: 4
+ e: 5
+ f: 6
+ g: 7
+ h: 8
+ i: 9
+ j: 10
+ }
+ `,
+ kind: Modified,
+ diff: ` {
+- a: -1
++ a: 1
+ b: 2
+ c: 3
+ ... // 4 identical elements
+ h: 8
+ i: 9
+- j: -10
++ j: 10
+ }
+`,
+ }, {
+ name: "recursion",
+ x: `{
+ s: {
+ a: 1
+ b: 3
+ d: 4
+ }
+ l: [
+ [3, 4]
+ ]
+ }`,
+ y: `{
+ s: {
+ a: 2
+ b: 3
+ c: 4
+ }
+ l: [
+ [3, 5, 6]
+ ]
+ }
+ `,
+ kind: Modified,
+ diff: ` {
+ s: {
+- a: 1
++ a: 2
+ b: 3
+- d: 4
++ c: 4
+ }
+ l: [
+ [
+ 3,
+- 4,
++ 5,
++ 6,
+ ]
+ ]
+ }
+`,
+ }, {
+ name: "optional and definitions",
+ x: `{
+ s :: {
+ a :: 1
+ b: 2
+ }
+ o?: 3
+ od? :: 1
+ oc?: 5
+}`,
+ y: `{
+ s :: {
+ a: 2
+ b :: 2
+ }
+ o?: 4
+ od :: 1
+ oc? :: 5
+}
+`,
+ kind: Modified,
+ diff: ` {
+ s :: {
+- a :: 1
++ a: 2
+- b: 2
++ b :: 2
+ }
+- o?: 3
++ o?: 4
+- od? :: 1
++ od :: 1
+- oc?: 5
++ oc? :: 5
+ }
+`,
+ }}
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ var r cue.Runtime
+ x, err := r.Compile("x", tc.x)
+ if err != nil {
+ t.Fatal(err)
+ }
+ y, err := r.Compile("y", tc.y)
+ if err != nil {
+ t.Fatal(err)
+ }
+ kind, script := Diff(x.Value(), y.Value())
+ if kind != tc.kind {
+ t.Fatalf("got %d; want %d", kind, tc.kind)
+ }
+ if script != nil {
+ w := &bytes.Buffer{}
+ err = Print(w, script)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := w.String(); got != tc.diff {
+ t.Errorf("got\n%s;\nwant\n%s", got, tc.diff)
+ }
+ }
+ })
+ }
+}
+
+func TestX(t *testing.T) {
+ t.Skip()
+
+ tc := struct {
+ x, y string
+ kind Kind
+ diff string
+ }{
+ x: `{
+ }
+ `,
+ y: `
+ {
+ }
+ `,
+ kind: Modified,
+ diff: ``,
+ }
+ var r cue.Runtime
+ x, err := r.Compile("x", tc.x)
+ if err != nil {
+ t.Fatal(err)
+ }
+ y, err := r.Compile("y", tc.y)
+ if err != nil {
+ t.Fatal(err)
+ }
+ kind, script := Diff(x.Value(), y.Value())
+ if kind != tc.kind {
+ t.Fatalf("got %d; want %d", kind, tc.kind)
+ }
+ w := &bytes.Buffer{}
+ err = Print(w, script)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := w.String(); got != tc.diff {
+ t.Errorf("got\n%s;\nwant\n%s", got, tc.diff)
+ }
+}
diff --git a/internal/diff/print.go b/internal/diff/print.go
new file mode 100644
index 0000000..ebe2d10
--- /dev/null
+++ b/internal/diff/print.go
@@ -0,0 +1,271 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package diff
+
+import (
+ "fmt"
+ "io"
+
+ "cuelang.org/go/cue"
+ "cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/format"
+)
+
+// Print the differences between two structs represented by an edit script.
+func Print(w io.Writer, es *EditScript) error {
+ p := printer{
+ w: w,
+ margin: 2,
+ context: 2,
+ }
+ p.script(es)
+ return p.errs
+}
+
+type printer struct {
+ w io.Writer
+ context int
+ margin int
+ indent int
+ prefix string
+ hasPrefix bool
+ hasPrint bool
+ errs errors.Error
+}
+
+func (p *printer) writeRaw(b []byte) {
+ if len(b) == 0 {
+ return
+ }
+ if !p.hasPrefix {
+ io.WriteString(p.w, p.prefix)
+ p.hasPrefix = true
+ }
+ if !p.hasPrint {
+ fmt.Fprintf(p.w, "% [1]*s", p.indent+p.margin-len(p.prefix), "")
+ p.hasPrint = true
+ }
+ p.w.Write(b)
+}
+
+func (p *printer) Write(b []byte) (n int, err error) {
+ i, last := 0, 0
+ for ; i < len(b); i++ {
+ if b[i] != '\n' {
+ continue
+ }
+ p.writeRaw(b[last:i])
+ last = i + 1
+ io.WriteString(p.w, "\n")
+ p.hasPrefix = false
+ p.hasPrint = false
+ }
+ p.writeRaw(b[last:])
+ return len(b), nil
+}
+
+func (p *printer) write(b []byte) {
+ _, _ = p.Write(b)
+}
+
+func (p *printer) printLen(align int, str string) {
+ fmt.Fprintf(p, "% -[1]*s", align, str)
+}
+
+func (p *printer) println(s string) {
+ fmt.Fprintln(p, s)
+}
+
+func (p *printer) printf(format string, args ...interface{}) {
+ fmt.Fprintf(p, format, args...)
+}
+
+func (p *printer) script(e *EditScript) {
+ switch e.x.Kind() {
+ case cue.StructKind:
+ p.printStruct(e)
+ case cue.ListKind:
+ p.printList(e)
+ default:
+ p.printf("BadExpr")
+ }
+}
+
+func (p *printer) findRun(es *EditScript, i int) (start, end int) {
+ lastEnd := i
+
+ for ; i < es.Len() && es.edits[i].kind == Identity; i++ {
+ }
+ start = i
+
+ // Find end of run
+ include := p.context
+ for ; i < es.Len(); i++ {
+ e := es.edits[i]
+ if e.kind != Identity {
+ include = p.context + 1
+ continue
+ }
+ if include--; include == 0 {
+ break
+ }
+ }
+
+ if i-start > 0 {
+ // Adjust start of run
+ if s := start - p.context; s > lastEnd {
+ start = s
+ } else {
+ start = lastEnd
+ }
+ }
+ return start, i
+}
+
+func (p *printer) printStruct(es *EditScript) {
+ // TODO: consider not printing outer curlies, or make it an option.
+ // if p.indent > 0 {
+ p.println("{")
+ defer p.println("}")
+ // }
+ p.indent += 4
+ defer func() {
+ p.indent -= 4
+ }()
+
+ var start, i int
+ for i < es.Len() {
+ lastEnd := i
+ // Find provisional start of run.
+ start, i = p.findRun(es, i)
+
+ p.printSkipped(start - lastEnd)
+ p.printFieldRun(es, start, i)
+ }
+ p.printSkipped(es.Len() - i)
+}
+
+func (p *printer) printList(es *EditScript) {
+ p.println("[")
+ p.indent += 4
+ defer func() {
+ p.indent -= 4
+ p.println("]")
+ }()
+
+ x := getElems(es.x)
+ y := getElems(es.y)
+
+ var start, i int
+ for i < es.Len() {
+ lastEnd := i
+ // Find provisional start of run.
+ start, i = p.findRun(es, i)
+
+ p.printSkipped(start - lastEnd)
+ p.printElemRun(es, x, y, start, i)
+ }
+ p.printSkipped(es.Len() - i)
+}
+
+func getElems(x cue.Value) (a []cue.Value) {
+ for i, _ := x.List(); i.Next(); {
+ a = append(a, i.Value())
+ }
+ return a
+}
+
+func (p *printer) printSkipped(n int) {
+ if n > 0 {
+ p.printf("... // %d identical elements\n", n)
+ }
+}
+
+func (p *printer) printValue(v cue.Value) {
+ // TODO: have indent option.
+ b, _ := format.Node(v.Syntax())
+ p.write(b)
+}
+
+func (p *printer) printFieldRun(es *EditScript, start, end int) {
+ // Determine max field len.
+ for i := start; i < end; i++ {
+ e := es.edits[i]
+
+ switch e.kind {
+ case UniqueX:
+ p.printField("-", es, es.LabelX(i), es.ValueX(i))
+
+ case UniqueY:
+ p.printField("+", es, es.LabelY(i), es.ValueY(i))
+
+ case Modified:
+ if e.sub != nil {
+ io.WriteString(p, es.LabelX(i))
+ io.WriteString(p, " ")
+ p.script(e.sub)
+ break
+ }
+ // TODO: show per-line differences for multiline strings.
+ p.printField("-", es, es.LabelX(i), es.ValueX(i))
+ p.printField("+", es, es.LabelY(i), es.ValueY(i))
+
+ case Identity:
+ // TODO: write on one line
+ p.printField("", es, es.LabelX(i), es.ValueX(i))
+ }
+ }
+}
+
+func (p *printer) printField(prefix string, es *EditScript, label string, v cue.Value) {
+ p.prefix = prefix
+ io.WriteString(p, label)
+ io.WriteString(p, " ")
+ p.printValue(v)
+ io.WriteString(p, "\n")
+ p.prefix = ""
+}
+
+func (p *printer) printElemRun(es *EditScript, x, y []cue.Value, start, end int) {
+ for _, e := range es.edits[start:end] {
+ switch e.kind {
+ case UniqueX:
+ p.printElem("-", x[e.XPos()])
+
+ case UniqueY:
+ p.printElem("+", y[e.YPos()])
+
+ case Modified:
+ if e.sub != nil {
+ p.script(e.sub)
+ break
+ }
+ // TODO: show per-line differences for multiline strings.
+ p.printElem("-", x[e.XPos()])
+ p.printElem("+", y[e.YPos()])
+
+ case Identity:
+ // TODO: write on one line
+ p.printElem("", x[e.XPos()])
+ }
+ }
+}
+
+func (p *printer) printElem(prefix string, v cue.Value) {
+ p.prefix = prefix
+ p.printValue(v)
+ io.WriteString(p, ",\n")
+ p.prefix = ""
+}