internal/encoding/json: implement Encode

Encode allows an ast.File to be encoded directly
to JSON, provided the AST contains only nodes
that have a direct equivalent in JSON.

This serves two purposes:
- allow JSON to be represented as CUE (allowing
  trim on JSON, etc.)
- allow encodings like OpenAPI to encode directly
  into CUE, instead of needing intermediate types
  like OrderedMap.
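
A rough usage sketch (the file name, input, and error handling are
illustrative only; the package is internal, so it can only be
imported from within cuelang.org/go):

    package main

    import (
        "fmt"
        "log"

        "cuelang.org/go/cue/parser"
        cuejson "cuelang.org/go/internal/encoding/json"
    )

    func main() {
        // Parse CUE source that only uses JSON-compatible constructs.
        f, err := parser.ParseFile("example.cue", "a: {b: 1}")
        if err != nil {
            log.Fatal(err)
        }
        // Encode the AST directly to JSON, without building a cue.Value.
        b, err := cuejson.Encode(f)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(string(b)) // prints {"a": {"b": 1}}, modulo newlines
    }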

Change-Id: I1072f5c7472bc4f5d71403a783dfcd2355d9d9c6
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/5187
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
new file mode 100644
index 0000000..7a6b857
--- /dev/null
+++ b/internal/encoding/json/encode.go
@@ -0,0 +1,306 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package json
+
+import (
+	"bytes"
+	"encoding/json"
+	"math/big"
+	"strings"
+
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/literal"
+	"cuelang.org/go/cue/token"
+	"cuelang.org/go/internal"
+)
+
+// Encode converts a CUE AST to JSON.
+//
+// The given node must only contain values that can be directly represented
+// in JSON:
+//    Type          Restrictions
+//    BasicLit
+//    File          no imports, aliases, or definitions
+//    StructLit     no embeddings, aliases, or definitions
+//    List
+//    Field         must be regular; label must be a BasicLit or Ident
+//
+// Comments and attributes are ignored.
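+//
+// For example (an illustrative input; the whitespace in the output follows
+// the relative token positions recorded in the AST):
+//
+//    a: 1
+//    b: [1, 2]
+//
+// encodes to
+//
+//    {
+//        "a": 1,
+//        "b": [1, 2]
+//    }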
+func Encode(n ast.Node) (b []byte, err error) {
+	e := encoder{}
+	err = e.encode(n)
+	if err != nil {
+		return nil, err
+	}
+	return e.w.Bytes(), nil
+}
+
+type encoder struct {
+	w   bytes.Buffer
+	tab []byte // indentation unit; defaults to four spaces
+
+	// indentsAtLevel has one entry per open indentation level; the entry
+	// counts how many extra opening tokens share that level because they were
+	// written directly after the token that started it.
+	indentsAtLevel []int
+
+	indenting   bool // an opening '[' or '{' was just written
+	unIndenting int  // closing tokens still to write that must not pop a level
+}
+
+func (e *encoder) writeIndent(b byte) {
+	if e.indenting {
+		e.indentsAtLevel[len(e.indentsAtLevel)-1]++
+	} else {
+		e.indentsAtLevel = append(e.indentsAtLevel, 0)
+	}
+	e.indenting = true
+	_ = e.w.WriteByte(b)
+}
+
+func (e *encoder) writeUnindent(b byte, pos, def token.Pos) {
+	if e.unIndenting > 0 {
+		e.unIndenting--
+	} else {
+		e.unIndenting = e.indentsAtLevel[len(e.indentsAtLevel)-1]
+		e.indentsAtLevel = e.indentsAtLevel[:len(e.indentsAtLevel)-1]
+	}
+	e.indenting = false
+	e.ws(pos, def.RelPos())
+	_ = e.w.WriteByte(b)
+}
+
+func (e *encoder) writeString(s string) {
+	_, _ = e.w.WriteString(s)
+	e.indenting = false
+}
+
+func (e *encoder) writeByte(b byte) {
+	_ = e.w.WriteByte(b)
+}
+
+func (e *encoder) write(b []byte) {
+	_, _ = e.w.Write(b)
+	e.indenting = false
+}
+
+func (e *encoder) indent() {
+	for range e.indentsAtLevel {
+		e.write(e.tab)
+	}
+}
+
+func (e *encoder) ws(pos token.Pos, default_ token.RelPos) {
+	rel := pos.RelPos()
+	if pos == token.NoPos {
+		rel = default_
+	}
+	switch rel {
+	case token.NoSpace:
+	case token.Blank:
+		e.writeByte(' ')
+	case token.Newline:
+		e.writeByte('\n')
+		e.indent()
+	case token.NewSection:
+		e.writeString("\n\n")
+		e.indent()
+	}
+}
+
+func (e *encoder) encode(n ast.Node) error {
+	if e.tab == nil {
+		e.tab = []byte("    ")
+	}
+	const defPos = token.NoSpace
+	switch x := n.(type) {
+	case *ast.BasicLit:
+		e.ws(x.Pos(), defPos)
+		return e.encodeScalar(x, true)
+
+	case *ast.ListLit:
+		e.ws(foldNewline(x.Pos()), token.NoRelPos)
+		if len(x.Elts) == 0 {
+			e.writeString("[]")
+			return nil
+		}
+		e.writeIndent('[')
+		for i, x := range x.Elts {
+			if i > 0 {
+				e.writeString(",")
+			}
+			if err := e.encode(x); err != nil {
+				return err
+			}
+		}
+		e.writeUnindent(']', x.Rbrack, compactNewline(x.Elts[0].Pos()))
+		return nil
+
+	case *ast.StructLit:
+		e.ws(foldNewline(n.Pos()), token.NoRelPos)
+		return e.encodeDecls(x.Elts, x.Rbrace)
+
+	case *ast.File:
+		return e.encodeDecls(x.Decls, token.NoPos)
+
+	case *ast.UnaryExpr:
+		e.ws(foldNewline(x.Pos()), defPos)
+		l, ok := x.X.(*ast.BasicLit)
+		if ok && x.Op == token.SUB && (l.Kind == token.INT || l.Kind == token.FLOAT) {
+			e.writeByte('-')
+			return e.encodeScalar(l, false)
+		}
+	}
+	return errors.Newf(n.Pos(), "json: unsupported node %s (%T)", internal.DebugStr(n), n)
+}
+
+func (e *encoder) encodeScalar(l *ast.BasicLit, allowMinus bool) error {
+	switch l.Kind {
+	case token.INT:
+		var x big.Int
+		return e.setNum(l, allowMinus, &x)
+
+	case token.FLOAT:
+		var x big.Float
+		return e.setNum(l, allowMinus, &x)
+
+	case token.TRUE:
+		e.writeString("true")
+
+	case token.FALSE:
+		e.writeString("false")
+
+	case token.NULL:
+		e.writeString("null")
+
+	case token.STRING:
+		str, err := literal.Unquote(l.Value)
+		if err != nil {
+			return err
+		}
+		b, err := json.Marshal(str)
+		if err != nil {
+			return err
+		}
+		e.write(b)
+
+	default:
+		return errors.Newf(l.Pos(), "unknown literal type %v", l.Kind)
+	}
+	return nil
+}
+
+func (e *encoder) setNum(l *ast.BasicLit, allowMinus bool, x interface{}) error {
+	if !allowMinus && strings.HasPrefix(l.Value, "-") {
+		return errors.Newf(l.Pos(), "double minus not allowed")
+	}
+	var ni literal.NumInfo
+	if err := literal.ParseNum(l.Value, &ni); err != nil {
+		return err
+	}
+	e.writeString(ni.String())
+	return nil
+}
+
+// encodeDecls converts a sequence of declarations to a value. If it
+// encounters an embedded value, it encodes that expression as the result
+// instead. Accepting embeddings here is more relaxed than what CUE currently
+// allows for structs, but the expectation is that CUE will allow it at some
+// point; for now such input is still illegal CUE.
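+//
+// For example (illustrative): a file whose only declaration is the embedded
+// scalar 3 encodes to the JSON value 3.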
+func (e *encoder) encodeDecls(decls []ast.Decl, endPos token.Pos) error {
+	var embed ast.Expr
+	var fields []*ast.Field
+
+	for _, d := range decls {
+		switch x := d.(type) {
+		default:
+			return errors.Newf(x.Pos(), "json: unsupported node %s (%T)", internal.DebugStr(x), x)
+
+		case *ast.Package:
+			if embed != nil || fields != nil {
+				return errors.Newf(x.Pos(), "invalid package clause")
+			}
+			continue
+
+		case *ast.Field:
+			if x.Token == token.ISA {
+				return errors.Newf(x.TokenPos, "json: definition not allowed")
+			}
+			if x.Optional != token.NoPos {
+				return errors.Newf(x.Optional, "json: optional fields not allowed")
+			}
+			fields = append(fields, x)
+
+		case *ast.EmbedDecl:
+			if embed != nil {
+				return errors.Newf(x.Pos(), "json: multiple embedded values")
+			}
+			embed = x.Expr
+
+		case *ast.CommentGroup:
+		}
+	}
+
+	if embed != nil {
+		if fields != nil {
+			return errors.Newf(embed.Pos(), "json: embedding mixed with fields")
+		}
+		return e.encode(embed)
+	}
+
+	if len(fields) == 0 {
+		e.writeString("{}")
+		return nil
+	}
+
+	e.writeIndent('{')
+	pos := compactNewline(fields[0].Pos())
+	if endPos == token.NoPos && pos.RelPos() == token.Blank {
+		pos = token.NoPos
+	}
+	firstPos := pos
+	const defPos = token.NoRelPos
+	for i, x := range fields {
+		if i > 0 {
+			e.writeByte(',')
+			pos = x.Pos()
+		}
+		name, _, err := ast.LabelName(x.Label)
+		if err != nil {
+			return errors.Newf(x.Label.Pos(), "json: only literal labels allowed")
+		}
+		b, err := json.Marshal(name)
+		if err != nil {
+			return err
+		}
+		e.ws(pos, defPos)
+		e.write(b)
+		e.writeByte(':')
+
+		if err := e.encode(x.Value); err != nil {
+			return err
+		}
+	}
+	e.writeUnindent('}', endPos, firstPos)
+	return nil
+}
+
+func compactNewline(pos token.Pos) token.Pos {
+	if pos.RelPos() == token.NewSection {
+		pos = token.Newline.Pos()
+	}
+	return pos
+}
+
+func foldNewline(pos token.Pos) token.Pos {
+	if pos.RelPos() >= token.Newline {
+		pos = token.Blank.Pos()
+	}
+	return pos
+}
diff --git a/internal/encoding/json/encode_test.go b/internal/encoding/json/encode_test.go
new file mode 100644
index 0000000..0bf2814
--- /dev/null
+++ b/internal/encoding/json/encode_test.go
@@ -0,0 +1,311 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package json
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/cue/token"
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestEncodeFile(t *testing.T) {
+	testCases := []struct {
+		name string
+		in   string
+		out  string
+	}{{
+		name: "foo",
+		in: `
+		package test
+
+		seq: [
+			1, 2, 3, {
+				a: 1
+				b: 2
+			}
+		]
+		a: b: c: 3
+		b: {
+			x: 0
+			y: 1
+			z: 2
+		}
+		`,
+		out: `{
+    "seq": [
+        1, 2, 3, {
+            "a": 1,
+            "b": 2
+        }
+    ],
+    "a": {"b": {"c": 3}},
+    "b": {
+        "x": 0,
+        "y": 1,
+        "z": 2
+    }
+}`,
+	}, {
+		name: "oneLineFields",
+		in: `
+		seq: [1, 2, 3]
+		esq: []
+		emp: {}
+		map: {a: 3}
+		str: "str"
+		int: 1K
+		bin: 0b11
+		hex: 0x11
+		dec: .3
+		dat: '\x80'
+		nil: null
+		yes: true
+		non: false
+		`,
+		out: `{
+    "seq": [1, 2, 3],
+    "esq": [],
+    "emp": {},
+    "map": {"a": 3},
+    "str": "str",
+    "int": 1000,
+    "bin": 3,
+    "hex": 17,
+    "dec": 0.3,
+    "dat": "\ufffd",
+    "nil": null,
+    "yes": true,
+    "non": false
+}`,
+	}, {
+		name: "comments",
+		in: `
+// Document
+
+// head 1
+f1: 1
+// foot 1
+
+// head 2
+f2: 2 // line 2
+
+// intermezzo f2
+//
+// with multiline
+
+// head 3
+f3:
+	// struct doc
+	{
+		a: 1
+	}
+
+f4: {
+} // line 4
+
+// Trailing
+`,
+		out: `{
+    "f1": 1,
+    "f2": 2,
+    "f3": {
+        "a": 1
+    },
+
+    "f4": {}
+}`,
+	}, {
+		// TODO: support this at some point
+		name: "embed",
+		in: `
+	// hex
+	0xabc // line
+	// trail
+	`,
+		out: `2748`,
+	}, {
+		name: "anchors",
+		in: `
+		a: b
+		b: 3
+		`,
+		out: "json: unsupported node b (*ast.Ident)",
+	}, {
+		name: "errors",
+		in: `
+			m: {
+				a: 1
+				b: 3
+			}
+			c: [1, [ x for x in m ]]
+			`,
+		out: "json: unsupported node [x for x in m ] (*ast.ListComprehension)",
+	}, {
+		name: "disallowMultipleEmbeddings",
+		in: `
+		1
+		1
+		`,
+		out: "json: multiple embedded values",
+	}, {
+		name: "disallowDefinitions",
+		in:   `a :: 2 `,
+		out:  "json: definition not allowed",
+	}, {
+		name: "disallowOptionals",
+		in:   `a?: 2`,
+		out:  "json: optional fields not allowed",
+	}, {
+		name: "disallowBulkOptionals",
+		in:   `[string]: 2`,
+		out:  "json: only literal labels allowed",
+	}, {
+		name: "noImports",
+		in: `
+		import "foo"
+
+		a: 1
+		`,
+		out: `json: unsupported node import "foo" (*ast.ImportDecl)`,
+	}, {
+		name: "disallowMultipleEmbeddings",
+		in: `
+		1
+		a: 2
+		`,
+		out: "json: embedding mixed with fields",
+	}, {
+		name: "prometheus",
+		in: `
+		{
+			receivers: [{
+				name: "pager"
+				slack_configs: [{
+					text: """
+						{{ range .Alerts }}{{ .Annotations.description }}
+						{{ end }}
+						"""
+					channel:       "#cloudmon"
+					send_resolved: true
+				}]
+			}]
+			route: {
+				receiver: "pager"
+				group_by: ["alertname", "cluster"]
+			}
+		}`,
+		out: `{
+    "receivers": [{
+        "name": "pager",
+        "slack_configs": [{
+            "text": "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}",
+            "channel": "#cloudmon",
+            "send_resolved": true
+        }]
+    }],
+    "route": {
+        "receiver": "pager",
+        "group_by": ["alertname", "cluster"]
+    }
+}`,
+	}}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			f, err := parser.ParseFile(tc.name, tc.in, parser.ParseComments)
+			if err != nil {
+				t.Fatal(err)
+			}
+			b, err := Encode(f)
+			var got string
+			if err != nil {
+				got = err.Error()
+			} else {
+				if !json.Valid(b) {
+					t.Fatal("invalid JSON")
+				}
+				got = strings.TrimSpace(string(b))
+			}
+			want := strings.TrimSpace(tc.out)
+			if got != want {
+				t.Log("\n" + got)
+				t.Error(cmp.Diff(got, want))
+			}
+		})
+	}
+}
+
+func TestEncodeAST(t *testing.T) {
+	comment := func(s string) *ast.CommentGroup {
+		return &ast.CommentGroup{List: []*ast.Comment{
+			&ast.Comment{Text: "// " + s},
+		}}
+	}
+	testCases := []struct {
+		name string
+		in   ast.Expr
+		out  string
+	}{{
+		in: ast.NewStruct(
+			comment("foo"),
+			comment("bar"),
+			"field", ast.NewString("value"),
+			"field2", ast.NewString("value"),
+			comment("trail1"),
+			comment("trail2"),
+		),
+		out: `{"field":"value","field2":"value"}`,
+	}, {
+		in: &ast.StructLit{Elts: []ast.Decl{
+			comment("bar"),
+			&ast.EmbedDecl{Expr: ast.NewBool(true)},
+		}},
+		out: `true`,
+	}, {
+		in: &ast.UnaryExpr{
+			Op: token.SUB,
+			X:  &ast.BasicLit{Kind: token.INT, Value: "-2"},
+		},
+		out: `double minus not allowed`,
+	}, {
+		in:  &ast.BasicLit{Kind: token.INT, Value: "-2.0.0"},
+		out: `invalid number "-2.0.0"`,
+	}, {
+		in: &ast.StructLit{Elts: []ast.Decl{
+			&ast.EmbedDecl{Expr: ast.NewBool(true)},
+			&ast.Package{},
+		}},
+		out: `invalid package clause`,
+	}}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			b, err := Encode(tc.in)
+			var got string
+			if err != nil {
+				got = err.Error()
+			} else {
+				got = strings.TrimSpace(string(b))
+			}
+			want := strings.TrimSpace(tc.out)
+			if got != want {
+				t.Error(cmp.Diff(got, want))
+			}
+		})
+	}
+}