encoding/json: add JSON exporter

and use this in cmd/cue

The AST rewriter is somewhat specific to JSON.
Some parts may be moved to more general format
functionality later.

Closes #118

Change-Id: I530a7ee2a32b76b4709398039227d9c994316fda
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/3380
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/import.go b/cmd/cue/cmd/import.go
index 697d1ff..8f6bab2 100644
--- a/cmd/cue/cmd/import.go
+++ b/cmd/cue/cmd/import.go
@@ -15,7 +15,6 @@
 package cmd
 
 import (
-	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -35,6 +34,7 @@
 	"cuelang.org/go/cue/load"
 	"cuelang.org/go/cue/parser"
 	"cuelang.org/go/cue/token"
+	"cuelang.org/go/encoding/json"
 	"cuelang.org/go/encoding/protobuf"
 	"cuelang.org/go/internal"
 	"cuelang.org/go/internal/third_party/yaml"
@@ -600,20 +600,15 @@
 }
 
 func handleJSON(path string, r io.Reader) (objects []ast.Expr, err error) {
-	d := json.NewDecoder(r)
+	d := json.NewDecoder(nil, path, r)
 
 	for {
-		var raw json.RawMessage
-		err := d.Decode(&raw)
+		expr, err := d.Extract()
 		if err == io.EOF {
 			break
 		}
 		if err != nil {
-			return nil, fmt.Errorf("could not parse JSON: %v", err)
-		}
-		expr, err := parser.ParseExpr(path, []byte(raw))
-		if err != nil {
-			return nil, fmt.Errorf("invalid input: %v %q", err, raw)
+			return nil, err
 		}
 		objects = append(objects, expr)
 	}
diff --git a/cmd/cue/cmd/testdata/import/import_files.out b/cmd/cue/cmd/testdata/import/import_files.out
index df6d24d..02c62bd 100644
--- a/cmd/cue/cmd/testdata/import/import_files.out
+++ b/cmd/cue/cmd/testdata/import/import_files.out
@@ -4,5 +4,8 @@
 name:     "booster"
 replicas: 1
 kind: "Service"
-name: "supplement"
+name: """
+		supplement
+		foo
+		"""
 json: "[1, 2]"
diff --git a/cmd/cue/cmd/testdata/import/import_hoiststr.out b/cmd/cue/cmd/testdata/import/import_hoiststr.out
index 29e11cc..4460c9d 100644
--- a/cmd/cue/cmd/testdata/import/import_hoiststr.out
+++ b/cmd/cue/cmd/testdata/import/import_hoiststr.out
@@ -5,9 +5,12 @@
 		kind: "Service"
 		name: "booster"
 	}]
-	supplement: [{
+	"supplement\nfoo": [{
 		kind: "Service"
-		name: "supplement"
+		name: """
+		supplement
+		foo
+		"""
 		json: xjson.Marshal(_cue_json)
 		_cue_json = [1, 2]
 	}]
diff --git a/cmd/cue/cmd/testdata/import/import_list.out b/cmd/cue/cmd/testdata/import/import_list.out
index e071c0d..c645992 100644
--- a/cmd/cue/cmd/testdata/import/import_list.out
+++ b/cmd/cue/cmd/testdata/import/import_list.out
@@ -3,7 +3,10 @@
 	name: "booster"
 }, {
 	kind: "Service"
-	name: "supplement"
+	name: """
+		supplement
+		foo
+		"""
 	json: "[1, 2]"
 }]
 deployment: [{
diff --git a/cmd/cue/cmd/testdata/import/import_path.out b/cmd/cue/cmd/testdata/import/import_path.out
index e59d747..b668a2d 100644
--- a/cmd/cue/cmd/testdata/import/import_path.out
+++ b/cmd/cue/cmd/testdata/import/import_path.out
@@ -8,8 +8,11 @@
 	name:     "booster"
 	replicas: 1
 }
-service supplement: {
+service "supplement\nfoo": {
 	kind: "Service"
-	name: "supplement"
+	name: """
+		supplement
+		foo
+		"""
 	json: "[1, 2]"
 }
diff --git a/cmd/cue/cmd/testdata/import/services.cue b/cmd/cue/cmd/testdata/import/services.cue
index e071c0d..c645992 100644
--- a/cmd/cue/cmd/testdata/import/services.cue
+++ b/cmd/cue/cmd/testdata/import/services.cue
@@ -3,7 +3,10 @@
 	name: "booster"
 }, {
 	kind: "Service"
-	name: "supplement"
+	name: """
+		supplement
+		foo
+		"""
 	json: "[1, 2]"
 }]
 deployment: [{
diff --git a/cmd/cue/cmd/testdata/import/services.jsonl b/cmd/cue/cmd/testdata/import/services.jsonl
index 5527811..e704fe0 100644
--- a/cmd/cue/cmd/testdata/import/services.jsonl
+++ b/cmd/cue/cmd/testdata/import/services.jsonl
@@ -9,6 +9,6 @@
 }
 {
     "kind": "Service",
-    "name": "supplement",
+    "name": "supplement\nfoo",
     "json": "[1, 2]"
 }
\ No newline at end of file
diff --git a/cue/format/node.go b/cue/format/node.go
index 196db20..a8d3f51 100644
--- a/cue/format/node.go
+++ b/cue/format/node.go
@@ -20,6 +20,7 @@
 	"strings"
 
 	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/literal"
 	"cuelang.org/go/cue/scanner"
 	"cuelang.org/go/cue/token"
 )
@@ -347,7 +348,15 @@
 				}
 			}
 		}
-		f.print(n.ValuePos, n.Value)
+		str := n.Value
+		// Allow any CUE string in the AST, but ensure it is formatted
+		// according to spec.
+		if strings.HasPrefix(str, `"""`) || strings.HasPrefix(str, "#") {
+			if u, err := literal.Unquote(str); err == nil {
+				str = strconv.Quote(u)
+			}
+		}
+		f.print(n.ValuePos, str)
 
 	case *ast.TemplateLabel:
 		f.print(n.Langle, token.LSS, indent)
diff --git a/cue/parser/interface.go b/cue/parser/interface.go
index 66cf341..25b5af3 100644
--- a/cue/parser/interface.go
+++ b/cue/parser/interface.go
@@ -71,6 +71,11 @@
 	}
 )
 
+// FileOffset sets the base offset passed to token.NewFile for the parsed file.
+func FileOffset(pos int) Option {
+	return func(p *parser) { p.offset = pos }
+}
+
 // A mode value is a set of flags (or 0).
 // They control the amount of source code parsed and other optional
 // parser functionality.
diff --git a/cue/parser/parser.go b/cue/parser/parser.go
index c65a762..2e99cb1 100644
--- a/cue/parser/parser.go
+++ b/cue/parser/parser.go
@@ -29,6 +29,7 @@
 // The parser structure holds the parser's internal state.
 type parser struct {
 	file    *token.File
+	offset  int
 	errors  errors.Error
 	scanner scanner.Scanner
 
@@ -62,10 +63,12 @@
 }
 
 func (p *parser) init(filename string, src []byte, mode []Option) {
-	p.file = token.NewFile(filename, -1, len(src))
+	p.offset = -1
 	for _, f := range mode {
 		f(p)
 	}
+	p.file = token.NewFile(filename, p.offset, len(src))
+
 	var m scanner.Mode
 	if p.mode&parseCommentsMode != 0 {
 		m = scanner.ScanComments
diff --git a/encoding/json/json.go b/encoding/json/json.go
index 31335db..4551370 100644
--- a/encoding/json/json.go
+++ b/encoding/json/json.go
@@ -16,13 +16,263 @@
 package json
 
 import (
+	"bytes"
+	gojson "encoding/json"
+	"io"
+	"strconv"
+	"strings"
+
 	"cuelang.org/go/cue"
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/literal"
+	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/cue/token"
+	"cuelang.org/go/internal/source"
 	"cuelang.org/go/pkg/encoding/json"
 )
 
+// Valid reports whether data is a valid JSON encoding.
+func Valid(b []byte) bool {
+	return gojson.Valid(b)
+}
+
 // Validate validates JSON and confirms it matches the constraints
 // specified by v.
 func Validate(b []byte, v cue.Value) error {
 	_, err := json.Validate(b, v)
 	return err
 }
+
+// Extract parses JSON to a CUE expression.
+//
+// If src != nil, Extract parses the source from src and the path is for
+// position information. The type of the argument for the src parameter must be
+// string, []byte, or io.Reader. If src == nil, Extract parses the file
+// specified by path.
+func Extract(path string, src interface{}) (ast.Expr, error) {
+	expr, err := extract(path, src)
+	if err != nil {
+		return nil, err
+	}
+	patchExpr(expr)
+	return expr, nil
+}
+
+// Decode converts a JSON file to a CUE instance.
+//
+// If src != nil, Decode parses the source from src and the path is for
+// position information. The type of the argument for the src parameter must be
+// string, []byte, or io.Reader. If src == nil, Decode parses the file
+// specified by path.
+func Decode(r *cue.Runtime, path string, src interface{}) (*cue.Instance, error) {
+	expr, err := extract(path, src)
+	if err != nil {
+		return nil, err
+	}
+	return r.CompileExpr(expr)
+}
+
+func extract(path string, src interface{}) (ast.Expr, error) {
+	b, err := source.Read(path, src)
+	if err != nil {
+		return nil, err
+	}
+	expr, err := parser.ParseExpr(path, b)
+	if err != nil || !gojson.Valid(b) {
+		// Get JSON-specific error, but keep the position of the CUE parse error.
+		p := token.NoPos
+		if pos := errors.Positions(err); len(pos) > 0 {
+			p = pos[0]
+		}
+		var x interface{}
+		err := gojson.Unmarshal(b, &x)
+		return nil, errors.Wrapf(err, p, "invalid JSON for file %q", path)
+	}
+	return expr, nil
+}
+
+// NewDecoder configures a JSON decoder. The path is used to associate position
+// information with each node. The runtime may be nil if the decoder
+// is only used to extract to CUE ast objects.
+func NewDecoder(r *cue.Runtime, path string, src io.Reader) *Decoder {
+	return &Decoder{
+		r:      r,
+		path:   path,
+		dec:    gojson.NewDecoder(src),
+		offset: 1,
+	}
+}
+
+// A Decoder converts JSON values to CUE.
+type Decoder struct {
+	r      *cue.Runtime
+	path   string
+	dec    *gojson.Decoder
+	offset int
+}
+
+// Extract converts the current JSON value to a CUE ast. It returns io.EOF
+// if the input has been exhausted.
+func (d *Decoder) Extract() (ast.Expr, error) {
+	expr, err := d.extract()
+	if err != nil {
+		return expr, err
+	}
+	patchExpr(expr)
+	return expr, nil
+}
+
+func (d *Decoder) extract() (ast.Expr, error) {
+	var raw gojson.RawMessage
+	err := d.dec.Decode(&raw)
+	if err == io.EOF {
+		return nil, err
+	}
+	offset := d.offset
+	d.offset += len(raw)
+	if err != nil {
+		pos := token.NewFile(d.path, offset, len(raw)).Pos(0, 0)
+		return nil, errors.Wrapf(err, pos, "invalid JSON for file %q", d.path)
+	}
+	expr, err := parser.ParseExpr(d.path, []byte(raw), parser.FileOffset(offset))
+	if err != nil {
+		return nil, err
+	}
+	return expr, nil
+}
+
+// Decode converts the current JSON value to a CUE instance. It returns io.EOF
+// if the input has been exhausted.
+func (d *Decoder) Decode() (*cue.Instance, error) {
+	expr, err := d.Extract()
+	if err != nil {
+		return nil, err
+	}
+	return d.r.CompileExpr(expr)
+}
+
+// patchExpr simplifies the AST parsed from JSON.
+// TODO: some of the modifications are already done in format, but are
+// a package deal of a more aggressive simplify. Other pieces of modification
+// should probably be moved to format.
+func patchExpr(n ast.Node) {
+	type info struct {
+		reflow bool
+	}
+	stack := []info{{true}}
+
+	afterFn := func(n ast.Node) {
+		switch n.(type) {
+		case *ast.ListLit, *ast.StructLit:
+			stack = stack[:len(stack)-1]
+		}
+	}
+
+	var beforeFn func(n ast.Node) bool
+
+	beforeFn = func(n ast.Node) bool {
+		isLarge := n.End().Offset()-n.Pos().Offset() > 50
+		descent := true
+
+		switch x := n.(type) {
+		case *ast.ListLit:
+			reflow := true
+			if !isLarge {
+				for _, e := range x.Elts {
+					if hasSpaces(e) {
+						reflow = false
+						break
+					}
+				}
+			}
+			stack = append(stack, info{reflow})
+			if reflow {
+				x.Lbrack = x.Lbrack.WithRel(token.NoRelPos)
+				x.Rbrack = x.Rbrack.WithRel(token.NoRelPos)
+			}
+			return true
+
+		case *ast.StructLit:
+			reflow := true
+			if !isLarge {
+				for _, e := range x.Elts {
+					if f, ok := e.(*ast.Field); !ok || hasSpaces(f) || hasSpaces(f.Value) {
+						reflow = false
+						break
+					}
+				}
+			}
+			stack = append(stack, info{reflow})
+			if reflow {
+				x.Lbrace = x.Lbrace.WithRel(token.NoRelPos)
+				x.Rbrace = x.Rbrace.WithRel(token.NoRelPos)
+			}
+			return true
+
+		case *ast.Field:
+			// label is always a string for JSON.
+			switch {
+			case true:
+				s, ok := x.Label.(*ast.BasicLit)
+				if !ok || s.Kind != token.STRING {
+					break // should not happen: implies invalid JSON
+				}
+
+				u, err := literal.Unquote(s.Value)
+				if err != nil {
+					break // should not happen: implies invalid JSON
+				}
+
+				if q, err := ast.QuoteIdent(u); err != nil || q != u {
+					break
+				}
+
+				x.Label = ast.NewIdent(u)
+				astutil.CopyMeta(x.Label, s)
+			}
+			ast.Walk(x.Value, beforeFn, afterFn)
+			descent = false
+
+		case *ast.BasicLit:
+			if x.Kind == token.STRING && len(x.Value) > 10 {
+				s, err := literal.Unquote(x.Value)
+				if err != nil {
+					break // should not happen: implies invalid JSON
+				}
+
+				lines := strings.Split(s, "\n")
+				if len(lines) == 1 {
+					break
+				}
+				x.Value = quoteMulti(lines, len(stack))
+			}
+		}
+
+		if stack[len(stack)-1].reflow {
+			ast.SetRelPos(n, token.NoRelPos)
+		}
+		return descent
+	}
+
+	ast.Walk(n, beforeFn, afterFn)
+}
+
+func hasSpaces(n ast.Node) bool {
+	return n.Pos().RelPos() > token.NoSpace
+}
+
+func quoteMulti(a []string, indent int) string {
+	b := bytes.Buffer{}
+	prefix := "\n" + strings.Repeat("\t", indent)
+	b.WriteString(`"""`)
+	for _, s := range a {
+		b.WriteString(prefix)
+		q := strconv.Quote(s)
+		b.WriteString(q[1 : len(q)-1])
+	}
+	b.WriteString(prefix)
+	b.WriteString(`"""`)
+	return b.String()
+}
diff --git a/encoding/json/json_test.go b/encoding/json/json_test.go
new file mode 100644
index 0000000..26cafd2
--- /dev/null
+++ b/encoding/json/json_test.go
@@ -0,0 +1,143 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package json
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/format"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestExtract(t *testing.T) {
+	testCases := []struct {
+		name string
+		in   string
+		out  string
+	}{{
+		name: "no expand as JSON is not compact",
+		in:   `{"a": 32}`,
+		out:  `{a: 32}`,
+	}, {
+		name: "break across new lines",
+		in:   `{"a":32,"b":[1,2],"c-d":"foo-bar-baz"}`,
+		out: `{
+	a: 32
+	b: [1, 2]
+	"c-d": "foo-bar-baz"
+}`,
+	}, {
+		name: "multiline string",
+		in:   `"a\nb\uD803\uDE6D\nc\\\t\nd\/"`,
+		out: `"""
+	a
+	b` + "\U00010E6D" + `
+	c\\\t
+	d/
+	"""`,
+	}, {
+		name: "multiline string indented",
+		in:   `{"x":{"y":"a\nb\nc\nd"}}`,
+		out: `{
+	x: {
+		y: """
+			a
+			b
+			c
+			d
+			"""
+	}
+}`,
+	}, {
+		name: "don't create multiline string for label",
+		in:   `{"foo\nbar\nbaz\n": 2}`,
+		out:  `{"foo\nbar\nbaz\n": 2}`,
+	}, {
+		name: "don't cap indentation",
+		in:   `{"a":{"b":{"c":{"d":"a\nb\nc\nd"}}}}`,
+		out: `{
+	a: {
+		b: {
+			c: {
+				d: """
+					a
+					b
+					c
+					d
+					"""
+			}
+		}
+	}
+}`,
+	}, {
+		name: "keep list formatting",
+		in: `[1,2,
+	3]`,
+		out: "[1, 2,\n\t3]",
+	}, {
+		// TODO: format.Node doesn't break large lists, it probably should.
+		name: "large list",
+		in:   `[11111111111,2222222222,3333333333,4444444444,5555555555,6666666666]`,
+		out:  "[11111111111, 2222222222, 3333333333, 4444444444, 5555555555, 6666666666]",
+	}, {
+		name: "reflow large values unconditionally",
+		in:   `{"a": "11111111112222222222333333333344444444445555555555"}`,
+		out:  "{\n\ta: \"11111111112222222222333333333344444444445555555555\"\n}",
+	}, {
+		name: "invalid JSON",
+		in:   `[3_]`,
+		out:  "invalid JSON for file \"invalid JSON\": invalid character '_' after array element",
+	}}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			out := &bytes.Buffer{}
+			e, err := Extract(tc.name, tc.in)
+			toString(out, e, err)
+			assert.Equal(t, tc.out, out.String())
+
+			out = &bytes.Buffer{}
+			d := NewDecoder(nil, tc.name, strings.NewReader(tc.in))
+			for {
+				e, err := d.Extract()
+				if err == io.EOF {
+					break
+				}
+				toString(out, e, err)
+				if err != nil {
+					break
+				}
+			}
+			assert.Equal(t, tc.out, out.String())
+		})
+	}
+}
+
+func toString(w *bytes.Buffer, e ast.Expr, err error) {
+	if err != nil {
+		fmt.Fprint(w, err)
+		return
+	}
+	b, err := format.Node(e)
+	if err != nil {
+		fmt.Fprint(w, err)
+		return
+	}
+	fmt.Fprint(w, string(b))
+}
diff --git a/go.mod b/go.mod
index 3e7db7a..0b23e5c 100644
--- a/go.mod
+++ b/go.mod
@@ -24,3 +24,5 @@
 	golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7
 	gopkg.in/yaml.v2 v2.2.2 // indirect
 )
+
+go 1.12