encoding/protobuf/jsonpb: add Rewrite* for interpreting JSON in PB terms

This allows code that uses the CUE API to modify an ast.Expr
or ast.File to conform to a CUE schema, allowing mappings
that Protobuf allows, but that are otherwise not allowed by
a strict interpretation of the schema.

Note that this assumes that enum integers can be mapped
to strings with a corresponding #intValue field. This is not
yet set by the proto mapping.

Issue #606

Change-Id: I71d7bfa9e69f985c1eaaf1c1e20e5a473b882e70
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/9243
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/encoding/protobuf/jsonpb/decoder.go b/encoding/protobuf/jsonpb/decoder.go
new file mode 100644
index 0000000..7b616d3
--- /dev/null
+++ b/encoding/protobuf/jsonpb/decoder.go
@@ -0,0 +1,325 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+	"encoding/base64"
+	"strings"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/literal"
+	"cuelang.org/go/cue/token"
+	"github.com/cockroachdb/apd/v2"
+)
+
+// Option configures a Decoder created by NewDecoder.
+//
+// There are currently no options.
+type Option func()
+
+// A Decoder interprets CUE expressions as JSON protobuf encodings
+// based on an underlying schema.
+//
+// It bases the mapping on the underlying CUE type, without consulting Protobuf
+// attributes.
+//
+// Mappings per CUE type:
+//  for any CUE type:
+//             null is omitted if null is not specifically allowed.
+//  bytes:     if the expression is a string, it is reinterpreted using a
+//             base64 encoding. Either standard or URL-safe base64 encoding
+//             with/without paddings are accepted.
+//  int:       string values are interpreted as integers
+//  float:     string values are interpreted as numbers, and the values "NaN",
+//             "Infinity", and "-Infinity" are allowed and converted
+//             to corresponding error values.
+//  disjunction of strings:
+//             this is assumed to represent a protobuf enum value. Strings
+//             are left as is. For integers, the disjunction is resolved
+//             by converting it to the string that has a corresponding #intValue
+//             value.
+//  {}:        JSON objects representing any values will be left as is.
+//             If the CUE type corresponding to the URL can be determined within
+//             the module context it will be unified.
+//  time.Time / time.Duration:
+//             left as is
+//  _:         left as is.
+//
+type Decoder struct {
+	schema cue.Value
+}
+
+// NewDecoder creates a Decoder for the given schema. Options are currently ignored.
+func NewDecoder(schema cue.Value, options ...Option) *Decoder {
+	return &Decoder{schema: schema}
+}
+
+// RewriteFile modifies file, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the protocol
+// buffer spec.
+//
+// RewriteFile is intended to be idempotent: calling it multiple times on a
+// file should give the same result.
+func (d *Decoder) RewriteFile(file *ast.File) error {
+	var r rewriter
+	r.rewriteDecls(d.schema, file.Decls)
+	return r.errs
+}
+
+// RewriteExpr modifies expr, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the
+// protocol buffer spec.
+//
+// RewriteExpr is intended to be idempotent: calling it multiple times on an
+// expression should give the same result.
+func (d *Decoder) RewriteExpr(expr ast.Expr) (ast.Expr, error) {
+	var r rewriter
+	x := r.rewrite(d.schema, expr)
+	return x, r.errs
+}
+
+type rewriter struct {
+	errs errors.Error // errors accumulated by addErr; nil if none were reported
+}
+
+func (r *rewriter) addErr(err errors.Error) {
+	r.errs = errors.Append(r.errs, err) // collect instead of aborting, so rewriting continues
+}
+
+func (r *rewriter) addErrf(p token.Pos, schema cue.Value, format string, args ...interface{}) {
+	// Prefix every message with the path of the schema value it concerns.
+	withPath := append([]interface{}{schema.Path()}, args...)
+	r.addErr(errors.Newf(p, "%s: "+format, withPath...))
+}
+
+// rewriteDecls rewrites, in place, the value of each string-labeled field in
+// decls for which schema (or schema.Template) yields an existing value.
+func (r *rewriter) rewriteDecls(schema cue.Value, decls []ast.Decl) {
+	for _, decl := range decls {
+		field, isField := decl.(*ast.Field)
+		if !isField {
+			continue
+		}
+		label := cue.Label(field.Label)
+		if !label.IsString() {
+			continue
+		}
+
+		sub := schema.LookupPath(cue.MakePath(label))
+		if !sub.Exists() {
+			tmpl := schema.Template()
+			if tmpl == nil {
+				continue
+			}
+			sub = tmpl(label.String())
+		}
+		if sub.Exists() {
+			field.Value = r.rewrite(sub, field.Value)
+		}
+	}
+}
+
+// rewrite returns expr reinterpreted for schema following the mapping rules
+// documented on Decoder. Literals are updated in place; errors go to r.errs.
+func (r *rewriter) rewrite(schema cue.Value, expr ast.Expr) (x ast.Expr) {
+	defer func() {
+		if expr != x && x != nil {
+			astutil.CopyMeta(x, expr)
+		}
+	}()
+
+	switch x := expr.(type) {
+	case *ast.BasicLit:
+		if x.Kind != token.NULL {
+			break
+		}
+		if schema.IncompleteKind()&cue.NullKind != 0 {
+			break
+		}
+		switch v, _ := schema.Default(); {
+		case v.IsConcrete():
+			if x, _ := v.Syntax(cue.Final()).(ast.Expr); x != nil {
+				return x
+			}
+		default: // default value for type
+			if x := zeroValue(schema, x); x != nil {
+				return x
+			}
+		}
+
+	case *ast.StructLit:
+		r.rewriteDecls(schema, x.Elts)
+		return x
+
+	case *ast.ListLit:
+		elem, _ := schema.Elem()
+		iter, _ := schema.List()
+		for i, e := range x.Elts {
+			v := elem
+			if iter.Next() {
+				v = iter.Value()
+			}
+			if !v.Exists() {
+				break
+			}
+			x.Elts[i] = r.rewrite(v, e)
+		}
+		return x
+	}
+
+	switch schema.IncompleteKind() {
+	case cue.IntKind, cue.FloatKind, cue.NumberKind:
+		x, q, str := stringValue(expr)
+		if x == nil || !q.IsDouble() {
+			break
+		}
+
+		var info literal.NumInfo
+		if err := literal.ParseNum(str, &info); err != nil {
+			break
+		}
+		x.Value = str
+		x.Kind = token.FLOAT
+		if info.IsInt() {
+			x.Kind = token.INT
+		}
+
+	case cue.BytesKind:
+		x, q, str := stringValue(expr)
+		// Only double-quoted string literals hold base64 text. Anything else,
+		// including a non-literal (nil x) or a bytes literal, is left as is.
+		if x == nil || !q.IsDouble() {
+			break
+		}
+
+		var b []byte
+		var err error
+		for _, enc := range base64Encodings {
+			if b, err = enc.DecodeString(str); err == nil {
+				break
+			}
+		}
+		if err != nil {
+			r.addErrf(expr.Pos(), schema, "failed to decode base64: %v", err)
+			return expr
+		}
+		quoter := literal.Bytes
+		if q.IsMulti() {
+			ws := q.Whitespace()
+			tabs := (strings.Count(ws, " ")+3)/4 + strings.Count(ws, "\t")
+			quoter = quoter.WithTabIndent(tabs)
+		}
+		x.Value = quoter.Quote(string(b))
+		return x
+
+	case cue.StringKind:
+		if s, ok := expr.(*ast.BasicLit); ok && s.Kind == token.INT {
+			var info literal.NumInfo
+			if err := literal.ParseNum(s.Value, &info); err != nil || !info.IsInt() {
+				break
+			}
+			var d apd.Decimal
+			if err := info.Decimal(&d); err != nil {
+				break
+			}
+			enum, err := d.Int64()
+			if err != nil {
+				r.addErrf(expr.Pos(), schema, "invalid enum index: %v", err)
+				return expr
+			}
+			op, values := schema.Expr()
+			if op != cue.OrOp {
+				values = []cue.Value{schema} // allow single values.
+			}
+			for _, v := range values {
+				i, err := v.LookupPath(cue.MakePath(cue.Def("#intValue"))).Int64()
+				if err == nil && i == enum {
+					str, err := v.String()
+					if err != nil {
+						r.addErr(errors.Wrapf(err, v.Pos(), "invalid string enum"))
+						return expr
+					}
+					s.Kind = token.STRING
+					s.Value = literal.String.Quote(str)
+					return s
+				}
+			}
+			r.addErrf(expr.Pos(), schema, "could not locate integer enum value %d", enum)
+		}
+
+	case cue.StructKind, cue.TopKind:
+		// TODO: Detect and mix in type.
+	}
+	return expr
+}
+
+// zeroValue returns the expression used in place of null for the kind of v,
+// or nil if that kind has no such value. For string, bytes, bool and numeric
+// kinds the literal x is overwritten and returned.
+func zeroValue(v cue.Value, x *ast.BasicLit) ast.Expr {
+	// set overwrites the literal x in place and returns it.
+	set := func(kind token.Token, value string) ast.Expr {
+		x.Kind, x.Value = kind, value
+		return x
+	}
+
+	switch v.IncompleteKind() {
+	case cue.StringKind:
+		return set(token.STRING, `""`)
+	case cue.BytesKind:
+		return set(token.STRING, `''`)
+	case cue.BoolKind:
+		return set(token.FALSE, "false")
+	case cue.NumberKind, cue.IntKind, cue.FloatKind:
+		return set(token.INT, "0")
+
+	case cue.StructKind:
+		return ast.NewStruct()
+	case cue.ListKind:
+		return &ast.ListLit{}
+
+	default:
+		return nil
+	}
+}
+
+// stringValue returns x as a token.STRING literal together with its quote
+// information and unquoted text. b is nil if x is not such a literal or if
+// its quotes cannot be parsed or its contents unquoted.
+func stringValue(x ast.Expr) (b *ast.BasicLit, q literal.QuoteInfo, str string) {
+	lit, isLit := x.(*ast.BasicLit)
+	if !isLit || lit.Kind != token.STRING {
+		return nil, q, ""
+	}
+	info, start, _, err := literal.ParseQuotes(lit.Value, lit.Value)
+	if err != nil {
+		return nil, info, ""
+	}
+	text, err := info.Unquote(lit.Value[start:])
+	if err != nil {
+		return nil, info, ""
+	}
+	return lit, info, text
+}
+
+// base64Encodings lists the accepted base64 variants, in the order they are tried.
+var base64Encodings = []base64.Encoding{
+	*base64.StdEncoding,
+	*base64.URLEncoding,
+	*base64.RawStdEncoding,
+	*base64.RawURLEncoding,
+}
diff --git a/encoding/protobuf/jsonpb/decoder_test.go b/encoding/protobuf/jsonpb/decoder_test.go
new file mode 100644
index 0000000..779c5ef
--- /dev/null
+++ b/encoding/protobuf/jsonpb/decoder_test.go
@@ -0,0 +1,133 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/format"
+	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/encoding/json"
+	"cuelang.org/go/encoding/yaml"
+	"cuelang.org/go/internal/cuetest"
+	"cuelang.org/go/internal/cuetxtar"
+)
+
+func TestParse(t *testing.T) {
+	test := cuetxtar.TxTarTest{
+		Root:   "./testdata/decoder",
+		Name:   "jsonpb",
+		Update: cuetest.UpdateGoldenFiles,
+	}
+
+	r := cue.Runtime{}
+
+	test.Run(t, func(t *cuetxtar.Test) {
+		// TODO: use high-level API.
+
+		var schema cue.Value
+		var file *ast.File
+
+		for _, f := range t.Archive.Files {
+			switch {
+			case f.Name == "schema.cue":
+				inst, err := r.Compile(f.Name, f.Data)
+				if err != nil {
+					t.WriteErrors(errors.Promote(err, "test"))
+					return
+				}
+				schema = inst.Value()
+				continue // the schema itself is not rewritten
+
+			case strings.HasPrefix(f.Name, "out/"):
+				continue // expected-output sections are not inputs
+
+			case strings.HasSuffix(f.Name, ".cue"):
+				f, err := parser.ParseFile(f.Name, f.Data, parser.ParseComments)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file = f
+
+			case strings.HasSuffix(f.Name, ".json"):
+				x, err := json.Extract(f.Name, f.Data)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file, err = astutil.ToFile(x)
+				if err != nil {
+					t.Fatal(err)
+				}
+
+			case strings.HasSuffix(f.Name, ".yaml"):
+				f, err := yaml.Extract(f.Name, f.Data)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file = f
+			}
+
+			w := t.Writer(f.Name)
+			err := NewDecoder(schema).RewriteFile(file)
+			if err != nil {
+				errors.Print(w, err, nil)
+				continue // the printed errors are this file's recorded output
+			}
+
+			b, err := format.Node(file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			_, _ = w.Write(b)
+		}
+	})
+}
+
+// For debugging purposes: DO NOT REMOVE.
+func TestX(t *testing.T) {
+	const schema = `
+
+		`
+	const data = `
+`
+	if strings.TrimSpace(data) == "" {
+		t.Skip() // nothing to debug unless data is filled in
+	}
+	var r cue.Runtime
+	inst, err := r.Compile("schema", schema)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	file, err := parser.ParseFile("data", data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := NewDecoder(inst.Value()).RewriteFile(file); err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := format.Node(file)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Error(string(b)) // fails on purpose so the rewritten output is reported
+}
diff --git a/encoding/protobuf/jsonpb/jsonpb.go b/encoding/protobuf/jsonpb/jsonpb.go
new file mode 100644
index 0000000..df07a38
--- /dev/null
+++ b/encoding/protobuf/jsonpb/jsonpb.go
@@ -0,0 +1,17 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package jsonpb rewrites a CUE expression based upon the Protobuf
+// interpretation of JSON.
+package jsonpb
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
new file mode 100644
index 0000000..7476699
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
@@ -0,0 +1,31 @@
+-- schema.cue --
+b: [string]: bytes
+
+-- std.cue --
+b: hello: "SGVsbG8sIOS4lueVjA=="
+b: noPad: "SGVsbG8sIOS4lueVjA"
+b: bar:   "c29tZSBkYXRhIHdpdGggACBhbmQg77u/"
+
+// A large one-line text.
+b: multi: """
+    TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz
+    IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg
+    dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu
+    dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo
+    ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=
+    """
+
+-- url.cue --
+b: bar:   "c29tZSBkYXRhIHdpdGggACBhbmQg77u_"
+
+-- out/jsonpb/std.cue --
+b: hello: 'Hello, 世界'
+b: noPad: 'Hello, 世界'
+b: bar:   'some data with \x00 and \ufeff'
+
+// A large one-line text.
+b: multi: '''
+	Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.
+	'''
+-- out/jsonpb/url.cue --
+b: bar: 'some data with \x00 and \ufeff'
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar
new file mode 100644
index 0000000..cf8fa7f
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar
@@ -0,0 +1,97 @@
+-- schema.cue --
+a: int
+
+strings: {
+    c: string
+    d: "foo" | "bar"
+}
+
+lists: {
+    e: [...int]
+    f: [int, int]
+    g: [int, int, ...int]
+    h: [int, int]
+}
+
+structs: [string]: {
+    a: int
+}
+
+-- data.json --
+{
+    "a": "44",
+    "strings": {
+        "c": "cc",
+        "d": "foo"
+    },
+    "lists": {
+        "e": ["1"],
+        "f": ["2"],
+        "g": ["3", "4", "5"],
+        "h": ["3", "4", "5"]
+    },
+    "structs": {
+        "field": {
+            "a": "1",
+            "b": "2"
+        }
+    },
+    "tail": {}
+}
+-- data.cue --
+a: "44"
+strings: {
+	c: "cc"
+	d: "foo"
+}
+lists: {
+	e: ["1"]
+	f: ["2"]
+	g: ["3", "4", "5"]
+	h: ["3", "4", "5"] // Last element should not be rewritten!
+},
+structs: {
+	field: {
+		a: "1"
+		b: "2"
+	}
+}
+tail: {}
+-- out/jsonpb/data.json --
+a: 44
+strings: {
+	c: "cc"
+	d: "foo"
+}
+lists: {
+	e: [1]
+	f: [2]
+	g: [3, 4, 5]
+	h: [3, 4, "5"]
+}
+structs: {
+	field: {
+		a: 1
+		b: "2"
+	}
+}
+tail: {}
+-- out/jsonpb/data.cue --
+a: 44
+strings: {
+	c: "cc"
+	d: "foo"
+}
+lists: {
+	e: [1]
+	f: [2]
+	g: [3, 4, 5]
+	h: [3, 4, "5"] // Last element should not be rewritten!
+}
+structs: {
+	field: {
+		a: 1
+		b: "2"
+	}
+}
+tail: {}
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar
new file mode 100644
index 0000000..5c4d1a2
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar
@@ -0,0 +1,48 @@
+-- schema.cue --
+enum: [string]: {
+    "foo"
+    #intValue: 1
+} | {
+    "bar"
+    #intValue: 2
+}
+
+singleEnum: { "single", #intValue: 1 }
+
+badEnum: { string, #intValue: 1 } | { "two", #intValue: 2 }
+
+-- data.cue --
+enum: asIs: "foo"
+enum: asIsUnknown: "foobar"
+
+enum: numExistFoo: 1
+enum: numExistBar: 2
+
+singleEnum: 1
+
+-- errors.cue --
+enum: numNotExists: 3
+
+enum: numNotExists: 4
+
+enum: tooLarge: 4_111_222_333_444_555_666_777_888_999
+
+badEnum: 1
+
+-- out/jsonpb/data.cue --
+enum: asIs:        "foo"
+enum: asIsUnknown: "foobar"
+
+enum: numExistFoo: "foo"
+enum: numExistBar: "bar"
+
+singleEnum: "single"
+-- out/jsonpb/errors.cue --
+enum.numNotExists: could not locate integer enum value 3:
+    errors.cue:1:21
+enum.numNotExists: could not locate integer enum value 4:
+    errors.cue:3:21
+enum.tooLarge: invalid enum index: 4111222333444555666777888999: greater than max int64:
+    errors.cue:5:17
+badEnum: invalid string enum: non-concrete value string:
+    schema.cue:11:10
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/null.txtar b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar
new file mode 100644
index 0000000..89f1e60
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar
@@ -0,0 +1,155 @@
+-- schema.cue --
+a0: int
+a1: 1 | *2
+
+a2: string
+a3: "a" | *"b"
+
+a4: bytes
+a5: *'a' | 'b'
+
+a6: [...int]
+a7: *[0] | [...int]
+
+a8: bool
+a9: *true | false
+
+a10: null
+
+a11: null
+a12: null
+
+-- data.yaml --
+# comment a0
+a0: null
+
+# comment a1
+a1: null
+
+# comment a2
+a2: null
+
+# comment a3
+a3: null
+
+# comment a4
+a4: null
+
+# comment a5
+a5: null
+
+# comment a6
+a6: null
+
+# comment a7
+a7: null
+
+# comment a8
+a8: null
+
+# comment a9
+a9: null
+
+# comment a10
+a10: null
+
+-- data.cue --
+// comment a0
+a0: null
+
+// comment a1
+a1: null
+
+// comment a2
+a2: null
+
+// comment a3
+a3: null
+
+// comment a4
+a4: null
+
+// comment a5
+a5: null
+
+// comment a6
+a6: null
+
+// comment a7
+a7: null
+
+// comment a8
+a8: null
+
+// comment a9
+a9: null
+
+// comment a10
+a10: null
+
+-- out/jsonpb/data.yaml --
+	// comment a0
+a0: 0
+
+// comment a1
+a1: 2
+
+// comment a2
+a2: ""
+
+// comment a3
+a3: "b"
+
+// comment a4
+a4: ''
+
+// comment a5
+a5: 'a'
+
+// comment a6
+a6: []
+
+// comment a7
+a7: [0]
+
+// comment a8
+a8: false
+
+// comment a9
+a9: true
+
+// comment a10
+a10: null
+-- out/jsonpb/data.cue --
+	// comment a0
+a0: 0
+
+// comment a1
+a1: 2
+
+// comment a2
+a2: ""
+
+// comment a3
+a3: "b"
+
+// comment a4
+a4: ''
+
+// comment a5
+a5: 'a'
+
+// comment a6
+a6: []
+
+// comment a7
+a7: [0]
+
+// comment a8
+a8: false
+
+// comment a9
+a9: true
+
+// comment a10
+a10: null