encoding/protobuf/jsonpb: add Rewrite* for interpreting JSON in PB terms
This allows code that uses the CUE API to modify an ast.Expr
or ast.File to conform to a CUE schema, allowing mappings
that Protobuf allows, but that are otherwise not allowed by
a strict interpretation of the schema.
Note that this assumes that enum integers can be mapped
to strings with a corresponding #intValue field. This is not
yet set by the proto mapping.
Issue #606
Change-Id: I71d7bfa9e69f985c1eaaf1c1e20e5a473b882e70
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/9243
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/encoding/protobuf/jsonpb/decoder.go b/encoding/protobuf/jsonpb/decoder.go
new file mode 100644
index 0000000..7b616d3
--- /dev/null
+++ b/encoding/protobuf/jsonpb/decoder.go
@@ -0,0 +1,325 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+ "encoding/base64"
+ "strings"
+
+ "cuelang.org/go/cue"
+ "cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/ast/astutil"
+ "cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/literal"
+ "cuelang.org/go/cue/token"
+ "github.com/cockroachdb/apd/v2"
+)
+
+// Option is a functional option that can be passed to NewDecoder.
+//
+// There are currently no options.
+type Option func()
+
+// A Decoder interprets CUE expressions as JSON protobuf encodings
+// based on an underlying schema.
+//
+// It bases the mapping on the underlying CUE type, without consulting Protobuf
+// attributes.
+//
+// Mappings per CUE type:
+//   for any CUE type:
+//              null is replaced by the default or zero value if null is not allowed.
+//   bytes:     if the expression is a string, it is reinterpreted using a
+//              base64 encoding. Either standard or URL-safe base64 encoding,
+//              with or without padding, is accepted.
+//   int:       string values are interpreted as integers
+//   float:     string values are interpreted as numbers, and the values "NaN",
+//              "Infinity", and "-Infinity" are meant to be allowed and converted
+//              to corresponding error values (TODO confirm).
+//   disjunction of strings:
+//              this is assumed to represent a protobuf enum value. Strings
+//              are left as is. For integers, the disjunction is resolved
+//              by converting it to the string that has a corresponding #intValue
+//              value.
+//   {}:        JSON objects representing any values will be left as is.
+//              If the CUE type corresponding to the URL can be determined within
+//              the module context it will be unified.
+//   time.Time / time.Duration:
+//              left as is
+//   _:         left as is.
+//
+type Decoder struct {
+ schema cue.Value
+}
+
+// NewDecoder creates a Decoder for the given schema. Options are currently ignored.
+func NewDecoder(schema cue.Value, options ...Option) *Decoder {
+ return &Decoder{schema: schema}
+}
+
+// RewriteFile modifies file, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the protocol
+// buffer spec.
+//
+// RewriteFile is idempotent, calling it multiples times on an expression gives
+// the same result.
+func (d *Decoder) RewriteFile(file *ast.File) error {
+ var r rewriter
+ r.rewriteDecls(d.schema, file.Decls)
+ return r.errs
+}
+
+// RewriteExpr modifies expr, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the
+// protocol buffer spec.
+//
+// RewriteExpr is idempotent, calling it multiples times on an expression gives
+// the same result.
+func (d *Decoder) RewriteExpr(expr ast.Expr) (ast.Expr, error) {
+ var r rewriter
+ x := r.rewrite(d.schema, expr)
+ return x, r.errs
+}
+
+type rewriter struct {
+ errs errors.Error
+}
+
+// addErr adds err to the errors collected so far, combining them with
+// errors.Append.
+func (r *rewriter) addErr(err errors.Error) { r.errs = errors.Append(r.errs, err) }
+
+// addErrf records an error at p whose message is prefixed by the schema path.
+func (r *rewriter) addErrf(p token.Pos, schema cue.Value, format string, args ...interface{}) {
+	args = append([]interface{}{schema.Path()}, args...)
+	r.addErr(errors.Newf(p, "%s: "+format, args...))
+}
+
+// rewriteDecls rewrites the value of every field in decls that has a string
+// label and a counterpart in schema. The counterpart is found by looking up
+// the label in schema, falling back to schema.Template when the lookup fails.
+// Declarations for which no counterpart exists are left untouched.
+func (r *rewriter) rewriteDecls(schema cue.Value, decls []ast.Decl) {
+	for _, decl := range decls {
+		field, isField := decl.(*ast.Field)
+		if !isField {
+			continue
+		}
+		label := cue.Label(field.Label)
+		if !label.IsString() {
+			continue
+		}
+
+		fieldSchema := schema.LookupPath(cue.MakePath(label))
+		if !fieldSchema.Exists() {
+			if tmpl := schema.Template(); tmpl != nil {
+				fieldSchema = tmpl(label.String())
+			}
+		}
+		if fieldSchema.Exists() {
+			field.Value = r.rewrite(fieldSchema, field.Value)
+		}
+	}
+}
+
+// rewrite returns expr rewritten to conform to schema, recording problems with
+// addErr. Struct and list literals are rewritten in place, recursively. A
+// null that schema does not allow becomes the schema default or a zero value.
+// Quoted numbers, base64 strings and enum integers are converted by kind.
+func (r *rewriter) rewrite(schema cue.Value, expr ast.Expr) (x ast.Expr) {
+	defer func() {
+		if expr != x && x != nil {
+			astutil.CopyMeta(x, expr)
+		}
+	}()
+
+	switch x := expr.(type) {
+	case *ast.BasicLit:
+		if x.Kind != token.NULL {
+			break
+		}
+		if schema.IncompleteKind()&cue.NullKind != 0 {
+			break
+		}
+		switch v, _ := schema.Default(); {
+		case v.IsConcrete():
+			if x, _ := v.Syntax(cue.Final()).(ast.Expr); x != nil {
+				return x
+			}
+		default: // default value for type
+			if x := zeroValue(schema, x); x != nil {
+				return x
+			}
+		}
+
+	case *ast.StructLit:
+		r.rewriteDecls(schema, x.Elts)
+		return x
+
+	case *ast.ListLit:
+		elem, _ := schema.Elem()
+		iter, _ := schema.List()
+		for i, e := range x.Elts {
+			v := elem
+			if iter.Next() {
+				v = iter.Value()
+			}
+			if !v.Exists() {
+				break
+			}
+			x.Elts[i] = r.rewrite(v, e)
+		}
+		return x
+	}
+
+	switch schema.IncompleteKind() {
+	case cue.IntKind, cue.FloatKind, cue.NumberKind:
+		x, q, str := stringValue(expr)
+		if x == nil || !q.IsDouble() {
+			break
+		}
+		var info literal.NumInfo
+		if err := literal.ParseNum(str, &info); err != nil {
+			break
+		}
+		x.Value = str
+		x.Kind = token.FLOAT
+		if info.IsInt() {
+			x.Kind = token.INT
+		}
+
+	case cue.BytesKind:
+		// Only double-quoted strings hold base64 text. Anything else, such as
+		// an existing bytes literal or a non-string expression, is left as is.
+		x, q, str := stringValue(expr)
+		if x == nil || !q.IsDouble() {
+			break
+		}
+		var b []byte
+		var err error
+		for _, enc := range base64Encodings {
+			if b, err = enc.DecodeString(str); err == nil {
+				break
+			}
+		}
+		if err != nil {
+			r.addErrf(expr.Pos(), schema, "failed to decode base64: %v", err)
+			return expr
+		}
+		quoter := literal.Bytes
+		if q.IsMulti() {
+			ws := q.Whitespace()
+			tabs := (strings.Count(ws, " ")+3)/4 + strings.Count(ws, "\t")
+			quoter = quoter.WithTabIndent(tabs)
+		}
+		x.Value = quoter.Quote(string(b))
+		return x
+
+	case cue.StringKind:
+		if s, ok := expr.(*ast.BasicLit); ok && s.Kind == token.INT {
+			var info literal.NumInfo
+			if err := literal.ParseNum(s.Value, &info); err != nil || !info.IsInt() {
+				break
+			}
+			var d apd.Decimal
+			if err := info.Decimal(&d); err != nil {
+				break
+			}
+			enum, err := d.Int64()
+			if err != nil {
+				r.addErrf(expr.Pos(), schema, "invalid enum index: %v", err)
+				return expr
+			}
+			op, values := schema.Expr()
+			if op != cue.OrOp {
+				values = []cue.Value{schema} // allow single values.
+			}
+			for _, v := range values {
+				i, err := v.LookupPath(cue.MakePath(cue.Def("#intValue"))).Int64()
+				if err == nil && i == enum {
+					str, err := v.String()
+					if err != nil {
+						r.addErr(errors.Wrapf(err, v.Pos(), "invalid string enum"))
+						return expr
+					}
+					s.Kind = token.STRING
+					s.Value = literal.String.Quote(str)
+					return s
+				}
+			}
+			r.addErrf(expr.Pos(), schema, "could not locate integer enum value %d", enum)
+		}
+
+	case cue.StructKind, cue.TopKind:
+		// TODO: Detect and mix in type.
+	}
+	return expr
+}
+
+// zeroValue returns the zero value for the kind of v: an empty string or
+// bytes literal, false, 0, an empty struct or an empty list. Basic values are
+// produced by overwriting x in place and returning it; structs and lists are
+// returned as new nodes. It returns nil, leaving x untouched, if the kind of
+// v is none of these.
+func zeroValue(v cue.Value, x *ast.BasicLit) ast.Expr {
+	switch v.IncompleteKind() {
+	// Composite kinds get a fresh, empty literal.
+	case cue.StructKind:
+		return ast.NewStruct()
+	case cue.ListKind:
+		return &ast.ListLit{}
+
+	// Basic kinds are written into x.
+	case cue.StringKind:
+		x.Kind, x.Value = token.STRING, `""`
+	case cue.BytesKind:
+		x.Kind, x.Value = token.STRING, `''`
+	case cue.BoolKind:
+		x.Kind, x.Value = token.FALSE, "false"
+	case cue.NumberKind, cue.IntKind, cue.FloatKind:
+		x.Kind, x.Value = token.INT, "0"
+
+	// Kinds not listed above have no zero value here.
+	default:
+		return nil
+	}
+	return x
+}
+
+// stringValue returns the literal, quote information and unquoted contents of
+// x if x is a basic literal of kind token.STRING whose quotes can be parsed
+// and removed. Otherwise it returns a nil literal and an empty string.
+func stringValue(x ast.Expr) (b *ast.BasicLit, q literal.QuoteInfo, str string) {
+	lit, isLit := x.(*ast.BasicLit)
+	if !isLit || lit.Kind != token.STRING {
+		return nil, q, ""
+	}
+	q, start, _, err := literal.ParseQuotes(lit.Value, lit.Value)
+	if err == nil {
+		str, err = q.Unquote(lit.Value[start:])
+	}
+	if err != nil {
+		return nil, q, ""
+	}
+	return lit, q, str
+}
+
+// base64Encodings lists all the allowed base64 encodings, in the order they are tried.
+var base64Encodings = []base64.Encoding{
+ *base64.StdEncoding,
+ *base64.URLEncoding,
+ *base64.RawStdEncoding,
+ *base64.RawURLEncoding,
+}
diff --git a/encoding/protobuf/jsonpb/decoder_test.go b/encoding/protobuf/jsonpb/decoder_test.go
new file mode 100644
index 0000000..779c5ef
--- /dev/null
+++ b/encoding/protobuf/jsonpb/decoder_test.go
@@ -0,0 +1,133 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+ "strings"
+ "testing"
+
+ "cuelang.org/go/cue"
+ "cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/ast/astutil"
+ "cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/format"
+ "cuelang.org/go/cue/parser"
+ "cuelang.org/go/encoding/json"
+ "cuelang.org/go/encoding/yaml"
+ "cuelang.org/go/internal/cuetest"
+ "cuelang.org/go/internal/cuetxtar"
+)
+
+func TestParse(t *testing.T) {
+ test := cuetxtar.TxTarTest{
+ Root: "./testdata/decoder",
+ Name: "jsonpb",
+ Update: cuetest.UpdateGoldenFiles,
+ }
+
+ r := cue.Runtime{}
+
+ test.Run(t, func(t *cuetxtar.Test) {
+ // Rewrite each input file of the archive against its schema.cue.
+ // TODO: use high-level API.
+ var schema cue.Value
+ var file *ast.File
+
+ for _, f := range t.Archive.Files {
+ switch {
+ case f.Name == "schema.cue":
+ inst, err := r.Compile(f.Name, f.Data)
+ if err != nil {
+ t.WriteErrors(errors.Promote(err, "test"))
+ return
+ }
+ schema = inst.Value()
+ continue
+
+ case strings.HasPrefix(f.Name, "out/"):
+ continue
+
+ case strings.HasSuffix(f.Name, ".cue"):
+ f, err := parser.ParseFile(f.Name, f.Data, parser.ParseComments)
+ if err != nil {
+ t.Fatal(err)
+ }
+ file = f
+
+ case strings.HasSuffix(f.Name, ".json"):
+ x, err := json.Extract(f.Name, f.Data)
+ if err != nil {
+ t.Fatal(err)
+ }
+ file, err = astutil.ToFile(x)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ case strings.HasSuffix(f.Name, ".yaml"):
+ f, err := yaml.Extract(f.Name, f.Data)
+ if err != nil {
+ t.Fatal(err)
+ }
+ file = f
+ }
+ // Write the rewritten file, or the errors, to the output for f.Name.
+ w := t.Writer(f.Name)
+ err := NewDecoder(schema).RewriteFile(file)
+ if err != nil {
+ errors.Print(w, err, nil)
+ continue
+ }
+
+ b, err := format.Node(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ _, _ = w.Write(b)
+ }
+ })
+}
+
+// TestX is a scratch test for debugging; it is skipped while data is empty. DO NOT REMOVE.
+func TestX(t *testing.T) {
+ const schema = `
+
+ `
+ const data = `
+`
+ if strings.TrimSpace(data) == "" {
+ t.Skip()
+ }
+ var r cue.Runtime
+ inst, err := r.Compile("schema", schema)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ file, err := parser.ParseFile("data", data)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if err := NewDecoder(inst.Value()).RewriteFile(file); err != nil {
+ t.Fatal(err)
+ }
+
+ b, err := format.Node(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Error(string(b))
+}
diff --git a/encoding/protobuf/jsonpb/jsonpb.go b/encoding/protobuf/jsonpb/jsonpb.go
new file mode 100644
index 0000000..df07a38
--- /dev/null
+++ b/encoding/protobuf/jsonpb/jsonpb.go
@@ -0,0 +1,17 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package jsonpb rewrites a CUE expression based upon the Protobuf
+// interpretation of JSON.
+package jsonpb
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
new file mode 100644
index 0000000..7476699
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
@@ -0,0 +1,31 @@
+-- schema.cue --
+b: [string]: bytes
+
+-- std.cue --
+b: hello: "SGVsbG8sIOS4lueVjA=="
+b: noPad: "SGVsbG8sIOS4lueVjA"
+b: bar: "c29tZSBkYXRhIHdpdGggACBhbmQg77u/"
+
+// A large one-line text.
+b: multi: """
+ TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz
+ IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg
+ dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu
+ dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo
+ ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=
+ """
+
+-- url.cue --
+b: bar: "c29tZSBkYXRhIHdpdGggACBhbmQg77u_"
+
+-- out/jsonpb/std.cue --
+b: hello: 'Hello, 世界'
+b: noPad: 'Hello, 世界'
+b: bar: 'some data with \x00 and \ufeff'
+
+// A large one-line text.
+b: multi: '''
+ Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.
+ '''
+-- out/jsonpb/url.cue --
+b: bar: 'some data with \x00 and \ufeff'
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar
new file mode 100644
index 0000000..cf8fa7f
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar
@@ -0,0 +1,97 @@
+-- schema.cue --
+a: int
+
+strings: {
+ c: string
+ d: "foo" | "bar"
+}
+
+lists: {
+ e: [...int]
+ f: [int, int]
+ g: [int, int, ...int]
+ h: [int, int]
+}
+
+structs: [string]: {
+ a: int
+}
+
+-- data.json --
+{
+ "a": "44",
+ "strings": {
+ "c": "cc",
+ "d": "foo"
+ },
+ "lists": {
+ "e": ["1"],
+ "f": ["2"],
+ "g": ["3", "4", "5"],
+ "h": ["3", "4", "5"]
+ },
+ "structs": {
+ "field": {
+ "a": "1",
+ "b": "2"
+ }
+ },
+ "tail": {}
+}
+-- data.cue --
+a: "44"
+strings: {
+ c: "cc"
+ d: "foo"
+}
+lists: {
+ e: ["1"]
+ f: ["2"]
+ g: ["3", "4", "5"]
+ h: ["3", "4", "5"] // Last element should not be rewritten!
+},
+structs: {
+ field: {
+ a: "1"
+ b: "2"
+ }
+}
+tail: {}
+-- out/jsonpb/data.json --
+a: 44
+strings: {
+ c: "cc"
+ d: "foo"
+}
+lists: {
+ e: [1]
+ f: [2]
+ g: [3, 4, 5]
+ h: [3, 4, "5"]
+}
+structs: {
+ field: {
+ a: 1
+ b: "2"
+ }
+}
+tail: {}
+-- out/jsonpb/data.cue --
+a: 44
+strings: {
+ c: "cc"
+ d: "foo"
+}
+lists: {
+ e: [1]
+ f: [2]
+ g: [3, 4, 5]
+ h: [3, 4, "5"] // Last element should not be rewritten!
+}
+structs: {
+ field: {
+ a: 1
+ b: "2"
+ }
+}
+tail: {}
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar
new file mode 100644
index 0000000..5c4d1a2
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar
@@ -0,0 +1,48 @@
+-- schema.cue --
+enum: [string]: {
+ "foo"
+ #intValue: 1
+} | {
+ "bar"
+ #intValue: 2
+}
+
+singleEnum: { "single", #intValue: 1 }
+
+badEnum: { string, #intValue: 1 } | { "two", #intValue: 2 }
+
+-- data.cue --
+enum: asIs: "foo"
+enum: asIsUnknown: "foobar"
+
+enum: numExistFoo: 1
+enum: numExistBar: 2
+
+singleEnum: 1
+
+-- errors.cue --
+enum: numNotExists: 3
+
+enum: numNotExists: 4
+
+enum: tooLarge: 4_111_222_333_444_555_666_777_888_999
+
+badEnum: 1
+
+-- out/jsonpb/data.cue --
+enum: asIs: "foo"
+enum: asIsUnknown: "foobar"
+
+enum: numExistFoo: "foo"
+enum: numExistBar: "bar"
+
+singleEnum: "single"
+-- out/jsonpb/errors.cue --
+enum.numNotExists: could not locate integer enum value 3:
+ errors.cue:1:21
+enum.numNotExists: could not locate integer enum value 4:
+ errors.cue:3:21
+enum.tooLarge: invalid enum index: 4111222333444555666777888999: greater than max int64:
+ errors.cue:5:17
+badEnum: invalid string enum: non-concrete value string:
+ schema.cue:11:10
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/null.txtar b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar
new file mode 100644
index 0000000..89f1e60
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar
@@ -0,0 +1,155 @@
+-- schema.cue --
+a0: int
+a1: 1 | *2
+
+a2: string
+a3: "a" | *"b"
+
+a4: bytes
+a5: *'a' | 'b'
+
+a6: [...int]
+a7: *[0] | [...int]
+
+a8: bool
+a9: *true | false
+
+a10: null
+
+a11: null
+a12: null
+
+-- data.yaml --
+# comment a0
+a0: null
+
+# comment a1
+a1: null
+
+# comment a2
+a2: null
+
+# comment a3
+a3: null
+
+# comment a4
+a4: null
+
+# comment a5
+a5: null
+
+# comment a6
+a6: null
+
+# comment a7
+a7: null
+
+# comment a8
+a8: null
+
+# comment a9
+a9: null
+
+# comment a10
+a10: null
+
+-- data.cue --
+// comment a0
+a0: null
+
+// comment a1
+a1: null
+
+// comment a2
+a2: null
+
+// comment a3
+a3: null
+
+// comment a4
+a4: null
+
+// comment a5
+a5: null
+
+// comment a6
+a6: null
+
+// comment a7
+a7: null
+
+// comment a8
+a8: null
+
+// comment a9
+a9: null
+
+// comment a10
+a10: null
+
+-- out/jsonpb/data.yaml --
+ // comment a0
+a0: 0
+
+// comment a1
+a1: 2
+
+// comment a2
+a2: ""
+
+// comment a3
+a3: "b"
+
+// comment a4
+a4: ''
+
+// comment a5
+a5: 'a'
+
+// comment a6
+a6: []
+
+// comment a7
+a7: [0]
+
+// comment a8
+a8: false
+
+// comment a9
+a9: true
+
+// comment a10
+a10: null
+-- out/jsonpb/data.cue --
+ // comment a0
+a0: 0
+
+// comment a1
+a1: 2
+
+// comment a2
+a2: ""
+
+// comment a3
+a3: "b"
+
+// comment a4
+a4: ''
+
+// comment a5
+a5: 'a'
+
+// comment a6
+a6: []
+
+// comment a7
+a7: [0]
+
+// comment a8
+a8: false
+
+// comment a9
+a9: true
+
+// comment a10
+a10: null