internal/protobuf: add proto definition extraction
Change-Id: Ia356bcb951e30bcc8030b51cbc5d9949391e84dc
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/1980
Reviewed-by: Marcel van Lohuizen <mpvl@google.com>
diff --git a/internal/protobuf/parse.go b/internal/protobuf/parse.go
new file mode 100644
index 0000000..8b8ede9
--- /dev/null
+++ b/internal/protobuf/parse.go
@@ -0,0 +1,620 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package protobuf
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+
+ "cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/parser"
+ "cuelang.org/go/cue/token"
+ "github.com/emicklei/proto"
+ "golang.org/x/xerrors"
+)
+
+// sharedState holds the settings shared by a converter and the converters
+// created for the files it imports.
+type sharedState struct {
+ paths []string // directories searched, in order, for imported proto files
+}
+
+// parse converts the proto definition in filename to a CUE file.
+//
+// If r is nil the source is read from filename; otherwise it is read from r
+// and filename only labels the parser (when non-empty), the resulting
+// ast.File and any error. Package information is collected first, then the
+// imports are processed (which may parse further files through s), then every
+// top-level element is converted. Finally one import spec is emitted, in
+// sorted order, for each Go package path recorded in p.used.
+//
+// A protoError panic raised during conversion is recovered and returned as a
+// *ProtoError carrying the file name and the path of the element that was
+// being converted; any other panic is re-raised. When such a conversion error
+// is returned, p may be partially populated and must not be used.
+func (s *sharedState) parse(filename string, r io.Reader) (p *protoConverter, err error) {
+	// Determine files to convert.
+	if r == nil {
+		f, err := os.Open(filename)
+		if err != nil {
+			return nil, xerrors.Errorf("protobuf: %w", err)
+		}
+		defer f.Close()
+		r = f
+	}
+
+	// Deliberately not named "parser": that would shadow the imported CUE
+	// parser package for the remainder of this function.
+	protoParser := proto.NewParser(r)
+	if filename != "" {
+		protoParser.Filename(filename)
+	}
+	d, err := protoParser.Parse()
+	if err != nil {
+		return nil, xerrors.Errorf("protobuf: %w", err)
+	}
+
+	p = &protoConverter{
+		state:   s,
+		used:    map[string]bool{},
+		symbols: map[string]bool{},
+	}
+
+	// Turn conversion failures, which are signalled by panicking with a
+	// protoError, into a regular error result.
+	defer func() {
+		switch x := recover().(type) {
+		case nil:
+		case protoError:
+			err = &ProtoError{
+				Filename: filename,
+				Path:     strings.Join(p.path, "."),
+				Err:      x.error,
+			}
+		default:
+			panic(x)
+		}
+	}()
+
+	p.file = &ast.File{Filename: filename}
+
+	p.addNames(d.Elements)
+
+	// Parse package definitions.
+	for _, e := range d.Elements {
+		switch x := e.(type) {
+		case *proto.Package:
+			p.protoPkg = x.Name
+		case *proto.Option:
+			if x.Name == "go_package" {
+				str, err := strconv.Unquote(x.Constant.SourceRepresentation())
+				if err != nil {
+					failf("unquoting go_package option: %v", err)
+				}
+				// The option has the form "import/path" or
+				// "import/path;name".
+				split := strings.Split(str, ";")
+				p.goPkgPath = split[0]
+				switch len(split) {
+				case 1:
+					p.goPkg = path.Base(str)
+				case 2:
+					p.goPkg = split[1]
+				default:
+					failf("unexpected ';' in %q", str)
+				}
+				p.file.Name = ast.NewIdent(p.goPkg)
+				// name.AddComment(comment(x.Comment, true))
+				// name.AddComment(comment(x.InlineComment, false))
+			}
+		}
+	}
+
+	// Imports are processed after the package information is known, as
+	// doImport compares the package of each import with the current one.
+	for _, e := range d.Elements {
+		switch x := e.(type) {
+		case *proto.Import:
+			p.doImport(x)
+		}
+	}
+
+	// Reserve the first declaration for the imports; the set of packages
+	// that are actually used is only known once all elements are converted.
+	imports := &ast.ImportDecl{}
+	p.file.Decls = append(p.file.Decls, imports)
+
+	for _, e := range d.Elements {
+		p.topElement(e)
+	}
+
+	// Sort the used packages so that the output is deterministic.
+	used := make([]string, 0, len(p.used))
+	for k := range p.used {
+		used = append(used, k)
+	}
+	sort.Strings(used)
+
+	for _, v := range used {
+		imports.Specs = append(imports.Specs, &ast.ImportSpec{
+			Path: &ast.BasicLit{Kind: token.STRING, Value: strconv.Quote(v)},
+		})
+	}
+
+	// Drop the reserved import declaration again if it ended up empty.
+	if len(imports.Specs) == 0 {
+		p.file.Decls = p.file.Decls[1:]
+	}
+
+	return p, nil
+}
+
+// A protoConverter converts a proto definition to CUE. Proto files map to
+// CUE files one to one.
+type protoConverter struct {
+ state *sharedState // settings shared with the converters of imported files
+
+ proto3 bool // set when the syntax statement reads "proto3"
+
+ protoPkg string // name given in the proto package statement
+ goPkg string // Go package name derived from the go_package option
+ goPkgPath string // Go import path derived from the go_package option
+
+ // w bytes.Buffer
+ file *ast.File // the CUE file being generated
+ inBody bool // if set, top-level comments are emitted as declarations; NOTE(review): never set in the visible code
+
+ imports map[string]string // NOTE(review): not used in the visible code
+ used map[string]bool // Go import paths of the imported packages that are referenced
+
+ path []string // names of the definitions enclosing the element being converted
+ scope []map[string]mapping // for symbols resolution within package.
+ symbols map[string]bool // symbols provided by package
+}
+
+// mapping describes what a proto name visible in some scope resolves to.
+// The field order matters: doImport constructs values positionally.
+type mapping struct {
+ ref string // CUE reference to emit for the name
+ pkg *protoConverter // converter of the imported package providing the name; nil for the current package
+}
+
+// pkgInfo groups the names under which a package is known.
+// NOTE(review): not referenced in the visible part of this file.
+type pkgInfo struct {
+ importPath string // the import path
+ goPath string // The Go import path
+ shortName string // Used for the cue package path, default is base of goPath
+}
+
+// addRef records in the innermost scope that the proto name from resolves to
+// the CUE reference to. Defining the same name twice in one scope is reported
+// as a failure.
+func (p *protoConverter) addRef(from, to string) {
+	innermost := p.scope[len(p.scope)-1]
+	_, exists := innermost[from]
+	if exists {
+		failf("entity %q already defined", from)
+	}
+	innermost[from] = mapping{ref: to}
+}
+
+func (p *protoConverter) addNames(elems []proto.Visitee) {
+ p.scope = append(p.scope, map[string]mapping{})
+ for _, e := range elems {
+ var name string
+ switch x := e.(type) {
+ case *proto.Message:
+ if x.IsExtend {
+ continue
+ }
+ name = x.Name
+ case *proto.Enum:
+ name = x.Name
+ default:
+ continue
+ }
+ sym := strings.Join(append(p.path, name), ".")
+ p.symbols[sym] = true
+ p.addRef(name, strings.Join(append(p.path, name), "_"))
+ }
+}
+
+// popNames closes the innermost scope opened by addNames.
+func (p *protoConverter) popNames() {
+	last := len(p.scope) - 1
+	p.scope = p.scope[:last]
+}
+
+// resolve returns the CUE reference for the proto type name.
+//
+// A name with a leading dot is looked up in the outermost scope only. Any
+// other name is searched from the innermost scope outwards; the outermost
+// scope, which needs prefix matching, is handled by resolveTopScope.
+func (p *protoConverter) resolve(name string, options []*proto.Option) string {
+	if len(name) > 0 && name[0] == '.' {
+		return p.resolveTopScope(name[1:], options)
+	}
+	for depth := len(p.scope); depth > 1; depth-- {
+		m, found := p.scope[depth-1][name]
+		if found {
+			return m.ref
+		}
+	}
+	return p.resolveTopScope(name, options)
+}
+
+// resolveTopScope looks up name in the outermost scope.
+//
+// Every prefix of name that ends just before a dot is tried, from shortest to
+// longest, followed by the full name. On the first match the mapped reference
+// is returned with the unmatched remainder of name appended, and the package
+// providing the symbol, if any, is marked as used. A name matching nothing is
+// handed to protoToCUE; if that does not recognize it either, the conversion
+// fails.
+func (p *protoConverter) resolveTopScope(name string, options []*proto.Option) string {
+	for end := 0; end < len(name); end++ {
+		// Advance end to the next dot, or to the end of name if none is left.
+		if dot := strings.IndexByte(name[end:], '.'); dot >= 0 {
+			end += dot
+		} else {
+			end = len(name)
+		}
+		m, found := p.scope[0][name[:end]]
+		if !found {
+			continue
+		}
+		if m.pkg != nil {
+			p.used[m.pkg.goPkgPath] = true
+		}
+		return m.ref + name[end:]
+	}
+	if s, ok := protoToCUE(name, options); ok {
+		return s
+	}
+	failf("name %q not found", name)
+	return ""
+}
+
+func (p *protoConverter) doImport(v *proto.Import) {
+ if v.Filename == "cuelang/cue.proto" {
+ return
+ }
+
+ filename := ""
+ for _, p := range p.state.paths {
+ name := filepath.Join(p, v.Filename)
+ _, err := os.Stat(name)
+ if err != nil {
+ continue
+ }
+ filename = name
+ break
+ }
+
+ if filename == "" {
+ p.mustBuiltinPackage(v.Filename)
+ return
+ }
+
+ imp, err := p.state.parse(filename, nil)
+ if err != nil {
+ fail(err)
+ }
+
+ prefix := ""
+ if imp.goPkgPath != p.goPkgPath {
+ prefix = imp.goPkg + "."
+ }
+
+ pkgNamespace := strings.Split(imp.protoPkg, ".")
+ curNamespace := strings.Split(p.protoPkg, ".")
+ for {
+ for k := range imp.symbols {
+ ref := k
+ if len(pkgNamespace) > 0 {
+ ref = strings.Join(append(pkgNamespace, k), ".")
+ }
+ if _, ok := p.scope[0][ref]; !ok {
+ pkg := imp
+ if imp.goPkgPath == p.goPkgPath {
+ pkg = nil
+ }
+ p.scope[0][ref] = mapping{prefix + k, pkg}
+ }
+ }
+ if len(pkgNamespace) == 0 {
+ break
+ }
+ if len(curNamespace) == 0 || pkgNamespace[0] != curNamespace[0] {
+ break
+ }
+ pkgNamespace = pkgNamespace[1:]
+ curNamespace = curNamespace[1:]
+ }
+}
+
+// stringLit returns a string literal node holding the quoted form of s.
+func (p *protoConverter) stringLit(s string) *ast.BasicLit {
+	lit := &ast.BasicLit{Kind: token.STRING}
+	lit.Value = strconv.Quote(s)
+	return lit
+}
+
+// ref returns an identifier for the current path, with the path elements
+// joined by underscores.
+func (p *protoConverter) ref() *ast.Ident {
+	name := strings.Join(p.path, "_")
+	return ast.NewIdent(name)
+}
+
+// subref returns an identifier for name nested under the current path, with
+// all elements joined by underscores.
+func (p *protoConverter) subref(name string) *ast.Ident {
+	parts := append(p.path, name)
+	return ast.NewIdent(strings.Join(parts, "_"))
+}
+
+// addTag appends a @protobuf attribute with the given body to f.
+func (p *protoConverter) addTag(f *ast.Field, body string) {
+	attr := &ast.Attribute{Text: "@protobuf(" + body + ")"}
+	f.Attrs = append(f.Attrs, attr)
+}
+
+// topElement converts a single top-level element of a proto file. Options
+// and imports are ignored here, as parse has already processed them; element
+// types that are not supported are reported as a failure.
+func (p *protoConverter) topElement(v proto.Visitee) {
+	switch x := v.(type) {
+	case *proto.Option, *proto.Import:
+		// already handled.
+
+	case *proto.Syntax:
+		p.proto3 = x.Value == "proto3"
+
+	case *proto.Package:
+		doc := x.Doc()
+		if doc == nil {
+			break
+		}
+		addComments(p.file, 0, doc, nil)
+		// p.inBody bool
+
+	case *proto.Comment:
+		if !p.inBody {
+			addComments(p.file, 0, x, nil)
+			break
+		}
+		p.file.Decls = append(p.file.Decls, comment(x, true))
+
+	case *proto.Message:
+		p.message(x)
+
+	case *proto.Enum:
+		p.enum(x)
+
+	default:
+		failf("unsupported type %T", v)
+	}
+}
+
+// message converts a proto message to a field of the CUE file. The field is
+// labeled with the underscore-joined path of the message and holds a struct
+// to which the elements of the message are added.
+func (p *protoConverter) message(v *proto.Message) {
+	// Extend the path for the duration of this message only.
+	saved := p.path
+	defer func() { p.path = saved }()
+	p.path = append(p.path, v.Name)
+
+	// Make the definitions nested in this message resolvable by name.
+	p.addNames(v.Elements)
+	defer p.popNames()
+
+	// TODO: handle IsExtend/ proto2
+
+	// TODO: set proto file position.
+	body := &ast.StructLit{}
+
+	label := p.ref()
+	if v.Comment == nil {
+		label.NamePos = newSection
+	}
+	field := &ast.Field{Label: label, Value: body}
+	addComments(field, 1, v.Comment, nil)
+
+	// In CUE a message is always defined at the top level.
+	p.file.Decls = append(p.file.Decls, field)
+
+	for idx := range v.Elements {
+		p.messageField(body, idx, v.Elements[idx])
+	}
+}
+
+// messageField converts element v, found at index i of a message, adding any
+// resulting field to s. Nested enums, messages and oneofs are converted by
+// their own methods; element types that are not supported are reported as a
+// failure.
+func (p *protoConverter) messageField(s *ast.StructLit, i int, v proto.Visitee) {
+	switch x := v.(type) {
+	case *proto.Message:
+		p.message(x)
+
+	case *proto.Enum:
+		p.enum(x)
+
+	case *proto.Oneof:
+		p.oneOf(x)
+
+	case *proto.Comment:
+		s.Elts = append(s.Elts, comment(x, true))
+
+	case *proto.NormalField:
+		field := p.parseField(s, i, x.Field)
+		if !x.Repeated {
+			break
+		}
+		// A repeated field is a list of the element type.
+		field.Value = &ast.ListLit{
+			Ellipsis: token.Pos(token.NoSpace),
+			Type:     field.Value,
+		}
+
+	case *proto.MapField:
+		// All keys are converted to strings.
+		// TODO: support integer keys.
+		entry := &ast.Field{
+			Label: &ast.TemplateLabel{Ident: ast.NewIdent("_")},
+			Value: ast.NewIdent(p.resolve(x.Type, x.Options)),
+		}
+
+		name := labelName(x.Name)
+		field := &ast.Field{
+			Label: ast.NewIdent(name),
+			Value: &ast.StructLit{Elts: []ast.Decl{entry}},
+		}
+		addComments(field, i, x.Comment, x.InlineComment)
+
+		opts := optionParser{
+			message: s,
+			field:   field,
+			tags:    fmt.Sprintf("%d,type=map<%s,%s>", x.Sequence, x.KeyType, x.Type),
+		}
+		// NOTE(review): parseField emits ",name=<name>" in this situation,
+		// while the bare name is appended here; confirm this is intended.
+		if x.Name != name {
+			opts.tags += "," + x.Name
+		}
+		s.Elts = append(s.Elts, field)
+		opts.parse(x.Options)
+		p.addTag(field, opts.tags)
+
+	default:
+		failf("unsupported type %T", v)
+	}
+}
+
+// enum converts a proto enum definition to CUE.
+//
+// An enum will generate two top-level definitions:
+//
+//	Enum:
+//	  "Value1" |
+//	  "Value2" |
+//	  "Value3"
+//
+// and
+//
+//	Enum_value: {
+//	  "Value1": 0
+//	  "Value2": 1
+//	}
+//
+// Enums are always defined at the top level. The name of a nested enum
+// will be prefixed with the name of its parent and an underscore.
+//
+// An enum without any elements is reported as a failure.
+func (p *protoConverter) enum(x *proto.Enum) {
+	if len(x.Elements) == 0 {
+		failf("empty enum")
+	}
+
+	name := p.subref(x.Name)
+
+	// addNames pushes a new scope, which must be popped again on return.
+	// Otherwise the popNames deferred by an enclosing message removes this
+	// scope instead of its own, and the names nested in that message stay
+	// visible to the definitions that follow it.
+	p.addNames(x.Elements)
+	defer p.popNames()
+
+	// For a top-level enum, temporarily record its name as the path. In the
+	// visible code the path is otherwise only used for error reporting.
+	if len(p.path) == 0 {
+		defer func() { p.path = p.path[:0] }()
+		p.path = append(p.path, x.Name)
+	}
+
+	// Top-level enum entry.
+	enum := &ast.Field{Label: name}
+	addComments(enum, 1, x.Comment, nil)
+
+	// Top-level enum values entry.
+	valueName := ast.NewIdent(name.Name + "_value")
+	valueName.NamePos = newSection
+	valueMap := &ast.StructLit{}
+	d := &ast.Field{Label: valueName, Value: valueMap}
+	// addComments(valueMap, 1, x.Comment, nil)
+
+	p.file.Decls = append(p.file.Decls, enum, d)
+
+	// The line comments for an enum field need to attach after the '|', which
+	// is only known at the next iteration.
+	var lastComment *proto.Comment
+	for i, v := range x.Elements {
+		switch y := v.(type) {
+		case *proto.EnumField:
+			// Add enum value to map
+			f := &ast.Field{
+				Label: p.stringLit(y.Name),
+				Value: &ast.BasicLit{Value: strconv.Itoa(y.Integer)},
+			}
+			valueMap.Elts = append(valueMap.Elts, f)
+
+			// add to enum disjunction
+			value := p.stringLit(y.Name)
+
+			var e ast.Expr = value
+			// Make the first value the default value.
+			// NOTE(review): i indexes all elements, not only the enum
+			// fields, so no value is marked as default if another kind of
+			// element comes first; confirm whether that is intended.
+			if i == 0 {
+				e = &ast.UnaryExpr{OpPos: newline, Op: token.MUL, X: value}
+			} else {
+				value.ValuePos = newline
+			}
+			addComments(e, i, y.Comment, nil)
+			if enum.Value != nil {
+				e = &ast.BinaryExpr{X: enum.Value, Op: token.OR, Y: e}
+				if cg := comment(lastComment, false); cg != nil {
+					cg.Position = 2
+					e.AddComment(cg)
+				}
+			}
+			enum.Value = e
+
+			if y.Comment != nil {
+				lastComment = nil
+				addComments(f, 0, nil, y.InlineComment)
+			} else {
+				lastComment = y.InlineComment
+			}
+
+			// a := fmt.Sprintf("@protobuf(enum,name=%s)", y.Name)
+			// f.Attrs = append(f.Attrs, &ast.Attribute{Text: a})
+		}
+	}
+	addComments(enum.Value, 1, nil, lastComment)
+}
+
+func (p *protoConverter) oneOf(x *proto.Oneof) {
+ f := &ast.Field{
+ Label: p.ref(),
+ }
+ f.AddComment(comment(x.Comment, true))
+
+ p.file.Decls = append(p.file.Decls, f)
+
+ for _, v := range x.Elements {
+ s := &ast.StructLit{}
+ switch x := v.(type) {
+ case *proto.OneOfField:
+ f := p.parseField(s, 0, x.Field)
+ f.Optional = token.NoPos
+
+ default:
+ p.messageField(s, 1, v)
+ }
+ var e ast.Expr = s
+ if f.Value != nil {
+ e = &ast.BinaryExpr{X: f.Value, Op: token.OR, Y: s}
+ }
+ f.Value = e
+ }
+}
+
+// parseField converts the proto field x, found at index i of its parent, to
+// a CUE field, appends it to s and returns it. The field gets a @protobuf
+// attribute and is marked optional unless its options declare it required.
+func (p *protoConverter) parseField(s *ast.StructLit, i int, x *proto.Field) *ast.Field {
+	field := &ast.Field{}
+	addComments(field, i, x.Comment, x.InlineComment)
+
+	name := labelName(x.Name)
+	field.Label = ast.NewIdent(name)
+	typ := p.resolve(x.Type, x.Options)
+	field.Value = ast.NewIdent(typ)
+	s.Elts = append(s.Elts, field)
+
+	// body of @protobuf tag: sequence[,type][,name=<name>][,...]
+	// The type and name are only recorded if the conversion changed them.
+	opts := optionParser{message: s, field: field, tags: fmt.Sprint(x.Sequence)}
+	if typ != x.Type {
+		opts.tags += ",type=" + x.Type
+	}
+	if name != x.Name {
+		opts.tags += ",name=" + x.Name
+	}
+	opts.parse(x.Options)
+	p.addTag(field, opts.tags)
+
+	if opts.required {
+		return field
+	}
+	field.Optional = token.Pos(token.NoSpace)
+	return field
+}
+
+// optionParser collects the result of interpreting the options of a single
+// field.
+type optionParser struct {
+ message *ast.StructLit // struct holding the field; receives the constraints from (cue.val) options
+ field *ast.Field // the field whose options are interpreted
+ required bool // set once a (cue_opt).required option has been seen
+ tags string // body of the @protobuf attribute, extended for each other option
+}
+
+// parse interprets options in order:
+//
+//   - "(cue_opt).required" marks the field as required;
+//   - "(cue.val)" is parsed as a CUE expression and added to the message as
+//     an extra constraint on the field, which is optional unless the field
+//     was marked as required by an earlier option;
+//   - any other option is appended to the tags as a quoted
+//     option(name,value) entry.
+func (p *optionParser) parse(options []*proto.Option) {
+
+	// TODO: handle options
+	// - translate options to tags
+	// - interpret CUE options.
+	for _, opt := range options {
+		switch name := opt.Name; name {
+		case "(cue_opt).required":
+			p.required = true
+			// TODO: Dropping comments. Maybe add a dummy tag?
+
+		case "(cue.val)":
+			// TODO: set filename and base offset.
+			expr, err := parser.ParseExpr(token.NewFileSet(), "", opt.Constant.Source)
+			if err != nil {
+				failf("invalid cue.val value: %v", err)
+			}
+			// Any further checks will be done at the end.
+			constraint := &ast.Field{Label: p.field.Label, Value: expr}
+			addComments(constraint, 1, opt.Comment, opt.InlineComment)
+			if !p.required {
+				constraint.Optional = token.Pos(token.NoSpace)
+			}
+			p.message.Elts = append(p.message.Elts, constraint)
+
+		default:
+			// TODO: dropping comments. Maybe add dummy tag?
+
+			// TODO: should CUE support nested attributes?
+			p.tags += "," + quote("option("+name+","+opt.Constant.SourceRepresentation()+")")
+		}
+	}
+}