blob: 8606bcb7b20741933c1bc9ffac2f99ad0c4dfe57 [file] [log] [blame]
// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
// TODO: make this package public in cuelang.org/go/encoding
// once stabilized.
17
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010018package encoding
19
20import (
21 "bytes"
22 "fmt"
23 "io"
24 "io/ioutil"
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +010025 "net/url"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010026 "os"
27 "strings"
28
Marcel van Lohuizen845df052020-07-26 13:15:45 +020029 "cuelang.org/go/cue"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010030 "cuelang.org/go/cue/ast"
31 "cuelang.org/go/cue/build"
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +010032 "cuelang.org/go/cue/errors"
Marcel van Lohuizen6e8fdd42020-02-28 16:46:19 +010033 "cuelang.org/go/cue/format"
34 "cuelang.org/go/cue/parser"
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +010035 "cuelang.org/go/cue/token"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010036 "cuelang.org/go/encoding/json"
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +010037 "cuelang.org/go/encoding/jsonschema"
38 "cuelang.org/go/encoding/openapi"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010039 "cuelang.org/go/encoding/protobuf"
Marcel van Lohuizen48059412020-04-09 10:50:02 +020040 "cuelang.org/go/internal"
Marcel van Lohuizened5689f2020-02-15 14:29:15 +010041 "cuelang.org/go/internal/filetypes"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010042 "cuelang.org/go/internal/third_party/yaml"
Marcel van Lohuizenf395f122020-05-17 15:46:41 +020043 "golang.org/x/text/encoding/unicode"
44 "golang.org/x/text/transform"
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010045)
46
47type Decoder struct {
Marcel van Lohuizen51148912020-04-21 11:18:00 +020048 cfg *Config
49 closer io.Closer
50 next func() (ast.Expr, error)
51 interpretFunc interpretFunc
52 interpretation build.Interpretation
53 expr ast.Expr
54 file *ast.File
55 filename string // may change on iteration for some formats
56 id string
57 index int
58 err error
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010059}
60
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +020061type interpretFunc func(*cue.Instance) (file *ast.File, id string, err error)
62
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010063// ID returns a canonical identifier for the decoded object or "" if no such
64// identifier could be found.
65func (i *Decoder) ID() string {
66 return i.id
67}
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010068func (i *Decoder) Filename() string { return i.filename }
Marcel van Lohuizen51148912020-04-21 11:18:00 +020069
70// Interpretation returns the current interpretation detected by Detect.
71func (i *Decoder) Interpretation() build.Interpretation {
72 return i.interpretation
73}
74func (i *Decoder) Index() int { return i.index }
75func (i *Decoder) Done() bool { return i.err != nil }
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +010076
77func (i *Decoder) Next() {
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010078 if i.err != nil {
79 return
80 }
81 // Decoder level
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +020082 i.file = nil
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010083 i.expr, i.err = i.next()
84 i.index++
85 if i.err != nil {
86 return
87 }
Marcel van Lohuizen8dcec3c2020-04-11 12:26:04 +020088 i.doInterpret()
89}
90
91func (i *Decoder) doInterpret() {
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010092 // Interpretations
Marcel van Lohuizen51148912020-04-21 11:18:00 +020093 if i.interpretFunc != nil {
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010094 var r cue.Runtime
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +020095 i.file = i.File()
96 inst, err := r.CompileFile(i.file)
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +010097 if err != nil {
98 i.err = err
99 return
100 }
Marcel van Lohuizen51148912020-04-21 11:18:00 +0200101 i.file, i.id, i.err = i.interpretFunc(inst)
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100102 }
103}
104
Marcel van Lohuizened5689f2020-02-15 14:29:15 +0100105func toFile(x ast.Expr) *ast.File {
Marcel van Lohuizen48059412020-04-09 10:50:02 +0200106 return internal.ToFile(x)
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100107}
108
Marcel van Lohuizened5689f2020-02-15 14:29:15 +0100109func valueToFile(v cue.Value) *ast.File {
Marcel van Lohuizen48059412020-04-09 10:50:02 +0200110 return internal.ToFile(v.Syntax())
Marcel van Lohuizened5689f2020-02-15 14:29:15 +0100111}
112
113func (i *Decoder) File() *ast.File {
114 if i.file != nil {
115 return i.file
116 }
117 return toFile(i.expr)
118}
119
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100120func (i *Decoder) Err() error {
121 if i.err == io.EOF {
122 return nil
123 }
124 return i.err
125}
126
127func (i *Decoder) Close() {
128 i.closer.Close()
129}
130
131type Config struct {
Marcel van Lohuizened5689f2020-02-15 14:29:15 +0100132 Mode filetypes.Mode
133
134 // Out specifies an overwrite destination.
135 Out io.Writer
136 Stdin io.Reader
137 Stdout io.Writer
138
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +0100139 PkgName string // package name for files to generate
140
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100141 Force bool // overwrite existing files.
Marcel van Lohuizen0059b2b2020-04-14 15:31:11 +0200142 Strict bool
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100143 Stream bool // will potentially write more than one document per file
144 AllErrors bool
Marcel van Lohuizened5689f2020-02-15 14:29:15 +0100145
146 EscapeHTML bool
147 ProtoPath []string
Marcel van Lohuizen6e8fdd42020-02-28 16:46:19 +0100148 Format []format.Option
Marcel van Lohuizencd2b3ef2020-04-11 01:47:34 +0200149 ParseFile func(name string, src interface{}) (*ast.File, error)
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100150}
151
152// NewDecoder returns a stream of non-rooted data expressions. The encoding
153// type of f must be a data type, but does not have to be an encoding that
154// can stream. stdin is used in case the file is "-".
155func NewDecoder(f *build.File, cfg *Config) *Decoder {
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100156 if cfg == nil {
157 cfg = &Config{}
158 }
159 i := &Decoder{filename: f.Filename, cfg: cfg}
Marcel van Lohuizen334313c2020-03-06 15:26:32 +0100160 i.next = func() (ast.Expr, error) {
161 if i.err != nil {
162 return nil, i.err
163 }
164 return nil, io.EOF
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100165 }
Marcel van Lohuizen334313c2020-03-06 15:26:32 +0100166
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100167 if file, ok := f.Source.(*ast.File); ok {
168 i.file = file
Marcel van Lohuizen334313c2020-03-06 15:26:32 +0100169 i.closer = ioutil.NopCloser(strings.NewReader(""))
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100170 i.validate(file, f)
Marcel van Lohuizen334313c2020-03-06 15:26:32 +0100171 return i
172 }
173
Marcel van Lohuizenf395f122020-05-17 15:46:41 +0200174 rc, err := reader(f, cfg.Stdin)
175 i.closer = rc
Marcel van Lohuizen334313c2020-03-06 15:26:32 +0100176 i.err = err
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100177 if err != nil {
178 return i
179 }
180
Marcel van Lohuizenf395f122020-05-17 15:46:41 +0200181 // For now we assume that all encodings require UTF-8. This will not be the
182 // case for some binary protocols. We need to exempt those explicitly here
183 // once we introduce them.
184 // TODO: this code also allows UTF16, which is too permissive for some
185 // encodings. Switch to unicode.UTF8Sig once available.
186 t := unicode.BOMOverride(unicode.UTF8.NewDecoder())
187 r := transform.NewReader(rc, t)
188
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100189 switch f.Interpretation {
190 case "":
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200191 case build.Auto:
192 openAPI := openAPIFunc(cfg, f)
193 jsonSchema := jsonSchemaFunc(cfg, f)
Marcel van Lohuizen51148912020-04-21 11:18:00 +0200194 i.interpretFunc = func(inst *cue.Instance) (file *ast.File, id string, err error) {
195 switch i.interpretation = Detect(inst.Value()); i.interpretation {
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200196 case build.JSONSchema:
197 return jsonSchema(inst)
198 case build.OpenAPI:
199 return openAPI(inst)
200 }
201 return i.file, "", i.err
202 }
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100203 case build.OpenAPI:
Marcel van Lohuizen51148912020-04-21 11:18:00 +0200204 i.interpretation = build.OpenAPI
205 i.interpretFunc = openAPIFunc(cfg, f)
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100206 case build.JSONSchema:
Marcel van Lohuizen51148912020-04-21 11:18:00 +0200207 i.interpretation = build.JSONSchema
208 i.interpretFunc = jsonSchemaFunc(cfg, f)
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100209 default:
210 i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation)
211 }
212
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100213 path := f.Filename
214 switch f.Encoding {
Marcel van Lohuizen6e8fdd42020-02-28 16:46:19 +0100215 case build.CUE:
Marcel van Lohuizencd2b3ef2020-04-11 01:47:34 +0200216 if cfg.ParseFile == nil {
217 i.file, i.err = parser.ParseFile(path, r, parser.ParseComments)
218 } else {
219 i.file, i.err = cfg.ParseFile(path, r)
220 }
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100221 i.validate(i.file, f)
Marcel van Lohuizen8dcec3c2020-04-11 12:26:04 +0200222 if i.err == nil {
223 i.doInterpret()
224 }
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100225 case build.JSON, build.JSONL:
226 i.next = json.NewDecoder(nil, path, r).Extract
227 i.Next()
228 case build.YAML:
229 d, err := yaml.NewDecoder(path, r)
230 i.err = err
231 i.next = d.Decode
232 i.Next()
233 case build.Text:
234 b, err := ioutil.ReadAll(r)
235 i.err = err
236 i.expr = ast.NewString(string(b))
237 case build.Protobuf:
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +0100238 paths := &protobuf.Config{
239 Paths: cfg.ProtoPath,
240 PkgName: cfg.PkgName,
241 }
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100242 i.file, i.err = protobuf.Extract(path, r, paths)
243 default:
Marcel van Lohuizen77ffe9d2020-03-06 22:11:21 +0100244 i.err = fmt.Errorf("unsupported encoding %q", f.Encoding)
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100245 }
246
247 return i
248}
249
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200250func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc {
251 return func(i *cue.Instance) (file *ast.File, id string, err error) {
252 id = f.Tags["id"]
253 if id == "" {
254 id, _ = i.Lookup("$id").String()
255 }
256 if id != "" {
257 u, err := url.Parse(id)
258 if err != nil {
259 return nil, "", errors.Wrapf(err, token.NoPos, "invalid id")
260 }
261 u.Scheme = ""
262 id = strings.TrimPrefix(u.String(), "//")
263 }
264 cfg := &jsonschema.Config{
265 ID: id,
266 PkgName: cfg.PkgName,
Marcel van Lohuizen0059b2b2020-04-14 15:31:11 +0200267
268 Strict: cfg.Strict,
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200269 }
Marcel van Lohuizen519db582020-06-11 18:17:18 +0200270 file, err = jsonschema.Extract(i, cfg)
271 // TODO: simplify currently erases file line info. Reintroduce after fix.
272 // file, err = simplify(file, err)
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200273 return file, id, err
274 }
275}
276
277func openAPIFunc(c *Config, f *build.File) interpretFunc {
278 cfg := &openapi.Config{PkgName: c.PkgName}
279 return func(i *cue.Instance) (file *ast.File, id string, err error) {
Marcel van Lohuizen519db582020-06-11 18:17:18 +0200280 file, err = openapi.Extract(i, cfg)
281 // TODO: simplify currently erases file line info. Reintroduce after fix.
282 // file, err = simplify(file, err)
Marcel van Lohuizenf9f8e622020-03-31 17:17:26 +0200283 return file, "", err
284 }
285}
286
Marcel van Lohuizen01fbfa12020-02-13 20:34:59 +0100287func reader(f *build.File, stdin io.Reader) (io.ReadCloser, error) {
288 switch s := f.Source.(type) {
289 case nil:
290 // Use the file name.
291 case string:
292 return ioutil.NopCloser(strings.NewReader(s)), nil
293 case []byte:
294 return ioutil.NopCloser(bytes.NewReader(s)), nil
295 case *bytes.Buffer:
296 // is io.Reader, but it needs to be readable repeatedly
297 if s != nil {
298 return ioutil.NopCloser(bytes.NewReader(s.Bytes())), nil
299 }
300 default:
301 return nil, fmt.Errorf("invalid source type %T", f.Source)
302 }
303 // TODO: should we allow this?
304 if f.Filename == "-" {
305 return ioutil.NopCloser(stdin), nil
306 }
307 return os.Open(f.Filename)
308}
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100309
310func shouldValidate(i *filetypes.FileInfo) bool {
311 // TODO: We ignore attributes for now. They should be enabled by default.
312 return false ||
313 !i.Definitions ||
314 !i.Data ||
315 !i.Optional ||
316 !i.Constraints ||
317 !i.References ||
318 !i.Cycles ||
319 !i.KeepDefaults ||
320 !i.Incomplete ||
321 !i.Imports ||
322 !i.Docs
323}
324
325type validator struct {
326 allErrors bool
327 count int
328 errs errors.Error
329 fileinfo *filetypes.FileInfo
330}
331
332func (d *Decoder) validate(f *ast.File, b *build.File) {
333 if d.err != nil {
334 return
335 }
336 fi, err := filetypes.FromFile(b, filetypes.Input)
337 if err != nil {
338 d.err = err
339 return
340 }
341 if !shouldValidate(fi) {
342 return
343 }
344
345 v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors}
346 ast.Walk(f, v.validate, nil)
347 d.err = v.errs
348}
349
350func (v *validator) validate(n ast.Node) bool {
351 if v.count > 10 {
352 return false
353 }
354
355 i := v.fileinfo
356
357 // TODO: Cycles
358
359 ok := true
360 check := func(n ast.Node, option bool, s string, cond bool) {
361 if !option && cond {
362 v.errs = errors.Append(v.errs, errors.Newf(n.Pos(),
363 "%s not allowed in %s mode", s, v.fileinfo.Form))
364 v.count++
365 ok = false
366 }
367 }
368
369 // For now we don't make any distinction between these modes.
370
371 constraints := i.Constraints && i.Incomplete && i.Optional && i.References
372
373 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
374
375 switch x := n.(type) {
376 case *ast.CommentGroup:
377 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
378 return false
379
380 case *ast.ImportDecl, *ast.ImportSpec:
381 check(n, i.Imports, "imports", true)
382
383 case *ast.Field:
Marcel van Lohuizen88376852020-05-08 11:06:27 +0200384 check(n, i.Definitions, "definitions",
385 x.Token == token.ISA || internal.IsDefinition(x.Label))
386 check(n, i.Data, "regular fields", internal.IsRegularField(x))
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100387 check(n, constraints, "optional fields", x.Optional != token.NoPos)
388
389 _, _, err := ast.LabelName(x.Label)
390 check(n, constraints, "optional fields", err != nil)
391
392 check(n, i.Attributes, "attributes", len(x.Attrs) > 0)
393 ast.Walk(x.Value, v.validate, nil)
394 return false
395
396 case *ast.UnaryExpr:
397 switch x.Op {
398 case token.MUL:
399 check(n, i.KeepDefaults, "default values", true)
400 case token.SUB, token.ADD:
401 // The parser represents negative numbers as an unary expression.
402 // Allow one `-` or `+`.
403 _, ok := x.X.(*ast.BasicLit)
404 check(n, constraints, "expressions", !ok)
405 case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR,
406 token.NEQ, token.NMAT, token.MAT:
407 check(n, constraints, "constraints", true)
408 default:
409 check(n, constraints, "expressions", true)
410 }
411
412 case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr,
413 *ast.CallExpr, *ast.Comprehension, *ast.ListComprehension,
414 *ast.Interpolation:
415 check(n, constraints, "expressions", true)
416
417 case *ast.Ellipsis:
418 check(n, constraints, "ellipsis", true)
419
Marcel van Lohuizen724da182020-04-15 14:11:31 +0200420 case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause:
Marcel van Lohuizen81a20d52020-03-08 13:52:28 +0100421 check(n, i.References, "references", true)
422
423 default:
424 // Other types are either always okay or handled elsewhere.
425 }
426 return ok
427}
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100428
429// simplify reformats a File. To be used as a wrapper for Extract functions.
430//
431// It currently does so by formatting the file using fmt.Format and then
432// reparsing it. This is not ideal, but the package format does not provide a
433// way to do so differently.
434func simplify(f *ast.File, err error) (*ast.File, error) {
435 if err != nil {
436 return nil, err
437 }
Marcel van Lohuizen519db582020-06-11 18:17:18 +0200438 // This needs to be a function that modifies f in order to maintain line
439 // number information.
Marcel van Lohuizenf89aa482020-03-10 15:36:08 +0100440 b, err := format.Node(f, format.Simplify())
441 if err != nil {
442 return nil, err
443 }
444 return parser.ParseFile(f.Filename, b, parser.ParseComments)
445}