internal/encoding: support input validation

Also fixes a bug in go.go, which did not obey
the `json:"-"` field tag.

This does not yet introduce a file extension for
possible combinations like cue+data.

Closes #130

Change-Id: I03f18afb64e48d84cf6a645f5fb5f7c91299b032
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/5261
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/common.go b/cmd/cue/cmd/common.go
index dc49c6f..e0cf5c5 100644
--- a/cmd/cue/cmd/common.go
+++ b/cmd/cue/cmd/common.go
@@ -448,6 +448,7 @@
 		Stdin:     stdin,
 		Stdout:    b.cmd.OutOrStdout(),
 		ProtoPath: flagProtoPath.StringArray(b.cmd),
+		AllErrors: flagAllErrors.Bool(b.cmd),
 	}
 	return nil
 }
diff --git a/cmd/cue/cmd/flags.go b/cmd/cue/cmd/flags.go
index 07941ee..43095e1 100644
--- a/cmd/cue/cmd/flags.go
+++ b/cmd/cue/cmd/flags.go
@@ -21,15 +21,16 @@
 
 // Common flags
 const (
-	flagAll      flagName = "all"
-	flagDryrun   flagName = "dryrun"
-	flagVerbose  flagName = "verbose"
-	flagTrace    flagName = "trace"
-	flagForce    flagName = "force"
-	flagIgnore   flagName = "ignore"
-	flagSimplify flagName = "simplify"
-	flagPackage  flagName = "package"
-	flagTags     flagName = "tags"
+	flagAll       flagName = "all"
+	flagDryrun    flagName = "dryrun"
+	flagVerbose   flagName = "verbose"
+	flagAllErrors flagName = "all-errors"
+	flagTrace     flagName = "trace"
+	flagForce     flagName = "force"
+	flagIgnore    flagName = "ignore"
+	flagSimplify  flagName = "simplify"
+	flagPackage   flagName = "package"
+	flagTags      flagName = "tags"
 
 	flagExpression flagName = "expression"
 	flagSchema     flagName = "schema"
@@ -61,6 +62,7 @@
 		"proceed in the presence of errors")
 	f.BoolP(string(flagVerbose), "v", false,
 		"print information about progress")
+	f.BoolP(string(flagAllErrors), "E", false, "print all available errors")
 }
 
 func addOrphanFlags(f *pflag.FlagSet) {
diff --git a/cmd/cue/cmd/testdata/script/file_forms.txt b/cmd/cue/cmd/testdata/script/file_forms.txt
new file mode 100644
index 0000000..ff42e8b
--- /dev/null
+++ b/cmd/cue/cmd/testdata/script/file_forms.txt
@@ -0,0 +1,39 @@
+cue eval data: foo.cue
+cmp stdout expect-data-foo
+
+! cue eval data: bar.cue
+cmp stderr expect-data-bar
+
+cue eval graph: bar.cue
+cmp stdout expect-graph-bar
+
+-- foo.cue --
+a: 4
+b: {
+    c: 1
+}
+// Duplicates are still allowed.
+b: {
+    d: 2
+}
+
+-- bar.cue --
+a: 4
+b: {
+    c: a
+}
+
+-- expect-data-foo --
+a: 4
+b: {
+    c: 1
+    d: 2
+}
+-- expect-data-bar --
+references not allowed in data mode:
+    ./bar.cue:3:8
+-- expect-graph-bar --
+a: 4
+b: {
+    c: 4
+}
diff --git a/cmd/cue/cmd/testdata/script/help_cmd.txt b/cmd/cue/cmd/testdata/script/help_cmd.txt
index e4f6113..a1165fb 100644
--- a/cmd/cue/cmd/testdata/script/help_cmd.txt
+++ b/cmd/cue/cmd/testdata/script/help_cmd.txt
@@ -225,9 +225,10 @@
   -t, --tags stringArray   set the value of a tagged field
 
 Global Flags:
-  -i, --ignore     proceed in the presence of errors
-  -s, --simplify   simplify output
-      --trace      trace computation
-  -v, --verbose    print information about progress
+  -E, --all-errors   print all available errors
+  -i, --ignore       proceed in the presence of errors
+  -s, --simplify     simplify output
+      --trace        trace computation
+  -v, --verbose      print information about progress
 
 Use "cue cmd [command] --help" for more information about a command.
diff --git a/cmd/cue/cmd/testdata/script/help_hello.txt b/cmd/cue/cmd/testdata/script/help_hello.txt
index 4a0a093..28159e9 100644
--- a/cmd/cue/cmd/testdata/script/help_hello.txt
+++ b/cmd/cue/cmd/testdata/script/help_hello.txt
@@ -29,7 +29,8 @@
   -h, --help   help for hello
 
 Global Flags:
-  -i, --ignore     proceed in the presence of errors
-  -s, --simplify   simplify output
-      --trace      trace computation
-  -v, --verbose    print information about progress
+  -E, --all-errors   print all available errors
+  -i, --ignore       proceed in the presence of errors
+  -s, --simplify     simplify output
+      --trace        trace computation
+  -v, --verbose      print information about progress
diff --git a/cue/build/file.go b/cue/build/file.go
index 167012a..1ce636a 100644
--- a/cue/build/file.go
+++ b/cue/build/file.go
@@ -23,7 +23,7 @@
 	Form           Form              `json:"form,omitempty"`
 	Tags           map[string]string `json:"tags,omitempty"` // code=go
 
-	Source interface{} // TODO: swap out with concrete type.
+	Source interface{} `json:"-"` // TODO: swap out with concrete type.
 }
 
 // A Encoding indicates a file format for representing a program.
diff --git a/cue/go.go b/cue/go.go
index f47eee8..b0204d0 100644
--- a/cue/go.go
+++ b/cue/go.go
@@ -362,6 +362,9 @@
 				if !nilIsTop && isNil(val) {
 					continue
 				}
+				if tag, _ := t.Tag.Lookup("json"); tag == "-" {
+					continue
+				}
 				if isOmitEmpty(&t) && isZero(val) {
 					continue
 				}
diff --git a/cue/go_test.go b/cue/go_test.go
index db7fe08..a17573f 100644
--- a/cue/go_test.go
+++ b/cue/go_test.go
@@ -116,6 +116,13 @@
 	}, {
 		struct {
 			A int
+			B int `json:"-"`
+			C int `json:",omitempty"`
+		}{3, 4, 0},
+		"<0>{A: 3}",
+	}, {
+		struct {
+			A int
 			B int
 		}{3, 4},
 		"<0>{A: 3, B: 4}",
diff --git a/internal/encoding/encoding.go b/internal/encoding/encoding.go
index 178d506..f2a936a 100644
--- a/internal/encoding/encoding.go
+++ b/internal/encoding/encoding.go
@@ -25,8 +25,10 @@
 	"cuelang.org/go/cue"
 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/build"
+	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/format"
 	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/cue/token"
 	"cuelang.org/go/encoding/json"
 	"cuelang.org/go/encoding/protobuf"
 	"cuelang.org/go/internal/filetypes"
@@ -34,6 +36,7 @@
 )
 
 type Decoder struct {
+	cfg      *Config
 	closer   io.Closer
 	next     func() (ast.Expr, error)
 	expr     ast.Expr
@@ -103,8 +106,9 @@
 	Stdin  io.Reader
 	Stdout io.Writer
 
-	Force  bool // overwrite existing files.
-	Stream bool // will potentially write more than one document per file
+	Force     bool // overwrite existing files.
+	Stream    bool // will potentially write more than one document per file
+	AllErrors bool
 
 	EscapeHTML bool
 	ProtoPath  []string
@@ -115,7 +119,10 @@
 // type of f must be a data type, but does not have to be an encoding that
 // can stream. stdin is used in case the file is "-".
 func NewDecoder(f *build.File, cfg *Config) *Decoder {
-	i := &Decoder{filename: f.Filename}
+	if cfg == nil {
+		cfg = &Config{}
+	}
+	i := &Decoder{filename: f.Filename, cfg: cfg}
 	i.next = func() (ast.Expr, error) {
 		if i.err != nil {
 			return nil, i.err
@@ -123,10 +130,10 @@
 		return nil, io.EOF
 	}
 
-	if f, ok := f.Source.(*ast.File); ok {
-		i.file = f
+	if file, ok := f.Source.(*ast.File); ok {
+		i.file = file
 		i.closer = ioutil.NopCloser(strings.NewReader(""))
-		// TODO: verify input format for CUE.
+		i.validate(file, f)
 		return i
 	}
 
@@ -141,7 +148,7 @@
 	switch f.Encoding {
 	case build.CUE:
 		i.file, i.err = parser.ParseFile(path, r, parser.ParseComments)
-		// TODO: verify input format
+		i.validate(i.file, f)
 	case build.JSON, build.JSONL:
 		i.next = json.NewDecoder(nil, path, r).Extract
 		i.Next()
@@ -186,3 +193,121 @@
 	}
 	return os.Open(f.Filename)
 }
+
+// shouldValidate reports whether i disallows at least one checked feature.
+func shouldValidate(i *filetypes.FileInfo) bool {
+	// TODO: We ignore attributes for now. They should be enabled by default.
+	return !(i.Definitions &&
+		i.Data &&
+		i.Optional &&
+		i.Constraints &&
+		i.References &&
+		i.Cycles &&
+		i.KeepDefaults &&
+		i.Incomplete &&
+		i.Imports &&
+		i.Docs)
+}
+
+type validator struct { // state shared by the validate callbacks of one file walk
+	allErrors bool                // set from Config.AllErrors by Decoder.validate
+	count     int                 // number of violations recorded so far
+	errs      errors.Error        // accumulated violations; nil when none were found
+	fileinfo  *filetypes.FileInfo // per-feature permissions and form of the file
+}
+
+// validate walks f and records in d.err any construct that the file info
+// derived from b (for input mode) does not allow. A prior d.err is kept.
+func (d *Decoder) validate(f *ast.File, b *build.File) {
+	if d.err != nil {
+		return // keep the earlier error
+	}
+	info, err := filetypes.FromFile(b, filetypes.Input)
+	switch {
+	case err != nil:
+		d.err = err
+	case shouldValidate(info):
+		// At least one feature is restricted, so inspect every node.
+		walker := validator{allErrors: d.cfg.AllErrors, fileinfo: info}
+		ast.Walk(f, walker.validate, nil)
+		d.err = walker.errs
+	}
+}
+
+// validate is the ast.Walk callback: it records every construct of n that the
+// file's form disallows and reports whether to descend into n's children.
+func (v *validator) validate(n ast.Node) bool {
+	// Stop after a handful of errors unless all of them were requested.
+	if !v.allErrors && v.count > 10 {
+		return false
+	}
+
+	// TODO: Cycles
+	i := v.fileinfo
+	ok := true
+	check := func(n ast.Node, option bool, s string, cond bool) {
+		if !option && cond {
+			v.errs = errors.Append(v.errs, errors.Newf(n.Pos(),
+				"%s not allowed in %s mode", s, v.fileinfo.Form))
+			v.count++
+			ok = false
+		}
+	}
+
+	// For now we don't make any distinction between these modes.
+	constraints := i.Constraints && i.Incomplete && i.Optional && i.References
+
+	check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
+
+	switch x := n.(type) {
+	case *ast.CommentGroup:
+		check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
+		return false
+
+	case *ast.ImportDecl, *ast.ImportSpec:
+		check(n, i.Imports, "imports", true)
+
+	case *ast.Field:
+		check(n, i.Definitions, "definitions", x.Token == token.ISA)
+		check(n, i.Data, "regular fields", x.Token != token.ISA)
+		check(n, constraints, "optional fields", x.Optional != token.NoPos)
+
+		_, _, err := ast.LabelName(x.Label)
+		check(n, constraints, "optional fields", err != nil)
+
+		check(n, i.Attributes, "attributes", len(x.Attrs) > 0)
+		ast.Walk(x.Value, v.validate, nil) // only the value is walked here
+		return false
+
+	case *ast.UnaryExpr:
+		switch x.Op {
+		case token.MUL:
+			check(n, i.KeepDefaults, "default values", true)
+		case token.SUB, token.ADD:
+			// The parser represents negative numbers as an unary expression.
+			// Allow one `-` or `+`.
+			_, isLit := x.X.(*ast.BasicLit) // named so it cannot shadow ok
+			check(n, constraints, "expressions", !isLit)
+		case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR,
+			token.NEQ, token.NMAT, token.MAT:
+			check(n, constraints, "constraints", true)
+		default:
+			check(n, constraints, "expressions", true)
+		}
+
+	case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr,
+		*ast.CallExpr, *ast.Comprehension, *ast.ListComprehension,
+		*ast.Interpolation:
+		check(n, constraints, "expressions", true)
+
+	case *ast.Ellipsis:
+		check(n, constraints, "ellipsis", true)
+
+	case *ast.Ident, *ast.SelectorExpr, *ast.Alias:
+		check(n, i.References, "references", true)
+
+	default:
+		// Other types are either always okay or handled elsewhere.
+	}
+	return ok
+}
diff --git a/internal/encoding/encoding_test.go b/internal/encoding/encoding_test.go
new file mode 100644
index 0000000..37aa0e8
--- /dev/null
+++ b/internal/encoding/encoding_test.go
@@ -0,0 +1,86 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"path"
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue/build"
+	"cuelang.org/go/cue/parser"
+)
+
+// TestValidate checks which CUE inputs Decoder.validate accepts per file form.
+func TestValidate(t *testing.T) {
+	testCases := []struct {
+		form build.Form
+		in   string
+		err  string // substring expected in the error; empty means valid input
+	}{{
+		form: "data",
+		in: `
+		// Foo
+		a: 2
+		"b-b": 3
+		s: -2
+		a: +2
+		`,
+	}, {
+		form: "graph",
+		in: `
+		X=3
+		a: X
+		"b-b": 3
+		s: a
+		`,
+	},
+
+		{form: "data", err: "imports", in: `import "foo" `},
+		{form: "data", err: "references", in: `a: a`},
+		{form: "data", err: "expressions", in: `a: 1 + 3`},
+		{form: "data", err: "definitions", in: `a :: 1`},
+		{form: "data", err: "constraints", in: `a: <1`},
+		{form: "data", err: "expressions", in: `a: !true`},
+		{form: "data", err: "expressions", in: `a: 1 | 2`},
+		{form: "data", err: "expressions", in: `a: 1 | *2`},
+		{form: "data", err: "references", in: `X=3, a: X`},
+		{form: "data", err: "expressions", in: `2+2`},
+		{form: "data", err: "expressions", in: `"\(3)"`},
+		{form: "data", err: "expressions", in: `for x in [2] { a: 2 }`},
+		{form: "data", err: "expressions", in: `a: len([])`},
+		{form: "data", err: "ellipsis", in: `a: [...]`},
+	}
+	for _, tc := range testCases {
+		t.Run(path.Join(string(tc.form), tc.in), func(t *testing.T) {
+			f, err := parser.ParseFile("", tc.in, parser.ParseComments)
+			if err != nil {
+				t.Fatal(err)
+			}
+			d := Decoder{cfg: &Config{}} // validate reads cfg.AllErrors, so cfg must be set
+			d.validate(f, &build.File{
+				Filename: "foo.cue",
+				Encoding: build.CUE,
+				Form:     tc.form,
+			})
+			if (tc.err == "") != (d.err == nil) { // an error must occur exactly when expected
+				t.Errorf("got error %v; want error containing %q", d.err, tc.err)
+			}
+			if d.err != nil && !strings.Contains(d.err.Error(), tc.err) {
+				t.Errorf("error message did not contain %q: %v", tc.err, d.err)
+			}
+		})
+	}
+}