cmd/cue: allow vet to check CUE against JSON and YAML

This needs some changes in the loader
to allow specifying non-CUE files on
the command line.

Change-Id: I6faa1225e4f4972c6ee8ee7f6183b07fa5b1ecf4
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2921
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/common_test.go b/cmd/cue/cmd/common_test.go
index 50513cf..9d8c73b 100644
--- a/cmd/cue/cmd/common_test.go
+++ b/cmd/cue/cmd/common_test.go
@@ -74,7 +74,11 @@
 				return
 			}
 
-			cmd.SetArgs(append(args, "./"+path))
+			extra := args
+			if len(args) == 0 {
+				extra = append(args, "./"+path)
+			}
+			cmd.SetArgs(extra)
 			rOut, wOut := io.Pipe()
 			cmd.SetOutput(wOut)
 			var bOut []byte
diff --git a/cmd/cue/cmd/testdata/hello/vet.out b/cmd/cue/cmd/testdata/hello/vet.out
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cmd/cue/cmd/testdata/hello/vet.out
diff --git a/cmd/cue/cmd/testdata/partial/vet.out b/cmd/cue/cmd/testdata/partial/vet.out
new file mode 100644
index 0000000..e696e99
--- /dev/null
+++ b/cmd/cue/cmd/testdata/partial/vet.out
@@ -0,0 +1 @@
+some instances are incomplete; use the -c flag to show errors or suppress this message
diff --git a/cmd/cue/cmd/testdata/partial/vet_conc.out b/cmd/cue/cmd/testdata/partial/vet_conc.out
new file mode 100644
index 0000000..eae33c1
--- /dev/null
+++ b/cmd/cue/cmd/testdata/partial/vet_conc.out
@@ -0,0 +1,6 @@
+sum: incomplete value ((1 | 2)):
+    ./testdata/partial/partial.cue:4:6
+b.idx: invalid non-ground value string (must be concrete int|string):
+    ./testdata/partial/partial.cue:7:9
+b.str: incomplete value (string):
+    ./testdata/partial/partial.cue:8:7
diff --git a/cmd/cue/cmd/testdata/vet/data.yaml b/cmd/cue/cmd/testdata/vet/data.yaml
new file mode 100644
index 0000000..06bc87d
--- /dev/null
+++ b/cmd/cue/cmd/testdata/vet/data.yaml
@@ -0,0 +1,16 @@
+# translated messages
+translations:
+  hello:
+    lang: gsw
+    text: Grüetzi
+---
+translations:
+  hello:
+    lang: no
+    text: Hallo
+---
+translations:
+  hello:
+    lang: nl
+    text: Hallo
+skip: true
diff --git a/cmd/cue/cmd/testdata/vet/vet.cue b/cmd/cue/cmd/testdata/vet/vet.cue
new file mode 100644
index 0000000..8bffdf0
--- /dev/null
+++ b/cmd/cue/cmd/testdata/vet/vet.cue
@@ -0,0 +1,6 @@
+
+translations <_> lang: string
+
+File :: {
+	translations: {...}
+}
diff --git a/cmd/cue/cmd/testdata/vet/vet_expr.out b/cmd/cue/cmd/testdata/vet/vet_expr.out
new file mode 100644
index 0000000..82323b8
--- /dev/null
+++ b/cmd/cue/cmd/testdata/vet/vet_expr.out
@@ -0,0 +1,2 @@
+field "skip" not allowed in closed struct:
+    ./testdata/vet/vet.cue:4:9
diff --git a/cmd/cue/cmd/testdata/vet/vet_file.out b/cmd/cue/cmd/testdata/vet/vet_file.out
new file mode 100644
index 0000000..132f05c
--- /dev/null
+++ b/cmd/cue/cmd/testdata/vet/vet_file.out
@@ -0,0 +1,3 @@
+translations.hello.lang: conflicting values false and string (mismatched types bool and string):
+    ./testdata/vet/data.yaml:9:11
+    ./testdata/vet/vet.cue:2:24
diff --git a/cmd/cue/cmd/vet.go b/cmd/cue/cmd/vet.go
index 70e7452..5399551 100644
--- a/cmd/cue/cmd/vet.go
+++ b/cmd/cue/cmd/vet.go
@@ -15,36 +15,90 @@
 package cmd
 
 import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"path/filepath"
+
 	"cuelang.org/go/cue"
 	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/encoding"
 	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/internal"
 	"github.com/spf13/cobra"
 	"golang.org/x/text/message"
 )
 
+const vetDoc = `vet validates CUE and other data files
+
+By default it will only validate if there are no errors.
+The -c validates that all regular fields are concrete.
+
+
+Checking non-CUE files
+
+Vet can also check non-CUE files. The following file formats are
+currently supported:
+
+  Format       Extensions
+	JSON       .json .jsonl .ndjson
+	YAML       .yaml .yml
+
+To activate this mode, the non-cue files must be explicitly mentioned on the
+command line. There must also be at least one CUE file to hold the constraints.
+
+In this mode, each file will be verified against a CUE constraint. If the files
+contain multiple objects (such as using --- in YAML), they will all be verified
+individually.
+
+By default, each file is checked against the root of the loaded CUE files.
+The -e can be used to only verify files against the result of an expression
+evaluated within the CUE files. This can be useful if the CUE files contain
+a set of definitions to pick from.
+
+Examples:
+
+  # Check files against a CUE file:
+  cue vet foo.yaml foo.cue
+
+  # Check files against a particular expression
+  cue vet translations/*.yaml foo.cue -e Translation
+
+If more than one expression is given, all must match all values.
+`
+
 func newVetCmd() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "vet",
-		Short: "validate CUE configurations",
+		Short: "validate data",
+		Long:  vetDoc,
 		RunE:  doVet,
 	}
 
 	cmd.Flags().BoolP(string(flagConcrete), "c", false,
 		"require the evaluation to be concrete")
 
+	cmd.Flags().StringArrayP(string(flagExpression), "e", nil,
+		"use this expression to validate non-CUE files")
+
 	return cmd
 }
 
 func doVet(cmd *cobra.Command, args []string) error {
-	instances := buildFromArgs(cmd, args)
+	builds := loadFromArgs(cmd, args)
+	if builds == nil {
+		return nil
+	}
+	instances := buildInstances(cmd, builds)
 
-	var exprs []ast.Expr
-	for _, e := range flagExpression.StringArray(cmd) {
-		expr, err := parser.ParseExpr("<expression flag>", e)
-		if err != nil {
-			return err
+	// Go into a special vet mode if the user explicitly specified non-cue
+	// files on the command line.
+	for _, a := range args {
+		enc := encoding.MapExtension(filepath.Ext(a))
+		if enc != nil && enc.Name() != "cue" {
+			vetFiles(cmd, instances[0], builds[0].DataFiles)
+			return nil
 		}
-		exprs = append(exprs, expr)
 	}
 
 	shown := false
@@ -80,3 +134,52 @@
 	}
 	return nil
 }
+
+func vetFiles(cmd *cobra.Command, inst *cue.Instance, files []string) {
+	expressions := flagExpression.StringArray(cmd)
+
+	var check cue.Value
+
+	if len(expressions) == 0 {
+		check = inst.Value()
+	}
+
+	for _, e := range expressions {
+		expr, err := parser.ParseExpr("<expression flag>", e)
+		exitIfErr(cmd, inst, err, true)
+
+		v := inst.Eval(expr)
+		exitIfErr(cmd, inst, v.Err(), true)
+		check = check.Unify(v)
+	}
+
+	for _, f := range files {
+		b, err := ioutil.ReadFile(f)
+		exitIfErr(cmd, inst, err, true)
+
+		ext := filepath.Ext(filepath.Ext(f))
+		enc := encoding.MapExtension(ext)
+		if enc == nil {
+			exitIfErr(cmd, inst, fmt.Errorf("unrecognized extension %q", ext), true)
+		}
+
+		var exprs []ast.Expr
+		switch enc.Name() {
+		case "json":
+			exprs, err = handleJSON(f, bytes.NewReader(b))
+		case "yaml":
+			exprs, err = handleYAML(f, bytes.NewReader(b))
+		default:
+			exitIfErr(cmd, inst, fmt.Errorf("vet does not support %q", enc.Name()), true)
+		}
+		exitIfErr(cmd, inst, err, true)
+
+		r := internal.GetRuntime(inst).(*cue.Runtime)
+		for _, expr := range exprs {
+			body, err := r.CompileExpr(expr)
+			exitIfErr(cmd, inst, err, false)
+			v := body.Value().Unify(check)
+			exitIfErr(cmd, inst, v.Err(), false)
+		}
+	}
+}
diff --git a/cmd/cue/cmd/vet_test.go b/cmd/cue/cmd/vet_test.go
new file mode 100644
index 0000000..0786095
--- /dev/null
+++ b/cmd/cue/cmd/vet_test.go
@@ -0,0 +1,33 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import "testing"
+
+func TestVet(t *testing.T) {
+	runCommand(t, newVetCmd(), "vet")
+
+	cmd := newVetCmd()
+	mustParseFlags(t, cmd, "-c")
+	runCommand(t, cmd, "vet_conc")
+
+	cmd = newVetCmd()
+	runCommand(t, cmd, "vet_file", "./testdata/vet/vet.cue", "./testdata/vet/data.yaml")
+
+	cmd = newVetCmd()
+	mustParseFlags(t, cmd, "-e", "File")
+	runCommand(t, cmd, "vet_expr", "./testdata/vet/vet.cue", "./testdata/vet/data.yaml")
+
+}
diff --git a/cue/encoding/encoding.go b/cue/encoding/encoding.go
index 8d4fe22..6c7e3f0 100644
--- a/cue/encoding/encoding.go
+++ b/cue/encoding/encoding.go
@@ -40,6 +40,7 @@
 }
 
 var (
+	cueEnc      = &Encoding{name: "cue"}
 	jsonEnc     = &Encoding{name: "json"}
 	yamlEnc     = &Encoding{name: "yaml"}
 	protodefEnc = &Encoding{name: "protobuf"}
@@ -47,6 +48,7 @@
 
 // extensions maps a file extension to a Kind.
 var extensions = map[string]*Encoding{
+	".cue":    cueEnc,
 	".json":   jsonEnc,
 	".jsonl":  jsonEnc,
 	".ndjson": jsonEnc,
diff --git a/cue/load/fs.go b/cue/load/fs.go
index abf830f..004866d 100644
--- a/cue/load/fs.go
+++ b/cue/load/fs.go
@@ -53,7 +53,7 @@
 func (fs *fileSystem) getDir(dir string, create bool) map[string]*overlayFile {
 	dir = filepath.Clean(dir)
 	m, ok := fs.overlayDirs[dir]
-	if !ok {
+	if !ok && create {
 		m = map[string]*overlayFile{}
 		fs.overlayDirs[dir] = m
 	}
diff --git a/cue/load/loader.go b/cue/load/loader.go
index 06165d6..c831705 100644
--- a/cue/load/loader.go
+++ b/cue/load/loader.go
@@ -26,6 +26,7 @@
 	"unicode"
 
 	build "cuelang.org/go/cue/build"
+	"cuelang.org/go/cue/encoding"
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/token"
 )
@@ -47,7 +48,10 @@
 
 	l := c.loader
 
-	if len(args) > 0 && strings.HasSuffix(args[0], cueSuffix) {
+	// TODO: this is work in progress. We aim to replace the original Go
+	// implementation, which is not ideal for CUE.
+
+	if len(args) > 0 && encoding.MapExtension(filepath.Ext(args[0])) != nil {
 		return []*build.Instance{l.cueFilesPackage(args)}
 	}
 
@@ -98,15 +102,22 @@
 	pos := token.NoPos
 	cfg := l.cfg
 	// ModInit() // TODO: support modules
+	pkg := l.cfg.Context.NewInstance(cfg.Dir, l.loadFunc(cfg.Dir))
+
 	for _, f := range files {
-		if !strings.HasSuffix(f, ".cue") {
+		if cfg.isDir(f) {
 			return cfg.newErrInstance(nil, f,
-				errors.Newf(pos, "named files must be .cue files"))
+				errors.Newf(pos, "cannot mix files with directories %v", f))
+		}
+		ext := filepath.Ext(f)
+		enc := encoding.MapExtension(ext)
+		if enc == nil {
+			return cfg.newErrInstance(nil, f,
+				errors.Newf(pos, "unrecognized extension %q", ext))
 		}
 	}
 
-	pkg := l.cfg.Context.NewInstance(cfg.Dir, l.loadFunc(cfg.Dir))
-	// TODO: add fiels directly?
+	// TODO: add fields directly?
 	fp := newFileProcessor(cfg, pkg)
 	for _, file := range files {
 		path := file