cmd/cue/cmd: move vet to filetypes

Also change package loading. This now allows both package
and file arguments. File arguments must follow the
package arguments.

Closes #183

Change-Id: Ie88f486f50e10de4ce029c54bc4c6f8c12bae25e
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/5020
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/common.go b/cmd/cue/cmd/common.go
index e7465ea..8d52bde 100644
--- a/cmd/cue/cmd/common.go
+++ b/cmd/cue/cmd/common.go
@@ -31,6 +31,7 @@
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/load"
 	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/cue/token"
 )
 
 // Disallow
@@ -117,6 +118,100 @@
 	return binst
 }
 
+// A buildPlan defines what should be done based on command line
+// arguments and flags.
+//
+// TODO: allow --merge/-m to mix in other packages.
+type buildPlan struct {
+	cmd   *Command          // command the plan belongs to; handed to buildInstances
+	insts []*build.Instance // instances to build, as selected by splitBuilds
+
+	// If orphaned files are mixed with CUE files and/or if placement flags are
+	// used, the instance that holds them is also included in insts.
+	orphanedData   []*build.File   // orphaned data files, e.g. YAML, JSON or text
+	orphanedSchema []*build.File   // orphaned JSON Schema, OpenAPI and Protobuf files
+	orphanInstance *build.Instance // the single instance that has orphaned files
+
+	merge []*build.Instance // NOTE(review): presumably reserved for the --merge TODO; confirm
+}
+
+func (b *buildPlan) instances() []*cue.Instance {
+	if len(b.insts) > 0 {
+		return buildInstances(b.cmd, b.insts) // build every selected instance
+	}
+	return nil // the plan has no instances to build
+}
+
+// parseArgs loads the instances named by args and splits them into a buildPlan.
+func parseArgs(cmd *Command, args []string, cfg *load.Config) (*buildPlan, error) {
+	if cfg == nil {
+		cfg = defaultConfig // a nil cfg selects defaultConfig
+	}
+	loaded := loadFromArgs(cmd, args, cfg)
+	if loaded == nil {
+		return nil, errors.Newf(token.NoPos, "invalid args")
+	}
+	decorateInstances(cmd, flagTags.StringArray(cmd), loaded)
+	return splitBuilds(cmd, loaded)
+}
+
+// splitBuilds sorts builds into a buildPlan. Instances that are not user
+// instances, and user instances with build files, are added to insts; the
+// orphaned files of the one allowed user instance are split into schema and data.
+func splitBuilds(cmd *Command, builds []*build.Instance) (*buildPlan, errors.Error) {
+	p := &buildPlan{cmd: cmd}
+	for _, b := range builds {
+		if !b.User {
+			p.insts = append(p.insts, b)
+			continue
+		}
+		if len(b.BuildFiles) > 0 {
+			p.insts = append(p.insts, b)
+		}
+
+		if len(b.OrphanedFiles) > 0 {
+			if p.orphanInstance != nil {
+				return nil, errors.Newf(token.NoPos,
+					"builds contain two file packages")
+			}
+			p.orphanInstance = b
+		}
+
+		for _, f := range b.OrphanedFiles {
+			switch f.Interpretation {
+			case build.JSONSchema, build.OpenAPI:
+				p.orphanedSchema = append(p.orphanedSchema, f)
+				continue // the interpretation takes precedence over the encoding
+			}
+			switch f.Encoding {
+			case build.Protobuf:
+				p.orphanedSchema = append(p.orphanedSchema, f)
+			case build.YAML, build.JSON, build.JSONL, build.Text: // JSONL is data too
+				p.orphanedData = append(p.orphanedData, f)
+			default:
+				return nil, errors.Newf(token.NoPos,
+					"unsupported encoding %q", f.Encoding)
+			}
+		}
+	}
+	return p, nil
+}
+
+// singleInstance builds and returns the plan's only instance. It returns nil
+// if the plan has no instances, or after reporting an error if it has several.
+func (b *buildPlan) singleInstance() *cue.Instance {
+	switch n := len(b.insts); {
+	case n == 0:
+		return nil
+	case n > 1:
+		exitOnErr(b.cmd, errors.Newf(token.NoPos,
+			"cannot combine data streaming with multiple instances"), true)
+		return nil
+	}
+	only := []*build.Instance{b.insts[0]}
+	return buildInstances(b.cmd, only)[0]
+}
+
 func buildInstances(cmd *Command, binst []*build.Instance) []*cue.Instance {
 	// TODO:
 	// If there are no files and User is true, then use those?
diff --git a/cmd/cue/cmd/testdata/script/vet_altdata.txt b/cmd/cue/cmd/testdata/script/vet_altdata.txt
new file mode 100644
index 0000000..4db9322
--- /dev/null
+++ b/cmd/cue/cmd/testdata/script/vet_altdata.txt
@@ -0,0 +1,8 @@
+cue vet schema.cue json: foo.data
+! stderr .
+
+-- schema.cue --
+[string]: string
+
+-- foo.data --
+{ "a": "b" }
diff --git a/cmd/cue/cmd/testdata/script/vet_data.txt b/cmd/cue/cmd/testdata/script/vet_data.txt
new file mode 100644
index 0000000..0bf809d
--- /dev/null
+++ b/cmd/cue/cmd/testdata/script/vet_data.txt
@@ -0,0 +1,26 @@
+! cue vet schema.cue data.yaml
+cmp stderr expect-stderr
+
+-- schema.cue --
+Language :: {
+	tag:  string
+	name: =~"^\\p{Lu}" // Must start with an uppercase letter.
+}
+languages: [...Language]
+
+-- data.yaml --
+languages:
+  - tag: en
+    name: English
+  - tag: nl
+    name: dutch
+  - tag: no
+    name: Norwegian
+
+-- expect-stderr --
+languages.2.tag: conflicting values string and false (mismatched types string and bool):
+    ./data.yaml:6:11
+    ./schema.cue:2:8
+languages.1.name: invalid value "dutch" (does not match =~"^\\p{Lu}"):
+    ./schema.cue:3:8
+    ./data.yaml:5:12
diff --git a/cmd/cue/cmd/vet.go b/cmd/cue/cmd/vet.go
index 07bd442..a4960b7 100644
--- a/cmd/cue/cmd/vet.go
+++ b/cmd/cue/cmd/vet.go
@@ -15,19 +15,14 @@
 package cmd
 
 import (
-	"bytes"
-	"fmt"
-	"io/ioutil"
-	"path/filepath"
-
 	"github.com/spf13/cobra"
 	"golang.org/x/text/message"
 
 	"cuelang.org/go/cue"
-	"cuelang.org/go/cue/ast"
-	"cuelang.org/go/cue/encoding"
+	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/parser"
 	"cuelang.org/go/internal"
+	"cuelang.org/go/internal/encoding"
 )
 
 const vetDoc = `vet validates CUE and other data files
@@ -44,6 +39,7 @@
   Format       Extensions
 	JSON       .json .jsonl .ndjson
 	YAML       .yaml .yml
+	TEXT       .txt  (validate a single string value)
 
 To activate this mode, the non-cue files must be explicitly mentioned on the
 command line. There must also be at least one CUE file to hold the constraints.
@@ -79,6 +75,7 @@
 	cmd.Flags().BoolP(string(flagConcrete), "c", false,
 		"require the evaluation to be concrete")
 
+	// TODO: change to -d, as -e means something different here than in eval.
 	cmd.Flags().StringArrayP(string(flagExpression), "e", nil,
 		"use this expression to validate non-CUE files")
 
@@ -88,27 +85,29 @@
 	return cmd
 }
 
+// doVet validates instances. There are two modes:
+// - Only packages: vet all these packages
+// - Data files: compare each data instance against a single package.
+//
+// It is invalid to have data files with other than exactly one package.
+//
+// TODO: allow unrooted schema, such as JSON schema to compare against
+// other values.
 func doVet(cmd *Command, args []string) error {
-	builds := loadFromArgs(cmd, args, defaultConfig)
-	if builds == nil {
-		return nil
-	}
-	decorateInstances(cmd, flagTags.StringArray(cmd), builds)
-	instances := buildInstances(cmd, builds)
+	b, err := parseArgs(cmd, args, nil)
+	exitOnErr(cmd, err, true)
 
 	// Go into a special vet mode if the user explicitly specified non-cue
 	// files on the command line.
-	for _, a := range args {
-		enc := encoding.MapExtension(filepath.Ext(a))
-		if enc != nil && enc.Name() != "cue" {
-			vetFiles(cmd, instances[0], builds[0].DataFiles)
-			return nil
-		}
+	// TODO: unify these two modes.
+	if len(b.orphanedData) > 0 {
+		vetFiles(cmd, b)
+		return nil
 	}
 
 	shown := false
 
-	for _, inst := range instances {
+	for _, inst := range b.instances() {
 		// TODO: use ImportPath or some other sanitized path.
 
 		concrete := true
@@ -140,11 +139,17 @@
 	return nil
 }
 
-func vetFiles(cmd *Command, inst *cue.Instance, files []string) {
+func vetFiles(cmd *Command, b *buildPlan) {
+	// Use -r type root, instead of -e
 	expressions := flagExpression.StringArray(cmd)
 
 	var check cue.Value
 
+	inst := b.singleInstance()
+	if inst == nil {
+		exitOnErr(cmd, errors.New("data files specified without a schema"), true)
+	}
+
 	if len(expressions) == 0 {
 		check = inst.Value()
 	}
@@ -158,30 +163,17 @@
 		check = check.Unify(v)
 	}
 
-	for _, f := range files {
-		b, err := ioutil.ReadFile(f)
-		exitIfErr(cmd, inst, err, true)
+	r := internal.GetRuntime(inst).(*cue.Runtime)
 
-		ext := filepath.Ext(filepath.Ext(f))
-		enc := encoding.MapExtension(ext)
-		if enc == nil {
-			exitIfErr(cmd, inst, fmt.Errorf("unrecognized extension %q", ext), true)
-		}
-
-		var exprs []ast.Expr
-		switch enc.Name() {
-		case "json":
-			exprs, err = handleJSON(f, bytes.NewReader(b))
-		case "yaml":
-			exprs, err = handleYAML(f, bytes.NewReader(b))
-		default:
-			exitIfErr(cmd, inst, fmt.Errorf("vet does not support %q", enc.Name()), true)
-		}
-		exitIfErr(cmd, inst, err, true)
-
-		r := internal.GetRuntime(inst).(*cue.Runtime)
-		for _, expr := range exprs {
-			body, err := r.CompileExpr(expr)
+	for _, f := range b.orphanedData {
+		i := encoding.NewDecoder(f, &encoding.Config{
+			Stdin:     stdin,
+			Stdout:    stdout,
+			ProtoPath: flagProtoPath.StringArray(cmd),
+		})
+		defer i.Close()
+		for ; !i.Done(); i.Next() {
+			body, err := r.CompileExpr(i.Expr())
 			exitIfErr(cmd, inst, err, true)
 			v := body.Value().Unify(check)
 			if err := v.Err(); err != nil {
@@ -192,5 +184,6 @@
 				exitIfErr(cmd, inst, err, false)
 			}
 		}
+		exitIfErr(cmd, inst, i.Err(), false)
 	}
 }
diff --git a/cue/build/file.go b/cue/build/file.go
index dbe1e73..167012a 100644
--- a/cue/build/file.go
+++ b/cue/build/file.go
@@ -22,21 +22,23 @@
 	Interpretation Interpretation    `json:"interpretation,omitempty"`
 	Form           Form              `json:"form,omitempty"`
 	Tags           map[string]string `json:"tags,omitempty"` // code=go
+
+	Source interface{} // TODO: swap out with concrete type.
 }
 
 // A Encoding indicates a file format for representing a program.
 type Encoding string
 
 const (
-	CUE   Encoding = "cue"
-	JSON  Encoding = "json"
-	YAML  Encoding = "yaml"
-	JSONL Encoding = "jsonl"
-	TXT   Encoding = "txt"
+	CUE      Encoding = "cue"
+	JSON     Encoding = "json"
+	YAML     Encoding = "yaml"
+	JSONL    Encoding = "jsonl"
+	Text     Encoding = "text"
+	Protobuf Encoding = "protobuf"
 
 	// TODO:
 	// TOML
-	// Proto
 	// TextProto
 	// BinProto
 
diff --git a/cue/build/instance.go b/cue/build/instance.go
index 0011dfc..042ad58 100644
--- a/cue/build/instance.go
+++ b/cue/build/instance.go
@@ -35,7 +35,14 @@
 type Instance struct {
 	ctxt *Context
 
+	BuildFiles    []*File // files to be included in the build
+	IgnoredFiles  []*File // files excluded for this build
+	OrphanedFiles []*File // recognized file formats not part of any build
+	InvalidFiles  []*File // could not parse these files
+	UnknownFiles  []*File // unknown file types
+
 	// Files contains the AST for all files part of this instance.
+	// TODO: the intent is to deprecate this in favor of BuildFiles.
 	Files []*ast.File
 
 	loadFunc LoadFunc
@@ -97,11 +104,15 @@
 	AllTags []string
 
 	Standard bool // Is a builtin package
-	Local    bool
+	User     bool // True if package was created from individual files.
 
-	// Relative to Dir
-	CUEFiles        []string // .cue source files
-	DataFiles       []string // recognized data files (.json, .yaml, etc.)
+	// Deprecated: use BuildFiles
+	CUEFiles []string // .cue source files
+	// Deprecated: use BuildFiles and OrphanedFiles
+	DataFiles []string // recognized data files (.json, .yaml, etc.)
+
+	// The intent is to also deprecate the following fields in favor of
+	// IgnoredFiles and UnknownFiles.
 	TestCUEFiles    []string // .cue test files (_test.cue)
 	ToolCUEFiles    []string // .cue tool files (_tool.cue)
 	IgnoredCUEFiles []string // .cue source files ignored for this build
diff --git a/cue/load/config.go b/cue/load/config.go
index bb8b902..2b35577 100644
--- a/cue/load/config.go
+++ b/cue/load/config.go
@@ -40,13 +40,52 @@
 // Some of the aspects of this documentation, like flags and handling '--' need
 // to be implemented by the tools.
 const FromArgsUsage = `
-<args> is a list of arguments denoting a set of instances.
-It may take one of two forms:
+<args> is a list of arguments denoting a set of instances of the form:
 
-1. A list of *.cue source files.
+   <package>* <file_args>*
 
-   All of the specified files are loaded, parsed and type-checked
-   as a single instance.
+1. A list of source files
+
+   CUE files are parsed, loaded and unified into a single instance. All files
+   must have the same package name.
+
+   Data files, like YAML or JSON, are handled in one of two ways:
+
+   a. Explicitly mapped into a single CUE namespace, using the --path, --files
+      and --list flags. In this case these are unified into a single instance
+      along with any other CUE files.
+
+   b. Treated as a stream of data elements that each is optionally unified with
+      a single instance, which either consists of the other CUE files specified
+       on the command line or a single package.
+
+   By default, the format of files is derived from the file extension.
+   This behavior may be modified with file arguments of the form <qualifiers>:
+   For instance,
+
+      cue eval foo.cue json: bar.data
+
+   indicates that the bar.data file should be interpreted as a JSON file.
+   A qualifier applies to all files following it until the next qualifier.
+
+   The following qualifiers are available:
+
+      encodings
+      cue           CUE definitions and data
+      json          JSON data, one value only
+      jsonl         newline-separated JSON values
+      yaml          a YAML file, may contain a stream
+      proto         Protobuf definitions
+
+      interpretations
+      jsonschema   data encoding describes JSON Schema
+      openapi      data encoding describes Open API
+
+      formats
+      data         output as -- or only accept -- data
+      graph        data allowing references or anchors
+      schema       output as schema; defaults JSON files to JSON Schema
+      def          full definitions, including documentation
 
 2. A list of relative directories to denote a package instance.
 
@@ -54,18 +93,21 @@
    The instance contains all files in this directory and ancestor directories,
    up to the module root, with the same package name. The package name must
    be either uniquely determined by the files in the given directory, or
-   explicitly defined using the '-p' flag.
+   explicitly defined using a package name qualifier. For instance, ./...:foo
+   selects all packages named foo in the any subdirectory of the current
+   working directory.
 
-   Files without a package clause are ignored.
+   3. An import path referring to a directory within the current module
 
-   Files ending in *_test.cue files are only loaded when testing.
+   All CUE files in that directory, and all the ancestor directories up to the
+   module root (if applicable), with a package name corresponding to the base
+   name of the directory or the optional explicit package name are loaded into
+   a single instance.
 
-3. A list of import paths, each denoting a package.
-
-   The package's directory is loaded from the package cache. The version of the
-   package is defined in the modules cue.mod file.
-
-A '--' argument terminates the list of packages.
+   Examples, assume a module name of acme.org/root:
+      example.com/foo   package in cue.mod
+      ./foo             package corresponding to foo directory
+      .:bar             package in current directory with package name bar
 `
 
 // GenPath reports the directory in which to store generated
@@ -179,7 +221,6 @@
 	p.Module = c.Module
 
 	if isLocalImport(path) {
-		p.Local = true
 		if c.Dir == "" {
 			err = errors.Append(err, errors.Newf(pos, "cwd unknown"))
 		}
diff --git a/cue/load/import.go b/cue/load/import.go
index 473e73d..3839ad7 100644
--- a/cue/load/import.go
+++ b/cue/load/import.go
@@ -26,11 +26,11 @@
 
 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/build"
-	"cuelang.org/go/cue/encoding"
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/parser"
 	"cuelang.org/go/cue/token"
 	"cuelang.org/go/internal"
+	"cuelang.org/go/internal/filetypes"
 )
 
 // An importMode controls the behavior of the Import method.
@@ -150,7 +150,14 @@
 				if f.IsDir() {
 					continue
 				}
-				fp.add(pos, dir, f.Name(), importComment)
+				file, err := filetypes.ParseFile(f.Name(), filetypes.Input)
+				if err != nil {
+					p.UnknownFiles = append(p.UnknownFiles, &build.File{
+						Filename: f.Name(),
+					})
+					continue // skip unrecognized file types
+				}
+				fp.add(pos, dir, file, importComment)
 			}
 
 			if fp.pkg.PkgName == "" || !inModule || l.cfg.isRoot(dir) || dir == d[0] {
@@ -314,20 +321,23 @@
 	return nil
 }
 
-func (fp *fileProcessor) add(pos token.Pos, root, path string, mode importMode) (added bool) {
+func (fp *fileProcessor) add(pos token.Pos, root string, file *build.File, mode importMode) (added bool) {
+	path := file.Filename
 	fullPath := path
 	if !filepath.IsAbs(path) {
 		fullPath = filepath.Join(root, path)
 	}
+	file.Filename = fullPath
+
 	name := filepath.Base(fullPath)
 	dir := filepath.Dir(fullPath)
 
-	ext := nameExt(name)
 	p := fp.pkg
 
 	badFile := func(err errors.Error) bool {
 		fp.err = errors.Append(fp.err, err)
 		p.InvalidCUEFiles = append(p.InvalidCUEFiles, fullPath)
+		p.InvalidFiles = append(p.InvalidFiles, file)
 		return true
 	}
 
@@ -336,9 +346,11 @@
 		return badFile(err)
 	}
 	if !match {
-		if ext == cueSuffix {
+		if file.Encoding == build.CUE && file.Interpretation == "" {
 			p.IgnoredCUEFiles = append(p.IgnoredCUEFiles, fullPath)
-		} else if encoding.MapExtension(ext) != nil {
+			p.IgnoredFiles = append(p.IgnoredFiles, file)
+		} else {
+			p.OrphanedFiles = append(p.OrphanedFiles, file)
 			p.DataFiles = append(p.DataFiles, fullPath)
 		}
 		return false // don't mark as added
@@ -353,6 +365,7 @@
 	_, pkg, _ := internal.PackageInfo(pf)
 	if pkg == "" && mode&allowAnonymous == 0 {
 		p.IgnoredCUEFiles = append(p.IgnoredCUEFiles, fullPath)
+		p.IgnoredFiles = append(p.IgnoredFiles, file)
 		return false // don't mark as added
 	}
 
@@ -362,6 +375,7 @@
 	} else if pkg != p.PkgName {
 		if fp.ignoreOther {
 			p.IgnoredCUEFiles = append(p.IgnoredCUEFiles, fullPath)
+			p.IgnoredFiles = append(p.IgnoredFiles, file)
 			return false
 		}
 		return badFile(&MultiplePackageError{
@@ -411,10 +425,13 @@
 	switch {
 	case isTest:
 		p.TestCUEFiles = append(p.TestCUEFiles, fullPath)
+		// TODO: what is the BuildFiles equivalent?
 	case isTool:
 		p.ToolCUEFiles = append(p.ToolCUEFiles, fullPath)
+		// TODO: what is the BuildFiles equivalent?
 	default:
 		p.CUEFiles = append(p.CUEFiles, fullPath)
+		p.BuildFiles = append(p.BuildFiles, file)
 	}
 	return true
 }
diff --git a/cue/load/loader.go b/cue/load/loader.go
index dd41f65..4cef6ed 100644
--- a/cue/load/loader.go
+++ b/cue/load/loader.go
@@ -25,10 +25,11 @@
 	"strings"
 	"unicode"
 
+	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/build"
-	"cuelang.org/go/cue/encoding"
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/token"
+	"cuelang.org/go/internal/filetypes"
 )
 
 // Instances returns the instances named by the command line arguments 'args'.
@@ -48,27 +49,70 @@
 
 	l := c.loader
 
-	// TODO: this is work in progress. We aim to replace the original Go
-	// implementation, which is not ideal for CUE.
-	if len(args) > 0 {
-		arg := args[0]
-		if arg == "-" || encoding.MapExtension(filepath.Ext(arg)) != nil {
-			return []*build.Instance{l.cueFilesPackage(args)}
-		}
+	// TODO: require packages to be placed before files. At some point this
+	// could be relaxed.
+	i := 0
+	for ; i < len(args) && isPkg(args[i]); i++ {
 	}
 
 	a := []*build.Instance{}
-	for _, m := range l.importPaths(args) {
-		if m.Err != nil {
-			inst := c.newErrInstance(token.NoPos, "", m.Err)
-			a = append(a, inst)
-			continue
+
+	if len(args) == 0 || i > 0 {
+		for _, m := range l.importPaths(args[:i]) {
+			if m.Err != nil {
+				inst := c.newErrInstance(token.NoPos, "", m.Err)
+				a = append(a, inst)
+				continue
+			}
+			a = append(a, m.Pkgs...)
 		}
-		a = append(a, m.Pkgs...)
 	}
+
+	if args = args[i:]; len(args) > 0 {
+		files, err := filetypes.ParseArgs(args) // args now holds only the file arguments
+		if err != nil {
+			return []*build.Instance{c.newErrInstance(token.NoPos, "", err)}
+		}
+		a = append(a, l.cueFilesPackage(files))
+	}
+
 	return a
 }
 
+// isPkg reports whether the argument s denotes a package rather than a file.
+func isPkg(s string) bool {
+	if s == "." || s == ".." {
+		return true
+	}
+	if s == "-" {
+		return false
+	}
+
+	// This relies on the assumption that file names in CUE may not contain a
+	// `:` and must have an extension or be preceded by a qualifier argument.
+	// So a string like foo/bar:baz, where baz is a valid identifier and the
+	// part before the `:` contains a '/', '.' or '\', is taken to be a package.
+	if p := strings.LastIndexByte(s, ':'); p > 0 {
+		if !ast.IsValidIdent(s[p+1:]) {
+			return false
+		}
+		// For a non-pkg, the part before ':' may only be lowercase and '+'.
+		// In contrast, a package must contain a slash or a dot of some form.
+		return strings.ContainsAny(s[:p], `/.\`)
+	}
+
+	// Assuming we terminate search for packages once a scoped qualifier is
+	// found, we know that any file without an extension (except maybe '-')
+	// is invalid. We can therefore assume it is a package.
+	// The section may still contain a dot, for instance ./foo/. or ./foo/...
+	return strings.TrimLeft(filepath.Ext(s), ".") == ""
+
+	// NOTE/TODO: we have not needed to check whether it is an absolute package
+	// or whether the package starts with a dot. Potentially we could thus relax
+	// the requirement that packages be dots if it is clear that the package
+	// name will not interfere with command names in all circumstances.
+}
+
 // Mode flags for loadImport and download (in get.go).
 const (
 	// resolveImport means that loadImport should do import path expansion.
@@ -97,32 +141,27 @@
 
 // cueFilesPackage creates a package for building a collection of CUE files
 // (typically named on the command line).
-func (l *loader) cueFilesPackage(files []string) *build.Instance {
+func (l *loader) cueFilesPackage(files []*build.File) *build.Instance {
 	pos := token.NoPos
 	cfg := l.cfg
 	cfg.filesMode = true
 	// ModInit() // TODO: support modules
 	pkg := l.cfg.Context.NewInstance(cfg.Dir, l.loadFunc())
 
-	for _, f := range files {
+	for _, bf := range files {
+		f := bf.Filename
 		if cfg.isDir(f) {
 			return cfg.newErrInstance(token.NoPos, toImportPath(f),
-				errors.Newf(pos, "cannot mix files with directories %v", f))
-		}
-		ext := filepath.Ext(f)
-		enc := encoding.MapExtension(ext)
-		if enc == nil {
-			return cfg.newErrInstance(token.NoPos, toImportPath(f),
-				errors.Newf(pos, "unrecognized extension %q", ext))
+				errors.Newf(pos, "file is a directory %v", f))
 		}
 	}
 
 	// TODO: add fields directly?
 	fp := newFileProcessor(cfg, pkg)
 	for _, file := range files {
-		path := file
-		if !filepath.IsAbs(file) {
-			path = filepath.Join(cfg.Dir, file)
+		path := file.Filename
+		if !filepath.IsAbs(path) {
+			path = filepath.Join(cfg.Dir, path)
 		}
 		fi, err := cfg.fileSystem.stat(path)
 		if err != nil {
@@ -131,7 +170,7 @@
 		}
 		if fi.IsDir() {
 			return cfg.newErrInstance(pos, toImportPath(path),
-				errors.Newf(pos, "%s is a directory, should be a CUE file", file))
+				errors.Newf(pos, "%s is a directory, should be a CUE file", file.Filename))
 		}
 		fp.add(pos, cfg.Dir, file, allowAnonymous)
 	}
@@ -156,14 +195,13 @@
 
 	l.addFiles(cfg.Dir, pkg)
 
-	pkg.Local = true
+	pkg.User = true
 	l.stk.Push("user")
-	pkg.Complete()
+	_ = pkg.Complete()
 	l.stk.Pop()
-	pkg.Local = true
+	pkg.User = true
 	//pkg.LocalPrefix = dirToImportPath(dir)
 	pkg.DisplayPath = "command-line-arguments"
-	pkg.Match = files
 
 	return pkg
 }
diff --git a/cue/load/loader_test.go b/cue/load/loader_test.go
index e678b05..6071db3 100644
--- a/cue/load/loader_test.go
+++ b/cue/load/loader_test.go
@@ -340,3 +340,34 @@
 		}
 	}
 }
+
+// TestIsPkg checks which command line arguments isPkg classifies as packages.
+func TestIsPkg(t *testing.T) {
+	tests := []struct {
+		arg  string
+		want bool
+	}{
+		{".", true},
+		{"..", true},
+		{"../.../foo", true},
+		{".../foo", true},
+		{"./:foo", true},
+		{"foo.bar/foo", true},
+
+		// Not supported yet, but could be and isn't anything else valid.
+		{":foo", true},
+
+		{"foo.bar", false},
+		{"foo:", false},
+		{"foo:bar:baz", false},
+		{"-", false},
+		{"-:foo", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.arg, func(t *testing.T) {
+			if got := isPkg(tt.arg); got != tt.want {
+				t.Errorf("got %v; want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/cue/load/match.go b/cue/load/match.go
index 6047b20..cb61e07 100644
--- a/cue/load/match.go
+++ b/cue/load/match.go
@@ -16,6 +16,7 @@
 
 import (
 	"bytes"
+	"path"
 	"strings"
 	"unicode"
 
@@ -50,11 +51,7 @@
 		return
 	}
 
-	i := strings.LastIndex(name, ".")
-	if i < 0 {
-		i = len(name)
-	}
-	ext := name[i:]
+	ext := path.Ext(name)
 
 	switch ext {
 	case cueSuffix:
diff --git a/cue/load/package.go b/cue/load/package.go
index 54c34b3..79759a5 100644
--- a/cue/load/package.go
+++ b/cue/load/package.go
@@ -16,9 +16,6 @@
 
 import (
 	"unicode/utf8"
-
-	"cuelang.org/go/cue/build"
-	"cuelang.org/go/internal/str"
 )
 
 // Package rules:
@@ -34,20 +31,6 @@
 // starting point to load a package. An instance defines a package-directory
 // pair.
 
-// allFiles returns the names of all the files considered for the package.
-// This is used for sanity and security checks, so we include all files,
-// even IgnoredGoFiles, because some subcommands consider them.
-func allFiles(p *build.Instance) []string {
-	return str.StringList(
-		p.CUEFiles,
-		p.ToolCUEFiles,
-		p.TestCUEFiles,
-		p.IgnoredCUEFiles,
-		p.InvalidCUEFiles,
-		p.DataFiles,
-	)
-}
-
 // safeArg reports whether arg is a "safe" command-line argument,
 // meaning that when it appears in a command-line, it probably
 // doesn't have some special meaning other than its own name.
diff --git a/cue/load/search.go b/cue/load/search.go
index 9a3f2a3..602f538 100644
--- a/cue/load/search.go
+++ b/cue/load/search.go
@@ -211,7 +211,7 @@
 			case nil:
 				break
 			case *NoFilesError:
-				if c.DataFiles && len(p.DataFiles) > 0 {
+				if c.DataFiles && len(p.OrphanedFiles) > 0 {
 					break
 				}
 				return nil
diff --git a/internal/encoding/encoding.go b/internal/encoding/encoding.go
new file mode 100644
index 0000000..a032bde
--- /dev/null
+++ b/internal/encoding/encoding.go
@@ -0,0 +1,152 @@
+// Copyright 2020 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strings"
+
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/build"
+	"cuelang.org/go/encoding/json"
+	"cuelang.org/go/encoding/protobuf"
+	"cuelang.org/go/internal/third_party/yaml"
+)
+
+type Decoder struct {
+	closer   io.Closer                // closed by Close
+	next     func() (ast.Expr, error) // produces the following expression; called by Next
+	expr     ast.Expr                 // current expression, returned by Expr
+	file     *ast.File                // set for Protobuf input; File returns it when non-nil
+	filename string                   // may change on iteration for some formats
+	index    int                      // incremented by every call to Next that is not a no-op
+	err      error                    // first error or io.EOF; non-nil means Done
+}
+
+func (i *Decoder) Expr() ast.Expr   { return i.expr }       // current expression
+func (i *Decoder) Filename() string { return i.filename }   // file name recorded for the stream
+func (i *Decoder) Index() int       { return i.index }      // counter advanced by Next
+func (i *Decoder) Done() bool       { return i.err != nil } // true after an error or io.EOF
+
+func (i *Decoder) Next() {
+	if i.err == nil { // once an error or io.EOF is recorded, Next is a no-op
+		i.expr, i.err = i.next()
+		i.index++
+	}
+}
+
+// File returns the decoded file if the format produced one. Otherwise it
+// wraps the current expression in a new file, or returns nil if there is none.
+func (i *Decoder) File() *ast.File {
+	if i.file != nil {
+		return i.file
+	}
+	switch x := i.expr.(type) {
+	case nil:
+		return nil
+	case *ast.StructLit:
+		return &ast.File{Decls: x.Elts}
+	}
+	// Any other expression is embedded at the top level of the file.
+	return &ast.File{Decls: []ast.Decl{&ast.EmbedDecl{Expr: i.expr}}}
+}
+
+func (i *Decoder) Err() error {
+	if i.err != io.EOF {
+		return i.err
+	}
+	return nil // io.EOF only marks the end of the stream
+}
+
+func (i *Decoder) Close() {
+	i.closer.Close() // the error returned by the underlying Close is ignored
+}
+
+type Config struct {
+	Stdin     io.Reader // read by NewDecoder when the file name is "-"
+	Stdout    io.Writer // not used by NewDecoder
+	ProtoPath []string  // handed to protobuf.Extract as Config.Paths
+}
+
+// NewDecoder returns a stream of non-rooted data expressions. The encoding
+// type of f must be a data type, but does not have to be an encoding that
+// can stream. cfg.Stdin is used in case the file is "-". Failures are reported
+// by Err, and Close may be called on the result even if f could not be opened.
+func NewDecoder(f *build.File, cfg *Config) *Decoder {
+	i := &Decoder{filename: f.Filename}
+	// By default there is nothing more to read after the first value;
+	// streaming encodings replace next below.
+	i.next = func() (ast.Expr, error) { return nil, io.EOF }
+
+	r, err := reader(f, cfg.Stdin)
+	if err != nil {
+		// reader may return a nil ReadCloser, on which Close would panic.
+		i.closer = ioutil.NopCloser(strings.NewReader(""))
+		i.err = err
+		return i
+	}
+	i.closer = r
+
+	path := f.Filename
+	// Streaming decoders are advanced once so that Expr holds the first value.
+	switch f.Encoding {
+	case build.JSON, build.JSONL:
+		i.next = json.NewDecoder(nil, path, r).Extract
+		i.Next()
+	case build.YAML:
+		d, err := yaml.NewDecoder(path, r)
+		i.err = err
+		i.next = d.Decode
+		i.Next()
+	case build.Text:
+		b, err := ioutil.ReadAll(r)
+		i.err = err
+		i.expr = ast.NewString(string(b))
+	case build.Protobuf:
+		paths := &protobuf.Config{Paths: cfg.ProtoPath}
+		i.file, i.err = protobuf.Extract(path, r, paths)
+	default:
+		i.err = fmt.Errorf("unsupported stream type %q", f.Encoding)
+	}
+
+	return i
+}
+
+// reader opens the content of f: f.Source if set, else stdin for "-", else the file.
+func reader(f *build.File, stdin io.Reader) (io.ReadCloser, error) {
+	switch s := f.Source.(type) {
+	case nil: // Use the file name.
+	case string:
+		return ioutil.NopCloser(strings.NewReader(s)), nil
+	case []byte:
+		return ioutil.NopCloser(bytes.NewReader(s)), nil
+	case *bytes.Buffer:
+		// A Buffer is an io.Reader, but it must stay readable repeatedly: wrap its bytes.
+		if s != nil { // a nil buffer falls back to the file name
+			return ioutil.NopCloser(bytes.NewReader(s.Bytes())), nil
+		}
+	default:
+		return nil, fmt.Errorf("invalid source type %T", f.Source)
+	}
+	// TODO: should we allow this?
+	if f.Filename == "-" {
+		return ioutil.NopCloser(stdin), nil
+	}
+	return os.Open(f.Filename)
+}