encoding/jsonschema: fix a few JSON Schema bugs

Issue #378

- decoders should strip a leading BOM. (Note that we do
not write the BOM back.)

- fix bug in Validate that incorrectly required a struct
to be concrete when in non-concrete mode

- improve some error messages

- disable example handling (it was effectively a no-op anyway)

- don't mark a type corresponding to a constraint as used
if it is not actually used.

Change-Id: I687fb1e17b9d4c2ef2136f829690569cf77d6707
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/6104
Reviewed-by: Paul Jolly <paul@myitcv.org.uk>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/testdata/script/def_jsonschema.txt b/cmd/cue/cmd/testdata/script/def_jsonschema.txt
index 9974959..551374f 100644
--- a/cmd/cue/cmd/testdata/script/def_jsonschema.txt
+++ b/cmd/cue/cmd/testdata/script/def_jsonschema.txt
@@ -33,7 +33,7 @@
 	...
 }
 -- schema.json --
-{
+{
   "$id": "https://example.com/person.schema.json",
   "$schema": "http://json-schema.org/draft-07/schema#",
   "title": "Person",
diff --git a/cue/types.go b/cue/types.go
index e9a3dd7..8a05140 100644
--- a/cue/types.go
+++ b/cue/types.go
@@ -2047,7 +2047,9 @@
 		x.depth++
 		obj, err := v.structValOpts(ctx, opts)
 		if err != nil {
-			x.errs = errors.Append(x.errs, v.toErr(err))
+			if !isIncomplete(err) && opts.concrete {
+				x.errs = errors.Append(x.errs, v.toErr(err))
+			}
 		}
 		for i := 0; i < obj.Len(); i++ {
 			_, v := obj.At(i)
diff --git a/cue/types_test.go b/cue/types_test.go
index 3c40abc..0d9ddf9 100644
--- a/cue/types_test.go
+++ b/cue/types_test.go
@@ -1363,6 +1363,19 @@
 		a: b: c: *["\(x)"] | _
 		d: yaml.Marshal(a.b)
 		`,
+	}, {
+		desc: "allow non-concrete values for definitions",
+		in: `
+		variables: #variables
+
+		{[!~"^[.]"]: #job}
+
+		#variables: [string]: int | string
+
+		#job: ({a: int} | {b: int}) & {
+			"variables"?: #variables
+		}
+		`,
 	}}
 	for _, tc := range testCases {
 		t.Run(tc.desc, func(t *testing.T) {
diff --git a/cue/value.go b/cue/value.go
index 8abaf0b..00a958f 100644
--- a/cue/value.go
+++ b/cue/value.go
@@ -198,7 +198,9 @@
 
 type bytesLit struct {
 	baseValue
-	b  []byte
+	b []byte
+	// Also support https://github.com/dlclark/regexp2 to
+	// accommodate JSON Schema?
 	re *regexp.Regexp // only set if needed
 }
 
diff --git a/encoding/jsonschema/constraints.go b/encoding/jsonschema/constraints.go
index 2ad9a90..08822a9 100644
--- a/encoding/jsonschema/constraints.go
+++ b/encoding/jsonschema/constraints.go
@@ -23,6 +23,9 @@
 	"cuelang.org/go/internal"
 )
 
+// TODO: skip invalid regexps containing ?! and foes.
+// alternatively, fall back to  https://github.com/dlclark/regexp2
+
 type constraint struct {
 	key string
 
@@ -71,11 +74,11 @@
 
 func addDefinitions(n cue.Value, s *state) {
 	if n.Kind() != cue.StructKind {
-		s.errf(n, `"definitions" expected an object, found %v`, n.Kind)
+		s.errf(n, `"definitions" expected an object, found %s`, n.Kind())
 	}
 
 	if len(s.path) != 1 {
-		s.errf(n, `"definitions" expected an object, found %v`, n.Kind)
+		s.errf(n, `"definitions" only allowed at root`)
 	}
 
 	old := s.isSchema
@@ -191,11 +194,12 @@
 		if n.Kind() != cue.ListKind {
 			s.errf(n, `value of "examples" must be an array, found %v`, n.Kind)
 		}
-		for _, n := range s.listItems("examples", n, true) {
-			if ex := s.schema(n); !isAny(ex) {
-				s.examples = append(s.examples, ex)
-			}
-		}
+		// TODO: implement examples properly.
+		// for _, n := range s.listItems("examples", n, true) {
+		// 	if ex := s.value(n); !isAny(ex) {
+		// 		s.examples = append(s.examples, ex)
+		// 	}
+		// }
 	}),
 
 	p0("description", func(n cue.Value, s *state) {
@@ -340,11 +344,13 @@
 	}),
 
 	p0d("contentMediaType", 7, func(n cue.Value, s *state) {
-		s.usedTypes |= cue.StringKind
+		// TODO: only mark as used if it generates something.
+		// s.usedTypes |= cue.StringKind
 	}),
 
 	p0d("contentEncoding", 7, func(n cue.Value, s *state) {
-		s.usedTypes |= cue.StringKind
+		// TODO: only mark as used if it generates something.
+		// s.usedTypes |= cue.StringKind
 		// 7bit, 8bit, binary, quoted-printable and base64.
 		// RFC 2054, part 6.1.
 		// https://tools.ietf.org/html/rfc2045
@@ -424,12 +430,13 @@
 	}),
 
 	p1("required", func(n cue.Value, s *state) {
-		s.usedTypes |= cue.StructKind
 		if n.Kind() != cue.ListKind {
 			s.errf(n, `value of "required" must be list of strings, found %v`, n.Kind)
 			return
 		}
 
+		s.usedTypes |= cue.StructKind
+
 		if s.obj == nil {
 			s.obj = &ast.StructLit{}
 			// TODO: detect that properties is defined somewhere.
@@ -439,7 +446,10 @@
 		// Create field map
 		fields := map[string]*ast.Field{}
 		for _, d := range s.obj.Elts {
-			f := d.(*ast.Field)
+			f, ok := d.(*ast.Field)
+			if !ok {
+				continue // Could be embedding? See cirrus.json
+			}
 			str, _, err := ast.LabelName(f.Label)
 			if err == nil {
 				fields[str] = f
@@ -466,10 +476,9 @@
 	}),
 
 	p0d("propertyNames", 6, func(n cue.Value, s *state) {
-		s.usedTypes |= cue.StructKind
-
 		// [=~pattern]: _
 		if names, _ := s.schemaState(n, cue.StringKind, false); !isAny(names) {
+			s.usedTypes |= cue.StructKind
 			s.addConjunct(ast.NewStruct(ast.NewList((names)), ast.NewIdent("_")))
 		}
 	}),
@@ -529,12 +538,15 @@
 	}),
 
 	p2("additionalProperties", func(n cue.Value, s *state) {
-		s.usedTypes |= cue.StructKind
 		switch n.Kind() {
 		case cue.BoolKind:
 			s.closeStruct = !s.boolValue(n)
+			if !s.closeStruct {
+				s.usedTypes |= cue.StructKind
+			}
 
 		case cue.StructKind:
+			s.usedTypes |= cue.StructKind
 			s.closeStruct = true
 			if s.obj == nil {
 				s.obj = &ast.StructLit{}
@@ -586,10 +598,19 @@
 	}),
 
 	p0("additionalItems", func(n cue.Value, s *state) {
-		s.usedTypes |= cue.ListKind
-		if s.list != nil {
-			elem := s.schema(n)
-			s.list.Elts = append(s.list.Elts, &ast.Ellipsis{Type: elem})
+		switch n.Kind() {
+		case cue.BoolKind:
+			// TODO: support
+
+		case cue.StructKind:
+			if s.list != nil {
+				s.usedTypes |= cue.ListKind
+				elem := s.schema(n)
+				s.list.Elts = append(s.list.Elts, &ast.Ellipsis{Type: elem})
+			}
+
+		default:
+			s.errf(n, `value of "additionalItems" must be an object or boolean`)
 		}
 	}),
 
diff --git a/encoding/jsonschema/testdata/basic.txtar b/encoding/jsonschema/testdata/basic.txtar
index d2308b3..45d8d74 100644
--- a/encoding/jsonschema/testdata/basic.txtar
+++ b/encoding/jsonschema/testdata/basic.txtar
@@ -12,7 +12,10 @@
       "type": "object",
       "required": [ "name" ],
       "properties": {
-        "name": { "type": "string" },
+        "name": {
+          "type": "string",
+          "examples": [ "foo" ]
+        },
         "address": {
             "description": "where does this person live?",
             "type": "string",
diff --git a/internal/encoding/encoding.go b/internal/encoding/encoding.go
index a828a1b..dbe069e 100644
--- a/internal/encoding/encoding.go
+++ b/internal/encoding/encoding.go
@@ -40,6 +40,8 @@
 	"cuelang.org/go/internal"
 	"cuelang.org/go/internal/filetypes"
 	"cuelang.org/go/internal/third_party/yaml"
+	"golang.org/x/text/encoding/unicode"
+	"golang.org/x/text/transform"
 )
 
 type Decoder struct {
@@ -169,13 +171,21 @@
 		return i
 	}
 
-	r, err := reader(f, cfg.Stdin)
-	i.closer = r
+	rc, err := reader(f, cfg.Stdin)
+	i.closer = rc
 	i.err = err
 	if err != nil {
 		return i
 	}
 
+	// For now we assume that all encodings require UTF-8. This will not be the
+	// case for some binary protocols. We need to exempt those explicitly here
+	// once we introduce them.
+	// TODO: this code also allows UTF16, which is too permissive for some
+	// encodings. Switch to unicode.UTF8Sig once available.
+	t := unicode.BOMOverride(unicode.UTF8.NewDecoder())
+	r := transform.NewReader(rc, t)
+
 	switch f.Interpretation {
 	case "":
 	case build.Auto: