cue: use `_#` instead of `#_` for hidden definition

This slighly complicates the spec (though minimal).

The main benefit, though, is to allow for more straighforward
roundtrippability between languages. For instance, a language
may define an identifier starting with `_` that needs to be
mapped to a definition. Now, to avoid the definition from becoming
hidden, this `_` needs to be removed and bookkeeping in the form
of attributes need to be added to preserve the name in a round trip.

With the rewording, the `#` marks the start of the identifier and
an optionally preceding `_` indicates hidden independent of this.
This allows the Go mapping to preserve naming without any
rewrites, for instance.

The spec now also more clearly distinguishes between
hidden and definition fields.

Change-Id: Ic7bdcc3527d0639b0943246ef0af629c6a477493
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/5980
Reviewed-by: Marcel van Lohuizen <mpvl@google.com>
diff --git a/cue/ast.go b/cue/ast.go
index fa50077..a79dc51 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -195,21 +195,6 @@
 	return str
 }
 
-func isDef(f *ast.Field) bool {
-	if f.Token == token.ISA {
-		return true
-	}
-	switch x := f.Label.(type) {
-	case *ast.Alias:
-		if ident, ok := x.Expr.(*ast.Ident); ok {
-			return strings.HasPrefix(ident.Name, "#")
-		}
-	case *ast.Ident:
-		return strings.HasPrefix(x.Name, "#")
-	}
-	return false
-}
-
 // We probably don't need to call Walk.s
 func (v *astVisitor) walk(astNode ast.Node) (ret value) {
 	switch n := astNode.(type) {
@@ -396,7 +381,7 @@
 
 	case *ast.Field:
 		opt := n.Optional != token.NoPos
-		isDef := isDef(n)
+		isDef := internal.IsDefinition(n.Label) || n.Token == token.ISA
 		if isDef {
 			ctx := v.ctx()
 			ctx.inDefinition++
diff --git a/cue/ast/ident.go b/cue/ast/ident.go
index 24e5322..35f1ca3 100644
--- a/cue/ast/ident.go
+++ b/cue/ast/ident.go
@@ -21,6 +21,7 @@
 
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/token"
+	"cuelang.org/go/pkg/strings"
 )
 
 func isLetter(ch rune) bool {
@@ -38,12 +39,26 @@
 		return false
 	}
 
-	r, sz := utf8.DecodeRuneInString(ident)
-	switch {
-	case isDigit(r):
-		return false
-	case r == '#':
-		ident = ident[sz:]
+	consumed := false
+	if strings.HasPrefix(ident, "_") {
+		ident = ident[1:]
+		consumed = true
+		if len(ident) == 0 {
+			return true
+		}
+	}
+	if strings.HasPrefix(ident, "#") {
+		ident = ident[1:]
+		consumed = true
+		if len(ident) == 0 {
+			return false
+		}
+	}
+
+	if !consumed {
+		if r, _ := utf8.DecodeRuneInString(ident); isDigit(r) {
+			return false
+		}
 	}
 
 	for _, r := range ident {
@@ -109,16 +124,27 @@
 		quoted = true
 	}
 
-	r, sz := utf8.DecodeRuneInString(ident)
-	switch {
-	case isDigit(r):
-		return "", errors.Newf(pos, "invalid character '%s' in identifier", string(r))
-	case r == '#':
-	default:
-		sz = 0
+	p := 0
+	if strings.HasPrefix(ident, "_") {
+		p++
+		if len(ident) == 1 {
+			return ident, nil
+		}
+	}
+	if strings.HasPrefix(ident[p:], "#") {
+		p++
+		if len(ident) == p {
+			return "", errors.Newf(pos, "invalid identifier '_#'")
+		}
 	}
 
-	for _, r := range ident[sz:] {
+	if p == 0 {
+		if r, _ := utf8.DecodeRuneInString(ident); isDigit(r) {
+			return "", errors.Newf(pos, "invalid character '%s' in identifier", string(r))
+		}
+	}
+
+	for _, r := range ident[p:] {
 		if isLetter(r) || isDigit(r) || r == '_' || r == '$' {
 			continue
 		}
diff --git a/cue/ast/ident_test.go b/cue/ast/ident_test.go
index eefd093..3bbef90 100644
--- a/cue/ast/ident_test.go
+++ b/cue/ast/ident_test.go
@@ -15,6 +15,7 @@
 package ast_test
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -49,6 +50,25 @@
 		out:     "foo",
 		isIdent: true,
 	}, {
+		in:      &ast.Ident{Name: ""},
+		out:     "",
+		isIdent: false,
+		err:     true,
+	}, {
+		in:      &ast.Ident{Name: "#"},
+		out:     "",
+		isIdent: false,
+		err:     true,
+	}, {
+		in:      &ast.Ident{Name: "_#"},
+		out:     "",
+		isIdent: false,
+		err:     true,
+	}, {
+		in:      &ast.Ident{Name: "_"},
+		out:     "_",
+		isIdent: true,
+	}, {
 		in:      &ast.Ident{Name: "8ball"},
 		out:     "",
 		isIdent: false,
@@ -58,10 +78,22 @@
 		out:     "_hidden",
 		isIdent: true,
 	}, {
+		in:      &ast.Ident{Name: "#A"},
+		out:     "#A",
+		isIdent: true,
+	}, {
 		in:      &ast.Ident{Name: "#Def"},
 		out:     "#Def",
 		isIdent: true,
 	}, {
+		in:      &ast.Ident{Name: "_#Def"},
+		out:     "_#Def",
+		isIdent: true,
+	}, {
+		in:      &ast.Ident{Name: "#_Def"},
+		out:     "#_Def",
+		isIdent: true,
+	}, {
 		in:      &ast.Ident{Name: "`foo-bar`"},
 		out:     "foo-bar",
 		isIdent: true,
@@ -94,6 +126,10 @@
 	for _, tc := range testCases {
 		b, _ := format.Node(tc.in)
 		t.Run(string(b), func(t *testing.T) {
+			if id, ok := tc.in.(*ast.Ident); ok && !strings.HasPrefix(id.Name, "`") {
+				assert.Equal(t, tc.isIdent, ast.IsValidIdent(id.Name))
+			}
+
 			str, isIdent, err := ast.LabelName(tc.in)
 			assert.Equal(t, tc.out, str, "value")
 			assert.Equal(t, tc.isIdent, isIdent, "isIdent")
diff --git a/cue/build.go b/cue/build.go
index 27408fc..33c24c5 100644
--- a/cue/build.go
+++ b/cue/build.go
@@ -17,7 +17,6 @@
 import (
 	"path"
 	"strconv"
-	"strings"
 	"sync"
 
 	"cuelang.org/go/cue/ast"
@@ -298,10 +297,10 @@
 	}
 	f <<= labelShift
 	if isIdent {
-		if strings.HasPrefix(s, "#") {
+		if internal.IsDef(s) {
 			f |= definition
 		}
-		if strings.HasPrefix(s, "_") || strings.HasPrefix(s, "#_") {
+		if internal.IsHidden(s) {
 			f |= hidden
 		}
 	}
diff --git a/cue/format/simplify.go b/cue/format/simplify.go
index 5f9c664..f498197 100644
--- a/cue/format/simplify.go
+++ b/cue/format/simplify.go
@@ -16,10 +16,10 @@
 
 import (
 	"strconv"
-	"strings"
 
 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/internal"
 )
 
 // labelSimplifier rewrites string labels to identifiers if
@@ -87,10 +87,7 @@
 	switch x := n.(type) {
 	case *ast.BasicLit:
 		str, err := strconv.Unquote(x.Value)
-		if err != nil ||
-			!ast.IsValidIdent(str) ||
-			strings.HasPrefix(str, "_") ||
-			strings.HasPrefix(str, "#") {
+		if err != nil || !ast.IsValidIdent(str) || internal.IsDefOrHidden(str) {
 			return false
 		}
 		s.scope[str] = true
@@ -108,10 +105,7 @@
 	switch x := c.Node().(type) {
 	case *ast.BasicLit:
 		str, err := strconv.Unquote(x.Value)
-		if err == nil &&
-			s.scope[str] &&
-			!strings.HasPrefix(str, "_") &&
-			!strings.HasPrefix(str, "#") {
+		if err == nil && s.scope[str] && !internal.IsDefOrHidden(str) {
 			c.Replace(ast.NewIdent(str))
 		}
 	}
diff --git a/cue/scanner/scanner.go b/cue/scanner/scanner.go
index 0547952..c988256 100644
--- a/cue/scanner/scanner.go
+++ b/cue/scanner/scanner.go
@@ -268,6 +268,9 @@
 
 func (s *Scanner) scanFieldIdentifier() string {
 	offs := s.offset
+	if s.ch == '_' {
+		s.next()
+	}
 	if s.ch == '#' {
 		s.next()
 	}
@@ -810,7 +813,7 @@
 				lit = "_|_"
 			} else {
 				tok = token.IDENT
-				lit = "_" + s.scanIdentifier()
+				lit = "_" + s.scanFieldIdentifier()
 			}
 			insertEOL = true
 		case '`':
diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go
index d40bbfb..123962e 100644
--- a/cue/scanner/scanner_test.go
+++ b/cue/scanner/scanner_test.go
@@ -77,7 +77,12 @@
 	{token.IDENT, "foobar", literal},
 	{token.IDENT, "$foobar", literal},
 	{token.IDENT, "#foobar", literal},
+	{token.IDENT, "#0", literal},
 	{token.IDENT, "_foobar", literal},
+	{token.IDENT, "__foobar", literal},
+	{token.IDENT, "#_foobar", literal},
+	{token.IDENT, "_#foobar", literal},
+	{token.IDENT, "__#foobar", literal},
 	{token.IDENT, "`foobar`", literal},
 	{token.IDENT, "a۰۱۸", literal},
 	{token.IDENT, "foo६४", literal},
diff --git a/cue/types_test.go b/cue/types_test.go
index 5d7a880..ceac420 100644
--- a/cue/types_test.go
+++ b/cue/types_test.go
@@ -859,9 +859,9 @@
 		def: "_foo",
 		out: `_|_(defintion "_foo" not found)`,
 	}, {
-		in:  `#_foo: 3`,
-		def: "#_foo",
-		out: `_|_(defintion "#_foo" not found)`,
+		in:  `_#foo: 3`,
+		def: "_#foo",
+		out: `_|_(defintion "_#foo" not found)`,
 	}}
 
 	for _, tc := range testCases {
diff --git a/doc/ref/spec.md b/doc/ref/spec.md
index a0af53e..122b133 100644
--- a/doc/ref/spec.md
+++ b/doc/ref/spec.md
@@ -178,10 +178,11 @@
 
 Identifiers name entities such as fields and aliases.
 An identifier is a sequence of one or more letters (which includes `_` and `$`)
-and digits, optionally preceded by a `#`.
-It may not be `_`, `$`, or `#`.
+and digits, optionally preceded by `#` or `_#`.
+It may not be `_`, `$`, `#`, or `_#`.
 The first character in an identifier must be a letter or `#`.
-Identifiers starting with a `#` or `_` are reserved for definitions.
+Identifiers starting with a `#` or `_` are reserved for definitions and hidden
+fields.
 
 <!--
 TODO: allow identifiers as defined in Unicode UAX #31
@@ -191,7 +192,7 @@
 -->
 
 ```
-identifier  = [ "#" ] letter { letter | unicode_digit } .
+identifier  = [ "#" | "_#" ] letter { letter | unicode_digit } .
 ```
 
 ```
@@ -1292,12 +1293,13 @@
 ```
 
 
-#### Definitions
+#### Definitions and hidden fields
 
-A field is a _definition_ if its identifier starts with `#` or `_`.
-Definitions are not emitted as part of the model and are never required
-to be concrete when emitting data.
-For definitions with identifiers starting with `#`,
+A field is a _definition_ if its identifier starts with `#` or `_#`.
+A field is _hidden_ if its starts with a `_`.
+Definitions and hidden fields are not emitted when converting a CUE program
+to data and are never required to be concrete.
+For definitions
 literal structs that are part of a definition's value are implicitly closed,
 but may unify unrestricted with other structs within the field's declaration.
 This excludes literals structs in embeddings and aliases.
@@ -1699,10 +1701,9 @@
 
 An identifier of a package may be exported to permit access to it
 from another package.
-All identifiers of regular fields (those not starting with a `#` or `_`)
-are exported.
-A definition identifier is exported if it does not start with `_` or `#_`.
-Any other defintion is not visible outside the package and resides
+All identifiers not starting with `_` (so all regular fields and definitions
+starting with `#`) are exported.
+Any identifier starting with `_` is not visible outside the package and resides
 in a separate namespace than namesake identifiers of other packages.
 
 ```
@@ -1718,7 +1719,7 @@
     #C: {    // visible outside mypackage
         d: 4 // visible outside mypackage
     }
-    #_E: foo // not visible outside mypackage
+    _#E: foo // not visible outside mypackage
 }
 ```
 
diff --git a/encoding/jsonschema/ref.go b/encoding/jsonschema/ref.go
index 59c2130..b0fa788 100644
--- a/encoding/jsonschema/ref.go
+++ b/encoding/jsonschema/ref.go
@@ -22,6 +22,7 @@
 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/token"
+	"cuelang.org/go/internal"
 )
 
 func (d *decoder) parseRef(p token.Pos, str string) []string {
@@ -99,8 +100,7 @@
 	name := a[1]
 	if ast.IsValidIdent(name) &&
 		name != rootDefs[1:] &&
-		!strings.HasPrefix(name, "#") &&
-		!strings.HasPrefix(name, "_") {
+		!internal.IsDefOrHidden(name) {
 		return []ast.Label{ast.NewIdent("#" + name)}, nil
 	}
 	return []ast.Label{ast.NewIdent(rootDefs), ast.NewString(name)}, nil
diff --git a/encoding/openapi/decode.go b/encoding/openapi/decode.go
index 05a27c3..a8a61b8 100644
--- a/encoding/openapi/decode.go
+++ b/encoding/openapi/decode.go
@@ -135,8 +135,7 @@
 	name := a[2]
 	if ast.IsValidIdent(name) &&
 		name != rootDefs[1:] &&
-		!strings.HasPrefix(name, "#") &&
-		!strings.HasPrefix(name, "_") {
+		!internal.IsDefOrHidden(name) {
 		return []ast.Label{ast.NewIdent("#" + name)}, nil
 	}
 	return []ast.Label{ast.NewIdent(rootDefs), ast.NewString(name)}, nil
diff --git a/internal/internal.go b/internal/internal.go
index 87df900..9ff02c8 100644
--- a/internal/internal.go
+++ b/internal/internal.go
@@ -295,14 +295,26 @@
 	return false
 }
 
+func IsDef(s string) bool {
+	return strings.HasPrefix(s, "#") || strings.HasPrefix(s, "_#")
+}
+
+func IsHidden(s string) bool {
+	return strings.HasPrefix(s, "_")
+}
+
+func IsDefOrHidden(s string) bool {
+	return strings.HasPrefix(s, "#") || strings.HasPrefix(s, "_")
+}
+
 func IsDefinition(label ast.Label) bool {
 	switch x := label.(type) {
 	case *ast.Alias:
 		if ident, ok := x.Expr.(*ast.Ident); ok {
-			return strings.HasPrefix(ident.Name, "#")
+			return IsDef(ident.Name)
 		}
 	case *ast.Ident:
-		return strings.HasPrefix(x.Name, "#")
+		return IsDef(x.Name)
 	}
 	return false
 }