cue: implement quoted identifiers
Spec says: don't allow strings as identifiers.
We left this out to allow for transition.
Also moved LabelName to internal.
Change-Id: I8b7935011ce53ec39527a851008c4824ad9daf7f
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/3181
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cmd/cue/cmd/import.go b/cmd/cue/cmd/import.go
index 4a601f8..fcc0d29 100644
--- a/cmd/cue/cmd/import.go
+++ b/cmd/cue/cmd/import.go
@@ -655,7 +655,7 @@
name := ""
switch x := n.(type) {
case *ast.Field:
- name, _ = ast.LabelName(x.Label)
+ name, _ = internal.LabelName(x.Label)
case *ast.Alias:
name = x.Ident.Name
}
@@ -682,7 +682,7 @@
continue
}
- name, ident := ast.LabelName(f.Label)
+ name, ident := internal.LabelName(f.Label)
if name == "" || !ident {
continue
}
diff --git a/cmd/cue/cmd/trim.go b/cmd/cue/cmd/trim.go
index 081ac71..abbda95 100644
--- a/cmd/cue/cmd/trim.go
+++ b/cmd/cue/cmd/trim.go
@@ -515,7 +515,7 @@
for _, d := range decls {
if f, ok := d.(*ast.Field); ok {
- label, _ := ast.LabelName(f.Label)
+ label, _ := internal.LabelName(f.Label)
v := m.Lookup(label)
if inNodes(rm, f.Value) && (allow || v.Exists()) {
continue
diff --git a/cue/ast.go b/cue/ast.go
index 17605d2..c1dec8a 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -145,7 +145,8 @@
func (v *astVisitor) resolve(n *ast.Ident) value {
ctx := v.ctx()
- label := v.label(n.Name, true)
+ name := v.ident(n)
+ label := v.label(name, true)
if r := v.resolveRoot; r != nil {
for _, a := range r.arcs {
if a.feature == label {
@@ -154,7 +155,7 @@
}
}
if v.inSelector > 0 {
- if p := getBuiltinShorthandPkg(ctx, n.Name); p != nil {
+ if p := getBuiltinShorthandPkg(ctx, name); p != nil {
return &nodeRef{newExpr(n), p, label}
}
}
@@ -181,6 +182,15 @@
return impInst.rootValue.evalPartial(ctx)
}
+func (v *astVisitor) ident(n *ast.Ident) string {
+ str, err := ast.ParseIdent(n)
+ if err != nil {
+ v.errf(n, "invalid literal: %v", err)
+ return n.Name
+ }
+ return str
+}
+
// We probably don't need to call Walk.s
func (v *astVisitor) walk(astNode ast.Node) (ret value) {
switch n := astNode.(type) {
@@ -326,7 +336,7 @@
case *ast.TemplateLabel:
v.sel = "*"
- f := v.label(x.Ident.Name, true)
+ f := v.label(v.ident(x.Ident), true)
sig := ¶ms{}
sig.add(f, &basicType{newNode(field.Label), stringKind})
@@ -338,7 +348,7 @@
fc.isTemplate = true
case *ast.BasicLit, *ast.Ident:
- name, ok := ast.LabelName(x)
+ name, ok := internal.LabelName(x)
if !ok {
return v.errf(x, "invalid field name: %v", x)
}
@@ -374,6 +384,9 @@
opt := n.Optional != token.NoPos
isDef := n.Token == token.ISA
if isDef {
+ if opt {
+ v.errf(n, "a definition may not be optional")
+ }
ctx := v.ctx()
ctx.inDefinition++
defer func() { ctx.inDefinition-- }()
@@ -397,7 +410,7 @@
v.errf(x, "map element type cannot be a definition")
}
v.sel = "*"
- f := v.label(x.Ident.Name, true)
+ f := v.label(v.ident(x.Ident), true)
sig := ¶ms{}
sig.add(f, &basicType{newNode(n.Label), stringKind})
@@ -412,7 +425,7 @@
if internal.DropOptional && opt {
break
}
- v.sel, _ = ast.LabelName(x)
+ v.sel, _ = internal.LabelName(x)
if v.sel == "_" {
if _, ok := x.(*ast.BasicLit); ok {
v.sel = "*"
@@ -459,7 +472,9 @@
// Expressions
case *ast.Ident:
- if n.Name == "_" {
+ name := v.ident(n)
+
+ if name == "_" {
ret = &top{newNode(n)}
break
}
@@ -469,7 +484,7 @@
break
}
- switch n.Name {
+ switch name {
case "_":
return &top{newExpr(n)}
case "string":
@@ -496,11 +511,11 @@
case "or":
return orBuiltin
}
- if r, ok := predefinedRanges[n.Name]; ok {
+ if r, ok := predefinedRanges[name]; ok {
return r
}
- ret = v.errf(n, "reference %q not found", n.Name)
+ ret = v.errf(n, "reference %q not found", name)
break
}
@@ -512,7 +527,7 @@
break
}
- f := v.label(n.Name, true)
+ f := v.label(name, true)
if n.Scope != nil {
n2 := v.mapScope(n.Scope)
if l, ok := n2.(*lambdaExpr); ok && len(l.params.arcs) == 1 {
@@ -589,7 +604,7 @@
ret = &selectorExpr{
newExpr(n),
v.walk(n.X),
- v.label(n.Sel.Name, true),
+ v.label(v.ident(n.Sel), true),
}
v.inSelector--
@@ -702,12 +717,12 @@
key := "_"
if n.Key != nil {
- key = n.Key.Name
+ key = v.ident(n.Key)
}
f := v.label(key, true)
fn.add(f, &basicType{newExpr(n.Key), stringKind | intKind})
- f = v.label(n.Value.Name, true)
+ f = v.label(v.ident(n.Value), true)
fn.add(f, &top{})
y = &feed{newExpr(n.Source), v.walk(n.Source), fn}
diff --git a/cue/ast/ast.go b/cue/ast/ast.go
index e558b79..da6b72c 100644
--- a/cue/ast/ast.go
+++ b/cue/ast/ast.go
@@ -17,7 +17,6 @@
package ast // import "cuelang.org/go/cue/ast"
import (
- "strconv"
"strings"
"cuelang.org/go/cue/token"
@@ -95,40 +94,12 @@
// A Label is any prduction that can be used as a LHS label.
type Label interface {
Node
- labelName() (name string, isIdent bool)
+ labelNode()
}
-func (n *Ident) labelName() (string, bool) {
- return n.Name, true
-}
+type label struct{}
-func (n *BasicLit) labelName() (string, bool) {
- switch n.Kind {
- case token.STRING:
- // Use strconv to only allow double-quoted, single-line strings.
- if str, err := strconv.Unquote(n.Value); err == nil {
- return str, true
- }
- case token.NULL, token.TRUE, token.FALSE:
- return n.Value, true
-
- // TODO: allow numbers to be fields?
- }
- return "", false
-}
-
-func (n *TemplateLabel) labelName() (string, bool) {
- return n.Ident.Name, false
-}
-
-func (n *Interpolation) labelName() (string, bool) {
- return "", false
-}
-
-// LabelName reports the name of a label, if known, and whether it is valid.
-func LabelName(x Label) (name string, ok bool) {
- return x.labelName()
-}
+func (l label) labelNode() {}
// Clause nodes are part of comprehensions.
type Clause interface {
@@ -352,6 +323,7 @@
// An Ident node represents an left-hand side identifier.
type Ident struct {
+ label
comments
NamePos token.Pos // identifier position
@@ -367,6 +339,7 @@
// A TemplateLabel represents a field template declaration in a struct.
type TemplateLabel struct {
+ label
comments
Langle token.Pos
Ident *Ident
@@ -375,6 +348,7 @@
// A BasicLit node represents a literal of basic type.
type BasicLit struct {
+ label
comments
ValuePos token.Pos // literal position
Kind token.Token // INT, FLOAT, DURATION, or STRING
@@ -382,7 +356,8 @@
}
// A Interpolation node represents a string or bytes interpolation.
-type Interpolation struct { // TODO: rename to TemplateLit
+type Interpolation struct {
+ label
comments
Elts []Expr // interleaving of strings and expressions.
}
@@ -564,7 +539,7 @@
// NewIdent creates a new Ident without position.
// Useful for ASTs generated by code other than the Go
func NewIdent(name string) *Ident {
- return &Ident{comments{}, token.NoPos, name, nil, nil}
+ return &Ident{label{}, comments{}, token.NoPos, name, nil, nil}
}
func (id *Ident) String() string {
diff --git a/cue/ast/ident.go b/cue/ast/ident.go
new file mode 100644
index 0000000..f153b51
--- /dev/null
+++ b/cue/ast/ident.go
@@ -0,0 +1,95 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ast
+
+import (
+ "strconv"
+ "unicode"
+ "unicode/utf8"
+
+ "cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/token"
+)
+
+func isLetter(ch rune) bool {
+ return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
+}
+
+func isDigit(ch rune) bool {
+ // TODO(mpvl): Is this correct?
+ return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
+}
+
+// QuoteIdent quotes an identifier, if needed, and reports
+// an error if the identifier is invalid.
+func QuoteIdent(ident string) (string, error) {
+ if ident != "" && ident[0] == '`' {
+ if _, err := strconv.Unquote(ident); err != nil {
+ return "", errors.Newf(token.NoPos, "invalid quoted identifier %q", ident)
+ }
+ return ident, nil
+ }
+
+ // TODO: consider quoting keywords
+ // switch ident {
+ // case "for", "in", "if", "let", "true", "false", "null":
+ // goto escape
+ // }
+
+ for _, r := range ident {
+ if isLetter(r) || isDigit(r) || r == '_' {
+ continue
+ }
+ if r == '-' {
+ goto escape
+ }
+ return "", errors.Newf(token.NoPos, "invalid character '%s' in identifier", string(r))
+ }
+
+ return ident, nil
+
+escape:
+ return "`" + ident + "`", nil
+}
+
+// ParseIdent unquotes a possibly quoted identifier and validates
+// if the result is valid.
+func ParseIdent(n *Ident) (string, error) {
+ ident := n.Name
+ if ident == "" {
+ return "", errors.Newf(n.Pos(), "empty identifier")
+ }
+ quoted := false
+ if ident[0] == '`' {
+ u, err := strconv.Unquote(ident)
+ if err != nil {
+ return "", errors.Newf(n.Pos(), "invalid quoted identifier")
+ }
+ ident = u
+ quoted = true
+ }
+
+ for _, r := range ident {
+ if isLetter(r) || isDigit(r) || r == '_' {
+ continue
+ }
+ if r == '-' && quoted {
+ continue
+ }
+ return "", errors.Newf(n.Pos(), "invalid character '%s' in identifier", string(r))
+ }
+
+ return ident, nil
+}
diff --git a/cue/build.go b/cue/build.go
index 356046d..cff3d7a 100644
--- a/cue/build.go
+++ b/cue/build.go
@@ -267,10 +267,11 @@
func (idx *index) nodeLabel(n ast.Node) (f label, ok bool) {
switch x := n.(type) {
case *ast.BasicLit:
- name, ok := ast.LabelName(x)
+ name, ok := internal.LabelName(x)
return idx.label(name, false), ok
case *ast.Ident:
- return idx.label(x.Name, true), true
+ name, err := ast.ParseIdent(x)
+ return idx.label(name, true), err == nil
}
return 0, false
}
diff --git a/cue/format/node.go b/cue/format/node.go
index 70f08fd..0058cd1 100644
--- a/cue/format/node.go
+++ b/cue/format/node.go
@@ -329,17 +329,19 @@
case *ast.Ident:
// Escape an identifier that has invalid characters. This may happen,
// if the AST is not generated by the parser.
- if isValidIdent(n.Name) {
- f.print(n.NamePos, n)
- } else {
- f.print(n.NamePos, strconv.Quote(n.Name))
+ name, err := ast.QuoteIdent(n.Name)
+ if err != nil {
+ name = strconv.Quote(n.Name)
}
+ f.print(n.NamePos, name)
case *ast.BasicLit:
if f.cfg.simplify && n.Kind == token.STRING && len(n.Value) > 2 {
s := n.Value
unquoted, err := strconv.Unquote(s)
if err == nil {
+ // TODO: only do this when requested and
+ // if this doesn't unshadow anything.
if isValidIdent(unquoted) {
f.print(n.ValuePos, unquoted)
break
diff --git a/cue/format/printer.go b/cue/format/printer.go
index 920bb6e..ab0faa9 100644
--- a/cue/format/printer.go
+++ b/cue/format/printer.go
@@ -107,6 +107,9 @@
case *ast.Ident:
data = x.Name
+ if q, err := ast.QuoteIdent(data); err == nil {
+ data = q
+ }
impliedComma = true
p.lastTok = token.IDENT
diff --git a/cue/parser/resolve.go b/cue/parser/resolve.go
index f164fd0..ce78a86 100644
--- a/cue/parser/resolve.go
+++ b/cue/parser/resolve.go
@@ -22,6 +22,7 @@
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/token"
+ "cuelang.org/go/internal"
)
// resolve resolves all identifiers in a file. Unresolved identifiers are
@@ -60,11 +61,10 @@
for _, d := range decls {
switch x := d.(type) {
case *ast.Field:
- if name, ok := ast.LabelName(x.Label); ok {
- s.insert(name, x.Value)
- }
+ name, _ := internal.LabelName(x.Label)
+ s.insert(name, x.Value)
case *ast.Alias:
- name := x.Ident.Name
+ name, _ := internal.LabelName(x.Ident)
s.insert(name, x)
// Handle imports
}
@@ -73,6 +73,9 @@
}
func (s *scope) insert(name string, n ast.Node) {
+ if name == "" {
+ return
+ }
if _, existing := s.lookup(name); existing != nil {
_, isAlias1 := n.(*ast.Alias)
_, isAlias2 := existing.(*ast.Alias)
@@ -127,7 +130,7 @@
walk(s, label)
case *ast.TemplateLabel:
s := newScope(s.file, s, x, nil)
- name, _ := ast.LabelName(label)
+ name, _ := internal.LabelName(label)
s.insert(name, x.Label) // Field used for entire lambda.
walk(s, x.Value)
return nil
@@ -158,7 +161,11 @@
return nil
case *ast.Ident:
- if obj, node := s.lookup(x.Name); node != nil {
+ name, ok := internal.LabelName(x)
+ if !ok {
+ break
+ }
+ if obj, node := s.lookup(name); node != nil {
x.Node = node
x.Scope = obj
} else {
@@ -175,9 +182,11 @@
walk(s, f.Source)
s = newScope(s.file, s, f, nil)
if f.Key != nil {
- s.insert(f.Key.Name, f.Key)
+ name, _ := internal.LabelName(f.Key)
+ s.insert(name, f.Key)
}
- s.insert(f.Value.Name, f.Value)
+ name, _ := internal.LabelName(f.Value)
+ s.insert(name, f.Value)
} else {
walk(s, c)
}
diff --git a/cue/resolve_test.go b/cue/resolve_test.go
index 9839e49..2ab7650 100644
--- a/cue/resolve_test.go
+++ b/cue/resolve_test.go
@@ -570,6 +570,23 @@
`,
out: "<0>{a: 3, b: <1>{c: <2>{d: 3}}, c: <3>{c: 2}, d: <4>{d: 2}}",
}, {
+ in: "`foo-bar`: 3\n x: `foo-bar`,",
+ out: `<0>{foo-bar: 3, x: 3}`,
+ }, {
+ desc: "resolution of quoted identifiers",
+ in: `
+ package foo
+
+` + "`foo-bar`" + `: 2
+"baz": ` + "`foo-bar`" + `
+
+a: {
+ qux: 3
+ ` + "`qux-quux`" + `: qux
+ "qaz": ` + "`qux-quux`" + `
+}`,
+ out: "<0>{foo-bar: 2, baz: 2, a: <1>{qux: 3, qux-quux: 3, qaz: 3}}",
+ }, {
in: `
a: _
b: a
diff --git a/cue/scanner/scanner.go b/cue/scanner/scanner.go
index d8f19e4..317e5e6 100644
--- a/cue/scanner/scanner.go
+++ b/cue/scanner/scanner.go
@@ -22,6 +22,7 @@
"fmt"
"path/filepath"
"strconv"
+ "strings"
"unicode"
"unicode/utf8"
@@ -287,6 +288,36 @@
return string(s.src[offs:s.offset])
}
+func isExtendedIdent(r rune) bool {
+ return strings.IndexRune("-_#$%. ", r) >= 0
+}
+
+func (s *Scanner) scanQuotedIdentifier() string {
+ offs := s.offset - 1 // quote already consumed
+ hasInvalid := false
+ for ; ; s.next() {
+ switch {
+ default:
+ if !hasInvalid {
+ s.errf(s.offset, "invalid character '%s' in identifier", string(s.ch))
+ hasInvalid = true
+ }
+ continue
+
+ case isLetter(s.ch) || isDigit(s.ch) || isExtendedIdent(s.ch):
+ continue
+
+ case s.ch == '`':
+ s.next()
+ return string(s.src[offs:s.offset])
+
+ case s.ch == '\n':
+ s.errf(s.offset, "quoted identifier not terminated")
+ return string(s.src[offs:s.offset])
+ }
+ }
+}
+
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
@@ -815,6 +846,11 @@
lit = "_" + s.scanIdentifier()
}
insertEOL = true
+ case '`':
+ tok = token.IDENT
+ lit = s.scanQuotedIdentifier()
+ insertEOL = true
+
case '\n':
// we only reach here if s.insertComma was
// set in the first place and exited early
diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go
index 11cce96..3ea635e 100644
--- a/cue/scanner/scanner_test.go
+++ b/cue/scanner/scanner_test.go
@@ -74,6 +74,7 @@
{token.BOTTOM, "_|_", literal},
{token.IDENT, "foobar", literal},
+ {token.IDENT, "`foobar`", literal},
{token.IDENT, "a۰۱۸", literal},
{token.IDENT, "foo६४", literal},
{token.IDENT, "bar9876", literal},
@@ -731,6 +732,9 @@
{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
{`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"},
+ {"`foo=bar`", token.IDENT, 4, "`foo=bar`", "invalid character '=' in identifier"},
+ {"`foo\nbar`", token.IDENT, 4, "`foo", "quoted identifier not terminated"},
+
{`@`, token.ATTRIBUTE, 1, `@`, "invalid attribute: expected '('"},
{`@foo`, token.ATTRIBUTE, 4, `@foo`, "invalid attribute: expected '('"},
{`@foo(`, token.ATTRIBUTE, 5, `@foo(`, "attribute missing ')'"},
diff --git a/go.mod b/go.mod
index efe3e60..4fd22ad 100644
--- a/go.mod
+++ b/go.mod
@@ -14,6 +14,7 @@
github.com/retr0h/go-gilt v0.0.0-20190206215556-f73826b37af2
github.com/spf13/cobra v0.0.3
github.com/spf13/pflag v1.0.3
+ github.com/stretchr/testify v1.2.0
golang.org/x/net v0.0.0-20190311183353-d8887717615a
golang.org/x/sync v0.0.0-20190423024810-112230192c58
golang.org/x/text v0.3.2
diff --git a/internal/internal.go b/internal/internal.go
index f4e0ab7..051cdac 100644
--- a/internal/internal.go
+++ b/internal/internal.go
@@ -20,6 +20,8 @@
// TODO: refactor packages as to make this package unnecessary.
import (
+ "strconv"
+
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/token"
"github.com/cockroachdb/apd/v2"
@@ -100,3 +102,35 @@
}
return nil, "", f.Pos()
}
+
+// LabelName reports the name of a label, if known, and whether it is valid.
+func LabelName(l ast.Label) (name string, ok bool) {
+ switch n := l.(type) {
+ case *ast.Ident:
+ str, err := ast.ParseIdent(n)
+ if err != nil {
+ return "", false
+ }
+ return str, true
+
+ case *ast.BasicLit:
+ switch n.Kind {
+ case token.STRING:
+ // Use strconv to only allow double-quoted, single-line strings.
+ if str, err := strconv.Unquote(n.Value); err == nil {
+ return str, true
+ }
+
+ case token.NULL, token.TRUE, token.FALSE:
+ return n.Value, true
+
+ // TODO: allow numbers to be fields?
+ }
+
+ case *ast.TemplateLabel:
+ return n.Ident.Name, false
+
+ }
+ // This includes interpolation.
+ return "", false
+}
diff --git a/internal/internal_test.go b/internal/internal_test.go
new file mode 100644
index 0000000..c56ce82
--- /dev/null
+++ b/internal/internal_test.go
@@ -0,0 +1,57 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package internal_test
+
+import (
+ "testing"
+
+ "cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/format"
+ "cuelang.org/go/cue/token"
+ "cuelang.org/go/internal"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestLabelName(t *testing.T) {
+ testCases := []struct {
+ in ast.Label
+ out string
+ ok bool
+ }{{
+ in: &ast.BasicLit{Kind: token.STRING, Value: `"foo-bar"`},
+ out: "foo-bar",
+ ok: true,
+ }, {
+ in: &ast.BasicLit{Kind: token.STRING, Value: `"foo bar"`},
+ out: "foo bar",
+ ok: true,
+ }, {
+ in: &ast.Ident{Name: "`foo-bar`"},
+ out: "foo-bar",
+ ok: true,
+ }, {
+ in: &ast.Ident{Name: "`foo-bar\x00`"},
+ out: "",
+ ok: false,
+ }}
+ for _, tc := range testCases {
+ b, _ := format.Node(tc.in)
+ t.Run(string(b), func(t *testing.T) {
+ str, ok := internal.LabelName(tc.in)
+ assert.Equal(t, tc.out, str)
+ assert.Equal(t, tc.ok, ok)
+ })
+ }
+}
diff --git a/internal/third_party/yaml/decode.go b/internal/third_party/yaml/decode.go
index 22a6420..00a7e19 100644
--- a/internal/third_party/yaml/decode.go
+++ b/internal/third_party/yaml/decode.go
@@ -16,6 +16,7 @@
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/token"
+ "cuelang.org/go/internal"
)
const (
@@ -694,7 +695,7 @@
key := labelStr(label)
for _, decl := range m.Elts {
f := decl.(*ast.Field)
- name, _ := ast.LabelName(f.Label)
+ name, _ := internal.LabelName(f.Label)
if name == key {
f.Value = d.unmarshal(n.children[i+1])
continue outer