Merge "cue: hoist literal string parsing into new package"
diff --git a/cmd/cue/cmd/import.go b/cmd/cue/cmd/import.go
index 1d988b5..4b975ba 100644
--- a/cmd/cue/cmd/import.go
+++ b/cmd/cue/cmd/import.go
@@ -32,6 +32,7 @@
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/encoding"
"cuelang.org/go/cue/format"
+ "cuelang.org/go/cue/literal"
"cuelang.org/go/cue/load"
"cuelang.org/go/cue/parser"
"cuelang.org/go/cue/token"
@@ -658,7 +659,7 @@
continue
}
- str, err := cue.Unquote(lit.Value)
+ str, err := literal.Unquote(lit.Value)
if err != nil {
continue
}
diff --git a/cue/ast.go b/cue/ast.go
index 0f3086d..3ca7fd5 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -16,11 +16,11 @@
import (
"fmt"
- "strconv"
"strings"
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/build"
+ "cuelang.org/go/cue/literal"
"cuelang.org/go/cue/token"
)
@@ -131,7 +131,7 @@
if !isBottom(val) {
return val
}
- path, err := strconv.Unquote(imp.Path.Value)
+ path, err := literal.Unquote(imp.Path.Value)
if err != nil {
return ctx.mkErr(newNode(imp), "illformed import spec")
}
@@ -383,7 +383,7 @@
}
lit := &interpolation{baseValue: newExpr(n), k: stringKind}
value = lit
- info, prefixLen, _, err := ParseQuotes(first.Value, last.Value)
+ info, prefixLen, _, err := literal.ParseQuotes(first.Value, last.Value)
if err != nil {
return v.error(n, "invalid interpolation: %v", err)
}
diff --git a/cue/builtin.go b/cue/builtin.go
index b6fdd17..39c3fad 100644
--- a/cue/builtin.go
+++ b/cue/builtin.go
@@ -24,10 +24,10 @@
"path"
"reflect"
"sort"
- "strconv"
"strings"
"cuelang.org/go/cue/ast"
+ "cuelang.org/go/cue/literal"
"github.com/cockroachdb/apd"
)
@@ -349,7 +349,7 @@
// lookupBuiltinPkg returns the builtin package for the given path if it exists.
func lookupBuiltinPkg(ctx *context, imp *ast.ImportSpec) evaluated {
- path, err := strconv.Unquote(imp.Path.Value)
+ path, err := literal.Unquote(imp.Path.Value)
if err != nil {
return ctx.mkErr(newNode(imp), "illformed import spec")
}
diff --git a/cue/builtins.go b/cue/builtins.go
index f16d702..51162d5 100644
--- a/cue/builtins.go
+++ b/cue/builtins.go
@@ -25,6 +25,7 @@
"text/template"
"unicode"
+ "cuelang.org/go/cue/literal"
"cuelang.org/go/cue/parser"
"cuelang.org/go/cue/token"
"cuelang.org/go/internal/third_party/yaml"
@@ -1283,7 +1284,7 @@
Func: func(c *callCtxt) {
s := c.string(0)
c.ret, c.err = func() (interface{}, error) {
- return Unquote(s)
+ return literal.Unquote(s)
}()
},
}, {
diff --git a/cue/lit.go b/cue/lit.go
index e524b1b..74e3ccd 100644
--- a/cue/lit.go
+++ b/cue/lit.go
@@ -16,12 +16,10 @@
import (
"math/big"
- "strings"
- "unicode"
- "unicode/utf8"
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/literal"
"github.com/cockroachdb/apd"
)
@@ -34,267 +32,6 @@
var errInvalidString = errors.New("invalid string")
-// Unquote interprets s as a single- or double-quoted, single- or multi-line
-// string, possibly with custom escape delimiters, returning the string value
-// that s quotes.
-func Unquote(s string) (string, error) {
- info, nStart, _, err := ParseQuotes(s, s)
- if err != nil {
- return "", err
- }
- s = s[nStart:]
- return info.Unquote(s)
-}
-
-// Unquote unquotes the given string. It must be terminated with a quote or an
-// interpolation start.
-func (q QuoteInfo) Unquote(s string) (string, error) {
- if len(s) > 0 && !q.multiline {
- if contains(s, '\n') || contains(s, '\r') {
- return "", errSyntax
- }
- // Is it trivial? Avoid allocation.
- if s[len(s)-1] == q.char &&
- q.numHash == 0 &&
- !contains(s, '\\') &&
- !contains(s[:len(s)-1], q.char) {
- return s[:len(s)-1], nil
- }
- }
-
- var runeTmp [utf8.UTFMax]byte
- buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
- stripNL := false
- for len(s) > 0 {
- switch s[0] {
- case '\r':
- s = s[1:]
- continue
- case '\n':
- switch {
- case !q.multiline:
- fallthrough
- default:
- return "", errInvalidWhitespace
- case strings.HasPrefix(s[1:], q.whitespace):
- s = s[1+len(q.whitespace):]
- case strings.HasPrefix(s[1:], "\n"):
- s = s[1:]
- }
- stripNL = true
- buf = append(buf, '\n')
- continue
- }
- c, multibyte, ss, err := unquoteChar(s, q)
- if err != nil {
- return "", err
- }
- // TODO: handle surrogates: if we have a left-surrogate, expect the
- // next value to be a right surrogate. Otherwise this is an error.
- s = ss
- if c < 0 {
- if c == -2 {
- stripNL = false
- }
- if stripNL {
- // Strip the last newline, but only if it came from a closing
- // quote.
- buf = buf[:len(buf)-1]
- }
- return string(buf), nil
- }
- stripNL = false
- if c < utf8.RuneSelf || !multibyte {
- buf = append(buf, byte(c))
- } else {
- n := utf8.EncodeRune(runeTmp[:], c)
- buf = append(buf, runeTmp[:n]...)
- }
- }
- // allow unmatched quotes if already checked.
- return "", errUnmatchedQuote
-}
-
-// contains reports whether the string contains the byte c.
-func contains(s string, c byte) bool {
- for i := 0; i < len(s); i++ {
- if s[i] == c {
- return true
- }
- }
- return false
-}
-
-// unquoteChar decodes the first character or byte in the escaped string.
-// It returns four values:
-//
-// 1) value, the decoded Unicode code point or byte value; the special value
-// of -1 indicates terminated by quotes and -2 means terminated by \(.
-// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
-// 3) tail, the remainder of the string after the character; and
-// 4) an error that will be nil if the character is syntactically valid.
-//
-// The second argument, kind, specifies the type of literal being parsed
-// and therefore which kind of escape sequences are permitted.
-// For kind 's' only JSON escapes and \u{ are permitted.
-// For kind 'b' also hexadecimal and octal escape sequences are permitted.
-//
-// The third argument, quote, specifies that an ASCII quoting character that
-// is not permitted in the output.
-func unquoteChar(s string, info QuoteInfo) (value rune, multibyte bool, tail string, err error) {
- // easy cases
- switch c := s[0]; {
- case c == info.char && info.char != 0:
- for i := 1; byte(i) < info.numChar; i++ {
- if i >= len(s) || s[i] != info.char {
- return rune(info.char), false, s[1:], nil
- }
- }
- for i := 0; i < info.numHash; i++ {
- if i+int(info.numChar) >= len(s) || s[i+int(info.numChar)] != '#' {
- return rune(info.char), false, s[1:], nil
- }
- }
- if ln := int(info.numChar) + info.numHash; len(s) != ln {
- // TODO: terminating quote in middle of string
- return 0, false, s[ln:], errSyntax
- }
- return -1, false, "", nil
- case c >= utf8.RuneSelf:
- r, size := utf8.DecodeRuneInString(s)
- return r, true, s[size:], nil
- case c != '\\':
- return rune(s[0]), false, s[1:], nil
- }
-
- if len(s) <= 1+info.numHash {
- return '\\', false, s[1:], nil
- }
- for i := 1; i <= info.numHash && i < len(s); i++ {
- if s[i] != '#' {
- return '\\', false, s[1:], nil
- }
- }
-
- c := s[1+info.numHash]
- s = s[2+info.numHash:]
-
- switch c {
- case 'a':
- value = '\a'
- case 'b':
- value = '\b'
- case 'f':
- value = '\f'
- case 'n':
- value = '\n'
- case 'r':
- value = '\r'
- case 't':
- value = '\t'
- case 'v':
- value = '\v'
- case '/':
- value = '/'
- case 'x', 'u', 'U':
- n := 0
- switch c {
- case 'x':
- n = 2
- case 'u':
- n = 4
- case 'U':
- n = 8
- }
- var v rune
- if len(s) < n {
- err = errSyntax
- return
- }
- for j := 0; j < n; j++ {
- x, ok := unhex(s[j])
- if !ok {
- err = errSyntax
- return
- }
- v = v<<4 | x
- }
- s = s[n:]
- if c == 'x' {
- if info.char == '"' {
- err = errSyntax
- return
- }
- // single-byte string, possibly not UTF-8
- value = v
- break
- }
- if v > utf8.MaxRune {
- err = errSyntax
- return
- }
- value = v
- multibyte = true
- case '0', '1', '2', '3', '4', '5', '6', '7':
- if info.char == '"' {
- err = errSyntax
- return
- }
- v := rune(c) - '0'
- if len(s) < 2 {
- err = errSyntax
- return
- }
- for j := 0; j < 2; j++ { // one digit already; two more
- x := rune(s[j]) - '0'
- if x < 0 || x > 7 {
- err = errSyntax
- return
- }
- v = (v << 3) | x
- }
- s = s[2:]
- if v > 255 {
- err = errSyntax
- return
- }
- value = v
- case '\\':
- value = '\\'
- case '\'', '"':
- // TODO: should we allow escaping of quotes regardless?
- if c != info.char {
- err = errSyntax
- return
- }
- value = rune(c)
- case '(':
- if s != "" {
- // TODO: terminating quote in middle of string
- return 0, false, s, errSyntax
- }
- value = -2
- default:
- err = errSyntax
- return
- }
- tail = s
- return
-}
-
-func unhex(b byte) (v rune, ok bool) {
- c := rune(b)
- switch {
- case '0' <= c && c <= '9':
- return c - '0', true
- case 'a' <= c && c <= 'f':
- return c - 'a' + 10, true
- case 'A' <= c && c <= 'F':
- return c - 'A' + 10, true
- }
- return
-}
-
type numInfo struct {
rep multiplier
k kind
@@ -426,7 +163,7 @@
}
switch p.ch {
case '"', '\'', '`', '#':
- info, nStart, _, err := ParseQuotes(s, s)
+ info, nStart, _, err := literal.ParseQuotes(s, s)
if err != nil {
return p.error(l, err.Error())
}
@@ -447,100 +184,8 @@
return n
}
-var (
- errStringTooShort = errors.New("invalid string: too short")
- errInvalidWhitespace = errors.New("invalid string: invalid whitespace")
- errMissingNewline = errors.New(
- "invalid string: opening quote of multiline string must be followed by newline")
- errUnmatchedQuote = errors.New("invalid string: unmatched quote")
-)
-
-// QuoteInfo describes the type of quotes used for a string.
-type QuoteInfo struct {
- quote string
- whitespace string
- numHash int
- multiline bool
- char byte
- numChar byte
-}
-
-// IsDouble reports whether the literal uses double quotes.
-func (q QuoteInfo) IsDouble() bool {
- return q.char == '"'
-}
-
-// ParseQuotes checks if the opening quotes in start matches the ending quotes
-// in end and reports its type as q or an error if they do not matching or are
-// invalid. nStart indicates the number of bytes used for the opening quote.
-func ParseQuotes(start, end string) (q QuoteInfo, nStart, nEnd int, err error) {
- for i, c := range start {
- if c != '#' {
- break
- }
- q.numHash = i + 1
- }
- if len(start) < 2+2*q.numHash {
- return q, 0, 0, errStringTooShort
- }
- s := start[q.numHash:]
- switch s[0] {
- case '"', '\'':
- q.char = s[0]
- if len(s) > 3 && s[1] == s[0] && s[2] == s[0] {
- switch s[3] {
- case '\n':
- q.quote = start[:3+q.numHash]
- case '\r':
- if len(s) > 4 && s[4] == '\n' {
- q.quote = start[:4+q.numHash]
- break
- }
- fallthrough
- default:
- return q, 0, 0, errMissingNewline
- }
- q.multiline = true
- q.numChar = 3
- nStart = len(q.quote) + 1 // add whitespace later
- } else {
- q.quote = start[:1+q.numHash]
- q.numChar = 1
- nStart = len(q.quote)
- }
- default:
- return q, 0, 0, errSyntax
- }
- quote := start[:int(q.numChar)+q.numHash]
- for i := 0; i < len(quote); i++ {
- if j := len(end) - i - 1; j < 0 || quote[i] != end[j] {
- return q, 0, 0, errUnmatchedQuote
- }
- }
- if q.multiline {
- i := len(end) - len(quote)
- for i > 0 {
- r, size := utf8.DecodeLastRuneInString(end[:i])
- if r == '\n' || !unicode.IsSpace(r) {
- break
- }
- i -= size
- }
- q.whitespace = end[i : len(end)-len(quote)]
-
- if len(start) > nStart && start[nStart] != '\n' {
- if !strings.HasPrefix(start[nStart:], q.whitespace) {
- return q, 0, 0, errInvalidWhitespace
- }
- nStart += len(q.whitespace)
- }
- }
-
- return q, nStart, int(q.numChar) + q.numHash, nil
-}
-
// parseString decodes a string without the starting and ending quotes.
-func parseString(ctx *context, node ast.Expr, q QuoteInfo, s string) (n value) {
+func parseString(ctx *context, node ast.Expr, q literal.QuoteInfo, s string) (n value) {
src := newExpr(node)
str, err := q.Unquote(s)
if err != nil {
diff --git a/cue/lit_test.go b/cue/lit_test.go
index 8263be2..5c3c7f7 100644
--- a/cue/lit_test.go
+++ b/cue/lit_test.go
@@ -25,150 +25,6 @@
"github.com/google/go-cmp/cmp/cmpopts"
)
-func TestUnquote(t *testing.T) {
- testCases := []struct {
- in, out string
- err error
- }{
- {`"Hello"`, "Hello", nil},
- {`'Hello'`, "Hello", nil},
- {`'Hellø'`, "Hellø", nil},
- {`"""` + "\n\t\tHello\n\t\t" + `"""`, "Hello", nil},
- {"'''\n\t\tHello\n\t\t'''", "Hello", nil},
- {"'''\n\t\tHello\n\n\t\t'''", "Hello\n", nil},
- {"'''\n\n\t\tHello\n\t\t'''", "\nHello", nil},
- {"'''\n\n\n\n\t\t'''", "\n\n", nil},
- {"'''\n\t\t'''", "", nil},
- {`"""` + "\n\raaa\n\rbbb\n\r" + `"""`, "aaa\nbbb", nil},
- {`'\a\b\f\n\r\t\v\'\\\/'`, "\a\b\f\n\r\t\v'\\/", nil},
- {`"\a\b\f\n\r\t\v\"\\\/"`, "\a\b\f\n\r\t\v\"\\/", nil},
- {`#"The sequence "\U0001F604" renders as \#U0001F604."#`,
- `The sequence "\U0001F604" renders as 😄.`,
- nil},
- {`" \U00010FfF"`, " \U00010fff", nil},
- {`"\u0061 "`, "a ", nil},
- {`'\x61\x55'`, "\x61\x55", nil},
- {`'\061\055'`, "\061\055", nil},
- {`'\377 '`, "\377 ", nil},
- {"'e\u0300\\n'", "e\u0300\n", nil},
- {`'\06\055'`, "", errSyntax},
- {`'\0'`, "", errSyntax},
- {`"\06\055"`, "", errSyntax}, // too short
- {`'\777 '`, "", errSyntax}, // overflow
- {`'\U012301'`, "", errSyntax}, // too short
- {`'\U0123012G'`, "", errSyntax}, // invalid digit G
- {`"\x04"`, "", errSyntax}, // not allowed in strings
- {`'\U01230123'`, "", errSyntax}, // too large
-
- {`"\\"`, "\\", nil},
- {`"\'"`, "", errSyntax},
- {`"\q"`, "", errSyntax},
- {"'\n'", "", errSyntax},
- {"'---\n---'", "", errSyntax},
- {"'''\r'''", "", errMissingNewline},
-
- {`#"Hello"#`, "Hello", nil},
- {`#"Hello\v"#`, "Hello\\v", nil},
- {`#"Hello\#v\r"#`, "Hello\v\\r", nil},
- {`##"Hello\##v\r"##`, "Hello\v\\r", nil},
- {`##"Hello\##v"##`, "Hello\v", nil},
- {"#'''\n\t\tHello\\#v\n\t\t'''#", "Hello\v", nil},
- {"##'''\n\t\tHello\\#v\n\t\t'''##", "Hello\\#v", nil},
- {`#"""` + "\n\t\t\\#r\n\t\t" + `"""#`, "\r", nil},
- {`#""#`, "", nil},
- {`#"This is a "dog""#`, `This is a "dog"`, nil},
- {"#\"\"\"\n\"\n\"\"\"#", `"`, nil},
- {"#\"\"\"\n\"\"\"\n\"\"\"#", `"""`, nil},
- {"#\"\"\"\n\na\n\n\"\"\"#", "\na\n", nil},
- // Gobble extra \r
- {"#\"\"\"\n\ra\n\r\"\"\"#", `a`, nil},
- {"#\"\"\"\n\r\n\ra\n\r\n\r\"\"\"#", "\na\n", nil},
- // Make sure this works for Windows.
- {"#\"\"\"\r\n\r\na\r\n\r\n\"\"\"#", "\na\n", nil},
- {"#\"\"\"\r\n \r\n a\r\n \r\n \"\"\"#", "\na\n", nil},
- {"#\"\"\"\r\na\r\n\"\"\"#", `a`, nil},
- {"#\"\"\"\r\n\ra\r\n\r\"\"\"#", `a`, nil},
- {`####" \"####`, ` \`, nil},
-
- {"```", "", errSyntax},
- {"Hello", "", errSyntax},
- {`"Hello`, "", errUnmatchedQuote},
- {`"""Hello"""`, "", errMissingNewline},
- {"'''\n Hello\n '''", "", errInvalidWhitespace},
- {"'''\n a\n b\n '''", "", errInvalidWhitespace},
- {`"Hello""`, "", errSyntax},
- {`#"Hello"`, "", errUnmatchedQuote},
- {`#"Hello'#`, "", errUnmatchedQuote},
- {`#"""#`, "", errMissingNewline},
-
- // TODO: should these be legal?
- {`#"""#`, "", errMissingNewline},
- }
- for i, tc := range testCases {
- t.Run(fmt.Sprintf("%d/%s", i, tc.in), func(t *testing.T) {
- if got, err := Unquote(tc.in); err != tc.err {
- t.Errorf("error: got %q; want %q", err, tc.err)
- } else if got != tc.out {
- t.Errorf("value: got %q; want %q", got, tc.out)
- }
- })
- }
-}
-
-func TestInterpolation(t *testing.T) {
- testCases := []struct {
- quotes string
- in string
- out string
- err error
- }{
- {`""`, `foo\(`, "foo", nil},
- {`"""` + "\n" + `"""`, `foo`, "", errUnmatchedQuote},
- {`#""#`, `foo\#(`, "foo", nil},
- {`#""#`, `foo\(`, "", errUnmatchedQuote},
- {`""`, `foo\(bar`, "", errSyntax},
- {`""`, ``, "", errUnmatchedQuote},
- {`#""#`, `"`, "", errUnmatchedQuote},
- {`#""#`, `\`, "", errUnmatchedQuote},
- {`##""##`, `\'`, "", errUnmatchedQuote},
- }
- for i, tc := range testCases {
- t.Run(fmt.Sprintf("%d/%s/%s", i, tc.quotes, tc.in), func(t *testing.T) {
- info, _, _, _ := ParseQuotes(tc.quotes, tc.quotes)
- if got, err := info.Unquote(tc.in); err != tc.err {
- t.Errorf("error: got %q; want %q", err, tc.err)
- } else if got != tc.out {
- t.Errorf("value: got %q; want %q", got, tc.out)
- }
- })
- }
-}
-
-func TestIsDouble(t *testing.T) {
- testCases := []struct {
- quotes string
- double bool
- }{
- {`""`, true},
- {`"""` + "\n" + `"""`, true},
- {`#""#`, true},
- {`''`, false},
- {`'''` + "\n" + `'''`, false},
- {`#''#`, false},
- }
- for i, tc := range testCases {
- t.Run(fmt.Sprintf("%d/%s", i, tc.quotes), func(t *testing.T) {
- info, _, _, err := ParseQuotes(tc.quotes, tc.quotes)
- if err != nil {
- t.Fatal(err)
- }
- if got := info.IsDouble(); got != tc.double {
- t.Errorf("got %v; want %v", got, tc.double)
- }
- })
- }
-}
-
var defIntBase = newNumBase(&ast.BasicLit{}, newNumInfo(numKind, 0, 10, false))
var defRatBase = newNumBase(&ast.BasicLit{}, newNumInfo(floatKind, 0, 10, false))
diff --git a/cue/literal/doc.go b/cue/literal/doc.go
new file mode 100644
index 0000000..3d3095c
--- /dev/null
+++ b/cue/literal/doc.go
@@ -0,0 +1,17 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package literal implements conversions to and from string representations of
+// basic data types.
+package literal
diff --git a/cue/literal/string.go b/cue/literal/string.go
new file mode 100644
index 0000000..3a7fea9
--- /dev/null
+++ b/cue/literal/string.go
@@ -0,0 +1,373 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package literal
+
+import (
+ "errors"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Sentinel errors used while parsing quotes and unquoting literals. The tests
+// in this package compare returned errors against these values directly.
+var (
+ errSyntax = errors.New("invalid syntax")
+ // NOTE(review): errInvalidString is not referenced by the code visible in
+ // this file — confirm whether it is still needed.
+ errInvalidString = errors.New("invalid string")
+ errInvalidWhitespace = errors.New("invalid string: invalid whitespace")
+ errMissingNewline = errors.New(
+ "invalid string: opening quote of multiline string must be followed by newline")
+ errUnmatchedQuote = errors.New("invalid string: unmatched quote")
+)
+
+// Unquote returns the string value denoted by the complete literal s. The
+// literal may be single- or double-quoted, single- or multi-line, and may use
+// leading hashes as custom escape delimiters.
+func Unquote(s string) (string, error) {
+	q, bodyStart, _, err := ParseQuotes(s, s)
+	if err != nil {
+		return "", err
+	}
+	// Everything after the opening quote, closing quote included, is handed
+	// to the QuoteInfo for decoding.
+	return q.Unquote(s[bodyStart:])
+}
+
+// QuoteInfo describes the type of quotes used for a string.
+// Its fields are filled in by ParseQuotes and read by QuoteInfo.Unquote.
+type QuoteInfo struct {
+ // quote is the opening delimiter as sliced from the start string by
+ // ParseQuotes: the leading hashes followed by the quote characters (and
+ // the '\r' when a multiline opener is followed by "\r\n").
+ quote string
+ // whitespace is, for multiline literals, the run of non-newline
+ // whitespace directly preceding the closing quote; Unquote strips it
+ // after every newline.
+ whitespace string
+ // numHash is the number of leading '#' characters.
+ numHash int
+ // multiline is set when the literal opens with three quote characters
+ // followed by a newline.
+ multiline bool
+ // char is the quote byte, either '"' or '\''.
+ char byte
+ // numChar is the number of quote characters in the delimiter: 1 or 3.
+ numChar byte
+}
+
+// IsDouble reports whether the literal is delimited by double quotes, as
+// opposed to single quotes.
+func (q QuoteInfo) IsDouble() bool {
+	const doubleQuote = '"'
+	return q.char == doubleQuote
+}
+
+// errStringTooShort is reported by ParseQuotes when start is too short to
+// hold an opening and a closing quote for the number of leading hashes.
+var errStringTooShort = errors.New("invalid string: too short")
+
+// ParseQuotes checks if the opening quotes in start match the closing quotes
+// in end and reports the quote type as q, or an error if they do not match or
+// are invalid.
+//
+// nStart is the number of bytes of start used by the opening quote; for a
+// multiline literal this includes the newline after the quotes and, when the
+// first line is not empty, its indentation. nEnd is the number of bytes of end
+// used by the closing quote (quote characters plus hashes).
+//
+// For a complete literal pass the same string as start and end; for an
+// interpolation pass the first and the last fragment.
+func ParseQuotes(start, end string) (q QuoteInfo, nStart, nEnd int, err error) {
+	for i, c := range start {
+		if c != '#' {
+			break
+		}
+		q.numHash = i + 1
+	}
+	// Reject input that cannot contain both delimiters. Without this check an
+	// empty string, or one consisting only of hashes, would make s[0] below
+	// panic with an index out of range.
+	if len(start) < 2+2*q.numHash {
+		return q, 0, 0, errStringTooShort
+	}
+	s := start[q.numHash:]
+	switch s[0] {
+	case '"', '\'':
+		q.char = s[0]
+		if len(s) > 3 && s[1] == s[0] && s[2] == s[0] {
+			// Triple quote: must be followed by "\n" or "\r\n".
+			switch s[3] {
+			case '\n':
+				q.quote = start[:3+q.numHash]
+			case '\r':
+				if len(s) > 4 && s[4] == '\n' {
+					q.quote = start[:4+q.numHash]
+					break
+				}
+				fallthrough
+			default:
+				return q, 0, 0, errMissingNewline
+			}
+			q.multiline = true
+			q.numChar = 3
+			nStart = len(q.quote) + 1 // add whitespace later
+		} else {
+			q.quote = start[:1+q.numHash]
+			q.numChar = 1
+			nStart = len(q.quote)
+		}
+	default:
+		return q, 0, 0, errSyntax
+	}
+	// The closing delimiter is the opening one mirrored: compare the opening
+	// bytes front-to-back against end read back-to-front.
+	quote := start[:int(q.numChar)+q.numHash]
+	for i := 0; i < len(quote); i++ {
+		if j := len(end) - i - 1; j < 0 || quote[i] != end[j] {
+			return q, 0, 0, errUnmatchedQuote
+		}
+	}
+	if q.multiline {
+		// Walk backwards from the closing quote over non-newline whitespace;
+		// that run is the indentation to strip from every line.
+		i := len(end) - len(quote)
+		for i > 0 {
+			r, size := utf8.DecodeLastRuneInString(end[:i])
+			if r == '\n' || !unicode.IsSpace(r) {
+				break
+			}
+			i -= size
+		}
+		q.whitespace = end[i : len(end)-len(quote)]
+
+		// A non-empty first line must carry the same indentation.
+		if len(start) > nStart && start[nStart] != '\n' {
+			if !strings.HasPrefix(start[nStart:], q.whitespace) {
+				return q, 0, 0, errInvalidWhitespace
+			}
+			nStart += len(q.whitespace)
+		}
+	}
+
+	return q, nStart, int(q.numChar) + q.numHash, nil
+}
+
+// Unquote unquotes the given string. It must be terminated with a quote or an
+// interpolation start.
+//
+// s is the literal text that follows the opening quote. For multiline
+// literals the indentation recorded in q.whitespace is removed after each
+// newline, and a newline directly preceding the closing quote is dropped.
+// It returns errUnmatchedQuote when s runs out before a terminator is found.
+func (q QuoteInfo) Unquote(s string) (string, error) {
+ if len(s) > 0 && !q.multiline {
+ // Single-line literals may not contain raw line breaks.
+ if contains(s, '\n') || contains(s, '\r') {
+ return "", errSyntax
+ }
+ // Is it trivial? Avoid allocation.
+ if s[len(s)-1] == q.char &&
+ q.numHash == 0 &&
+ !contains(s, '\\') &&
+ !contains(s[:len(s)-1], q.char) {
+ return s[:len(s)-1], nil
+ }
+ }
+
+ var runeTmp [utf8.UTFMax]byte
+ buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
+ // stripNL is true while the last byte appended to buf is a raw newline.
+ stripNL := false
+ for len(s) > 0 {
+ switch s[0] {
+ case '\r':
+ // Raw carriage returns are dropped.
+ s = s[1:]
+ continue
+ case '\n':
+ // The case conditions are tried in textual order with default last;
+ // the fallthrough from the first case enters the default clause.
+ switch {
+ case !q.multiline:
+ fallthrough
+ default:
+ return "", errInvalidWhitespace
+ case strings.HasPrefix(s[1:], q.whitespace):
+ // Skip the newline together with the line's indentation.
+ s = s[1+len(q.whitespace):]
+ case strings.HasPrefix(s[1:], "\n"):
+ // Empty line: it does not need to carry the indentation.
+ s = s[1:]
+ }
+ stripNL = true
+ buf = append(buf, '\n')
+ continue
+ }
+ c, multibyte, ss, err := unquoteChar(s, q)
+ if err != nil {
+ return "", err
+ }
+ // TODO: handle surrogates: if we have a left-surrogate, expect the
+ // next value to be a right surrogate. Otherwise this is an error.
+ s = ss
+ // Negative values are terminators: -1 is the closing quote and -2 is
+ // an interpolation start.
+ if c < 0 {
+ if c == -2 {
+ stripNL = false
+ }
+ if stripNL {
+ // Strip the last newline, but only if it came from a closing
+ // quote.
+ buf = buf[:len(buf)-1]
+ }
+ return string(buf), nil
+ }
+ stripNL = false
+ // Values from \x and octal escapes are appended as a single raw byte;
+ // everything else at or above utf8.RuneSelf is UTF-8 encoded.
+ if c < utf8.RuneSelf || !multibyte {
+ buf = append(buf, byte(c))
+ } else {
+ n := utf8.EncodeRune(runeTmp[:], c)
+ buf = append(buf, runeTmp[:n]...)
+ }
+ }
+ // allow unmatched quotes if already checked.
+ return "", errUnmatchedQuote
+}
+
+// contains reports whether the byte c occurs anywhere in s.
+func contains(s string, c byte) bool {
+	return strings.IndexByte(s, c) >= 0
+}
+
+// unquoteChar decodes the first character or byte in the escaped string s.
+// It returns four values:
+//
+// 1) value, the decoded Unicode code point or byte value; the special value
+// of -1 indicates terminated by quotes and -2 means terminated by \(.
+// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
+// 3) tail, the remainder of the string after the character; and
+// 4) an error that will be nil if the character is syntactically valid.
+//
+// The second argument, info, describes the quotes of the literal being
+// parsed. info.char, info.numChar and info.numHash identify the closing
+// delimiter. A backslash only starts an escape sequence when it is followed
+// by info.numHash '#' characters; otherwise it is returned as a literal
+// backslash. When info.char is '"', the \x and octal escapes are rejected
+// with errSyntax. An escaped quote is only accepted when it equals info.char.
+func unquoteChar(s string, info QuoteInfo) (value rune, multibyte bool, tail string, err error) {
+ // easy cases
+ switch c := s[0]; {
+ case c == info.char && info.char != 0:
+ // A quote character only terminates the string when it is followed by
+ // the rest of the closing delimiter; otherwise it is ordinary content.
+ for i := 1; byte(i) < info.numChar; i++ {
+ if i >= len(s) || s[i] != info.char {
+ return rune(info.char), false, s[1:], nil
+ }
+ }
+ for i := 0; i < info.numHash; i++ {
+ if i+int(info.numChar) >= len(s) || s[i+int(info.numChar)] != '#' {
+ return rune(info.char), false, s[1:], nil
+ }
+ }
+ if ln := int(info.numChar) + info.numHash; len(s) != ln {
+ // TODO: terminating quote in middle of string
+ return 0, false, s[ln:], errSyntax
+ }
+ return -1, false, "", nil
+ case c >= utf8.RuneSelf:
+ r, size := utf8.DecodeRuneInString(s)
+ return r, true, s[size:], nil
+ case c != '\\':
+ return rune(s[0]), false, s[1:], nil
+ }
+
+ // s starts with a backslash. It is a literal backslash unless it is
+ // followed by exactly the required hashes and at least one more byte.
+ if len(s) <= 1+info.numHash {
+ return '\\', false, s[1:], nil
+ }
+ for i := 1; i <= info.numHash && i < len(s); i++ {
+ if s[i] != '#' {
+ return '\\', false, s[1:], nil
+ }
+ }
+
+ // c is the escape selector; s now holds whatever follows it.
+ c := s[1+info.numHash]
+ s = s[2+info.numHash:]
+
+ switch c {
+ case 'a':
+ value = '\a'
+ case 'b':
+ value = '\b'
+ case 'f':
+ value = '\f'
+ case 'n':
+ value = '\n'
+ case 'r':
+ value = '\r'
+ case 't':
+ value = '\t'
+ case 'v':
+ value = '\v'
+ case '/':
+ value = '/'
+ case 'x', 'u', 'U':
+ // n is the number of hexadecimal digits the escape requires.
+ n := 0
+ switch c {
+ case 'x':
+ n = 2
+ case 'u':
+ n = 4
+ case 'U':
+ n = 8
+ }
+ var v rune
+ if len(s) < n {
+ err = errSyntax
+ return
+ }
+ for j := 0; j < n; j++ {
+ x, ok := unhex(s[j])
+ if !ok {
+ err = errSyntax
+ return
+ }
+ v = v<<4 | x
+ }
+ s = s[n:]
+ if c == 'x' {
+ if info.char == '"' {
+ err = errSyntax
+ return
+ }
+ // single-byte string, possibly not UTF-8
+ value = v
+ break
+ }
+ if v > utf8.MaxRune {
+ err = errSyntax
+ return
+ }
+ value = v
+ multibyte = true
+ case '0', '1', '2', '3', '4', '5', '6', '7':
+ if info.char == '"' {
+ err = errSyntax
+ return
+ }
+ v := rune(c) - '0'
+ if len(s) < 2 {
+ err = errSyntax
+ return
+ }
+ for j := 0; j < 2; j++ { // one digit already; two more
+ x := rune(s[j]) - '0'
+ if x < 0 || x > 7 {
+ err = errSyntax
+ return
+ }
+ v = (v << 3) | x
+ }
+ s = s[2:]
+ if v > 255 {
+ err = errSyntax
+ return
+ }
+ value = v
+ case '\\':
+ value = '\\'
+ case '\'', '"':
+ // TODO: should we allow escaping of quotes regardless?
+ if c != info.char {
+ err = errSyntax
+ return
+ }
+ value = rune(c)
+ case '(':
+ // An interpolation start must be the last thing in s.
+ if s != "" {
+ // TODO: terminating quote in middle of string
+ return 0, false, s, errSyntax
+ }
+ value = -2
+ default:
+ err = errSyntax
+ return
+ }
+ tail = s
+ return
+}
+
+// unhex converts the hexadecimal digit b to its numeric value. ok is false,
+// and v is zero, when b is not a hexadecimal digit.
+func unhex(b byte) (v rune, ok bool) {
+	switch {
+	case b >= '0' && b <= '9':
+		return rune(b - '0'), true
+	case b >= 'a' && b <= 'f':
+		return rune(b-'a') + 10, true
+	case b >= 'A' && b <= 'F':
+		return rune(b-'A') + 10, true
+	default:
+		return 0, false
+	}
+}
diff --git a/cue/literal/string_test.go b/cue/literal/string_test.go
new file mode 100644
index 0000000..dd00b64
--- /dev/null
+++ b/cue/literal/string_test.go
@@ -0,0 +1,164 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package literal
+
+import (
+ "fmt"
+ "testing"
+)
+
+// TestUnquote runs Unquote over a table of complete literals and checks both
+// the decoded value and the returned error. Errors are compared by identity
+// against the package's sentinel values.
+func TestUnquote(t *testing.T) {
+ testCases := []struct {
+ in, out string
+ err error
+ }{
+ {`"Hello"`, "Hello", nil},
+ {`'Hello'`, "Hello", nil},
+ {`'Hellø'`, "Hellø", nil},
+ {`"""` + "\n\t\tHello\n\t\t" + `"""`, "Hello", nil},
+ {"'''\n\t\tHello\n\t\t'''", "Hello", nil},
+ {"'''\n\t\tHello\n\n\t\t'''", "Hello\n", nil},
+ {"'''\n\n\t\tHello\n\t\t'''", "\nHello", nil},
+ {"'''\n\n\n\n\t\t'''", "\n\n", nil},
+ {"'''\n\t\t'''", "", nil},
+ {`"""` + "\n\raaa\n\rbbb\n\r" + `"""`, "aaa\nbbb", nil},
+ {`'\a\b\f\n\r\t\v\'\\\/'`, "\a\b\f\n\r\t\v'\\/", nil},
+ {`"\a\b\f\n\r\t\v\"\\\/"`, "\a\b\f\n\r\t\v\"\\/", nil},
+ {`#"The sequence "\U0001F604" renders as \#U0001F604."#`,
+ `The sequence "\U0001F604" renders as 😄.`,
+ nil},
+ {`" \U00010FfF"`, " \U00010fff", nil},
+ {`"\u0061 "`, "a ", nil},
+ {`'\x61\x55'`, "\x61\x55", nil},
+ {`'\061\055'`, "\061\055", nil},
+ {`'\377 '`, "\377 ", nil},
+ {"'e\u0300\\n'", "e\u0300\n", nil},
+ {`'\06\055'`, "", errSyntax},
+ {`'\0'`, "", errSyntax},
+ {`"\06\055"`, "", errSyntax}, // too short
+ {`'\777 '`, "", errSyntax}, // overflow
+ {`'\U012301'`, "", errSyntax}, // too short
+ {`'\U0123012G'`, "", errSyntax}, // invalid digit G
+ {`"\x04"`, "", errSyntax}, // not allowed in strings
+ {`'\U01230123'`, "", errSyntax}, // too large
+
+ {`"\\"`, "\\", nil},
+ {`"\'"`, "", errSyntax},
+ {`"\q"`, "", errSyntax},
+ {"'\n'", "", errSyntax},
+ {"'---\n---'", "", errSyntax},
+ {"'''\r'''", "", errMissingNewline},
+
+ // Literals with hash delimiters: only backslashes followed by the same
+ // number of hashes start an escape.
+ {`#"Hello"#`, "Hello", nil},
+ {`#"Hello\v"#`, "Hello\\v", nil},
+ {`#"Hello\#v\r"#`, "Hello\v\\r", nil},
+ {`##"Hello\##v\r"##`, "Hello\v\\r", nil},
+ {`##"Hello\##v"##`, "Hello\v", nil},
+ {"#'''\n\t\tHello\\#v\n\t\t'''#", "Hello\v", nil},
+ {"##'''\n\t\tHello\\#v\n\t\t'''##", "Hello\\#v", nil},
+ {`#"""` + "\n\t\t\\#r\n\t\t" + `"""#`, "\r", nil},
+ {`#""#`, "", nil},
+ {`#"This is a "dog""#`, `This is a "dog"`, nil},
+ {"#\"\"\"\n\"\n\"\"\"#", `"`, nil},
+ {"#\"\"\"\n\"\"\"\n\"\"\"#", `"""`, nil},
+ {"#\"\"\"\n\na\n\n\"\"\"#", "\na\n", nil},
+ // Gobble extra \r
+ {"#\"\"\"\n\ra\n\r\"\"\"#", `a`, nil},
+ {"#\"\"\"\n\r\n\ra\n\r\n\r\"\"\"#", "\na\n", nil},
+ // Make sure this works for Windows.
+ {"#\"\"\"\r\n\r\na\r\n\r\n\"\"\"#", "\na\n", nil},
+ {"#\"\"\"\r\n \r\n a\r\n \r\n \"\"\"#", "\na\n", nil},
+ {"#\"\"\"\r\na\r\n\"\"\"#", `a`, nil},
+ {"#\"\"\"\r\n\ra\r\n\r\"\"\"#", `a`, nil},
+ {`####" \"####`, ` \`, nil},
+
+ // Malformed or unterminated literals.
+ {"```", "", errSyntax},
+ {"Hello", "", errSyntax},
+ {`"Hello`, "", errUnmatchedQuote},
+ {`"""Hello"""`, "", errMissingNewline},
+ {"'''\n Hello\n '''", "", errInvalidWhitespace},
+ {"'''\n a\n b\n '''", "", errInvalidWhitespace},
+ {`"Hello""`, "", errSyntax},
+ {`#"Hello"`, "", errUnmatchedQuote},
+ {`#"Hello'#`, "", errUnmatchedQuote},
+ {`#"""#`, "", errMissingNewline},
+
+ // TODO: should these be legal?
+ // NOTE(review): this entry repeats the case directly above.
+ {`#"""#`, "", errMissingNewline},
+ }
+ for i, tc := range testCases {
+ t.Run(fmt.Sprintf("%d/%s", i, tc.in), func(t *testing.T) {
+ if got, err := Unquote(tc.in); err != tc.err {
+ t.Errorf("error: got %q; want %q", err, tc.err)
+ } else if got != tc.out {
+ t.Errorf("value: got %q; want %q", got, tc.out)
+ }
+ })
+ }
+}
+
+// TestInterpolation checks QuoteInfo.Unquote on string fragments as they occur
+// around interpolations. quotes is a complete, empty literal from which the
+// QuoteInfo is derived; in is the fragment that follows the opening quote.
+func TestInterpolation(t *testing.T) {
+	testCases := []struct {
+		quotes string
+		in     string
+		out    string
+		err    error
+	}{
+		{`""`, `foo\(`, "foo", nil},
+		{`"""` + "\n" + `"""`, `foo`, "", errUnmatchedQuote},
+		{`#""#`, `foo\#(`, "foo", nil},
+		{`#""#`, `foo\(`, "", errUnmatchedQuote},
+		{`""`, `foo\(bar`, "", errSyntax},
+		{`""`, ``, "", errUnmatchedQuote},
+		{`#""#`, `"`, "", errUnmatchedQuote},
+		{`#""#`, `\`, "", errUnmatchedQuote},
+		{`##""##`, `\'`, "", errUnmatchedQuote},
+	}
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("%d/%s/%s", i, tc.quotes, tc.in), func(t *testing.T) {
+			// The quotes fixture must parse; otherwise the assertions below
+			// would run against a zero QuoteInfo and prove nothing.
+			info, _, _, err := ParseQuotes(tc.quotes, tc.quotes)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if got, err := info.Unquote(tc.in); err != tc.err {
+				t.Errorf("error: got %q; want %q", err, tc.err)
+			} else if got != tc.out {
+				t.Errorf("value: got %q; want %q", got, tc.out)
+			}
+		})
+	}
+}
+
+// TestIsDouble verifies that QuoteInfo.IsDouble distinguishes double-quoted
+// from single-quoted literals for plain, multiline and hash-delimited forms.
+func TestIsDouble(t *testing.T) {
+	type isDoubleCase struct {
+		quotes string
+		double bool
+	}
+	cases := []isDoubleCase{
+		{quotes: `""`, double: true},
+		{quotes: `"""` + "\n" + `"""`, double: true},
+		{quotes: `#""#`, double: true},
+		{quotes: `''`, double: false},
+		{quotes: `'''` + "\n" + `'''`, double: false},
+		{quotes: `#''#`, double: false},
+	}
+	for idx, c := range cases {
+		name := fmt.Sprintf("%d/%s", idx, c.quotes)
+		t.Run(name, func(t *testing.T) {
+			q, _, _, err := ParseQuotes(c.quotes, c.quotes)
+			if err != nil {
+				t.Fatal(err)
+			}
+			want := c.double
+			if got := q.IsDouble(); got != want {
+				t.Errorf("got %v; want %v", got, want)
+			}
+		})
+	}
+}
diff --git a/cue/parser/parser.go b/cue/parser/parser.go
index 461b78b..77f483b 100644
--- a/cue/parser/parser.go
+++ b/cue/parser/parser.go
@@ -16,12 +16,12 @@
import (
"fmt"
- "strconv"
"strings"
"unicode"
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/errors"
+ "cuelang.org/go/cue/literal"
"cuelang.org/go/cue/scanner"
"cuelang.org/go/cue/token"
)
@@ -1225,7 +1225,7 @@
func isValidImport(lit string) bool {
const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
- s, _ := strconv.Unquote(lit) // go/scanner returns a legal string literal
+ s, _ := literal.Unquote(lit) // go/scanner returns a legal string literal
for _, r := range s {
if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
return false
diff --git a/pkg/strconv/manual.go b/pkg/strconv/manual.go
index e179ad4..cad0a30 100644
--- a/pkg/strconv/manual.go
+++ b/pkg/strconv/manual.go
@@ -14,11 +14,13 @@
package strconv
+import "cuelang.org/go/cue/literal"
+
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted CUE string literal, returning the string value
// that s quotes.
func Unquote(s string) (string, error) {
- return Unquote(s)
+ return literal.Unquote(s)
}
// TODO: replace parsing functions with parsing to apd