cue: implement custom escape delimiters
Custom escape delimiters are also known as raw strings.
This approach to raw strings is a verbatim copy of raw
strings as introduced in Swift 5.
The API of ResumeInterpolation has been changed as it was
otherwise too awkward.
See https://github.com/apple/swift-evolution/blob/master/proposals/0200-raw-string-escaping.md.
Change-Id: Ib25b749cb1573fc2b4072c90878c3c63ebb5fd38
diff --git a/cue/ast.go b/cue/ast.go
index 9dbed65..0f3086d 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -17,6 +17,7 @@
import (
"fmt"
"strconv"
+ "strings"
"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/build"
@@ -382,36 +383,28 @@
}
lit := &interpolation{baseValue: newExpr(n), k: stringKind}
value = lit
- quote, err := stringType(first.Value)
+ info, prefixLen, _, err := ParseQuotes(first.Value, last.Value)
if err != nil {
return v.error(n, "invalid interpolation: %v", err)
}
- if quote[0] == '\'' {
- return v.error(n, "interpolation not implemented for bytes: %v", err)
- }
- ws, err := wsPrefix(last.Value, quote)
- if err != nil {
- return v.error(n, "invalid interpolation: %v", err)
- }
- prefix := quote
- multi := len(quote) == 3
- p := v.litParser
+ prefix := ""
for i := 0; i < len(n.Elts); i += 2 {
l, ok := n.Elts[i].(*ast.BasicLit)
if !ok {
return v.error(n, "invalid interpolation")
}
- if err := p.init(l); err != nil {
- return v.error(n, "invalid interpolation: %v", err)
+ s := l.Value
+ if !strings.HasPrefix(s, prefix) {
+ return v.error(l, "invalid interpolation: unmatched ')'")
}
+ s = l.Value[prefixLen:]
+ x := parseString(v.ctx(), l, info, s)
+ lit.parts = append(lit.parts, x)
if i+1 < len(n.Elts) {
- x := p.parseString(prefix, `\(`, ws, multi, quote[0])
- lit.parts = append(lit.parts, x, v.walk(n.Elts[i+1]))
- } else {
- x := p.parseString(prefix, quote, ws, multi, quote[0])
- lit.parts = append(lit.parts, x)
+ lit.parts = append(lit.parts, v.walk(n.Elts[i+1]))
}
prefix = ")"
+ prefixLen = 1
}
case *ast.ListLit:
diff --git a/cue/lit.go b/cue/lit.go
index 7632f9a..e524b1b 100644
--- a/cue/lit.go
+++ b/cue/lit.go
@@ -34,58 +34,37 @@
var errInvalidString = errors.New("invalid string")
-// Unquote interprets s as a single-quoted, double-quoted, or backquoted CUE
-// string literal, returning the string value that s quotes.
+// Unquote interprets s as a single- or double-quoted, single- or multi-line
+// string, possibly with custom escape delimiters, returning the string value
+// that s quotes.
func Unquote(s string) (string, error) {
- quote, err := stringType(s)
+ info, nStart, _, err := ParseQuotes(s, s)
if err != nil {
return "", err
}
- prefix, err := wsPrefix(s, quote)
- if err != nil {
- return "", err
- }
- s = s[len(quote) : len(s)-len(quote)]
- return unquote(quote[0], len(quote) == 3, true, prefix, s)
+ s = s[nStart:]
+ return info.Unquote(s)
}
-// unquote interprets s as a CUE string, where quote identifies the string type:
-// s: Unicode string (normal double quoted strings)
-// b: Binary strings: allows escape sequences that may result in invalid
-// Unicode.
-// r: raw strings.
-//
-// quote indicates the quote used. This is relevant for raw strings, as they
-// may not contain the quoting character itself.
-func unquote(quote byte, multiline, first bool, wsPrefix, s string) (string, error) {
- if quote == '`' {
- if contains(s, quote) {
- return "", errSyntax
- }
- if contains(s, '\r') {
- // -1 because we know there is at least one \r to remove.
- buf := make([]byte, 0, len(s)-1)
- for i := 0; i < len(s); i++ {
- if s[i] != '\r' {
- buf = append(buf, s[i])
- }
- }
- return string(buf), nil
- }
- return s, nil
- }
- if !multiline {
- if contains(s, '\n') {
+// Unquote unquotes the given string. It must be terminated with a quote or an
+// interpolation start.
+func (q QuoteInfo) Unquote(s string) (string, error) {
+ if len(s) > 0 && !q.multiline {
+ if contains(s, '\n') || contains(s, '\r') {
return "", errSyntax
}
// Is it trivial? Avoid allocation.
- if !contains(s, '\\') && !contains(s, quote) {
- return s, nil
+ if s[len(s)-1] == q.char &&
+ q.numHash == 0 &&
+ !contains(s, '\\') &&
+ !contains(s[:len(s)-1], q.char) {
+ return s[:len(s)-1], nil
}
}
var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
+ stripNL := false
for len(s) > 0 {
switch s[0] {
case '\r':
@@ -93,28 +72,38 @@
continue
case '\n':
switch {
- case !multiline:
+ case !q.multiline:
fallthrough
default:
- return "", errSyntax
- case strings.HasPrefix(s[1:], wsPrefix):
- s = s[1+len(wsPrefix):]
+ return "", errInvalidWhitespace
+ case strings.HasPrefix(s[1:], q.whitespace):
+ s = s[1+len(q.whitespace):]
case strings.HasPrefix(s[1:], "\n"):
s = s[1:]
}
- if !first && len(s) > 0 {
- buf = append(buf, '\n')
- }
- first = false
+ stripNL = true
+ buf = append(buf, '\n')
continue
}
- c, multibyte, ss, err := unquoteChar(s, quote)
+ c, multibyte, ss, err := unquoteChar(s, q)
if err != nil {
return "", err
}
// TODO: handle surrogates: if we have a left-surrogate, expect the
// next value to be a right surrogate. Otherwise this is an error.
s = ss
+ if c < 0 {
+ if c == -2 {
+ stripNL = false
+ }
+ if stripNL {
+ // Strip the last newline, but only if it came from a closing
+ // quote.
+ buf = buf[:len(buf)-1]
+ }
+ return string(buf), nil
+ }
+ stripNL = false
if c < utf8.RuneSelf || !multibyte {
buf = append(buf, byte(c))
} else {
@@ -122,7 +111,8 @@
buf = append(buf, runeTmp[:n]...)
}
}
- return string(buf), nil
+ // allow unmatched quotes if already checked.
+ return "", errUnmatchedQuote
}
// contains reports whether the string contains the byte c.
@@ -138,7 +128,8 @@
// unquoteChar decodes the first character or byte in the escaped string.
// It returns four values:
//
-// 1) value, the decoded Unicode code point or byte value;
+// 1) value, the decoded Unicode code point or byte value; the special value
+// of -1 indicates terminated by quotes and -2 means terminated by \(.
// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
// 3) tail, the remainder of the string after the character; and
// 4) an error that will be nil if the character is syntactically valid.
@@ -150,12 +141,25 @@
//
// The third argument, quote, specifies that an ASCII quoting character that
// is not permitted in the output.
-func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
+func unquoteChar(s string, info QuoteInfo) (value rune, multibyte bool, tail string, err error) {
// easy cases
switch c := s[0]; {
- case c == quote && quote != 0:
- err = errSyntax
- return
+ case c == info.char && info.char != 0:
+ for i := 1; byte(i) < info.numChar; i++ {
+ if i >= len(s) || s[i] != info.char {
+ return rune(info.char), false, s[1:], nil
+ }
+ }
+ for i := 0; i < info.numHash; i++ {
+ if i+int(info.numChar) >= len(s) || s[i+int(info.numChar)] != '#' {
+ return rune(info.char), false, s[1:], nil
+ }
+ }
+ if ln := int(info.numChar) + info.numHash; len(s) != ln {
+ // TODO: terminating quote in middle of string
+ return 0, false, s[ln:], errSyntax
+ }
+ return -1, false, "", nil
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s)
return r, true, s[size:], nil
@@ -163,13 +167,17 @@
return rune(s[0]), false, s[1:], nil
}
- // hard case: c is backslash
- if len(s) <= 1 {
- err = errSyntax
- return
+ if len(s) <= 1+info.numHash {
+ return '\\', false, s[1:], nil
}
- c := s[1]
- s = s[2:]
+ for i := 1; i <= info.numHash && i < len(s); i++ {
+ if s[i] != '#' {
+ return '\\', false, s[1:], nil
+ }
+ }
+
+ c := s[1+info.numHash]
+ s = s[2+info.numHash:]
switch c {
case 'a':
@@ -186,6 +194,8 @@
value = '\t'
case 'v':
value = '\v'
+ case '/':
+ value = '/'
case 'x', 'u', 'U':
n := 0
switch c {
@@ -211,7 +221,7 @@
}
s = s[n:]
if c == 'x' {
- if quote == '"' {
+ if info.char == '"' {
err = errSyntax
return
}
@@ -226,7 +236,7 @@
value = v
multibyte = true
case '0', '1', '2', '3', '4', '5', '6', '7':
- if quote == '"' {
+ if info.char == '"' {
err = errSyntax
return
}
@@ -253,11 +263,17 @@
value = '\\'
case '\'', '"':
// TODO: should we allow escaping of quotes regardless?
- if c != quote {
+ if c != info.char {
err = errSyntax
return
}
value = rune(c)
+ case '(':
+ if s != "" {
+ // TODO: terminating quote in middle of string
+ return 0, false, s, errSyntax
+ }
+ value = -2
default:
err = errSyntax
return
@@ -409,16 +425,13 @@
return err
}
switch p.ch {
- case '"', '\'', '`':
- quote, err := stringType(l.Value)
+ case '"', '\'', '`', '#':
+ info, nStart, _, err := ParseQuotes(s, s)
if err != nil {
return p.error(l, err.Error())
}
- ws, err := wsPrefix(l.Value, quote)
- if err != nil {
- return p.error(l, err.Error())
- }
- return p.parseString(quote, quote, ws, len(quote) == 3, quote[0])
+ s := p.src[nStart:]
+ return parseString(p.ctx, p.node, info, s)
case '.':
p.next()
n = p.scanNumber(true)
@@ -435,72 +448,108 @@
}
var (
- errStringTooShort = errors.New("invalid string: too short")
- errMissingNewline = errors.New(
+ errStringTooShort = errors.New("invalid string: too short")
+ errInvalidWhitespace = errors.New("invalid string: invalid whitespace")
+ errMissingNewline = errors.New(
"invalid string: opening quote of multiline string must be followed by newline")
errUnmatchedQuote = errors.New("invalid string: unmatched quote")
)
-// stringType reports the type of quoting used, being ther a ", ', """, or ''',
-// or `.
-func stringType(s string) (quote string, err error) {
- if len(s) < 2 {
- return "", errStringTooShort
- }
- switch s[0] {
- case '"', '\'':
- if len(s) > 3 && s[1] == s[0] && s[2] == s[0] {
- if s[3] != '\n' {
- return "", errMissingNewline
- }
- return s[:3], nil
- }
- case '`':
- default:
- return "", errSyntax
- }
- return s[:1], nil
+// QuoteInfo describes the type of quotes used for a string.
+type QuoteInfo struct {
+ quote string
+ whitespace string
+ numHash int
+ multiline bool
+ char byte
+ numChar byte
}
-func wsPrefix(s, quote string) (ws string, err error) {
- for i := 0; i < len(quote); i++ {
- if j := len(s) - i - 1; j < 0 || quote[i] != s[j] {
- return "", errUnmatchedQuote
- }
- }
- i := len(s) - len(quote)
- for i > 0 {
- r, size := utf8.DecodeLastRuneInString(s[:i])
- if r == '\n' || !unicode.IsSpace(r) {
+// IsDouble reports whether the literal uses double quotes.
+func (q QuoteInfo) IsDouble() bool {
+ return q.char == '"'
+}
+
+// ParseQuotes checks if the opening quotes in start match the ending quotes
+// in end and reports their type as q or an error if they do not match or are
+// invalid. nStart indicates the number of bytes used for the opening quote.
+func ParseQuotes(start, end string) (q QuoteInfo, nStart, nEnd int, err error) {
+ for i, c := range start {
+ if c != '#' {
break
}
- i -= size
+ q.numHash = i + 1
}
- return s[i : len(s)-len(quote)], nil
+ if len(start) < 2+2*q.numHash {
+ return q, 0, 0, errStringTooShort
+ }
+ s := start[q.numHash:]
+ switch s[0] {
+ case '"', '\'':
+ q.char = s[0]
+ if len(s) > 3 && s[1] == s[0] && s[2] == s[0] {
+ switch s[3] {
+ case '\n':
+ q.quote = start[:3+q.numHash]
+ case '\r':
+ if len(s) > 4 && s[4] == '\n' {
+ q.quote = start[:4+q.numHash]
+ break
+ }
+ fallthrough
+ default:
+ return q, 0, 0, errMissingNewline
+ }
+ q.multiline = true
+ q.numChar = 3
+ nStart = len(q.quote) + 1 // add whitespace later
+ } else {
+ q.quote = start[:1+q.numHash]
+ q.numChar = 1
+ nStart = len(q.quote)
+ }
+ default:
+ return q, 0, 0, errSyntax
+ }
+ quote := start[:int(q.numChar)+q.numHash]
+ for i := 0; i < len(quote); i++ {
+ if j := len(end) - i - 1; j < 0 || quote[i] != end[j] {
+ return q, 0, 0, errUnmatchedQuote
+ }
+ }
+ if q.multiline {
+ i := len(end) - len(quote)
+ for i > 0 {
+ r, size := utf8.DecodeLastRuneInString(end[:i])
+ if r == '\n' || !unicode.IsSpace(r) {
+ break
+ }
+ i -= size
+ }
+ q.whitespace = end[i : len(end)-len(quote)]
+
+ if len(start) > nStart && start[nStart] != '\n' {
+ if !strings.HasPrefix(start[nStart:], q.whitespace) {
+ return q, 0, 0, errInvalidWhitespace
+ }
+ nStart += len(q.whitespace)
+ }
+ }
+
+ return q, nStart, int(q.numChar) + q.numHash, nil
}
-func (p *litParser) parseString(prefix, suffix, ws string, multi bool, quote byte) (n value) {
- if len(p.src) < len(prefix)+len(suffix) {
- return p.error(p.node, "invalid string: too short")
- }
- for _, r := range prefix {
- if byte(r) != p.ch {
- return p.error(p.node, "invalid interpolation: expected %q", prefix)
- }
- p.next()
- }
- if !strings.HasSuffix(p.src, suffix) {
- return p.error(p.node, "invalid interpolation: unmatched ')'", suffix)
- }
- start, end := len(prefix), len(p.src)-len(suffix)
- str, err := unquote(quote, multi, len(prefix) == 3, ws, p.src[start:end])
+// parseString decodes s, a string without its starting quotes that still ends in its closing quotes or an interpolation start.
+func parseString(ctx *context, node ast.Expr, q QuoteInfo, s string) (n value) {
+ src := newExpr(node)
+ str, err := q.Unquote(s)
if err != nil {
- return p.error(p.node, err, "invalid string: %v", err)
+ return ctx.mkErr(src, err, "invalid string: %v", err)
}
- if quote == '"' {
- return &stringLit{newExpr(p.node), str}
+ if q.IsDouble() {
+ return &stringLit{src, str}
}
- return &bytesLit{newExpr(p.node), []byte(str)}
+ return &bytesLit{src, []byte(str)}
}
func (p *litParser) digitVal(ch byte) (d int) {
diff --git a/cue/lit_test.go b/cue/lit_test.go
index 6287bfa..8263be2 100644
--- a/cue/lit_test.go
+++ b/cue/lit_test.go
@@ -32,25 +32,80 @@
}{
{`"Hello"`, "Hello", nil},
{`'Hello'`, "Hello", nil},
- {"`Hello`", "Hello", nil},
+ {`'Hellø'`, "Hellø", nil},
{`"""` + "\n\t\tHello\n\t\t" + `"""`, "Hello", nil},
{"'''\n\t\tHello\n\t\t'''", "Hello", nil},
{"'''\n\t\tHello\n\n\t\t'''", "Hello\n", nil},
{"'''\n\n\t\tHello\n\t\t'''", "\nHello", nil},
{"'''\n\n\n\n\t\t'''", "\n\n", nil},
{"'''\n\t\t'''", "", nil},
- {"`\n\tHello`", "\n\tHello", nil},
- {"`aaa\n\rbbb`", "aaa\nbbb", nil},
{`"""` + "\n\raaa\n\rbbb\n\r" + `"""`, "aaa\nbbb", nil},
+ {`'\a\b\f\n\r\t\v\'\\\/'`, "\a\b\f\n\r\t\v'\\/", nil},
+ {`"\a\b\f\n\r\t\v\"\\\/"`, "\a\b\f\n\r\t\v\"\\/", nil},
+ {`#"The sequence "\U0001F604" renders as \#U0001F604."#`,
+ `The sequence "\U0001F604" renders as 😄.`,
+ nil},
+ {`" \U00010FfF"`, " \U00010fff", nil},
+ {`"\u0061 "`, "a ", nil},
+ {`'\x61\x55'`, "\x61\x55", nil},
+ {`'\061\055'`, "\061\055", nil},
+ {`'\377 '`, "\377 ", nil},
+ {"'e\u0300\\n'", "e\u0300\n", nil},
+ {`'\06\055'`, "", errSyntax},
+ {`'\0'`, "", errSyntax},
+ {`"\06\055"`, "", errSyntax}, // too short
+ {`'\777 '`, "", errSyntax}, // overflow
+ {`'\U012301'`, "", errSyntax}, // too short
+ {`'\U0123012G'`, "", errSyntax}, // invalid digit G
+ {`"\x04"`, "", errSyntax}, // not allowed in strings
+ {`'\U01230123'`, "", errSyntax}, // too large
+
+ {`"\\"`, "\\", nil},
+ {`"\'"`, "", errSyntax},
+ {`"\q"`, "", errSyntax},
+ {"'\n'", "", errSyntax},
+ {"'---\n---'", "", errSyntax},
+ {"'''\r'''", "", errMissingNewline},
+
+ {`#"Hello"#`, "Hello", nil},
+ {`#"Hello\v"#`, "Hello\\v", nil},
+ {`#"Hello\#v\r"#`, "Hello\v\\r", nil},
+ {`##"Hello\##v\r"##`, "Hello\v\\r", nil},
+ {`##"Hello\##v"##`, "Hello\v", nil},
+ {"#'''\n\t\tHello\\#v\n\t\t'''#", "Hello\v", nil},
+ {"##'''\n\t\tHello\\#v\n\t\t'''##", "Hello\\#v", nil},
+ {`#"""` + "\n\t\t\\#r\n\t\t" + `"""#`, "\r", nil},
+ {`#""#`, "", nil},
+ {`#"This is a "dog""#`, `This is a "dog"`, nil},
+ {"#\"\"\"\n\"\n\"\"\"#", `"`, nil},
+ {"#\"\"\"\n\"\"\"\n\"\"\"#", `"""`, nil},
+ {"#\"\"\"\n\na\n\n\"\"\"#", "\na\n", nil},
+ // Gobble extra \r
+ {"#\"\"\"\n\ra\n\r\"\"\"#", `a`, nil},
+ {"#\"\"\"\n\r\n\ra\n\r\n\r\"\"\"#", "\na\n", nil},
+ // Make sure this works for Windows.
+ {"#\"\"\"\r\n\r\na\r\n\r\n\"\"\"#", "\na\n", nil},
+ {"#\"\"\"\r\n \r\n a\r\n \r\n \"\"\"#", "\na\n", nil},
+ {"#\"\"\"\r\na\r\n\"\"\"#", `a`, nil},
+ {"#\"\"\"\r\n\ra\r\n\r\"\"\"#", `a`, nil},
+ {`####" \"####`, ` \`, nil},
{"```", "", errSyntax},
{"Hello", "", errSyntax},
{`"Hello`, "", errUnmatchedQuote},
{`"""Hello"""`, "", errMissingNewline},
+ {"'''\n Hello\n '''", "", errInvalidWhitespace},
+ {"'''\n a\n b\n '''", "", errInvalidWhitespace},
{`"Hello""`, "", errSyntax},
+ {`#"Hello"`, "", errUnmatchedQuote},
+ {`#"Hello'#`, "", errUnmatchedQuote},
+ {`#"""#`, "", errMissingNewline},
+
+ // TODO: should these be legal?
+ {`#"""#`, "", errMissingNewline},
}
- for _, tc := range testCases {
- t.Run(tc.in, func(t *testing.T) {
+ for i, tc := range testCases {
+ t.Run(fmt.Sprintf("%d/%s", i, tc.in), func(t *testing.T) {
if got, err := Unquote(tc.in); err != tc.err {
t.Errorf("error: got %q; want %q", err, tc.err)
} else if got != tc.out {
@@ -60,6 +115,60 @@
}
}
+func TestInterpolation(t *testing.T) {
+ testCases := []struct {
+ quotes string
+ in string
+ out string
+ err error
+ }{
+ {`""`, `foo\(`, "foo", nil},
+ {`"""` + "\n" + `"""`, `foo`, "", errUnmatchedQuote},
+ {`#""#`, `foo\#(`, "foo", nil},
+ {`#""#`, `foo\(`, "", errUnmatchedQuote},
+ {`""`, `foo\(bar`, "", errSyntax},
+ {`""`, ``, "", errUnmatchedQuote},
+ {`#""#`, `"`, "", errUnmatchedQuote},
+ {`#""#`, `\`, "", errUnmatchedQuote},
+ {`##""##`, `\'`, "", errUnmatchedQuote},
+ }
+ for i, tc := range testCases {
+ t.Run(fmt.Sprintf("%d/%s/%s", i, tc.quotes, tc.in), func(t *testing.T) {
+ info, _, _, _ := ParseQuotes(tc.quotes, tc.quotes)
+ if got, err := info.Unquote(tc.in); err != tc.err {
+ t.Errorf("error: got %q; want %q", err, tc.err)
+ } else if got != tc.out {
+ t.Errorf("value: got %q; want %q", got, tc.out)
+ }
+ })
+ }
+}
+
+func TestIsDouble(t *testing.T) {
+ testCases := []struct {
+ quotes string
+ double bool
+ }{
+ {`""`, true},
+ {`"""` + "\n" + `"""`, true},
+ {`#""#`, true},
+ {`''`, false},
+ {`'''` + "\n" + `'''`, false},
+ {`#''#`, false},
+ }
+ for i, tc := range testCases {
+ t.Run(fmt.Sprintf("%d/%s", i, tc.quotes), func(t *testing.T) {
+ info, _, _, err := ParseQuotes(tc.quotes, tc.quotes)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := info.IsDouble(); got != tc.double {
+ t.Errorf("got %v; want %v", got, tc.double)
+ }
+ })
+ }
+}
+
var defIntBase = newNumBase(&ast.BasicLit{}, newNumInfo(numKind, 0, 10, false))
var defRatBase = newNumBase(&ast.BasicLit{}, newNumInfo(floatKind, 0, 10, false))
diff --git a/cue/parser/parser.go b/cue/parser/parser.go
index 9d0d308..461b78b 100644
--- a/cue/parser/parser.go
+++ b/cue/parser/parser.go
@@ -1179,12 +1179,6 @@
last := &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: lit}
exprs := []ast.Expr{last}
- quote := rune(lit[0])
- numQuotes := 1
- if len(lit) > 2 && lit[0] == lit[1] {
- numQuotes = 3
- }
-
for p.tok == token.LPAREN {
c.pos = 1
p.expect(token.LPAREN)
@@ -1196,7 +1190,7 @@
if p.tok != token.RPAREN {
p.error(p.pos, "expected ')' for string interpolation")
}
- lit = p.scanner.ResumeInterpolation(quote, numQuotes)
+ lit = p.scanner.ResumeInterpolation()
pos = p.pos
p.next()
last = &ast.BasicLit{
diff --git a/cue/parser/parser_test.go b/cue/parser/parser_test.go
index fdae715..77e7cb3 100644
--- a/cue/parser/parser_test.go
+++ b/cue/parser/parser_test.go
@@ -401,32 +401,32 @@
`"a"`: true,
`"a/b"`: true,
`"a.b"`: true,
- `"m\x61th"`: true,
+ `'m\x61th'`: true,
`"greek/αβ"`: true,
`""`: false,
- // Each of these pairs tests both `` vs "" strings
+ // Each of these pairs tests both #""# vs "" strings
// and also use of invalid characters spelled out as
// escape sequences and written directly.
// For example `"\x00"` tests import "\x00"
// while "`\x00`" tests import `<actual-NUL-byte>`.
- "`a`": true,
- `"\x00"`: false,
- "`\x00`": false,
- `"\x7f"`: false,
- "`\x7f`": false,
- `"a!"`: false,
- "`a!`": false,
- `"a b"`: false,
- "`a b`": false,
- `"a\\b"`: false,
- "`a\\b`": false,
- "\"`a`\"": false,
- "`\"a\"`": false,
- `"\x80\x80"`: false,
- "`\x80\x80`": false,
- `"\xFFFD"`: false,
- "`\xFFFD`": false,
+ `#"a"#`: true,
+ `"\x00"`: false,
+ "'\x00'": false,
+ `"\x7f"`: false,
+ "`\x7f`": false,
+ `"a!"`: false,
+ "#'a!'#": false,
+ `"a b"`: false,
+ `#"a b"#`: false,
+ `"a\\b"`: false,
+ "#\"a\\b\"#": false,
+ "\"`a`\"": false,
+ "#'\"a\"'#": false,
+ `"\x80\x80"`: false,
+ "#'\x80\x80'#": false,
+ `"\xFFFD"`: false,
+ "#'\xFFFD'#": false,
}
for path, isValid := range imports {
t.Run(path, func(t *testing.T) {
diff --git a/cue/resolve_test.go b/cue/resolve_test.go
index 9700eba..3cce0cc 100644
--- a/cue/resolve_test.go
+++ b/cue/resolve_test.go
@@ -845,6 +845,59 @@
e: "\([])"`,
out: `<0>{a: "4", b: "one 4 two 4one", c: "one", d: ""+<1>.r+"", r: _, u: ""+_+"", e: _|_([]:expression in interpolation must evaluate to a number kind or string (found list))}`,
}, {
+ desc: "multiline interpolation",
+ in: `
+ a1: """
+ before
+ \(4)
+ after
+ """
+ a2: """
+ before
+ \(4)
+
+ """
+ a3: """
+
+ \(4)
+ after
+ """
+ a4: """
+
+ \(4)
+
+ """
+ m1: """
+ before
+ \(
+ 4)
+ after
+ """
+ m2: """
+ before
+ \(
+ 4)
+
+ """
+ m3: """
+
+ \(
+
+ 4)
+ after
+ """
+ m4: """
+
+ \(
+ 4)
+
+ """
+ `,
+ out: `<0>{` +
+ `a1: "before\n4\nafter", a2: "before\n4\n", a3: "\n4\nafter", a4: "\n4\n", ` +
+ `m1: "before\n4\nafter", m2: "before\n4\n", m3: "\n4\nafter", m4: "\n4\n"` +
+ `}`,
+ }, {
desc: "diamond-shaped constraints",
in: `
S: {
diff --git a/cue/scanner/scanner.go b/cue/scanner/scanner.go
index aab40d5..f3ce8ed 100644
--- a/cue/scanner/scanner.go
+++ b/cue/scanner/scanner.go
@@ -49,10 +49,18 @@
spacesSinceLast int
insertEOL bool // insert a comma before next newline
+ quoteStack []quoteInfo
+
// public state - ok to modify
ErrorCount int // number of errors encountered
}
+type quoteInfo struct {
+ char rune
+ numChar int
+ numHash int
+}
+
const bom = 0xFEFF // byte order mark, only permitted as very first character
// Read the next Unicode char into s.ch.
@@ -406,16 +414,22 @@
// escaped quote. In case of a syntax error, it stops at the offending
// character (without consuming it) and returns false. Otherwise
// it returns true.
-func (s *Scanner) scanEscape(quote rune) (ok, template bool) {
+func (s *Scanner) scanEscape(quote quoteInfo) (ok, interpolation bool) {
+ for i := 0; i < quote.numHash; i++ {
+ if s.ch != '#' {
+ return true, false
+ }
+ s.next()
+ }
+
offs := s.offset
var n int
var base, max uint32
switch s.ch {
- // TODO: remove
case '(':
return true, true
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+ case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote.char:
s.next()
return true, false
case '0', '1', '2', '3', '4', '5', '6', '7':
@@ -454,6 +468,8 @@
n--
}
+ // TODO: this is valid JSON, so remove, but normalize and report an error
+ // for unmatched surrogate pairs.
if x > max || 0xD800 <= x && x < 0xE000 {
s.error(offs, "escape sequence is invalid Unicode code point")
return false, false
@@ -462,7 +478,7 @@
return true, false
}
-func (s *Scanner) scanString(quote rune, offset, numQuotes int) (token.Token, string) {
+func (s *Scanner) scanString(offset int, quote quoteInfo) (token.Token, string) {
// ", """, ', or ''' opening already consumed
offs := s.offset - offset
@@ -471,11 +487,11 @@
hasCR := false
extra := 0
for {
- ch, n := s.consumeQuotes(quote, numQuotes)
- if n == numQuotes {
+ ch, ok := s.consumeStringClose(quote)
+ if ok {
break
}
- if (numQuotes != 3 && ch == '\n') || ch < 0 {
+ if (quote.numChar != 3 && ch == '\n') || ch < 0 {
s.error(offs, "string literal not terminated")
lit := s.src[offs:s.offset]
if hasCR {
@@ -483,17 +499,17 @@
}
return tok, string(lit)
}
- if ch == '\r' && numQuotes == 3 {
+ if ch == '\r' && quote.numChar == 3 {
hasCR = true
}
s.next()
if ch == '\\' {
- if s.ch == '(' {
+ if _, interpolation := s.scanEscape(quote); interpolation {
tok = token.INTERPOLATION
extra = 1
+ s.quoteStack = append(s.quoteStack, quote)
break
}
- s.scanEscape(quote)
}
}
lit := s.src[offs : s.offset+extra]
@@ -513,6 +529,27 @@
return s.ch, n
}
+func (s *Scanner) consumeStringClose(quote quoteInfo) (next rune, atEnd bool) {
+ for i := 0; i < quote.numChar; i++ {
+ if s.ch != quote.char {
+ return s.ch, false
+ }
+ s.next()
+ }
+ hasHash := s.hashCount(quote)
+ return s.ch, hasHash
+}
+
+func (s *Scanner) hashCount(quote quoteInfo) bool {
+ for i := 0; i < quote.numHash; i++ {
+ if s.ch != '#' {
+ return false
+ }
+ s.next()
+ }
+ return true
+}
+
func stripCR(b []byte) []byte {
c := make([]byte, len(b))
i := 0
@@ -525,34 +562,6 @@
return c[:i]
}
-func (s *Scanner) scanRawString() string {
- // '`' opening already consumed
- offs := s.offset - 1
-
- hasCR := false
- for {
- ch := s.ch
- if ch < 0 {
- s.error(offs, "raw string literal not terminated")
- break
- }
- s.next()
- if ch == '`' {
- break
- }
- if ch == '\r' {
- hasCR = true
- }
- }
-
- lit := s.src[offs:s.offset]
- if hasCR {
- lit = stripCR(lit)
- }
-
- return string(lit)
-}
-
func (s *Scanner) skipWhitespace(inc int) {
for {
switch s.ch {
@@ -586,8 +595,10 @@
}
// ResumeInterpolation resumes scanning of a string interpolation.
-func (s *Scanner) ResumeInterpolation(quote rune, numQuotes int) string {
- _, str := s.scanString(quote, 1, numQuotes)
+func (s *Scanner) ResumeInterpolation() string {
+ quote := s.quoteStack[len(s.quoteStack)-1]
+ s.quoteStack = s.quoteStack[:len(s.quoteStack)-1]
+ _, str := s.scanString(1, quote)
return str
}
@@ -665,6 +676,7 @@
tok, lit = s.scanNumber(false)
default:
s.next() // always make progress
+ var quote quoteInfo
switch ch {
case -1:
if s.insertEOL {
@@ -676,7 +688,7 @@
if s.ch == '|' {
// Unconditionally require this to be followed by another
// underscore to avoid needing an extra lookahead.
- // Note that `_|x` is always equal to x.
+ // Note that `_|x` is always equal to _.
s.next()
if s.ch != '_' {
s.error(s.file.Offset(pos), "illegal token '_|'; expected '_'")
@@ -699,22 +711,33 @@
// from s.skipWhitespace()
s.insertEOL = false // newline consumed
return s.file.Pos(offset, token.Elided), token.COMMA, "\n"
+ case '#':
+ for quote.numHash = 1; s.ch == '#'; quote.numHash++ {
+ s.next()
+ }
+ ch = s.ch
+ if ch != '\'' && ch != '"' {
+ break
+ }
+ s.next()
+ fallthrough
case '"', '\'':
insertEOL = true
+ quote.char = ch
+ quote.numChar = 1
+ offs := s.offset - 1 - quote.numHash
switch _, n := s.consumeQuotes(ch, 2); n {
case 1:
- if ch == '"' {
- tok, lit = token.STRING, `""`
- } else {
- tok, lit = token.STRING, `''`
+ if ch == '"' || ch == '\'' {
+ if !s.hashCount(quote) {
+ s.error(offs, "string literal not terminated")
+ }
+ tok, lit = token.STRING, string(s.src[offs:s.offset])
}
default:
- tok, lit = s.scanString(ch, n+1, n+1)
+ quote.numChar = n + 1
+ tok, lit = s.scanString(quote.numChar+quote.numHash, quote)
}
- case '`':
- insertEOL = true
- tok = token.STRING
- lit = s.scanRawString()
case ':':
tok = token.COLON
case ';':
diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go
index 53d1bb0..d0c63cb 100644
--- a/cue/scanner/scanner_test.go
+++ b/cue/scanner/scanner_test.go
@@ -19,7 +19,6 @@
"io/ioutil"
"os"
"path/filepath"
- "reflect"
"runtime"
"strings"
"testing"
@@ -92,25 +91,22 @@
{token.FLOAT, "1e+100", literal},
{token.FLOAT, "1e-100", literal},
{token.FLOAT, "2.71828e-1000", literal},
- {token.STRING, "`aa\n\n`", literal},
{token.STRING, "'a'", literal},
{token.STRING, "'\\000'", literal},
{token.STRING, "'\\xFF'", literal},
{token.STRING, "'\\uff16'", literal},
{token.STRING, "'\\U0000ff16'", literal},
{token.STRING, "'foobar'", literal},
- {token.STRING, "`" + `foo
- bar` +
- "`",
- literal,
- },
- {token.STRING, "`foobar`", literal},
- {token.STRING, "`\r`", literal},
- {token.STRING, "`foo\r\nbar`", literal},
+ {token.STRING, `#"foobar"#`, literal},
+ {token.STRING, `#"\r"#`, literal},
+ {token.STRING, `#"\("#`, literal},
+ {token.STRING, `#"\q"#`, literal},
+ {token.STRING, `###"\##q"###`, literal},
{token.STRING, "'" + `\r` + "'", literal},
{token.STRING, "'foo" + `\r\n` + "bar'", literal},
{token.STRING, `"foobar"`, literal},
{token.STRING, `"""\n foobar\n """`, literal},
+ {token.STRING, `#"""\n \(foobar\n """#`, literal},
// Operators and delimiters
{token.ADD, "+", operator},
@@ -299,7 +295,7 @@
// the illegal token literal indicates what
// kind of semicolon literal to expect
commaLit := "\n"
- if lit[0] == '#' {
+ if lit[0] == '~' {
commaLit = ","
}
// next token must be a comma
@@ -323,11 +319,11 @@
}
var lines = []string{
- // # indicates a comma present in the source
+ // ~ indicates a comma present in the source
// ? indicates an automatically inserted comma
"",
- "\ufeff#,", // first BOM is ignored
- "#,",
+ "\ufeff~,", // first BOM is ignored
+ "~,",
"foo?\n",
"_foo?\n",
"123?\n",
@@ -336,7 +332,7 @@
"_|_?\n",
"_|_?\n",
`"x"` + "?\n",
- "`x`?\n",
+ "#'x'#?\n",
`"""
foo
"""` + "?\n",
@@ -379,7 +375,7 @@
"[[\n",
"{\n",
"{{\n",
- "#,\n",
+ "~,\n",
".\n",
")?\n",
@@ -581,16 +577,18 @@
}
}
-func TestScanTemplate(t *testing.T) {
+func TestScanInterpolation(t *testing.T) {
// error handler
eh := func(pos token.Position, msg string) {
t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg)
}
- trim := func(s string) string { return strings.Trim(s, `"\\()`) }
+ trim := func(s string) string { return strings.Trim(s, `#"\\()`) }
sources := []string{
`"first\(first)\\second\(second)"`,
+ `#"first\#(first)\second\#(second)"#`,
`"level\( ["foo", "level", level ][2] )end\( end )"`,
+ `##"level\##( ["foo", "level", level ][2] )end\##( end )"##`,
`"level\( { "foo": 1, "bar": level } )end\(end)"`,
}
for i, src := range sources {
@@ -610,7 +608,7 @@
count++
case token.RPAREN:
if count--; count == 0 {
- str = trim(s.ResumeInterpolation('"', 1))
+ str = trim(s.ResumeInterpolation())
}
case token.INTERPOLATION:
str = trim(lit)
@@ -626,14 +624,14 @@
}
func TestStdErrorHander(t *testing.T) {
- const src = "#\n" + // illegal character, cause an error
- "# #\n" + // two errors on the same line
+ const src = "~\n" + // illegal character, cause an error
+ "~ ~\n" + // two errors on the same line
"//line File2:20\n" +
- "#\n" + // different file, but same line
+ "~\n" + // different file, but same line
"//line File2:1\n" +
- "# #\n" + // same file, decreasing line number
+ "~ ~\n" + // same file, decreasing line number
"//line File1:1\n" +
- "# # #" // original file, line 1 again
+ "~ ~ ~" // original file, line 1 again
var list errors.List
eh := func(pos token.Position, msg string) { list.AddNew(pos, msg) }
@@ -743,10 +741,6 @@
{`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""},
{`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
{`'`, token.STRING, 0, `'`, "string literal not terminated"},
- // TODO
- // {`'\`, token.STRING, 0, `'\`, "raw string literal not terminated"}, // "escape sequence not terminated"},
- // {"`\n", token.STRING, 0, s"`\n", "raw string literal not terminated"},
- // {"'\n ", token.STRING, 0, "'", "raw string literal not terminated"},
{`""`, token.STRING, 0, `""`, ""},
{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
{`""abc`, token.STRING, 0, `""`, ""},
@@ -754,11 +748,19 @@
{`'''abc`, token.STRING, 0, `'''abc`, "string literal not terminated"},
{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
- {"``", token.STRING, 0, "``", ""},
+ {`#""`, token.STRING, 0, `#""`, "string literal not terminated"},
+ {`#"""`, token.STRING, 0, `#"""`, "string literal not terminated"},
+ {`#""#`, token.STRING, 0, `#""#`, ""},
// {"$", IDENT, 0, "$", ""}, // TODO: for root of file?
- {"`", token.STRING, 0, "`", "raw string literal not terminated"},
+ {"#'", token.STRING, 0, "#'", "string literal not terminated"},
{"''", token.STRING, 0, "''", ""},
{"'", token.STRING, 0, "'", "string literal not terminated"},
+ {`"\("`, token.INTERPOLATION, 0, `"\(`, ""},
+ {`#"\("#`, token.STRING, 0, `#"\("#`, ""},
+ {`#"\#("#`, token.INTERPOLATION, 0, `#"\#(`, ""},
+ {`"\q"`, token.STRING, 2, `"\q"`, "unknown escape sequence"},
+ {`#"\q"#`, token.STRING, 0, `#"\q"#`, ""},
+ {`#"\#q"#`, token.STRING, 4, `#"\#q"#`, "unknown escape sequence"},
{"/**/", token.COMMENT, 0, "/**/", ""},
{"/*", token.COMMENT, 0, "/*", "comment not terminated"},
{"0", token.INT, 0, "0", ""},
@@ -864,329 +866,3 @@
}
}
}
-
-func TestScanner_next(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.next()
- }
-}
-
-func TestScanner_Init(t *testing.T) {
- type args struct {
- file *token.File
- src []byte
- err errors.Handler
- mode Mode
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.Init(tt.args.file, tt.args.src, tt.args.err, tt.args.mode)
- }
-}
-
-func TestScanner_error(t *testing.T) {
- type args struct {
- offs int
- msg string
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.error(tt.args.offs, tt.args.msg)
- }
-}
-
-func TestScanner_interpretLineComment(t *testing.T) {
- type args struct {
- text []byte
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.interpretLineComment(tt.args.text)
- }
-}
-
-func TestScanner_scanComment(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := tt.s.scanComment(); got != tt.want {
- t.Errorf("%q. Scanner.scanComment() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_findLineEnd(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- want bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := tt.s.findLineEnd(); got != tt.want {
- t.Errorf("%q. Scanner.findLineEnd() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func Test_isLetter(t *testing.T) {
- type args struct {
- ch rune
- }
- tests := []struct {
- name string
- args args
- want bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := isLetter(tt.args.ch); got != tt.want {
- t.Errorf("%q. isLetter() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func Test_isDigit(t *testing.T) {
- type args struct {
- ch rune
- }
- tests := []struct {
- name string
- args args
- want bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := isDigit(tt.args.ch); got != tt.want {
- t.Errorf("%q. isDigit() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_scanIdentifier(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := tt.s.scanIdentifier(); got != tt.want {
- t.Errorf("%q. Scanner.scanIdentifier() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func Test_digitVal(t *testing.T) {
- type args struct {
- ch rune
- }
- tests := []struct {
- name string
- args args
- want int
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := digitVal(tt.args.ch); got != tt.want {
- t.Errorf("%q. digitVal() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_scanMantissa(t *testing.T) {
- type args struct {
- base int
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.scanMantissa(tt.args.base)
- }
-}
-
-func TestScanner_scanNumber(t *testing.T) {
- type args struct {
- seenDecimalPoint bool
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- want token.Token
- want1 string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- got, got1 := tt.s.scanNumber(tt.args.seenDecimalPoint)
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("%q. Scanner.scanNumber() got = %v, want %v", tt.name, got, tt.want)
- }
- if got1 != tt.want1 {
- t.Errorf("%q. Scanner.scanNumber() got1 = %v, want %v", tt.name, got1, tt.want1)
- }
- }
-}
-
-func TestScanner_scanEscape(t *testing.T) {
- type args struct {
- quote rune
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- want bool
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got, _ := tt.s.scanEscape(tt.args.quote); got != tt.want {
- t.Errorf("%q. Scanner.scanEscape() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_scanString(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if _, got := tt.s.scanString(rune(tt.name[0]), 1, 1); got != tt.want {
- t.Errorf("%q. Scanner.scanString() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func Test_stripCR(t *testing.T) {
- type args struct {
- b []byte
- }
- tests := []struct {
- name string
- args args
- want []byte
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := stripCR(tt.args.b); !reflect.DeepEqual(got, tt.want) {
- t.Errorf("%q. stripCR() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_scanRawString(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- want string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := tt.s.scanRawString(); got != tt.want {
- t.Errorf("%q. Scanner.scanRawString() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_skipWhitespace(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- tt.s.skipWhitespace(1)
- }
-}
-
-func TestScanner_switch2(t *testing.T) {
- type args struct {
- tok0 token.Token
- tok1 token.Token
- }
- tests := []struct {
- name string
- s *Scanner
- args args
- want token.Token
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- if got := tt.s.switch2(tt.args.tok0, tt.args.tok1); !reflect.DeepEqual(got, tt.want) {
- t.Errorf("%q. Scanner.switch2() = %v, want %v", tt.name, got, tt.want)
- }
- }
-}
-
-func TestScanner_Scan(t *testing.T) {
- tests := []struct {
- name string
- s *Scanner
- wantPos token.Pos
- wantTok token.Token
- wantLit string
- }{
- // TODO: Add test cases.
- }
- for _, tt := range tests {
- gotPos, gotTok, gotLit := tt.s.Scan()
- if !reflect.DeepEqual(gotPos, tt.wantPos) {
- t.Errorf("%q. Scanner.Scan() gotPos = %v, want %v", tt.name, gotPos, tt.wantPos)
- }
- if !reflect.DeepEqual(gotTok, tt.wantTok) {
- t.Errorf("%q. Scanner.Scan() gotTok = %v, want %v", tt.name, gotTok, tt.wantTok)
- }
- if gotLit != tt.wantLit {
- t.Errorf("%q. Scanner.Scan() gotLit = %v, want %v", tt.name, gotLit, tt.wantLit)
- }
- }
-}
diff --git a/cue/token/token.go b/cue/token/token.go
index 7db3329..560573b 100644
--- a/cue/token/token.go
+++ b/cue/token/token.go
@@ -55,6 +55,7 @@
IDIV // div
IMOD // mod
+ // TODO: rename to AND and OR
UNIFY // &
DISJUNCTION // |
diff --git a/doc/ref/spec.md b/doc/ref/spec.md
index dd1df4d..9153fa3 100644
--- a/doc/ref/spec.md
+++ b/doc/ref/spec.md
@@ -356,10 +356,15 @@
String literals may only be valid UTF-8.
Byte sequences may contain any sequence of bytes.
-Several backslash escapes allow arbitrary values to be encoded as ASCII text
-in interpreted strings.
+Several escape sequences allow arbitrary values to be encoded as ASCII text.
+An escape sequence starts with an _escape delimiter_, which is `\` by default.
+The escape delimiter may be altered to be `\` plus a fixed number of
+hash symbols `#`
+by padding the start and end of a string or byte sequence literal
+with this number of hash symbols.
+
There are four ways to represent the integer value as a numeric constant: `\x`
-followed by exactly two hexadecimal digits; \u followed by exactly four
+followed by exactly two hexadecimal digits; `\u` followed by exactly four
hexadecimal digits; `\U` followed by exactly eight hexadecimal digits, and a
plain backslash `\` followed by exactly three octal digits.
In each case the value of the literal is the value represented by the
@@ -373,7 +378,7 @@
Hexadecimal escapes satisfy this condition by construction.
The escapes `\u` and `\U` represent Unicode code points so within them
some values are illegal, in particular those above `0x10FFFF`.
-Surrogate halves are allowed to be compatible with JSON,
+Surrogate halves are allowed,
but are translated into their non-surrogate equivalent internally.
The three-digit octal (`\nnn`) and two-digit hexadecimal (`\xnn`) escapes
@@ -384,8 +389,6 @@
the two bytes `0xc3 0xbf` of the UTF-8
encoding of character `U+00FF`.
-After a backslash, certain single-character escapes represent special values:
-
```
\a U+0007 alert or bell
\b U+0008 backspace
@@ -406,24 +409,35 @@
All other sequences starting with a backslash are illegal inside literals.
```
-escaped_char = `\` ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | `\` | "'" | `"` ) .
-unicode_value = unicode_char | little_u_value | big_u_value | escaped_char .
+escaped_char = `\` { `#` } ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | `\` | "'" | `"` ) .
byte_value = octal_byte_value | hex_byte_value .
octal_byte_value = `\` octal_digit octal_digit octal_digit .
hex_byte_value = `\` "x" hex_digit hex_digit .
little_u_value = `\` "u" hex_digit hex_digit hex_digit hex_digit .
big_u_value = `\` "U" hex_digit hex_digit hex_digit hex_digit
hex_digit hex_digit hex_digit hex_digit .
+unicode_value = unicode_char | little_u_value | big_u_value | escaped_char .
+interpolation = `\` { `#` } "(" Expression ")" .
-string_lit = interpreted_string_lit |
- interpreted_bytes_lit |
- multiline_lit .
+string_lit = simple_string_lit |
+ multiline_string_lit |
+ simple_bytes_lit |
+ multiline_bytes_lit |
+ `#` string_lit `#` .
-interpolation = "\(" Expression ")" .
-interpreted_string_lit = `"` { unicode_value | interpolation } `"` .
-interpreted_bytes_lit = `"` { unicode_value | interpolation | byte_value } `"` .
+simple_string_lit = `"` { unicode_value | interpolation } `"` .
+simple_bytes_lit = "'" { unicode_value | interpolation | byte_value } "'" .
+multiline_string_lit = `"""` newline
+ { unicode_value | interpolation | newline }
+ newline `"""` .
+multiline_bytes_lit = "'''" newline
+ { unicode_value | interpolation | byte_value | newline }
+ newline "'''" .
```
+Carriage return characters (`\r`) inside string literals are discarded from
+the resulting string value.
+
```
'a\000\xab'
'\007'
@@ -438,6 +452,10 @@
"\xff\u00FF"
"\uD800" // illegal: surrogate half (TODO: probably should allow)
"\U00110000" // illegal: invalid Unicode code point
+
+#"This is not an \(interpolation)"#
+#"This is an \#(interpolation)"#
+#"The sequence "\U0001F604" renders as \#U0001F604."#
```
These examples all represent the same string:
@@ -455,13 +473,11 @@
combining form involving an accent and a letter, the result will appear as two
code points if placed in a string literal.
-Each of the interpreted string variants have a multiline equivalent.
-Multiline interpreted strings are like their single-line equivalent,
+Strings and byte sequences have a multiline equivalent.
+Multiline strings are like their single-line equivalent,
but allow newline characters.
-Carriage return characters (`\r`) inside raw string literals are discarded from
-the raw string value.
-Multiline interpreted strings and byte sequences respectively start with
+Multiline strings and byte sequences respectively start with
a triple double quote (`"""`) or triple single quote (`'''`),
immediately followed by a newline, which is discarded from the string contents.
The string is closed by a matching triple quote, which must be by itself
@@ -473,16 +489,6 @@
To include it is suffices to escape one of the quotes.
```
-multiline_lit = multiline_string_lit | multiline_bytes_lit .
-multiline_string_lit = `"""` newline
- { unicode_char | interpolation | newline }
- newline `"""` .
-multiline_bytes_lit = "'''" newline
- { unicode_char | interpolation | newline | byte_value }
- newline "'''" .
-```
-
-```
"""
lily:
out of the water
@@ -923,7 +929,7 @@
FieldDecl = Label { Label } ":" Expression .
AliasDecl = Label "=" Expression .
-Label = identifier | interpreted_string_lit | TemplateLabel .
+Label = identifier | simple_string_lit | TemplateLabel .
TemplateLabel = "<" identifier ">" .
Tag = "#" identifier [ ":" json_string ] .
```
@@ -1851,7 +1857,7 @@
_Field comprehensions_ follow a `Field` with a clause sequence, where the
label and value of the field are evaluated for each iteration.
-The label must be an identifier or interpreted_string_lit, where the
+The label must be an identifier or simple_string_lit, where the
later may be a string interpolation that refers to the identifiers defined
in the clauses.
Values of iterations that map to the same label unify into a single field.