cue/scanner: improve escape modifier handling
- a missing newline after a multiline opening quote is now detected in the scanner
- fixed a bug where a false start of the string's closing delimiter (e.g. the `""` in `#" ""#`) was mishandled
- different error message if newline is missing
Change-Id: Ie39463d6429d8bb4dc52f8308892ffe9102b007d
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2328
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/lit_test.go b/cue/lit_test.go
index b98c892..fe8a9e5 100644
--- a/cue/lit_test.go
+++ b/cue/lit_test.go
@@ -97,6 +97,9 @@
{"false", falseSentinel},
{"fls", &bottom{}},
{`"foo"`, &stringLit{str: "foo"}},
+ {`#"foo"#`, &stringLit{str: "foo"}},
+ {`#""foo"#`, &stringLit{str: `"foo`}},
+ {`#" ""#`, &stringLit{str: ` "`}},
{`"\"foo\""`, &stringLit{str: `"foo"`}},
{`"foo\u0032"`, &stringLit{str: `foo2`}},
{`"foo\U00000033"`, &stringLit{str: `foo3`}},
diff --git a/cue/literal/string_test.go b/cue/literal/string_test.go
index 88c392b..07de07e 100644
--- a/cue/literal/string_test.go
+++ b/cue/literal/string_test.go
@@ -75,6 +75,10 @@
{"##'''\n\t\tHello\\#v\n\t\t'''##", "Hello\\#v", nil},
{`#"""` + "\n\t\t\\#r\n\t\t" + `"""#`, "\r", nil},
{`#""#`, "", nil},
+ {`#" ""#`, ` "`, nil},
+ {`#" """#`, ` ""`, nil},
+ {`##" """# "##`, ` """# `, nil},
+ {`##" """# "##`, ` """# `, nil},
{`#"This is a "dog""#`, `This is a "dog"`, nil},
{"#\"\"\"\n\"\n\"\"\"#", `"`, nil},
{"#\"\"\"\n\"\"\"\n\"\"\"#", `"""`, nil},
@@ -98,9 +102,9 @@
{`"Hello""`, "", errSyntax},
{`#"Hello"`, "", errUnmatchedQuote},
{`#"Hello'#`, "", errUnmatchedQuote},
- {`#"""#`, "", errMissingNewline},
+ {`#""" """#`, "", errMissingNewline},
- // TODO: should these be legal?
+ // TODO: should this be legal?
{`#"""#`, "", errMissingNewline},
}
for i, tc := range testCases {
diff --git a/cue/scanner/scanner.go b/cue/scanner/scanner.go
index 1591fd4..3834033 100644
--- a/cue/scanner/scanner.go
+++ b/cue/scanner/scanner.go
@@ -484,19 +484,15 @@
return true, false
}
-func (s *Scanner) scanString(offset int, quote quoteInfo) (token.Token, string) {
+func (s *Scanner) scanString(offs int, quote quoteInfo) (token.Token, string) {
// ", """, ', or ''' opening already consumed
- offs := s.offset - offset
tok := token.STRING
hasCR := false
extra := 0
for {
- ch, ok := s.consumeStringClose(quote)
- if ok {
- break
- }
+ ch := s.ch
if (quote.numChar != 3 && ch == '\n') || ch < 0 {
s.errf(offs, "string literal not terminated")
lit := s.src[offs:s.offset]
@@ -505,10 +501,15 @@
}
return tok, string(lit)
}
+
+ s.next()
+ ch, ok := s.consumeStringClose(ch, quote)
+ if ok {
+ break
+ }
if ch == '\r' && quote.numChar == 3 {
hasCR = true
}
- s.next()
if ch == '\\' {
if _, interpolation := s.scanEscape(quote); interpolation {
tok = token.INTERPOLATION
@@ -535,25 +536,34 @@
return s.ch, n
}
-func (s *Scanner) consumeStringClose(quote quoteInfo) (next rune, atEnd bool) {
- for i := 0; i < quote.numChar; i++ {
- if s.ch != quote.char {
- return s.ch, false
+func (s *Scanner) consumeStringClose(ch rune, quote quoteInfo) (next rune, atEnd bool) {
+ if quote.char != ch {
+ return ch, false
+ }
+ numChar := quote.numChar
+ n := numChar + quote.numHash
+ want := quote.char
+ for i := 1; i < n; i++ {
+ if i == numChar {
+ want = '#'
}
+ if want != s.ch {
+ return ch, false
+ }
+ ch = s.ch
s.next()
}
- hasHash := s.hashCount(quote)
- return s.ch, hasHash
+ return s.ch, true
}
-func (s *Scanner) hashCount(quote quoteInfo) bool {
+func (s *Scanner) checkHashCount(offs int, quote quoteInfo) {
for i := 0; i < quote.numHash; i++ {
if s.ch != '#' {
- return false
+ s.errf(offs, "string literal not terminated")
+ return
}
s.next()
}
- return true
}
func stripCR(b []byte) []byte {
@@ -695,7 +705,7 @@
// ResumeInterpolation resumes scanning of a string interpolation.
func (s *Scanner) ResumeInterpolation() string {
quote := s.popInterpolation()
- _, str := s.scanString(1, quote)
+ _, str := s.scanString(s.offset-1, quote)
return str
}
@@ -824,16 +834,31 @@
quote.numChar = 1
offs := s.offset - 1 - quote.numHash
switch _, n := s.consumeQuotes(ch, 2); n {
+ case 0:
+ quote.numChar = 1
+ tok, lit = s.scanString(offs, quote)
case 1:
- if ch == '"' || ch == '\'' {
- if !s.hashCount(quote) {
- s.errf(offs, "string literal not terminated")
+ s.checkHashCount(offs, quote)
+ tok, lit = token.STRING, string(s.src[offs:s.offset])
+ case 2:
+ quote.numChar = 3
+ switch s.ch {
+ case '\n':
+ s.next()
+ tok, lit = s.scanString(offs, quote)
+ case '\r':
+ s.next()
+ if s.ch == '\n' {
+ s.next()
+ tok, lit = s.scanString(offs, quote)
+ break
}
+ fallthrough
+ default:
+ s.errf(offs, "expected newline after multiline quote %s",
+ s.src[offs:s.offset])
tok, lit = token.STRING, string(s.src[offs:s.offset])
}
- default:
- quote.numChar = n + 1
- tok, lit = s.scanString(quote.numChar+quote.numHash, quote)
}
case '@':
insertEOL = true
diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go
index cc5c433..a1b8741 100644
--- a/cue/scanner/scanner_test.go
+++ b/cue/scanner/scanner_test.go
@@ -106,6 +106,7 @@
{token.STRING, "'\\U0000ff16'", literal},
{token.STRING, "'foobar'", literal},
{token.STRING, `'foo\/bar'`, literal},
+ {token.STRING, `#" ""#`, literal},
{token.STRING, `#"foobar"#`, literal},
{token.STRING, `#"\r"#`, literal},
{token.STRING, `#"\("#`, literal},
@@ -114,8 +115,12 @@
{token.STRING, "'" + `\r` + "'", literal},
{token.STRING, "'foo" + `\r\n` + "bar'", literal},
{token.STRING, `"foobar"`, literal},
- {token.STRING, `"""\n foobar\n """`, literal},
- {token.STRING, `#"""\n \(foobar\n """#`, literal},
+ {token.STRING, "\"\"\"\n foobar\n \"\"\"", literal},
+ {token.STRING, "#\"\"\"\n \\(foobar\n \"\"\"#", literal},
+ // TODO: should we preserve the \r instead and have it removed by the
+ // literal parser? This would allow preserving \r for formatting without
+ // changing the semantics of evaluation.
+ {token.STRING, "#\"\"\"\r\n \\(foobar\n \"\"\"#", literal},
// Operators and delimiters
{token.ADD, "+", operator},
@@ -771,12 +776,13 @@
{`""`, token.STRING, 0, `""`, ""},
{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
{`""abc`, token.STRING, 0, `""`, ""},
- {`"""abc`, token.STRING, 0, `"""abc`, "string literal not terminated"},
- {`'''abc`, token.STRING, 0, `'''abc`, "string literal not terminated"},
+ {"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"},
+ {"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"},
{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
+ {"\"abc\r\n ", token.STRING, 0, "\"abc\r", "string literal not terminated"},
{`#""`, token.STRING, 0, `#""`, "string literal not terminated"},
- {`#"""`, token.STRING, 0, `#"""`, "string literal not terminated"},
+ {`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`},
{`#""#`, token.STRING, 0, `#""#`, ""},
// {"$", IDENT, 0, "$", ""}, // TODO: for root of file?
{"#'", token.STRING, 0, "#'", "string literal not terminated"},