cue/scanner: improve escape modifier handling

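- a missing newline after the opening quotes of a multiline string
  is now detected in the scanner
- fixed a bug where a false start of the closing delimiter inside a
  string (e.g. the first `"` of `""#` in `#" ""#`) was mishandled
- a distinct error message is reported when the newline after a
  multiline opening quote is missing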

Change-Id: Ie39463d6429d8bb4dc52f8308892ffe9102b007d
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2328
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/lit_test.go b/cue/lit_test.go
index b98c892..fe8a9e5 100644
--- a/cue/lit_test.go
+++ b/cue/lit_test.go
@@ -97,6 +97,9 @@
 		{"false", falseSentinel},
 		{"fls", &bottom{}},
 		{`"foo"`, &stringLit{str: "foo"}},
+		{`#"foo"#`, &stringLit{str: "foo"}},
+		{`#""foo"#`, &stringLit{str: `"foo`}},
+		{`#" ""#`, &stringLit{str: ` "`}},
 		{`"\"foo\""`, &stringLit{str: `"foo"`}},
 		{`"foo\u0032"`, &stringLit{str: `foo2`}},
 		{`"foo\U00000033"`, &stringLit{str: `foo3`}},
diff --git a/cue/literal/string_test.go b/cue/literal/string_test.go
index 88c392b..07de07e 100644
--- a/cue/literal/string_test.go
+++ b/cue/literal/string_test.go
@@ -75,6 +75,10 @@
 		{"##'''\n\t\tHello\\#v\n\t\t'''##", "Hello\\#v", nil},
 		{`#"""` + "\n\t\t\\#r\n\t\t" + `"""#`, "\r", nil},
 		{`#""#`, "", nil},
+		{`#" ""#`, ` "`, nil},
+		{`#" """#`, ` ""`, nil},
+		{`##" """# "##`, ` """# `, nil},
+		{`##" """# "##`, ` """# `, nil},
 		{`#"This is a "dog""#`, `This is a "dog"`, nil},
 		{"#\"\"\"\n\"\n\"\"\"#", `"`, nil},
 		{"#\"\"\"\n\"\"\"\n\"\"\"#", `"""`, nil},
@@ -98,9 +102,9 @@
 		{`"Hello""`, "", errSyntax},
 		{`#"Hello"`, "", errUnmatchedQuote},
 		{`#"Hello'#`, "", errUnmatchedQuote},
-		{`#"""#`, "", errMissingNewline},
+		{`#""" """#`, "", errMissingNewline},
 
-		// TODO: should these be legal?
+		// TODO: should this be legal?
 		{`#"""#`, "", errMissingNewline},
 	}
 	for i, tc := range testCases {
diff --git a/cue/scanner/scanner.go b/cue/scanner/scanner.go
index 1591fd4..3834033 100644
--- a/cue/scanner/scanner.go
+++ b/cue/scanner/scanner.go
@@ -484,19 +484,15 @@
 	return true, false
 }
 
-func (s *Scanner) scanString(offset int, quote quoteInfo) (token.Token, string) {
+func (s *Scanner) scanString(offs int, quote quoteInfo) (token.Token, string) {
 	// ", """, ', or ''' opening already consumed
-	offs := s.offset - offset
 
 	tok := token.STRING
 
 	hasCR := false
 	extra := 0
 	for {
-		ch, ok := s.consumeStringClose(quote)
-		if ok {
-			break
-		}
+		ch := s.ch
 		if (quote.numChar != 3 && ch == '\n') || ch < 0 {
 			s.errf(offs, "string literal not terminated")
 			lit := s.src[offs:s.offset]
@@ -505,10 +501,15 @@
 			}
 			return tok, string(lit)
 		}
+
+		s.next()
+		ch, ok := s.consumeStringClose(ch, quote)
+		if ok {
+			break
+		}
 		if ch == '\r' && quote.numChar == 3 {
 			hasCR = true
 		}
-		s.next()
 		if ch == '\\' {
 			if _, interpolation := s.scanEscape(quote); interpolation {
 				tok = token.INTERPOLATION
@@ -535,25 +536,34 @@
 	return s.ch, n
 }
 
-func (s *Scanner) consumeStringClose(quote quoteInfo) (next rune, atEnd bool) {
-	for i := 0; i < quote.numChar; i++ {
-		if s.ch != quote.char {
-			return s.ch, false
+func (s *Scanner) consumeStringClose(ch rune, quote quoteInfo) (next rune, atEnd bool) {
+	if quote.char != ch {
+		return ch, false
+	}
+	numChar := quote.numChar
+	n := numChar + quote.numHash
+	want := quote.char
+	for i := 1; i < n; i++ {
+		if i == numChar {
+			want = '#'
 		}
+		if want != s.ch {
+			return ch, false
+		}
+		ch = s.ch
 		s.next()
 	}
-	hasHash := s.hashCount(quote)
-	return s.ch, hasHash
+	return s.ch, true
 }
 
-func (s *Scanner) hashCount(quote quoteInfo) bool {
+func (s *Scanner) checkHashCount(offs int, quote quoteInfo) {
 	for i := 0; i < quote.numHash; i++ {
 		if s.ch != '#' {
-			return false
+			s.errf(offs, "string literal not terminated")
+			return
 		}
 		s.next()
 	}
-	return true
 }
 
 func stripCR(b []byte) []byte {
@@ -695,7 +705,7 @@
 // ResumeInterpolation resumes scanning of a string interpolation.
 func (s *Scanner) ResumeInterpolation() string {
 	quote := s.popInterpolation()
-	_, str := s.scanString(1, quote)
+	_, str := s.scanString(s.offset-1, quote)
 	return str
 }
 
@@ -824,16 +834,31 @@
 			quote.numChar = 1
 			offs := s.offset - 1 - quote.numHash
 			switch _, n := s.consumeQuotes(ch, 2); n {
+			case 0:
+				quote.numChar = 1
+				tok, lit = s.scanString(offs, quote)
 			case 1:
-				if ch == '"' || ch == '\'' {
-					if !s.hashCount(quote) {
-						s.errf(offs, "string literal not terminated")
+				s.checkHashCount(offs, quote)
+				tok, lit = token.STRING, string(s.src[offs:s.offset])
+			case 2:
+				quote.numChar = 3
+				switch s.ch {
+				case '\n':
+					s.next()
+					tok, lit = s.scanString(offs, quote)
+				case '\r':
+					s.next()
+					if s.ch == '\n' {
+						s.next()
+						tok, lit = s.scanString(offs, quote)
+						break
 					}
+					fallthrough
+				default:
+					s.errf(offs, "expected newline after multiline quote %s",
+						s.src[offs:s.offset])
 					tok, lit = token.STRING, string(s.src[offs:s.offset])
 				}
-			default:
-				quote.numChar = n + 1
-				tok, lit = s.scanString(quote.numChar+quote.numHash, quote)
 			}
 		case '@':
 			insertEOL = true
diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go
index cc5c433..a1b8741 100644
--- a/cue/scanner/scanner_test.go
+++ b/cue/scanner/scanner_test.go
@@ -106,6 +106,7 @@
 	{token.STRING, "'\\U0000ff16'", literal},
 	{token.STRING, "'foobar'", literal},
 	{token.STRING, `'foo\/bar'`, literal},
+	{token.STRING, `#" ""#`, literal},
 	{token.STRING, `#"foobar"#`, literal},
 	{token.STRING, `#"\r"#`, literal},
 	{token.STRING, `#"\("#`, literal},
@@ -114,8 +115,12 @@
 	{token.STRING, "'" + `\r` + "'", literal},
 	{token.STRING, "'foo" + `\r\n` + "bar'", literal},
 	{token.STRING, `"foobar"`, literal},
-	{token.STRING, `"""\n  foobar\n  """`, literal},
-	{token.STRING, `#"""\n  \(foobar\n  """#`, literal},
+	{token.STRING, "\"\"\"\n  foobar\n  \"\"\"", literal},
+	{token.STRING, "#\"\"\"\n  \\(foobar\n  \"\"\"#", literal},
+	// TODO: should we preserve the \r instead and have it removed by the
+	// literal parser? This would allow preserving \r for formatting without
+	// changing the semantics of evaluation.
+	{token.STRING, "#\"\"\"\r\n  \\(foobar\n  \"\"\"#", literal},
 
 	// Operators and delimiters
 	{token.ADD, "+", operator},
@@ -771,12 +776,13 @@
 	{`""`, token.STRING, 0, `""`, ""},
 	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
 	{`""abc`, token.STRING, 0, `""`, ""},
-	{`"""abc`, token.STRING, 0, `"""abc`, "string literal not terminated"},
-	{`'''abc`, token.STRING, 0, `'''abc`, "string literal not terminated"},
+	{"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"},
+	{"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"},
 	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
 	{"\"abc\n   ", token.STRING, 0, `"abc`, "string literal not terminated"},
+	{"\"abc\r\n   ", token.STRING, 0, "\"abc\r", "string literal not terminated"},
 	{`#""`, token.STRING, 0, `#""`, "string literal not terminated"},
-	{`#"""`, token.STRING, 0, `#"""`, "string literal not terminated"},
+	{`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`},
 	{`#""#`, token.STRING, 0, `#""#`, ""},
 	// {"$", IDENT, 0, "$", ""}, // TODO: for root of file?
 	{"#'", token.STRING, 0, "#'", "string literal not terminated"},