blob: 9cbe6e82f9da22c7235ecdde7a9cac93103de948 [file] [log] [blame]
// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package literal
import (
"strconv"
"strings"
"unicode/utf8"
)
// Form defines how to quote a string or bytes literal.
type Form struct {
hashCount int
quote byte
multiline bool
auto bool
exact bool
asciiOnly bool
graphicOnly bool
indent string
tripleQuote string
}
// TODO:
// - Fixed or max level of escape modifiers (#""#).
// - Option to fall back to bytes if value cannot be represented as string.
// E.g. ExactString.
// - QuoteExact that fails with an error if a string cannot be represented
// without loss.
// - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
// This is not supported yet in CUE, but may, and should be considered as
// a possibility in API design.
// - Other possible convenience forms: Blob (auto-break bytes), String (bytes
// or string), Label.
// WithTabIndent returns a new Form with indentation set to the given number
// of tabs. The result will be a multiline string.
func (f Form) WithTabIndent(n int) Form {
f.indent = tabs(n)
f.multiline = true
return f
}
const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t"
func tabs(n int) string {
if n < len(tabIndent) {
return tabIndent[:n]
}
return strings.Repeat("\t", n)
}
// WithOptionalIndent is like WithTabIndent, but only returns a multiline
// strings if it doesn't contain any newline characters.
func (f Form) WithOptionalTabIndent(tabs int) Form {
if tabs < len(tabIndent) {
f.indent = tabIndent[:tabs]
} else {
f.indent = strings.Repeat("\t", tabs)
}
f.auto = true
return f
}
// WithASCIIOnly ensures the quoted strings consists solely of valid ASCII
// characters.
func (f Form) WithASCIIOnly() Form {
f.asciiOnly = true
return f
}
// WithGraphicOnly ensures the quoted strings consists solely of printable
// characters.
func (f Form) WithGraphicOnly() Form {
f.graphicOnly = true
return f
}
var (
// String defines the format of a CUE string. Conversions may be lossy.
String Form = stringForm
// TODO: ExactString: quotes to bytes type if the string cannot be
// represented without loss of accuracy.
// Label is like Text, but optimized for labels.
Label Form = stringForm
// Bytes defines the format of bytes literal.
Bytes Form = bytesForm
stringForm = Form{
quote: '"',
tripleQuote: `"""`,
}
bytesForm = Form{
quote: '\'',
tripleQuote: `'''`,
exact: true,
}
)
// Quote returns CUE string literal representing s. The returned string uses CUE
// escape sequences (\t, \n, \u00FF, \u0100) for control characters and
// non-printable characters as defined by strconv.IsPrint.
//
// It reports an error if the string cannot be converted to the desired form.
func (f Form) Quote(s string) string {
return string(f.Append(make([]byte, 0, 3*len(s)/2), s))
}
const (
lowerhex = "0123456789abcdef"
)
// Append appends a CUE string literal representing s, as generated by Quote, to
// buf and returns the extended buffer.
func (f Form) Append(buf []byte, s string) []byte {
if f.auto && strings.ContainsRune(s, '\n') {
f.multiline = true
}
if f.multiline {
f.hashCount = f.requiredHashCount(s)
}
// Often called with big strings, so preallocate. If there's quoting,
// this is conservative but still helps a lot.
if cap(buf)-len(buf) < len(s) {
nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
copy(nBuf, buf)
buf = nBuf
}
for i := 0; i < f.hashCount; i++ {
buf = append(buf, '#')
}
if f.multiline {
buf = append(buf, f.quote, f.quote, f.quote, '\n')
if s == "" {
buf = append(buf, f.indent...)
buf = append(buf, f.quote, f.quote, f.quote)
return buf
}
if len(s) > 0 && s[0] != '\n' {
buf = append(buf, f.indent...)
}
} else {
buf = append(buf, f.quote)
}
buf = f.appendEscaped(buf, s)
if f.multiline {
buf = append(buf, '\n')
buf = append(buf, f.indent...)
buf = append(buf, f.quote, f.quote, f.quote)
} else {
buf = append(buf, f.quote)
}
for i := 0; i < f.hashCount; i++ {
buf = append(buf, '#')
}
return buf
}
// AppendEscaped appends a CUE string literal representing s, as generated by
// Quote but without the quotes, to buf and returns the extended buffer.
//
// It does not include the last indentation.
func (f Form) AppendEscaped(buf []byte, s string) []byte {
if f.auto && strings.ContainsRune(s, '\n') {
f.multiline = true
}
// Often called with big strings, so preallocate. If there's quoting,
// this is conservative but still helps a lot.
if cap(buf)-len(buf) < len(s) {
nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
copy(nBuf, buf)
buf = nBuf
}
buf = f.appendEscaped(buf, s)
return buf
}
func (f Form) appendEscaped(buf []byte, s string) []byte {
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
if r >= utf8.RuneSelf {
r, width = utf8.DecodeRuneInString(s)
}
if f.exact && width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
buf = append(buf, lowerhex[s[0]&0xF])
continue
}
if f.multiline && r == '\n' {
buf = append(buf, '\n')
if len(s) > 1 && s[1] != '\n' {
buf = append(buf, f.indent...)
}
continue
}
buf = f.appendEscapedRune(buf, r)
}
return buf
}
func (f *Form) appendEscapedRune(buf []byte, r rune) []byte {
var runeTmp [utf8.UTFMax]byte
if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed
buf = f.appendEscape(buf)
buf = append(buf, byte(r))
return buf
}
if f.asciiOnly {
if r < utf8.RuneSelf && strconv.IsPrint(r) {
buf = append(buf, byte(r))
return buf
}
} else if strconv.IsPrint(r) || f.graphicOnly && isInGraphicList(r) {
n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...)
return buf
}
buf = f.appendEscape(buf)
switch r {
case '\a':
buf = append(buf, 'a')
case '\b':
buf = append(buf, 'b')
case '\f':
buf = append(buf, 'f')
case '\n':
buf = append(buf, 'n')
case '\r':
buf = append(buf, 'r')
case '\t':
buf = append(buf, 't')
case '\v':
buf = append(buf, 'v')
default:
switch {
case r < ' ' && f.exact:
buf = append(buf, 'x')
buf = append(buf, lowerhex[byte(r)>>4])
buf = append(buf, lowerhex[byte(r)&0xF])
case r > utf8.MaxRune:
r = 0xFFFD
fallthrough
case r < 0x10000:
buf = append(buf, 'u')
for s := 12; s >= 0; s -= 4 {
buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
default:
buf = append(buf, 'U')
for s := 28; s >= 0; s -= 4 {
buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
}
}
return buf
}
func (f *Form) appendEscape(buf []byte) []byte {
buf = append(buf, '\\')
for i := 0; i < f.hashCount; i++ {
buf = append(buf, '#')
}
return buf
}
// requiredHashCount returns the number of # characters
// that are required to quote the multiline string s.
func (f *Form) requiredHashCount(s string) int {
hashCount := 0
i := 0
// Find all occurrences of the triple-quote and count
// the maximum number of succeeding # characters.
for {
j := strings.Index(s[i:], f.tripleQuote)
if j == -1 {
break
}
i += j + 3
// Absorb all extra quotes, so we
// get to the end of the sequence.
for ; i < len(s); i++ {
if s[i] != f.quote {
break
}
}
e := i - 1
// Count succeeding # characters.
for ; i < len(s); i++ {
if s[i] != '#' {
break
}
}
if nhash := i - e; nhash > hashCount {
hashCount = nhash
}
}
return hashCount
}
// isInGraphicList reports whether the rune is in the isGraphic list. This separation
// from IsGraphic allows quoteWith to avoid two calls to IsPrint.
// Should be called only if IsPrint fails.
func isInGraphicList(r rune) bool {
// We know r must fit in 16 bits - see makeisprint.go.
if r > 0xFFFF {
return false
}
rr := uint16(r)
i := bsearch16(isGraphic, rr)
return i < len(isGraphic) && rr == isGraphic[i]
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
func bsearch16(a []uint16, x uint16) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
// isGraphic lists the graphic runes not matched by IsPrint.
var isGraphic = []uint16{
0x00a0,
0x1680,
0x2000,
0x2001,
0x2002,
0x2003,
0x2004,
0x2005,
0x2006,
0x2007,
0x2008,
0x2009,
0x200a,
0x202f,
0x205f,
0x3000,
}