internal/core/adt: support interpolation of bytes and bool

Also fixes bytes interpolation, which was still WIP.

For bool: use JSON representation.
For bytes: assume bytes are UTF-8 and replace illegal characters according to the recommendations of the Unicode consortium and W3C requirement for character encodings. (So not the Go standard for replacement.)

Details clarified in spec.

Fixes #475.

Change-Id: I94c068f8a73a3948194b179a33e556c37692c05f
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/6951
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>

commit: 30ca0629cdcc2668bb2972691609300db2b3a52a [log] [tgz]
author: Marcel van Lohuizen <mpvl@golang.org> Sat Aug 22 14:07:59 2020 +0200
committer: Marcel van Lohuizen <mpvl@golang.org> Thu Sep 10 15:28:14 2020 +0000
tree: c100fe1237ac9fb65f2ce873010e2bc1722c4522
parent: dd0fa8872279d5601c32d9353fc92423653571c1 [diff]
diff --git a/cue/testdata/interpolation/041_interpolation.txtar b/cue/testdata/interpolation/041_interpolation.txtar
index f23b618..8f2dd9b 100644
--- a/cue/testdata/interpolation/041_interpolation.txtar
+++ b/cue/testdata/interpolation/041_interpolation.txtar

@@ -33,7 +33,7 @@
 }
 -- out/eval --
 Errors:
-e: invalid interpolation: cannot use [] (type list) as type (string|number):
+e: invalid interpolation: cannot use [] (type list) as type (bool|string|bytes|number):
     ./in.cue:7:4
 
 Result:
@@ -52,7 +52,7 @@
   }
   r: (_){ _ }
   e: (_|_){
-    // [eval] e: invalid interpolation: cannot use [] (type list) as type (string|number):
+    // [eval] e: invalid interpolation: cannot use [] (type list) as type (bool|string|bytes|number):
     //     ./in.cue:7:4
   }
 }

diff --git a/cue/testdata/interpolation/scalars.txtar b/cue/testdata/interpolation/scalars.txtar
new file mode 100644
index 0000000..23cce03
--- /dev/null
+++ b/cue/testdata/interpolation/scalars.txtar

@@ -0,0 +1,48 @@
+-- in.cue --
+bool1: "1+1=2:  \(true)"
+bool1: "1+1=2:  \(true)"
+bool2: "1+1=1:  \(false)"
+
+// one replacement character
+b1: 'a\xED\x95a'
+bytes1s: "\(b1)"
+bytes1b: '\(b1)'
+
+// two replacement characters
+b2: 'a\x80\x95a'
+bytes2s: "\(b2)"
+bytes2b: '\(b2)'
+
+// preserve precision
+n1: "\(1) \(2.00)"
+
+// but normalize representation
+n2: "\(1e2)"
+-- out/eval --
+(struct){
+  bool1: (string){ "1+1=2:  true" }
+  bool2: (string){ "1+1=1:  false" }
+  b1: (bytes){ 'a\xed\x95a' }
+  bytes1s: (string){ "a�a" }
+  bytes1b: (bytes){ 'a\xed\x95a' }
+  b2: (bytes){ 'a\x80\x95a' }
+  bytes2s: (string){ "a��a" }
+  bytes2b: (bytes){ 'a\x80\x95a' }
+  n1: (string){ "1 2.00" }
+  n2: (string){ "1E+2" }
+}
+-- out/compile --
+--- in.cue
+{
+  bool1: "1+1=2:  \(true)"
+  bool1: "1+1=2:  \(true)"
+  bool2: "1+1=1:  \(false)"
+  b1: 'a\xed\x95a'
+  bytes1s: "\(〈0;b1〉)"
+  bytes1b: '\(〈0;b1〉)'
+  b2: 'a\x80\x95a'
+  bytes2s: "\(〈0;b2〉)"
+  bytes2b: '\(〈0;b2〉)'
+  n1: "\(1) \(2.00)"
+  n2: "\(1E+2)"
+}

diff --git a/doc/ref/spec.md b/doc/ref/spec.md
index 11712e2..f7f0326 100644
--- a/doc/ref/spec.md
+++ b/doc/ref/spec.md

@@ -2609,8 +2609,21 @@
 String interpolation may be used in single- and double-quoted strings, as well
 as their multiline equivalent.
 
-A placeholder consists of "\(" followed by an expression and a ")". The
-expression is evaluated within the scope within which the string is defined.
+A placeholder consists of "\(" followed by an expression and a ")".
+The expression is evaluated in the scope within which the string is defined.
+
+The result of the expression is substituted as follows:
+- string: as is
+- bool: the JSON representation of the bool
+- number: a JSON representation of the number that preserves the
+precision of the underlying binary coded decimal
+- bytes: as if substituted within single quotes or
+converted to valid UTF-8 replacing the
+maximal subpart of ill-formed subsequences with a single
+replacement character (W3C encoding standard) otherwise
+- list: illegal
+- struct: illegal
+
 
 ```
 a: "World"

diff --git a/internal/core/adt/context.go b/internal/core/adt/context.go
index db385b8..9a36cc1 100644
--- a/internal/core/adt/context.go
+++ b/internal/core/adt/context.go

@@ -20,7 +20,7 @@
 	"regexp"
 
 	"github.com/cockroachdb/apd/v2"
-	"golang.org/x/text/runes"
+	"golang.org/x/text/encoding/unicode"
 
 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/errors"
@@ -738,9 +738,17 @@
 	return c.stringValue(v, nil)
 }
 
-// ToString returns the string value of a numeric or string value.
+// ToBytes returns the bytes value of a scalar value.
+func (c *OpContext) ToBytes(v Value) []byte {
+	if x, ok := v.(*Bytes); ok {
+		return x.B
+	}
+	return []byte(c.ToString(v))
+}
+
+// ToString returns the string value of a scalar value.
 func (c *OpContext) ToString(v Value) string {
-	return c.toStringValue(v, StringKind|NumKind, nil)
+	return c.toStringValue(v, StringKind|NumKind|BytesKind|BoolKind, nil)
 
 }
 
@@ -766,11 +774,17 @@
 		return x.Str
 
 	case *Bytes:
-		return string(runes.ReplaceIllFormed().Bytes(x.B))
+		return bytesToString(x.B)
 
 	case *Num:
 		return x.X.String()
 
+	case *Bool:
+		if x.B {
+			return "true"
+		}
+		return "false"
+
 	default:
 		c.addErrf(IncompleteError, c.pos(),
 			"non-concrete value %s (type %s)", c.Str(v), v.Kind())
@@ -778,6 +792,11 @@
 	return ""
 }
 
+func bytesToString(b []byte) string {
+	b, _ = unicode.UTF8.NewDecoder().Bytes(b)
+	return string(b)
+}
+
 func (c *OpContext) bytesValue(v Value, as interface{}) []byte {
 	v = Unwrap(v)
 	if isError(v) {

diff --git a/internal/core/adt/expr.go b/internal/core/adt/expr.go
index 82df806..8fbdcec 100644
--- a/internal/core/adt/expr.go
+++ b/internal/core/adt/expr.go

@@ -717,8 +717,11 @@
 	buf := bytes.Buffer{}
 	for _, e := range x.Parts {
 		v := c.value(e)
-		s := c.ToString(v)
-		buf.WriteString(s)
+		if x.K == BytesKind {
+			buf.Write(c.ToBytes(v))
+		} else {
+			buf.WriteString(c.ToString(v))
+		}
 	}
 	if err := c.Err(); err != nil {
 		err = &Bottom{
@@ -729,9 +732,9 @@
 		// return nil
 		return err
 	}
-	// if k == bytesKind {
-	// 	return &BytesLit{x.source, buf.String(), nil}
-	// }
+	if x.K == BytesKind {
+		return &Bytes{x.Src, buf.Bytes(), nil}
+	}
 	return &String{x.Src, buf.String(), nil}
 }
 

diff --git a/internal/core/compile/compile.go b/internal/core/compile/compile.go
index 853a6a6..dbb556a 100644
--- a/internal/core/compile/compile.go
+++ b/internal/core/compile/compile.go

@@ -799,11 +799,16 @@
 		if len(n.Elts) == 1 {
 			return c.expr(n.Elts[0])
 		}
-		lit := &adt.Interpolation{Src: n, K: adt.StringKind}
+		lit := &adt.Interpolation{Src: n}
 		info, prefixLen, _, err := literal.ParseQuotes(first.Value, last.Value)
 		if err != nil {
 			return c.errf(n, "invalid interpolation: %v", err)
 		}
+		if info.IsDouble() {
+			lit.K = adt.StringKind
+		} else {
+			lit.K = adt.BytesKind
+		}
 		prefix := ""
 		for i := 0; i < len(n.Elts); i += 2 {
 			l, ok := n.Elts[i].(*ast.BasicLit)

diff --git a/internal/core/debug/compact.go b/internal/core/debug/compact.go
index b40ada5..cbd2dfb 100644
--- a/internal/core/debug/compact.go
+++ b/internal/core/debug/compact.go

@@ -226,20 +226,7 @@
 		w.string("]")
 
 	case *adt.Interpolation:
-		w.string(`"`)
-		for i := 0; i < len(x.Parts); i += 2 {
-			if s, ok := x.Parts[i].(*adt.String); ok {
-				w.string(s.Str)
-			} else {
-				w.string("<bad string>")
-			}
-			if i+1 < len(x.Parts) {
-				w.string(`\(`)
-				w.node(x.Parts[i+1])
-				w.string(`)`)
-			}
-		}
-		w.string(`"`)
+		w.interpolation(x)
 
 	case *adt.UnaryExpr:
 		fmt.Fprint(w, x.Op)

diff --git a/internal/core/debug/debug.go b/internal/core/debug/debug.go
index b61d58f..af3f413 100644
--- a/internal/core/debug/debug.go
+++ b/internal/core/debug/debug.go

@@ -106,6 +106,36 @@
 	}
 }
 
+func (w *printer) interpolation(x *adt.Interpolation) {
+	quote := `"`
+	if x.K == adt.BytesKind {
+		quote = `'`
+	}
+	w.string(quote)
+	for i := 0; i < len(x.Parts); i += 2 {
+		switch x.K {
+		case adt.StringKind:
+			if s, ok := x.Parts[i].(*adt.String); ok {
+				w.string(s.Str)
+			} else {
+				w.string("<bad string>")
+			}
+		case adt.BytesKind:
+			if s, ok := x.Parts[i].(*adt.Bytes); ok {
+				_, _ = w.Write(s.B)
+			} else {
+				w.string("<bad bytes>")
+			}
+		}
+		if i+1 < len(x.Parts) {
+			w.string(`\(`)
+			w.node(x.Parts[i+1])
+			w.string(`)`)
+		}
+	}
+	w.string(quote)
+}
+
 func (w *printer) node(n adt.Node) {
 	switch x := n.(type) {
 	case *adt.Vertex:
@@ -375,20 +405,7 @@
 		w.string("]")
 
 	case *adt.Interpolation:
-		w.string(`"`)
-		for i := 0; i < len(x.Parts); i += 2 {
-			if s, ok := x.Parts[i].(*adt.String); ok {
-				w.string(s.Str)
-			} else {
-				w.string("<bad string>")
-			}
-			if i+1 < len(x.Parts) {
-				w.string(`\(`)
-				w.node(x.Parts[i+1])
-				w.string(`)`)
-			}
-		}
-		w.string(`"`)
+		w.interpolation(x)
 
 	case *adt.UnaryExpr:
 		fmt.Fprint(w, x.Op)
commit	30ca0629cdcc2668bb2972691609300db2b3a52a	[log] [tgz]
author	Marcel van Lohuizen <mpvl@golang.org>	Sat Aug 22 14:07:59 2020 +0200
committer	Marcel van Lohuizen <mpvl@golang.org>	Thu Sep 10 15:28:14 2020 +0000
tree	c100fe1237ac9fb65f2ce873010e2bc1722c4522
parent	dd0fa8872279d5601c32d9353fc92423653571c1 [diff]