cue: precompile regular expressions

This can result in a pretty significant
performance gain.

Change-Id: Id77976eac1afb0e1ea461ae5c65a9a1785d346e3
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2350
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/ast.go b/cue/ast.go
index 797dc60..2b5bab4 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -258,7 +258,7 @@
 		list := &list{baseValue: newExpr(n), elem: s}
 		list.initLit()
 		if n.Ellipsis != token.NoPos || n.Type != nil {
-			list.len = newBound(list.baseValue, opGeq, intKind, list.len)
+			list.len = newBound(v.ctx(), list.baseValue, opGeq, intKind, list.len)
 			if n.Type != nil {
 				list.typ = v1.walk(n.Type)
 			}
@@ -313,7 +313,7 @@
 			// to rewrite it.
 
 			if name != "" {
-				yielder.key = &stringLit{newNode(x), name}
+				yielder.key = &stringLit{newNode(x), name, nil}
 				yielder.value = v.walk(field.Value)
 			}
 
@@ -553,6 +553,7 @@
 		case token.GEQ, token.GTR, token.LSS, token.LEQ,
 			token.NEQ, token.MAT, token.NMAT:
 			value = newBound(
+				v.ctx(),
 				newExpr(n),
 				tokenMap[n.Op],
 				topKind|nonGround,
@@ -574,12 +575,12 @@
 			value = d
 
 		default:
-			value = &binaryExpr{
+			value = updateBin(v.ctx(), &binaryExpr{
 				newExpr(n),
 				tokenMap[n.Op], // op
 				v.walk(n.X),    // left
 				v.walk(n.Y),    // right
-			}
+			})
 		}
 
 	case *ast.CommentGroup:
diff --git a/cue/binop.go b/cue/binop.go
index 260aaf4..b7daf6b 100644
--- a/cue/binop.go
+++ b/cue/binop.go
@@ -320,7 +320,7 @@
 			if k == x.k {
 				return x
 			}
-			return newBound(newSrc.base(), x.op, k, xv)
+			return newBound(ctx, newSrc.base(), x.op, k, xv)
 
 		case *bound:
 			yv := y.value.(evaluated)
@@ -718,25 +718,33 @@
 			return cmpTonode(src, op, strings.Compare(x.str, str))
 		case opAdd:
 			src := binSrc(src.Pos(), op, x, other)
-			return &stringLit{src, x.str + str}
+			return &stringLit{src, x.str + str, nil}
 		case opMat:
-			b, err := regexp.MatchString(str, x.str)
-			if err != nil {
-				return ctx.mkErr(src, "error parsing regexp: %v", err)
+			if y.re == nil {
+				// This really should not happen, but leave in for safety.
+				b, err := regexp.MatchString(str, x.str)
+				if err != nil {
+					return ctx.mkErr(src, "error parsing regexp: %v", err)
+				}
+				return boolTonode(src, b)
 			}
-			return boolTonode(src, b)
+			return boolTonode(src, y.re.MatchString(x.str))
 		case opNMat:
-			b, err := regexp.MatchString(str, x.str)
-			if err != nil {
-				return ctx.mkErr(src, "error parsing regexp: %v", err)
+			if y.re == nil {
+				// This really should not happen, but leave in for safety.
+				b, err := regexp.MatchString(str, x.str)
+				if err != nil {
+					return ctx.mkErr(src, "error parsing regexp: %v", err)
+				}
+				return boolTonode(src, !b)
 			}
-			return boolTonode(src, !b)
+			return boolTonode(src, !y.re.MatchString(x.str))
 		}
 	case *numLit:
 		switch op {
 		case opMul:
 			src := binSrc(src.Pos(), op, x, other)
-			return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx))}
+			return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx)), nil}
 		}
 	}
 	return ctx.mkIncompatible(src, op, x, other)
@@ -762,14 +770,14 @@
 		case opAdd:
 			copy := append([]byte(nil), x.b...)
 			copy = append(copy, b...)
-			return &bytesLit{binSrc(src.Pos(), op, x, other), copy}
+			return &bytesLit{binSrc(src.Pos(), op, x, other), copy, nil}
 		}
 
 	case *numLit:
 		switch op {
 		case opMul:
 			src := binSrc(src.Pos(), op, x, other)
-			return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx))}
+			return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx)), nil}
 		}
 	}
 	return ctx.mkIncompatible(src, op, x, other)
diff --git a/cue/eval.go b/cue/eval.go
index aa91722..96edf07 100644
--- a/cue/eval.go
+++ b/cue/eval.go
@@ -189,7 +189,7 @@
 	if v == x.value {
 		return x
 	}
-	return newBound(x.baseValue, x.op, x.k, v)
+	return newBound(ctx, x.baseValue, x.op, x.k, v)
 }
 
 func (x *interpolation) evalPartial(ctx *context) (result evaluated) {
@@ -214,7 +214,7 @@
 			}
 		}
 	}
-	return &stringLit{x.baseValue, buf.String()}
+	return &stringLit{x.baseValue, buf.String(), nil}
 }
 
 func (x *list) evalPartial(ctx *context) (result evaluated) {
diff --git a/cue/go.go b/cue/go.go
index 9e97e52..70c05d0 100644
--- a/cue/go.go
+++ b/cue/go.go
@@ -254,9 +254,9 @@
 	case bool:
 		return &boolLit{src.base(), v}
 	case string:
-		return &stringLit{src.base(), v}
+		return &stringLit{src.base(), v, nil}
 	case []byte:
-		return &bytesLit{src.base(), v}
+		return &bytesLit{src.base(), v, nil}
 	case int:
 		return toInt(ctx, src, int64(v))
 	case int8:
diff --git a/cue/lit.go b/cue/lit.go
index 5e7f14c..2429b62 100644
--- a/cue/lit.go
+++ b/cue/lit.go
@@ -184,9 +184,9 @@
 		return ctx.mkErr(src, err, "invalid string: %v", err)
 	}
 	if q.IsDouble() {
-		return &stringLit{src, str}
+		return &stringLit{src, str, nil}
 	}
-	return &bytesLit{src, []byte(str)}
+	return &bytesLit{src, []byte(str), nil}
 }
 
 func (p *litParser) digitVal(ch byte) (d int) {
diff --git a/cue/rewrite.go b/cue/rewrite.go
index 50c2522..c135a15 100644
--- a/cue/rewrite.go
+++ b/cue/rewrite.go
@@ -102,7 +102,7 @@
 	if v == x.value {
 		return x
 	}
-	return newBound(x.baseValue, x.op, x.k, v)
+	return newBound(ctx, x.baseValue, x.op, x.k, v)
 }
 
 func (x *interpolation) rewrite(ctx *context, fn rewriteFunc) value {
@@ -182,7 +182,7 @@
 	if left == x.left && right == x.right {
 		return x
 	}
-	return &binaryExpr{x.baseValue, x.op, left, right}
+	return updateBin(ctx, &binaryExpr{x.baseValue, x.op, left, right})
 }
 
 func (x *unification) rewrite(ctx *context, fn rewriteFunc) value {
diff --git a/cue/types.go b/cue/types.go
index 54e2c88..2ca3ede 100644
--- a/cue/types.go
+++ b/cue/types.go
@@ -1116,7 +1116,7 @@
 		return nil
 	}
 	return func(label string) Value {
-		arg := &stringLit{x.baseValue, label}
+		arg := &stringLit{x.baseValue, label, nil}
 		y := fn.call(ctx, x, arg)
 		return newValueRoot(ctx, y)
 	}
@@ -1578,6 +1578,7 @@
 		a = append(a, remakeValue(v, &stringLit{
 			x.baseValue,
 			v.ctx().labelStr(x.feature),
+			nil,
 		}))
 		op = SelectorOp
 	case *indexExpr:
diff --git a/cue/value.go b/cue/value.go
index 93d1188..cc596d7 100644
--- a/cue/value.go
+++ b/cue/value.go
@@ -16,6 +16,7 @@
 
 import (
 	"math/big"
+	"regexp"
 	"sort"
 	"strconv"
 	"time"
@@ -195,8 +196,8 @@
 
 type bytesLit struct {
 	baseValue
-	b []byte
-	// TODO: maintain extended grapheme index cache.
+	b  []byte
+	re *regexp.Regexp // only set if needed
 }
 
 func (x *bytesLit) kind() kind       { return bytesKind }
@@ -243,12 +244,13 @@
 	if len(x.b) < hix {
 		return ctx.mkErr(hi, "slice bounds out of range")
 	}
-	return &bytesLit{x.baseValue, x.b[lox:hix]}
+	return &bytesLit{x.baseValue, x.b[lox:hix], nil}
 }
 
 type stringLit struct {
 	baseValue
 	str string
+	re  *regexp.Regexp // only set if needed
 
 	// TODO: maintain extended grapheme index cache.
 }
@@ -271,7 +273,7 @@
 		return ctx.mkErr(x, "index %d out of bounds", i)
 	}
 	// TODO: this is incorrect.
-	return &stringLit{x.baseValue, string(runes[i : i+1])}
+	return &stringLit{x.baseValue, string(runes[i : i+1]), nil}
 }
 func (x *stringLit) len() int { return len([]rune(x.str)) }
 
@@ -297,7 +299,7 @@
 	if len(runes) < hix {
 		return ctx.mkErr(hi, "slice bounds out of range")
 	}
-	return &stringLit{x.baseValue, string(runes[lox:hix])}
+	return &stringLit{x.baseValue, string(runes[lox:hix]), nil}
 }
 
 type numBase struct {
@@ -391,13 +393,19 @@
 	value value
 }
 
-func newBound(base baseValue, op op, k kind, v value) *bound {
+func newBound(ctx *context, base baseValue, op op, k kind, v value) evaluated {
 	kv := v.kind()
 	if kv.isAnyOf(numKind) {
 		kv |= numKind
 	} else if op == opNeq && kv&atomKind == nullKind {
 		kv = typeKinds &^ nullKind
 	}
+	if op == opMat || op == opNMat {
+		v = compileRegexp(ctx, v)
+		if isBottom(v) {
+			return v.(*bottom)
+		}
+	}
 	return &bound{base, op, unifyType(k&topKind, kv) | nonGround, v}
 }
 
@@ -406,8 +414,8 @@
 }
 
 func mkIntRange(a, b string) evaluated {
-	from := newBound(baseValue{}, opGeq, intKind, parseInt(intKind, a))
-	to := newBound(baseValue{}, opLeq, intKind, parseInt(intKind, b))
+	from := newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, a))
+	to := newBound(nil, baseValue{}, opLeq, intKind, parseInt(intKind, b))
 	e := &unification{
 		binSrc(token.NoPos, opUnify, from, to),
 		[]evaluated{from, to},
@@ -422,8 +430,8 @@
 }
 
 func mkFloatRange(a, b string) evaluated {
-	from := newBound(baseValue{}, opGeq, numKind, parseFloat(a))
-	to := newBound(baseValue{}, opLeq, numKind, parseFloat(b))
+	from := newBound(nil, baseValue{}, opGeq, numKind, parseFloat(a))
+	to := newBound(nil, baseValue{}, opLeq, numKind, parseFloat(b))
 	e := &unification{
 		binSrc(token.NoPos, opUnify, from, to),
 		[]evaluated{from, to},
@@ -449,7 +457,7 @@
 
 	// Do not include an alias for "byte", as it would be too easily confused
 	// with the builtin "bytes".
-	"uint":    newBound(baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
+	"uint":    newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
 	"uint8":   mkIntRange("0", "255"),
 	"uint16":  mkIntRange("0", "65535"),
 	"uint32":  mkIntRange("0", "4294967295"),
@@ -810,7 +818,7 @@
 			return err, nil
 		}
 		name := ctx.labelStr(x.arcs[i].feature)
-		arg := &stringLit{x.baseValue, name}
+		arg := &stringLit{x.baseValue, name, nil}
 		w := fn.call(ctx, x, arg).evalPartial(ctx)
 		v = binOp(ctx, x, opUnify, v, w)
 
@@ -1086,6 +1094,27 @@
 
 func (x *unaryExpr) kind() kind { return x.x.kind() }
 
+func compileRegexp(ctx *context, v value) value {
+	var err error
+	switch x := v.(type) {
+	case *stringLit:
+		if x.re == nil {
+			x.re, err = regexp.Compile(x.str)
+			if err != nil {
+				return ctx.mkErr(v, "could not compile regular expression %q: %v", x.str, err)
+			}
+		}
+	case *bytesLit:
+		if x.re == nil {
+			x.re, err = regexp.Compile(string(x.b))
+			if err != nil {
+				return ctx.mkErr(v, "could not compile regular expression %q: %v", x.b, err)
+			}
+		}
+	}
+	return v
+}
+
 type binaryExpr struct {
 	baseValue
 	op    op
@@ -1115,7 +1144,19 @@
 		// 	return left
 		// }
 	}
-	return &binaryExpr{binSrc(pos, op, left, right), op, left, right}
+	bin := &binaryExpr{binSrc(pos, op, left, right), op, left, right}
+	return updateBin(ctx, bin)
+}
+
+func updateBin(ctx *context, bin *binaryExpr) value {
+	switch bin.op {
+	case opMat, opNMat:
+		bin.right = compileRegexp(ctx, bin.right)
+		if isBottom(bin.right) {
+			return bin.right
+		}
+	}
+	return bin
 }
 
 func (x *binaryExpr) kind() kind {
@@ -1357,6 +1398,7 @@
 			key := &stringLit{
 				x.baseValue,
 				ctx.labelStr(a.feature),
+				nil,
 			}
 			val := src.at(ctx, i)
 			v := fn.call(ctx, x, key, val)