cue: precompile regular expressions
This can result in a pretty significant
performance gain.
Change-Id: Id77976eac1afb0e1ea461ae5c65a9a1785d346e3
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2350
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/ast.go b/cue/ast.go
index 797dc60..2b5bab4 100644
--- a/cue/ast.go
+++ b/cue/ast.go
@@ -258,7 +258,7 @@
list := &list{baseValue: newExpr(n), elem: s}
list.initLit()
if n.Ellipsis != token.NoPos || n.Type != nil {
- list.len = newBound(list.baseValue, opGeq, intKind, list.len)
+ list.len = newBound(v.ctx(), list.baseValue, opGeq, intKind, list.len)
if n.Type != nil {
list.typ = v1.walk(n.Type)
}
@@ -313,7 +313,7 @@
// to rewrite it.
if name != "" {
- yielder.key = &stringLit{newNode(x), name}
+ yielder.key = &stringLit{newNode(x), name, nil}
yielder.value = v.walk(field.Value)
}
@@ -553,6 +553,7 @@
case token.GEQ, token.GTR, token.LSS, token.LEQ,
token.NEQ, token.MAT, token.NMAT:
value = newBound(
+ v.ctx(),
newExpr(n),
tokenMap[n.Op],
topKind|nonGround,
@@ -574,12 +575,12 @@
value = d
default:
- value = &binaryExpr{
+ value = updateBin(v.ctx(), &binaryExpr{
newExpr(n),
tokenMap[n.Op], // op
v.walk(n.X), // left
v.walk(n.Y), // right
- }
+ })
}
case *ast.CommentGroup:
diff --git a/cue/binop.go b/cue/binop.go
index 260aaf4..b7daf6b 100644
--- a/cue/binop.go
+++ b/cue/binop.go
@@ -320,7 +320,7 @@
if k == x.k {
return x
}
- return newBound(newSrc.base(), x.op, k, xv)
+ return newBound(ctx, newSrc.base(), x.op, k, xv)
case *bound:
yv := y.value.(evaluated)
@@ -718,25 +718,33 @@
return cmpTonode(src, op, strings.Compare(x.str, str))
case opAdd:
src := binSrc(src.Pos(), op, x, other)
- return &stringLit{src, x.str + str}
+ return &stringLit{src, x.str + str, nil}
case opMat:
- b, err := regexp.MatchString(str, x.str)
- if err != nil {
- return ctx.mkErr(src, "error parsing regexp: %v", err)
+ if y.re == nil {
+ // This really should not happen, but leave in for safety.
+ b, err := regexp.MatchString(str, x.str)
+ if err != nil {
+ return ctx.mkErr(src, "error parsing regexp: %v", err)
+ }
+ return boolTonode(src, b)
}
- return boolTonode(src, b)
+ return boolTonode(src, y.re.MatchString(x.str))
case opNMat:
- b, err := regexp.MatchString(str, x.str)
- if err != nil {
- return ctx.mkErr(src, "error parsing regexp: %v", err)
+ if y.re == nil {
+ // This really should not happen, but leave in for safety.
+ b, err := regexp.MatchString(str, x.str)
+ if err != nil {
+ return ctx.mkErr(src, "error parsing regexp: %v", err)
+ }
+ return boolTonode(src, !b)
}
- return boolTonode(src, !b)
+ return boolTonode(src, !y.re.MatchString(x.str))
}
case *numLit:
switch op {
case opMul:
src := binSrc(src.Pos(), op, x, other)
- return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx))}
+ return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx)), nil}
}
}
return ctx.mkIncompatible(src, op, x, other)
@@ -762,14 +770,14 @@
case opAdd:
copy := append([]byte(nil), x.b...)
copy = append(copy, b...)
- return &bytesLit{binSrc(src.Pos(), op, x, other), copy}
+ return &bytesLit{binSrc(src.Pos(), op, x, other), copy, nil}
}
case *numLit:
switch op {
case opMul:
src := binSrc(src.Pos(), op, x, other)
- return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx))}
+ return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx)), nil}
}
}
return ctx.mkIncompatible(src, op, x, other)
diff --git a/cue/eval.go b/cue/eval.go
index aa91722..96edf07 100644
--- a/cue/eval.go
+++ b/cue/eval.go
@@ -189,7 +189,7 @@
if v == x.value {
return x
}
- return newBound(x.baseValue, x.op, x.k, v)
+ return newBound(ctx, x.baseValue, x.op, x.k, v)
}
func (x *interpolation) evalPartial(ctx *context) (result evaluated) {
@@ -214,7 +214,7 @@
}
}
}
- return &stringLit{x.baseValue, buf.String()}
+ return &stringLit{x.baseValue, buf.String(), nil}
}
func (x *list) evalPartial(ctx *context) (result evaluated) {
diff --git a/cue/go.go b/cue/go.go
index 9e97e52..70c05d0 100644
--- a/cue/go.go
+++ b/cue/go.go
@@ -254,9 +254,9 @@
case bool:
return &boolLit{src.base(), v}
case string:
- return &stringLit{src.base(), v}
+ return &stringLit{src.base(), v, nil}
case []byte:
- return &bytesLit{src.base(), v}
+ return &bytesLit{src.base(), v, nil}
case int:
return toInt(ctx, src, int64(v))
case int8:
diff --git a/cue/lit.go b/cue/lit.go
index 5e7f14c..2429b62 100644
--- a/cue/lit.go
+++ b/cue/lit.go
@@ -184,9 +184,9 @@
return ctx.mkErr(src, err, "invalid string: %v", err)
}
if q.IsDouble() {
- return &stringLit{src, str}
+ return &stringLit{src, str, nil}
}
- return &bytesLit{src, []byte(str)}
+ return &bytesLit{src, []byte(str), nil}
}
func (p *litParser) digitVal(ch byte) (d int) {
diff --git a/cue/rewrite.go b/cue/rewrite.go
index 50c2522..c135a15 100644
--- a/cue/rewrite.go
+++ b/cue/rewrite.go
@@ -102,7 +102,7 @@
if v == x.value {
return x
}
- return newBound(x.baseValue, x.op, x.k, v)
+ return newBound(ctx, x.baseValue, x.op, x.k, v)
}
func (x *interpolation) rewrite(ctx *context, fn rewriteFunc) value {
@@ -182,7 +182,7 @@
if left == x.left && right == x.right {
return x
}
- return &binaryExpr{x.baseValue, x.op, left, right}
+ return updateBin(ctx, &binaryExpr{x.baseValue, x.op, left, right})
}
func (x *unification) rewrite(ctx *context, fn rewriteFunc) value {
diff --git a/cue/types.go b/cue/types.go
index 54e2c88..2ca3ede 100644
--- a/cue/types.go
+++ b/cue/types.go
@@ -1116,7 +1116,7 @@
return nil
}
return func(label string) Value {
- arg := &stringLit{x.baseValue, label}
+ arg := &stringLit{x.baseValue, label, nil}
y := fn.call(ctx, x, arg)
return newValueRoot(ctx, y)
}
@@ -1578,6 +1578,7 @@
a = append(a, remakeValue(v, &stringLit{
x.baseValue,
v.ctx().labelStr(x.feature),
+ nil,
}))
op = SelectorOp
case *indexExpr:
diff --git a/cue/value.go b/cue/value.go
index 93d1188..cc596d7 100644
--- a/cue/value.go
+++ b/cue/value.go
@@ -16,6 +16,7 @@
import (
"math/big"
+ "regexp"
"sort"
"strconv"
"time"
@@ -195,8 +196,8 @@
type bytesLit struct {
baseValue
- b []byte
- // TODO: maintain extended grapheme index cache.
+ b []byte
+ re *regexp.Regexp // only set if needed
}
func (x *bytesLit) kind() kind { return bytesKind }
@@ -243,12 +244,13 @@
if len(x.b) < hix {
return ctx.mkErr(hi, "slice bounds out of range")
}
- return &bytesLit{x.baseValue, x.b[lox:hix]}
+ return &bytesLit{x.baseValue, x.b[lox:hix], nil}
}
type stringLit struct {
baseValue
str string
+ re *regexp.Regexp // only set if needed
// TODO: maintain extended grapheme index cache.
}
@@ -271,7 +273,7 @@
return ctx.mkErr(x, "index %d out of bounds", i)
}
// TODO: this is incorrect.
- return &stringLit{x.baseValue, string(runes[i : i+1])}
+ return &stringLit{x.baseValue, string(runes[i : i+1]), nil}
}
func (x *stringLit) len() int { return len([]rune(x.str)) }
@@ -297,7 +299,7 @@
if len(runes) < hix {
return ctx.mkErr(hi, "slice bounds out of range")
}
- return &stringLit{x.baseValue, string(runes[lox:hix])}
+ return &stringLit{x.baseValue, string(runes[lox:hix]), nil}
}
type numBase struct {
@@ -391,13 +393,19 @@
value value
}
-func newBound(base baseValue, op op, k kind, v value) *bound {
+func newBound(ctx *context, base baseValue, op op, k kind, v value) evaluated {
kv := v.kind()
if kv.isAnyOf(numKind) {
kv |= numKind
} else if op == opNeq && kv&atomKind == nullKind {
kv = typeKinds &^ nullKind
}
+ if op == opMat || op == opNMat {
+ v = compileRegexp(ctx, v)
+ if isBottom(v) {
+ return v.(*bottom)
+ }
+ }
return &bound{base, op, unifyType(k&topKind, kv) | nonGround, v}
}
@@ -406,8 +414,8 @@
}
func mkIntRange(a, b string) evaluated {
- from := newBound(baseValue{}, opGeq, intKind, parseInt(intKind, a))
- to := newBound(baseValue{}, opLeq, intKind, parseInt(intKind, b))
+ from := newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, a))
+ to := newBound(nil, baseValue{}, opLeq, intKind, parseInt(intKind, b))
e := &unification{
binSrc(token.NoPos, opUnify, from, to),
[]evaluated{from, to},
@@ -422,8 +430,8 @@
}
func mkFloatRange(a, b string) evaluated {
- from := newBound(baseValue{}, opGeq, numKind, parseFloat(a))
- to := newBound(baseValue{}, opLeq, numKind, parseFloat(b))
+ from := newBound(nil, baseValue{}, opGeq, numKind, parseFloat(a))
+ to := newBound(nil, baseValue{}, opLeq, numKind, parseFloat(b))
e := &unification{
binSrc(token.NoPos, opUnify, from, to),
[]evaluated{from, to},
@@ -449,7 +457,7 @@
// Do not include an alias for "byte", as it would be too easily confused
// with the builtin "bytes".
- "uint": newBound(baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
+ "uint": newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
"uint8": mkIntRange("0", "255"),
"uint16": mkIntRange("0", "65535"),
"uint32": mkIntRange("0", "4294967295"),
@@ -810,7 +818,7 @@
return err, nil
}
name := ctx.labelStr(x.arcs[i].feature)
- arg := &stringLit{x.baseValue, name}
+ arg := &stringLit{x.baseValue, name, nil}
w := fn.call(ctx, x, arg).evalPartial(ctx)
v = binOp(ctx, x, opUnify, v, w)
@@ -1086,6 +1094,27 @@
func (x *unaryExpr) kind() kind { return x.x.kind() }
+func compileRegexp(ctx *context, v value) value {
+ var err error
+ switch x := v.(type) {
+ case *stringLit:
+ if x.re == nil {
+ x.re, err = regexp.Compile(x.str)
+ if err != nil {
+ return ctx.mkErr(v, "could not compile regular expression %q: %v", x.str, err)
+ }
+ }
+ case *bytesLit:
+ if x.re == nil {
+ x.re, err = regexp.Compile(string(x.b))
+ if err != nil {
+ return ctx.mkErr(v, "could not compile regular expression %q: %v", x.b, err)
+ }
+ }
+ }
+ return v
+}
+
type binaryExpr struct {
baseValue
op op
@@ -1115,7 +1144,19 @@
// return left
// }
}
- return &binaryExpr{binSrc(pos, op, left, right), op, left, right}
+ bin := &binaryExpr{binSrc(pos, op, left, right), op, left, right}
+ return updateBin(ctx, bin)
+}
+
+func updateBin(ctx *context, bin *binaryExpr) value {
+ switch bin.op {
+ case opMat, opNMat:
+ bin.right = compileRegexp(ctx, bin.right)
+ if isBottom(bin.right) {
+ return bin.right
+ }
+ }
+ return bin
}
func (x *binaryExpr) kind() kind {
@@ -1357,6 +1398,7 @@
key := &stringLit{
x.baseValue,
ctx.labelStr(a.feature),
+ nil,
}
val := src.at(ctx, i)
v := fn.call(ctx, x, key, val)