pkg/regexp: add more regexp builtins
- find (all) matches
- find (all) submatches
- find (all) named submatches in a map
Change-Id: Ibb13effce9c9452f253c7f39db4c91548d4bef78
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/3763
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/builtin_test.go b/cue/builtin_test.go
index a9838bb..1331006 100644
--- a/cue/builtin_test.go
+++ b/cue/builtin_test.go
@@ -277,6 +277,41 @@
test("encoding/csv", `csv.Decode("1,2,3\n4,5,6")`),
`[["1","2","3"],["4","5","6"]]`,
}, {
+ test("regexp", `regexp.Find(#"f\w\w"#, "afoot")`),
+ `"foo"`,
+ }, {
+ test("regexp", `regexp.Find(#"f\w\w"#, "bar")`),
+ `_|_(error in call to regexp.Find: no match)`,
+ }, {
+ test("regexp", `regexp.FindAll(#"f\w\w"#, "afoot afloat from", 2)`),
+ `["foo","flo"]`,
+ }, {
+ test("regexp", `regexp.FindAll(#"f\w\w"#, "afoot afloat from", 2)`),
+ `["foo","flo"]`,
+ }, {
+ test("regexp", `regexp.FindAll(#"f\w\w"#, "bla bla", -1)`),
+ `_|_(error in call to regexp.FindAll: no match)`,
+ }, {
+ test("regexp", `regexp.FindSubmatch(#"f(\w)(\w)"#, "afloat afoot from")`),
+ `["flo","l","o"]`,
+ }, {
+ test("regexp", `regexp.FindAllSubmatch(#"f(\w)(\w)"#, "afloat afoot from", -1)`),
+ `[["flo","l","o"],["foo","o","o"],["fro","r","o"]]`,
+ }, {
+ test("regexp", `regexp.FindAllSubmatch(#"f(\w)(\w)"#, "aglom", -1)`),
+ `_|_(error in call to regexp.FindAllSubmatch: no match)`,
+ }, {
+ test("regexp", `regexp.FindNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "afloat afoot from")`),
+ `{A: "l", B: "o"}`,
+ }, {
+ test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "afloat afoot from", -1)`),
+ `[{A: "l", B: "o"},{A: "o", B: "o"},{A: "r", B: "o"}]`,
+ }, {
+ test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>optional)?"#, "fbla", -1)`),
+ `[{A: ""}]`}, {
+ test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "aglom", -1)`),
+ `_|_(error in call to regexp.FindAllNamedSubmatch: no match)`,
+ }, {
test("strconv", `strconv.FormatBool(true)`),
`"true"`,
}, {
diff --git a/cue/builtins.go b/cue/builtins.go
index 7337780..e47225d 100644
--- a/cue/builtins.go
+++ b/cue/builtins.go
@@ -12,6 +12,7 @@
"encoding/csv"
"encoding/hex"
"encoding/json"
+ "errors"
"fmt"
"html"
"io"
@@ -120,6 +121,10 @@
var pathDir = path.Dir
+var errNoMatch = errors.New("no match") // returned by the regexp Find* builtins when the pattern does not match the input
+
+var errNoNamedGroup = errors.New("no named groups") // returned by the named-submatch builtins when SubexpNames() is empty
+
func timeFormat(value, layout string) (bool, error) {
_, err := time.Parse(layout, value)
if err != nil {
@@ -2022,6 +2027,138 @@
},
"regexp": &builtinPkg{
native: []*builtin{{
+ Name: "Find",
+ Params: []kind{stringKind, stringKind},
+ Result: stringKind,
+ Func: func(c *callCtxt) {
+ pattern, s := c.string(0), c.string(1)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return "", err
+ }
+ m := re.FindStringIndex(s)
+ if m == nil {
+ return "", errNoMatch
+ }
+ return s[m[0]:m[1]], nil
+ }()
+ },
+ }, {
+ Name: "FindAll",
+ Params: []kind{stringKind, stringKind, intKind},
+ Result: listKind,
+ Func: func(c *callCtxt) {
+ pattern, s, n := c.string(0), c.string(1), c.int(2)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+ m := re.FindAllString(s, n)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ return m, nil
+ }()
+ },
+ }, {
+ Name: "FindSubmatch",
+ Params: []kind{stringKind, stringKind},
+ Result: listKind,
+ Func: func(c *callCtxt) {
+ pattern, s := c.string(0), c.string(1)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+ m := re.FindStringSubmatch(s)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ return m, nil
+ }()
+ },
+ }, {
+ Name: "FindAllSubmatch",
+ Params: []kind{stringKind, stringKind, intKind},
+ Result: listKind,
+ Func: func(c *callCtxt) {
+ pattern, s, n := c.string(0), c.string(1), c.int(2)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+ m := re.FindAllStringSubmatch(s, n)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ return m, nil
+ }()
+ },
+ }, {
+ Name: "FindNamedSubmatch",
+ Params: []kind{stringKind, stringKind},
+ Result: structKind,
+ Func: func(c *callCtxt) {
+ pattern, s := c.string(0), c.string(1)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+ names := re.SubexpNames()
+ if len(names) == 0 {
+ return nil, errNoNamedGroup
+ }
+ m := re.FindStringSubmatch(s)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ r := make(map[string]string, len(names)-1)
+ for k, name := range names {
+ if name != "" {
+ r[name] = m[k]
+ }
+ }
+ return r, nil
+ }()
+ },
+ }, {
+ Name: "FindAllNamedSubmatch",
+ Params: []kind{stringKind, stringKind, intKind},
+ Result: listKind,
+ Func: func(c *callCtxt) {
+ pattern, s, n := c.string(0), c.string(1), c.int(2)
+ c.ret, c.err = func() (interface{}, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+ names := re.SubexpNames()
+ if len(names) == 0 {
+ return nil, errNoNamedGroup
+ }
+ m := re.FindAllStringSubmatch(s, n)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ result := make([]map[string]string, len(m))
+ for i, m := range m {
+ r := make(map[string]string, len(names)-1)
+ for k, name := range names {
+ if name != "" {
+ r[name] = m[k]
+ }
+ }
+ result[i] = r
+ }
+ return result, nil
+ }()
+ },
+ }, {
Name: "Match",
Params: []kind{stringKind, stringKind},
Result: boolKind,
diff --git a/pkg/regexp/manual.go b/pkg/regexp/manual.go
new file mode 100644
index 0000000..1a14476
--- /dev/null
+++ b/pkg/regexp/manual.go
@@ -0,0 +1,148 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+ "errors"
+ "regexp"
+)
+
+var errNoMatch = errors.New("no match") // returned by the Find* functions in this file when pattern does not match s
+
+// Find returns a string holding the text of the leftmost match in s of the
+// regular expression pattern. It returns bottom if pattern is invalid or if there is no match.
+func Find(pattern, s string) (string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return "", err // pattern failed to compile
+ }
+ m := re.FindStringIndex(s) // nil for no match, otherwise the start and end offsets of the match in s
+ if m == nil {
+ return "", errNoMatch
+ }
+ return s[m[0]:m[1]], nil
+}
+
+// FindAll returns a list of all successive matches of the regular expression
+// pattern in s. Matches are non-overlapping matches of the entire expression,
+// and empty matches abutting a preceding match are ignored. The integer
+// argument n indicates the maximum number of matches to return for n >= 0, or
+// all matches otherwise. It returns bottom if pattern is invalid or if there
+// is no match.
+func FindAll(pattern, s string, n int) ([]string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err // pattern failed to compile
+ }
+ m := re.FindAllString(s, n)
+ if m == nil { // NOTE(review): n == 0 requests zero matches, so it presumably lands here as well — confirm
+ return nil, errNoMatch
+ }
+ return m, nil
+}
+
+// FindSubmatch returns a list of strings holding the text of the leftmost match
+// of the regular expression pattern in s and the matches, if any, of its
+// subexpressions. Submatches are matches of parenthesized subexpressions (also
+// known as capturing groups) within the regular expression, numbered from left
+// to right in order of opening parenthesis. Submatch 0 is the match of the
+// entire expression, submatch 1 the match of the first parenthesized subexpression,
+// and so on. It returns bottom if pattern is invalid or if there is no match.
+func FindSubmatch(pattern, s string) ([]string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err // pattern failed to compile
+ }
+ m := re.FindStringSubmatch(s) // m[0] is the whole match, m[i] the i-th capturing group
+ if m == nil {
+ return nil, errNoMatch
+ }
+ return m, nil
+}
+
+// FindAllSubmatch finds successive matches as returned by FindSubmatch, observing
+// the rules of FindAll. It returns bottom if pattern is invalid or if there is no match.
+func FindAllSubmatch(pattern, s string, n int) ([][]string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err // pattern failed to compile
+ }
+ m := re.FindAllStringSubmatch(s, n) // one []string per match, each laid out as in FindSubmatch
+ if m == nil {
+ return nil, errNoMatch
+ }
+ return m, nil
+}
+
+var errNoNamedGroup = errors.New("no named groups") // returned by the named-submatch functions when SubexpNames() is empty
+
+// FindNamedSubmatch is like FindSubmatch, but returns a map from the names used
+// in capturing groups to the text they matched. Unnamed groups are omitted.
+// It returns bottom if pattern is invalid or if there is no match.
+//
+// Example:
+//     regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!")
+// Output:
+//     {person: "World"}
+func FindNamedSubmatch(pattern, s string) (map[string]string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err // pattern failed to compile
+ }
+ names := re.SubexpNames() // names[k] is the name of group k; "" for unnamed groups and for index 0 (the whole match)
+ if len(names) == 0 { // NOTE(review): names always carries the index-0 entry, so this guard looks unreachable and a pattern without named groups yields an empty map — confirm intent
+ return nil, errNoNamedGroup
+ }
+ m := re.FindStringSubmatch(s)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ r := make(map[string]string, len(names)-1) // capacity excludes the index-0 whole-match entry
+ for k, name := range names {
+ if name != "" { // skip the whole match and unnamed groups
+ r[name] = m[k] // m[k] is "" when group k did not take part in the match
+ }
+ }
+ return r, nil
+}
+
+// FindAllNamedSubmatch is like FindAllSubmatch, but returns for each match a map
+// keyed by the names used in capturing groups. It returns bottom if pattern is
+// invalid or if there is no match. See FindNamedSubmatch for an example of named groups.
+func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) {
+ re, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err // pattern failed to compile
+ }
+ names := re.SubexpNames() // names[k] is the name of group k; "" for unnamed groups and for index 0 (the whole match)
+ if len(names) == 0 { // NOTE(review): names always carries the index-0 entry, so this guard looks unreachable — confirm intent
+ return nil, errNoNamedGroup
+ }
+ m := re.FindAllStringSubmatch(s, n)
+ if m == nil {
+ return nil, errNoMatch
+ }
+ result := make([]map[string]string, len(m)) // one map per match, in match order
+ for i, m := range m { // inner m shadows the outer slice and holds the submatches of match i
+ r := make(map[string]string, len(names)-1) // capacity excludes the index-0 whole-match entry
+ for k, name := range names {
+ if name != "" { // skip the whole match and unnamed groups
+ r[name] = m[k] // m[k] is "" when group k did not take part in the match
+ }
+ }
+ result[i] = r
+ }
+ return result, nil
+}