pkg/regexp: add more regexp builtins

- find (all) matches
- find (all) submatches
- find (all) named submatches in a map

Change-Id: Ibb13effce9c9452f253c7f39db4c91548d4bef78
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/3763
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/builtin_test.go b/cue/builtin_test.go
index a9838bb..1331006 100644
--- a/cue/builtin_test.go
+++ b/cue/builtin_test.go
@@ -277,6 +277,41 @@
 		test("encoding/csv", `csv.Decode("1,2,3\n4,5,6")`),
 		`[["1","2","3"],["4","5","6"]]`,
 	}, {
+		test("regexp", `regexp.Find(#"f\w\w"#, "afoot")`),
+		`"foo"`,
+	}, {
+		test("regexp", `regexp.Find(#"f\w\w"#, "bar")`),
+		`_|_(error in call to regexp.Find: no match)`,
+	}, {
+		test("regexp", `regexp.FindAll(#"f\w\w"#, "afoot afloat from", 2)`),
+		`["foo","flo"]`,
+	}, {
+		test("regexp", `regexp.FindAll(#"f\w\w"#, "afoot afloat from", 2)`),
+		`["foo","flo"]`,
+	}, {
+		test("regexp", `regexp.FindAll(#"f\w\w"#, "bla bla", -1)`),
+		`_|_(error in call to regexp.FindAll: no match)`,
+	}, {
+		test("regexp", `regexp.FindSubmatch(#"f(\w)(\w)"#, "afloat afoot from")`),
+		`["flo","l","o"]`,
+	}, {
+		test("regexp", `regexp.FindAllSubmatch(#"f(\w)(\w)"#, "afloat afoot from", -1)`),
+		`[["flo","l","o"],["foo","o","o"],["fro","r","o"]]`,
+	}, {
+		test("regexp", `regexp.FindAllSubmatch(#"f(\w)(\w)"#, "aglom", -1)`),
+		`_|_(error in call to regexp.FindAllSubmatch: no match)`,
+	}, {
+		test("regexp", `regexp.FindNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "afloat afoot from")`),
+		`{A: "l", B: "o"}`,
+	}, {
+		test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "afloat afoot from", -1)`),
+		`[{A: "l", B: "o"},{A: "o", B: "o"},{A: "r", B: "o"}]`,
+	}, {
+		test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>optional)?"#, "fbla", -1)`),
+		`[{A: ""}]`}, {
+		test("regexp", `regexp.FindAllNamedSubmatch(#"f(?P<A>\w)(?P<B>\w)"#, "aglom", -1)`),
+		`_|_(error in call to regexp.FindAllNamedSubmatch: no match)`,
+	}, {
 		test("strconv", `strconv.FormatBool(true)`),
 		`"true"`,
 	}, {
diff --git a/cue/builtins.go b/cue/builtins.go
index 7337780..e47225d 100644
--- a/cue/builtins.go
+++ b/cue/builtins.go
@@ -12,6 +12,7 @@
 	"encoding/csv"
 	"encoding/hex"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"html"
 	"io"
@@ -120,6 +121,10 @@
 
 var pathDir = path.Dir
 
+var errNoMatch = errors.New("no match")
+
+var errNoNamedGroup = errors.New("no named groups")
+
 func timeFormat(value, layout string) (bool, error) {
 	_, err := time.Parse(layout, value)
 	if err != nil {
@@ -2022,6 +2027,138 @@
 	},
 	"regexp": &builtinPkg{
 		native: []*builtin{{
+			Name:   "Find",
+			Params: []kind{stringKind, stringKind},
+			Result: stringKind,
+			Func: func(c *callCtxt) {
+				pattern, s := c.string(0), c.string(1)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return "", err
+					}
+					m := re.FindStringIndex(s)
+					if m == nil {
+						return "", errNoMatch
+					}
+					return s[m[0]:m[1]], nil
+				}()
+			},
+		}, {
+			Name:   "FindAll",
+			Params: []kind{stringKind, stringKind, intKind},
+			Result: listKind,
+			Func: func(c *callCtxt) {
+				pattern, s, n := c.string(0), c.string(1), c.int(2)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return nil, err
+					}
+					m := re.FindAllString(s, n)
+					if m == nil {
+						return nil, errNoMatch
+					}
+					return m, nil
+				}()
+			},
+		}, {
+			Name:   "FindSubmatch",
+			Params: []kind{stringKind, stringKind},
+			Result: listKind,
+			Func: func(c *callCtxt) {
+				pattern, s := c.string(0), c.string(1)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return nil, err
+					}
+					m := re.FindStringSubmatch(s)
+					if m == nil {
+						return nil, errNoMatch
+					}
+					return m, nil
+				}()
+			},
+		}, {
+			Name:   "FindAllSubmatch",
+			Params: []kind{stringKind, stringKind, intKind},
+			Result: listKind,
+			Func: func(c *callCtxt) {
+				pattern, s, n := c.string(0), c.string(1), c.int(2)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return nil, err
+					}
+					m := re.FindAllStringSubmatch(s, n)
+					if m == nil {
+						return nil, errNoMatch
+					}
+					return m, nil
+				}()
+			},
+		}, {
+			Name:   "FindNamedSubmatch",
+			Params: []kind{stringKind, stringKind},
+			Result: structKind,
+			Func: func(c *callCtxt) {
+				pattern, s := c.string(0), c.string(1)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return nil, err
+					}
+					names := re.SubexpNames()
+					if len(names) == 0 {
+						return nil, errNoNamedGroup
+					}
+					m := re.FindStringSubmatch(s)
+					if m == nil {
+						return nil, errNoMatch
+					}
+					r := make(map[string]string, len(names)-1)
+					for k, name := range names {
+						if name != "" {
+							r[name] = m[k]
+						}
+					}
+					return r, nil
+				}()
+			},
+		}, {
+			Name:   "FindAllNamedSubmatch",
+			Params: []kind{stringKind, stringKind, intKind},
+			Result: listKind,
+			Func: func(c *callCtxt) {
+				pattern, s, n := c.string(0), c.string(1), c.int(2)
+				c.ret, c.err = func() (interface{}, error) {
+					re, err := regexp.Compile(pattern)
+					if err != nil {
+						return nil, err
+					}
+					names := re.SubexpNames()
+					if len(names) == 0 {
+						return nil, errNoNamedGroup
+					}
+					m := re.FindAllStringSubmatch(s, n)
+					if m == nil {
+						return nil, errNoMatch
+					}
+					result := make([]map[string]string, len(m))
+					for i, m := range m {
+						r := make(map[string]string, len(names)-1)
+						for k, name := range names {
+							if name != "" {
+								r[name] = m[k]
+							}
+						}
+						result[i] = r
+					}
+					return result, nil
+				}()
+			},
+		}, {
 			Name:   "Match",
 			Params: []kind{stringKind, stringKind},
 			Result: boolKind,
diff --git a/pkg/regexp/manual.go b/pkg/regexp/manual.go
new file mode 100644
index 0000000..1a14476
--- /dev/null
+++ b/pkg/regexp/manual.go
@@ -0,0 +1,148 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+	"errors"
+	"regexp"
+)
+
+var errNoMatch = errors.New("no match") // returned by the Find* functions below when nothing matches
+
+// Find returns the text of the leftmost match of the regular expression
+// pattern in s. It returns bottom if pattern is invalid or there is no match.
+func Find(pattern, s string) (string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return "", err
+	}
+	m := re.FindStringIndex(s) // [start, end) offsets of the match in s, or nil
+	if m == nil {
+		return "", errNoMatch
+	}
+	return s[m[0]:m[1]], nil
+}
+
+// FindAll returns a list of all successive matches of the regular expression
+// pattern in s. Matches are non-overlapping matches of the entire expression;
+// empty matches abutting a preceding match are ignored. The integer argument n
+// indicates the maximum number of matches to return for n >= 0, or all matches
+// otherwise. It returns bottom if pattern is invalid or if there is no
+// match.
+func FindAll(pattern, s string, n int) ([]string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+	m := re.FindAllString(s, n) // nil when nothing was found
+	if m == nil {
+		return nil, errNoMatch
+	}
+	return m, nil
+}
+
+// FindSubmatch returns a list of strings holding the text of the leftmost match
+// of the regular expression pattern in s and the matches, if any, of its
+// subexpressions. Submatches are matches of parenthesized subexpressions (also
+// known as capturing groups) within the regular expression, numbered from left
+// to right in order of opening parenthesis. Submatch 0 is the match of the
+// entire expression, submatch 1 the match of the first parenthesized
+// subexpression, and so on. It returns bottom for no match.
+func FindSubmatch(pattern, s string) ([]string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+	m := re.FindStringSubmatch(s) // whole match followed by each group; nil if no match
+	if m == nil {
+		return nil, errNoMatch
+	}
+	return m, nil
+}
+
+// FindAllSubmatch finds successive matches as returned by FindSubmatch,
+// observing the rules of FindAll. It returns bottom for no match.
+func FindAllSubmatch(pattern, s string, n int) ([][]string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+	m := re.FindAllStringSubmatch(s, n) // one submatch list per match; nil if none
+	if m == nil {
+		return nil, errNoMatch
+	}
+	return m, nil
+}
+
+var errNoNamedGroup = errors.New("no named groups") // NOTE(review): only returned when SubexpNames() is empty, which looks unreachable — confirm
+
+// FindNamedSubmatch is like FindSubmatch, but returns a map with the names used
+// in capturing groups. Groups without a name are left out of the map. It
+// returns bottom for no match.
+//
+// Example:
+//     regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!")
+//  Output:
+//     {person: "World"}
+func FindNamedSubmatch(pattern, s string) (map[string]string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+	names := re.SubexpNames()
+	if len(names) == 0 {
+		return nil, errNoNamedGroup // NOTE(review): names appears to always hold an entry for the whole match, so this may be dead — confirm
+	}
+	m := re.FindStringSubmatch(s)
+	if m == nil {
+		return nil, errNoMatch
+	}
+	r := make(map[string]string, len(names)-1) // size hint: every group except submatch 0
+	for k, name := range names {
+		if name != "" { // skip entries that carry no name
+			r[name] = m[k] // names and m are indexed by the same group number
+		}
+	}
+	return r, nil
+}
+
+// FindAllNamedSubmatch is like FindAllSubmatch, but returns a list of maps
+// keyed by the names used in capturing groups. See FindNamedSubmatch for an
+// example of how to use named groups.
+func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) {
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+	names := re.SubexpNames()
+	if len(names) == 0 {
+		return nil, errNoNamedGroup // NOTE(review): names appears to always hold an entry for the whole match, so this may be dead — confirm
+	}
+	m := re.FindAllStringSubmatch(s, n)
+	if m == nil {
+		return nil, errNoMatch
+	}
+	result := make([]map[string]string, len(m)) // one map per match
+	for i, m := range m { // inner m shadows the outer slice: the submatches of match i
+		r := make(map[string]string, len(names)-1) // size hint: every group except submatch 0
+		for k, name := range names {
+			if name != "" { // skip entries that carry no name
+				r[name] = m[k] // names and m are indexed by the same group number
+			}
+		}
+		result[i] = r
+	}
+	return result, nil
+}