cue/format: fix string label rewriting

String labels should only be rewritten if it does not
change the semantics, that is, if no reference will
point to the label after the change.

Change-Id: Id6f4298e6e93461a6d9ffbc2196d593d27707d90
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/4383
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/cue/format/format_test.go b/cue/format/format_test.go
index ced3ed1..2935933 100644
--- a/cue/format/format_test.go
+++ b/cue/format/format_test.go
@@ -463,7 +463,7 @@
 	t.Skip()
 	const src = `
 `
-	b, err := format([]byte(src), 0)
+	b, err := format([]byte(src), simplify)
 	if err != nil {
 		t.Error(err)
 	}
diff --git a/cue/format/node.go b/cue/format/node.go
index 2c19f74..734a2a7 100644
--- a/cue/format/node.go
+++ b/cue/format/node.go
@@ -28,18 +28,32 @@
 func printNode(node interface{}, f *printer) error {
 	s := newFormatter(f)
 
+	ls := labelSimplifier{scope: map[string]bool{}}
+
 	// format node
 	f.allowed = nooverride // gobble initial whitespace.
 	switch x := node.(type) {
 	case *ast.File:
+		if f.cfg.simplify {
+			ls.markReferences(x)
+		}
 		s.file(x)
 	case ast.Expr:
+		if f.cfg.simplify {
+			ls.markReferences(x)
+		}
 		s.expr(x)
 	case ast.Decl:
+		if f.cfg.simplify {
+			ls.markReferences(x)
+		}
 		s.decl(x)
 	// case ast.Node: // TODO: do we need this?
 	// 	s.walk(x)
 	case []ast.Decl:
+		if f.cfg.simplify {
+			ls.processDecls(x)
+		}
 		s.walkDeclList(x)
 	default:
 		goto unsupported
@@ -397,18 +411,6 @@
 		f.print(n.NamePos, name)
 
 	case *ast.BasicLit:
-		if f.cfg.simplify && n.Kind == token.STRING && len(n.Value) > 2 {
-			s := n.Value
-			unquoted, err := strconv.Unquote(s)
-			if err == nil {
-				// TODO: only do this when requested and
-				// if this doesn't unshadow anything.
-				if isValidIdent(unquoted) {
-					f.print(n.ValuePos, unquoted)
-					break
-				}
-			}
-		}
 		str := n.Value
 		// Allow any CUE string in the AST, but ensure it is formatted
 		// according to spec.
diff --git a/cue/format/simplify.go b/cue/format/simplify.go
new file mode 100644
index 0000000..410911b
--- /dev/null
+++ b/cue/format/simplify.go
@@ -0,0 +1,113 @@
+// Copyright 2019 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package format
+
+import (
+	"strconv"
+	"strings"
+
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+)
+
+// labelSimplifier rewrites string labels to identifiers if
+// no identifiers will subsequently bind to the exposed label.
+// In other words, string labels are only replaced if this does
+// not change the semantics of the CUE code.
+type labelSimplifier struct {
+	parent *labelSimplifier
+	scope  map[string]bool
+}
+
+func (s *labelSimplifier) processDecls(decls []ast.Decl) {
+	sc := labelSimplifier{parent: s, scope: map[string]bool{}}
+	for _, d := range decls {
+		switch x := d.(type) {
+		case *ast.Field:
+			ast.Walk(x.Label, sc.markStrings, nil)
+		}
+	}
+
+	for _, d := range decls {
+		switch x := d.(type) {
+		case *ast.Field:
+			ast.Walk(x.Value, sc.markReferences, nil)
+		default:
+			ast.Walk(x, sc.markReferences, nil)
+		}
+	}
+
+	for _, d := range decls {
+		switch x := d.(type) {
+		case *ast.Field:
+			x.Label = astutil.Apply(x.Label, sc.replace, nil).(ast.Label)
+		}
+	}
+}
+
+func (s *labelSimplifier) markReferences(n ast.Node) bool {
+	// Record strings at this level.
+	switch x := n.(type) {
+	case *ast.File:
+		s.processDecls(x.Decls)
+		return false
+
+	case *ast.StructLit:
+		s.processDecls(x.Elts)
+		return false
+
+	case *ast.SelectorExpr:
+		ast.Walk(x.X, s.markReferences, nil)
+		return false
+
+	case *ast.Ident:
+		for c := s; c != nil; c = c.parent {
+			if _, ok := c.scope[x.Name]; ok {
+				c.scope[x.Name] = false
+				break
+			}
+		}
+	}
+	return true
+}
+
+func (s *labelSimplifier) markStrings(n ast.Node) bool {
+	switch x := n.(type) {
+	case *ast.BasicLit:
+		str, err := strconv.Unquote(x.Value)
+		if err != nil || !ast.IsValidIdent(str) || strings.HasPrefix(str, "_") {
+			return false
+		}
+		s.scope[str] = true
+
+	case *ast.Ident:
+		s.scope[x.Name] = true
+
+	case *ast.ListLit:
+		return false
+	}
+	return true
+}
+
+func (s *labelSimplifier) replace(c astutil.Cursor) bool {
+	switch x := c.Node().(type) {
+	case *ast.BasicLit:
+		str, err := strconv.Unquote(x.Value)
+		if err == nil && s.scope[str] && !strings.HasPrefix(str, "_") {
+			c.Replace(ast.NewIdent(str))
+		}
+	}
+	return true
+}
diff --git a/cue/format/testdata/simplify.golden b/cue/format/testdata/simplify.golden
index 0b5c393..929063b 100644
--- a/cue/format/testdata/simplify.golden
+++ b/cue/format/testdata/simplify.golden
@@ -1,3 +1,5 @@
+import "time"
+
 foo: bar: "str"
 
 a: B: 42
@@ -5,3 +7,32 @@
 "a.b": "foo-": cc_dd: x
 
 a: b: c: 3
+
+// references to bar are all shadowed and this can be safely turned into
+// an identifier.
+bar: "str"
+
+// These references would be directly referred to if turned into an identifier.
+// The quotes should therefore not be removed.
+"baz1": 3
+"baz2": 3
+"baz3": 3 // TODO: could be simplified.
+
+// These string labels may be turned into an identifier.
+qux:  4
+quux: 5
+
+// TODO(legacy): Don't simplify "hidden" fields for now.
+"_foo": 3
+
+x: {
+	r1: baz1
+	bar: r2: bar
+	r3:           bar
+	E=quux:       3
+	r4:           quux
+	[baz2="str"]: 4
+	r5:           baz2
+	[baz3="bar"]: name: baz3 // TODO: treat baz3 as shadowed.
+	Time: time.Time
+}
diff --git a/cue/format/testdata/simplify.input b/cue/format/testdata/simplify.input
index c743830..4bb466a 100644
--- a/cue/format/testdata/simplify.input
+++ b/cue/format/testdata/simplify.input
@@ -1,10 +1,41 @@
-"foo" "bar": "str"
+import "time"
 
-a "B": 42
+"foo": "bar": "str"
 
-"a.b" "foo-" "cc_dd": x
+a: "B": 42
+
+"a.b": "foo-": "cc_dd": x
 
 
 a:
     b:
-        c: 3
\ No newline at end of file
+        c: 3
+
+// references to bar are all shadowed and this can be safely turned into
+// an identifier.
+"bar": "str"
+
+// These references would be directly referred to if turned into an identifier.
+// The quotes should therefore not be removed.
+"baz1": 3
+"baz2": 3
+"baz3": 3 // TODO: could be simplified.
+
+// These string labels may be turned into an identifier.
+"qux": 4
+"quux": 5
+
+// TODO(legacy): Don't simplify "hidden" fields for now.
+"_foo": 3
+
+x: {
+    r1: baz1
+    bar: r2: bar
+    r3: bar
+    E=quux: 3
+    r4: quux
+    [baz2="str"]: 4
+    r5: baz2
+    [baz3="bar"]: name: baz3 // TODO: treat baz3 as shadowed.
+    "Time": time.Time
+}
\ No newline at end of file
diff --git a/cue/resolve_test.go b/cue/resolve_test.go
index 394490e..d764502 100644
--- a/cue/resolve_test.go
+++ b/cue/resolve_test.go
@@ -2835,6 +2835,16 @@
 		out: `<0>{` +
 			`c1: <1>{bar: <2>{baz: 2}, baz: 2}, ` +
 			`c2: _|_(<3>{bar: 1<3>.bar}:cannot embed value 1 of type int in struct)}`,
+	}, {
+		desc: "don't bind to string labels",
+		in: `
+			x: 1
+			y: {
+				"x": 2
+				z: x
+			}
+			`,
+		out: `<0>{x: 1, y: <1>{x: 2, z: 1}}`,
 	}}
 	rewriteHelper(t, testCases, evalFull)
 }