internal/third_party/yaml: improve yaml positioning

- retain more of the original spacing into the conversion
- store original absolute positions

Change-Id: Id1edbefe1050e1f2dcec3ceb2f8877018b0b5876
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2562
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/third_party/yaml/decode.go b/internal/third_party/yaml/decode.go
index 024794d..22a6420 100644
--- a/internal/third_party/yaml/decode.go
+++ b/internal/third_party/yaml/decode.go
@@ -80,7 +80,7 @@
 	if err != nil {
 		return nil, err
 	}
-	info := token.NewFile(filename, -1, len(b))
+	info := token.NewFile(filename, -1, len(b)+2)
 	info.SetLinesForContent(b)
 	p := parser{info: info}
 	if !yaml_parser_initialize(&p.parser, filename) {
@@ -233,6 +233,9 @@
 	for p.peek() != yaml_SEQUENCE_END_EVENT {
 		n.children = append(n.children, p.parse())
 	}
+	if len(n.children) > 0 {
+		n.endPos = n.children[len(n.children)-1].endPos
+	}
 	p.expect(yaml_SEQUENCE_END_EVENT)
 	return n
 }
@@ -244,6 +247,9 @@
 	for p.peek() != yaml_MAPPING_END_EVENT {
 		n.children = append(n.children, p.parse(), p.parse())
 	}
+	if len(n.children) > 0 {
+		n.endPos = n.children[len(n.children)-1].endPos
+	}
 	p.expect(yaml_MAPPING_END_EVENT)
 	return n
 }
@@ -252,13 +258,14 @@
 // Decoder, unmarshals a node into a provided value.
 
 type decoder struct {
-	p        *parser
-	doc      *node
-	aliases  map[*node]bool
-	mapType  reflect.Type
-	terrors  []string
-	prev     token.Pos
-	lastNode ast.Node
+	p            *parser
+	doc          *node
+	aliases      map[*node]bool
+	mapType      reflect.Type
+	terrors      []string
+	prev         token.Pos
+	lastNode     ast.Node
+	forceNewline bool
 }
 
 var (
@@ -320,10 +327,9 @@
 		if c.mark.index >= m.index {
 			break
 		}
-		// fp := d.p.info.Pos(c.mark.index, 0)
 		comments = append(comments, &ast.Comment{
-			c.pos.Pos(),
-			"//" + c.text[1:],
+			Slash: d.pos(c.mark),
+			Text:  "//" + c.text[1:],
 		})
 		d.p.parser.comments = d.p.parser.comments[1:]
 	}
@@ -343,11 +349,9 @@
 	c := d.p.parser.comments[0]
 	if c.mark.index == m.index {
 		comment := &ast.Comment{
-			c.pos.Pos(),
-			// d.p.info.Pos(m.index+1, 0),
-			"//" + c.text[1:],
+			Slash: d.pos(c.mark),
+			Text:  "//" + c.text[1:],
 		}
-		// expr.AddComment(pos, false)
 		expr.AddComment(&ast.CommentGroup{
 			Line:     true,
 			Position: pos,
@@ -357,14 +361,35 @@
 }
 
 func (d *decoder) pos(m yaml_mark_t) token.Pos {
-	return token.NoPos
-	// TODO: reenable once we have better spacing.
-	// pos := d.p.info.Pos(m.index)
-	// if pos <= d.prev+1 {
-	// 	return token.NoPos
-	// }
-	// d.prev = pos
-	// return pos
+	pos := d.p.info.Pos(m.index+1, token.NoRelPos)
+
+	if d.forceNewline {
+		d.forceNewline = false
+		pos = pos.WithRel(token.Newline)
+	} else if d.prev.IsValid() {
+		c := pos.Position()
+		p := d.prev.Position()
+		switch {
+		case c.Line-p.Line >= 2:
+			pos = pos.WithRel(token.NewSection)
+		case c.Line-p.Line == 1:
+			pos = pos.WithRel(token.Newline)
+		case c.Column-p.Column > 0:
+			pos = pos.WithRel(token.Blank)
+		default:
+			pos = pos.WithRel(token.NoSpace)
+		}
+		if pos.Before(d.prev) {
+			return token.NoPos
+		}
+	}
+
+	d.prev = pos
+	return pos
+}
+
+func (d *decoder) absPos(m yaml_mark_t) token.Pos {
+	return d.p.info.Pos(m.index+1, token.NoRelPos)
 }
 
 func (d *decoder) start(n *node) token.Pos {
@@ -373,8 +398,7 @@
 
 func (d *decoder) ident(n *node, name string) *ast.Ident {
 	return &ast.Ident{
-		// NamePos: d.pos(n.startPos),
-		NamePos: d.p.parser.relPos().Pos(),
+		NamePos: d.pos(n.startPos),
 		Name:    name,
 	}
 }
@@ -423,16 +447,14 @@
 	// TODO: use parse literal or parse expression instead.
 	case yaml_TIMESTAMP_TAG:
 		return &ast.BasicLit{
-			// ValuePos: d.start(n),
-			ValuePos: d.p.parser.relPos().Pos(),
+			ValuePos: d.start(n),
 			Kind:     token.STRING,
 			Value:    strconv.Quote(n.value),
 		}
 
 	case yaml_STR_TAG:
 		return &ast.BasicLit{
-			// ValuePos: d.start(n),
-			ValuePos: d.p.parser.relPos().Pos(),
+			ValuePos: d.start(n),
 			Kind:     token.STRING,
 			Value:    d.quoteString(n.value),
 		}
@@ -442,8 +464,7 @@
 		buf[0] = '\''
 		buf[len(buf)-1] = '\''
 		return &ast.BasicLit{
-			// ValuePos: d.start(n),
-			ValuePos: d.p.parser.relPos().Pos(),
+			ValuePos: d.start(n),
 			Kind:     token.STRING,
 			Value:    string(buf),
 		}
@@ -456,8 +477,7 @@
 			str = "true"
 		}
 		return &ast.BasicLit{
-			// ValuePos: d.start(n),
-			ValuePos: d.p.parser.relPos().Pos(),
+			ValuePos: d.start(n),
 			Kind:     tok,
 			Value:    str,
 		}
@@ -491,18 +511,16 @@
 
 	case yaml_NULL_TAG:
 		return &ast.BasicLit{
-			ValuePos: d.p.parser.relPos().Pos(),
+			ValuePos: d.start(n),
 			Kind:     token.NULL,
 			Value:    "null",
 		}
 	}
 	err := &ast.BottomLit{
-		// Bottom: d.pos(n.startPos)
-		Bottom: d.p.parser.relPos().Pos(),
+		Bottom: d.pos(n.startPos),
 	}
 	comment := &ast.Comment{
-		// Slash: d.start(n),
-		Slash: token.Blank.Pos(),
+		Slash: d.start(n),
 		Text:  "// " + d.terror(n, tag),
 	}
 	err.AddComment(&ast.CommentGroup{
@@ -533,10 +551,9 @@
 	}
 stringLabel:
 	return &ast.BasicLit{
-		ValuePos: d.p.parser.relPos().Pos(),
-		// ValuePos: d.start(n),
-		Kind:  token.STRING,
-		Value: strconv.Quote(n.value),
+		ValuePos: d.start(n),
+		Kind:     token.STRING,
+		Value:    strconv.Quote(n.value),
 	}
 }
 
@@ -546,15 +563,13 @@
 		minuses++
 	}
 	expr = &ast.BasicLit{
-		// ValuePos: d.start(n) + minuses.Pos(),
-		ValuePos: d.p.parser.relPos().Pos(),
+		ValuePos: d.start(n), //  + minuses.Pos(),
 		Kind:     kind,
 		Value:    val,
 	}
 	if minuses > 0 {
 		expr = &ast.UnaryExpr{
-			// OpPos: d.start(n),
-			OpPos: d.p.parser.relPos().Pos(),
+			OpPos: d.start(n),
 			Op:    token.SUB,
 			X:     expr,
 		}
@@ -601,22 +616,53 @@
 
 func (d *decoder) sequence(n *node) ast.Expr {
 	list := &ast.ListLit{}
-	if n.startPos.line != n.endPos.line || len(n.children) != 1 {
-		list.Lbrack = d.pos(n.startPos)
-		list.Rbrack = d.pos(n.endPos)
+	list.Lbrack = d.pos(n.startPos).WithRel(token.Blank)
+	switch ln := len(n.children); ln {
+	case 0:
+		d.prev = list.Lbrack
+	default:
+		d.prev = d.pos(n.children[ln-1].endPos)
 	}
+	list.Rbrack = d.pos(n.endPos)
+
+	noNewline := true
+	single := d.isOneLiner(n.startPos, n.endPos)
 	for _, c := range n.children {
-		list.Elts = append(list.Elts, d.unmarshal(c))
+		d.forceNewline = !single
+		elem := d.unmarshal(c)
+		list.Elts = append(list.Elts, elem)
+		_, noNewline = elem.(*ast.StructLit)
+	}
+	if !single && !noNewline {
+		list.Rbrack = list.Rbrack.WithRel(token.Newline)
 	}
 	return list
 }
 
+func (d *decoder) isOneLiner(start, end yaml_mark_t) bool {
+	s := d.absPos(start).Position()
+	e := d.absPos(end).Position()
+	return s.Line == e.Line
+}
+
 func (d *decoder) mapping(n *node) ast.Expr {
+	newline := d.forceNewline
+
 	structure := &ast.StructLit{}
 	d.insertMap(n, structure, false)
-	if len(structure.Elts) != 1 {
-		structure.Lbrace = d.pos(n.startPos)
-		structure.Rbrace = d.pos(n.endPos)
+
+	// NOTE: we currently translate YAML without curly braces to CUE with
+	// curly braces, even for single elements. Removing the following line
+	// would generate the folded form.
+	structure.Lbrace = d.absPos(n.startPos).WithRel(token.NoSpace)
+	structure.Rbrace = d.absPos(n.endPos).WithRel(token.Newline)
+	if d.isOneLiner(n.startPos, n.endPos) && !newline {
+		if len(structure.Elts) != 1 {
+			structure.Lbrace = d.absPos(n.startPos).WithRel(token.Blank)
+		}
+		if len(structure.Elts) != 1 || structure.Elts[0].Pos().RelPos() < token.Newline {
+			structure.Rbrace = structure.Rbrace.WithRel(token.Blank)
+		}
 	}
 	return structure
 }
diff --git a/internal/third_party/yaml/decode_test.go b/internal/third_party/yaml/decode_test.go
index 608fe8a..87b72f1 100644
--- a/internal/third_party/yaml/decode_test.go
+++ b/internal/third_party/yaml/decode_test.go
@@ -169,13 +169,24 @@
 	// Block sequence
 	{
 		"seq:\n - A\n - B",
-		`seq: ["A", "B"]`,
+		`seq: [
+	"A",
+	"B",
+]`,
 	}, {
 		"seq:\n - A\n - B\n - C",
-		`seq: ["A", "B", "C"]`,
+		`seq: [
+	"A",
+	"B",
+	"C",
+]`,
 	}, {
 		"seq:\n - A\n - 1\n - C",
-		`seq: ["A", 1, "C"]`,
+		`seq: [
+	"A",
+	1,
+	"C",
+]`,
 	},
 
 	// Literal block scalar
@@ -208,9 +219,7 @@
 	// Structs
 	{
 		"a: {b: c}",
-		`a: {
-	b: "c"
-}`,
+		`a: {b: "c"}`,
 	},
 	{
 		"hello: world",
@@ -232,10 +241,7 @@
 		"a: true",
 	}, {
 		"{ a: 1, b: {c: 1} }",
-		`a: 1
-b: {
-	c: 1
-}`,
+		`a: 1, b: {c: 1}`,
 	},
 
 	// Some cross type conversions
@@ -360,10 +366,13 @@
 		`"1": "\"2\""`,
 	}, {
 		"v:\n- A\n- 'B\n\n  C'\n",
-		`v: ["A", """
+		`v: [
+	"A",
+	"""
 		B
 		C
-		"""]`,
+		""",
+]`,
 	},
 
 	// Explicit tags.
@@ -400,12 +409,9 @@
 d: 2`,
 	}, {
 		"a: &a {c: 1}\nb: *a",
-		`a: {
-	c: 1
-}
+		`a: {c: 1}
 b: {
-	c: 1
-}`,
+	c: 1}`, // TODO fix this spacing. Expansions low priority though.
 	}, {
 		"a: &a [1, 2]\nb: *a",
 		"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
@@ -458,9 +464,7 @@
 	// issue #295 (allow scalars with colons in flow mappings and sequences)
 	{
 		"a: {b: https://github.com/go-yaml/yaml}",
-		`a: {
-	b: "https://github.com/go-yaml/yaml"
-}`,
+		`a: {b: "https://github.com/go-yaml/yaml"}`,
 	},
 	{
 		"a: [https://github.com/go-yaml/yaml]",
@@ -500,12 +504,45 @@
 	// Ordered maps.
 	{
 		"{b: 2, a: 1, d: 4, c: 3, sub: {e: 5}}",
-		`b: 2
-a: 1
-d: 4
-c: 3
-sub: {
-	e: 5
+		`b: 2, a: 1, d: 4, c: 3, sub: {e: 5}`,
+	},
+
+	// Spacing
+	{
+		`
+a: {}
+c: 1
+d: [
+]
+e: []
+`,
+		`a: {}
+c: 1
+d: [
+]
+e: []`,
+	},
+
+	{
+		`
+a:
+  - { "a": 1, "b": 2 }
+  - { "c": 1, "d": 2 }
+`,
+		`a: [{
+	a: 1, b: 2
+}, {
+	c: 1, d: 2
+}]`,
+	},
+
+	{
+		"a:\n b:\n  c: d\n  e: f\n",
+		`a: {
+	b: {
+		c: "d"
+		e: "f"
+	}
 }`,
 	},
 
diff --git a/internal/third_party/yaml/parserc.go b/internal/third_party/yaml/parserc.go
index 52d8f74..aaf7f26 100644
--- a/internal/third_party/yaml/parserc.go
+++ b/internal/third_party/yaml/parserc.go
@@ -2,8 +2,6 @@
 
 import (
 	"bytes"
-
-	"cuelang.org/go/cue/token"
 )
 
 // The parser implements the following grammar:
@@ -60,9 +58,8 @@
 	parser.tokens_head++
 }
 
-func add_comment(parser *yaml_parser_t, p token.RelPos, m yaml_mark_t, text string) {
+func add_comment(parser *yaml_parser_t, m yaml_mark_t, text string) {
 	parser.comments = append(parser.comments, yaml_comment_t{
-		pos:  p,
 		mark: m,
 		text: text,
 	})
diff --git a/internal/third_party/yaml/scannerc.go b/internal/third_party/yaml/scannerc.go
index 2229abc..94ace4b 100644
--- a/internal/third_party/yaml/scannerc.go
+++ b/internal/third_party/yaml/scannerc.go
@@ -1459,7 +1459,6 @@
 
 		// Eat a comment until a line break.
 		if parser.buffer[parser.buffer_pos] == '#' {
-			rel := parser.relPos()
 			m := parser.mark
 			parser.comment_buffer = parser.comment_buffer[:0]
 			for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -1471,7 +1470,7 @@
 					return false
 				}
 			}
-			add_comment(parser, rel, m, string(parser.comment_buffer))
+			add_comment(parser, m, string(parser.comment_buffer))
 		}
 
 		// If it is a line break, eat it.
@@ -1569,7 +1568,6 @@
 	}
 
 	if parser.buffer[parser.buffer_pos] == '#' {
-		rel := parser.relPos()
 		m := parser.mark
 		parser.comment_buffer = parser.comment_buffer[:0]
 		for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -1581,7 +1579,7 @@
 				return false
 			}
 		}
-		add_comment(parser, rel, m, string(parser.comment_buffer))
+		add_comment(parser, m, string(parser.comment_buffer))
 	}
 
 	// Check if we are at the end of the line.
@@ -2146,7 +2144,6 @@
 		}
 	}
 	if parser.buffer[parser.buffer_pos] == '#' {
-		rel := parser.relPos()
 		m := parser.mark
 		parser.comment_buffer = parser.comment_buffer[:0]
 		for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -2158,7 +2155,7 @@
 				return false
 			}
 		}
-		add_comment(parser, rel, m, string(parser.comment_buffer))
+		add_comment(parser, m, string(parser.comment_buffer))
 	}
 
 	// Check if we are at the end of the line.
diff --git a/internal/third_party/yaml/testdata/merge.out b/internal/third_party/yaml/testdata/merge.out
index b8ed7a4..ee6e6bb 100644
--- a/internal/third_party/yaml/testdata/merge.out
+++ b/internal/third_party/yaml/testdata/merge.out
@@ -2,18 +2,18 @@
 // Test
 anchors: {
 	list: [{
-		x: 1
-		y: 2
+		x: 1, y: 2
 	}, {
-		x: 0
-		y: 2
+		x: 0, y: 2
 	}, {
 		r: 10
 	}, {
 		r: 1
 	}]
 }
+
 // All the following maps are equal:
+
 plain: {
 	// Explicit keys
 	x:     1
@@ -21,47 +21,56 @@
 	r:     10
 	label: "center/big"
 }
+
 mergeOne: {
 	x: 1
 	y: 2
 	// Merge one map
+
 	r:     10
 	label: "center/big"
 }
+
 mergeMultiple: {
 	r: 10
 	x: 1
 	y: 2
 	// Merge multiple maps
+
 	label: "center/big"
 }
+
 override: {
 	r:     10
 	x:     1
 	y:     2
 	label: "center/big"
 }
+
 shortTag: {
 	r: 10
 	x: 1
 	y: 2
 	// Explicit short merge tag
+
 	label: "center/big"
 }
+
 longTag: {
 	r: 10
 	x: 1
 	y: 2
 	// Explicit merge long tag
+
 	label: "center/big"
 }
+
 inlineMap: {
 	// Inlined map
-	x:     1
-	y:     2
-	r:     10
+	x:     1, y: 2, r: 10
 	label: "center/big"
 }
+
 inlineSequenceMap: {
 	// Inlined map in sequence
 	r:     10
diff --git a/internal/third_party/yaml/yaml.go b/internal/third_party/yaml/yaml.go
index 955cd82..08821c8 100644
--- a/internal/third_party/yaml/yaml.go
+++ b/internal/third_party/yaml/yaml.go
@@ -102,11 +102,12 @@
 	return &Decoder{parser: d}, nil
 }
 
-// Decode reads the next YAML-encoded value from its input
-// and stores it in the value pointed to by v.
+// Decode reads the next YAML-encoded value from its input and stores it in the
+// value pointed to by v. It returns io.EOF if there are no more values in the
+// stream.
 //
-// See the documentation for Unmarshal for details about the
-// conversion of YAML into a Go value.
+// See the documentation for Unmarshal for details about the conversion of YAML
+// into a Go value.
 func (dec *Decoder) Decode() (expr ast.Expr, err error) {
 	d := newDecoder(dec.parser)
 	defer handleErr(&err)
diff --git a/internal/third_party/yaml/yamlh.go b/internal/third_party/yaml/yamlh.go
index 93ab268..46ce462 100644
--- a/internal/third_party/yaml/yamlh.go
+++ b/internal/third_party/yaml/yamlh.go
@@ -3,8 +3,6 @@
 import (
 	"fmt"
 	"io"
-
-	"cuelang.org/go/cue/token"
 )
 
 // The version directive data.
@@ -521,28 +519,10 @@
 }
 
 type yaml_comment_t struct {
-	pos  token.RelPos
 	mark yaml_mark_t
 	text string
 }
 
-func (p *yaml_parser_t) relPos() (pos token.RelPos) {
-	switch {
-	case p.linesSinceLast > 1:
-		pos = token.NewSection
-	case p.linesSinceLast == 1:
-		pos = token.Newline
-	case p.spacesSinceLast > 0:
-		pos = token.Blank
-	default:
-		pos = token.NoSpace
-	}
-	p.linesSinceLast = 0
-	p.spacesSinceLast = 0
-	// fmt.Println("REL", pos)
-	return token.NoRelPos
-}
-
 // The parser structure.
 //
 // All members are internal. Manage the structure using the