internal/third_party/yaml: improve yaml positioning
- retain more of the original spacing in the conversion
- store original absolute positions
Change-Id: Id1edbefe1050e1f2dcec3ceb2f8877018b0b5876
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2562
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
diff --git a/internal/third_party/yaml/decode.go b/internal/third_party/yaml/decode.go
index 024794d..22a6420 100644
--- a/internal/third_party/yaml/decode.go
+++ b/internal/third_party/yaml/decode.go
@@ -80,7 +80,7 @@
if err != nil {
return nil, err
}
- info := token.NewFile(filename, -1, len(b))
+ info := token.NewFile(filename, -1, len(b)+2)
info.SetLinesForContent(b)
p := parser{info: info}
if !yaml_parser_initialize(&p.parser, filename) {
@@ -233,6 +233,9 @@
for p.peek() != yaml_SEQUENCE_END_EVENT {
n.children = append(n.children, p.parse())
}
+ if len(n.children) > 0 {
+ n.endPos = n.children[len(n.children)-1].endPos
+ }
p.expect(yaml_SEQUENCE_END_EVENT)
return n
}
@@ -244,6 +247,9 @@
for p.peek() != yaml_MAPPING_END_EVENT {
n.children = append(n.children, p.parse(), p.parse())
}
+ if len(n.children) > 0 {
+ n.endPos = n.children[len(n.children)-1].endPos
+ }
p.expect(yaml_MAPPING_END_EVENT)
return n
}
@@ -252,13 +258,14 @@
// Decoder, unmarshals a node into a provided value.
type decoder struct {
- p *parser
- doc *node
- aliases map[*node]bool
- mapType reflect.Type
- terrors []string
- prev token.Pos
- lastNode ast.Node
+ p *parser
+ doc *node
+ aliases map[*node]bool
+ mapType reflect.Type
+ terrors []string
+ prev token.Pos
+ lastNode ast.Node
+ forceNewline bool
}
var (
@@ -320,10 +327,9 @@
if c.mark.index >= m.index {
break
}
- // fp := d.p.info.Pos(c.mark.index, 0)
comments = append(comments, &ast.Comment{
- c.pos.Pos(),
- "//" + c.text[1:],
+ Slash: d.pos(c.mark),
+ Text: "//" + c.text[1:],
})
d.p.parser.comments = d.p.parser.comments[1:]
}
@@ -343,11 +349,9 @@
c := d.p.parser.comments[0]
if c.mark.index == m.index {
comment := &ast.Comment{
- c.pos.Pos(),
- // d.p.info.Pos(m.index+1, 0),
- "//" + c.text[1:],
+ Slash: d.pos(c.mark),
+ Text: "//" + c.text[1:],
}
- // expr.AddComment(pos, false)
expr.AddComment(&ast.CommentGroup{
Line: true,
Position: pos,
@@ -357,14 +361,35 @@
}
func (d *decoder) pos(m yaml_mark_t) token.Pos {
- return token.NoPos
- // TODO: reenable once we have better spacing.
- // pos := d.p.info.Pos(m.index)
- // if pos <= d.prev+1 {
- // return token.NoPos
- // }
- // d.prev = pos
- // return pos
+ pos := d.p.info.Pos(m.index+1, token.NoRelPos)
+
+ if d.forceNewline {
+ d.forceNewline = false
+ pos = pos.WithRel(token.Newline)
+ } else if d.prev.IsValid() {
+ c := pos.Position()
+ p := d.prev.Position()
+ switch {
+ case c.Line-p.Line >= 2:
+ pos = pos.WithRel(token.NewSection)
+ case c.Line-p.Line == 1:
+ pos = pos.WithRel(token.Newline)
+ case c.Column-p.Column > 0:
+ pos = pos.WithRel(token.Blank)
+ default:
+ pos = pos.WithRel(token.NoSpace)
+ }
+ if pos.Before(d.prev) {
+ return token.NoPos
+ }
+ }
+
+ d.prev = pos
+ return pos
+}
+
+func (d *decoder) absPos(m yaml_mark_t) token.Pos {
+ return d.p.info.Pos(m.index+1, token.NoRelPos)
}
func (d *decoder) start(n *node) token.Pos {
@@ -373,8 +398,7 @@
func (d *decoder) ident(n *node, name string) *ast.Ident {
return &ast.Ident{
- // NamePos: d.pos(n.startPos),
- NamePos: d.p.parser.relPos().Pos(),
+ NamePos: d.pos(n.startPos),
Name: name,
}
}
@@ -423,16 +447,14 @@
// TODO: use parse literal or parse expression instead.
case yaml_TIMESTAMP_TAG:
return &ast.BasicLit{
- // ValuePos: d.start(n),
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n),
Kind: token.STRING,
Value: strconv.Quote(n.value),
}
case yaml_STR_TAG:
return &ast.BasicLit{
- // ValuePos: d.start(n),
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n),
Kind: token.STRING,
Value: d.quoteString(n.value),
}
@@ -442,8 +464,7 @@
buf[0] = '\''
buf[len(buf)-1] = '\''
return &ast.BasicLit{
- // ValuePos: d.start(n),
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n),
Kind: token.STRING,
Value: string(buf),
}
@@ -456,8 +477,7 @@
str = "true"
}
return &ast.BasicLit{
- // ValuePos: d.start(n),
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n),
Kind: tok,
Value: str,
}
@@ -491,18 +511,16 @@
case yaml_NULL_TAG:
return &ast.BasicLit{
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n),
Kind: token.NULL,
Value: "null",
}
}
err := &ast.BottomLit{
- // Bottom: d.pos(n.startPos)
- Bottom: d.p.parser.relPos().Pos(),
+ Bottom: d.pos(n.startPos),
}
comment := &ast.Comment{
- // Slash: d.start(n),
- Slash: token.Blank.Pos(),
+ Slash: d.start(n),
Text: "// " + d.terror(n, tag),
}
err.AddComment(&ast.CommentGroup{
@@ -533,10 +551,9 @@
}
stringLabel:
return &ast.BasicLit{
- ValuePos: d.p.parser.relPos().Pos(),
- // ValuePos: d.start(n),
- Kind: token.STRING,
- Value: strconv.Quote(n.value),
+ ValuePos: d.start(n),
+ Kind: token.STRING,
+ Value: strconv.Quote(n.value),
}
}
@@ -546,15 +563,13 @@
minuses++
}
expr = &ast.BasicLit{
- // ValuePos: d.start(n) + minuses.Pos(),
- ValuePos: d.p.parser.relPos().Pos(),
+ ValuePos: d.start(n), // + minuses.Pos(),
Kind: kind,
Value: val,
}
if minuses > 0 {
expr = &ast.UnaryExpr{
- // OpPos: d.start(n),
- OpPos: d.p.parser.relPos().Pos(),
+ OpPos: d.start(n),
Op: token.SUB,
X: expr,
}
@@ -601,22 +616,53 @@
func (d *decoder) sequence(n *node) ast.Expr {
list := &ast.ListLit{}
- if n.startPos.line != n.endPos.line || len(n.children) != 1 {
- list.Lbrack = d.pos(n.startPos)
- list.Rbrack = d.pos(n.endPos)
+ list.Lbrack = d.pos(n.startPos).WithRel(token.Blank)
+ switch ln := len(n.children); ln {
+ case 0:
+ d.prev = list.Lbrack
+ default:
+ d.prev = d.pos(n.children[ln-1].endPos)
}
+ list.Rbrack = d.pos(n.endPos)
+
+ noNewline := true
+ single := d.isOneLiner(n.startPos, n.endPos)
for _, c := range n.children {
- list.Elts = append(list.Elts, d.unmarshal(c))
+ d.forceNewline = !single
+ elem := d.unmarshal(c)
+ list.Elts = append(list.Elts, elem)
+ _, noNewline = elem.(*ast.StructLit)
+ }
+ if !single && !noNewline {
+ list.Rbrack = list.Rbrack.WithRel(token.Newline)
}
return list
}
+func (d *decoder) isOneLiner(start, end yaml_mark_t) bool {
+ s := d.absPos(start).Position()
+ e := d.absPos(end).Position()
+ return s.Line == e.Line
+}
+
func (d *decoder) mapping(n *node) ast.Expr {
+ newline := d.forceNewline
+
structure := &ast.StructLit{}
d.insertMap(n, structure, false)
- if len(structure.Elts) != 1 {
- structure.Lbrace = d.pos(n.startPos)
- structure.Rbrace = d.pos(n.endPos)
+
+ // NOTE: we currently translate YAML without curly braces to CUE with
+ // curly braces, even for single elements. Removing the following line
+ // would generate the folded form.
+ structure.Lbrace = d.absPos(n.startPos).WithRel(token.NoSpace)
+ structure.Rbrace = d.absPos(n.endPos).WithRel(token.Newline)
+ if d.isOneLiner(n.startPos, n.endPos) && !newline {
+ if len(structure.Elts) != 1 {
+ structure.Lbrace = d.absPos(n.startPos).WithRel(token.Blank)
+ }
+ if len(structure.Elts) != 1 || structure.Elts[0].Pos().RelPos() < token.Newline {
+ structure.Rbrace = structure.Rbrace.WithRel(token.Blank)
+ }
}
return structure
}
diff --git a/internal/third_party/yaml/decode_test.go b/internal/third_party/yaml/decode_test.go
index 608fe8a..87b72f1 100644
--- a/internal/third_party/yaml/decode_test.go
+++ b/internal/third_party/yaml/decode_test.go
@@ -169,13 +169,24 @@
// Block sequence
{
"seq:\n - A\n - B",
- `seq: ["A", "B"]`,
+ `seq: [
+ "A",
+ "B",
+]`,
}, {
"seq:\n - A\n - B\n - C",
- `seq: ["A", "B", "C"]`,
+ `seq: [
+ "A",
+ "B",
+ "C",
+]`,
}, {
"seq:\n - A\n - 1\n - C",
- `seq: ["A", 1, "C"]`,
+ `seq: [
+ "A",
+ 1,
+ "C",
+]`,
},
// Literal block scalar
@@ -208,9 +219,7 @@
// Structs
{
"a: {b: c}",
- `a: {
- b: "c"
-}`,
+ `a: {b: "c"}`,
},
{
"hello: world",
@@ -232,10 +241,7 @@
"a: true",
}, {
"{ a: 1, b: {c: 1} }",
- `a: 1
-b: {
- c: 1
-}`,
+ `a: 1, b: {c: 1}`,
},
// Some cross type conversions
@@ -360,10 +366,13 @@
`"1": "\"2\""`,
}, {
"v:\n- A\n- 'B\n\n C'\n",
- `v: ["A", """
+ `v: [
+ "A",
+ """
B
C
- """]`,
+ """,
+]`,
},
// Explicit tags.
@@ -400,12 +409,9 @@
d: 2`,
}, {
"a: &a {c: 1}\nb: *a",
- `a: {
- c: 1
-}
+ `a: {c: 1}
b: {
- c: 1
-}`,
+ c: 1}`, // TODO fix this spacing. Expansions low priority though.
}, {
"a: &a [1, 2]\nb: *a",
"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
@@ -458,9 +464,7 @@
// issue #295 (allow scalars with colons in flow mappings and sequences)
{
"a: {b: https://github.com/go-yaml/yaml}",
- `a: {
- b: "https://github.com/go-yaml/yaml"
-}`,
+ `a: {b: "https://github.com/go-yaml/yaml"}`,
},
{
"a: [https://github.com/go-yaml/yaml]",
@@ -500,12 +504,45 @@
// Ordered maps.
{
"{b: 2, a: 1, d: 4, c: 3, sub: {e: 5}}",
- `b: 2
-a: 1
-d: 4
-c: 3
-sub: {
- e: 5
+ `b: 2, a: 1, d: 4, c: 3, sub: {e: 5}`,
+ },
+
+ // Spacing
+ {
+ `
+a: {}
+c: 1
+d: [
+]
+e: []
+`,
+ `a: {}
+c: 1
+d: [
+]
+e: []`,
+ },
+
+ {
+ `
+a:
+ - { "a": 1, "b": 2 }
+ - { "c": 1, "d": 2 }
+`,
+ `a: [{
+ a: 1, b: 2
+}, {
+ c: 1, d: 2
+}]`,
+ },
+
+ {
+ "a:\n b:\n c: d\n e: f\n",
+ `a: {
+ b: {
+ c: "d"
+ e: "f"
+ }
}`,
},
diff --git a/internal/third_party/yaml/parserc.go b/internal/third_party/yaml/parserc.go
index 52d8f74..aaf7f26 100644
--- a/internal/third_party/yaml/parserc.go
+++ b/internal/third_party/yaml/parserc.go
@@ -2,8 +2,6 @@
import (
"bytes"
-
- "cuelang.org/go/cue/token"
)
// The parser implements the following grammar:
@@ -60,9 +58,8 @@
parser.tokens_head++
}
-func add_comment(parser *yaml_parser_t, p token.RelPos, m yaml_mark_t, text string) {
+func add_comment(parser *yaml_parser_t, m yaml_mark_t, text string) {
parser.comments = append(parser.comments, yaml_comment_t{
- pos: p,
mark: m,
text: text,
})
diff --git a/internal/third_party/yaml/scannerc.go b/internal/third_party/yaml/scannerc.go
index 2229abc..94ace4b 100644
--- a/internal/third_party/yaml/scannerc.go
+++ b/internal/third_party/yaml/scannerc.go
@@ -1459,7 +1459,6 @@
// Eat a comment until a line break.
if parser.buffer[parser.buffer_pos] == '#' {
- rel := parser.relPos()
m := parser.mark
parser.comment_buffer = parser.comment_buffer[:0]
for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -1471,7 +1470,7 @@
return false
}
}
- add_comment(parser, rel, m, string(parser.comment_buffer))
+ add_comment(parser, m, string(parser.comment_buffer))
}
// If it is a line break, eat it.
@@ -1569,7 +1568,6 @@
}
if parser.buffer[parser.buffer_pos] == '#' {
- rel := parser.relPos()
m := parser.mark
parser.comment_buffer = parser.comment_buffer[:0]
for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -1581,7 +1579,7 @@
return false
}
}
- add_comment(parser, rel, m, string(parser.comment_buffer))
+ add_comment(parser, m, string(parser.comment_buffer))
}
// Check if we are at the end of the line.
@@ -2146,7 +2144,6 @@
}
}
if parser.buffer[parser.buffer_pos] == '#' {
- rel := parser.relPos()
m := parser.mark
parser.comment_buffer = parser.comment_buffer[:0]
for !is_breakz(parser.buffer, parser.buffer_pos) {
@@ -2158,7 +2155,7 @@
return false
}
}
- add_comment(parser, rel, m, string(parser.comment_buffer))
+ add_comment(parser, m, string(parser.comment_buffer))
}
// Check if we are at the end of the line.
diff --git a/internal/third_party/yaml/testdata/merge.out b/internal/third_party/yaml/testdata/merge.out
index b8ed7a4..ee6e6bb 100644
--- a/internal/third_party/yaml/testdata/merge.out
+++ b/internal/third_party/yaml/testdata/merge.out
@@ -2,18 +2,18 @@
// Test
anchors: {
list: [{
- x: 1
- y: 2
+ x: 1, y: 2
}, {
- x: 0
- y: 2
+ x: 0, y: 2
}, {
r: 10
}, {
r: 1
}]
}
+
// All the following maps are equal:
+
plain: {
// Explicit keys
x: 1
@@ -21,47 +21,56 @@
r: 10
label: "center/big"
}
+
mergeOne: {
x: 1
y: 2
// Merge one map
+
r: 10
label: "center/big"
}
+
mergeMultiple: {
r: 10
x: 1
y: 2
// Merge multiple maps
+
label: "center/big"
}
+
override: {
r: 10
x: 1
y: 2
label: "center/big"
}
+
shortTag: {
r: 10
x: 1
y: 2
// Explicit short merge tag
+
label: "center/big"
}
+
longTag: {
r: 10
x: 1
y: 2
// Explicit merge long tag
+
label: "center/big"
}
+
inlineMap: {
// Inlined map
- x: 1
- y: 2
- r: 10
+ x: 1, y: 2, r: 10
label: "center/big"
}
+
inlineSequenceMap: {
// Inlined map in sequence
r: 10
diff --git a/internal/third_party/yaml/yaml.go b/internal/third_party/yaml/yaml.go
index 955cd82..08821c8 100644
--- a/internal/third_party/yaml/yaml.go
+++ b/internal/third_party/yaml/yaml.go
@@ -102,11 +102,12 @@
return &Decoder{parser: d}, nil
}
-// Decode reads the next YAML-encoded value from its input
-// and stores it in the value pointed to by v.
+// Decode reads the next YAML-encoded value from its input and stores it in the
+// value pointed to by v. It returns io.EOF if there are no more values in the
+// stream.
//
-// See the documentation for Unmarshal for details about the
-// conversion of YAML into a Go value.
+// See the documentation for Unmarshal for details about the conversion of YAML
+// into a Go value.
func (dec *Decoder) Decode() (expr ast.Expr, err error) {
d := newDecoder(dec.parser)
defer handleErr(&err)
diff --git a/internal/third_party/yaml/yamlh.go b/internal/third_party/yaml/yamlh.go
index 93ab268..46ce462 100644
--- a/internal/third_party/yaml/yamlh.go
+++ b/internal/third_party/yaml/yamlh.go
@@ -3,8 +3,6 @@
import (
"fmt"
"io"
-
- "cuelang.org/go/cue/token"
)
// The version directive data.
@@ -521,28 +519,10 @@
}
type yaml_comment_t struct {
- pos token.RelPos
mark yaml_mark_t
text string
}
-func (p *yaml_parser_t) relPos() (pos token.RelPos) {
- switch {
- case p.linesSinceLast > 1:
- pos = token.NewSection
- case p.linesSinceLast == 1:
- pos = token.Newline
- case p.spacesSinceLast > 0:
- pos = token.Blank
- default:
- pos = token.NoSpace
- }
- p.linesSinceLast = 0
- p.spacesSinceLast = 0
- // fmt.Println("REL", pos)
- return token.NoRelPos
-}
-
// The parser structure.
//
// All members are internal. Manage the structure using the