cue/format/printer.go - cue - Git at Google

 // Copyright 2018 The CUE Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package format

 import (
 	"fmt"
 	"os"
 	"strings"
 	"text/tabwriter"

 	"cuelang.org/go/cue/ast"
 	"cuelang.org/go/cue/errors"
 	"cuelang.org/go/cue/token"
 )

 // A printer takes the stream of formatting tokens and spacing directives
 // produced by the formatter and adjusts the spacing based on the original
 // source code.
 type printer struct {
 	// cfg is the configuration handed to init. cfg.Indent is used by
 	// writeByte as the base indentation after a line break.
 	cfg *config

 	// allowed accumulates the whitespace flags that are pending since the
 	// last output; Print flushes it through writeWhitespace.
 	allowed     whiteSpace
 	// requested is only ever reset to zero in the code visible here.
 	requested   whiteSpace
 	// indentStack is pushed by markLineIndent and popped by
 	// markUnindentLine; the top entry records whether its level has
 	// already been applied to indent (the indented flag).
 	indentStack []whiteSpace

 	pos     token.Position // position in the output; advanced by writeString and writeByte
 	// lineout counts the line breaks emitted by writeWhitespace.
 	lineout line

 	lastTok token.Token // kind of the last token, literal, identifier, string, attribute or comment printed

 	// output is the raw, not yet tab-aligned, output buffer.
 	output      []byte
 	// indent is the number of tabs, on top of cfg.Indent, written after
 	// each line break.
 	indent      int
 	// spaceBefore is set after whitespace has been written and cleared by
 	// writeString when it writes a non-empty string.
 	spaceBefore bool

 	// errs collects the errors reported through errf.
 	errs errors.Error
 }

 // line is a count of output line breaks. Passing a *line to Print stores
 // the printer's current lineout value in it.
 type line int

 // init attaches the configuration cfg to p and places the output position
 // at line 1, column 1. No other field of p is touched.
 func (p *printer) init(cfg *config) {
 	start := token.Position{Line: 1, Column: 1}
 	p.pos = start
 	p.cfg = cfg
 }

 // errf builds an error from format and args, located at the position of
 // node n, and appends it to p.errs.
 func (p *printer) errf(n ast.Node, format string, args ...interface{}) {
 	err := errors.Newf(n.Pos(), format, args...)
 	p.errs = errors.Append(p.errs, err)
 }

 // debug switches on the diagnostics in internalError. While it is false,
 // internalError does nothing.
 const debug = false

 // internalError reports an inconsistency in the printer's own state. It
 // does nothing unless debug is set; in that case it prints the current
 // output position followed by msg to standard output and panics.
 func (p *printer) internalError(msg ...interface{}) {
 	if !debug {
 		return
 	}
 	fmt.Print(p.pos.String() + ": ")
 	fmt.Println(msg...)
 	panic("go/printer")
 }

 // lineFor returns the line number recorded in pos. The receiver is not
 // consulted.
 func (p *printer) lineFor(pos token.Pos) int {
 	n := pos.Line()
 	return n
 }

 // Print processes a single formatting item v. Depending on its dynamic type,
 // v is either a directive that only adjusts the pending whitespace state
 // (whiteSpace, token.Pos, *ast.CommentGroup), a request to read back the
 // number of line breaks written so far (*line), or a piece of text
 // (token.Token, *ast.BasicLit, *ast.Ident, string, *ast.Attribute,
 // *ast.Comment) that is written to the output after the pending whitespace
 // has been flushed.
 //
 // Print panics if v has any other type.
 func (p *printer) Print(v interface{}) {
 	var (
 		impliedComma = false
 		isLit        bool
 		data         string
 		nextWS       whiteSpace
 	)
 	switch x := v.(type) {
 	case *line:
 		// Hand the current line-break count back to the caller. There is
 		// no return here, so the pending whitespace is still flushed
 		// below (with empty data).
 		*x = p.lineout

 	case token.Token:
 		s := x.String()
 		before, after := mayCombine(p.lastTok, x)
 		if before && !p.spaceBefore {
 			// the previous and the current token must be
 			// separated by a blank otherwise they combine
 			// into a different incorrect token sequence
 			// (except for syntax.INT followed by a '.' this
 			// should never happen because it is taken care
 			// of via binary expression formatting)
 			if p.allowed&blank != 0 {
 				p.internalError("whitespace buffer not empty")
 			}
 			p.allowed |= blank
 		}
 		if after {
 			// A blank becomes the pending whitespace once this token
 			// has been written.
 			nextWS = blank
 		}
 		data = s
 		switch x {
 		case token.EOF:
 			// EOF itself prints nothing; the pending whitespace is
 			// replaced rather than extended.
 			data = ""
 			p.allowed = newline
 			p.allowed &^= newsection
 		case token.LPAREN, token.LBRACK, token.LBRACE:
 		case token.RPAREN, token.RBRACK, token.RBRACE:
 			impliedComma = true
 		}
 		p.lastTok = x

 	case *ast.BasicLit:
 		data = x.Value
 		switch x.Kind {
 		case token.INT:
 			// A leading '0' directly followed by a digit is rewritten
 			// with an explicit "0o" prefix.
 			if len(data) > 1 &&
 				data[0] == '0' &&
 				data[1] >= '0' && data[1] <= '9' {
 				data = "0o" + data[1:]
 			}
 		case token.FLOAT:
 			// Lower-case the whole literal when it contains an
 			// upper-case 'E'.
 			if strings.IndexByte(data, 'E') != -1 {
 				data = strings.ToLower(data)
 			}
 		}

 		// Literals are written between tabwriter.Escape bytes; see
 		// writeString.
 		isLit = true
 		impliedComma = true
 		p.lastTok = x.Kind

 	case *ast.Ident:
 		data = x.Name
 		if !ast.IsValidIdent(data) {
 			// Record the error and print a placeholder instead of the
 			// invalid name.
 			p.errf(x, "invalid identifier %q", x.Name)
 			data = "*bad identifier*"
 		}
 		impliedComma = true
 		p.lastTok = token.IDENT

 	case string:
 		data = x
 		impliedComma = true
 		p.lastTok = token.STRING

 	case *ast.CommentGroup:
 		// A comment group writes nothing here; it only adjusts the
 		// pending whitespace according to the group's relative position.
 		rel := x.Pos().RelPos()
 		if x.Line { // TODO: we probably don't need this.
 			rel = token.Blank
 		}
 		switch rel {
 		case token.NoRelPos:
 		case token.Newline, token.NewSection:
 		case token.Blank, token.Elided:
 			p.allowed |= blank
 			fallthrough
 		case token.NoSpace:
 			p.allowed &^= newline | newsection | formfeed | declcomma
 		}
 		return

 	case *ast.Attribute:
 		data = x.Text
 		impliedComma = true
 		p.lastTok = token.ATTRIBUTE

 	case *ast.Comment:
 		// TODO: if implied comma, postpone comment
 		data = x.Text
 		p.lastTok = token.COMMENT

 	case whiteSpace:
 		// Whitespace directives are merged into the pending set and
 		// written later.
 		p.allowed |= x
 		return

 	case token.Pos:
 		// TODO: should we use a known file position to synchronize? Go does,
 		// but we don't really have to.
 		// pos := x
 		if x.HasRelPos() {
 			// Unless nooverride is pending, the relative position
 			// recorded in x adjusts the pending whitespace, which is
 			// then written immediately and cleared.
 			if p.allowed&nooverride == 0 {
 				requested := p.allowed
 				switch x.RelPos() {
 				case token.NoSpace:
 					requested &^= newline | newsection | formfeed
 				case token.Blank:
 					requested |= blank
 					requested &^= newline | newsection | formfeed
 				case token.Newline:
 					requested |= newline
 				case token.NewSection:
 					requested |= newsection
 				}
 				p.writeWhitespace(requested)
 				p.allowed = 0
 				p.requested = 0
 			}
 			// p.pos = pos
 		}
 		return

 	default:
 		fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x)
 		panic("go/printer type")
 	}

 	// Flush the pending whitespace, write the text, and make nextWS the
 	// new pending whitespace.
 	p.writeWhitespace(p.allowed)
 	p.allowed = 0
 	p.requested = 0
 	p.writeString(data, isLit)
 	p.allowed = nextWS
 	_ = impliedComma // TODO: delay comment printings
 }

 // writeWhitespace emits the whitespace described by the flag set ws.
 //
 // A comma is written first when the comma flag is set and either a line
 // break follows or trailcomma is not set. The indent and unindent flags then
 // push to and pop from the indent stack. Finally at most one separator is
 // written, chosen in this priority order: newsection (two form feeds),
 // formfeed, newline, declcomma (", "), noblank (nothing), vtab, blank.
 // spaceBefore is set whenever a separator was written.
 func (p *printer) writeWhitespace(ws whiteSpace) {
 	has := func(mask whiteSpace) bool { return ws&mask != 0 }

 	if has(comma) && (has(newsection|newline|formfeed) || !has(trailcomma)) {
 		p.writeByte(',', 1)
 	}
 	if has(indent) {
 		p.markLineIndent(ws)
 	}
 	if has(unindent) {
 		p.markUnindentLine()
 	}

 	wrote := true
 	switch {
 	case has(newsection):
 		p.maybeIndentLine(ws)
 		p.writeByte('\f', 2)
 		p.lineout += 2
 	case has(formfeed):
 		p.maybeIndentLine(ws)
 		p.writeByte('\f', 1)
 		p.lineout++
 	case has(newline):
 		p.maybeIndentLine(ws)
 		p.writeByte('\n', 1)
 		p.lineout++
 	case has(declcomma):
 		p.writeByte(',', 1)
 		p.writeByte(' ', 1)
 	case has(noblank):
 		// noblank suppresses vtab and blank.
 		wrote = false
 	case has(vtab):
 		p.writeByte('\v', 1)
 	case has(blank):
 		p.writeByte(' ', 1)
 	default:
 		wrote = false
 	}
 	if wrote {
 		p.spaceBefore = true
 	}
 }

 // markLineIndent pushes ws onto the indent stack.
 func (p *printer) markLineIndent(ws whiteSpace) {
 	stack := append(p.indentStack, ws)
 	p.indentStack = stack
 }

 // markUnindentLine pops the top entry of the indent stack. If that entry
 // carries the indented flag, the indent level is decremented and true is
 // returned. The stack must not be empty.
 func (p *printer) markUnindentLine() (wasUnindented bool) {
 	top := len(p.indentStack) - 1
 	wasUnindented = p.indentStack[top]&indented != 0
 	if wasUnindented {
 		p.indent--
 	}
 	p.indentStack = p.indentStack[:top]
 	return wasUnindented
 }

 // maybeIndentLine applies the indentation of the top indent-stack entry the
 // first time a line break is written for it. Nothing happens when ws
 // contains unindent, when the stack is empty, or when the top entry either
 // has no indent flag or is already marked indented.
 func (p *printer) maybeIndentLine(ws whiteSpace) {
 	if ws&unindent != 0 || len(p.indentStack) == 0 {
 		return
 	}
 	top := &p.indentStack[len(p.indentStack)-1]
 	if *top&indented == 0 && *top&indent != 0 {
 		*top |= indented
 		p.indent++
 	}
 }

 // matchUnindent adds the unindent flag to the formatter's pending
 // whitespace and returns 0.
 func (f *formatter) matchUnindent() whiteSpace {
 	// TODO: make this work. Whitespace from closing bracket should match that
 	// of opening if there is no position information.
 	// f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank
 	// ws := f.indentStack[len(f.indentStack)-1]
 	// mask := blank | noblank | vtab
 	// f.allowed |= unindent | blank | noblank
 	// if ws&newline != 0 || ws*indented != 0 {
 	// 	f.allowed |= newline
 	// }
 	f.allowed = f.allowed | unindent
 	return 0
 }

 // writeString appends s to p.output and advances p.pos past it. Writing a
 // non-empty s clears p.spaceBefore. If isLit is set, s is bracketed by
 // tabwriter.Escape bytes so that it is not interpreted by the tabwriter.
 //
 // Note: it is always correct to set isLit = true. Setting it only when s may
 // contain tabs or line breaks avoids processing extra escape characters.
 func (p *printer) writeString(s string, isLit bool) {
 	if len(s) > 0 {
 		p.spaceBefore = false
 	}

 	if isLit {
 		// Protect s such that it passes through the tabwriter unchanged.
 		p.output = append(p.output, tabwriter.Escape)
 		p.output = append(p.output, s...)
 		p.output = append(p.output, tabwriter.Escape)
 	} else {
 		p.output = append(p.output, s...)
 	}

 	// Update the position. Only '\n' is counted as a line break in s.
 	p.pos.Offset += len(s)
 	if last := strings.LastIndexByte(s, '\n'); last >= 0 {
 		p.pos.Line += strings.Count(s, "\n")
 		// Columns are 1-based: len(s)-last is one past the number of
 		// bytes that follow the final newline.
 		p.pos.Column = len(s) - last
 	} else {
 		p.pos.Column += len(s)
 	}
 }

 // writeByte appends ch to the output n times and updates p.pos. When ch is
 // '\n' or '\f', the line number advances by n and the new line is started
 // with cfg.Indent + p.indent tab characters.
 func (p *printer) writeByte(ch byte, n int) {
 	for written := 0; written < n; written++ {
 		p.output = append(p.output, ch)
 	}
 	p.pos.Offset += n

 	if ch != '\n' && ch != '\f' {
 		p.pos.Column += n
 		return
 	}

 	// Line break: start the new line with the base indentation plus the
 	// current indent level.
 	p.pos.Line += n
 	tabs := p.cfg.Indent + p.indent
 	for t := 0; t < tabs; t++ {
 		p.output = append(p.output, '\t')
 	}
 	p.pos.Offset += tabs
 	p.pos.Column = 1 + tabs
 }

 // mayCombine reports whether blanks are needed around the token next when it
 // follows the token prev.
 //
 // before is true if a blank must separate prev and next, because they would
 // otherwise read as a different token sequence. after is true if a blank
 // must follow next. Tokens whose text starts with a lower-case letter get a
 // blank on both sides.
 func mayCombine(prev, next token.Token) (before, after bool) {
 	s := next.String()
 	// The range check is inclusive of 'z'; the former "< 'z'" left that
 	// letter out.
 	if 'a' <= s[0] && s[0] <= 'z' {
 		return true, true
 	}
 	switch prev {
 	case token.IQUO, token.IREM, token.IDIV, token.IMOD:
 		return false, false
 	case token.INT:
 		before = next == token.PERIOD // 1.
 	case token.ADD:
 		before = s[0] == '+' // ++
 	case token.SUB:
 		before = s[0] == '-' // --
 	case token.QUO:
 		before = s[0] == '*' // /*
 	}
 	return before, false
 }
	// Copyright 2018 The CUE Authors
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package format

	import (
	"fmt"
	"os"
	"strings"
	"text/tabwriter"

	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/token"
	)

	// A printer takes the stream of formatting tokens and spacing directives
	// produced by the formatter and adjusts the spacing based on the original
	// source code.
	type printer struct {
	// cfg is the configuration handed to init. cfg.Indent is used by
	// writeByte as the base indentation after a line break.
	cfg *config

	// allowed accumulates the whitespace flags that are pending since the
	// last output; Print flushes it through writeWhitespace.
	allowed whiteSpace
	// requested is only ever reset to zero in the code visible here.
	requested whiteSpace
	// indentStack is pushed by markLineIndent and popped by
	// markUnindentLine.
	indentStack []whiteSpace

	pos token.Position // position in the output; advanced by writeString and writeByte
	// lineout counts the line breaks emitted by writeWhitespace.
	lineout line

	lastTok token.Token // kind of the last token, literal, identifier, string, attribute or comment printed

	// output is the raw output buffer.
	output []byte
	// indent is the number of tabs, on top of cfg.Indent, written after
	// each line break.
	indent int
	// spaceBefore is set after whitespace has been written and cleared by
	// writeString when it writes a non-empty string.
	spaceBefore bool

	// errs collects the errors reported through errf.
	errs errors.Error
	}

	// line is a count of output line breaks. Passing a *line to Print stores
	// the printer's current lineout value in it.
	type line int

	// init attaches the configuration cfg to p and places the output position
	// at line 1, column 1.
	//
	// The receiver and cfg are pointers: with a value receiver the assignments
	// would be made to a copy and lost, and the cfg field is a *config.
	func (p *printer) init(cfg *config) {
		p.cfg = cfg
		p.pos = token.Position{Line: 1, Column: 1}
	}

	// errf builds an error from format and args, located at the position of
	// node n, and appends it to p.errs.
	func (p *printer) errf(n ast.Node, format string, args ...interface{}) {
		err := errors.Newf(n.Pos(), format, args...)
		p.errs = errors.Append(p.errs, err)
	}

	// debug switches on the diagnostics in internalError. While it is false,
	// internalError does nothing.
	const debug = false

	// internalError reports an inconsistency in the printer's own state. It
	// does nothing unless debug is set; in that case it prints the current
	// output position followed by msg to standard output and panics.
	func (p *printer) internalError(msg ...interface{}) {
		if !debug {
			return
		}
		fmt.Print(p.pos.String() + ": ")
		fmt.Println(msg...)
		panic("go/printer")
	}

	// lineFor returns the line number recorded in pos. The receiver is not
	// consulted.
	func (p *printer) lineFor(pos token.Pos) int {
		n := pos.Line()
		return n
	}

	func (p *printer) Print(v interface{}) {
	var (
	impliedComma = false
	isLit bool
	data string
	nextWS whiteSpace
	)
	switch x := v.(type) {
	case *line:
	*x = p.lineout

	case token.Token:
	s := x.String()
	before, after := mayCombine(p.lastTok, x)
	if before && !p.spaceBefore {
	// the previous and the current token must be
	// separated by a blank otherwise they combine
	// into a different incorrect token sequence
	// (except for syntax.INT followed by a '.' this
	// should never happen because it is taken care
	// of via binary expression formatting)
	if p.allowed&blank != 0 {
	p.internalError("whitespace buffer not empty")
	}
	p.allowed \|= blank
	}
	if after {
	nextWS = blank
	}
	data = s
	switch x {
	case token.EOF:
	data = ""
	p.allowed = newline
	p.allowed &^= newsection
	case token.LPAREN, token.LBRACK, token.LBRACE:
	case token.RPAREN, token.RBRACK, token.RBRACE:
	impliedComma = true
	}
	p.lastTok = x

	case *ast.BasicLit:
	data = x.Value
	switch x.Kind {
	case token.INT:
	if len(data) > 1 &&
	data[0] == '0' &&
	data[1] >= '0' && data[1] <= '9' {
	data = "0o" + data[1:]
	}
	case token.FLOAT:
	if strings.IndexByte(data, 'E') != -1 {
	data = strings.ToLower(data)
	}
	}

	isLit = true
	impliedComma = true
	p.lastTok = x.Kind

	case *ast.Ident:
	data = x.Name
	if !ast.IsValidIdent(data) {
	p.errf(x, "invalid identifier %q", x.Name)
	data = "bad identifier"
	}
	impliedComma = true
	p.lastTok = token.IDENT

	case string:
	data = x
	impliedComma = true
	p.lastTok = token.STRING

	case *ast.CommentGroup:
	rel := x.Pos().RelPos()
	if x.Line { // TODO: we probably don't need this.
	rel = token.Blank
	}
	switch rel {
	case token.NoRelPos:
	case token.Newline, token.NewSection:
	case token.Blank, token.Elided:
	p.allowed \|= blank
	fallthrough
	case token.NoSpace:
	p.allowed &^= newline \| newsection \| formfeed \| declcomma
	}
	return

	case *ast.Attribute:
	data = x.Text
	impliedComma = true
	p.lastTok = token.ATTRIBUTE

	case *ast.Comment:
	// TODO: if implied comma, postpone comment
	data = x.Text
	p.lastTok = token.COMMENT

	case whiteSpace:
	p.allowed \|= x
	return

	case token.Pos:
	// TODO: should we use a known file position to synchronize? Go does,
	// but we don't really have to.
	// pos := x
	if x.HasRelPos() {
	if p.allowed&nooverride == 0 {
	requested := p.allowed
	switch x.RelPos() {
	case token.NoSpace:
	requested &^= newline \| newsection \| formfeed
	case token.Blank:
	requested \|= blank
	requested &^= newline \| newsection \| formfeed
	case token.Newline:
	requested \|= newline
	case token.NewSection:
	requested \|= newsection
	}
	p.writeWhitespace(requested)
	p.allowed = 0
	p.requested = 0
	}
	// p.pos = pos
	}
	return

	default:
	fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x)
	panic("go/printer type")
	}

	p.writeWhitespace(p.allowed)
	p.allowed = 0
	p.requested = 0
	p.writeString(data, isLit)
	p.allowed = nextWS
	_ = impliedComma // TODO: delay comment printings
	}

	func (p *printer) writeWhitespace(ws whiteSpace) {
	if ws&comma != 0 {
	switch {
	case ws&(newsection\|newline\|formfeed) != 0,
	ws&trailcomma == 0:
	p.writeByte(',', 1)
	}
	}
	if ws&indent != 0 {
	p.markLineIndent(ws)
	}
	if ws&unindent != 0 {
	p.markUnindentLine()
	}
	switch {
	case ws&newsection != 0:
	p.maybeIndentLine(ws)
	p.writeByte('\f', 2)
	p.lineout += 2
	p.spaceBefore = true
	case ws&formfeed != 0:
	p.maybeIndentLine(ws)
	p.writeByte('\f', 1)
	p.lineout++
	p.spaceBefore = true
	case ws&newline != 0:
	p.maybeIndentLine(ws)
	p.writeByte('\n', 1)
	p.lineout++
	p.spaceBefore = true
	case ws&declcomma != 0:
	p.writeByte(',', 1)
	p.writeByte(' ', 1)
	p.spaceBefore = true
	case ws&noblank != 0:
	case ws&vtab != 0:
	p.writeByte('\v', 1)
	p.spaceBefore = true
	case ws&blank != 0:
	p.writeByte(' ', 1)
	p.spaceBefore = true
	}
	}

	// markLineIndent pushes ws onto the indent stack.
	func (p *printer) markLineIndent(ws whiteSpace) {
		stack := append(p.indentStack, ws)
		p.indentStack = stack
	}

	// markUnindentLine pops the top entry of the indent stack. If that entry
	// carries the indented flag, the indent level is decremented and true is
	// returned. The stack must not be empty.
	func (p *printer) markUnindentLine() (wasUnindented bool) {
		top := len(p.indentStack) - 1
		wasUnindented = p.indentStack[top]&indented != 0
		if wasUnindented {
			p.indent--
		}
		p.indentStack = p.indentStack[:top]
		return wasUnindented
	}

	func (p *printer) maybeIndentLine(ws whiteSpace) {
	if ws&unindent == 0 && len(p.indentStack) > 0 {
	last := len(p.indentStack) - 1
	if ws := p.indentStack[last]; ws&indented != 0 \|\| ws&indent == 0 {
	return
	}
	p.indentStack[last] \|= indented
	p.indent++
	}
	}

	func (f *formatter) matchUnindent() whiteSpace {
	f.allowed \|= unindent
	// TODO: make this work. Whitespace from closing bracket should match that
	// of opening if there is no position information.
	// f.allowed &^= nooverride \| newline \| newsection \| formfeed \| blank \| noblank
	// ws := f.indentStack[len(f.indentStack)-1]
	// mask := blank \| noblank \| vtab
	// f.allowed \|= unindent \| blank \| noblank
	// if ws&newline != 0 \|\| ws*indented != 0 {
	// f.allowed \|= newline
	// }
	return 0
	}

	// writeString appends s to p.output and advances p.pos past it. Writing a
	// non-empty s clears p.spaceBefore. If isLit is set, s is bracketed by
	// tabwriter.Escape bytes so that it is not interpreted by the tabwriter.
	//
	// Note: it is always correct to set isLit = true. Setting it only when s may
	// contain tabs or line breaks avoids processing extra escape characters.
	func (p *printer) writeString(s string, isLit bool) {
		if len(s) > 0 {
			p.spaceBefore = false
		}

		if isLit {
			// Protect s such that it passes through the tabwriter unchanged.
			p.output = append(p.output, tabwriter.Escape)
			p.output = append(p.output, s...)
			p.output = append(p.output, tabwriter.Escape)
		} else {
			p.output = append(p.output, s...)
		}

		// Update the position. Only '\n' is counted as a line break in s.
		p.pos.Offset += len(s)
		if last := strings.LastIndexByte(s, '\n'); last >= 0 {
			p.pos.Line += strings.Count(s, "\n")
			// Columns are 1-based: len(s)-last is one past the number of
			// bytes that follow the final newline.
			p.pos.Column = len(s) - last
		} else {
			p.pos.Column += len(s)
		}
	}

	func (p *printer) writeByte(ch byte, n int) {
	for i := 0; i < n; i++ {
	p.output = append(p.output, ch)
	}

	// update positions
	p.pos.Offset += n
	if ch == '\n' \|\| ch == '\f' {
	p.pos.Line += n
	p.pos.Column = 1

	n := p.cfg.Indent + p.indent // include base indentation
	for i := 0; i < n; i++ {
	p.output = append(p.output, '\t')
	}

	// update positions
	p.pos.Offset += n
	p.pos.Column += n

	return
	}
	p.pos.Column += n
	}

	func mayCombine(prev, next token.Token) (before, after bool) {
	s := next.String()
	if 'a' <= s[0] && s[0] < 'z' {
	return true, true
	}
	switch prev {
	case token.IQUO, token.IREM, token.IDIV, token.IMOD:
	return false, false
	case token.INT:
	before = next == token.PERIOD // 1.
	case token.ADD:
	before = s[0] == '+' // ++
	case token.SUB:
	before = s[0] == '-' // --
	case token.QUO:
	before = s[0] == '' // /
	}
	return before, false
	}