Commit 730e39cd authored by Rob Pike's avatar Rob Pike

fmt: add %U format for standard Unicode representation of integer values.

	fmt.Printf("%U", 1) yields "U+0001"
It's essentially "U+%.4x" but lets you override the precision works in scan, too.

R=rsc
CC=golang-dev
https://golang.org/cl/3423043
parent e2d1595c
......@@ -26,6 +26,7 @@
%o base 8
%x base 16, with lower-case letters for a-f
%X base 16, with upper-case letters for A-F
%U unicode format: U+1234; same as "U+%x" with 4 digits default
Floating-point and complex constituents:
%e scientific notation, e.g. -1234.456e+78
%E scientific notation, e.g. -1234.456E+78
......
......@@ -161,6 +161,14 @@ var fmttests = []fmtTest{
{"% d", 0, " 0"},
{"% d", 12345, " 12345"},
// unicode format
{"%U", 0x1, "U+0001"},
{"%.8U", 0x2, "U+00000002"},
{"%U", 0x1234, "U+1234"},
{"%U", 0x12345, "U+12345"},
{"%10.6U", 0xABC, " U+000ABC"},
{"%-10.6U", 0xABC, "U+000ABC "},
// floats
{"%+.3e", 0.0, "+0.000e+00"},
{"%+.3e", 1.0, "+1.000e+00"},
......
......@@ -49,6 +49,7 @@ type fmt struct {
plus bool
sharp bool
space bool
unicode bool
zero bool
}
......@@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
f.plus = false
f.sharp = false
f.space = false
f.unicode = false
f.zero = false
}
......@@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
buf[i] = '0'
}
}
if f.unicode {
i--
buf[i] = '+'
i--
buf[i] = 'U'
}
if negative {
i--
......
......@@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
p.fmt.integer(v, 8, signed, ldigits)
case 'x':
p.fmt.integer(v, 16, signed, ldigits)
case 'U':
p.fmtUnicode(v)
case 'X':
p.fmt.integer(v, 16, signed, udigits)
default:
......@@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
}
}
// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by
// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
// temporarily turning on the sharp flag.
func (p *pp) fmt0x64(v uint64) {
sharp := p.fmt.sharp
......@@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
p.fmt.sharp = sharp
}
// fmtUnicode formats a uint64 in U+1234 form by
// temporarily turning on the unicode flag and tweaking the precision.
func (p *pp) fmtUnicode(v int64) {
precPresent := p.fmt.precPresent
prec := p.fmt.prec
if !precPresent {
// If prec is already set, leave it alone; otherwise 4 is minimum.
p.fmt.prec = 4
p.fmt.precPresent = true
}
p.fmt.unicode = true // turn on U+
p.fmt.integer(int64(v), 16, unsigned, udigits)
p.fmt.unicode = false
p.fmt.prec = prec
p.fmt.precPresent = precPresent
}
func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
switch verb {
case 'b':
......
......@@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
var complexError = os.ErrorString("syntax error scanning complex number")
var boolError = os.ErrorString("syntax error scanning boolean")
// accepts checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
// buffer and returns true. Otherwise it return false.
func (s *ss) accept(ok string) bool {
// consume reads the next rune in the input and reports whether it is in the ok string.
// If accept is true, it puts the character into the input token.
func (s *ss) consume(ok string, accept bool) bool {
if s.wid >= s.maxWid {
return false
}
......@@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
}
for i := 0; i < len(ok); i++ {
if int(ok[i]) == rune {
s.buf.WriteRune(rune)
s.wid++
if accept {
s.buf.WriteRune(rune)
s.wid++
}
return true
}
}
if rune != EOF {
if rune != EOF && accept {
s.UngetRune()
}
return false
}
// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
// buffer and returns true. Otherwise it return false.
func (s *ss) accept(ok string) bool {
return s.consume(ok, true)
}
// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
for _, v := range okVerbs {
......@@ -460,7 +468,7 @@ const (
// getBase returns the numeric base represented by the verb and its digit string.
func (s *ss) getBase(verb int) (base int, digits string) {
s.okVerb(verb, "bdoxXv", "integer") // sets s.err
s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
base = 10
digits = decimalDigits
switch verb {
......@@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
case 'o':
base = 8
digits = octalDigits
case 'x', 'X':
case 'x', 'X', 'U':
base = 16
digits = hexadecimalDigits
}
......@@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
}
base, digits := s.getBase(verb)
s.skipSpace(false)
s.accept(sign) // If there's a sign, it will be left in the token buffer.
if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ")
}
} else {
s.accept(sign) // If there's a sign, it will be left in the token buffer.
}
tok := s.scanNumber(digits)
i, err := strconv.Btoi64(tok, base)
if err != nil {
......@@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
}
base, digits := s.getBase(verb)
s.skipSpace(false)
if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ")
}
}
tok := s.scanNumber(digits)
i, err := strconv.Btoui64(tok, base)
if err != nil {
......
......@@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
{"%o", "075\n", &uintVal, uint(075)},
{"%x", "a75\n", &uintVal, uint(0xa75)},
{"%x", "A75\n", &uintVal, uint(0xa75)},
{"%U", "U+1234\n", &intVal, int(0x1234)},
{"%U", "U+4567\n", &uintVal, uint(0x4567)},
// Strings
{"%s", "using-%s\n", &stringVal, "using-%s"},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment