Commit 730e39cd authored by Rob Pike

fmt: add %U format for standard Unicode representation of integer values.

	fmt.Printf("%U", 1) yields "U+0001"
It's essentially "U+%.4x" but lets you override the precision, and it works in scan, too.

R=rsc
CC=golang-dev
https://golang.org/cl/3423043
parent e2d1595c
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
%o base 8 %o base 8
%x base 16, with lower-case letters for a-f %x base 16, with lower-case letters for a-f
%X base 16, with upper-case letters for A-F %X base 16, with upper-case letters for A-F
%U unicode format: U+1234; same as "U+%X" with 4 digits default
Floating-point and complex constituents: Floating-point and complex constituents:
%e scientific notation, e.g. -1234.456e+78 %e scientific notation, e.g. -1234.456e+78
%E scientific notation, e.g. -1234.456E+78 %E scientific notation, e.g. -1234.456E+78
......
...@@ -161,6 +161,14 @@ var fmttests = []fmtTest{ ...@@ -161,6 +161,14 @@ var fmttests = []fmtTest{
{"% d", 0, " 0"}, {"% d", 0, " 0"},
{"% d", 12345, " 12345"}, {"% d", 12345, " 12345"},
// unicode format
{"%U", 0x1, "U+0001"},
{"%.8U", 0x2, "U+00000002"},
{"%U", 0x1234, "U+1234"},
{"%U", 0x12345, "U+12345"},
{"%10.6U", 0xABC, " U+000ABC"},
{"%-10.6U", 0xABC, "U+000ABC "},
// floats // floats
{"%+.3e", 0.0, "+0.000e+00"}, {"%+.3e", 0.0, "+0.000e+00"},
{"%+.3e", 1.0, "+1.000e+00"}, {"%+.3e", 1.0, "+1.000e+00"},
......
...@@ -49,6 +49,7 @@ type fmt struct { ...@@ -49,6 +49,7 @@ type fmt struct {
plus bool plus bool
sharp bool sharp bool
space bool space bool
unicode bool
zero bool zero bool
} }
...@@ -61,6 +62,7 @@ func (f *fmt) clearflags() { ...@@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
f.plus = false f.plus = false
f.sharp = false f.sharp = false
f.space = false f.space = false
f.unicode = false
f.zero = false f.zero = false
} }
...@@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) { ...@@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
buf[i] = '0' buf[i] = '0'
} }
} }
if f.unicode {
i--
buf[i] = '+'
i--
buf[i] = 'U'
}
if negative { if negative {
i-- i--
......
...@@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { ...@@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
p.fmt.integer(v, 8, signed, ldigits) p.fmt.integer(v, 8, signed, ldigits)
case 'x': case 'x':
p.fmt.integer(v, 16, signed, ldigits) p.fmt.integer(v, 16, signed, ldigits)
case 'U':
p.fmtUnicode(v)
case 'X': case 'X':
p.fmt.integer(v, 16, signed, udigits) p.fmt.integer(v, 16, signed, udigits)
default: default:
...@@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { ...@@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
} }
} }
// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by // fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
// temporarily turning on the sharp flag. // temporarily turning on the sharp flag.
func (p *pp) fmt0x64(v uint64) { func (p *pp) fmt0x64(v uint64) {
sharp := p.fmt.sharp sharp := p.fmt.sharp
...@@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) { ...@@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
p.fmt.sharp = sharp p.fmt.sharp = sharp
} }
// fmtUnicode formats an int64 in U+1234 form by temporarily turning on
// the unicode flag and, when the caller supplied no precision, defaulting
// the precision to 4 digits. The caller's precision settings are restored
// before returning.
func (p *pp) fmtUnicode(v int64) {
	// Save the caller's precision state so it can be put back afterwards.
	precPresent := p.fmt.precPresent
	prec := p.fmt.prec
	if !precPresent {
		// If prec is already set, leave it alone; otherwise 4 is minimum.
		p.fmt.prec = 4
		p.fmt.precPresent = true
	}
	p.fmt.unicode = true // turn on U+
	// v is already an int64, so no conversion is needed. It is formatted in
	// base 16 as an unsigned value using the upper-case digit set.
	p.fmt.integer(v, 16, unsigned, udigits)
	p.fmt.unicode = false
	p.fmt.prec = prec
	p.fmt.precPresent = precPresent
}
func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) { func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
switch verb { switch verb {
case 'b': case 'b':
......
...@@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) { ...@@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
var complexError = os.ErrorString("syntax error scanning complex number") var complexError = os.ErrorString("syntax error scanning complex number")
var boolError = os.ErrorString("syntax error scanning boolean") var boolError = os.ErrorString("syntax error scanning boolean")
// accepts checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the // consume reads the next rune in the input and reports whether it is in the ok string.
// buffer and returns true. Otherwise it return false. // If accept is true, it puts the character into the input token.
func (s *ss) accept(ok string) bool { func (s *ss) consume(ok string, accept bool) bool {
if s.wid >= s.maxWid { if s.wid >= s.maxWid {
return false return false
} }
...@@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool { ...@@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
} }
for i := 0; i < len(ok); i++ { for i := 0; i < len(ok); i++ {
if int(ok[i]) == rune { if int(ok[i]) == rune {
if accept {
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
s.wid++ s.wid++
}
return true return true
} }
} }
if rune != EOF { if rune != EOF && accept {
s.UngetRune() s.UngetRune()
} }
return false return false
} }
// accept checks the next rune in the input. If it matches one of the bytes (sic)
// in the string ok, it puts the rune in the buffer and returns true. Otherwise it
// returns false. It is shorthand for consume(ok, true).
func (s *ss) accept(ok string) bool {
return s.consume(ok, true)
}
// okVerb verifies that the verb is present in the list, setting s.err appropriately if not. // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
func (s *ss) okVerb(verb int, okVerbs, typ string) bool { func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
for _, v := range okVerbs { for _, v := range okVerbs {
...@@ -460,7 +468,7 @@ const ( ...@@ -460,7 +468,7 @@ const (
// getBase returns the numeric base represented by the verb and its digit string. // getBase returns the numeric base represented by the verb and its digit string.
func (s *ss) getBase(verb int) (base int, digits string) { func (s *ss) getBase(verb int) (base int, digits string) {
s.okVerb(verb, "bdoxXv", "integer") // sets s.err s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
base = 10 base = 10
digits = decimalDigits digits = decimalDigits
switch verb { switch verb {
...@@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) { ...@@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
case 'o': case 'o':
base = 8 base = 8
digits = octalDigits digits = octalDigits
case 'x', 'X': case 'x', 'X', 'U':
base = 16 base = 16
digits = hexadecimalDigits digits = hexadecimalDigits
} }
...@@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 { ...@@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
} }
base, digits := s.getBase(verb) base, digits := s.getBase(verb)
s.skipSpace(false) s.skipSpace(false)
if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ")
}
} else {
s.accept(sign) // If there's a sign, it will be left in the token buffer. s.accept(sign) // If there's a sign, it will be left in the token buffer.
}
tok := s.scanNumber(digits) tok := s.scanNumber(digits)
i, err := strconv.Btoi64(tok, base) i, err := strconv.Btoi64(tok, base)
if err != nil { if err != nil {
...@@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 { ...@@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
} }
base, digits := s.getBase(verb) base, digits := s.getBase(verb)
s.skipSpace(false) s.skipSpace(false)
if verb == 'U' {
if !s.consume("U", false) || !s.consume("+", false) {
s.errorString("bad unicode format ")
}
}
tok := s.scanNumber(digits) tok := s.scanNumber(digits)
i, err := strconv.Btoui64(tok, base) i, err := strconv.Btoui64(tok, base)
if err != nil { if err != nil {
......
...@@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{ ...@@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
{"%o", "075\n", &uintVal, uint(075)}, {"%o", "075\n", &uintVal, uint(075)},
{"%x", "a75\n", &uintVal, uint(0xa75)}, {"%x", "a75\n", &uintVal, uint(0xa75)},
{"%x", "A75\n", &uintVal, uint(0xa75)}, {"%x", "A75\n", &uintVal, uint(0xa75)},
{"%U", "U+1234\n", &intVal, int(0x1234)},
{"%U", "U+4567\n", &uintVal, uint(0x4567)},
// Strings // Strings
{"%s", "using-%s\n", &stringVal, "using-%s"}, {"%s", "using-%s\n", &stringVal, "using-%s"},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment