unicode: update to Unicode 10.0.0

Also includes all derived values as well as vendored packages. Generated by running UNICODE_VERSION=10.0.0 go generate in golang.org/x/text and modified by hand to add the tests and entries in next.txt for new script and properties. Closes Issue #21471 Change-Id: I1d10ee3887bd1fd3d5a756ee0d04bd6ec2814ba1 Reviewed-on: https://go-review.googlesource.com/63953 Run-TryBot: Marcel van Lohuizen <mpvl@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Andrew Bonventre <andybons@golang.org>

unicode: update to Unicode 10.0.0
Also includes all derived values as well as vendored packages. Generated by running UNICODE_VERSION=10.0.0 go generate in golang.org/x/text and modified by hand to add the tests and entries in next.txt for new script and properties. Closes Issue #21471 Change-Id: I1d10ee3887bd1fd3d5a756ee0d04bd6ec2814ba1 Reviewed-on: https://go-review.googlesource.com/63953 Run-TryBot: Marcel van Lohuizen <mpvl@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Andrew Bonventre <andybons@golang.org>
2fd95497 · Marcel van Lohuizen · 8c532f5f · 2fd95497 · 2fd95497 · 2fd95497
Commit 2fd95497 authored Sep 15, 2017 by Marcel van Lohuizen
16 changed files
--- a/api/except.txt
+++ b/api/except.txt
@@ -344,3 +344,4 @@ pkg syscall (openbsd-386), const SYS_KILL = 37
 pkg syscall (openbsd-386-cgo), const SYS_KILL = 37
 pkg syscall (openbsd-amd64), const SYS_KILL = 37
 pkg syscall (openbsd-amd64-cgo), const SYS_KILL = 37
+pkg unicode, const Version = "9.0.0"
--- a/api/next.txt
+++ b/api/next.txt
 pkg math/big, const MaxBase = 62
+pkg unicode, const Version = "10.0.0"
+pkg unicode, var Masaram_Gondi *RangeTable
+pkg unicode, var Nushu *RangeTable
+pkg unicode, var Soyombo *RangeTable
+pkg unicode, var Zanabazar_Square *RangeTable
+pkg unicode, var Regional_Indicator *RangeTable
--- a/src/strconv/isprint.go
+++ b/src/strconv/isprint.go
@@ -7,7 +7,7 @@

 package strconv

-// (462+139+82)*2 + (378)*4 = 2878 bytes
+// (456+140+86)*2 + (396)*4 = 2948 bytes

 var isPrint16 = []uint16{
 	0x0020, 0x007e,
@@ -25,7 +25,7 @@ var isPrint16 = []uint16{
 	0x07c0, 0x07fa,
 	0x0800, 0x082d,
 	0x0830, 0x085b,
-	0x085e, 0x085e,
+	0x085e, 0x086a,
 	0x08a0, 0x08bd,
 	0x08d4, 0x098c,
 	0x098f, 0x0990,
@@ -36,7 +36,7 @@ var isPrint16 = []uint16{
 	0x09cb, 0x09ce,
 	0x09d7, 0x09d7,
 	0x09dc, 0x09e3,
-	0x09e6, 0x09fb,
+	0x09e6, 0x09fd,
 	0x0a01, 0x0a0a,
 	0x0a0f, 0x0a10,
 	0x0a13, 0x0a39,
@@ -51,8 +51,7 @@ var isPrint16 = []uint16{
 	0x0ad0, 0x0ad0,
 	0x0ae0, 0x0ae3,
 	0x0ae6, 0x0af1,
-	0x0af9, 0x0af9,
-	0x0b01, 0x0b0c,
+	0x0af9, 0x0b0c,
 	0x0b0f, 0x0b10,
 	0x0b13, 0x0b39,
 	0x0b3c, 0x0b44,
@@ -82,8 +81,7 @@ var isPrint16 = []uint16{
 	0x0cd5, 0x0cd6,
 	0x0cde, 0x0ce3,
 	0x0ce6, 0x0cf2,
-	0x0d01, 0x0d3a,
-	0x0d3d, 0x0d4f,
+	0x0d00, 0x0d4f,
 	0x0d54, 0x0d63,
 	0x0d66, 0x0d7f,
 	0x0d82, 0x0d96,
@@ -154,8 +152,7 @@ var isPrint16 = []uint16{
 	0x1c4d, 0x1c88,
 	0x1cc0, 0x1cc7,
 	0x1cd0, 0x1cf9,
-	0x1d00, 0x1df5,
-	0x1dfb, 0x1f15,
+	0x1d00, 0x1f15,
 	0x1f18, 0x1f1d,
 	0x1f20, 0x1f45,
 	0x1f48, 0x1f4d,
@@ -167,7 +164,7 @@ var isPrint16 = []uint16{
 	0x2030, 0x205e,
 	0x2070, 0x2071,
 	0x2074, 0x209c,
-	0x20a0, 0x20be,
+	0x20a0, 0x20bf,
 	0x20d0, 0x20f0,
 	0x2100, 0x218b,
 	0x2190, 0x2426,
@@ -175,7 +172,7 @@ var isPrint16 = []uint16{
 	0x2460, 0x2b73,
 	0x2b76, 0x2b95,
 	0x2b98, 0x2bb9,
-	0x2bbd, 0x2bd1,
+	0x2bbd, 0x2bd2,
 	0x2bec, 0x2bef,
 	0x2c00, 0x2cf3,
 	0x2cf9, 0x2d27,
@@ -183,17 +180,17 @@ var isPrint16 = []uint16{
 	0x2d30, 0x2d67,
 	0x2d6f, 0x2d70,
 	0x2d7f, 0x2d96,
-	0x2da0, 0x2e44,
+	0x2da0, 0x2e49,
 	0x2e80, 0x2ef3,
 	0x2f00, 0x2fd5,
 	0x2ff0, 0x2ffb,
 	0x3001, 0x3096,
 	0x3099, 0x30ff,
-	0x3105, 0x312d,
+	0x3105, 0x312e,
 	0x3131, 0x31ba,
 	0x31c0, 0x31e3,
 	0x31f0, 0x4db5,
-	0x4dc0, 0x9fd5,
+	0x4dc0, 0x9fea,
 	0xa000, 0xa48c,
 	0xa490, 0xa4c6,
 	0xa4d0, 0xa62b,
@@ -254,6 +251,7 @@ var isNotPrint16 = []uint16{
 	0x0590,
 	0x06dd,
 	0x083f,
+	0x085f,
 	0x08b5,
 	0x08e2,
 	0x0984,
@@ -275,6 +273,7 @@ var isNotPrint16 = []uint16{
 	0x0ab4,
 	0x0ac6,
 	0x0aca,
+	0x0b00,
 	0x0b04,
 	0x0b29,
 	0x0b31,
@@ -341,7 +340,7 @@ var isNotPrint16 = []uint16{
 	0x1771,
 	0x191f,
 	0x1a5f,
-	0x1cf7,
+	0x1dfa,
 	0x1f58,
 	0x1f5a,
 	0x1f5c,
@@ -351,7 +350,6 @@ var isNotPrint16 = []uint16{
 	0x1fdc,
 	0x1ff5,
 	0x208f,
-	0x23ff,
 	0x2bc9,
 	0x2c2f,
 	0x2c5f,
@@ -398,7 +396,7 @@ var isPrint32 = []uint32{
 	0x0102a0, 0x0102d0,
 	0x0102e0, 0x0102fb,
 	0x010300, 0x010323,
-	0x010330, 0x01034a,
+	0x01032d, 0x01034a,
 	0x010350, 0x01037a,
 	0x010380, 0x0103c3,
 	0x0103c8, 0x0103d5,
@@ -481,11 +479,17 @@ var isPrint32 = []uint32{
 	0x011730, 0x01173f,
 	0x0118a0, 0x0118f2,
 	0x0118ff, 0x0118ff,
+	0x011a00, 0x011a47,
+	0x011a50, 0x011a83,
+	0x011a86, 0x011aa2,
 	0x011ac0, 0x011af8,
 	0x011c00, 0x011c45,
 	0x011c50, 0x011c6c,
 	0x011c70, 0x011c8f,
 	0x011c92, 0x011cb6,
+	0x011d00, 0x011d36,
+	0x011d3a, 0x011d47,
+	0x011d50, 0x011d59,
 	0x012000, 0x012399,
 	0x012400, 0x012474,
 	0x012480, 0x012543,
@@ -502,10 +506,11 @@ var isPrint32 = []uint32{
 	0x016f00, 0x016f44,
 	0x016f50, 0x016f7e,
 	0x016f8f, 0x016f9f,
-	0x016fe0, 0x016fe0,
+	0x016fe0, 0x016fe1,
 	0x017000, 0x0187ec,
 	0x018800, 0x018af2,
-	0x01b000, 0x01b001,
+	0x01b000, 0x01b11e,
+	0x01b170, 0x01b2fb,
 	0x01bc00, 0x01bc6a,
 	0x01bc70, 0x01bc7c,
 	0x01bc80, 0x01bc88,
@@ -553,9 +558,10 @@ var isPrint32 = []uint32{
 	0x01f210, 0x01f23b,
 	0x01f240, 0x01f248,
 	0x01f250, 0x01f251,
-	0x01f300, 0x01f6d2,
+	0x01f260, 0x01f265,
+	0x01f300, 0x01f6d4,
 	0x01f6e0, 0x01f6ec,
-	0x01f6f0, 0x01f6f6,
+	0x01f6f0, 0x01f6f8,
 	0x01f700, 0x01f773,
 	0x01f780, 0x01f7d4,
 	0x01f800, 0x01f80b,
@@ -563,16 +569,17 @@ var isPrint32 = []uint32{
 	0x01f850, 0x01f859,
 	0x01f860, 0x01f887,
 	0x01f890, 0x01f8ad,
-	0x01f910, 0x01f927,
-	0x01f930, 0x01f930,
-	0x01f933, 0x01f94b,
-	0x01f950, 0x01f95e,
-	0x01f980, 0x01f991,
+	0x01f900, 0x01f90b,
+	0x01f910, 0x01f94c,
+	0x01f950, 0x01f96b,
+	0x01f980, 0x01f997,
 	0x01f9c0, 0x01f9c0,
+	0x01f9d0, 0x01f9e6,
 	0x020000, 0x02a6d6,
 	0x02a700, 0x02b734,
 	0x02b740, 0x02b81d,
 	0x02b820, 0x02cea1,
+	0x02ceb0, 0x02ebe0,
 	0x02f800, 0x02fa1d,
 	0x0e0100, 0x0e01ef,
 }
@@ -605,9 +612,14 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
 	0x1334,
 	0x145a,
 	0x145c,
+	0x1a9d,
 	0x1c09,
 	0x1c37,
 	0x1ca8,
+	0x1d07,
+	0x1d0a,
+	0x1d3b,
+	0x1d3e,
 	0x246f,
 	0x6a5f,
 	0x6b5a,
@@ -658,7 +670,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
 	0xf0c0,
 	0xf0d0,
 	0xf12f,
-	0xf91f,
 	0xf93f,
 }


--- a/src/unicode/script_test.go
+++ b/src/unicode/script_test.go
@@ -14,8 +14,13 @@ type T struct {
 	script string
 }

-// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0
+// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0, 8.0.0,
+// 9.0.0, 10.0.0.
 // mostly to discover when new scripts and categories arise.
+// If this test fails, add the missing scripts to the test and add entries
+// of the form
+//     pkg unicode, var <new script> *RangeTable
+// to api/next.txt.
 var inTest = []T{
 	{0x11711, "Ahom"},
 	{0x1e900, "Adlam"},
@@ -92,6 +97,7 @@ var inTest = []T{
 	{0x0843, "Mandaic"},
 	{0x10ac8, "Manichaean"},
 	{0x11cB6, "Marchen"},
+	{0x11d59, "Masaram_Gondi"},
 	{0xabd0, "Meetei_Mayek"},
 	{0x1e800, "Mende_Kikakui"},
 	{0x1099f, "Meroitic_Hieroglyphs"},
@@ -106,6 +112,7 @@ var inTest = []T{
 	{0x11400, "Newa"},
 	{0x19c3, "New_Tai_Lue"},
 	{0x07f8, "Nko"},
+	{0x1b170, "Nushu"},
 	{0x169b, "Ogham"},
 	{0x1c6a, "Ol_Chiki"},
 	{0x10C80, "Old_Hungarian"},
@@ -134,6 +141,7 @@ var inTest = []T{
 	{0x1D920, "SignWriting"},
 	{0x0dbd, "Sinhala"},
 	{0x110d0, "Sora_Sompeng"},
+	{0x11a99, "Soyombo"},
 	{0x1ba3, "Sundanese"},
 	{0xa803, "Syloti_Nagri"},
 	{0x070f, "Syriac"},
@@ -155,6 +163,7 @@ var inTest = []T{
 	{0xa60e, "Vai"},
 	{0x118ff, "Warang_Citi"},
 	{0xa216, "Yi"},
+	{0x11a0a, "Zanabazar_Square"},
 }

 var outTest = []T{ // not really worth being thorough
@@ -229,6 +238,7 @@ var inPropTest = []T{
 	{0x06DD, "Prepended_Concatenation_Mark"},
 	{0x300D, "Quotation_Mark"},
 	{0x2EF3, "Radical"},
+	{0x1f1ff, "Regional_Indicator"},
 	{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
 	{0x061F, "Sentence_Terminal"},
 	{0x2071, "Soft_Dotted"},

--- a/src/unicode/tables.go
+++ b/src/unicode/tables.go
--- a/src/vendor/golang_org/x/net/idna/idna.go
+++ b/src/vendor/golang_org/x/net/idna/idna.go
@@ -21,6 +21,7 @@ import (
 	"unicode/utf8"

 	"golang_org/x/text/secure/bidirule"
+	"golang_org/x/text/unicode/bidi"
 	"golang_org/x/text/unicode/norm"
 )

@@ -67,6 +68,15 @@ func VerifyDNSLength(verify bool) Option {
 	return func(o *options) { o.verifyDNSLength = verify }
 }

+// RemoveLeadingDots removes leading label separators. Leading runes that map to
+// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
+//
+// This is the behavior suggested by UTS #46 and is adopted by some
+// browsers.
+func RemoveLeadingDots(remove bool) Option {
+	return func(o *options) { o.removeLeadingDots = remove }
+}
+
 // ValidateLabels sets whether to check the mandatory label validation criteria
 // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
 // of hyphens ('-'), normalization, validity of runes, and the context rules.
@@ -83,7 +93,7 @@ func ValidateLabels(enable bool) Option {
 	}
 }

-// StrictDomainName limits the set of permissable ASCII characters to those
+// StrictDomainName limits the set of permissible ASCII characters to those
 // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
 // hyphen). This is set by default for MapForLookup and ValidateForRegistration.
 //
@@ -137,10 +147,11 @@ func MapForLookup() Option {
 }

 type options struct {
-	transitional    bool
-	useSTD3Rules    bool
-	validateLabels  bool
-	verifyDNSLength bool
+	transitional      bool
+	useSTD3Rules      bool
+	validateLabels    bool
+	verifyDNSLength   bool
+	removeLeadingDots bool

 	trie *idnaTrie

@@ -149,14 +160,14 @@ type options struct {

 	// mapping implements a validation and mapping step as defined in RFC 5895
 	// or UTS 46, tailored to, for example, domain registration or lookup.
-	mapping func(p *Profile, s string) (string, error)
+	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

 	// bidirule, if specified, checks whether s conforms to the Bidi Rule
 	// defined in RFC 5893.
 	bidirule func(s string) bool
 }

-// A Profile defines the configuration of a IDNA mapper.
+// A Profile defines the configuration of an IDNA mapper.
 type Profile struct {
 	options
 }
@@ -289,12 +300,16 @@ func (e runeError) Error() string {
 // see http://www.unicode.org/reports/tr46.
 func (p *Profile) process(s string, toASCII bool) (string, error) {
 	var err error
+	var isBidi bool
 	if p.mapping != nil {
-		s, err = p.mapping(p, s)
+		s, isBidi, err = p.mapping(p, s)
 	}
 	// Remove leading empty labels.
-	for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
+	if p.removeLeadingDots {
+		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
+		}
 	}
+	// TODO: allow for a quick check of the tables data.
 	// It seems like we should only create this error on ToASCII, but the
 	// UTS 46 conformance tests suggests we should always check this.
 	if err == nil && p.verifyDNSLength && s == "" {
@@ -320,6 +335,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
 				// Spec says keep the old label.
 				continue
 			}
+			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
 			labels.set(u)
 			if err == nil && p.validateLabels {
 				err = p.fromPuny(p, u)
@@ -334,6 +350,14 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
 			err = p.validateLabel(label)
 		}
 	}
+	if isBidi && p.bidirule != nil && err == nil {
+		for labels.reset(); !labels.done(); labels.next() {
+			if !p.bidirule(labels.label()) {
+				err = &labelError{s, "B"}
+				break
+			}
+		}
+	}
 	if toASCII {
 		for labels.reset(); !labels.done(); labels.next() {
 			label := labels.label()
@@ -365,41 +389,65 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
 	return s, err
 }

-func normalize(p *Profile, s string) (string, error) {
-	return norm.NFC.String(s), nil
+func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
+	// TODO: consider first doing a quick check to see if any of these checks
+	// need to be done. This will make it slower in the general case, but
+	// faster in the common case.
+	mapped = norm.NFC.String(s)
+	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
+	return mapped, isBidi, nil
 }

-func validateRegistration(p *Profile, s string) (string, error) {
+func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
+	// TODO: filter need for normalization in loop below.
 	if !norm.NFC.IsNormalString(s) {
-		return s, &labelError{s, "V1"}
+		return s, false, &labelError{s, "V1"}
 	}
-	var err error
 	for i := 0; i < len(s); {
 		v, sz := trie.lookupString(s[i:])
-		i += sz
+		bidi = bidi || info(v).isBidi(s[i:])
 		// Copy bytes not copied so far.
 		switch p.simplify(info(v).category()) {
 		// TODO: handle the NV8 defined in the Unicode idna data set to allow
 		// for strict conformance to IDNA2008.
 		case valid, deviation:
 		case disallowed, mapped, unknown, ignored:
-			if err == nil {
-				r, _ := utf8.DecodeRuneInString(s[i:])
-				err = runeError(r)
-			}
+			r, _ := utf8.DecodeRuneInString(s[i:])
+			return s, bidi, runeError(r)
 		}
+		i += sz
 	}
-	return s, err
+	return s, bidi, nil
 }

-func validateAndMap(p *Profile, s string) (string, error) {
+func (c info) isBidi(s string) bool {
+	if !c.isMapped() {
+		return c&attributesMask == rtl
+	}
+	// TODO: also store bidi info for mapped data. This is possible, but a bit
+	// cumbersome and not for the common case.
+	p, _ := bidi.LookupString(s)
+	switch p.Class() {
+	case bidi.R, bidi.AL, bidi.AN:
+		return true
+	}
+	return false
+}
+
+func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
 	var (
-		err error
-		b   []byte
-		k   int
+		b []byte
+		k int
 	)
+	// combinedInfoBits contains the or-ed bits of all runes. We use this
+	// to derive the mayNeedNorm bit later. This may trigger normalization
+	// overeagerly, but it will not do so in the common case. The end result
+	// is another 10% saving on BenchmarkProfile for the common case.
+	var combinedInfoBits info
 	for i := 0; i < len(s); {
 		v, sz := trie.lookupString(s[i:])
+		combinedInfoBits |= info(v)
+		bidi = bidi || info(v).isBidi(s[i:])
 		start := i
 		i += sz
 		// Copy bytes not copied so far.
@@ -408,7 +456,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
 			continue
 		case disallowed:
 			if err == nil {
-				r, _ := utf8.DecodeRuneInString(s[i:])
+				r, _ := utf8.DecodeRuneInString(s[start:])
 				err = runeError(r)
 			}
 			continue
@@ -426,7 +474,9 @@ func validateAndMap(p *Profile, s string) (string, error) {
 	}
 	if k == 0 {
 		// No changes so far.
-		s = norm.NFC.String(s)
+		if combinedInfoBits&mayNeedNorm != 0 {
+			s = norm.NFC.String(s)
+		}
 	} else {
 		b = append(b, s[k:]...)
 		if norm.NFC.QuickSpan(b) != len(b) {
@@ -435,7 +485,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
 		// TODO: the punycode converters require strings as input.
 		s = string(b)
 	}
-	return s, err
+	return s, bidi, err
 }

 // A labelIter allows iterating over domain name labels.
@@ -530,6 +580,8 @@ func validateFromPunycode(p *Profile, s string) error {
 	if !norm.NFC.IsNormalString(s) {
 		return &labelError{s, "V1"}
 	}
+	// TODO: detect whether string may have to be normalized in the following
+	// loop.
 	for i := 0; i < len(s); {
 		v, sz := trie.lookupString(s[i:])
 		if c := p.simplify(info(v).category()); c != valid && c != deviation {
@@ -604,16 +656,13 @@ var joinStates = [][numJoinTypes]joinState{

 // validateLabel validates the criteria from Section 4.1. Items 1, 4, and 6 are
 // already implicitly satisfied by the overall implementation.
-func (p *Profile) validateLabel(s string) error {
+func (p *Profile) validateLabel(s string) (err error) {
 	if s == "" {
 		if p.verifyDNSLength {
 			return &labelError{s, "A4"}
 		}
 		return nil
 	}
-	if p.bidirule != nil && !p.bidirule(s) {
-		return &labelError{s, "B"}
-	}
 	if !p.validateLabels {
 		return nil
 	}

--- a/src/vendor/golang_org/x/net/idna/tables.go
+++ b/src/vendor/golang_org/x/net/idna/tables.go
--- a/src/vendor/golang_org/x/net/idna/trieval.go
+++ b/src/vendor/golang_org/x/net/idna/trieval.go
@@ -28,9 +28,9 @@ package idna
 //       15..3  index into xor or mapping table
 //     }
 //   } else {
-//       15..13 unused
-//           12 modifier (including virama)
-//           11 virama modifier
+//       15..14 unused
+//       13     mayNeedNorm
+//       12..11 attributes
 //       10..8  joining type
 //        7..3  category type
 //   }
@@ -51,15 +51,20 @@ const (
 	joinShift = 8
 	joinMask  = 0x07

-	viramaModifier = 0x0800
+	// Attributes
+	attributesMask = 0x1800
+	viramaModifier = 0x1800
 	modifier       = 0x1000
+	rtl            = 0x0800
+
+	mayNeedNorm = 0x2000
 )

 // A category corresponds to a category defined in the IDNA mapping table.
 type category uint16

 const (
-	unknown              category = 0 // not defined currently in unicode.
+	unknown              category = 0 // not currently defined in unicode.
 	mapped               category = 1
 	disallowedSTD3Mapped category = 2
 	deviation            category = 3
@@ -112,5 +117,5 @@ func (c info) isModifier() bool {
 }

 func (c info) isViramaModifier() bool {
-	return c&(viramaModifier|catSmallMask) == viramaModifier
+	return c&(attributesMask|catSmallMask) == viramaModifier
 }
--- a/src/vendor/golang_org/x/text/secure/bidirule/bidirule.go
+++ b/src/vendor/golang_org/x/text/secure/bidirule/bidirule.go
@@ -157,6 +157,7 @@ func DirectionString(s string) bidi.Direction {
 		e, sz := bidi.LookupString(s[i:])
 		if sz == 0 {
 			i++
+			continue
 		}
 		c := e.Class()
 		if c == bidi.R || c == bidi.AL || c == bidi.AN {
@@ -205,9 +206,6 @@ func (t *Transformer) isRTL() bool {
 }

 func (t *Transformer) isFinal() bool {
-	if !t.isRTL() {
-		return true
-	}
 	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
 }


--- a/src/vendor/golang_org/x/text/unicode/bidi/tables.go
+++ b/src/vendor/golang_org/x/text/unicode/bidi/tables.go
--- a/src/vendor/golang_org/x/text/unicode/norm/composition.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/composition.go
@@ -35,17 +35,9 @@ const (
 // streamSafe implements the policy of when a CGJ should be inserted.
 type streamSafe uint8

-// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
-// first on it.
-func mkStreamSafe(p Properties) streamSafe {
-	return streamSafe(p.nTrailingNonStarters())
-}
-
-// first inserts the first rune of a segment.
+// first inserts the first rune of a segment. It is a faster version of next if
+// it is known p represents the first rune in a segment.
 func (ss *streamSafe) first(p Properties) {
-	if *ss != 0 {
-		panic("!= 0")
-	}
 	*ss = streamSafe(p.nTrailingNonStarters())
 }

@@ -68,7 +60,7 @@ func (ss *streamSafe) next(p Properties) ssState {
 	// be a non-starter. Note that it always holds that if nLead > 0 then
 	// nLead == nTrail.
 	if n == 0 {
-		*ss = 0
+		*ss = streamSafe(p.nTrailingNonStarters())
 		return ssStarter
 	}
 	return ssSuccess
@@ -144,7 +136,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
 func (rb *reorderBuffer) reset() {
 	rb.nrune = 0
 	rb.nbyte = 0
-	rb.ss = 0
 }

 func (rb *reorderBuffer) doFlush() bool {
@@ -259,6 +250,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
 // It flushes the buffer on each new segment start.
 func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
 	rb.tmpBytes.setBytes(dcomp)
+	// As the streamSafe accounting already handles the counting for modifiers,
+	// we don't have to call next. However, we do need to keep the accounting
+	// intact when flushing the buffer.
 	for i := 0; i < len(dcomp); {
 		info := rb.f.info(rb.tmpBytes, i)
 		if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {

--- a/src/vendor/golang_org/x/text/unicode/norm/input.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/input.go
@@ -92,16 +92,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
 }

 func (in *input) hangul(p int) (r rune) {
+	var size int
 	if in.bytes == nil {
 		if !isHangulString(in.str[p:]) {
 			return 0
 		}
-		r, _ = utf8.DecodeRuneInString(in.str[p:])
+		r, size = utf8.DecodeRuneInString(in.str[p:])
 	} else {
 		if !isHangul(in.bytes[p:]) {
 			return 0
 		}
-		r, _ = utf8.DecodeRune(in.bytes[p:])
+		r, size = utf8.DecodeRune(in.bytes[p:])
+	}
+	if size != hangulUTF8Size {
+		return 0
 	}
 	return r
 }
--- a/src/vendor/golang_org/x/text/unicode/norm/iter.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/iter.go
@@ -43,6 +43,7 @@ func (i *Iter) Init(f Form, src []byte) {
 	i.next = i.rb.f.nextMain
 	i.asciiF = nextASCIIBytes
 	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
 }

 // InitString initializes i to iterate over src after normalizing it to Form f.
@@ -58,11 +59,12 @@ func (i *Iter) InitString(f Form, src string) {
 	i.next = i.rb.f.nextMain
 	i.asciiF = nextASCIIString
 	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
 }

 // Seek sets the segment to be returned by the next call to Next to start
 // at position p.  It is the responsibility of the caller to set p to the
-// start of a UTF8 rune.
+// start of a segment.
 func (i *Iter) Seek(offset int64, whence int) (int64, error) {
 	var abs int64
 	switch whence {
@@ -86,6 +88,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
 	i.multiSeg = nil
 	i.next = i.rb.f.nextMain
 	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
 	return abs, nil
 }

@@ -163,6 +166,7 @@ func nextHangul(i *Iter) []byte {
 	if next >= i.rb.nsrc {
 		i.setDone()
 	} else if i.rb.src.hangul(next) == 0 {
+		i.rb.ss.next(i.info)
 		i.info = i.rb.f.info(i.rb.src, i.p)
 		i.next = i.rb.f.nextMain
 		return i.next(i)
@@ -206,12 +210,10 @@ func nextMultiNorm(i *Iter) []byte {
 		if info.BoundaryBefore() {
 			i.rb.compose()
 			seg := i.buf[:i.rb.flushCopy(i.buf[:])]
-			i.rb.ss.first(info)
 			i.rb.insertUnsafe(input{bytes: d}, j, info)
 			i.multiSeg = d[j+int(info.size):]
 			return seg
 		}
-		i.rb.ss.next(info)
 		i.rb.insertUnsafe(input{bytes: d}, j, info)
 		j += int(info.size)
 	}
@@ -224,9 +226,9 @@ func nextMultiNorm(i *Iter) []byte {
 func nextDecomposed(i *Iter) (next []byte) {
 	outp := 0
 	inCopyStart, outCopyStart := i.p, 0
-	ss := mkStreamSafe(i.info)
 	for {
 		if sz := int(i.info.size); sz <= 1 {
+			i.rb.ss = 0
 			p := i.p
 			i.p++ // ASCII or illegal byte.  Either way, advance by 1.
 			if i.p >= i.rb.nsrc {
@@ -245,6 +247,8 @@ func nextDecomposed(i *Iter) (next []byte) {
 			p := outp + len(d)
 			if outp > 0 {
 				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
+				// TODO: this condition should not be possible, but we leave it
+				// in for defensive purposes.
 				if p > len(i.buf) {
 					return i.buf[:outp]
 				}
@@ -268,7 +272,7 @@ func nextDecomposed(i *Iter) (next []byte) {
 			} else {
 				i.info = i.rb.f.info(i.rb.src, i.p)
 			}
-			switch ss.next(i.info) {
+			switch i.rb.ss.next(i.info) {
 			case ssOverflow:
 				i.next = nextCGJDecompose
 				fallthrough
@@ -311,7 +315,7 @@ func nextDecomposed(i *Iter) (next []byte) {
 		}
 		prevCC := i.info.tccc
 		i.info = i.rb.f.info(i.rb.src, i.p)
-		if v := ss.next(i.info); v == ssStarter {
+		if v := i.rb.ss.next(i.info); v == ssStarter {
 			break
 		} else if v == ssOverflow {
 			i.next = nextCGJDecompose
@@ -337,10 +341,6 @@ doNorm:

 func doNormDecomposed(i *Iter) []byte {
 	for {
-		if s := i.rb.ss.next(i.info); s == ssOverflow {
-			i.next = nextCGJDecompose
-			break
-		}
 		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
 		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
 			i.setDone()
@@ -350,6 +350,10 @@ func doNormDecomposed(i *Iter) []byte {
 		if i.info.ccc == 0 {
 			break
 		}
+		if s := i.rb.ss.next(i.info); s == ssOverflow {
+			i.next = nextCGJDecompose
+			break
+		}
 	}
 	// new segment or too many combining characters: exit normalization
 	return i.buf[:i.rb.flushCopy(i.buf[:])]
@@ -359,6 +363,7 @@ func nextCGJDecompose(i *Iter) []byte {
 	i.rb.ss = 0
 	i.rb.insertCGJ()
 	i.next = nextDecomposed
+	i.rb.ss.first(i.info)
 	buf := doNormDecomposed(i)
 	return buf
 }
@@ -367,7 +372,6 @@ func nextCGJDecompose(i *Iter) []byte {
 func nextComposed(i *Iter) []byte {
 	outp, startp := 0, i.p
 	var prevCC uint8
-	ss := mkStreamSafe(i.info)
 	for {
 		if !i.info.isYesC() {
 			goto doNorm
@@ -387,11 +391,12 @@ func nextComposed(i *Iter) []byte {
 			i.setDone()
 			break
 		} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
+			i.rb.ss = 0
 			i.next = i.asciiF
 			break
 		}
 		i.info = i.rb.f.info(i.rb.src, i.p)
-		if v := ss.next(i.info); v == ssStarter {
+		if v := i.rb.ss.next(i.info); v == ssStarter {
 			break
 		} else if v == ssOverflow {
 			i.next = nextCGJCompose
@@ -403,8 +408,10 @@ func nextComposed(i *Iter) []byte {
 	}
 	return i.returnSlice(startp, i.p)
 doNorm:
+	// reset to start position
 	i.p = startp
 	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
 	if i.info.multiSegment() {
 		d := i.info.Decomposition()
 		info := i.rb.f.info(input{bytes: d}, 0)

--- a/src/vendor/golang_org/x/text/unicode/norm/normalize.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/normalize.go
@@ -324,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
 		// have an overflow for runes that are starters (e.g. with U+FF9E).
 		switch ss.next(info) {
 		case ssStarter:
-			ss.first(info)
 			lastSegStart = i
 		case ssOverflow:
 			return lastSegStart, false
@@ -441,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
 			}
 			return -1
 		}
+		// TODO: Using streamSafe to determine the boundary isn't the same as
+		// using BoundaryBefore. Determine which should be used.
 		if s := ss.next(info); s != ssSuccess {
 			return i
 		}
@@ -505,15 +506,14 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
 	if info.size == 0 {
 		return 0
 	}
-	if rb.nrune > 0 {
-		if s := rb.ss.next(info); s == ssStarter {
-			goto end
-		} else if s == ssOverflow {
-			rb.insertCGJ()
+	if s := rb.ss.next(info); s == ssStarter {
+		// TODO: this could be removed if we don't support merging.
+		if rb.nrune > 0 {
 			goto end
 		}
-	} else {
-		rb.ss.first(info)
+	} else if s == ssOverflow {
+		rb.insertCGJ()
+		goto end
 	}
 	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
 		return int(err)

--- a/src/vendor/golang_org/x/text/unicode/norm/tables.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/tables.go
--- a/src/vendor/golang_org/x/text/unicode/norm/transform.go
+++ b/src/vendor/golang_org/x/text/unicode/norm/transform.go
@@ -42,7 +42,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
 }

 func flushTransform(rb *reorderBuffer) bool {
-	// Write out (must fully fit in dst, or else it is a ErrShortDst).
+	// Write out (must fully fit in dst, or else it is an ErrShortDst).
 	if len(rb.out) < rb.nrune*utf8.UTFMax {
 		return false
 	}