Commit 2fd95497 authored by Marcel van Lohuizen

unicode: update to Unicode 10.0.0

Also includes all derived values as well as
vendored packages.

Generated by running
    UNICODE_VERSION=10.0.0 go generate
in golang.org/x/text

and modified by hand to add the tests and
entries in next.txt for new script and properties.

Closes Issue #21471

Change-Id: I1d10ee3887bd1fd3d5a756ee0d04bd6ec2814ba1
Reviewed-on: https://go-review.googlesource.com/63953
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
parent 8c532f5f
...@@ -344,3 +344,4 @@ pkg syscall (openbsd-386), const SYS_KILL = 37 ...@@ -344,3 +344,4 @@ pkg syscall (openbsd-386), const SYS_KILL = 37
pkg syscall (openbsd-386-cgo), const SYS_KILL = 37 pkg syscall (openbsd-386-cgo), const SYS_KILL = 37
pkg syscall (openbsd-amd64), const SYS_KILL = 37 pkg syscall (openbsd-amd64), const SYS_KILL = 37
pkg syscall (openbsd-amd64-cgo), const SYS_KILL = 37 pkg syscall (openbsd-amd64-cgo), const SYS_KILL = 37
pkg unicode, const Version = "9.0.0"
pkg math/big, const MaxBase = 62 pkg math/big, const MaxBase = 62
pkg unicode, const Version = "10.0.0"
pkg unicode, var Masaram_Gondi *RangeTable
pkg unicode, var Nushu *RangeTable
pkg unicode, var Soyombo *RangeTable
pkg unicode, var Zanabazar_Square *RangeTable
pkg unicode, var Regional_Indicator *RangeTable
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
package strconv package strconv
// (462+139+82)*2 + (378)*4 = 2878 bytes // (456+140+86)*2 + (396)*4 = 2948 bytes
var isPrint16 = []uint16{ var isPrint16 = []uint16{
0x0020, 0x007e, 0x0020, 0x007e,
...@@ -25,7 +25,7 @@ var isPrint16 = []uint16{ ...@@ -25,7 +25,7 @@ var isPrint16 = []uint16{
0x07c0, 0x07fa, 0x07c0, 0x07fa,
0x0800, 0x082d, 0x0800, 0x082d,
0x0830, 0x085b, 0x0830, 0x085b,
0x085e, 0x085e, 0x085e, 0x086a,
0x08a0, 0x08bd, 0x08a0, 0x08bd,
0x08d4, 0x098c, 0x08d4, 0x098c,
0x098f, 0x0990, 0x098f, 0x0990,
...@@ -36,7 +36,7 @@ var isPrint16 = []uint16{ ...@@ -36,7 +36,7 @@ var isPrint16 = []uint16{
0x09cb, 0x09ce, 0x09cb, 0x09ce,
0x09d7, 0x09d7, 0x09d7, 0x09d7,
0x09dc, 0x09e3, 0x09dc, 0x09e3,
0x09e6, 0x09fb, 0x09e6, 0x09fd,
0x0a01, 0x0a0a, 0x0a01, 0x0a0a,
0x0a0f, 0x0a10, 0x0a0f, 0x0a10,
0x0a13, 0x0a39, 0x0a13, 0x0a39,
...@@ -51,8 +51,7 @@ var isPrint16 = []uint16{ ...@@ -51,8 +51,7 @@ var isPrint16 = []uint16{
0x0ad0, 0x0ad0, 0x0ad0, 0x0ad0,
0x0ae0, 0x0ae3, 0x0ae0, 0x0ae3,
0x0ae6, 0x0af1, 0x0ae6, 0x0af1,
0x0af9, 0x0af9, 0x0af9, 0x0b0c,
0x0b01, 0x0b0c,
0x0b0f, 0x0b10, 0x0b0f, 0x0b10,
0x0b13, 0x0b39, 0x0b13, 0x0b39,
0x0b3c, 0x0b44, 0x0b3c, 0x0b44,
...@@ -82,8 +81,7 @@ var isPrint16 = []uint16{ ...@@ -82,8 +81,7 @@ var isPrint16 = []uint16{
0x0cd5, 0x0cd6, 0x0cd5, 0x0cd6,
0x0cde, 0x0ce3, 0x0cde, 0x0ce3,
0x0ce6, 0x0cf2, 0x0ce6, 0x0cf2,
0x0d01, 0x0d3a, 0x0d00, 0x0d4f,
0x0d3d, 0x0d4f,
0x0d54, 0x0d63, 0x0d54, 0x0d63,
0x0d66, 0x0d7f, 0x0d66, 0x0d7f,
0x0d82, 0x0d96, 0x0d82, 0x0d96,
...@@ -154,8 +152,7 @@ var isPrint16 = []uint16{ ...@@ -154,8 +152,7 @@ var isPrint16 = []uint16{
0x1c4d, 0x1c88, 0x1c4d, 0x1c88,
0x1cc0, 0x1cc7, 0x1cc0, 0x1cc7,
0x1cd0, 0x1cf9, 0x1cd0, 0x1cf9,
0x1d00, 0x1df5, 0x1d00, 0x1f15,
0x1dfb, 0x1f15,
0x1f18, 0x1f1d, 0x1f18, 0x1f1d,
0x1f20, 0x1f45, 0x1f20, 0x1f45,
0x1f48, 0x1f4d, 0x1f48, 0x1f4d,
...@@ -167,7 +164,7 @@ var isPrint16 = []uint16{ ...@@ -167,7 +164,7 @@ var isPrint16 = []uint16{
0x2030, 0x205e, 0x2030, 0x205e,
0x2070, 0x2071, 0x2070, 0x2071,
0x2074, 0x209c, 0x2074, 0x209c,
0x20a0, 0x20be, 0x20a0, 0x20bf,
0x20d0, 0x20f0, 0x20d0, 0x20f0,
0x2100, 0x218b, 0x2100, 0x218b,
0x2190, 0x2426, 0x2190, 0x2426,
...@@ -175,7 +172,7 @@ var isPrint16 = []uint16{ ...@@ -175,7 +172,7 @@ var isPrint16 = []uint16{
0x2460, 0x2b73, 0x2460, 0x2b73,
0x2b76, 0x2b95, 0x2b76, 0x2b95,
0x2b98, 0x2bb9, 0x2b98, 0x2bb9,
0x2bbd, 0x2bd1, 0x2bbd, 0x2bd2,
0x2bec, 0x2bef, 0x2bec, 0x2bef,
0x2c00, 0x2cf3, 0x2c00, 0x2cf3,
0x2cf9, 0x2d27, 0x2cf9, 0x2d27,
...@@ -183,17 +180,17 @@ var isPrint16 = []uint16{ ...@@ -183,17 +180,17 @@ var isPrint16 = []uint16{
0x2d30, 0x2d67, 0x2d30, 0x2d67,
0x2d6f, 0x2d70, 0x2d6f, 0x2d70,
0x2d7f, 0x2d96, 0x2d7f, 0x2d96,
0x2da0, 0x2e44, 0x2da0, 0x2e49,
0x2e80, 0x2ef3, 0x2e80, 0x2ef3,
0x2f00, 0x2fd5, 0x2f00, 0x2fd5,
0x2ff0, 0x2ffb, 0x2ff0, 0x2ffb,
0x3001, 0x3096, 0x3001, 0x3096,
0x3099, 0x30ff, 0x3099, 0x30ff,
0x3105, 0x312d, 0x3105, 0x312e,
0x3131, 0x31ba, 0x3131, 0x31ba,
0x31c0, 0x31e3, 0x31c0, 0x31e3,
0x31f0, 0x4db5, 0x31f0, 0x4db5,
0x4dc0, 0x9fd5, 0x4dc0, 0x9fea,
0xa000, 0xa48c, 0xa000, 0xa48c,
0xa490, 0xa4c6, 0xa490, 0xa4c6,
0xa4d0, 0xa62b, 0xa4d0, 0xa62b,
...@@ -254,6 +251,7 @@ var isNotPrint16 = []uint16{ ...@@ -254,6 +251,7 @@ var isNotPrint16 = []uint16{
0x0590, 0x0590,
0x06dd, 0x06dd,
0x083f, 0x083f,
0x085f,
0x08b5, 0x08b5,
0x08e2, 0x08e2,
0x0984, 0x0984,
...@@ -275,6 +273,7 @@ var isNotPrint16 = []uint16{ ...@@ -275,6 +273,7 @@ var isNotPrint16 = []uint16{
0x0ab4, 0x0ab4,
0x0ac6, 0x0ac6,
0x0aca, 0x0aca,
0x0b00,
0x0b04, 0x0b04,
0x0b29, 0x0b29,
0x0b31, 0x0b31,
...@@ -341,7 +340,7 @@ var isNotPrint16 = []uint16{ ...@@ -341,7 +340,7 @@ var isNotPrint16 = []uint16{
0x1771, 0x1771,
0x191f, 0x191f,
0x1a5f, 0x1a5f,
0x1cf7, 0x1dfa,
0x1f58, 0x1f58,
0x1f5a, 0x1f5a,
0x1f5c, 0x1f5c,
...@@ -351,7 +350,6 @@ var isNotPrint16 = []uint16{ ...@@ -351,7 +350,6 @@ var isNotPrint16 = []uint16{
0x1fdc, 0x1fdc,
0x1ff5, 0x1ff5,
0x208f, 0x208f,
0x23ff,
0x2bc9, 0x2bc9,
0x2c2f, 0x2c2f,
0x2c5f, 0x2c5f,
...@@ -398,7 +396,7 @@ var isPrint32 = []uint32{ ...@@ -398,7 +396,7 @@ var isPrint32 = []uint32{
0x0102a0, 0x0102d0, 0x0102a0, 0x0102d0,
0x0102e0, 0x0102fb, 0x0102e0, 0x0102fb,
0x010300, 0x010323, 0x010300, 0x010323,
0x010330, 0x01034a, 0x01032d, 0x01034a,
0x010350, 0x01037a, 0x010350, 0x01037a,
0x010380, 0x0103c3, 0x010380, 0x0103c3,
0x0103c8, 0x0103d5, 0x0103c8, 0x0103d5,
...@@ -481,11 +479,17 @@ var isPrint32 = []uint32{ ...@@ -481,11 +479,17 @@ var isPrint32 = []uint32{
0x011730, 0x01173f, 0x011730, 0x01173f,
0x0118a0, 0x0118f2, 0x0118a0, 0x0118f2,
0x0118ff, 0x0118ff, 0x0118ff, 0x0118ff,
0x011a00, 0x011a47,
0x011a50, 0x011a83,
0x011a86, 0x011aa2,
0x011ac0, 0x011af8, 0x011ac0, 0x011af8,
0x011c00, 0x011c45, 0x011c00, 0x011c45,
0x011c50, 0x011c6c, 0x011c50, 0x011c6c,
0x011c70, 0x011c8f, 0x011c70, 0x011c8f,
0x011c92, 0x011cb6, 0x011c92, 0x011cb6,
0x011d00, 0x011d36,
0x011d3a, 0x011d47,
0x011d50, 0x011d59,
0x012000, 0x012399, 0x012000, 0x012399,
0x012400, 0x012474, 0x012400, 0x012474,
0x012480, 0x012543, 0x012480, 0x012543,
...@@ -502,10 +506,11 @@ var isPrint32 = []uint32{ ...@@ -502,10 +506,11 @@ var isPrint32 = []uint32{
0x016f00, 0x016f44, 0x016f00, 0x016f44,
0x016f50, 0x016f7e, 0x016f50, 0x016f7e,
0x016f8f, 0x016f9f, 0x016f8f, 0x016f9f,
0x016fe0, 0x016fe0, 0x016fe0, 0x016fe1,
0x017000, 0x0187ec, 0x017000, 0x0187ec,
0x018800, 0x018af2, 0x018800, 0x018af2,
0x01b000, 0x01b001, 0x01b000, 0x01b11e,
0x01b170, 0x01b2fb,
0x01bc00, 0x01bc6a, 0x01bc00, 0x01bc6a,
0x01bc70, 0x01bc7c, 0x01bc70, 0x01bc7c,
0x01bc80, 0x01bc88, 0x01bc80, 0x01bc88,
...@@ -553,9 +558,10 @@ var isPrint32 = []uint32{ ...@@ -553,9 +558,10 @@ var isPrint32 = []uint32{
0x01f210, 0x01f23b, 0x01f210, 0x01f23b,
0x01f240, 0x01f248, 0x01f240, 0x01f248,
0x01f250, 0x01f251, 0x01f250, 0x01f251,
0x01f300, 0x01f6d2, 0x01f260, 0x01f265,
0x01f300, 0x01f6d4,
0x01f6e0, 0x01f6ec, 0x01f6e0, 0x01f6ec,
0x01f6f0, 0x01f6f6, 0x01f6f0, 0x01f6f8,
0x01f700, 0x01f773, 0x01f700, 0x01f773,
0x01f780, 0x01f7d4, 0x01f780, 0x01f7d4,
0x01f800, 0x01f80b, 0x01f800, 0x01f80b,
...@@ -563,16 +569,17 @@ var isPrint32 = []uint32{ ...@@ -563,16 +569,17 @@ var isPrint32 = []uint32{
0x01f850, 0x01f859, 0x01f850, 0x01f859,
0x01f860, 0x01f887, 0x01f860, 0x01f887,
0x01f890, 0x01f8ad, 0x01f890, 0x01f8ad,
0x01f910, 0x01f927, 0x01f900, 0x01f90b,
0x01f930, 0x01f930, 0x01f910, 0x01f94c,
0x01f933, 0x01f94b, 0x01f950, 0x01f96b,
0x01f950, 0x01f95e, 0x01f980, 0x01f997,
0x01f980, 0x01f991,
0x01f9c0, 0x01f9c0, 0x01f9c0, 0x01f9c0,
0x01f9d0, 0x01f9e6,
0x020000, 0x02a6d6, 0x020000, 0x02a6d6,
0x02a700, 0x02b734, 0x02a700, 0x02b734,
0x02b740, 0x02b81d, 0x02b740, 0x02b81d,
0x02b820, 0x02cea1, 0x02b820, 0x02cea1,
0x02ceb0, 0x02ebe0,
0x02f800, 0x02fa1d, 0x02f800, 0x02fa1d,
0x0e0100, 0x0e01ef, 0x0e0100, 0x0e01ef,
} }
...@@ -605,9 +612,14 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry ...@@ -605,9 +612,14 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x1334, 0x1334,
0x145a, 0x145a,
0x145c, 0x145c,
0x1a9d,
0x1c09, 0x1c09,
0x1c37, 0x1c37,
0x1ca8, 0x1ca8,
0x1d07,
0x1d0a,
0x1d3b,
0x1d3e,
0x246f, 0x246f,
0x6a5f, 0x6a5f,
0x6b5a, 0x6b5a,
...@@ -658,7 +670,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry ...@@ -658,7 +670,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xf0c0, 0xf0c0,
0xf0d0, 0xf0d0,
0xf12f, 0xf12f,
0xf91f,
0xf93f, 0xf93f,
} }
......
...@@ -14,8 +14,13 @@ type T struct { ...@@ -14,8 +14,13 @@ type T struct {
script string script string
} }
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0 // Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0, 8.0.0,
// 9.0.0, 10.0.0.
// mostly to discover when new scripts and categories arise. // mostly to discover when new scripts and categories arise.
// If this test fails, add the missing scripts to the test and add entries
// of the form
// pkg unicode, var <new script> *RangeTable
// to api/next.txt.
var inTest = []T{ var inTest = []T{
{0x11711, "Ahom"}, {0x11711, "Ahom"},
{0x1e900, "Adlam"}, {0x1e900, "Adlam"},
...@@ -92,6 +97,7 @@ var inTest = []T{ ...@@ -92,6 +97,7 @@ var inTest = []T{
{0x0843, "Mandaic"}, {0x0843, "Mandaic"},
{0x10ac8, "Manichaean"}, {0x10ac8, "Manichaean"},
{0x11cB6, "Marchen"}, {0x11cB6, "Marchen"},
{0x11d59, "Masaram_Gondi"},
{0xabd0, "Meetei_Mayek"}, {0xabd0, "Meetei_Mayek"},
{0x1e800, "Mende_Kikakui"}, {0x1e800, "Mende_Kikakui"},
{0x1099f, "Meroitic_Hieroglyphs"}, {0x1099f, "Meroitic_Hieroglyphs"},
...@@ -106,6 +112,7 @@ var inTest = []T{ ...@@ -106,6 +112,7 @@ var inTest = []T{
{0x11400, "Newa"}, {0x11400, "Newa"},
{0x19c3, "New_Tai_Lue"}, {0x19c3, "New_Tai_Lue"},
{0x07f8, "Nko"}, {0x07f8, "Nko"},
{0x1b170, "Nushu"},
{0x169b, "Ogham"}, {0x169b, "Ogham"},
{0x1c6a, "Ol_Chiki"}, {0x1c6a, "Ol_Chiki"},
{0x10C80, "Old_Hungarian"}, {0x10C80, "Old_Hungarian"},
...@@ -134,6 +141,7 @@ var inTest = []T{ ...@@ -134,6 +141,7 @@ var inTest = []T{
{0x1D920, "SignWriting"}, {0x1D920, "SignWriting"},
{0x0dbd, "Sinhala"}, {0x0dbd, "Sinhala"},
{0x110d0, "Sora_Sompeng"}, {0x110d0, "Sora_Sompeng"},
{0x11a99, "Soyombo"},
{0x1ba3, "Sundanese"}, {0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"}, {0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"}, {0x070f, "Syriac"},
...@@ -155,6 +163,7 @@ var inTest = []T{ ...@@ -155,6 +163,7 @@ var inTest = []T{
{0xa60e, "Vai"}, {0xa60e, "Vai"},
{0x118ff, "Warang_Citi"}, {0x118ff, "Warang_Citi"},
{0xa216, "Yi"}, {0xa216, "Yi"},
{0x11a0a, "Zanabazar_Square"},
} }
var outTest = []T{ // not really worth being thorough var outTest = []T{ // not really worth being thorough
...@@ -229,6 +238,7 @@ var inPropTest = []T{ ...@@ -229,6 +238,7 @@ var inPropTest = []T{
{0x06DD, "Prepended_Concatenation_Mark"}, {0x06DD, "Prepended_Concatenation_Mark"},
{0x300D, "Quotation_Mark"}, {0x300D, "Quotation_Mark"},
{0x2EF3, "Radical"}, {0x2EF3, "Radical"},
{0x1f1ff, "Regional_Indicator"},
{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal {0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
{0x061F, "Sentence_Terminal"}, {0x061F, "Sentence_Terminal"},
{0x2071, "Soft_Dotted"}, {0x2071, "Soft_Dotted"},
......
This diff is collapsed.
...@@ -21,6 +21,7 @@ import ( ...@@ -21,6 +21,7 @@ import (
"unicode/utf8" "unicode/utf8"
"golang_org/x/text/secure/bidirule" "golang_org/x/text/secure/bidirule"
"golang_org/x/text/unicode/bidi"
"golang_org/x/text/unicode/norm" "golang_org/x/text/unicode/norm"
) )
...@@ -67,6 +68,15 @@ func VerifyDNSLength(verify bool) Option { ...@@ -67,6 +68,15 @@ func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify } return func(o *options) { o.verifyDNSLength = verify }
} }
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
//
// This is the behavior suggested by the UTS #46 and is adopted by some
// browsers.
func RemoveLeadingDots(remove bool) Option {
return func(o *options) { o.removeLeadingDots = remove }
}
// ValidateLabels sets whether to check the mandatory label validation criteria // ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules. // of hyphens ('-'), normalization, validity of runes, and the context rules.
...@@ -83,7 +93,7 @@ func ValidateLabels(enable bool) Option { ...@@ -83,7 +93,7 @@ func ValidateLabels(enable bool) Option {
} }
} }
// StrictDomainName limits the set of permissable ASCII characters to those // StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration. // hyphen). This is set by default for MapForLookup and ValidateForRegistration.
// //
...@@ -141,6 +151,7 @@ type options struct { ...@@ -141,6 +151,7 @@ type options struct {
useSTD3Rules bool useSTD3Rules bool
validateLabels bool validateLabels bool
verifyDNSLength bool verifyDNSLength bool
removeLeadingDots bool
trie *idnaTrie trie *idnaTrie
...@@ -149,14 +160,14 @@ type options struct { ...@@ -149,14 +160,14 @@ type options struct {
// mapping implements a validation and mapping step as defined in RFC 5895 // mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup. // or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (string, error) mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule // bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893. // defined in RFC 5893.
bidirule func(s string) bool bidirule func(s string) bool
} }
// A Profile defines the configuration of a IDNA mapper. // A Profile defines the configuration of an IDNA mapper.
type Profile struct { type Profile struct {
options options
} }
...@@ -289,12 +300,16 @@ func (e runeError) Error() string { ...@@ -289,12 +300,16 @@ func (e runeError) Error() string {
// see http://www.unicode.org/reports/tr46. // see http://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) { func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error var err error
var isBidi bool
if p.mapping != nil { if p.mapping != nil {
s, err = p.mapping(p, s) s, isBidi, err = p.mapping(p, s)
} }
// Remove leading empty labels. // Remove leading empty labels.
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] { for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
} }
}
// TODO: allow for a quick check of the tables data.
// It seems like we should only create this error on ToASCII, but the // It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this. // UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" { if err == nil && p.verifyDNSLength && s == "" {
...@@ -320,6 +335,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { ...@@ -320,6 +335,7 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
// Spec says keep the old label. // Spec says keep the old label.
continue continue
} }
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u) labels.set(u)
if err == nil && p.validateLabels { if err == nil && p.validateLabels {
err = p.fromPuny(p, u) err = p.fromPuny(p, u)
...@@ -334,6 +350,14 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { ...@@ -334,6 +350,14 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
err = p.validateLabel(label) err = p.validateLabel(label)
} }
} }
if isBidi && p.bidirule != nil && err == nil {
for labels.reset(); !labels.done(); labels.next() {
if !p.bidirule(labels.label()) {
err = &labelError{s, "B"}
break
}
}
}
if toASCII { if toASCII {
for labels.reset(); !labels.done(); labels.next() { for labels.reset(); !labels.done(); labels.next() {
label := labels.label() label := labels.label()
...@@ -365,41 +389,65 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { ...@@ -365,41 +389,65 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
return s, err return s, err
} }
func normalize(p *Profile, s string) (string, error) { func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
return norm.NFC.String(s), nil // TODO: consider first doing a quick check to see if any of these checks
// need to be done. This will make it slower in the general case, but
// faster in the common case.
mapped = norm.NFC.String(s)
isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
return mapped, isBidi, nil
} }
func validateRegistration(p *Profile, s string) (string, error) { func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
// TODO: filter need for normalization in loop below.
if !norm.NFC.IsNormalString(s) { if !norm.NFC.IsNormalString(s) {
return s, &labelError{s, "V1"} return s, false, &labelError{s, "V1"}
} }
var err error
for i := 0; i < len(s); { for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:]) v, sz := trie.lookupString(s[i:])
i += sz bidi = bidi || info(v).isBidi(s[i:])
// Copy bytes not copied so far. // Copy bytes not copied so far.
switch p.simplify(info(v).category()) { switch p.simplify(info(v).category()) {
// TODO: handle the NV8 defined in the Unicode idna data set to allow // TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008. // for strict conformance to IDNA2008.
case valid, deviation: case valid, deviation:
case disallowed, mapped, unknown, ignored: case disallowed, mapped, unknown, ignored:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[i:]) r, _ := utf8.DecodeRuneInString(s[i:])
err = runeError(r) return s, bidi, runeError(r)
} }
i += sz
} }
return s, bidi, nil
}
func (c info) isBidi(s string) bool {
if !c.isMapped() {
return c&attributesMask == rtl
} }
return s, err // TODO: also store bidi info for mapped data. This is possible, but a bit
// cumbersome and not for the common case.
p, _ := bidi.LookupString(s)
switch p.Class() {
case bidi.R, bidi.AL, bidi.AN:
return true
}
return false
} }
func validateAndMap(p *Profile, s string) (string, error) { func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
var ( var (
err error
b []byte b []byte
k int k int
) )
// combinedInfoBits contains the or-ed bits of all runes. We use this
// to derive the mayNeedNorm bit later. This may trigger normalization
// overeagerly, but it will not do so in the common case. The end result
// is another 10% saving on BenchmarkProfile for the common case.
var combinedInfoBits info
for i := 0; i < len(s); { for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:]) v, sz := trie.lookupString(s[i:])
combinedInfoBits |= info(v)
bidi = bidi || info(v).isBidi(s[i:])
start := i start := i
i += sz i += sz
// Copy bytes not copied so far. // Copy bytes not copied so far.
...@@ -408,7 +456,7 @@ func validateAndMap(p *Profile, s string) (string, error) { ...@@ -408,7 +456,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
continue continue
case disallowed: case disallowed:
if err == nil { if err == nil {
r, _ := utf8.DecodeRuneInString(s[i:]) r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r) err = runeError(r)
} }
continue continue
...@@ -426,7 +474,9 @@ func validateAndMap(p *Profile, s string) (string, error) { ...@@ -426,7 +474,9 @@ func validateAndMap(p *Profile, s string) (string, error) {
} }
if k == 0 { if k == 0 {
// No changes so far. // No changes so far.
if combinedInfoBits&mayNeedNorm != 0 {
s = norm.NFC.String(s) s = norm.NFC.String(s)
}
} else { } else {
b = append(b, s[k:]...) b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) { if norm.NFC.QuickSpan(b) != len(b) {
...@@ -435,7 +485,7 @@ func validateAndMap(p *Profile, s string) (string, error) { ...@@ -435,7 +485,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
// TODO: the punycode converters require strings as input. // TODO: the punycode converters require strings as input.
s = string(b) s = string(b)
} }
return s, err return s, bidi, err
} }
// A labelIter allows iterating over domain name labels. // A labelIter allows iterating over domain name labels.
...@@ -530,6 +580,8 @@ func validateFromPunycode(p *Profile, s string) error { ...@@ -530,6 +580,8 @@ func validateFromPunycode(p *Profile, s string) error {
if !norm.NFC.IsNormalString(s) { if !norm.NFC.IsNormalString(s) {
return &labelError{s, "V1"} return &labelError{s, "V1"}
} }
// TODO: detect whether string may have to be normalized in the following
// loop.
for i := 0; i < len(s); { for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:]) v, sz := trie.lookupString(s[i:])
if c := p.simplify(info(v).category()); c != valid && c != deviation { if c := p.simplify(info(v).category()); c != valid && c != deviation {
...@@ -604,16 +656,13 @@ var joinStates = [][numJoinTypes]joinState{ ...@@ -604,16 +656,13 @@ var joinStates = [][numJoinTypes]joinState{
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation. // already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) error { func (p *Profile) validateLabel(s string) (err error) {
if s == "" { if s == "" {
if p.verifyDNSLength { if p.verifyDNSLength {
return &labelError{s, "A4"} return &labelError{s, "A4"}
} }
return nil return nil
} }
if p.bidirule != nil && !p.bidirule(s) {
return &labelError{s, "B"}
}
if !p.validateLabels { if !p.validateLabels {
return nil return nil
} }
......
This diff is collapsed.
...@@ -28,9 +28,9 @@ package idna ...@@ -28,9 +28,9 @@ package idna
// 15..3 index into xor or mapping table // 15..3 index into xor or mapping table
// } // }
// } else { // } else {
// 15..13 unused // 15..14 unused
// 12 modifier (including virama) // 13 mayNeedNorm
// 11 virama modifier // 12..11 attributes
// 10..8 joining type // 10..8 joining type
// 7..3 category type // 7..3 category type
// } // }
...@@ -51,15 +51,20 @@ const ( ...@@ -51,15 +51,20 @@ const (
joinShift = 8 joinShift = 8
joinMask = 0x07 joinMask = 0x07
viramaModifier = 0x0800 // Attributes
attributesMask = 0x1800
viramaModifier = 0x1800
modifier = 0x1000 modifier = 0x1000
rtl = 0x0800
mayNeedNorm = 0x2000
) )
// A category corresponds to a category defined in the IDNA mapping table. // A category corresponds to a category defined in the IDNA mapping table.
type category uint16 type category uint16
const ( const (
unknown category = 0 // not defined currently in unicode. unknown category = 0 // not currently defined in unicode.
mapped category = 1 mapped category = 1
disallowedSTD3Mapped category = 2 disallowedSTD3Mapped category = 2
deviation category = 3 deviation category = 3
...@@ -112,5 +117,5 @@ func (c info) isModifier() bool { ...@@ -112,5 +117,5 @@ func (c info) isModifier() bool {
} }
func (c info) isViramaModifier() bool { func (c info) isViramaModifier() bool {
return c&(viramaModifier|catSmallMask) == viramaModifier return c&(attributesMask|catSmallMask) == viramaModifier
} }
...@@ -157,6 +157,7 @@ func DirectionString(s string) bidi.Direction { ...@@ -157,6 +157,7 @@ func DirectionString(s string) bidi.Direction {
e, sz := bidi.LookupString(s[i:]) e, sz := bidi.LookupString(s[i:])
if sz == 0 { if sz == 0 {
i++ i++
continue
} }
c := e.Class() c := e.Class()
if c == bidi.R || c == bidi.AL || c == bidi.AN { if c == bidi.R || c == bidi.AL || c == bidi.AN {
...@@ -205,9 +206,6 @@ func (t *Transformer) isRTL() bool { ...@@ -205,9 +206,6 @@ func (t *Transformer) isRTL() bool {
} }
func (t *Transformer) isFinal() bool { func (t *Transformer) isFinal() bool {
if !t.isRTL() {
return true
}
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -35,17 +35,9 @@ const ( ...@@ -35,17 +35,9 @@ const (
// streamSafe implements the policy of when a CGJ should be inserted. // streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8 type streamSafe uint8
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling // first inserts the first rune of a segment. It is a faster version of next if
// first on it. // it is known p represents the first rune in a segment.
func mkStreamSafe(p Properties) streamSafe {
return streamSafe(p.nTrailingNonStarters())
}
// first inserts the first rune of a segment.
func (ss *streamSafe) first(p Properties) { func (ss *streamSafe) first(p Properties) {
if *ss != 0 {
panic("!= 0")
}
*ss = streamSafe(p.nTrailingNonStarters()) *ss = streamSafe(p.nTrailingNonStarters())
} }
...@@ -68,7 +60,7 @@ func (ss *streamSafe) next(p Properties) ssState { ...@@ -68,7 +60,7 @@ func (ss *streamSafe) next(p Properties) ssState {
// be a non-starter. Note that it always hold that if nLead > 0 then // be a non-starter. Note that it always hold that if nLead > 0 then
// nLead == nTrail. // nLead == nTrail.
if n == 0 { if n == 0 {
*ss = 0 *ss = streamSafe(p.nTrailingNonStarters())
return ssStarter return ssStarter
} }
return ssSuccess return ssSuccess
...@@ -144,7 +136,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) { ...@@ -144,7 +136,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
func (rb *reorderBuffer) reset() { func (rb *reorderBuffer) reset() {
rb.nrune = 0 rb.nrune = 0
rb.nbyte = 0 rb.nbyte = 0
rb.ss = 0
} }
func (rb *reorderBuffer) doFlush() bool { func (rb *reorderBuffer) doFlush() bool {
...@@ -259,6 +250,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) { ...@@ -259,6 +250,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
// It flushes the buffer on each new segment start. // It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr { func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp) rb.tmpBytes.setBytes(dcomp)
// As the streamSafe accounting already handles the counting for modifiers,
// we don't have to call next. However, we do need to keep the accounting
// intact when flushing the buffer.
for i := 0; i < len(dcomp); { for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i) info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() { if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
......
...@@ -92,16 +92,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) { ...@@ -92,16 +92,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
} }
func (in *input) hangul(p int) (r rune) { func (in *input) hangul(p int) (r rune) {
var size int
if in.bytes == nil { if in.bytes == nil {
if !isHangulString(in.str[p:]) { if !isHangulString(in.str[p:]) {
return 0 return 0
} }
r, _ = utf8.DecodeRuneInString(in.str[p:]) r, size = utf8.DecodeRuneInString(in.str[p:])
} else { } else {
if !isHangul(in.bytes[p:]) { if !isHangul(in.bytes[p:]) {
return 0 return 0
} }
r, _ = utf8.DecodeRune(in.bytes[p:]) r, size = utf8.DecodeRune(in.bytes[p:])
}
if size != hangulUTF8Size {
return 0
} }
return r return r
} }
...@@ -43,6 +43,7 @@ func (i *Iter) Init(f Form, src []byte) { ...@@ -43,6 +43,7 @@ func (i *Iter) Init(f Form, src []byte) {
i.next = i.rb.f.nextMain i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
} }
// InitString initializes i to iterate over src after normalizing it to Form f. // InitString initializes i to iterate over src after normalizing it to Form f.
...@@ -58,11 +59,12 @@ func (i *Iter) InitString(f Form, src string) { ...@@ -58,11 +59,12 @@ func (i *Iter) InitString(f Form, src string) {
i.next = i.rb.f.nextMain i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
} }
// Seek sets the segment to be returned by the next call to Next to start // Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the // at position p. It is the responsibility of the caller to set p to the
// start of a UTF8 rune. // start of a segment.
func (i *Iter) Seek(offset int64, whence int) (int64, error) { func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64 var abs int64
switch whence { switch whence {
...@@ -86,6 +88,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) { ...@@ -86,6 +88,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
i.multiSeg = nil i.multiSeg = nil
i.next = i.rb.f.nextMain i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
return abs, nil return abs, nil
} }
...@@ -163,6 +166,7 @@ func nextHangul(i *Iter) []byte { ...@@ -163,6 +166,7 @@ func nextHangul(i *Iter) []byte {
if next >= i.rb.nsrc { if next >= i.rb.nsrc {
i.setDone() i.setDone()
} else if i.rb.src.hangul(next) == 0 { } else if i.rb.src.hangul(next) == 0 {
i.rb.ss.next(i.info)
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain i.next = i.rb.f.nextMain
return i.next(i) return i.next(i)
...@@ -206,12 +210,10 @@ func nextMultiNorm(i *Iter) []byte { ...@@ -206,12 +210,10 @@ func nextMultiNorm(i *Iter) []byte {
if info.BoundaryBefore() { if info.BoundaryBefore() {
i.rb.compose() i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])] seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.ss.first(info)
i.rb.insertUnsafe(input{bytes: d}, j, info) i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):] i.multiSeg = d[j+int(info.size):]
return seg return seg
} }
i.rb.ss.next(info)
i.rb.insertUnsafe(input{bytes: d}, j, info) i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size) j += int(info.size)
} }
...@@ -224,9 +226,9 @@ func nextMultiNorm(i *Iter) []byte { ...@@ -224,9 +226,9 @@ func nextMultiNorm(i *Iter) []byte {
func nextDecomposed(i *Iter) (next []byte) { func nextDecomposed(i *Iter) (next []byte) {
outp := 0 outp := 0
inCopyStart, outCopyStart := i.p, 0 inCopyStart, outCopyStart := i.p, 0
ss := mkStreamSafe(i.info)
for { for {
if sz := int(i.info.size); sz <= 1 { if sz := int(i.info.size); sz <= 1 {
i.rb.ss = 0
p := i.p p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1. i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc { if i.p >= i.rb.nsrc {
...@@ -245,6 +247,8 @@ func nextDecomposed(i *Iter) (next []byte) { ...@@ -245,6 +247,8 @@ func nextDecomposed(i *Iter) (next []byte) {
p := outp + len(d) p := outp + len(d)
if outp > 0 { if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
// TODO: this condition should not be possible, but we leave it
// in for defensive purposes.
if p > len(i.buf) { if p > len(i.buf) {
return i.buf[:outp] return i.buf[:outp]
} }
...@@ -268,7 +272,7 @@ func nextDecomposed(i *Iter) (next []byte) { ...@@ -268,7 +272,7 @@ func nextDecomposed(i *Iter) (next []byte) {
} else { } else {
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
} }
switch ss.next(i.info) { switch i.rb.ss.next(i.info) {
case ssOverflow: case ssOverflow:
i.next = nextCGJDecompose i.next = nextCGJDecompose
fallthrough fallthrough
...@@ -311,7 +315,7 @@ func nextDecomposed(i *Iter) (next []byte) { ...@@ -311,7 +315,7 @@ func nextDecomposed(i *Iter) (next []byte) {
} }
prevCC := i.info.tccc prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter { if v := i.rb.ss.next(i.info); v == ssStarter {
break break
} else if v == ssOverflow { } else if v == ssOverflow {
i.next = nextCGJDecompose i.next = nextCGJDecompose
...@@ -337,10 +341,6 @@ doNorm: ...@@ -337,10 +341,6 @@ doNorm:
func doNormDecomposed(i *Iter) []byte { func doNormDecomposed(i *Iter) []byte {
for { for {
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info) i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc { if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone() i.setDone()
...@@ -350,6 +350,10 @@ func doNormDecomposed(i *Iter) []byte { ...@@ -350,6 +350,10 @@ func doNormDecomposed(i *Iter) []byte {
if i.info.ccc == 0 { if i.info.ccc == 0 {
break break
} }
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
} }
// new segment or too many combining characters: exit normalization // new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])] return i.buf[:i.rb.flushCopy(i.buf[:])]
...@@ -359,6 +363,7 @@ func nextCGJDecompose(i *Iter) []byte { ...@@ -359,6 +363,7 @@ func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0 i.rb.ss = 0
i.rb.insertCGJ() i.rb.insertCGJ()
i.next = nextDecomposed i.next = nextDecomposed
i.rb.ss.first(i.info)
buf := doNormDecomposed(i) buf := doNormDecomposed(i)
return buf return buf
} }
...@@ -367,7 +372,6 @@ func nextCGJDecompose(i *Iter) []byte { ...@@ -367,7 +372,6 @@ func nextCGJDecompose(i *Iter) []byte {
func nextComposed(i *Iter) []byte { func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p outp, startp := 0, i.p
var prevCC uint8 var prevCC uint8
ss := mkStreamSafe(i.info)
for { for {
if !i.info.isYesC() { if !i.info.isYesC() {
goto doNorm goto doNorm
...@@ -387,11 +391,12 @@ func nextComposed(i *Iter) []byte { ...@@ -387,11 +391,12 @@ func nextComposed(i *Iter) []byte {
i.setDone() i.setDone()
break break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf { } else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.rb.ss = 0
i.next = i.asciiF i.next = i.asciiF
break break
} }
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter { if v := i.rb.ss.next(i.info); v == ssStarter {
break break
} else if v == ssOverflow { } else if v == ssOverflow {
i.next = nextCGJCompose i.next = nextCGJCompose
...@@ -403,8 +408,10 @@ func nextComposed(i *Iter) []byte { ...@@ -403,8 +408,10 @@ func nextComposed(i *Iter) []byte {
} }
return i.returnSlice(startp, i.p) return i.returnSlice(startp, i.p)
doNorm: doNorm:
// reset to start position
i.p = startp i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p) i.info = i.rb.f.info(i.rb.src, i.p)
i.rb.ss.first(i.info)
if i.info.multiSegment() { if i.info.multiSegment() {
d := i.info.Decomposition() d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0) info := i.rb.f.info(input{bytes: d}, 0)
......
...@@ -324,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) ...@@ -324,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
// have an overflow for runes that are starters (e.g. with U+FF9E). // have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) { switch ss.next(info) {
case ssStarter: case ssStarter:
ss.first(info)
lastSegStart = i lastSegStart = i
case ssOverflow: case ssOverflow:
return lastSegStart, false return lastSegStart, false
...@@ -441,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { ...@@ -441,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
} }
return -1 return -1
} }
// TODO: Using streamSafe to determine the boundary isn't the same as
// using BoundaryBefore. Determine which should be used.
if s := ss.next(info); s != ssSuccess { if s := ss.next(info); s != ssSuccess {
return i return i
} }
...@@ -505,16 +506,15 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { ...@@ -505,16 +506,15 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
if info.size == 0 { if info.size == 0 {
return 0 return 0
} }
if rb.nrune > 0 {
if s := rb.ss.next(info); s == ssStarter { if s := rb.ss.next(info); s == ssStarter {
// TODO: this could be removed if we don't support merging.
if rb.nrune > 0 {
goto end goto end
}
} else if s == ssOverflow { } else if s == ssOverflow {
rb.insertCGJ() rb.insertCGJ()
goto end goto end
} }
} else {
rb.ss.first(info)
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err) return int(err)
} }
......
...@@ -42,7 +42,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) ...@@ -42,7 +42,7 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
} }
func flushTransform(rb *reorderBuffer) bool { func flushTransform(rb *reorderBuffer) bool {
// Write out (must fully fit in dst, or else it is a ErrShortDst). // Write out (must fully fit in dst, or else it is an ErrShortDst).
if len(rb.out) < rb.nrune*utf8.UTFMax { if len(rb.out) < rb.nrune*utf8.UTFMax {
return false return false
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment