Commit 65d8cb98 authored by Marcel van Lohuizen

unicode: strconv: regexp: Upgrade to Unicode 7.0.0.

LGTM=r, bradfitz
R=r, bradfitz
CC=golang-codereviews
https://golang.org/cl/127470043
parent 15992623
......@@ -327,3 +327,4 @@ pkg syscall (netbsd-arm), const SizeofIfData = 132
pkg syscall (netbsd-arm), type IfMsghdr struct, Pad_cgo_1 [4]uint8
pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
pkg unicode, const Version = "6.3.0"
......@@ -115,3 +115,27 @@ pkg debug/goobj, type Var struct, Kind int
pkg debug/goobj, type Var struct, Name string
pkg debug/goobj, type Var struct, Offset int
pkg debug/goobj, type Var struct, Type SymID
pkg unicode, const Version = "7.0.0"
pkg unicode, var Bassa_Vah *RangeTable
pkg unicode, var Caucasian_Albanian *RangeTable
pkg unicode, var Duployan *RangeTable
pkg unicode, var Elbasan *RangeTable
pkg unicode, var Grantha *RangeTable
pkg unicode, var Khojki *RangeTable
pkg unicode, var Khudawadi *RangeTable
pkg unicode, var Linear_A *RangeTable
pkg unicode, var Mahajani *RangeTable
pkg unicode, var Manichaean *RangeTable
pkg unicode, var Mende_Kikakui *RangeTable
pkg unicode, var Modi *RangeTable
pkg unicode, var Mro *RangeTable
pkg unicode, var Nabataean *RangeTable
pkg unicode, var Old_North_Arabian *RangeTable
pkg unicode, var Old_Permic *RangeTable
pkg unicode, var Pahawh_Hmong *RangeTable
pkg unicode, var Palmyrene *RangeTable
pkg unicode, var Pau_Cin_Hau *RangeTable
pkg unicode, var Psalter_Pahlavi *RangeTable
pkg unicode, var Siddham *RangeTable
pkg unicode, var Tirhuta *RangeTable
pkg unicode, var Warang_Citi *RangeTable
......@@ -1639,7 +1639,7 @@ const (
// minimum and maximum runes involved in folding.
// checked during test.
minFold = 0x0041
maxFold = 0x1044f
maxFold = 0x118df
)
// appendFoldedRange returns the result of appending the range lo-hi
......
......@@ -7,16 +7,15 @@
package strconv
// (470+136+60)*2 + (218)*4 = 2204 bytes
// (468+138+67)*2 + (326)*4 = 2650 bytes
var isPrint16 = []uint16{
0x0020, 0x007e,
0x00a1, 0x0377,
0x037a, 0x037e,
0x0384, 0x0527,
0x0531, 0x0556,
0x037a, 0x037f,
0x0384, 0x0556,
0x0559, 0x058a,
0x058f, 0x05c7,
0x058d, 0x05c7,
0x05d0, 0x05ea,
0x05f0, 0x05f4,
0x0606, 0x061b,
......@@ -27,7 +26,7 @@ var isPrint16 = []uint16{
0x0800, 0x082d,
0x0830, 0x085b,
0x085e, 0x085e,
0x08a0, 0x08ac,
0x08a0, 0x08b2,
0x08e4, 0x098c,
0x098f, 0x0990,
0x0993, 0x09b2,
......@@ -72,18 +71,17 @@ var isPrint16 = []uint16{
0x0bd0, 0x0bd0,
0x0bd7, 0x0bd7,
0x0be6, 0x0bfa,
0x0c01, 0x0c39,
0x0c00, 0x0c39,
0x0c3d, 0x0c4d,
0x0c55, 0x0c59,
0x0c60, 0x0c63,
0x0c66, 0x0c6f,
0x0c78, 0x0c7f,
0x0c82, 0x0cb9,
0x0c78, 0x0cb9,
0x0cbc, 0x0ccd,
0x0cd5, 0x0cd6,
0x0cde, 0x0ce3,
0x0ce6, 0x0cf2,
0x0d02, 0x0d3a,
0x0d01, 0x0d3a,
0x0d3d, 0x0d4e,
0x0d57, 0x0d57,
0x0d60, 0x0d63,
......@@ -94,6 +92,7 @@ var isPrint16 = []uint16{
0x0dc0, 0x0dc6,
0x0dca, 0x0dca,
0x0dcf, 0x0ddf,
0x0de6, 0x0def,
0x0df2, 0x0df4,
0x0e01, 0x0e3a,
0x0e3f, 0x0e5b,
......@@ -120,7 +119,7 @@ var isPrint16 = []uint16{
0x1380, 0x1399,
0x13a0, 0x13f4,
0x1400, 0x169c,
0x16a0, 0x16f0,
0x16a0, 0x16f8,
0x1700, 0x1714,
0x1720, 0x1736,
0x1740, 0x1753,
......@@ -133,8 +132,7 @@ var isPrint16 = []uint16{
0x1820, 0x1877,
0x1880, 0x18aa,
0x18b0, 0x18f5,
0x1900, 0x191c,
0x1920, 0x192b,
0x1900, 0x192b,
0x1930, 0x193b,
0x1940, 0x1940,
0x1944, 0x196d,
......@@ -147,6 +145,7 @@ var isPrint16 = []uint16{
0x1a7f, 0x1a89,
0x1a90, 0x1a99,
0x1aa0, 0x1aad,
0x1ab0, 0x1abe,
0x1b00, 0x1b4b,
0x1b50, 0x1b7c,
0x1b80, 0x1bf3,
......@@ -154,8 +153,8 @@ var isPrint16 = []uint16{
0x1c3b, 0x1c49,
0x1c4d, 0x1c7f,
0x1cc0, 0x1cc7,
0x1cd0, 0x1cf6,
0x1d00, 0x1de6,
0x1cd0, 0x1cf9,
0x1d00, 0x1df5,
0x1dfc, 0x1f15,
0x1f18, 0x1f1d,
0x1f20, 0x1f45,
......@@ -168,21 +167,23 @@ var isPrint16 = []uint16{
0x2030, 0x205e,
0x2070, 0x2071,
0x2074, 0x209c,
0x20a0, 0x20ba,
0x20a0, 0x20bd,
0x20d0, 0x20f0,
0x2100, 0x2189,
0x2190, 0x23f3,
0x2190, 0x23fa,
0x2400, 0x2426,
0x2440, 0x244a,
0x2460, 0x2b4c,
0x2b50, 0x2b59,
0x2460, 0x2b73,
0x2b76, 0x2b95,
0x2b98, 0x2bb9,
0x2bbd, 0x2bd1,
0x2c00, 0x2cf3,
0x2cf9, 0x2d27,
0x2d2d, 0x2d2d,
0x2d30, 0x2d67,
0x2d6f, 0x2d70,
0x2d7f, 0x2d96,
0x2da0, 0x2e3b,
0x2da0, 0x2e42,
0x2e80, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
......@@ -196,11 +197,10 @@ var isPrint16 = []uint16{
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
0xa640, 0xa697,
0xa69f, 0xa6f7,
0xa700, 0xa793,
0xa7a0, 0xa7aa,
0xa7f8, 0xa82b,
0xa640, 0xa6f7,
0xa700, 0xa7ad,
0xa7b0, 0xa7b1,
0xa7f7, 0xa82b,
0xa830, 0xa839,
0xa840, 0xa877,
0xa880, 0xa8c4,
......@@ -209,17 +209,16 @@ var isPrint16 = []uint16{
0xa900, 0xa953,
0xa95f, 0xa97c,
0xa980, 0xa9d9,
0xa9de, 0xa9df,
0xaa00, 0xaa36,
0xa9de, 0xaa36,
0xaa40, 0xaa4d,
0xaa50, 0xaa59,
0xaa5c, 0xaa7b,
0xaa80, 0xaac2,
0xaa5c, 0xaac2,
0xaadb, 0xaaf6,
0xab01, 0xab06,
0xab09, 0xab0e,
0xab11, 0xab16,
0xab20, 0xab2e,
0xab20, 0xab5f,
0xab64, 0xab65,
0xabc0, 0xabed,
0xabf0, 0xabf9,
0xac00, 0xd7a3,
......@@ -235,7 +234,7 @@ var isPrint16 = []uint16{
0xfd92, 0xfdc7,
0xfdf0, 0xfdfd,
0xfe00, 0xfe19,
0xfe20, 0xfe26,
0xfe20, 0xfe2d,
0xfe30, 0xfe6b,
0xfe70, 0xfefc,
0xff01, 0xffbe,
......@@ -252,15 +251,12 @@ var isNotPrint16 = []uint16{
0x038b,
0x038d,
0x03a2,
0x0530,
0x0560,
0x0588,
0x0590,
0x06dd,
0x083f,
0x08a1,
0x08ff,
0x0978,
0x0980,
0x0984,
0x09a9,
0x09b1,
......@@ -294,10 +290,10 @@ var isNotPrint16 = []uint16{
0x0c0d,
0x0c11,
0x0c29,
0x0c34,
0x0c45,
0x0c49,
0x0c57,
0x0c80,
0x0c84,
0x0c8d,
0x0c91,
......@@ -345,7 +341,9 @@ var isNotPrint16 = []uint16{
0x170d,
0x176d,
0x1771,
0x191f,
0x1a5f,
0x1cf7,
0x1f58,
0x1f5a,
0x1f5c,
......@@ -355,7 +353,7 @@ var isNotPrint16 = []uint16{
0x1fdc,
0x1ff5,
0x208f,
0x2700,
0x2bc9,
0x2c2f,
0x2c5f,
0x2d26,
......@@ -372,9 +370,12 @@ var isNotPrint16 = []uint16{
0x318f,
0x321f,
0x32ff,
0xa69e,
0xa78f,
0xa9ce,
0xa9ff,
0xab27,
0xab2f,
0xfb37,
0xfb3d,
0xfb3f,
......@@ -392,21 +393,31 @@ var isPrint32 = []uint32{
0x010080, 0x0100fa,
0x010100, 0x010102,
0x010107, 0x010133,
0x010137, 0x01018a,
0x010137, 0x01018c,
0x010190, 0x01019b,
0x0101a0, 0x0101a0,
0x0101d0, 0x0101fd,
0x010280, 0x01029c,
0x0102a0, 0x0102d0,
0x0102e0, 0x0102fb,
0x010300, 0x010323,
0x010330, 0x01034a,
0x010350, 0x01037a,
0x010380, 0x0103c3,
0x0103c8, 0x0103d5,
0x010400, 0x01049d,
0x0104a0, 0x0104a9,
0x010500, 0x010527,
0x010530, 0x010563,
0x01056f, 0x01056f,
0x010600, 0x010736,
0x010740, 0x010755,
0x010760, 0x010767,
0x010800, 0x010805,
0x010808, 0x010838,
0x01083c, 0x01083c,
0x01083f, 0x01085f,
0x01083f, 0x01089e,
0x0108a7, 0x0108af,
0x010900, 0x01091b,
0x01091f, 0x010939,
0x01093f, 0x01093f,
......@@ -417,32 +428,72 @@ var isPrint32 = []uint32{
0x010a38, 0x010a3a,
0x010a3f, 0x010a47,
0x010a50, 0x010a58,
0x010a60, 0x010a7f,
0x010a60, 0x010a9f,
0x010ac0, 0x010ae6,
0x010aeb, 0x010af6,
0x010b00, 0x010b35,
0x010b39, 0x010b55,
0x010b58, 0x010b72,
0x010b78, 0x010b7f,
0x010b78, 0x010b91,
0x010b99, 0x010b9c,
0x010ba9, 0x010baf,
0x010c00, 0x010c48,
0x010e60, 0x010e7e,
0x011000, 0x01104d,
0x011052, 0x01106f,
0x011080, 0x0110c1,
0x01107f, 0x0110c1,
0x0110d0, 0x0110e8,
0x0110f0, 0x0110f9,
0x011100, 0x011143,
0x011150, 0x011176,
0x011180, 0x0111c8,
0x0111d0, 0x0111d9,
0x0111cd, 0x0111cd,
0x0111d0, 0x0111da,
0x0111e1, 0x0111f4,
0x011200, 0x01123d,
0x0112b0, 0x0112ea,
0x0112f0, 0x0112f9,
0x011301, 0x01130c,
0x01130f, 0x011310,
0x011313, 0x011339,
0x01133c, 0x011344,
0x011347, 0x011348,
0x01134b, 0x01134d,
0x011357, 0x011357,
0x01135d, 0x011363,
0x011366, 0x01136c,
0x011370, 0x011374,
0x011480, 0x0114c7,
0x0114d0, 0x0114d9,
0x011580, 0x0115b5,
0x0115b8, 0x0115c9,
0x011600, 0x011644,
0x011650, 0x011659,
0x011680, 0x0116b7,
0x0116c0, 0x0116c9,
0x012000, 0x01236e,
0x012400, 0x012462,
0x012470, 0x012473,
0x0118a0, 0x0118f2,
0x0118ff, 0x0118ff,
0x011ac0, 0x011af8,
0x012000, 0x012398,
0x012400, 0x012474,
0x013000, 0x01342e,
0x016800, 0x016a38,
0x016a40, 0x016a69,
0x016a6e, 0x016a6f,
0x016ad0, 0x016aed,
0x016af0, 0x016af5,
0x016b00, 0x016b45,
0x016b50, 0x016b77,
0x016b7d, 0x016b8f,
0x016f00, 0x016f44,
0x016f50, 0x016f7e,
0x016f8f, 0x016f9f,
0x01b000, 0x01b001,
0x01bc00, 0x01bc6a,
0x01bc70, 0x01bc7c,
0x01bc80, 0x01bc88,
0x01bc90, 0x01bc99,
0x01bc9c, 0x01bc9f,
0x01d000, 0x01d0f5,
0x01d100, 0x01d126,
0x01d129, 0x01d172,
......@@ -458,6 +509,8 @@ var isPrint32 = []uint32{
0x01d54a, 0x01d6a5,
0x01d6a8, 0x01d7cb,
0x01d7ce, 0x01d7ff,
0x01e800, 0x01e8c4,
0x01e8c7, 0x01e8d6,
0x01ee00, 0x01ee24,
0x01ee27, 0x01ee3b,
0x01ee42, 0x01ee42,
......@@ -469,28 +522,30 @@ var isPrint32 = []uint32{
0x01f000, 0x01f02b,
0x01f030, 0x01f093,
0x01f0a0, 0x01f0ae,
0x01f0b1, 0x01f0be,
0x01f0c1, 0x01f0df,
0x01f100, 0x01f10a,
0x01f0b1, 0x01f0f5,
0x01f100, 0x01f10c,
0x01f110, 0x01f16b,
0x01f170, 0x01f19a,
0x01f1e6, 0x01f202,
0x01f210, 0x01f23a,
0x01f240, 0x01f248,
0x01f250, 0x01f251,
0x01f300, 0x01f320,
0x01f330, 0x01f37c,
0x01f380, 0x01f393,
0x01f3a0, 0x01f3ca,
0x01f3e0, 0x01f3f0,
0x01f400, 0x01f4fc,
0x01f500, 0x01f53d,
0x01f540, 0x01f543,
0x01f550, 0x01f567,
0x01f5fb, 0x01f640,
0x01f645, 0x01f64f,
0x01f680, 0x01f6c5,
0x01f300, 0x01f32c,
0x01f330, 0x01f37d,
0x01f380, 0x01f3ce,
0x01f3d4, 0x01f3f7,
0x01f400, 0x01f54a,
0x01f550, 0x01f642,
0x01f645, 0x01f6cf,
0x01f6e0, 0x01f6ec,
0x01f6f0, 0x01f6f3,
0x01f700, 0x01f773,
0x01f780, 0x01f7d4,
0x01f800, 0x01f80b,
0x01f810, 0x01f847,
0x01f850, 0x01f859,
0x01f860, 0x01f887,
0x01f890, 0x01f8ad,
0x020000, 0x02a6d6,
0x02a700, 0x02b734,
0x02b740, 0x02b81d,
......@@ -503,7 +558,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x0027,
0x003b,
0x003e,
0x031f,
0x039e,
0x0809,
0x0836,
......@@ -513,6 +567,15 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0x0a18,
0x10bd,
0x1135,
0x1212,
0x1304,
0x1329,
0x1331,
0x1334,
0x246f,
0x6a5f,
0x6b5a,
0x6b62,
0xd455,
0xd49d,
0xd4ad,
......@@ -552,11 +615,10 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
0xee8a,
0xeea4,
0xeeaa,
0xf0c0,
0xf0d0,
0xf12f,
0xf336,
0xf3c5,
0xf43f,
0xf441,
0xf4f8,
0xf4ff,
0xf57a,
0xf5a4,
}
......@@ -40,7 +40,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
"http://www.unicode.org/Public/6.3.0/ucd/",
"http://www.unicode.org/Public/7.0.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
......
......@@ -14,14 +14,15 @@ type T struct {
script string
}
// Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0 mostly to discover when new
// scripts and categories arise.
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0 and 7.0.0 mostly to
// discover when new scripts and categories arise.
var inTest = []T{
{0x06e2, "Arabic"},
{0x0567, "Armenian"},
{0x10b20, "Avestan"},
{0x1b37, "Balinese"},
{0xa6af, "Bamum"},
{0x16ada, "Bassa_Vah"},
{0x1be1, "Batak"},
{0x09c2, "Bengali"},
{0x3115, "Bopomofo"},
......@@ -31,6 +32,7 @@ var inTest = []T{
{0x11011, "Brahmi"},
{0x156d, "Canadian_Aboriginal"},
{0x102a9, "Carian"},
{0x10563, "Caucasian_Albanian"},
{0x11111, "Chakma"},
{0xaa4d, "Cham"},
{0x13c2, "Cherokee"},
......@@ -42,11 +44,14 @@ var inTest = []T{
{0xa663, "Cyrillic"},
{0x10430, "Deseret"},
{0x094a, "Devanagari"},
{0x1BC00, "Duployan"},
{0x13001, "Egyptian_Hieroglyphs"},
{0x10500, "Elbasan"},
{0x1271, "Ethiopic"},
{0x10fc, "Georgian"},
{0x2c40, "Glagolitic"},
{0x10347, "Gothic"},
{0x11303, "Grantha"},
{0x03ae, "Greek"},
{0x0abf, "Gujarati"},
{0x0a24, "Gurmukhi"},
......@@ -66,40 +71,56 @@ var inTest = []T{
{0xa928, "Kayah_Li"},
{0x10a11, "Kharoshthi"},
{0x17c6, "Khmer"},
{0x11211, "Khojki"},
{0x112df, "Khudawadi"},
{0x0eaa, "Lao"},
{0x1d79, "Latin"},
{0x1c10, "Lepcha"},
{0x1930, "Limbu"},
{0x10755, "Linear_A"},
{0x1003c, "Linear_B"},
{0xa4e1, "Lisu"},
{0x10290, "Lycian"},
{0x10930, "Lydian"},
{0x11173, "Mahajani"},
{0x0d42, "Malayalam"},
{0x0843, "Mandaic"},
{0x10ac8, "Manichaean"},
{0xabd0, "Meetei_Mayek"},
{0x1e800, "Mende_Kikakui"},
{0x1099f, "Meroitic_Hieroglyphs"},
{0x109a0, "Meroitic_Cursive"},
{0x16f00, "Miao"},
{0x11611, "Modi"},
{0x1822, "Mongolian"},
{0x16a60, "Mro"},
{0x104c, "Myanmar"},
{0x10880, "Nabataean"},
{0x19c3, "New_Tai_Lue"},
{0x07f8, "Nko"},
{0x169b, "Ogham"},
{0x1c6a, "Ol_Chiki"},
{0x10310, "Old_Italic"},
{0x10a80, "Old_North_Arabian"},
{0x10350, "Old_Permic"},
{0x103c9, "Old_Persian"},
{0x10a6f, "Old_South_Arabian"},
{0x10c20, "Old_Turkic"},
{0x0b3e, "Oriya"},
{0x10491, "Osmanya"},
{0x16b2b, "Pahawh_Hmong"},
{0x10876, "Palmyrene"},
{0x11ACE, "Pau_Cin_Hau"},
{0xa860, "Phags_Pa"},
{0x10918, "Phoenician"},
{0x10baf, "Psalter_Pahlavi"},
{0xa949, "Rejang"},
{0x16c0, "Runic"},
{0x081d, "Samaritan"},
{0xa892, "Saurashtra"},
{0x111a0, "Sharada"},
{0x10463, "Shavian"},
{0x115c1, "Siddham"},
{0x0dbd, "Sinhala"},
{0x110d0, "Sora_Sompeng"},
{0x1ba3, "Sundanese"},
......@@ -117,8 +138,10 @@ var inTest = []T{
{0x0e46, "Thai"},
{0x0f36, "Tibetan"},
{0x2d55, "Tifinagh"},
{0x114d9, "Tirhuta"},
{0x10388, "Ugaritic"},
{0xa60e, "Vai"},
{0x118ff, "Warang_Citi"},
{0xa216, "Yi"},
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment