Commit f38da967 authored by Marcel van Lohuizen

exp/locale/collate: moved low-level collation functionality

into separate package.  This allows this code to be shared
with the search package without the need for these two to use
the same tables.
Adjusted various files accordingly.

R=rsc
CC=golang-dev
https://golang.org/cl/7213044
parent ae8da3a2
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
package build package build
import ( import (
"exp/locale/collate" "exp/locale/collate/colltab"
"exp/norm" "exp/norm"
"fmt" "fmt"
"io" "io"
...@@ -225,25 +225,25 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error { ...@@ -225,25 +225,25 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error {
// at the primary sorting level: // at the primary sorting level:
// t := b.Tailoring("se") // t := b.Tailoring("se")
// t.SetAnchor("z") // t.SetAnchor("z")
// t.Insert(collate.Primary, "ä", "") // t.Insert(colltab.Primary, "ä", "")
// Order "ü" after "ue" at the secondary sorting level: // Order "ü" after "ue" at the secondary sorting level:
// t.SetAnchor("ue") // t.SetAnchor("ue")
// t.Insert(collate.Secondary, "ü","") // t.Insert(colltab.Secondary, "ü","")
// or // or
// t.SetAnchor("u") // t.SetAnchor("u")
// t.Insert(collate.Secondary, "ü", "e") // t.Insert(colltab.Secondary, "ü", "e")
// Order "q" after "ab" at the secondary level and "Q" after "q" // Order "q" after "ab" at the secondary level and "Q" after "q"
// at the tertiary level: // at the tertiary level:
// t.SetAnchor("ab") // t.SetAnchor("ab")
// t.Insert(collate.Secondary, "q", "") // t.Insert(colltab.Secondary, "q", "")
// t.Insert(collate.Tertiary, "Q", "") // t.Insert(colltab.Tertiary, "Q", "")
// Order "b" before "a": // Order "b" before "a":
// t.SetAnchorBefore("a") // t.SetAnchorBefore("a")
// t.Insert(collate.Primary, "b", "") // t.Insert(colltab.Primary, "b", "")
// Order "0" after the last primary ignorable: // Order "0" after the last primary ignorable:
// t.SetAnchor("<last_primary_ignorable/>") // t.SetAnchor("<last_primary_ignorable/>")
// t.Insert(collate.Primary, "0", "") // t.Insert(colltab.Primary, "0", "")
func (t *Tailoring) Insert(level collate.Level, str, extend string) error { func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
if t.anchor == nil { if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
} }
...@@ -301,13 +301,13 @@ func (o *ordering) getWeight(e *entry) []rawCE { ...@@ -301,13 +301,13 @@ func (o *ordering) getWeight(e *entry) []rawCE {
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...) e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
} }
} else if e.before { } else if e.before {
count := [collate.Identity + 1]int{} count := [colltab.Identity + 1]int{}
a := e a := e
for ; a.elems == nil && !a.implicit; a = a.next { for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++ count[a.level]++
} }
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)} e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := collate.Primary; i < collate.Quaternary; i++ { for i := colltab.Primary; i < colltab.Quaternary; i++ {
if count[i] != 0 { if count[i] != 0 {
e.elems[0].w[i] -= count[i] e.elems[0].w[i] -= count[i]
break break
...@@ -336,11 +336,11 @@ func (o *ordering) addExtension(e *entry) { ...@@ -336,11 +336,11 @@ func (o *ordering) addExtension(e *entry) {
e.extend = "" e.extend = ""
} }
func (o *ordering) verifyWeights(a, b *entry, level collate.Level) error { func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
if level == collate.Identity || b == nil || b.elems == nil || a.elems == nil { if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
return nil return nil
} }
for i := collate.Primary; i < level; i++ { for i := colltab.Primary; i < level; i++ {
if a.elems[0].w[i] < b.elems[0].w[i] { if a.elems[0].w[i] < b.elems[0].w[i] {
return nil return nil
} }
...@@ -462,20 +462,21 @@ func (b *Builder) build() (*table, error) { ...@@ -462,20 +462,21 @@ func (b *Builder) build() (*table, error) {
} }
// Build builds the root Collator. // Build builds the root Collator.
func (b *Builder) Build() (*collate.Collator, error) { // TODO: return Weigher instead
func (b *Builder) Build() (colltab.Weigher, error) {
t, err := b.build() t, err := b.build()
if err != nil { if err != nil {
return nil, err return nil, err
} }
table := collate.Init(t) table := colltab.Init(t)
if table == nil { if table == nil {
panic("generated table of incompatible type") panic("generated table of incompatible type")
} }
return collate.NewFromTable(table), nil return table, nil
} }
// Build builds a Collator for Tailoring t. // Build builds a Collator for Tailoring t.
func (t *Tailoring) Build() (*collate.Collator, error) { func (t *Tailoring) Build() (colltab.Weigher, error) {
// TODO: implement. // TODO: implement.
return nil, nil return nil, nil
} }
...@@ -498,6 +499,7 @@ func (b *Builder) Print(w io.Writer) (n int, err error) { ...@@ -498,6 +499,7 @@ func (b *Builder) Print(w io.Writer) (n int, err error) {
p(fmt.Fprintf(w, "%q, ", loc.id)) p(fmt.Fprintf(w, "%q, ", loc.id))
} }
p(fmt.Fprintln(w, "}\n")) p(fmt.Fprintln(w, "}\n"))
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
p(fmt.Fprintln(w, "var locales = map[string]tableIndex{")) p(fmt.Fprintln(w, "var locales = map[string]tableIndex{"))
for _, loc := range b.locale { for _, loc := range b.locale {
p(fmt.Fprintf(w, "\t%q: ", loc.id)) p(fmt.Fprintf(w, "\t%q: ", loc.id))
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
package build package build
import ( import (
"exp/locale/collate" "exp/locale/collate/colltab"
"fmt" "fmt"
"unicode" "unicode"
) )
...@@ -34,87 +34,15 @@ func makeRawCE(w []int, ccc uint8) rawCE { ...@@ -34,87 +34,15 @@ func makeRawCE(w []int, ccc uint8) rawCE {
// form to represent such m to n mappings. Such special collation elements // form to represent such m to n mappings. Such special collation elements
// have a value >= 0x80000000. // have a value >= 0x80000000.
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
const ( const (
maxPrimaryBits = 21 maxPrimaryBits = 21
maxPrimaryCompactBits = 16 maxSecondaryBits = 12
maxSecondaryBits = 12 maxTertiaryBits = 8
maxSecondaryCompactBits = 8
maxCCCBits = 8
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
) )
func makeCE(rce rawCE) (uint32, error) { func makeCE(ce rawCE) (uint32, error) {
weights := rce.w v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
if w := weights[0]; w >= 1<<maxPrimaryBits || w < 0 { return uint32(v), e
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
if w := weights[1]; w >= 1<<maxSecondaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
}
if w := weights[2]; w >= 1<<maxTertiaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
}
ce := uint32(0)
if weights[0] != 0 {
if rce.ccc != 0 {
if weights[0] >= 1<<maxPrimaryCompactBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", weights[0], 1<<maxPrimaryCompactBits)
}
if weights[1] != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", weights[1], rce.ccc)
}
ce = uint32(weights[2] << (maxPrimaryCompactBits + maxCCCBits))
ce |= uint32(rce.ccc) << maxPrimaryCompactBits
ce |= uint32(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
if weights[1] >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", weights[1], 1<<maxSecondaryCompactBits)
}
ce = uint32(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
if weights[2] >= 1<<maxTertiaryCompactBits {
return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x (%X)", weights[2], 1<<maxTertiaryCompactBits, weights)
}
ce = uint32(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + uint32(weights[2])
}
} else {
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
ce += uint32(rce.ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= isSecondary
}
return ce, nil
} }
// For contractions, collation elements are of the form // For contractions, collation elements are of the form
...@@ -287,24 +215,24 @@ func convertLargeWeights(elems []rawCE) (res []rawCE, err error) { ...@@ -287,24 +215,24 @@ func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
// nextWeight computes the first possible collation weights following elems // nextWeight computes the first possible collation weights following elems
// for the given level. // for the given level.
func nextWeight(level collate.Level, elems []rawCE) []rawCE { func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
if level == collate.Identity { if level == colltab.Identity {
next := make([]rawCE, len(elems)) next := make([]rawCE, len(elems))
copy(next, elems) copy(next, elems)
return next return next
} }
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)} next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++ next[0].w[level]++
if level < collate.Secondary { if level < colltab.Secondary {
next[0].w[collate.Secondary] = defaultSecondary next[0].w[colltab.Secondary] = defaultSecondary
} }
if level < collate.Tertiary { if level < colltab.Tertiary {
next[0].w[collate.Tertiary] = defaultTertiary next[0].w[colltab.Tertiary] = defaultTertiary
} }
// Filter entries that cannot influence ordering. // Filter entries that cannot influence ordering.
for _, ce := range elems[1:] { for _, ce := range elems[1:] {
skip := true skip := true
for i := collate.Primary; i < level; i++ { for i := colltab.Primary; i < level; i++ {
skip = skip && ce.w[i] == 0 skip = skip && ce.w[i] == 0
} }
if !skip { if !skip {
...@@ -314,7 +242,7 @@ func nextWeight(level collate.Level, elems []rawCE) []rawCE { ...@@ -314,7 +242,7 @@ func nextWeight(level collate.Level, elems []rawCE) []rawCE {
return next return next
} }
func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) { func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ { for ; i < len(elems) && elems[i].w[level] == 0; i++ {
} }
if i < len(elems) { if i < len(elems) {
...@@ -325,8 +253,8 @@ func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) { ...@@ -325,8 +253,8 @@ func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) {
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise. // compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found. // It also returns the collation level at which the difference is found.
func compareWeights(a, b []rawCE) (result int, level collate.Level) { func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
for level := collate.Primary; level < collate.Identity; level++ { for level := colltab.Primary; level < colltab.Identity; level++ {
var va, vb int var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 { for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
ia, va = nextVal(a, ia, level) ia, va = nextVal(a, ia, level)
...@@ -340,7 +268,7 @@ func compareWeights(a, b []rawCE) (result int, level collate.Level) { ...@@ -340,7 +268,7 @@ func compareWeights(a, b []rawCE) (result int, level collate.Level) {
} }
} }
} }
return 0, collate.Identity return 0, colltab.Identity
} }
func equalCE(a, b rawCE) bool { func equalCE(a, b rawCE) bool {
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
package build package build
import ( import (
"exp/locale/collate" "exp/locale/collate/colltab"
"testing" "testing"
) )
...@@ -98,7 +98,7 @@ func mkRawCES(in [][]int) []rawCE { ...@@ -98,7 +98,7 @@ func mkRawCES(in [][]int) []rawCE {
type weightsTest struct { type weightsTest struct {
a, b [][]int a, b [][]int
level collate.Level level colltab.Level
result int result int
} }
...@@ -106,22 +106,22 @@ var nextWeightTests = []weightsTest{ ...@@ -106,22 +106,22 @@ var nextWeightTests = []weightsTest{
{ {
a: [][]int{{100, 20, 5, 0}}, a: [][]int{{100, 20, 5, 0}},
b: [][]int{{101, defaultSecondary, defaultTertiary, 0}}, b: [][]int{{101, defaultSecondary, defaultTertiary, 0}},
level: collate.Primary, level: colltab.Primary,
}, },
{ {
a: [][]int{{100, 20, 5, 0}}, a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 21, defaultTertiary, 0}}, b: [][]int{{100, 21, defaultTertiary, 0}},
level: collate.Secondary, level: colltab.Secondary,
}, },
{ {
a: [][]int{{100, 20, 5, 0}}, a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 6, 0}}, b: [][]int{{100, 20, 6, 0}},
level: collate.Tertiary, level: colltab.Tertiary,
}, },
{ {
a: [][]int{{100, 20, 5, 0}}, a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 5, 0}}, b: [][]int{{100, 20, 5, 0}},
level: collate.Identity, level: colltab.Identity,
}, },
} }
...@@ -129,14 +129,14 @@ var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}} ...@@ -129,14 +129,14 @@ var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
func TestNextWeight(t *testing.T) { func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests { for i, tt := range nextWeightTests {
test := func(l collate.Level, tt weightsTest, a, gold [][]int) { test := func(l colltab.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, mkRawCES(a)) res := nextWeight(tt.level, mkRawCES(a))
if !equalCEArrays(mkRawCES(gold), res) { if !equalCEArrays(mkRawCES(gold), res) {
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res) t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
} }
} }
test(-1, tt, tt.a, tt.b) test(-1, tt, tt.a, tt.b)
for l := collate.Primary; l <= collate.Tertiary; l++ { for l := colltab.Primary; l <= colltab.Tertiary; l++ {
if tt.level <= l { if tt.level <= l {
test(l, tt, append(tt.a, extra[l]), tt.b) test(l, tt, append(tt.a, extra[l]), tt.b)
} else { } else {
...@@ -150,49 +150,49 @@ var compareTests = []weightsTest{ ...@@ -150,49 +150,49 @@ var compareTests = []weightsTest{
{ {
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
collate.Identity, colltab.Identity,
0, 0,
}, },
{ {
[][]int{{100, 20, 5, 0}, extra[0]}, [][]int{{100, 20, 5, 0}, extra[0]},
[][]int{{100, 20, 5, 1}}, [][]int{{100, 20, 5, 1}},
collate.Primary, colltab.Primary,
1, 1,
}, },
{ {
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
[][]int{{101, 20, 5, 0}}, [][]int{{101, 20, 5, 0}},
collate.Primary, colltab.Primary,
-1, -1,
}, },
{ {
[][]int{{101, 20, 5, 0}}, [][]int{{101, 20, 5, 0}},
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
collate.Primary, colltab.Primary,
1, 1,
}, },
{ {
[][]int{{100, 0, 0, 0}, {0, 20, 5, 0}}, [][]int{{100, 0, 0, 0}, {0, 20, 5, 0}},
[][]int{{0, 20, 5, 0}, {100, 0, 0, 0}}, [][]int{{0, 20, 5, 0}, {100, 0, 0, 0}},
collate.Identity, colltab.Identity,
0, 0,
}, },
{ {
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
[][]int{{100, 21, 5, 0}}, [][]int{{100, 21, 5, 0}},
collate.Secondary, colltab.Secondary,
-1, -1,
}, },
{ {
[][]int{{100, 20, 5, 0}}, [][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 2, 0}}, [][]int{{100, 20, 2, 0}},
collate.Tertiary, colltab.Tertiary,
1, 1,
}, },
{ {
[][]int{{100, 20, 5, 1}}, [][]int{{100, 20, 5, 1}},
[][]int{{100, 20, 5, 2}}, [][]int{{100, 20, 5, 2}},
collate.Quaternary, colltab.Quaternary,
-1, -1,
}, },
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
package build package build
import ( import (
"exp/locale/collate" "exp/locale/collate/colltab"
"exp/norm" "exp/norm"
"fmt" "fmt"
"log" "log"
...@@ -36,7 +36,7 @@ type entry struct { ...@@ -36,7 +36,7 @@ type entry struct {
// prev, next, and level are used to keep track of tailorings. // prev, next, and level are used to keep track of tailorings.
prev, next *entry prev, next *entry
level collate.Level // next differs at this level level colltab.Level // next differs at this level
skipRemove bool // do not unlink when removed skipRemove bool // do not unlink when removed
decompose bool // can use NFKD decomposition to generate elems decompose bool // can use NFKD decomposition to generate elems
...@@ -76,7 +76,7 @@ func (e *entry) contractionStarter() bool { ...@@ -76,7 +76,7 @@ func (e *entry) contractionStarter() bool {
// from the current entry. // from the current entry.
// Entries that can be explicitly derived and logical reset positions are // Entries that can be explicitly derived and logical reset positions are
// examples of entries that will not be indexed. // examples of entries that will not be indexed.
func (e *entry) nextIndexed() (*entry, collate.Level) { func (e *entry) nextIndexed() (*entry, colltab.Level) {
level := e.level level := e.level
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next { for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
if e.level < level { if e.level < level {
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
package build package build
import ( import (
"exp/locale/collate" "exp/locale/collate/colltab"
"strconv" "strconv"
"testing" "testing"
) )
...@@ -27,7 +27,7 @@ func makeList(n int) []*entry { ...@@ -27,7 +27,7 @@ func makeList(n int) []*entry {
runes: runes, runes: runes,
elems: weights, elems: weights,
} }
weights = nextWeight(collate.Primary, weights) weights = nextWeight(colltab.Primary, weights)
} }
for i := 1; i < len(es); i++ { for i := 1; i < len(es); i++ {
es[i-1].next = es[i] es[i-1].next = es[i]
......
...@@ -9,6 +9,7 @@ package collate ...@@ -9,6 +9,7 @@ package collate
import ( import (
"bytes" "bytes"
"exp/locale/collate/colltab"
"exp/norm" "exp/norm"
) )
...@@ -46,7 +47,7 @@ type Collator struct { ...@@ -46,7 +47,7 @@ type Collator struct {
// diacritical marks to be ignored but not case without having to fiddle with levels). // diacritical marks to be ignored but not case without having to fiddle with levels).
// Strength sets the maximum level to use in comparison. // Strength sets the maximum level to use in comparison.
Strength Level Strength colltab.Level
// Alternate specifies an alternative handling of variables. // Alternate specifies an alternative handling of variables.
Alternate AlternateHandling Alternate AlternateHandling
...@@ -75,7 +76,7 @@ type Collator struct { ...@@ -75,7 +76,7 @@ type Collator struct {
f norm.Form f norm.Form
t Weigher t colltab.Weigher
sorter sorter sorter sorter
...@@ -125,17 +126,18 @@ func New(loc string) *Collator { ...@@ -125,17 +126,18 @@ func New(loc string) *Collator {
t = locales["root"] t = locales["root"]
} }
} }
return NewFromTable(Init(t)) return NewFromTable(colltab.Init(t))
} }
func NewFromTable(t Weigher) *Collator { func NewFromTable(t colltab.Weigher) *Collator {
c := &Collator{ c := &Collator{
Strength: Tertiary, Strength: colltab.Tertiary,
f: norm.NFD, f: norm.NFD,
t: t, t: t,
} }
c._iter[0].init(c) c._iter[0].init(c)
c._iter[1].init(c) c._iter[1].init(c)
c.variableTop = t.Top()
return c return c
} }
...@@ -166,7 +168,7 @@ func (c *Collator) Compare(a, b []byte) int { ...@@ -166,7 +168,7 @@ func (c *Collator) Compare(a, b []byte) int {
if res := c.compare(); res != 0 { if res := c.compare(); res != 0 {
return res return res
} }
if Identity == c.Strength { if colltab.Identity == c.Strength {
return bytes.Compare(a, b) return bytes.Compare(a, b)
} }
return 0 return 0
...@@ -182,7 +184,7 @@ func (c *Collator) CompareString(a, b string) int { ...@@ -182,7 +184,7 @@ func (c *Collator) CompareString(a, b string) int {
if res := c.compare(); res != 0 { if res := c.compare(); res != 0 {
return res return res
} }
if Identity == c.Strength { if colltab.Identity == c.Strength {
if a < b { if a < b {
return -1 return -1
} else if a > b { } else if a > b {
...@@ -222,7 +224,7 @@ func (c *Collator) compare() int { ...@@ -222,7 +224,7 @@ func (c *Collator) compare() int {
} else { } else {
// TODO: handle shifted // TODO: handle shifted
} }
if Secondary <= c.Strength { if colltab.Secondary <= c.Strength {
f := (*iter).nextSecondary f := (*iter).nextSecondary
if c.Backwards { if c.Backwards {
f = (*iter).prevSecondary f = (*iter).prevSecondary
...@@ -232,12 +234,12 @@ func (c *Collator) compare() int { ...@@ -232,12 +234,12 @@ func (c *Collator) compare() int {
} }
} }
// TODO: special case handling (Danish?) // TODO: special case handling (Danish?)
if Tertiary <= c.Strength || c.CaseLevel { if colltab.Tertiary <= c.Strength || c.CaseLevel {
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 { if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
return res return res
} }
// TODO: Not needed for the default value of AltNonIgnorable? // TODO: Not needed for the default value of AltNonIgnorable?
if Quaternary <= c.Strength { if colltab.Quaternary <= c.Strength {
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 { if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
return res return res
} }
...@@ -266,14 +268,14 @@ func (c *Collator) KeyFromString(buf *Buffer, str string) []byte { ...@@ -266,14 +268,14 @@ func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
return c.key(buf, c.getColElemsString(str)) return c.key(buf, c.getColElemsString(str))
} }
func (c *Collator) key(buf *Buffer, w []Elem) []byte { func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
processWeights(c.Alternate, c.variableTop, w) processWeights(c.Alternate, c.t.Top(), w)
kn := len(buf.key) kn := len(buf.key)
c.keyFromElems(buf, w) c.keyFromElems(buf, w)
return buf.key[kn:] return buf.key[kn:]
} }
func (c *Collator) getColElems(str []byte) []Elem { func (c *Collator) getColElems(str []byte) []colltab.Elem {
i := c.iter(0) i := c.iter(0)
i.setInput(str) i.setInput(str)
for i.next() { for i.next() {
...@@ -281,7 +283,7 @@ func (c *Collator) getColElems(str []byte) []Elem { ...@@ -281,7 +283,7 @@ func (c *Collator) getColElems(str []byte) []Elem {
return i.ce return i.ce
} }
func (c *Collator) getColElemsString(str string) []Elem { func (c *Collator) getColElemsString(str string) []colltab.Elem {
i := c.iter(0) i := c.iter(0)
i.setInputString(str) i.setInputString(str)
for i.next() { for i.next() {
...@@ -293,15 +295,15 @@ type iter struct { ...@@ -293,15 +295,15 @@ type iter struct {
bytes []byte bytes []byte
str string str string
wa [512]Elem wa [512]colltab.Elem
ce []Elem ce []colltab.Elem
pce int pce int
nce int // nce <= len(ce) nce int // nce <= len(ce)
prevCCC uint8 prevCCC uint8
pStarter int pStarter int
t Weigher t colltab.Weigher
} }
func (i *iter) init(c *Collator) { func (i *iter) init(c *Collator) {
...@@ -493,13 +495,13 @@ func appendPrimary(key []byte, p int) []byte { ...@@ -493,13 +495,13 @@ func appendPrimary(key []byte, p int) []byte {
// keyFromElems converts the weights ws to a compact sequence of bytes. // keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf. // The result will be appended to the byte buffer in buf.
func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) { func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
for _, v := range ws { for _, v := range ws {
if w := v.Primary(); w > 0 { if w := v.Primary(); w > 0 {
buf.key = appendPrimary(buf.key, w) buf.key = appendPrimary(buf.key, w)
} }
} }
if Secondary <= c.Strength { if colltab.Secondary <= c.Strength {
buf.key = append(buf.key, 0, 0) buf.key = append(buf.key, 0, 0)
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF. // TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
if !c.Backwards { if !c.Backwards {
...@@ -518,7 +520,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) { ...@@ -518,7 +520,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
} else if c.CaseLevel { } else if c.CaseLevel {
buf.key = append(buf.key, 0, 0) buf.key = append(buf.key, 0, 0)
} }
if Tertiary <= c.Strength || c.CaseLevel { if colltab.Tertiary <= c.Strength || c.CaseLevel {
buf.key = append(buf.key, 0, 0) buf.key = append(buf.key, 0, 0)
for _, v := range ws { for _, v := range ws {
if w := v.Tertiary(); w > 0 { if w := v.Tertiary(); w > 0 {
...@@ -529,12 +531,12 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) { ...@@ -529,12 +531,12 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
// Note that we represent MaxQuaternary as 0xFF. The first byte of the // Note that we represent MaxQuaternary as 0xFF. The first byte of the
// representation of a primary weight is always smaller than 0xFF, // representation of a primary weight is always smaller than 0xFF,
// so using this single byte value will compare correctly. // so using this single byte value will compare correctly.
if Quaternary <= c.Strength && c.Alternate >= AltShifted { if colltab.Quaternary <= c.Strength && c.Alternate >= AltShifted {
if c.Alternate == AltShiftTrimmed { if c.Alternate == AltShiftTrimmed {
lastNonFFFF := len(buf.key) lastNonFFFF := len(buf.key)
buf.key = append(buf.key, 0) buf.key = append(buf.key, 0)
for _, v := range ws { for _, v := range ws {
if w := v.Quaternary(); w == MaxQuaternary { if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF) buf.key = append(buf.key, 0xFF)
} else if w > 0 { } else if w > 0 {
buf.key = appendPrimary(buf.key, w) buf.key = appendPrimary(buf.key, w)
...@@ -545,7 +547,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) { ...@@ -545,7 +547,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
} else { } else {
buf.key = append(buf.key, 0) buf.key = append(buf.key, 0)
for _, v := range ws { for _, v := range ws {
if w := v.Quaternary(); w == MaxQuaternary { if w := v.Quaternary(); w == colltab.MaxQuaternary {
buf.key = append(buf.key, 0xFF) buf.key = append(buf.key, 0xFF)
} else if w > 0 { } else if w > 0 {
buf.key = appendPrimary(buf.key, w) buf.key = appendPrimary(buf.key, w)
...@@ -556,18 +558,18 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) { ...@@ -556,18 +558,18 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
} }
} }
func processWeights(vw AlternateHandling, top uint32, wa []Elem) { func processWeights(vw AlternateHandling, top uint32, wa []colltab.Elem) {
ignore := false ignore := false
vtop := int(top) vtop := int(top)
switch vw { switch vw {
case AltShifted, AltShiftTrimmed: case AltShifted, AltShiftTrimmed:
for i := range wa { for i := range wa {
if p := wa[i].Primary(); p <= vtop && p != 0 { if p := wa[i].Primary(); p <= vtop && p != 0 {
wa[i] = MakeQuaternary(p) wa[i] = colltab.MakeQuaternary(p)
ignore = true ignore = true
} else if p == 0 { } else if p == 0 {
if ignore { if ignore {
wa[i] = ceIgnore wa[i] = colltab.Ignore
} }
} else { } else {
ignore = false ignore = false
...@@ -576,7 +578,7 @@ func processWeights(vw AlternateHandling, top uint32, wa []Elem) { ...@@ -576,7 +578,7 @@ func processWeights(vw AlternateHandling, top uint32, wa []Elem) {
case AltBlanked: case AltBlanked:
for i := range wa { for i := range wa {
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) { if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
wa[i] = ceIgnore wa[i] = colltab.Ignore
ignore = true ignore = true
} else { } else {
ignore = false ignore = false
......
...@@ -2,11 +2,11 @@ ...@@ -2,11 +2,11 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate_test package collate
import ( import (
"bytes" "bytes"
"exp/locale/collate" "exp/locale/collate/colltab"
"testing" "testing"
) )
...@@ -17,28 +17,36 @@ type weightsTest struct { ...@@ -17,28 +17,36 @@ type weightsTest struct {
type opts struct { type opts struct {
lev int lev int
alt collate.AlternateHandling alt AlternateHandling
top int top int
backwards bool backwards bool
caseLevel bool caseLevel bool
} }
func (o opts) level() collate.Level { func (o opts) level() colltab.Level {
if o.lev == 0 { if o.lev == 0 {
return collate.Quaternary return colltab.Quaternary
} }
return collate.Level(o.lev - 1) return colltab.Level(o.lev - 1)
} }
func (o opts) collator() *collate.Collator { func makeCE(w []int) colltab.Elem {
c := &collate.Collator{ ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
Strength: o.level(), if err != nil {
Alternate: o.alt, panic(err)
Backwards: o.backwards, }
CaseLevel: o.caseLevel, return ce
}
func (o opts) collator() *Collator {
c := &Collator{
Strength: o.level(),
Alternate: o.alt,
Backwards: o.backwards,
CaseLevel: o.caseLevel,
variableTop: uint32(o.top),
} }
collate.SetTop(c, o.top)
return c return c
} }
...@@ -46,165 +54,163 @@ const ( ...@@ -46,165 +54,163 @@ const (
maxQ = 0x1FFFFF maxQ = 0x1FFFFF
) )
func wpq(p, q int) collate.Weights { func wpq(p, q int) Weights {
return collate.W(p, defaults.Secondary, defaults.Tertiary, q) return W(p, defaults.Secondary, defaults.Tertiary, q)
} }
func wsq(s, q int) collate.Weights { func wsq(s, q int) Weights {
return collate.W(0, s, defaults.Tertiary, q) return W(0, s, defaults.Tertiary, q)
} }
func wq(q int) collate.Weights { func wq(q int) Weights {
return collate.W(0, 0, 0, q) return W(0, 0, 0, q)
} }
var zero = w(0, 0, 0, 0) var zero = W(0, 0, 0, 0)
var processTests = []weightsTest{ var processTests = []weightsTest{
// Shifted // Shifted
{ // simple sequence of non-variables { // simple sequence of non-variables
opt: opts{alt: collate.AltShifted, top: 100}, opt: opts{alt: AltShifted, top: 100},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)}, out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
}, },
{ // first is a variable { // first is a variable
opt: opts{alt: collate.AltShifted, top: 250}, opt: opts{alt: AltShifted, top: 250},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
}, },
{ // all but first are variable { // all but first are variable
opt: opts{alt: collate.AltShifted, top: 999}, opt: opts{alt: AltShifted, top: 999},
in: ColElems{w(1000), w(200), w(300), w(400)}, in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)}, out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
}, },
{ // first is a modifier { // first is a modifier
opt: opts{alt: collate.AltShifted, top: 999}, opt: opts{alt: AltShifted, top: 999},
in: ColElems{w(0, 10), w(1000)}, in: ColElems{W(0, 10), W(1000)},
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)}, out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
}, },
{ // primary ignorables { // primary ignorables
opt: opts{alt: collate.AltShifted, top: 250}, opt: opts{alt: AltShifted, top: 250},
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)}, in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
}, },
{ // secondary ignorables { // secondary ignorables
opt: opts{alt: collate.AltShifted, top: 250}, opt: opts{alt: AltShifted, top: 250},
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)}, in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), w(0, 0, 15, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
}, },
{ // tertiary ignorables, no change { // tertiary ignorables, no change
opt: opts{alt: collate.AltShifted, top: 250}, opt: opts{alt: AltShifted, top: 250},
in: ColElems{w(200), zero, w(300), zero, w(400)}, in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
}, },
// ShiftTrimmed (same as Shifted) // ShiftTrimmed (same as Shifted)
{ // simple sequence of non-variables { // simple sequence of non-variables
opt: opts{alt: collate.AltShiftTrimmed, top: 100}, opt: opts{alt: AltShiftTrimmed, top: 100},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)}, out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
}, },
{ // first is a variable { // first is a variable
opt: opts{alt: collate.AltShiftTrimmed, top: 250}, opt: opts{alt: AltShiftTrimmed, top: 250},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
}, },
{ // all but first are variable { // all but first are variable
opt: opts{alt: collate.AltShiftTrimmed, top: 999}, opt: opts{alt: AltShiftTrimmed, top: 999},
in: ColElems{w(1000), w(200), w(300), w(400)}, in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)}, out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
}, },
{ // first is a modifier { // first is a modifier
opt: opts{alt: collate.AltShiftTrimmed, top: 999}, opt: opts{alt: AltShiftTrimmed, top: 999},
in: ColElems{w(0, 10), w(1000)}, in: ColElems{W(0, 10), W(1000)},
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)}, out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
}, },
{ // primary ignorables { // primary ignorables
opt: opts{alt: collate.AltShiftTrimmed, top: 250}, opt: opts{alt: AltShiftTrimmed, top: 250},
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)}, in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
}, },
{ // secondary ignorables { // secondary ignorables
opt: opts{alt: collate.AltShiftTrimmed, top: 250}, opt: opts{alt: AltShiftTrimmed, top: 250},
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)}, in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), w(0, 0, 15, maxQ), wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
}, },
{ // tertiary ignorables, no change { // tertiary ignorables, no change
opt: opts{alt: collate.AltShiftTrimmed, top: 250}, opt: opts{alt: AltShiftTrimmed, top: 250},
in: ColElems{w(200), zero, w(300), zero, w(400)}, in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)}, out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
}, },
// Blanked // Blanked
{ // simple sequence of non-variables { // simple sequence of non-variables
opt: opts{alt: collate.AltBlanked, top: 100}, opt: opts{alt: AltBlanked, top: 100},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{w(200), w(300), w(400)}, out: ColElems{W(200), W(300), W(400)},
}, },
{ // first is a variable { // first is a variable
opt: opts{alt: collate.AltBlanked, top: 250}, opt: opts{alt: AltBlanked, top: 250},
in: ColElems{w(200), w(300), w(400)}, in: ColElems{W(200), W(300), W(400)},
out: ColElems{zero, w(300), w(400)}, out: ColElems{zero, W(300), W(400)},
}, },
{ // all but first are variable { // all but first are variable
opt: opts{alt: collate.AltBlanked, top: 999}, opt: opts{alt: AltBlanked, top: 999},
in: ColElems{w(1000), w(200), w(300), w(400)}, in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{w(1000), zero, zero, zero}, out: ColElems{W(1000), zero, zero, zero},
}, },
{ // first is a modifier { // first is a modifier
opt: opts{alt: collate.AltBlanked, top: 999}, opt: opts{alt: AltBlanked, top: 999},
in: ColElems{w(0, 10), w(1000)}, in: ColElems{W(0, 10), W(1000)},
out: ColElems{w(0, 10), w(1000)}, out: ColElems{W(0, 10), W(1000)},
}, },
{ // primary ignorables { // primary ignorables
opt: opts{alt: collate.AltBlanked, top: 250}, opt: opts{alt: AltBlanked, top: 250},
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)}, in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{zero, zero, w(300), w(0, 15), w(400)}, out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
}, },
{ // secondary ignorables { // secondary ignorables
opt: opts{alt: collate.AltBlanked, top: 250}, opt: opts{alt: AltBlanked, top: 250},
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)}, in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{zero, zero, w(300), w(0, 0, 15), w(400)}, out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
}, },
{ // tertiary ignorables, no change { // tertiary ignorables, no change
opt: opts{alt: collate.AltBlanked, top: 250}, opt: opts{alt: AltBlanked, top: 250},
in: ColElems{w(200), zero, w(300), zero, w(400)}, in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{zero, zero, w(300), zero, w(400)}, out: ColElems{zero, zero, W(300), zero, W(400)},
}, },
// Non-ignorable: input is always equal to output. // Non-ignorable: input is always equal to output.
{ // all but first are variable { // all but first are variable
opt: opts{alt: collate.AltNonIgnorable, top: 999}, opt: opts{alt: AltNonIgnorable, top: 999},
in: ColElems{w(1000), w(200), w(300), w(400)}, in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{w(1000), w(200), w(300), w(400)}, out: ColElems{W(1000), W(200), W(300), W(400)},
}, },
{ // primary ignorables { // primary ignorables
opt: opts{alt: collate.AltNonIgnorable, top: 250}, opt: opts{alt: AltNonIgnorable, top: 250},
in: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)}, in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{w(200), w(0, 10), w(300), w(0, 15), w(400)}, out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
}, },
{ // secondary ignorables { // secondary ignorables
opt: opts{alt: collate.AltNonIgnorable, top: 250}, opt: opts{alt: AltNonIgnorable, top: 250},
in: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)}, in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{w(200), w(0, 0, 10), w(300), w(0, 0, 15), w(400)}, out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
}, },
{ // tertiary ignorables, no change { // tertiary ignorables, no change
opt: opts{alt: collate.AltNonIgnorable, top: 250}, opt: opts{alt: AltNonIgnorable, top: 250},
in: ColElems{w(200), zero, w(300), zero, w(400)}, in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{w(200), zero, w(300), zero, w(400)}, out: ColElems{W(200), zero, W(300), zero, W(400)},
}, },
} }
func TestProcessWeights(t *testing.T) { func TestProcessWeights(t *testing.T) {
for i, tt := range processTests { for i, tt := range processTests {
res := collate.ProcessWeights(tt.opt.alt, tt.opt.top, tt.in) in := convertFromWeights(tt.in)
if len(res) != len(tt.out) { out := convertFromWeights(tt.out)
t.Errorf("%d: len(ws) was %d; want %d (%v should be %v)", i, len(res), len(tt.out), res, tt.out) processWeights(tt.opt.alt, uint32(tt.opt.top), in)
continue for j, w := range in {
} if w != out[j] {
for j, w := range res { t.Errorf("%d: Weights %d was %v; want %v %X %X", i, j, w, out[j])
if w != tt.out[j] {
t.Errorf("%d: Weights %d was %v; want %v", i, j, w, tt.out[j])
} }
} }
} }
...@@ -223,8 +229,8 @@ const sep = 0 // separator byte ...@@ -223,8 +229,8 @@ const sep = 0 // separator byte
var keyFromElemTests = []keyFromElemTest{ var keyFromElemTests = []keyFromElemTest{
{ // simple primary and secondary weights. { // simple primary and secondary weights.
opts{alt: collate.AltShifted}, opts{alt: AltShifted},
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)}, ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -232,8 +238,8 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -232,8 +238,8 @@ var keyFromElemTests = []keyFromElemTest{
}, },
}, },
{ // same as first, but with zero elements that need to be removed { // same as first, but with zero elements that need to be removed
opts{alt: collate.AltShifted}, opts{alt: AltShifted},
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)}, ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -241,8 +247,8 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -241,8 +247,8 @@ var keyFromElemTests = []keyFromElemTest{
}, },
}, },
{ // same as first, with large primary values { // same as first, with large primary values
opts{alt: collate.AltShifted}, opts{alt: AltShifted},
ColElems{w(0x200), w(0x8000), w(0, 0x30), w(0x12345)}, ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary []byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -250,8 +256,8 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -250,8 +256,8 @@ var keyFromElemTests = []keyFromElemTest{
}, },
}, },
{ // same as first, but with the secondary level backwards { // same as first, but with the secondary level backwards
opts{alt: collate.AltShifted, backwards: true}, opts{alt: AltShifted, backwards: true},
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)}, ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -259,28 +265,28 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -259,28 +265,28 @@ var keyFromElemTests = []keyFromElemTest{
}, },
}, },
{ // same as first, ignoring quaternary level { // same as first, ignoring quaternary level
opts{alt: collate.AltShifted, lev: 3}, opts{alt: AltShifted, lev: 3},
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)}, ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
}, },
}, },
{ // same as first, ignoring tertiary level { // same as first, ignoring tertiary level
opts{alt: collate.AltShifted, lev: 2}, opts{alt: AltShifted, lev: 2},
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)}, ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
}, },
}, },
{ // same as first, ignoring secondary level { // same as first, ignoring secondary level
opts{alt: collate.AltShifted, lev: 1}, opts{alt: AltShifted, lev: 1},
ColElems{w(0x200), zero, w(0x7FFF), w(0, 0x30), zero, w(0x100)}, ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00}, []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
}, },
{ // simple primary and secondary weights. { // simple primary and secondary weights.
opts{alt: collate.AltShiftTrimmed, top: 0x250}, opts{alt: AltShiftTrimmed, top: 0x250},
ColElems{w(0x300), w(0x200), w(0x7FFF), w(0, 0x30), w(0x800)}, ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary []byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -288,8 +294,8 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -288,8 +294,8 @@ var keyFromElemTests = []keyFromElemTest{
}, },
}, },
{ // as first, primary with case level enabled { // as first, primary with case level enabled
opts{alt: collate.AltShifted, lev: 1, caseLevel: true}, opts{alt: AltShifted, lev: 1, caseLevel: true},
ColElems{w(0x200), w(0x7FFF), w(0, 0x30), w(0x100)}, ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
sep, sep, // secondary sep, sep, // secondary
sep, sep, defT, defT, defT, defT, // tertiary sep, sep, defT, defT, defT, defT, // tertiary
...@@ -298,11 +304,13 @@ var keyFromElemTests = []keyFromElemTest{ ...@@ -298,11 +304,13 @@ var keyFromElemTests = []keyFromElemTest{
} }
func TestKeyFromElems(t *testing.T) { func TestKeyFromElems(t *testing.T) {
buf := collate.Buffer{} buf := Buffer{}
for i, tt := range keyFromElemTests { for i, tt := range keyFromElemTests {
buf.Reset() buf.Reset()
ws := collate.ProcessWeights(tt.opt.alt, tt.opt.top, tt.in) in := convertFromWeights(tt.in)
res := collate.KeyFromElems(tt.opt.collator(), &buf, ws) processWeights(tt.opt.alt, uint32(tt.opt.top), in)
tt.opt.collator().keyFromElems(&buf, in)
res := buf.key
if len(res) != len(tt.out) { if len(res) != len(tt.out) {
t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out) t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
} }
...@@ -335,15 +343,17 @@ func TestGetColElems(t *testing.T) { ...@@ -335,15 +343,17 @@ func TestGetColElems(t *testing.T) {
} }
} }
for j, chk := range append(tt.chk, check{string(str), len(str), out}) { for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
ws := collate.GetColElems(c, []byte(chk.in)[:chk.n]) out := convertFromWeights(chk.out)
if len(ws) != len(chk.out) { ce := c.getColElems([]byte(chk.in)[:chk.n])
t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ws), len(chk.out)) if len(ce) != len(out) {
t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
continue continue
} }
cnt := 0 cnt := 0
for k, w := range ws { for k, w := range ce {
if w != chk.out[k] { w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k]) if w != out[k] {
t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
cnt++ cnt++
} }
if cnt > 10 { if cnt > 10 {
...@@ -377,9 +387,9 @@ var keyTests = []keyTest{ ...@@ -377,9 +387,9 @@ var keyTests = []keyTest{
func TestKey(t *testing.T) { func TestKey(t *testing.T) {
c, _ := makeTable(appendNextTests[4].in) c, _ := makeTable(appendNextTests[4].in)
c.Alternate = collate.AltShifted c.Alternate = AltShifted
c.Strength = collate.Quaternary c.Strength = colltab.Quaternary
buf := collate.Buffer{} buf := Buffer{}
keys1 := [][]byte{} keys1 := [][]byte{}
keys2 := [][]byte{} keys2 := [][]byte{}
for _, tt := range keyTests { for _, tt := range keyTests {
...@@ -429,3 +439,77 @@ func TestCompare(t *testing.T) { ...@@ -429,3 +439,77 @@ func TestCompare(t *testing.T) {
} }
} }
} }
// TestDoNorm checks iter.doNorm in two ways: a table of small inputs whose
// collation elements must come out in the expected CCC order, and a series of
// long runs of combining characters around maxCombiningCharacters.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	// in and out list the CCC value given to (and expected from) each
	// collation element. Elements listed before div are built with primary
	// weight 0; elements listed after it are built with primary weight 100.
	tests := []struct {
		in, out []int
	}{
		{in: []int{4, div, 3},
			out: []int{3, 4},
		},
		{in: []int{4, div, 3, 3, 3},
			out: []int{3, 3, 3, 4},
		},
		{in: []int{0, 4, div, 3},
			out: []int{0, 3, 4},
		},
		{in: []int{0, 0, 4, 5, div, 3, 3},
			out: []int{0, 0, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 3, 3},
			out: []int{0, 0, 1, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 4, 4},
			out: []int{0, 0, 1, 4, 4, 4, 5},
		},
	}
	for j, tt := range tests {
		i := iter{}
		// w is the primary weight for the next element, p the index of the
		// first element after div, s the value copied into i.pStarter.
		// NOTE(review): s is only ever assigned 0, so pStarter is always 0
		// here; possibly the index of the last CCC-0 element was intended.
		var w, p, s int
		for k, cc := range tt.in {
			if cc == 0 {
				s = 0
			}
			if cc == div {
				w = 100
				p = k
				i.pStarter = s
				continue
			}
			i.ce = append(i.ce, makeCE([]int{w, defaultSecondary, 2, cc}))
		}
		i.prevCCC = i.ce[p-1].CCC()
		i.doNorm(p, i.ce[p].CCC())
		if len(i.ce) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
			// Skip the element-wise comparison: indexing tt.out with an
			// index taken from a longer i.ce would panic.
			continue
		}
		prevCCC := uint8(0)
		for k, ce := range i.ce {
			if int(ce.CCC()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
			}
			// Neighbours with the same CCC must not have a larger primary
			// weight in front of a smaller one.
			if k > 0 && ce.CCC() == prevCCC && i.ce[k-1].Primary() > ce.Primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
			// Remember this element's CCC so the next iteration compares
			// against its actual predecessor rather than always against 0.
			prevCCC = ce.CCC()
		}
	}
	// test cutoff of large sequence of combining characters.
	// result[o+2] is the value expected in i.prevCCC after doNorm for a run
	// whose length is offset by o from maxCombiningCharacters + 1.
	result := []uint8{8, 8, 8, 5, 5}
	for o := -2; o <= 2; o++ {
		i := iter{pStarter: 2, prevCCC: 8}
		n := maxCombiningCharacters + 1 + o
		for j := 1; j < n+i.pStarter; j++ {
			i.ce = append(i.ce, makeCE([]int{100, defaultSecondary, 2, 8}))
		}
		p := len(i.ce)
		i.ce = append(i.ce, makeCE([]int{0, defaultSecondary, 2, 5}))
		i.doNorm(p, 5)
		if i.prevCCC != result[o+2] {
			t.Errorf("%d: i.prevCCC was %d; want %d", n, i.prevCCC, result[o+2])
		}
		if result[o+2] == 5 && i.pStarter != p {
			t.Errorf("%d: i.pStarter was %d; want %d", n, i.pStarter, p)
		}
	}
}
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import ( import (
"fmt"
"unicode" "unicode"
) )
...@@ -94,23 +95,31 @@ func (ce Elem) ctype() ceType { ...@@ -94,23 +95,31 @@ func (ce Elem) ctype() ceType {
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 // 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value // - q* quaternary value
const ( const (
ceTypeMask = 0xC0000000 ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000 ceTypeMaskExt = 0xE0000000
ceType1 = 0x40000000 ceIgnoreMask = 0xF00FFFFF
ceType2 = 0x00000000 ceType1 = 0x40000000
ceType3or4 = 0x80000000 ceType2 = 0x00000000
ceType4 = 0xA0000000 ceType3or4 = 0x80000000
ceTypeQ = 0xC0000000 ceType4 = 0xA0000000
ceIgnore = ceType4 ceTypeQ = 0xC0000000
firstNonPrimary = 0x80000000 Ignore = ceType4
lastSpecialPrimary = 0xA0000000 firstNonPrimary = 0x80000000
secondaryMask = 0x80000000 lastSpecialPrimary = 0xA0000000
hasTertiaryMask = 0x40000000 secondaryMask = 0x80000000
primaryValueMask = 0x3FFFFE00 hasTertiaryMask = 0x40000000
primaryShift = 9 primaryValueMask = 0x3FFFFE00
compactPrimaryBits = 16 maxPrimaryBits = 21
compactSecondaryShift = 5 compactPrimaryBits = 16
minCompactSecondary = defaultSecondary - 4 maxSecondaryBits = 12
maxTertiaryBits = 8
maxCCCBits = 8
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryCompactBits = 5
primaryShift = 9
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
) )
func makeImplicitCE(primary int) Elem { func makeImplicitCE(primary int) Elem {
...@@ -120,8 +129,51 @@ func makeImplicitCE(primary int) Elem { ...@@ -120,8 +129,51 @@ func makeImplicitCE(primary int) Elem {
// MakeElem returns an Elem for the given values. It will return an error // MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid. // if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) { func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
// TODO: implement if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
return 0, nil return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
}
if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
}
ce := Elem(0)
if primary != 0 {
if ccc != 0 {
if primary >= 1<<compactPrimaryBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
}
if secondary != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
}
ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
ce |= Elem(ccc) << compactPrimaryBits
ce |= Elem(primary)
ce |= ceType3or4
} else if tertiary == defaultTertiary {
if secondary >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
}
ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
ce |= ceType1
} else {
d := secondary - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
if tertiary >= 1<<maxTertiaryCompactBits {
return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
}
ce = Elem(primary<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
}
} else {
ce = Elem(secondary<<maxTertiaryBits + tertiary)
ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= ceType4
}
return ce, nil
} }
// MakeQuaternary returns an Elem with the given quaternary value. // MakeQuaternary returns an Elem with the given quaternary value.
...@@ -211,12 +263,12 @@ func (ce Elem) updateTertiary(t uint8) Elem { ...@@ -211,12 +263,12 @@ func (ce Elem) updateTertiary(t uint8) Elem {
} }
// Quaternary returns the quaternary value if explicitly specified, // Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == ceIgnore, or MaxQuaternary otherwise. // 0 if ce == Ignore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants. // Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int { func (ce Elem) Quaternary() int {
if ce&ceTypeMask == ceTypeQ { if ce&ceTypeMask == ceTypeQ {
return int(ce&primaryValueMask) >> primaryShift return int(ce&primaryValueMask) >> primaryShift
} else if ce == ceIgnore { } else if ce&ceIgnoreMask == Ignore {
return 0 return 0
} }
return MaxQuaternary return MaxQuaternary
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import ( import (
"testing" "testing"
...@@ -14,40 +14,8 @@ type ceTest struct { ...@@ -14,40 +14,8 @@ type ceTest struct {
arg []int arg []int
} }
// The make* funcs are simplified versions of the functions in build/colelem.go
func makeCE(weights []int) Elem { func makeCE(weights []int) Elem {
const ( ce, _ := MakeElem(weights[0], weights[1], weights[2], uint8(weights[3]))
maxPrimaryBits = 21
maxSecondaryBits = 12
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
var ce Elem
ccc := weights[3]
if weights[0] != 0 {
if ccc != 0 {
ce = Elem(weights[2] << 24)
ce |= Elem(ccc) << 16
ce |= Elem(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
ce = Elem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + 4
ce = Elem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(weights[2])
}
} else {
ce = Elem(weights[1]<<maxTertiaryBits + weights[2])
ce += Elem(ccc) << 20
ce |= isSecondary
}
return ce return ce
} }
...@@ -104,12 +72,6 @@ func decompCE(inout []int) (ce Elem, t ceType) { ...@@ -104,12 +72,6 @@ func decompCE(inout []int) (ce Elem, t ceType) {
return ce, ceDecompose return ce, ceDecompose
} }
// Bit-width limits for the primary, secondary and tertiary weights.
// NOTE(review): maxSecondaryBits is 16 here, whereas the constant block next
// to MakeElem declares maxSecondaryBits = 12 — confirm which value is intended.
const (
maxPrimaryBits = 21
maxSecondaryBits = 16
maxTertiaryBits = 8
)
var ceTests = []ceTest{ var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}}, {normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}}, {normalCE, []int{0, 30, 3, 0}},
...@@ -198,77 +160,3 @@ func TestUpdateTertiary(t *testing.T) { ...@@ -198,77 +160,3 @@ func TestUpdateTertiary(t *testing.T) {
} }
} }
} }
// TestDoNorm checks iter.doNorm in two ways: a table of small inputs whose
// collation elements must come out in the expected CCC order, and a series of
// long runs of combining characters around maxCombiningCharacters.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	// in and out list the CCC value given to (and expected from) each
	// collation element. Elements listed before div are built with primary
	// weight 0; elements listed after it are built with primary weight 100.
	tests := []struct {
		in, out []int
	}{
		{in: []int{4, div, 3},
			out: []int{3, 4},
		},
		{in: []int{4, div, 3, 3, 3},
			out: []int{3, 3, 3, 4},
		},
		{in: []int{0, 4, div, 3},
			out: []int{0, 3, 4},
		},
		{in: []int{0, 0, 4, 5, div, 3, 3},
			out: []int{0, 0, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 3, 3},
			out: []int{0, 0, 1, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 4, 4},
			out: []int{0, 0, 1, 4, 4, 4, 5},
		},
	}
	for j, tt := range tests {
		i := iter{}
		// w is the primary weight for the next element, p the index of the
		// first element after div, s the value copied into i.pStarter.
		// NOTE(review): s is only ever assigned 0, so pStarter is always 0
		// here; possibly the index of the last CCC-0 element was intended.
		var w, p, s int
		for k, cc := range tt.in {
			if cc == 0 {
				s = 0
			}
			if cc == div {
				w = 100
				p = k
				i.pStarter = s
				continue
			}
			i.ce = append(i.ce, makeCE([]int{w, 20, 2, cc}))
		}
		i.prevCCC = i.ce[p-1].CCC()
		i.doNorm(p, i.ce[p].CCC())
		if len(i.ce) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
			// Skip the element-wise comparison: indexing tt.out with an
			// index taken from a longer i.ce would panic.
			continue
		}
		prevCCC := uint8(0)
		for k, ce := range i.ce {
			if int(ce.CCC()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
			}
			// Neighbours with the same CCC must not have a larger primary
			// weight in front of a smaller one.
			if k > 0 && ce.CCC() == prevCCC && i.ce[k-1].Primary() > ce.Primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
			// Remember this element's CCC so the next iteration compares
			// against its actual predecessor rather than always against 0.
			prevCCC = ce.CCC()
		}
	}
	// test cutoff of large sequence of combining characters.
	// result[o+2] is the value expected in i.prevCCC after doNorm for a run
	// whose length is offset by o from maxCombiningCharacters + 1.
	result := []uint8{8, 8, 8, 5, 5}
	for o := -2; o <= 2; o++ {
		i := iter{pStarter: 2, prevCCC: 8}
		n := maxCombiningCharacters + 1 + o
		for j := 1; j < n+i.pStarter; j++ {
			i.ce = append(i.ce, makeCE([]int{100, 20, 2, 8}))
		}
		p := len(i.ce)
		i.ce = append(i.ce, makeCE([]int{0, 20, 2, 5}))
		i.doNorm(p, 5)
		if i.prevCCC != result[o+2] {
			t.Errorf("%d: i.prevCCC was %d; want %d", n, i.prevCCC, result[o+2])
		}
		if result[o+2] == 5 && i.pStarter != p {
			t.Errorf("%d: i.pStarter was %d; want %d", n, i.pStarter, p)
		}
	}
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
// A Weigher can be used as a source for Collator and Searcher. // A Weigher can be used as a source for Collator and Searcher.
type Weigher interface { type Weigher interface {
...@@ -25,4 +25,7 @@ type Weigher interface { ...@@ -25,4 +25,7 @@ type Weigher interface {
// Domain returns a slice of all single characters and contractions for which // Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table. // collation elements are defined in this table.
Domain() []string Domain() []string
// Top returns the highest variable primary value.
Top() uint32
} }
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import "unicode/utf8" import "unicode/utf8"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import ( import (
"testing" "testing"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
// Init is for internal use only. // Init is for internal use only.
func Init(data interface{}) Weigher { func Init(data interface{}) Weigher {
......
...@@ -2,20 +2,13 @@ ...@@ -2,20 +2,13 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import ( import (
"exp/norm" "exp/norm"
"unicode/utf8" "unicode/utf8"
) )
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
type tableIndex struct {
// lookupOffset is multiplied by blockSize in indexedTable to find where
// the locale's view of the trie index slice starts.
lookupOffset uint32
// valuesOffset is multiplied by blockSize in indexedTable to find where
// the locale's view of the trie values slice starts.
valuesOffset uint32
}
// table holds all collation data for a given collation ordering. // table holds all collation data for a given collation ordering.
type table struct { type table struct {
index trie // main trie index trie // main trie
...@@ -30,13 +23,6 @@ type table struct { ...@@ -30,13 +23,6 @@ type table struct {
variableTop uint32 variableTop uint32
} }
func (t *table) indexedTable(idx tableIndex) *table {
nt := *t
nt.index.index0 = t.index.index[idx.lookupOffset*blockSize:]
nt.index.values0 = t.index.values[idx.valuesOffset*blockSize:]
return &nt
}
func (t *table) AppendNext(w []Elem, b []byte) (res []Elem, n int) { func (t *table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b}) return t.appendNext(w, source{bytes: b})
} }
...@@ -60,6 +46,10 @@ func (t *table) Domain() []string { ...@@ -60,6 +46,10 @@ func (t *table) Domain() []string {
panic("not implemented") panic("not implemented")
} }
// Top returns the variableTop value stored in the table.
func (t *table) Top() uint32 {
return t.variableTop
}
type source struct { type source struct {
str string str string
bytes []byte bytes []byte
...@@ -282,36 +272,3 @@ func (t *table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem ...@@ -282,36 +272,3 @@ func (t *table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem
} }
return w, n return w, n
} }
// TODO: this should stay after the rest of this file is moved to colltab
func (t tableIndex) TrieIndex() []uint16 {
return mainLookup[:]
}
func (t tableIndex) TrieValues() []uint32 {
return mainValues[:]
}
func (t tableIndex) FirstBlockOffsets() (lookup, value uint16) {
return uint16(t.lookupOffset), uint16(t.valuesOffset)
}
func (t tableIndex) ExpandElems() []uint32 {
return mainExpandElem[:]
}
func (t tableIndex) ContractTries() []struct{ l, h, n, i uint8 } {
return mainCTEntries[:]
}
func (t tableIndex) ContractElems() []uint32 {
return mainContractElem[:]
}
func (t tableIndex) MaxContractLen() int {
return 18
}
func (t tableIndex) VariableTop() uint32 {
return 0x30E
}
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
// The last byte is used to index into a table of collation elements. // The last byte is used to index into a table of collation elements.
// For a full description, see exp/locale/collate/build/trie.go. // For a full description, see exp/locale/collate/build/trie.go.
package collate package colltab
const blockSize = 64 const blockSize = 64
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate package colltab
import ( import (
"testing" "testing"
......
...@@ -5,11 +5,18 @@ ...@@ -5,11 +5,18 @@
package collate package collate
// Export for testing. // Export for testing.
// TODO: no longer necessary. Remove at some point.
import ( import (
"exp/locale/collate/colltab"
"fmt" "fmt"
) )
// Default secondary and tertiary weight values.
// NOTE(review): no use of these constants is visible in this part of the
// diff; presumably they back the Weights test helpers — confirm.
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
)
type Weights struct { type Weights struct {
Primary, Secondary, Tertiary, Quaternary int Primary, Secondary, Tertiary, Quaternary int
} }
...@@ -24,8 +31,6 @@ func W(ce ...int) Weights { ...@@ -24,8 +31,6 @@ func W(ce ...int) Weights {
} }
if len(ce) > 3 { if len(ce) > 3 {
w.Quaternary = ce[3] w.Quaternary = ce[3]
} else if w.Tertiary != 0 {
w.Quaternary = MaxQuaternary
} }
return w return w
} }
...@@ -33,58 +38,13 @@ func (w Weights) String() string { ...@@ -33,58 +38,13 @@ func (w Weights) String() string {
return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary) return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
} }
type Table struct { func convertFromWeights(ws []Weights) []colltab.Elem {
t Weigher out := make([]colltab.Elem, len(ws))
}
func GetTable(c *Collator) *Table {
return &Table{c.t}
}
func convertToWeights(ws []Elem) []Weights {
out := make([]Weights, len(ws))
for i, w := range ws {
out[i] = Weights{int(w.Primary()), int(w.Secondary()), int(w.Tertiary()), int(w.Quaternary())}
}
return out
}
func convertFromWeights(ws []Weights) []Elem {
out := make([]Elem, len(ws))
for i, w := range ws { for i, w := range ws {
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary, 0}) out[i], _ = colltab.MakeElem(w.Primary, w.Secondary, w.Tertiary, 0)
if out[i] == ceIgnore && w.Quaternary > 0 { if out[i] == colltab.Ignore && w.Quaternary > 0 {
out[i] = MakeQuaternary(w.Quaternary) out[i] = colltab.MakeQuaternary(w.Quaternary)
} }
} }
return out return out
} }
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
w, n := t.t.AppendNext(nil, s)
return convertToWeights(w), n
}
func SetTop(c *Collator, top int) {
if c.t == nil {
c.t = &table{}
}
c.variableTop = uint32(top)
}
func GetColElems(c *Collator, str []byte) []Weights {
ce := c.getColElems(str)
return convertToWeights(ce)
}
func ProcessWeights(h AlternateHandling, top int, w []Weights) []Weights {
in := convertFromWeights(w)
processWeights(h, uint32(top), in)
return convertToWeights(in)
}
func KeyFromElems(c *Collator, buf *Buffer, w []Weights) []byte {
k := len(buf.key)
c.keyFromElems(buf, convertFromWeights(w))
return buf.key[k:]
}
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
// tableIndex holds information for constructing a table
// for a certain locale based on the main table.
//
// Only FirstBlockOffsets reads the fields; the other tableIndex methods
// in this file do not consult the receiver.
type tableIndex struct {
// lookupOffset is reported by FirstBlockOffsets as the lookup offset.
lookupOffset uint32
// valuesOffset is reported by FirstBlockOffsets as the value offset.
valuesOffset uint32
}
// TrieIndex returns a slice over the whole package-level mainLookup table.
// The receiver is not consulted, so every tableIndex yields the same data.
func (t tableIndex) TrieIndex() []uint16 {
return mainLookup[:]
}
// TrieValues returns a slice over the whole package-level mainValues table.
// The receiver is not consulted, so every tableIndex yields the same data.
func (t tableIndex) TrieValues() []uint32 {
return mainValues[:]
}
// FirstBlockOffsets returns t.lookupOffset and t.valuesOffset, each converted
// from uint32 to uint16, as the lookup and value results respectively.
func (t tableIndex) FirstBlockOffsets() (lookup, value uint16) {
return uint16(t.lookupOffset), uint16(t.valuesOffset)
}
// ExpandElems returns a slice over the whole package-level mainExpandElem
// table. The receiver is not consulted.
func (t tableIndex) ExpandElems() []uint32 {
return mainExpandElem[:]
}
// ContractTries returns a slice over the whole package-level mainCTEntries
// table, whose entries are structs of four uint8 fields (l, h, n, i).
// The receiver is not consulted.
func (t tableIndex) ContractTries() []struct{ l, h, n, i uint8 } {
return mainCTEntries[:]
}
// ContractElems returns a slice over the whole package-level mainContractElem
// table. The receiver is not consulted.
func (t tableIndex) ContractElems() []uint32 {
return mainContractElem[:]
}
// MaxContractLen returns the fixed value 18 for every tableIndex.
// The number is hard-coded here; the TODO below records that it is meant
// to be generated instead.
func (t tableIndex) MaxContractLen() int {
return 18 // TODO: generate
}
// VariableTop returns the package-level varTop constant.
// The receiver is not consulted, so the result is the same for every
// tableIndex.
func (t tableIndex) VariableTop() uint32 {
return varTop
}
...@@ -16,6 +16,7 @@ import ( ...@@ -16,6 +16,7 @@ import (
"encoding/xml" "encoding/xml"
"exp/locale/collate" "exp/locale/collate"
"exp/locale/collate/build" "exp/locale/collate/build"
"exp/locale/collate/colltab"
"flag" "flag"
"fmt" "fmt"
"io" "io"
...@@ -587,11 +588,11 @@ func parseCollation(b *build.Builder) { ...@@ -587,11 +588,11 @@ func parseCollation(b *build.Builder) {
} }
} }
var lmap = map[byte]collate.Level{ var lmap = map[byte]colltab.Level{
'p': collate.Primary, 'p': colltab.Primary,
's': collate.Secondary, 's': colltab.Secondary,
't': collate.Tertiary, 't': colltab.Tertiary,
'i': collate.Identity, 'i': colltab.Identity,
} }
// cldrIndex is a Unicode-reserved sentinel value used. // cldrIndex is a Unicode-reserved sentinel value used.
...@@ -699,7 +700,7 @@ func main() { ...@@ -699,7 +700,7 @@ func main() {
failOnError(err) failOnError(err)
if *test { if *test {
testCollator(c) testCollator(collate.NewFromTable(c))
} else { } else {
fmt.Println("// Generated by running") fmt.Println("// Generated by running")
fmt.Printf("// maketables -root=%s -cldr=%s\n", *root, *cldr) fmt.Printf("// maketables -root=%s -cldr=%s\n", *root, *cldr)
......
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
"bytes" "bytes"
"exp/locale/collate" "exp/locale/collate"
"exp/locale/collate/build" "exp/locale/collate/build"
"exp/locale/collate/colltab"
"flag" "flag"
"fmt" "fmt"
"io" "io"
...@@ -228,12 +229,14 @@ func runes(b []byte) []rune { ...@@ -228,12 +229,14 @@ func runes(b []byte) []rune {
func doTest(t Test) { func doTest(t Test) {
bld := build.NewBuilder() bld := build.NewBuilder()
parseUCA(bld) parseUCA(bld)
c, err := bld.Build() w, err := bld.Build()
Error(err) Error(err)
c.Strength = collate.Tertiary c := collate.NewFromTable(w)
c.Strength = colltab.Quaternary
c.Alternate = collate.AltShifted c.Alternate = collate.AltShifted
b := &collate.Buffer{} b := &collate.Buffer{}
if strings.Contains(t.name, "NON_IGNOR") { if strings.Contains(t.name, "NON_IGNOR") {
c.Strength = colltab.Tertiary
c.Alternate = collate.AltNonIgnorable c.Alternate = collate.AltNonIgnorable
} }
prev := t.str[0] prev := t.str[0]
......
...@@ -2,16 +2,16 @@ ...@@ -2,16 +2,16 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package collate_test package collate
import ( import (
"exp/locale/collate"
"exp/locale/collate/build" "exp/locale/collate/build"
"exp/locale/collate/colltab"
"exp/norm" "exp/norm"
"testing" "testing"
) )
type ColElems []collate.Weights type ColElems []Weights
type input struct { type input struct {
str string str string
...@@ -29,8 +29,8 @@ type tableTest struct { ...@@ -29,8 +29,8 @@ type tableTest struct {
chk []check chk []check
} }
func w(ce ...int) collate.Weights { func w(ce ...int) Weights {
return collate.W(ce...) return W(ce...)
} }
var defaults = w(0) var defaults = w(0)
...@@ -39,14 +39,18 @@ func pt(p, t int) []int { ...@@ -39,14 +39,18 @@ func pt(p, t int) []int {
return []int{p, defaults.Secondary, t} return []int{p, defaults.Secondary, t}
} }
func makeTable(in []input) (*collate.Collator, error) { func makeTable(in []input) (*Collator, error) {
b := build.NewBuilder() b := build.NewBuilder()
for _, r := range in { for _, r := range in {
if e := b.Add([]rune(r.str), r.ces, nil); e != nil { if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
panic(e) panic(e)
} }
} }
return b.Build() t, err := b.Build()
if err != nil {
return nil, err
}
return NewFromTable(t), nil
} }
// modSeq holds a sequence of modifiers in increasing order of CCC long enough // modSeq holds a sequence of modifiers in increasing order of CCC long enough
...@@ -265,19 +269,20 @@ func TestAppendNext(t *testing.T) { ...@@ -265,19 +269,20 @@ func TestAppendNext(t *testing.T) {
t.Errorf("%d: error creating table: %v", i, err) t.Errorf("%d: error creating table: %v", i, err)
continue continue
} }
ct := collate.GetTable(c)
for j, chk := range tt.chk { for j, chk := range tt.chk {
ws, n := ct.AppendNext([]byte(chk.in)) ws, n := c.t.AppendNext(nil, []byte(chk.in))
if n != chk.n { if n != chk.n {
t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n) t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
} }
if len(ws) != len(chk.out) { out := convertFromWeights(chk.out)
t.Errorf("%d:%d: len(ws) was %d; want %d (%v vs %v)\n%X", i, j, len(ws), len(chk.out), ws, chk.out, chk.in) if len(ws) != len(out) {
t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
continue continue
} }
for k, w := range ws { for k, w := range ws {
if w != chk.out[k] { w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
t.Errorf("%d:%d: Weights %d was %v; want %v", i, j, k, w, chk.out[k]) if w != out[k] {
t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
} }
} }
} }
......
...@@ -7,6 +7,8 @@ package collate ...@@ -7,6 +7,8 @@ package collate
var availableLocales = []string{"af", "ar", "as", "az", "be", "bg", "bn", "ca", "cs", "cy", "da", "de", "dz", "ee", "el", "en_US_POSIX", "eo", "es", "et", "fa", "fi", "fil", "fo", "fr_CA", "gu", "ha", "haw", "he", "hi", "hr", "hu", "hy", "ig", "is", "ja", "kk", "kl", "km", "kn", "ko", "kok", "ln", "lt", "lv", "mk", "ml", "mr", "mt", "my", "nb", "nn", "nso", "om", "or", "pa", "pl", "ps", "ro", "root", "ru", "se", "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "th", "tn", "to", "tr", "uk", "ur", "vi", "wae", "yo", "zh"} var availableLocales = []string{"af", "ar", "as", "az", "be", "bg", "bn", "ca", "cs", "cy", "da", "de", "dz", "ee", "el", "en_US_POSIX", "eo", "es", "et", "fa", "fi", "fil", "fo", "fr_CA", "gu", "ha", "haw", "he", "hi", "hr", "hu", "hy", "ig", "is", "ja", "kk", "kl", "km", "kn", "ko", "kok", "ln", "lt", "lv", "mk", "ml", "mr", "mt", "my", "nb", "nn", "nso", "om", "or", "pa", "pl", "ps", "ro", "root", "ru", "se", "si", "sk", "sl", "sq", "sr", "sv", "ta", "te", "th", "tn", "to", "tr", "uk", "ur", "vi", "wae", "yo", "zh"}
const varTop = 0x30e
var locales = map[string]tableIndex{ var locales = map[string]tableIndex{
"af": { "af": {
lookupOffset: 0x16, lookupOffset: 0x16,
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment