Commit 45b7084b authored by Marcel van Lohuizen

exp/norm: a few minor fixes to support the implementation of norm.

maketables.go/tables.go
- Properly set combinesForward flag for JamoL and JamoV.
- Fixed Printf bug.
composition.go
- Make insertString use the same control flow as insert.
- Better Hangul and non-Hangul mixing.
forminfo.go
- Fixed bug in compBoundaryBefore that affected a few esoteric cases.
- Buffer overflow now tested in normalize_test.go (other CL).

R=r
CC=golang-dev
https://golang.org/cl/4924041
parent 8b614b42
......@@ -108,10 +108,9 @@ func (rb *reorderBuffer) insertString(src string, info runeInfo) bool {
return rb.decomposeHangul(uint32(rune))
}
pos := rb.nbyte
dcomp := rb.f.decomposeString(src)
dn := len(dcomp)
if dn != 0 {
for i := 0; i < dn; i += int(info.size) {
if info.flags.hasDecomposition() {
dcomp := rb.f.decomposeString(src)
for i := 0; i < len(dcomp); i += int(info.size) {
info = rb.f.info(dcomp[i:])
if !rb.insertOrdered(info) {
return false
......@@ -259,11 +258,10 @@ func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul() {
k := 1
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for s, i := 0, 1; i < bn; i++ {
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
......@@ -312,7 +310,7 @@ func (rb *reorderBuffer) compose() {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul()
rb.combineHangul(s, i, k)
return
}
ii := b[i]
......
......@@ -77,7 +77,7 @@ func decompBoundary(f *formInfo, info runeInfo) bool {
}
func compBoundaryBefore(f *formInfo, info runeInfo) bool {
if info.ccc == 0 && info.flags.isYesC() {
if info.ccc == 0 && !info.flags.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
......@@ -89,8 +89,6 @@ func compBoundaryBefore(f *formInfo, info runeInfo) bool {
func compBoundaryAfter(f *formInfo, info runeInfo) bool {
// This misses values where the last char in a decomposition is a
// boundary such as Hangul with JamoT.
// TODO(mpvl): verify this does not lead to segments that do
// not fit in the reorderBuffer.
return info.flags.isInert()
}
......
......@@ -515,9 +515,13 @@ func completeCharFields(form int) {
f.quickCheck[MComposed] = QCNo
case (i & 0xffff00) == JamoLBase:
f.quickCheck[MComposed] = QCYes
if JamoVBase <= i && i < JamoVEnd {
if JamoLBase <= i && i < JamoLEnd {
f.combinesForward = true
}
if JamoVBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
f.combinesForward = true
}
if JamoTBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
......@@ -562,7 +566,7 @@ func makeEntry(f *FormInfo) uint16 {
case QCMaybe:
e |= 0x6
default:
log.Fatalf("Illegal quickcheck value %d.", f.quickCheck[MComposed])
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
}
return e
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment