Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
efea5d0f
Commit
efea5d0f
authored
Sep 13, 2011
by
Marcel van Lohuizen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
exp/norm: Added regression test tool for the standard Unicode test set.
R=r CC=golang-dev
https://golang.org/cl/4973064
parent
40d85fb0
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
304 additions
and
1 deletion
+304
-1
Makefile
src/pkg/exp/norm/Makefile
+8
-1
normregtest.go
src/pkg/exp/norm/normregtest.go
+296
-0
No files found.
src/pkg/exp/norm/Makefile
View file @
efea5d0f
...
...
@@ -25,6 +25,10 @@ maketesttables: maketesttables.go triegen.go
$(GC)
maketesttables.go triegen.go
$(LD)
-o
maketesttables maketesttables.
$O
normregtest
:
normregtest.go
$(GC)
normregtest.go
$(LD)
-o
normregtest normregtest.
$O
tables
:
maketables
./maketables
>
tables.go
gofmt
-w
tables.go
...
...
@@ -39,7 +43,10 @@ testshort: maketables maketesttables
# Downloads from www.unicode.org, so not part
# of standard test scripts.
test
:
testtables
test
:
testtables
regtest
testtables
:
maketables
./maketables
-test
-tables
=
regtest
:
normregtest
./normregtest
src/pkg/exp/norm/normregtest.go
0 → 100644
View file @
efea5d0f
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package
main
import
(
"bufio"
"bytes"
"exp/norm"
"exp/regexp"
"flag"
"fmt"
"http"
"log"
"os"
"path"
"runtime"
"strings"
"strconv"
"time"
"utf8"
)
func
main
()
{
flag
.
Parse
()
loadTestData
()
CharacterByCharacterTests
()
StandardTests
()
PerformanceTest
()
if
errorCount
==
0
{
fmt
.
Println
(
"PASS"
)
}
}
const
file
=
"NormalizationTest.txt"
var
url
=
flag
.
String
(
"url"
,
"http://www.unicode.org/Public/6.0.0/ucd/"
+
file
,
"URL of Unicode database directory"
)
var
localFiles
=
flag
.
Bool
(
"local"
,
false
,
"data files have been copied to the current directory; for debugging only"
)
var
logger
=
log
.
New
(
os
.
Stderr
,
""
,
log
.
Lshortfile
)
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/6.0.0/ucd/).
//
// NormalizationTest.txt has form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
// NFC
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
// c4 == NFC(c4) == NFC(c5)
//
// NFD
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
// c5 == NFD(c4) == NFD(c5)
//
// NFKC
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
// NFKD
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
// specifically listed in Part 1, the following invariants must be true
// for all conformant implementations:
//
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
// Column types.
const
(
cRaw
=
iota
cNFC
cNFD
cNFKC
cNFKD
cMaxColumns
)
// Holds data from NormalizationTest.txt
var
part
[]
Part
type
Part
struct
{
name
string
number
int
tests
[]
Test
}
type
Test
struct
{
name
string
partnr
int
number
int
rune
int
// used for character by character test
cols
[
cMaxColumns
]
string
// Each has 5 entries, see below.
}
func
(
t
Test
)
Name
()
string
{
if
t
.
number
<
0
{
return
part
[
t
.
partnr
]
.
name
}
return
fmt
.
Sprintf
(
"%s:%d"
,
part
[
t
.
partnr
]
.
name
,
t
.
number
)
}
var
partRe
=
regexp
.
MustCompile
(
`@Part(\d) # (.*)\n`
)
// TODO: using $ iso \n does not work
// TODO: the following regexp does not work: `^(?:([\dA-F ]+);){5} # (.*)\n`
var
testRe
=
regexp
.
MustCompile
(
`^(?:([\dA-F ]+);)(?:([\dA-F ]+);)(?:([\dA-F ]+);)(?:([\dA-F ]+);)(?:([\dA-F ]+);) # (.*)\n`
)
var
counter
int
// Load the data form NormalizationTest.txt
func
loadTestData
()
{
if
*
localFiles
{
pwd
,
_
:=
os
.
Getwd
()
*
url
=
"file://"
+
path
.
Join
(
pwd
,
file
)
}
t
:=
&
http
.
Transport
{}
t
.
RegisterProtocol
(
"file"
,
http
.
NewFileTransport
(
http
.
Dir
(
"/"
)))
c
:=
&
http
.
Client
{
Transport
:
t
}
resp
,
err
:=
c
.
Get
(
*
url
)
if
err
!=
nil
{
logger
.
Fatal
(
err
)
}
if
resp
.
StatusCode
!=
200
{
logger
.
Fatal
(
"bad GET status for "
+
file
,
resp
.
Status
)
}
f
:=
resp
.
Body
defer
f
.
Close
()
input
:=
bufio
.
NewReader
(
f
)
for
{
line
,
err
:=
input
.
ReadString
(
'\n'
)
if
err
!=
nil
{
if
err
==
os
.
EOF
{
break
}
logger
.
Fatal
(
err
)
}
if
len
(
line
)
==
0
||
line
[
0
]
==
'#'
{
continue
}
m
:=
partRe
.
FindStringSubmatch
(
line
)
if
m
!=
nil
{
if
len
(
m
)
<
3
{
logger
.
Fatal
(
"Failed to parse Part: "
,
line
)
}
i
,
err
:=
strconv
.
Atoi
(
m
[
1
])
if
err
!=
nil
{
logger
.
Fatal
(
err
)
}
name
:=
m
[
2
]
part
=
append
(
part
,
Part
{
name
:
name
[
:
len
(
name
)
-
1
],
number
:
i
})
continue
}
m
=
testRe
.
FindStringSubmatch
(
line
)
if
m
==
nil
||
len
(
m
)
<
7
{
logger
.
Fatalf
(
`Failed to parse: "%s" result: %#v`
,
line
,
m
)
}
test
:=
Test
{
name
:
m
[
6
],
partnr
:
len
(
part
)
-
1
,
number
:
counter
}
counter
++
for
j
:=
1
;
j
<
len
(
m
)
-
1
;
j
++
{
for
_
,
split
:=
range
strings
.
Split
(
m
[
j
],
" "
)
{
r
,
err
:=
strconv
.
Btoui64
(
split
,
16
)
if
err
!=
nil
{
logger
.
Fatal
(
err
)
}
if
test
.
rune
==
0
{
// save for CharacterByCharacterTests
test
.
rune
=
int
(
r
)
}
var
buf
[
utf8
.
UTFMax
]
byte
sz
:=
utf8
.
EncodeRune
(
buf
[
:
],
int
(
r
))
test
.
cols
[
j
-
1
]
+=
string
(
buf
[
:
sz
])
}
}
part
:=
&
part
[
len
(
part
)
-
1
]
part
.
tests
=
append
(
part
.
tests
,
test
)
}
}
var
fstr
=
[]
string
{
"NFC"
,
"NFD"
,
"NFKC"
,
"NFKD"
}
var
errorCount
int
func
cmpResult
(
t
*
Test
,
name
string
,
f
norm
.
Form
,
gold
,
test
,
result
string
)
{
if
gold
!=
result
{
errorCount
++
if
errorCount
>
20
{
return
}
st
,
sr
,
sg
:=
[]
int
(
test
),
[]
int
(
result
),
[]
int
(
gold
)
logger
.
Printf
(
"%s:%s: %s(%X)=%X; want:%X: %s"
,
t
.
Name
(),
name
,
fstr
[
f
],
st
,
sr
,
sg
,
t
.
name
)
}
}
func
cmpIsNormal
(
t
*
Test
,
name
string
,
f
norm
.
Form
,
test
string
,
result
,
want
bool
)
{
if
result
!=
want
{
errorCount
++
if
errorCount
>
20
{
return
}
logger
.
Printf
(
"%s:%s: %s(%X)=%v; want: %v"
,
t
.
Name
(),
name
,
fstr
[
f
],
[]
int
(
test
),
result
,
want
)
}
}
func
doTest
(
t
*
Test
,
f
norm
.
Form
,
gold
,
test
string
)
{
result
:=
f
.
Bytes
([]
byte
(
test
))
cmpResult
(
t
,
"Bytes"
,
f
,
gold
,
test
,
string
(
result
))
for
i
:=
range
test
{
out
:=
f
.
Append
(
f
.
Bytes
([]
byte
(
test
[
:
i
])),
[]
byte
(
test
[
i
:
])
...
)
cmpResult
(
t
,
fmt
.
Sprintf
(
":Append:%d"
,
i
),
f
,
gold
,
test
,
string
(
out
))
}
cmpIsNormal
(
t
,
"IsNormal"
,
f
,
test
,
f
.
IsNormal
([]
byte
(
test
)),
test
==
gold
)
}
func
doConformanceTests
(
t
*
Test
,
partn
int
)
{
for
i
:=
0
;
i
<=
2
;
i
++
{
doTest
(
t
,
norm
.
NFC
,
t
.
cols
[
1
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFD
,
t
.
cols
[
2
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFKC
,
t
.
cols
[
3
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFKD
,
t
.
cols
[
4
],
t
.
cols
[
i
])
}
for
i
:=
3
;
i
<=
4
;
i
++
{
doTest
(
t
,
norm
.
NFC
,
t
.
cols
[
3
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFD
,
t
.
cols
[
4
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFKC
,
t
.
cols
[
3
],
t
.
cols
[
i
])
doTest
(
t
,
norm
.
NFKD
,
t
.
cols
[
4
],
t
.
cols
[
i
])
}
}
func
CharacterByCharacterTests
()
{
tests
:=
part
[
1
]
.
tests
last
:=
0
for
i
:=
0
;
i
<=
len
(
tests
);
i
++
{
// last one is special case
var
rune
int
if
i
==
len
(
tests
)
{
rune
=
0x2FA1E
// Don't have to go to 0x10FFFF
}
else
{
rune
=
tests
[
i
]
.
rune
}
for
last
++
;
last
<
rune
;
last
++
{
// Check all characters that were not explicitly listed in the test.
t
:=
&
Test
{
partnr
:
1
,
number
:
-
1
}
char
:=
string
(
last
)
doTest
(
t
,
norm
.
NFC
,
char
,
char
)
doTest
(
t
,
norm
.
NFD
,
char
,
char
)
doTest
(
t
,
norm
.
NFKC
,
char
,
char
)
doTest
(
t
,
norm
.
NFKD
,
char
,
char
)
}
if
i
<
len
(
tests
)
{
doConformanceTests
(
&
tests
[
i
],
1
)
}
}
}
func
StandardTests
()
{
for
_
,
j
:=
range
[]
int
{
0
,
2
,
3
}
{
for
_
,
test
:=
range
part
[
j
]
.
tests
{
doConformanceTests
(
&
test
,
j
)
}
}
}
// PerformanceTest verifies that normalization is O(n). If any of the
// code does not properly check for maxCombiningChars, normalization
// may exhibit O(n**2) behavior.
func
PerformanceTest
()
{
runtime
.
GOMAXPROCS
(
2
)
success
:=
make
(
chan
bool
,
1
)
go
func
()
{
buf
:=
bytes
.
Repeat
([]
byte
(
"
\u035D
"
),
1024
*
1024
)
buf
=
append
(
buf
,
[]
byte
(
"
\u035B
"
)
...
)
norm
.
NFC
.
Append
(
nil
,
buf
...
)
success
<-
true
}()
timeout
:=
time
.
After
(
1e9
)
select
{
case
<-
success
:
// test completed before the timeout
case
<-
timeout
:
errorCount
++
logger
.
Printf
(
`unexpectedly long time to complete PerformanceTest`
)
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment