Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
57d9de3a
Commit
57d9de3a
authored
Apr 21, 2010
by
Russ Cox
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
utf16: add DecodeRune, EncodeRune
R=r CC=golang-dev
https://golang.org/cl/970041
parent
aa2187ef
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
67 additions
and
4 deletions
+67
-4
utf16.go
src/pkg/utf16/utf16.go
+31
-4
utf16_test.go
src/pkg/utf16/utf16_test.go
+36
-0
No files found.
src/pkg/utf16/utf16.go
View file @
57d9de3a
...
@@ -18,6 +18,33 @@ const (
...
@@ -18,6 +18,33 @@ const (
surrSelf
=
0x10000
surrSelf
=
0x10000
)
)
// IsSurrogate returns true if the specified Unicode code point
// can appear in a surrogate pair.
func
IsSurrogate
(
rune
int
)
bool
{
return
surr1
<=
rune
&&
rune
<
surr3
}
// DecodeRune returns the UTF-16 decoding of a surrogate pair.
// If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns
// the Unicode replacement code point U+FFFD.
func
DecodeRune
(
r1
,
r2
int
)
int
{
if
surr1
<=
r1
&&
r1
<
surr2
&&
surr2
<=
r2
&&
r2
<
surr3
{
return
(
int
(
r1
)
-
surr1
)
<<
10
|
(
int
(
r2
)
-
surr2
)
+
0x10000
}
return
unicode
.
ReplacementChar
}
// EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune.
// If the rune is not a valid Unicode code point or does not need encoding,
// EncodeRune returns U+FFFD, U+FFFD.
func
EncodeRune
(
rune
int
)
(
r1
,
r2
int
)
{
if
rune
<
surrSelf
||
rune
>
unicode
.
MaxRune
||
IsSurrogate
(
rune
)
{
return
unicode
.
ReplacementChar
,
unicode
.
ReplacementChar
}
rune
-=
surrSelf
return
surr1
+
(
rune
>>
10
)
&
0x3ff
,
surr2
+
rune
&
0x3ff
}
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
func
Encode
(
s
[]
int
)
[]
uint16
{
func
Encode
(
s
[]
int
)
[]
uint16
{
n
:=
len
(
s
)
n
:=
len
(
s
)
...
@@ -38,9 +65,9 @@ func Encode(s []int) []uint16 {
...
@@ -38,9 +65,9 @@ func Encode(s []int) []uint16 {
a
[
n
]
=
uint16
(
v
)
a
[
n
]
=
uint16
(
v
)
n
++
n
++
default
:
default
:
v
-=
surrSelf
r1
,
r2
:=
EncodeRune
(
v
)
a
[
n
]
=
uint16
(
surr1
+
(
v
>>
10
)
&
0x3ff
)
a
[
n
]
=
uint16
(
r1
)
a
[
n
+
1
]
=
uint16
(
surr2
+
v
&
0x3ff
)
a
[
n
+
1
]
=
uint16
(
r2
)
n
+=
2
n
+=
2
}
}
}
}
...
@@ -57,7 +84,7 @@ func Decode(s []uint16) []int {
...
@@ -57,7 +84,7 @@ func Decode(s []uint16) []int {
case
surr1
<=
r
&&
r
<
surr2
&&
i
+
1
<
len
(
s
)
&&
case
surr1
<=
r
&&
r
<
surr2
&&
i
+
1
<
len
(
s
)
&&
surr2
<=
s
[
i
+
1
]
&&
s
[
i
+
1
]
<
surr3
:
surr2
<=
s
[
i
+
1
]
&&
s
[
i
+
1
]
<
surr3
:
// valid surrogate sequence
// valid surrogate sequence
a
[
n
]
=
(
int
(
r
)
-
surr1
)
<<
10
|
(
int
(
s
[
i
+
1
])
-
surr2
)
+
0x10000
a
[
n
]
=
DecodeRune
(
int
(
r
),
int
(
s
[
i
+
1
]))
i
++
i
++
n
++
n
++
case
surr1
<=
r
&&
r
<
surr3
:
case
surr1
<=
r
&&
r
<
surr3
:
...
...
src/pkg/utf16/utf16_test.go
View file @
57d9de3a
...
@@ -8,6 +8,7 @@ import (
...
@@ -8,6 +8,7 @@ import (
"fmt"
"fmt"
"reflect"
"reflect"
"testing"
"testing"
"unicode"
)
)
type
encodeTest
struct
{
type
encodeTest
struct
{
...
@@ -32,6 +33,41 @@ func TestEncode(t *testing.T) {
...
@@ -32,6 +33,41 @@ func TestEncode(t *testing.T) {
}
}
}
}
func
TestEncodeRune
(
t
*
testing
.
T
)
{
for
i
,
tt
:=
range
encodeTests
{
j
:=
0
for
_
,
r
:=
range
tt
.
in
{
r1
,
r2
:=
EncodeRune
(
r
)
if
r
<
0x10000
||
r
>
unicode
.
MaxRune
{
if
j
>=
len
(
tt
.
out
)
{
t
.
Errorf
(
"#%d: ran out of tt.out"
,
i
)
break
}
if
r1
!=
unicode
.
ReplacementChar
||
r2
!=
unicode
.
ReplacementChar
{
t
.
Errorf
(
"EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd"
,
r
,
r1
,
r2
)
}
j
++
}
else
{
if
j
+
1
>=
len
(
tt
.
out
)
{
t
.
Errorf
(
"#%d: ran out of tt.out"
,
i
)
break
}
if
r1
!=
int
(
tt
.
out
[
j
])
||
r2
!=
int
(
tt
.
out
[
j
+
1
])
{
t
.
Errorf
(
"EncodeRune(%#x) = %#x, %#x; want %#x, %#x"
,
r
,
r1
,
r2
,
tt
.
out
[
j
],
tt
.
out
[
j
+
1
])
}
j
+=
2
dec
:=
DecodeRune
(
r1
,
r2
)
if
dec
!=
r
{
t
.
Errorf
(
"DecodeRune(%#x, %#x) = %#x; want %#x"
,
r1
,
r2
,
dec
,
r
)
}
}
}
if
j
!=
len
(
tt
.
out
)
{
t
.
Errorf
(
"#%d: EncodeRune didn't generate enough output"
,
i
)
}
}
}
type
decodeTest
struct
{
type
decodeTest
struct
{
in
[]
uint16
in
[]
uint16
out
[]
int
out
[]
int
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment