Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
dfe08532
Commit
dfe08532
authored
Mar 06, 2009
by
Rob Pike
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
document utf8
R=rsc DELTA=18 (12 added, 0 deleted, 6 changed) OCL=25807 CL=25811
parent
42b49b19
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
6 deletions
+18
-6
utf8.go
src/lib/utf8.go
+18
-6
No files found.
src/lib/utf8.go
View file @
dfe08532
...
@@ -2,15 +2,16 @@
...
@@ -2,15 +2,16 @@
// Use of this source code is governed by a BSD-style
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// license that can be found in the LICENSE file.
//
UTF-8 support
.
//
Functions and constants to support text encoded in UTF-8
.
// This package calls a Unicode character a rune for brevity.
package
utf8
package
utf8
// Numbers fundamental to the encoding.
const
(
const
(
RuneError
=
0xFFFD
;
RuneError
=
0xFFFD
;
// the "error" Rune or "replacement character".
RuneSelf
=
0x80
;
RuneSelf
=
0x80
;
// characters below Runeself are represented as themselves in a single byte.
RuneMax
=
0x10FFFF
;
RuneMax
=
0x10FFFF
;
// maximum Unicode code point.
UTFMax
=
4
;
UTFMax
=
4
;
// maximum number of bytes of a UTF-8 encoded Unicode character.
)
)
const
(
const
(
...
@@ -181,28 +182,34 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
...
@@ -181,28 +182,34 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
return
RuneError
,
1
,
false
return
RuneError
,
1
,
false
}
}
// FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune.
// An invalid encoding is considered a full Rune since it will convert as a width-1 error rune.
func
FullRune
(
p
[]
byte
)
bool
{
func
FullRune
(
p
[]
byte
)
bool
{
rune
,
size
,
short
:=
decodeRuneInternal
(
p
);
rune
,
size
,
short
:=
decodeRuneInternal
(
p
);
return
!
short
return
!
short
}
}
// FullRuneInString is like FullRune but its input is a string.
func
FullRuneInString
(
s
string
,
i
int
)
bool
{
func
FullRuneInString
(
s
string
,
i
int
)
bool
{
rune
,
size
,
short
:=
decodeRuneInStringInternal
(
s
,
i
,
len
(
s
)
-
i
);
rune
,
size
,
short
:=
decodeRuneInStringInternal
(
s
,
i
,
len
(
s
)
-
i
);
return
!
short
return
!
short
}
}
// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes.
func
DecodeRune
(
p
[]
byte
)
(
rune
,
size
int
)
{
func
DecodeRune
(
p
[]
byte
)
(
rune
,
size
int
)
{
var
short
bool
;
var
short
bool
;
rune
,
size
,
short
=
decodeRuneInternal
(
p
);
rune
,
size
,
short
=
decodeRuneInternal
(
p
);
return
;
return
;
}
}
// DecodeRuneInString is like DecodeRune but its input is a string.
func
DecodeRuneInString
(
s
string
,
i
int
)
(
rune
,
size
int
)
{
func
DecodeRuneInString
(
s
string
,
i
int
)
(
rune
,
size
int
)
{
var
short
bool
;
var
short
bool
;
rune
,
size
,
short
=
decodeRuneInStringInternal
(
s
,
i
,
len
(
s
)
-
i
);
rune
,
size
,
short
=
decodeRuneInStringInternal
(
s
,
i
,
len
(
s
)
-
i
);
return
;
return
;
}
}
// RuneLen returns the number of bytes required to encode the rune.
func
RuneLen
(
rune
int
)
int
{
func
RuneLen
(
rune
int
)
int
{
switch
{
switch
{
case
rune
<=
_Rune1Max
:
case
rune
<=
_Rune1Max
:
...
@@ -217,6 +224,8 @@ func RuneLen(rune int) int {
...
@@ -217,6 +224,8 @@ func RuneLen(rune int) int {
return
-
1
;
return
-
1
;
}
}
// EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune.
// It returns the number of bytes written.
func
EncodeRune
(
rune
int
,
p
[]
byte
)
int
{
func
EncodeRune
(
rune
int
,
p
[]
byte
)
int
{
if
rune
<=
_Rune1Max
{
if
rune
<=
_Rune1Max
{
p
[
0
]
=
byte
(
rune
);
p
[
0
]
=
byte
(
rune
);
...
@@ -247,6 +256,8 @@ func EncodeRune(rune int, p []byte) int {
...
@@ -247,6 +256,8 @@ func EncodeRune(rune int, p []byte) int {
return
4
;
return
4
;
}
}
// RuneCount returns the number of runes in p. Erroneous and short
// encodings are treated as single runes of width 1 byte.
func
RuneCount
(
p
[]
byte
)
int
{
func
RuneCount
(
p
[]
byte
)
int
{
i
:=
0
;
i
:=
0
;
var
n
int
;
var
n
int
;
...
@@ -261,6 +272,7 @@ func RuneCount(p []byte) int {
...
@@ -261,6 +272,7 @@ func RuneCount(p []byte) int {
return
n
;
return
n
;
}
}
// RuneCountInString is like RuneCount but its input is a string.
func
RuneCountInString
(
s
string
,
i
int
,
l
int
)
int
{
func
RuneCountInString
(
s
string
,
i
int
,
l
int
)
int
{
ei
:=
i
+
l
;
ei
:=
i
+
l
;
n
:=
0
;
n
:=
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment