Commit 39fd52d3 authored by Robert Griesemer's avatar Robert Griesemer

gofmt'ed missing regexp file

(one-line functions will be re-established once we have all
formatted uniformly)

R=r
http://go/go-review/1023004
parent 38006946
...@@ -30,28 +30,28 @@ import ( ...@@ -30,28 +30,28 @@ import (
"utf8"; "utf8";
) )
var debug = false; var debug = false
// Error codes returned by failures to parse an expression. // Error codes returned by failures to parse an expression.
var ( var (
ErrInternal = os.NewError("internal error"); ErrInternal = os.NewError("internal error");
ErrUnmatchedLpar = os.NewError("unmatched '('"); ErrUnmatchedLpar = os.NewError("unmatched '('");
ErrUnmatchedRpar = os.NewError("unmatched ')'"); ErrUnmatchedRpar = os.NewError("unmatched ')'");
ErrUnmatchedLbkt = os.NewError("unmatched '['"); ErrUnmatchedLbkt = os.NewError("unmatched '['");
ErrUnmatchedRbkt = os.NewError("unmatched ']'"); ErrUnmatchedRbkt = os.NewError("unmatched ']'");
ErrBadRange = os.NewError("bad range in character class"); ErrBadRange = os.NewError("bad range in character class");
ErrExtraneousBackslash = os.NewError("extraneous backslash"); ErrExtraneousBackslash = os.NewError("extraneous backslash");
ErrBadClosure = os.NewError("repeated closure (**, ++, etc.)"); ErrBadClosure = os.NewError("repeated closure (**, ++, etc.)");
ErrBareClosure = os.NewError("closure applies to nothing"); ErrBareClosure = os.NewError("closure applies to nothing");
ErrBadBackslash = os.NewError("illegal backslash escape"); ErrBadBackslash = os.NewError("illegal backslash escape");
) )
// An instruction executed by the NFA // An instruction executed by the NFA
type instr interface { type instr interface {
kind() int; // the type of this instruction: _CHAR, _ANY, etc. kind() int; // the type of this instruction: _CHAR, _ANY, etc.
next() instr; // the instruction to execute after this one next() instr; // the instruction to execute after this one
setNext(i instr); setNext(i instr);
index() int; index() int;
setIndex(i int); setIndex(i int);
print(); print();
} }
...@@ -62,10 +62,18 @@ type common struct { ...@@ -62,10 +62,18 @@ type common struct {
_index int; _index int;
} }
func (c *common) next() instr { return c._next } func (c *common) next() instr {
func (c *common) setNext(i instr) { c._next = i } return c._next;
func (c *common) index() int { return c._index } }
func (c *common) setIndex(i int) { c._index = i } func (c *common) setNext(i instr) {
c._next = i;
}
func (c *common) index() int {
return c._index;
}
func (c *common) setIndex(i int) {
c._index = i;
}
// Regexp is the representation of a compiled regular expression. // Regexp is the representation of a compiled regular expression.
// The public interface is entirely through methods. // The public interface is entirely through methods.
...@@ -77,51 +85,67 @@ type Regexp struct { ...@@ -77,51 +85,67 @@ type Regexp struct {
} }
const ( const (
_START = iota; // beginning of program _START = iota; // beginning of program
_END; // end of program: success _END; // end of program: success
_BOT; // '^' beginning of text _BOT; // '^' beginning of text
_EOT; // '$' end of text _EOT; // '$' end of text
_CHAR; // 'a' regular character _CHAR; // 'a' regular character
_CHARCLASS; // [a-z] character class _CHARCLASS; // [a-z] character class
_ANY; // '.' any character including newline _ANY; // '.' any character including newline
_NOTNL; // [^\n] special case: any character but newline _NOTNL; // [^\n] special case: any character but newline
_BRA; // '(' parenthesized expression _BRA; // '(' parenthesized expression
_EBRA; // ')'; end of '(' parenthesized expression _EBRA; // ')'; end of '(' parenthesized expression
_ALT; // '|' alternation _ALT; // '|' alternation
_NOP; // do nothing; makes it easy to link without patching _NOP; // do nothing; makes it easy to link without patching
) )
// --- START start of program // --- START start of program
type _Start struct { type _Start struct {
common common;
} }
func (start *_Start) kind() int { return _START } func (start *_Start) kind() int {
func (start *_Start) print() { print("start") } return _START;
}
func (start *_Start) print() {
print("start");
}
// --- END end of program // --- END end of program
type _End struct { type _End struct {
common common;
} }
func (end *_End) kind() int { return _END } func (end *_End) kind() int {
func (end *_End) print() { print("end") } return _END;
}
func (end *_End) print() {
print("end");
}
// --- BOT beginning of text // --- BOT beginning of text
type _Bot struct { type _Bot struct {
common common;
} }
func (bot *_Bot) kind() int { return _BOT } func (bot *_Bot) kind() int {
func (bot *_Bot) print() { print("bot") } return _BOT;
}
func (bot *_Bot) print() {
print("bot");
}
// --- EOT end of text // --- EOT end of text
type _Eot struct { type _Eot struct {
common common;
} }
func (eot *_Eot) kind() int { return _EOT } func (eot *_Eot) kind() int {
func (eot *_Eot) print() { print("eot") } return _EOT;
}
func (eot *_Eot) print() {
print("eot");
}
// --- CHAR a regular character // --- CHAR a regular character
type _Char struct { type _Char struct {
...@@ -129,8 +153,12 @@ type _Char struct { ...@@ -129,8 +153,12 @@ type _Char struct {
char int; char int;
} }
func (char *_Char) kind() int { return _CHAR } func (char *_Char) kind() int {
func (char *_Char) print() { print("char ", string(char.char)) } return _CHAR;
}
func (char *_Char) print() {
print("char ", string(char.char));
}
func newChar(char int) *_Char { func newChar(char int) *_Char {
c := new(_Char); c := new(_Char);
...@@ -148,7 +176,9 @@ type _CharClass struct { ...@@ -148,7 +176,9 @@ type _CharClass struct {
ranges *vector.IntVector; ranges *vector.IntVector;
} }
func (cclass *_CharClass) kind() int { return _CHARCLASS } func (cclass *_CharClass) kind() int {
return _CHARCLASS;
}
func (cclass *_CharClass) print() { func (cclass *_CharClass) print() {
print("charclass"); print("charclass");
...@@ -177,10 +207,10 @@ func (cclass *_CharClass) matches(c int) bool { ...@@ -177,10 +207,10 @@ func (cclass *_CharClass) matches(c int) bool {
min := cclass.ranges.At(i); min := cclass.ranges.At(i);
max := cclass.ranges.At(i+1); max := cclass.ranges.At(i+1);
if min <= c && c <= max { if min <= c && c <= max {
return !cclass.negate return !cclass.negate;
} }
} }
return cclass.negate return cclass.negate;
} }
func newCharClass() *_CharClass { func newCharClass() *_CharClass {
...@@ -191,19 +221,27 @@ func newCharClass() *_CharClass { ...@@ -191,19 +221,27 @@ func newCharClass() *_CharClass {
// --- ANY any character // --- ANY any character
type _Any struct { type _Any struct {
common common;
} }
func (any *_Any) kind() int { return _ANY } func (any *_Any) kind() int {
func (any *_Any) print() { print("any") } return _ANY;
}
func (any *_Any) print() {
print("any");
}
// --- NOTNL any character but newline // --- NOTNL any character but newline
type _NotNl struct { type _NotNl struct {
common common;
} }
func (notnl *_NotNl) kind() int { return _NOTNL } func (notnl *_NotNl) kind() int {
func (notnl *_NotNl) print() { print("notnl") } return _NOTNL;
}
func (notnl *_NotNl) print() {
print("notnl");
}
// --- BRA parenthesized expression // --- BRA parenthesized expression
type _Bra struct { type _Bra struct {
...@@ -211,8 +249,12 @@ type _Bra struct { ...@@ -211,8 +249,12 @@ type _Bra struct {
n int; // subexpression number n int; // subexpression number
} }
func (bra *_Bra) kind() int { return _BRA } func (bra *_Bra) kind() int {
func (bra *_Bra) print() { print("bra", bra.n); } return _BRA;
}
func (bra *_Bra) print() {
print("bra", bra.n);
}
// --- EBRA end of parenthesized expression // --- EBRA end of parenthesized expression
type _Ebra struct { type _Ebra struct {
...@@ -220,8 +262,12 @@ type _Ebra struct { ...@@ -220,8 +262,12 @@ type _Ebra struct {
n int; // subexpression number n int; // subexpression number
} }
func (ebra *_Ebra) kind() int { return _EBRA } func (ebra *_Ebra) kind() int {
func (ebra *_Ebra) print() { print("ebra ", ebra.n); } return _EBRA;
}
func (ebra *_Ebra) print() {
print("ebra ", ebra.n);
}
// --- ALT alternation // --- ALT alternation
type _Alt struct { type _Alt struct {
...@@ -229,16 +275,24 @@ type _Alt struct { ...@@ -229,16 +275,24 @@ type _Alt struct {
left instr; // other branch left instr; // other branch
} }
func (alt *_Alt) kind() int { return _ALT } func (alt *_Alt) kind() int {
func (alt *_Alt) print() { print("alt(", alt.left.index(), ")"); } return _ALT;
}
func (alt *_Alt) print() {
print("alt(", alt.left.index(), ")");
}
// --- NOP no operation // --- NOP no operation
type _Nop struct { type _Nop struct {
common common;
} }
func (nop *_Nop) kind() int { return _NOP } func (nop *_Nop) kind() int {
func (nop *_Nop) print() { print("nop") } return _NOP;
}
func (nop *_Nop) print() {
print("nop");
}
func (re *Regexp) add(i instr) instr { func (re *Regexp) add(i instr) instr {
i.setIndex(re.inst.Len()); i.setIndex(re.inst.Len());
...@@ -262,9 +316,9 @@ func (p *parser) c() int { ...@@ -262,9 +316,9 @@ func (p *parser) c() int {
func (p *parser) nextc() int { func (p *parser) nextc() int {
if p.pos >= len(p.re.expr) { if p.pos >= len(p.re.expr) {
p.ch = endOfFile p.ch = endOfFile;
} else { } else {
c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:len(p.re.expr)]); c, w := utf8.DecodeRuneInString(p.re.expr[p.pos : len(p.re.expr)]);
p.ch = c; p.ch = c;
p.pos += w; p.pos += w;
} }
...@@ -282,20 +336,20 @@ func special(c int) bool { ...@@ -282,20 +336,20 @@ func special(c int) bool {
s := `\.+*?()|[]^$`; s := `\.+*?()|[]^$`;
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
if c == int(s[i]) { if c == int(s[i]) {
return true return true;
} }
} }
return false return false;
} }
func specialcclass(c int) bool { func specialcclass(c int) bool {
s := `\-[]`; s := `\-[]`;
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
if c == int(s[i]) { if c == int(s[i]) {
return true return true;
} }
} }
return false return false;
} }
func (p *parser) charClass() instr { func (p *parser) charClass() instr {
...@@ -358,7 +412,7 @@ func (p *parser) charClass() instr { ...@@ -358,7 +412,7 @@ func (p *parser) charClass() instr {
} }
} }
} }
return nil return nil;
} }
func (p *parser) term() (start, end instr) { func (p *parser) term() (start, end instr) {
...@@ -367,14 +421,14 @@ func (p *parser) term() (start, end instr) { ...@@ -367,14 +421,14 @@ func (p *parser) term() (start, end instr) {
// The other functions (closure(), concatenation() etc.) assume // The other functions (closure(), concatenation() etc.) assume
// it's safe to recur to here. // it's safe to recur to here.
if p.error != nil { if p.error != nil {
return return;
} }
switch c := p.c(); c { switch c := p.c(); c {
case '|', endOfFile: case '|', endOfFile:
return nil, nil; return nil, nil;
case '*', '+': case '*', '+':
p.error = ErrBareClosure; p.error = ErrBareClosure;
return return;
case ')': case ')':
if p.nlpar == 0 { if p.nlpar == 0 {
p.error = ErrUnmatchedRpar; p.error = ErrUnmatchedRpar;
...@@ -431,7 +485,7 @@ func (p *parser) term() (start, end instr) { ...@@ -431,7 +485,7 @@ func (p *parser) term() (start, end instr) {
p.error = ErrInternal; p.error = ErrInternal;
return; return;
} }
start = ebra start = ebra;
} else { } else {
end.setNext(ebra); end.setNext(ebra);
} }
...@@ -456,7 +510,7 @@ func (p *parser) term() (start, end instr) { ...@@ -456,7 +510,7 @@ func (p *parser) term() (start, end instr) {
p.nextc(); p.nextc();
start = newChar(c); start = newChar(c);
p.re.add(start); p.re.add(start);
return start, start return start, start;
} }
panic("unreachable"); panic("unreachable");
} }
...@@ -464,7 +518,7 @@ func (p *parser) term() (start, end instr) { ...@@ -464,7 +518,7 @@ func (p *parser) term() (start, end instr) {
func (p *parser) closure() (start, end instr) { func (p *parser) closure() (start, end instr) {
start, end = p.term(); start, end = p.term();
if start == nil || p.error != nil { if start == nil || p.error != nil {
return return;
} }
switch p.c() { switch p.c() {
case '*': case '*':
...@@ -473,7 +527,7 @@ func (p *parser) closure() (start, end instr) { ...@@ -473,7 +527,7 @@ func (p *parser) closure() (start, end instr) {
p.re.add(alt); p.re.add(alt);
end.setNext(alt); // after end, do alt end.setNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start alt.left = start; // alternate brach: return to start
start = alt; // alt becomes new (start, end) start = alt; // alt becomes new (start, end)
end = alt; end = alt;
case '+': case '+':
// (start,end)+: // (start,end)+:
...@@ -481,7 +535,7 @@ func (p *parser) closure() (start, end instr) { ...@@ -481,7 +535,7 @@ func (p *parser) closure() (start, end instr) {
p.re.add(alt); p.re.add(alt);
end.setNext(alt); // after end, do alt end.setNext(alt); // after end, do alt
alt.left = start; // alternate brach: return to start alt.left = start; // alternate brach: return to start
end = alt; // start is unchanged; end is alt end = alt; // start is unchanged; end is alt
case '?': case '?':
// (start,end)?: // (start,end)?:
alt := new(_Alt); alt := new(_Alt);
...@@ -491,23 +545,23 @@ func (p *parser) closure() (start, end instr) { ...@@ -491,23 +545,23 @@ func (p *parser) closure() (start, end instr) {
alt.left = start; // alternate branch is start alt.left = start; // alternate branch is start
alt.setNext(nop); // follow on to nop alt.setNext(nop); // follow on to nop
end.setNext(nop); // after end, go to nop end.setNext(nop); // after end, go to nop
start = alt; // start is now alt start = alt; // start is now alt
end = nop; // end is nop pointed to by both branches end = nop; // end is nop pointed to by both branches
default: default:
return return;
} }
switch p.nextc() { switch p.nextc() {
case '*', '+', '?': case '*', '+', '?':
p.error = ErrBadClosure; p.error = ErrBadClosure;
} }
return return;
} }
func (p *parser) concatenation() (start, end instr) { func (p *parser) concatenation() (start, end instr) {
for { for {
nstart, nend := p.closure(); nstart, nend := p.closure();
if p.error != nil { if p.error != nil {
return return;
} }
switch { switch {
case nstart == nil: // end of this concatenation case nstart == nil: // end of this concatenation
...@@ -529,7 +583,7 @@ func (p *parser) concatenation() (start, end instr) { ...@@ -529,7 +583,7 @@ func (p *parser) concatenation() (start, end instr) {
func (p *parser) regexp() (start, end instr) { func (p *parser) regexp() (start, end instr) {
start, end = p.concatenation(); start, end = p.concatenation();
if p.error != nil { if p.error != nil {
return return;
} }
for { for {
switch p.c() { switch p.c() {
...@@ -539,7 +593,7 @@ func (p *parser) regexp() (start, end instr) { ...@@ -539,7 +593,7 @@ func (p *parser) regexp() (start, end instr) {
p.nextc(); p.nextc();
nstart, nend := p.concatenation(); nstart, nend := p.concatenation();
if p.error != nil { if p.error != nil {
return return;
} }
alt := new(_Alt); alt := new(_Alt);
p.re.add(alt); p.re.add(alt);
...@@ -557,16 +611,16 @@ func (p *parser) regexp() (start, end instr) { ...@@ -557,16 +611,16 @@ func (p *parser) regexp() (start, end instr) {
func unNop(i instr) instr { func unNop(i instr) instr {
for i.kind() == _NOP { for i.kind() == _NOP {
i = i.next() i = i.next();
} }
return i return i;
} }
func (re *Regexp) eliminateNops() { func (re *Regexp) eliminateNops() {
for i := 0; i < re.inst.Len(); i++ { for i := 0; i < re.inst.Len(); i++ {
inst := re.inst.At(i).(instr); inst := re.inst.At(i).(instr);
if inst.kind() == _END { if inst.kind() == _END {
continue continue;
} }
inst.setNext(unNop(inst.next())); inst.setNext(unNop(inst.next()));
if inst.kind() == _ALT { if inst.kind() == _ALT {
...@@ -582,7 +636,7 @@ func (re *Regexp) dump() { ...@@ -582,7 +636,7 @@ func (re *Regexp) dump() {
print(inst.index(), ": "); print(inst.index(), ": ");
inst.print(); inst.print();
if inst.kind() != _END { if inst.kind() != _END {
print(" -> ", inst.next().index()) print(" -> ", inst.next().index());
} }
print("\n"); print("\n");
} }
...@@ -648,10 +702,10 @@ func addState(s []state, inst instr, match []int) []state { ...@@ -648,10 +702,10 @@ func addState(s []state, inst instr, match []int) []state {
// TODO: Once the state is a vector and we can do insert, have inputs always // TODO: Once the state is a vector and we can do insert, have inputs always
// go in order correctly and this "earlier" test is never necessary, // go in order correctly and this "earlier" test is never necessary,
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
if s[i].inst.index() == index && // same instruction if s[i].inst.index() == index && // same instruction
s[i].match[0] < pos { // earlier match already going; lefmost wins s[i].match[0] < pos { // earlier match already going; lefmost wins
return s return s;
} }
} }
if l == cap(s) { if l == cap(s) {
s1 := make([]state, 2*l)[0:l]; s1 := make([]state, 2*l)[0:l];
...@@ -660,7 +714,7 @@ func addState(s []state, inst instr, match []int) []state { ...@@ -660,7 +714,7 @@ func addState(s []state, inst instr, match []int) []state {
} }
s = s1; s = s1;
} }
s = s[0:l+1]; s = s[0 : l+1];
s[l].inst = inst; s[l].inst = inst;
s[l].match = match; s[l].match = match;
return s; return s;
...@@ -677,16 +731,16 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int { ...@@ -677,16 +731,16 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
found := false; found := false;
end := len(str); end := len(str);
if bytes != nil { if bytes != nil {
end = len(bytes) end = len(bytes);
} }
for pos <= end { for pos <= end {
if !found { if !found {
// prime the pump if we haven't seen a match yet // prime the pump if we haven't seen a match yet
match := make([]int, 2*(re.nbra+1)); match := make([]int, 2*(re.nbra + 1));
for i := 0; i < len(match); i++ { for i := 0; i < len(match); i++ {
match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac" match[i] = -1; // no match seen; catches cases like "a(b)?c" on "ac"
} }
match[0] = pos; match[0] = pos;
s[out] = addState(s[out], re.start.next(), match); s[out] = addState(s[out], re.start.next(), match);
} }
in, out = out, in; // old out state is new in state in, out = out, in; // old out state is new in state
...@@ -709,27 +763,27 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int { ...@@ -709,27 +763,27 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
switch s[in][i].inst.kind() { switch s[in][i].inst.kind() {
case _BOT: case _BOT:
if pos == 0 { if pos == 0 {
s[in] = addState(s[in], st.inst.next(), st.match) s[in] = addState(s[in], st.inst.next(), st.match);
} }
case _EOT: case _EOT:
if pos == end { if pos == end {
s[in] = addState(s[in], st.inst.next(), st.match) s[in] = addState(s[in], st.inst.next(), st.match);
} }
case _CHAR: case _CHAR:
if c == st.inst.(*_Char).char { if c == st.inst.(*_Char).char {
s[out] = addState(s[out], st.inst.next(), st.match) s[out] = addState(s[out], st.inst.next(), st.match);
} }
case _CHARCLASS: case _CHARCLASS:
if st.inst.(*_CharClass).matches(c) { if st.inst.(*_CharClass).matches(c) {
s[out] = addState(s[out], st.inst.next(), st.match) s[out] = addState(s[out], st.inst.next(), st.match);
} }
case _ANY: case _ANY:
if c != endOfFile { if c != endOfFile {
s[out] = addState(s[out], st.inst.next(), st.match) s[out] = addState(s[out], st.inst.next(), st.match);
} }
case _NOTNL: case _NOTNL:
if c != endOfFile && c != '\n' { if c != endOfFile && c != '\n' {
s[out] = addState(s[out], st.inst.next(), st.match) s[out] = addState(s[out], st.inst.next(), st.match);
} }
case _BRA: case _BRA:
n := st.inst.(*_Bra).n; n := st.inst.(*_Bra).n;
...@@ -737,21 +791,21 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int { ...@@ -737,21 +791,21 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
s[in] = addState(s[in], st.inst.next(), st.match); s[in] = addState(s[in], st.inst.next(), st.match);
case _EBRA: case _EBRA:
n := st.inst.(*_Ebra).n; n := st.inst.(*_Ebra).n;
st.match[2*n+1] = pos; st.match[2*n + 1] = pos;
s[in] = addState(s[in], st.inst.next(), st.match); s[in] = addState(s[in], st.inst.next(), st.match);
case _ALT: case _ALT:
s[in] = addState(s[in], st.inst.(*_Alt).left, st.match); s[in] = addState(s[in], st.inst.(*_Alt).left, st.match);
// give other branch a copy of this match vector // give other branch a copy of this match vector
s1 := make([]int, 2*(re.nbra+1)); s1 := make([]int, 2*(re.nbra + 1));
for i := 0; i < len(s1); i++ { for i := 0; i < len(s1); i++ {
s1[i] = st.match[i] s1[i] = st.match[i];
} }
s[in] = addState(s[in], st.inst.next(), s1); s[in] = addState(s[in], st.inst.next(), s1);
case _END: case _END:
// choose leftmost longest // choose leftmost longest
if !found || // first if !found || // first
st.match[0] < final.match[0] || // leftmost st.match[0] < final.match[0] || // leftmost
(st.match[0] == final.match[0] && pos > final.match[1]) { // longest (st.match[0] == final.match[0] && pos > final.match[1]) { // longest
final = st; final = st;
final.match[1] = pos; final.match[1] = pos;
} }
...@@ -775,7 +829,7 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int { ...@@ -775,7 +829,7 @@ func (re *Regexp) doExecute(str string, bytes []byte, pos int) []int {
// A negative value means the subexpression did not match any element of the string. // A negative value means the subexpression did not match any element of the string.
// An empty array means "no match". // An empty array means "no match".
func (re *Regexp) ExecuteString(s string) (a []int) { func (re *Regexp) ExecuteString(s string) (a []int) {
return re.doExecute(s, nil, 0) return re.doExecute(s, nil, 0);
} }
...@@ -787,21 +841,21 @@ func (re *Regexp) ExecuteString(s string) (a []int) { ...@@ -787,21 +841,21 @@ func (re *Regexp) ExecuteString(s string) (a []int) {
// A negative value means the subexpression did not match any element of the slice. // A negative value means the subexpression did not match any element of the slice.
// An empty array means "no match". // An empty array means "no match".
func (re *Regexp) Execute(b []byte) (a []int) { func (re *Regexp) Execute(b []byte) (a []int) {
return re.doExecute("", b, 0) return re.doExecute("", b, 0);
} }
// MatchString returns whether the Regexp matches the string s. // MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match. // The return value is a boolean: true for match, false for no match.
func (re *Regexp) MatchString(s string) bool { func (re *Regexp) MatchString(s string) bool {
return len(re.doExecute(s, nil, 0)) > 0 return len(re.doExecute(s, nil, 0)) > 0;
} }
// Match returns whether the Regexp matches the byte slice b. // Match returns whether the Regexp matches the byte slice b.
// The return value is a boolean: true for match, false for no match. // The return value is a boolean: true for match, false for no match.
func (re *Regexp) Match(b []byte) bool { func (re *Regexp) Match(b []byte) bool {
return len(re.doExecute("", b, 0)) > 0 return len(re.doExecute("", b, 0)) > 0;
} }
...@@ -813,15 +867,15 @@ func (re *Regexp) Match(b []byte) bool { ...@@ -813,15 +867,15 @@ func (re *Regexp) Match(b []byte) bool {
func (re *Regexp) MatchStrings(s string) (a []string) { func (re *Regexp) MatchStrings(s string) (a []string) {
r := re.doExecute(s, nil, 0); r := re.doExecute(s, nil, 0);
if r == nil { if r == nil {
return nil return nil;
} }
a = make([]string, len(r)/2); a = make([]string, len(r)/2);
for i := 0; i < len(r); i += 2 { for i := 0; i < len(r); i += 2 {
if r[i] != -1 { // -1 means no match for this subexpression if r[i] != -1 { // -1 means no match for this subexpression
a[i/2] = s[r[i] : r[i+1]] a[i/2] = s[r[i]:r[i+1]];
} }
} }
return return;
} }
// MatchSlices matches the Regexp against the byte slice b. // MatchSlices matches the Regexp against the byte slice b.
...@@ -832,15 +886,15 @@ func (re *Regexp) MatchStrings(s string) (a []string) { ...@@ -832,15 +886,15 @@ func (re *Regexp) MatchStrings(s string) (a []string) {
func (re *Regexp) MatchSlices(b []byte) (a [][]byte) { func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
r := re.doExecute("", b, 0); r := re.doExecute("", b, 0);
if r == nil { if r == nil {
return nil return nil;
} }
a = make([][]byte, len(r)/2); a = make([][]byte, len(r)/2);
for i := 0; i < len(r); i += 2 { for i := 0; i < len(r); i += 2 {
if r[i] != -1 { // -1 means no match for this subexpression if r[i] != -1 { // -1 means no match for this subexpression
a[i/2] = b[r[i] : r[i+1]] a[i/2] = b[r[i]:r[i+1]];
} }
} }
return return;
} }
// MatchString checks whether a textual regular expression // MatchString checks whether a textual regular expression
...@@ -849,9 +903,9 @@ func (re *Regexp) MatchSlices(b []byte) (a [][]byte) { ...@@ -849,9 +903,9 @@ func (re *Regexp) MatchSlices(b []byte) (a [][]byte) {
func MatchString(pattern string, s string) (matched bool, error os.Error) { func MatchString(pattern string, s string) (matched bool, error os.Error) {
re, err := Compile(pattern); re, err := Compile(pattern);
if err != nil { if err != nil {
return false, err return false, err;
} }
return re.MatchString(s), nil return re.MatchString(s), nil;
} }
// Match checks whether a textual regular expression // Match checks whether a textual regular expression
...@@ -860,26 +914,26 @@ func MatchString(pattern string, s string) (matched bool, error os.Error) { ...@@ -860,26 +914,26 @@ func MatchString(pattern string, s string) (matched bool, error os.Error) {
func Match(pattern string, b []byte) (matched bool, error os.Error) { func Match(pattern string, b []byte) (matched bool, error os.Error) {
re, err := Compile(pattern); re, err := Compile(pattern);
if err != nil { if err != nil {
return false, err return false, err;
} }
return re.Match(b), nil return re.Match(b), nil;
} }
// ReplaceAllString returns a copy of src in which all matches for the Regexp // ReplaceAllString returns a copy of src in which all matches for the Regexp
// have been replaced by repl. No support is provided for expressions // have been replaced by repl. No support is provided for expressions
// (e.g. \1 or $1) in the replacement string. // (e.g. \1 or $1) in the replacement string.
func (re *Regexp) ReplaceAllString(src, repl string) string { func (re *Regexp) ReplaceAllString(src, repl string) string {
lastMatchEnd := 0; // end position of the most recent match lastMatchEnd := 0; // end position of the most recent match
searchPos := 0; // position where we next look for a match searchPos := 0; // position where we next look for a match
buf := new(bytes.Buffer); buf := new(bytes.Buffer);
for searchPos <= len(src) { for searchPos <= len(src) {
a := re.doExecute(src, nil, searchPos); a := re.doExecute(src, nil, searchPos);
if len(a) == 0 { if len(a) == 0 {
break; // no more matches break; // no more matches
} }
// Copy the unmatched characters before this match. // Copy the unmatched characters before this match.
io.WriteString(buf, src[lastMatchEnd:a[0]]); io.WriteString(buf, src[lastMatchEnd : a[0]]);
// Now insert a copy of the replacement string, but not for a // Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match. // match of the empty string immediately after another match.
...@@ -891,7 +945,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { ...@@ -891,7 +945,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
lastMatchEnd = a[1]; lastMatchEnd = a[1];
// Advance past this match; always advance at least one character. // Advance past this match; always advance at least one character.
_, width := utf8.DecodeRuneInString(src[searchPos:len(src)]); _, width := utf8.DecodeRuneInString(src[searchPos : len(src)]);
if searchPos + width > a[1] { if searchPos + width > a[1] {
searchPos += width; searchPos += width;
} else if searchPos + 1 > a[1] { } else if searchPos + 1 > a[1] {
...@@ -904,7 +958,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { ...@@ -904,7 +958,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
} }
// Copy the unmatched characters after the last match. // Copy the unmatched characters after the last match.
io.WriteString(buf, src[lastMatchEnd:len(src)]); io.WriteString(buf, src[lastMatchEnd : len(src)]);
return buf.String(); return buf.String();
} }
...@@ -913,17 +967,17 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { ...@@ -913,17 +967,17 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
// have been replaced by repl. No support is provided for expressions // have been replaced by repl. No support is provided for expressions
// (e.g. \1 or $1) in the replacement text. // (e.g. \1 or $1) in the replacement text.
func (re *Regexp) ReplaceAll(src, repl []byte) []byte { func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
lastMatchEnd := 0; // end position of the most recent match lastMatchEnd := 0; // end position of the most recent match
searchPos := 0; // position where we next look for a match searchPos := 0; // position where we next look for a match
buf := new(bytes.Buffer); buf := new(bytes.Buffer);
for searchPos <= len(src) { for searchPos <= len(src) {
a := re.doExecute("", src, searchPos); a := re.doExecute("", src, searchPos);
if len(a) == 0 { if len(a) == 0 {
break; // no more matches break; // no more matches
} }
// Copy the unmatched characters before this match. // Copy the unmatched characters before this match.
buf.Write(src[lastMatchEnd:a[0]]); buf.Write(src[lastMatchEnd : a[0]]);
// Now insert a copy of the replacement string, but not for a // Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match. // match of the empty string immediately after another match.
...@@ -935,7 +989,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { ...@@ -935,7 +989,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
lastMatchEnd = a[1]; lastMatchEnd = a[1];
// Advance past this match; always advance at least one character. // Advance past this match; always advance at least one character.
_, width := utf8.DecodeRune(src[searchPos:len(src)]); _, width := utf8.DecodeRune(src[searchPos : len(src)]);
if searchPos + width > a[1] { if searchPos + width > a[1] {
searchPos += width; searchPos += width;
} else if searchPos + 1 > a[1] { } else if searchPos + 1 > a[1] {
...@@ -948,7 +1002,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { ...@@ -948,7 +1002,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
} }
// Copy the unmatched characters after the last match. // Copy the unmatched characters after the last match.
buf.Write(src[lastMatchEnd:len(src)]); buf.Write(src[lastMatchEnd : len(src)]);
return buf.Bytes(); return buf.Bytes();
} }
...@@ -957,7 +1011,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { ...@@ -957,7 +1011,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
// inside the argument text; the returned string is a regular expression matching // inside the argument text; the returned string is a regular expression matching
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
func QuoteMeta(s string) string { func QuoteMeta(s string) string {
b := make([]byte, 2 * len(s)); b := make([]byte, 2*len(s));
// A byte loop is correct because all metacharacters are ASCII. // A byte loop is correct because all metacharacters are ASCII.
j := 0; j := 0;
...@@ -1004,7 +1058,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) ...@@ -1004,7 +1058,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int))
if width > 0 { if width > 0 {
pos += width; pos += width;
} else { } else {
pos = end + 1; pos = end+1;
} }
} else { } else {
pos = matches[1]; pos = matches[1];
...@@ -1025,7 +1079,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) ...@@ -1025,7 +1079,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int))
// containing the matching substrings. // containing the matching substrings.
func (re *Regexp) AllMatches(b []byte, n int) [][]byte { func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
if n <= 0 { if n <= 0 {
n = len(b) + 1; n = len(b)+1;
} }
result := make([][]byte, n); result := make([][]byte, n);
i := 0; i := 0;
...@@ -1043,7 +1097,7 @@ func (re *Regexp) AllMatches(b []byte, n int) [][]byte { ...@@ -1043,7 +1097,7 @@ func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
// containing the matching substrings. // containing the matching substrings.
func (re *Regexp) AllMatchesString(s string, n int) []string { func (re *Regexp) AllMatchesString(s string, n int) []string {
if n <= 0 { if n <= 0 {
n = len(s) + 1; n = len(s)+1;
} }
result := make([]string, n); result := make([]string, n);
i := 0; i := 0;
...@@ -1059,15 +1113,13 @@ func (re *Regexp) AllMatchesString(s string, n int) []string { ...@@ -1059,15 +1113,13 @@ func (re *Regexp) AllMatchesString(s string, n int) []string {
// matches. Text that does not match the expression will be skipped. Empty // matches. Text that does not match the expression will be skipped. Empty
// matches abutting a preceding match are ignored. The function returns a // matches abutting a preceding match are ignored. The function returns a
// channel that iterates over the matching substrings. // channel that iterates over the matching substrings.
func (re *Regexp) AllMatchesIter(b []byte, n int) (<-chan []byte) { func (re *Regexp) AllMatchesIter(b []byte, n int) <-chan []byte {
if n <= 0 { if n <= 0 {
n = len(b) + 1; n = len(b)+1;
} }
c := make(chan []byte, 10); c := make(chan []byte, 10);
go func() { go func() {
re.allMatches("", b, n, func(start, end int) { re.allMatches("", b, n, func(start, end int) { c <- b[start:end] });
c <- b[start:end];
});
close(c); close(c);
}(); }();
return c; return c;
...@@ -1078,15 +1130,13 @@ func (re *Regexp) AllMatchesIter(b []byte, n int) (<-chan []byte) { ...@@ -1078,15 +1130,13 @@ func (re *Regexp) AllMatchesIter(b []byte, n int) (<-chan []byte) {
// matches. Text that does not match the expression will be skipped. Empty // matches. Text that does not match the expression will be skipped. Empty
// matches abutting a preceding match are ignored. The function returns a // matches abutting a preceding match are ignored. The function returns a
// channel that iterates over the matching substrings. // channel that iterates over the matching substrings.
func (re *Regexp) AllMatchesStringIter(s string, n int) (<-chan string) { func (re *Regexp) AllMatchesStringIter(s string, n int) <-chan string {
if n <= 0 { if n <= 0 {
n = len(s) + 1; n = len(s)+1;
} }
c := make(chan string, 10); c := make(chan string, 10);
go func() { go func() {
re.allMatches(s, nil, n, func(start, end int) { re.allMatches(s, nil, n, func(start, end int) { c <- s[start:end] });
c <- s[start:end];
});
close(c); close(c);
}(); }();
return c; return c;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment