Commit 7db472fd authored by Rob Pike

The prefix optimization applies only to the first iteration.

Fixes #596.

R=rsc
CC=golang-dev
https://golang.org/cl/206101
parent ca075494
...@@ -30,7 +30,6 @@ var good_re = []string{ ...@@ -30,7 +30,6 @@ var good_re = []string{
`[^\n]`, `[^\n]`,
} }
// TODO: nice to do this with a map
type stringError struct { type stringError struct {
re string re string
err os.Error err os.Error
...@@ -97,6 +96,10 @@ var matches = []tester{ ...@@ -97,6 +96,10 @@ var matches = []tester{
tester{`[.]`, ".", vec{0, 1}}, tester{`[.]`, ".", vec{0, 1}},
tester{`/$`, "/abc/", vec{4, 5}}, tester{`/$`, "/abc/", vec{4, 5}},
tester{`/$`, "/abc", vec{}}, tester{`/$`, "/abc", vec{}},
// fixed bugs
tester{`ab$`, "cab", vec{1, 3}},
tester{`axxb$`, "axxcb", vec{}},
} }
func compileTest(t *testing.T, expr string, error os.Error) *Regexp { func compileTest(t *testing.T, expr string, error os.Error) *Regexp {
......
...@@ -75,8 +75,9 @@ type Regexp struct { ...@@ -75,8 +75,9 @@ type Regexp struct {
prefix string // initial plain text string prefix string // initial plain text string
prefixBytes []byte // initial plain text bytes prefixBytes []byte // initial plain text bytes
inst *vector.Vector inst *vector.Vector
start instr start instr // first instruction of machine
nbra int // number of brackets in expression, for subexpressions prefixStart instr // where to start if there is a prefix
nbra int // number of brackets in expression, for subexpressions
} }
const ( const (
...@@ -650,8 +651,8 @@ Loop: ...@@ -650,8 +651,8 @@ Loop:
b = bytes.Add(b, utf[0:n]) b = bytes.Add(b, utf[0:n])
i = inst.next().index() i = inst.next().index()
} }
// point start instruction to first non-CHAR // point prefixStart instruction to first non-CHAR after prefix
re.inst.At(0).(instr).setNext(re.inst.At(i).(instr)) re.prefixStart = re.inst.At(i).(instr)
re.prefixBytes = b re.prefixBytes = b
re.prefix = string(b) re.prefix = string(b)
} }
...@@ -807,6 +808,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { ...@@ -807,6 +808,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
end = len(bytestr) end = len(bytestr)
} }
// fast check for initial plain substring // fast check for initial plain substring
prefixed := false // has this iteration begun by skipping a prefix?
if re.prefix != "" { if re.prefix != "" {
var advance int var advance int
if bytestr == nil { if bytestr == nil {
...@@ -818,6 +820,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { ...@@ -818,6 +820,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
return []int{} return []int{}
} }
pos += advance + len(re.prefix) pos += advance + len(re.prefix)
prefixed = true
} }
arena := &matchArena{nil, 2 * (re.nbra + 1)} arena := &matchArena{nil, 2 * (re.nbra + 1)}
for pos <= end { for pos <= end {
...@@ -825,7 +828,12 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { ...@@ -825,7 +828,12 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
// prime the pump if we haven't seen a match yet // prime the pump if we haven't seen a match yet
match := arena.noMatch() match := arena.noMatch()
match.m[0] = pos match.m[0] = pos
s[out] = arena.addState(s[out], re.start.next(), match, pos, end) if prefixed {
s[out] = arena.addState(s[out], re.prefixStart, match, pos, end)
prefixed = false // next iteration should start at beginning of machine.
} else {
s[out] = arena.addState(s[out], re.start.next(), match, pos, end)
}
arena.free(match) // if addState saved it, ref was incremented arena.free(match) // if addState saved it, ref was incremented
} }
in, out = out, in // old out state is new in state in, out = out, in // old out state is new in state
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment