Commit 3ca1f28e authored by Russ Cox's avatar Russ Cox

regexp: evaluate context flags lazily

There's no point in computing whether we're at the
beginning of the line if the NFA isn't going to ask.
Wait to compute that until asked.

Whatever minor slowdowns were introduced by
the conversion to pools that were not repaid by
other optimizations are taken care of by this one.

name                             old time/op    new time/op    delta
Find-12                             252ns ± 0%     260ns ± 0%   +3.34%  (p=0.000 n=10+8)
FindAllNoMatches-12                 136ns ± 4%     134ns ± 4%   -0.96%  (p=0.033 n=10+10)
FindString-12                       246ns ± 0%     250ns ± 0%   +1.46%  (p=0.000 n=8+10)
FindSubmatch-12                     332ns ± 1%     332ns ± 0%     ~     (p=0.101 n=9+10)
FindStringSubmatch-12               321ns ± 1%     322ns ± 1%     ~     (p=0.717 n=9+10)
Literal-12                         91.6ns ± 0%    92.3ns ± 0%   +0.74%  (p=0.000 n=9+9)
NotLiteral-12                      1.47µs ± 0%    1.47µs ± 0%   +0.38%  (p=0.000 n=9+8)
MatchClass-12                      2.15µs ± 0%    2.15µs ± 0%   +0.39%  (p=0.000 n=10+10)
MatchClass_InRange-12              2.09µs ± 0%    2.11µs ± 0%   +0.75%  (p=0.000 n=9+9)
ReplaceAll-12                      1.40µs ± 0%    1.40µs ± 0%     ~     (p=0.525 n=10+10)
AnchoredLiteralShortNonMatch-12    83.5ns ± 0%    81.6ns ± 0%   -2.28%  (p=0.000 n=9+10)
AnchoredLiteralLongNonMatch-12      101ns ± 0%      97ns ± 1%   -3.54%  (p=0.000 n=10+10)
AnchoredShortMatch-12               131ns ± 0%     128ns ± 0%   -2.29%  (p=0.000 n=10+9)
AnchoredLongMatch-12                268ns ± 1%     252ns ± 1%   -6.04%  (p=0.000 n=10+10)
OnePassShortA-12                    614ns ± 0%     587ns ± 1%   -4.33%  (p=0.000 n=6+10)
NotOnePassShortA-12                 552ns ± 0%     547ns ± 1%   -0.89%  (p=0.000 n=10+10)
OnePassShortB-12                    494ns ± 0%     455ns ± 0%   -7.96%  (p=0.000 n=9+9)
NotOnePassShortB-12                 411ns ± 0%     406ns ± 0%   -1.30%  (p=0.000 n=9+9)
OnePassLongPrefix-12                109ns ± 0%     108ns ± 1%     ~     (p=0.064 n=8+9)
OnePassLongNotPrefix-12             403ns ± 0%     349ns ± 0%  -13.30%  (p=0.000 n=9+8)
MatchParallelShared-12             38.9ns ± 1%    37.9ns ± 1%   -2.65%  (p=0.000 n=10+8)
MatchParallelCopied-12             39.2ns ± 1%    38.3ns ± 2%   -2.20%  (p=0.001 n=10+10)
QuoteMetaAll-12                    94.5ns ± 0%    94.7ns ± 0%   +0.18%  (p=0.043 n=10+9)
QuoteMetaNone-12                   52.7ns ± 0%    52.7ns ± 0%     ~     (all equal)
Match/Easy0/32-12                  72.2ns ± 0%    71.9ns ± 0%   -0.38%  (p=0.009 n=8+10)
Match/Easy0/1K-12                   296ns ± 1%     297ns ± 0%   +0.51%  (p=0.001 n=10+9)
Match/Easy0/32K-12                 4.57µs ± 3%    4.61µs ± 2%     ~     (p=0.280 n=10+10)
Match/Easy0/1M-12                   234µs ± 0%     234µs ± 0%     ~     (p=0.986 n=10+10)
Match/Easy0/32M-12                 7.96ms ± 0%    7.98ms ± 0%   +0.22%  (p=0.010 n=10+9)
Match/Easy0i/32-12                 1.09µs ± 0%    1.10µs ± 0%   +0.23%  (p=0.000 n=8+9)
Match/Easy0i/1K-12                 31.7µs ± 0%    31.7µs ± 0%   +0.09%  (p=0.003 n=9+8)
Match/Easy0i/32K-12                1.61ms ± 0%    1.27ms ± 1%  -21.03%  (p=0.000 n=8+10)
Match/Easy0i/1M-12                 51.4ms ± 0%    40.4ms ± 0%  -21.29%  (p=0.000 n=8+8)
Match/Easy0i/32M-12                 1.65s ± 0%     1.30s ± 1%  -21.22%  (p=0.000 n=9+9)
Match/Easy1/32-12                  67.6ns ± 1%    67.2ns ± 0%     ~     (p=0.085 n=10+9)
Match/Easy1/1K-12                   873ns ± 2%     880ns ± 0%   +0.78%  (p=0.006 n=9+7)
Match/Easy1/32K-12                 39.7µs ± 1%    34.3µs ± 3%  -13.53%  (p=0.000 n=10+10)
Match/Easy1/1M-12                  1.41ms ± 1%    1.19ms ± 3%  -15.48%  (p=0.000 n=10+10)
Match/Easy1/32M-12                 44.9ms ± 1%    38.0ms ± 2%  -15.21%  (p=0.000 n=10+10)
Match/Medium/32-12                 1.04µs ± 0%    1.03µs ± 0%   -0.57%  (p=0.000 n=9+9)
Match/Medium/1K-12                 31.2µs ± 0%    31.4µs ± 1%   +0.61%  (p=0.000 n=8+10)
Match/Medium/32K-12                1.45ms ± 1%    1.20ms ± 0%  -17.70%  (p=0.000 n=10+8)
Match/Medium/1M-12                 46.4ms ± 0%    38.4ms ± 2%  -17.32%  (p=0.000 n=6+9)
Match/Medium/32M-12                 1.49s ± 1%     1.24s ± 1%  -16.81%  (p=0.000 n=10+10)
Match/Hard/32-12                   1.47µs ± 0%    1.47µs ± 0%   -0.31%  (p=0.000 n=9+10)
Match/Hard/1K-12                   44.5µs ± 1%    44.4µs ± 0%     ~     (p=0.075 n=10+10)
Match/Hard/32K-12                  2.09ms ± 0%    1.78ms ± 7%  -14.88%  (p=0.000 n=8+10)
Match/Hard/1M-12                   67.8ms ± 5%    56.9ms ± 7%  -16.05%  (p=0.000 n=10+10)
Match/Hard/32M-12                   2.17s ± 5%     1.84s ± 6%  -15.21%  (p=0.000 n=10+10)
Match/Hard1/32-12                  7.89µs ± 0%    7.94µs ± 0%   +0.61%  (p=0.000 n=9+9)
Match/Hard1/1K-12                   246µs ± 0%     245µs ± 0%   -0.30%  (p=0.010 n=9+10)
Match/Hard1/32K-12                 8.93ms ± 0%    8.17ms ± 0%   -8.44%  (p=0.000 n=9+8)
Match/Hard1/1M-12                   286ms ± 0%     269ms ± 9%   -5.66%  (p=0.028 n=9+10)
Match/Hard1/32M-12                  9.16s ± 0%     8.61s ± 8%   -5.98%  (p=0.028 n=9+10)
Match_onepass_regex/32-12           825ns ± 0%     712ns ± 0%  -13.75%  (p=0.000 n=8+8)
Match_onepass_regex/1K-12          28.7µs ± 1%    19.8µs ± 0%  -30.99%  (p=0.000 n=9+8)
Match_onepass_regex/32K-12          950µs ± 1%     628µs ± 0%  -33.83%  (p=0.000 n=9+8)
Match_onepass_regex/1M-12          30.4ms ± 0%    20.1ms ± 0%  -33.74%  (p=0.000 n=9+8)
Match_onepass_regex/32M-12          974ms ± 1%     646ms ± 0%  -33.73%  (p=0.000 n=9+8)
CompileOnepass-12                  4.60µs ± 0%    4.59µs ± 0%     ~     (p=0.063 n=8+9)
[Geo mean]                         23.1µs         21.3µs        -7.44%

https://perf.golang.org/search?q=upload:20181004.4

Change-Id: I47cdd09f6dcde1d7c317080e9b4df42c7d0a8d24
Reviewed-on: https://go-review.googlesource.com/c/139782
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent a376435a
...@@ -257,7 +257,8 @@ func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { ...@@ -257,7 +257,8 @@ func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
} }
case syntax.InstEmptyWidth: case syntax.InstEmptyWidth:
if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 { flag := i.context(pos)
if !flag.match(syntax.EmptyOp(inst.Arg)) {
continue continue
} }
pc = inst.Out pc = inst.Out
......
...@@ -114,6 +114,61 @@ func (m *machine) alloc(i *syntax.Inst) *thread { ...@@ -114,6 +114,61 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
return t return t
} }
// A lazyFlag is a lazily-evaluated syntax.EmptyOp,
// for checking zero-width flags like ^ $ \A \z \B \b.
// It records the pair of relevant runes and does not
// determine the implied flags until absolutely necessary
// (most of the time, that means never).
type lazyFlag uint64
func newLazyFlag(r1, r2 rune) lazyFlag {
return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2)))
}
func (f lazyFlag) match(op syntax.EmptyOp) bool {
if op == 0 {
return true
}
r1 := rune(f >> 32)
if op&syntax.EmptyBeginLine != 0 {
if r1 != '\n' && r1 >= 0 {
return false
}
op &^= syntax.EmptyBeginLine
}
if op&syntax.EmptyBeginText != 0 {
if r1 >= 0 {
return false
}
op &^= syntax.EmptyBeginText
}
if op == 0 {
return true
}
r2 := rune(f)
if op&syntax.EmptyEndLine != 0 {
if r2 != '\n' && r2 >= 0 {
return false
}
op &^= syntax.EmptyEndLine
}
if op&syntax.EmptyEndText != 0 {
if r2 >= 0 {
return false
}
op &^= syntax.EmptyEndText
}
if op == 0 {
return true
}
if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) {
op &^= syntax.EmptyWordBoundary
} else {
op &^= syntax.EmptyNoWordBoundary
}
return op == 0
}
// match runs the machine over the input starting at pos. // match runs the machine over the input starting at pos.
// It reports whether a match was found. // It reports whether a match was found.
// If so, m.matchcap holds the submatch information. // If so, m.matchcap holds the submatch information.
...@@ -133,9 +188,9 @@ func (m *machine) match(i input, pos int) bool { ...@@ -133,9 +188,9 @@ func (m *machine) match(i input, pos int) bool {
if r != endOfText { if r != endOfText {
r1, width1 = i.step(pos + width) r1, width1 = i.step(pos + width)
} }
var flag syntax.EmptyOp var flag lazyFlag
if pos == 0 { if pos == 0 {
flag = syntax.EmptyOpContext(-1, r) flag = newLazyFlag(-1, r)
} else { } else {
flag = i.context(pos) flag = i.context(pos)
} }
...@@ -164,10 +219,10 @@ func (m *machine) match(i input, pos int) bool { ...@@ -164,10 +219,10 @@ func (m *machine) match(i input, pos int) bool {
if len(m.matchcap) > 0 { if len(m.matchcap) > 0 {
m.matchcap[0] = pos m.matchcap[0] = pos
} }
m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil) m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil)
} }
flag = syntax.EmptyOpContext(r, r1) flag = newLazyFlag(r, r1)
m.step(runq, nextq, pos, pos+width, r, flag) m.step(runq, nextq, pos, pos+width, r, &flag)
if width == 0 { if width == 0 {
break break
} }
...@@ -202,7 +257,7 @@ func (m *machine) clear(q *queue) { ...@@ -202,7 +257,7 @@ func (m *machine) clear(q *queue) {
// The step processes the rune c (which may be endOfText), // The step processes the rune c (which may be endOfText),
// which starts at position pos and ends at nextPos. // which starts at position pos and ends at nextPos.
// nextCond gives the setting for the empty-width flags after c. // nextCond gives the setting for the empty-width flags after c.
func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) { func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) {
longest := m.re.longest longest := m.re.longest
for j := 0; j < len(runq.dense); j++ { for j := 0; j < len(runq.dense); j++ {
d := &runq.dense[j] d := &runq.dense[j]
...@@ -259,7 +314,7 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy ...@@ -259,7 +314,7 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
// It also recursively adds an entry for all instructions reachable from pc by following // It also recursively adds an entry for all instructions reachable from pc by following
// empty-width conditions satisfied by cond. pos gives the current position // empty-width conditions satisfied by cond. pos gives the current position
// in the input. // in the input.
func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread { func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread {
Again: Again:
if pc == 0 { if pc == 0 {
return t return t
...@@ -286,7 +341,7 @@ Again: ...@@ -286,7 +341,7 @@ Again:
pc = i.Arg pc = i.Arg
goto Again goto Again
case syntax.InstEmptyWidth: case syntax.InstEmptyWidth:
if syntax.EmptyOp(i.Arg)&^cond == 0 { if cond.match(syntax.EmptyOp(i.Arg)) {
pc = i.Out pc = i.Out
goto Again goto Again
} }
...@@ -365,16 +420,16 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in ...@@ -365,16 +420,16 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
if r != endOfText { if r != endOfText {
r1, width1 = i.step(pos + width) r1, width1 = i.step(pos + width)
} }
var flag syntax.EmptyOp var flag lazyFlag
if pos == 0 { if pos == 0 {
flag = syntax.EmptyOpContext(-1, r) flag = newLazyFlag(-1, r)
} else { } else {
flag = i.context(pos) flag = i.context(pos)
} }
pc := re.onepass.Start pc := re.onepass.Start
inst := re.onepass.Inst[pc] inst := re.onepass.Inst[pc]
// If there is a simple literal prefix, skip over it. // If there is a simple literal prefix, skip over it.
if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 && if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
len(re.prefix) > 0 && i.canCheckPrefix() { len(re.prefix) > 0 && i.canCheckPrefix() {
// Match requires literal prefix; fast search for it. // Match requires literal prefix; fast search for it.
if !i.hasPrefix(re) { if !i.hasPrefix(re) {
...@@ -422,7 +477,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in ...@@ -422,7 +477,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
case syntax.InstNop: case syntax.InstNop:
continue continue
case syntax.InstEmptyWidth: case syntax.InstEmptyWidth:
if syntax.EmptyOp(inst.Arg)&^flag != 0 { if !flag.match(syntax.EmptyOp(inst.Arg)) {
goto Return goto Return
} }
continue continue
...@@ -435,7 +490,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in ...@@ -435,7 +490,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
if width == 0 { if width == 0 {
break break
} }
flag = syntax.EmptyOpContext(r, r1) flag = newLazyFlag(r, r1)
pos += width pos += width
r, width = r1, width1 r, width = r1, width1
if r != endOfText { if r != endOfText {
......
...@@ -311,7 +311,7 @@ type input interface { ...@@ -311,7 +311,7 @@ type input interface {
canCheckPrefix() bool // can we look ahead without losing info? canCheckPrefix() bool // can we look ahead without losing info?
hasPrefix(re *Regexp) bool hasPrefix(re *Regexp) bool
index(re *Regexp, pos int) int index(re *Regexp, pos int) int
context(pos int) syntax.EmptyOp context(pos int) lazyFlag
} }
// inputString scans a string. // inputString scans a string.
...@@ -342,7 +342,7 @@ func (i *inputString) index(re *Regexp, pos int) int { ...@@ -342,7 +342,7 @@ func (i *inputString) index(re *Regexp, pos int) int {
return strings.Index(i.str[pos:], re.prefix) return strings.Index(i.str[pos:], re.prefix)
} }
func (i *inputString) context(pos int) syntax.EmptyOp { func (i *inputString) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str) // 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) { if uint(pos-1) < uint(len(i.str)) {
...@@ -358,7 +358,7 @@ func (i *inputString) context(pos int) syntax.EmptyOp { ...@@ -358,7 +358,7 @@ func (i *inputString) context(pos int) syntax.EmptyOp {
r2, _ = utf8.DecodeRuneInString(i.str[pos:]) r2, _ = utf8.DecodeRuneInString(i.str[pos:])
} }
} }
return syntax.EmptyOpContext(r1, r2) return newLazyFlag(r1, r2)
} }
// inputBytes scans a byte slice. // inputBytes scans a byte slice.
...@@ -389,7 +389,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int { ...@@ -389,7 +389,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
return bytes.Index(i.str[pos:], re.prefixBytes) return bytes.Index(i.str[pos:], re.prefixBytes)
} }
func (i *inputBytes) context(pos int) syntax.EmptyOp { func (i *inputBytes) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str) // 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) { if uint(pos-1) < uint(len(i.str)) {
...@@ -405,7 +405,7 @@ func (i *inputBytes) context(pos int) syntax.EmptyOp { ...@@ -405,7 +405,7 @@ func (i *inputBytes) context(pos int) syntax.EmptyOp {
r2, _ = utf8.DecodeRune(i.str[pos:]) r2, _ = utf8.DecodeRune(i.str[pos:])
} }
} }
return syntax.EmptyOpContext(r1, r2) return newLazyFlag(r1, r2)
} }
// inputReader scans a RuneReader. // inputReader scans a RuneReader.
...@@ -441,8 +441,8 @@ func (i *inputReader) index(re *Regexp, pos int) int { ...@@ -441,8 +441,8 @@ func (i *inputReader) index(re *Regexp, pos int) int {
return -1 return -1
} }
func (i *inputReader) context(pos int) syntax.EmptyOp { func (i *inputReader) context(pos int) lazyFlag {
return 0 return 0 // not used
} }
// LiteralPrefix returns a literal string that must begin any match // LiteralPrefix returns a literal string that must begin any match
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment