Commit b9a59d9f authored by Martin Möhrmann

cmd/compile: optimize len([]rune(string))

Adds a new runtime function to count runes in a string.
Modifies the compiler to detect the pattern len([]rune(string))
and replace it with a call to the new rune counting runtime function.

RuneCount/lenruneslice/ASCII                  27.8ns ± 2%  14.5ns ± 3%  -47.70%  (p=0.000 n=10+10)
RuneCount/lenruneslice/Japanese                126ns ± 2%    60ns ± 2%  -52.03%  (p=0.000 n=10+10)
RuneCount/lenruneslice/MixedLength             104ns ± 2%    50ns ± 1%  -51.71%  (p=0.000 n=10+9)

Fixes #24923

Change-Id: Ie9c7e7391a4e2cca675c5cdcc1e5ce7d523948b9
Reviewed-on: https://go-review.googlesource.com/108985
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
parent a8a60ac2
This diff is collapsed.
......@@ -55,10 +55,12 @@ func slicebytetostringtmp([]byte) string
func slicerunetostring(*[32]byte, []rune) string
func stringtoslicebyte(*[32]byte, string) []byte
func stringtoslicerune(*[32]rune, string) []rune
func decoderune(string, int) (retv rune, retk int)
func slicecopy(to any, fr any, wid uintptr) int
func slicestringcopy(to any, fr any) int
func decoderune(string, int) (retv rune, retk int)
func countrunes(string) int
// interface conversions
func convI2I(typ *byte, elem any) (ret any)
......
......@@ -1098,7 +1098,14 @@ func (o *Order) expr(n, lhs *Node) *Node {
OSTRARRAYBYTE,
OSTRARRAYBYTETMP,
OSTRARRAYRUNE:
if isRuneCount(n) {
// len([]rune(s)) is rewritten to runtime.countrunes(s) later.
n.Left.Left = o.expr(n.Left.Left, nil)
} else {
o.call(n)
}
if lhs == nil || lhs.Op != ONAME || instrumenting {
n = o.copyExpr(n, n.Type, false)
}
......
......@@ -538,6 +538,12 @@ opswitch:
n.Left = walkexpr(n.Left, init)
case OLEN, OCAP:
if isRuneCount(n) {
// Replace len([]rune(string)) with runtime.countrunes(string).
n = mkcall("countrunes", n.Type, init, conv(n.Left.Left, types.Types[TSTRING]))
break
}
n.Left = walkexpr(n.Left, init)
// replace len(*[10]int) with 10.
......@@ -4085,3 +4091,9 @@ func canMergeLoads() bool {
}
return false
}
// isRuneCount reports whether n is of the form len([]rune(string)).
// These are optimized into a call to runtime.countrunes.
//
// It always reports false when Debug['N'] is non-zero or when
// instrumenting is set, so callers keep the ordinary conversion in
// those modes.
func isRuneCount(n *Node) bool {
	if Debug['N'] != 0 || instrumenting {
		return false
	}
	// n.Left is only inspected once n.Op is known to be OLEN.
	return n.Op == OLEN && n.Left.Op == OSTRARRAYRUNE
}
......@@ -9,6 +9,7 @@ import (
"strconv"
"strings"
"testing"
"unicode/utf8"
)
// Strings and slices that don't escape and fit into tmpBuf are stack allocated,
......@@ -110,6 +111,43 @@ var stringdata = []struct{ name, data string }{
{"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"},
}
// sinkInt accumulates the values computed inside the benchmark loops.
// NOTE(review): presumably it exists so the compiler cannot discard the
// measured expressions as dead code — confirm.
var sinkInt int
// BenchmarkRuneCount measures three ways of counting the runes of every
// stringdata entry: len([]rune(s)), a range loop, and
// utf8.RuneCountInString. Each result is added to sinkInt.
func BenchmarkRuneCount(b *testing.B) {
	// One sub-benchmark per counting strategy; inside each, one nested
	// sub-benchmark per stringdata entry, named after the entry.
	b.Run("lenruneslice", func(b *testing.B) {
		for _, tc := range stringdata {
			b.Run(tc.name, func(b *testing.B) {
				for iter := 0; iter < b.N; iter++ {
					// Keep the expression in exactly this form: it is
					// the pattern being measured.
					sinkInt += len([]rune(tc.data))
				}
			})
		}
	})
	b.Run("rangeloop", func(b *testing.B) {
		for _, tc := range stringdata {
			b.Run(tc.name, func(b *testing.B) {
				for iter := 0; iter < b.N; iter++ {
					count := 0
					for range tc.data {
						count++
					}
					sinkInt += count
				}
			})
		}
	})
	b.Run("utf8.RuneCountInString", func(b *testing.B) {
		for _, tc := range stringdata {
			b.Run(tc.name, func(b *testing.B) {
				for iter := 0; iter < b.N; iter++ {
					sinkInt += utf8.RuneCountInString(tc.data)
				}
			})
		}
	})
}
func BenchmarkRuneIterate(b *testing.B) {
b.Run("range", func(b *testing.B) {
for _, sd := range stringdata {
......
......@@ -39,6 +39,15 @@ const (
hicb = 0xBF // 1011 1111
)
// countrunes reports how many runes the string s contains.
//
// It counts the iterations of a range loop over s. Per the language
// rules for ranging over a string, each iteration covers one decoded
// rune, and a byte that is not valid UTF-8 is visited as a single rune.
func countrunes(s string) int {
	var total int
	for range s {
		total++
	}
	return total
}
// decoderune returns the non-ASCII rune at the start of
// s[k:] and the index after the rune in s.
//
......
......@@ -212,14 +212,25 @@ func TestSequencing(t *testing.T) {
}
}
// Check that a range loop and a []int conversion visit the same runes.
func runtimeRuneCount(s string) int {
	// gc replaces the len([]rune(s)) expression below with a call to
	// runtime.countrunes(s), so this helper exercises that path.
	count := len([]rune(s))
	return count
}
// Check that a range loop, len([]rune(string)) optimization and
// []rune conversions visit the same runes.
// Not really a test of this package, but the assumption is used here and
// it's good to verify
func TestIntConversion(t *testing.T) {
// it's good to verify.
func TestRuntimeConversion(t *testing.T) {
for _, ts := range testStrings {
count := RuneCountInString(ts)
if n := runtimeRuneCount(ts); n != count {
t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
break
}
runes := []rune(ts)
if RuneCountInString(ts) != len(runes) {
t.Errorf("%q: expected %d runes; got %d", ts, len(runes), RuneCountInString(ts))
if n := len(runes); n != count {
t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
break
}
i := 0
......
// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// This file contains code generation tests related to the handling of
// string types.
// CountRunes returns the number of runes in s, computed as len([]rune(s)).
// NOTE(review): the amd64 annotation directly above the return statement is
// presumably matched by the asmcheck harness against the generated assembly
// to confirm a countrunes call is emitted — confirm before moving or
// rewording it.
func CountRunes(s string) int { // Issue #24923
// amd64:`.*countrunes`
return len([]rune(s))
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment