math/big: more conservative use of lock for divisor table

Minor performance impact running sequentially: benchmark old ns/op new ns/op delta BenchmarkString10Base2 389 391 +0.51% BenchmarkString100Base2 1530 1534 +0.26% BenchmarkString1000Base2 11789 11787 -0.02% BenchmarkString10000Base2 111443 112030 +0.53% BenchmarkString100000Base2 1017483 1015347 -0.21% BenchmarkString10Base8 339 344 +1.47% BenchmarkString100Base8 753 756 +0.40% BenchmarkString1000Base8 4618 4641 +0.50% BenchmarkString10000Base8 43217 43534 +0.73% BenchmarkString100000Base8 397518 400602 +0.78% BenchmarkString10Base10 630 630 +0.00% BenchmarkString100Base10 1975 1960 -0.76% BenchmarkString1000Base10 10179 10174 -0.05% BenchmarkString10000Base10 44527 44416 -0.25% BenchmarkString100000Base10 14404694 14425308 +0.14% BenchmarkString10Base16 283 288 +1.77% BenchmarkString100Base16 597 598 +0.17% BenchmarkString1000Base16 3189 3186 -0.09% BenchmarkString10000Base16 29403 29364 -0.13% BenchmarkString100000Base16 265657 265587 -0.03% Note that due to other improvements (faster assembly routines, better code generation by compiler), these benchmarks now run up to 37% faster than they used to at the last time measured (1/9/2012). Minor performance impact for StringPiParallel running in parallel: Current CL but with Lock/Unlock commented out (removed): BenchmarkStringPiParallel 5000 343581 ns/op BenchmarkStringPiParallel-2 10000 184511 ns/op BenchmarkStringPiParallel-3 10000 129768 ns/op BenchmarkStringPiParallel-4 10000 102326 ns/op Current CL: BenchmarkStringPiParallel 5000 345169 ns/op BenchmarkStringPiParallel-2 10000 185827 ns/op BenchmarkStringPiParallel-3 10000 131168 ns/op BenchmarkStringPiParallel-4 10000 102353 ns/op Fixes #4218. R=dvyukov, michael.jones, dave CC=golang-dev https://golang.org/cl/6643053

math/big: more conservative use of lock for divisor table
Minor performance impact running sequentially: benchmark old ns/op new ns/op delta BenchmarkString10Base2 389 391 +0.51% BenchmarkString100Base2 1530 1534 +0.26% BenchmarkString1000Base2 11789 11787 -0.02% BenchmarkString10000Base2 111443 112030 +0.53% BenchmarkString100000Base2 1017483 1015347 -0.21% BenchmarkString10Base8 339 344 +1.47% BenchmarkString100Base8 753 756 +0.40% BenchmarkString1000Base8 4618 4641 +0.50% BenchmarkString10000Base8 43217 43534 +0.73% BenchmarkString100000Base8 397518 400602 +0.78% BenchmarkString10Base10 630 630 +0.00% BenchmarkString100Base10 1975 1960 -0.76% BenchmarkString1000Base10 10179 10174 -0.05% BenchmarkString10000Base10 44527 44416 -0.25% BenchmarkString100000Base10 14404694 14425308 +0.14% BenchmarkString10Base16 283 288 +1.77% BenchmarkString100Base16 597 598 +0.17% BenchmarkString1000Base16 3189 3186 -0.09% BenchmarkString10000Base16 29403 29364 -0.13% BenchmarkString100000Base16 265657 265587 -0.03% Note that due to other improvements (faster assembly routines, better code generation by compiler), these benchmarks now run up to 37% faster than they used to at the last time measured (1/9/2012). Minor performance impact for StringPiParallel running in parallel: Current CL but with Lock/Unlock commented out (removed): BenchmarkStringPiParallel 5000 343581 ns/op BenchmarkStringPiParallel-2 10000 184511 ns/op BenchmarkStringPiParallel-3 10000 129768 ns/op BenchmarkStringPiParallel-4 10000 102326 ns/op Current CL: BenchmarkStringPiParallel 5000 345169 ns/op BenchmarkStringPiParallel-2 10000 185827 ns/op BenchmarkStringPiParallel-3 10000 131168 ns/op BenchmarkStringPiParallel-4 10000 102353 ns/op Fixes #4218. R=dvyukov, michael.jones, dave CC=golang-dev https://golang.org/cl/6643053
4bee88d4 · Robert Griesemer · 3acce59b · 4bee88d4 · 4bee88d4
Commit 4bee88d4 authored Oct 11, 2012 by Robert Griesemer
Show whitespace changes
Inline Side-by-side

Showing with 55 additions and 23 deletions

nat.go src/pkg/math/big/nat.go +17 -21

nat_test.go src/pkg/math/big/nat_test.go +38 -2

No files found.
--- a/src/pkg/math/big/nat.go
+++ b/src/pkg/math/big/nat.go
@@ -920,8 +920,10 @@ type divisor struct {
 	ndigits int // digit length of divisor in terms of output base digits
 }

-var cacheBase10 [64]divisor // cached divisors for base 10
-var cacheLock sync.Mutex    // protects cacheBase10
+var cacheBase10 struct {
+	sync.Mutex
+	table [64]divisor // cached divisors for base 10
+}

 // expWW computes x**y
 func (z nat) expWW(x, y Word) nat {
@@ -937,34 +939,28 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {

 	// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
 	k := 1
-	for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
+	for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
 		k++
 	}

-	// create new table of divisors or extend and reuse existing table as appropriate
-	var table []divisor
-	var cached bool
-	switch b {
-	case 10:
-		table = cacheBase10[0:k] // reuse old table for this conversion
-		cached = true
-	default:
-		table = make([]divisor, k) // new table for this conversion
+	// reuse and extend existing table of divisors or create new table as appropriate
+	var table []divisor // for b == 10, table overlaps with cacheBase10.table
+	if b == 10 {
+		cacheBase10.Lock()
+		table = cacheBase10.table[0:k] // reuse old table for this conversion
+	} else {
+		table = make([]divisor, k) // create new table for this conversion
 	}

 	// extend table
 	if table[k-1].ndigits == 0 {
-		if cached {
-			cacheLock.Lock() // begin critical section
-		}
-
 		// add new entries as needed
 		var larger nat
 		for i := 0; i < k; i++ {
 			if table[i].ndigits == 0 {
 				if i == 0 {
-					table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
-					table[i].ndigits = ndigits * leafSize
+					table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
+					table[0].ndigits = ndigits * leafSize
 				} else {
 					table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
 					table[i].ndigits = 2 * table[i-1].ndigits
@@ -980,10 +976,10 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
 				table[i].nbits = table[i].bbb.bitLen()
 			}
 		}
-
-		if cached {
-			cacheLock.Unlock() // end critical section
 	}
+
+	if b == 10 {
+		cacheBase10.Unlock()
 	}

 	return table

--- a/src/pkg/math/big/nat_test.go
+++ b/src/pkg/math/big/nat_test.go
@@ -409,6 +409,20 @@ func TestScanPi(t *testing.T) {
 	}
 }

+func TestScanPiParallel(t *testing.T) {
+	const n = 2
+	c := make(chan int)
+	for i := 0; i < n; i++ {
+		go func() {
+			TestScanPi(t)
+			c <- 0
+		}()
+	}
+	for i := 0; i < n; i++ {
+		<-c
+	}
+}
+
 func BenchmarkScanPi(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		var x nat
@@ -416,6 +430,28 @@ func BenchmarkScanPi(b *testing.B) {
 	}
 }

+func BenchmarkStringPiParallel(b *testing.B) {
+	var x nat
+	x, _, _ = x.scan(strings.NewReader(pi), 0)
+	if x.decimalString() != pi {
+		panic("benchmark incorrect: conversion failed")
+	}
+	n := runtime.GOMAXPROCS(0)
+	m := b.N / n // n*m <= b.N due to flooring, but the error is neglibible (n is not very large)
+	c := make(chan int, n)
+	for i := 0; i < n; i++ {
+		go func() {
+			for j := 0; j < m; j++ {
+				x.decimalString()
+			}
+			c <- 0
+		}()
+	}
+	for i := 0; i < n; i++ {
+		<-c
+	}
+}
+
 func BenchmarkScan10Base2(b *testing.B)     { ScanHelper(b, 2, 10, 10) }
 func BenchmarkScan100Base2(b *testing.B)    { ScanHelper(b, 2, 10, 100) }
 func BenchmarkScan1000Base2(b *testing.B)   { ScanHelper(b, 2, 10, 1000) }
@@ -516,7 +552,7 @@ func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) }
 func LeafSizeHelper(b *testing.B, base Word, size int) {
 	b.StopTimer()
 	originalLeafSize := leafSize
-	resetTable(cacheBase10[:])
+	resetTable(cacheBase10.table[:])
 	leafSize = size
 	b.StartTimer()

@@ -533,7 +569,7 @@ func LeafSizeHelper(b *testing.B, base Word, size int) {
 	}

 	b.StopTimer()
-	resetTable(cacheBase10[:])
+	resetTable(cacheBase10.table[:])
 	leafSize = originalLeafSize
 	b.StartTimer()
 }