Commit 2b0a30c4 authored by Robert Griesemer's avatar Robert Griesemer

big: implemented core shift routines in arith.go and

     provide assembly versions (for x86-64 for now)

(Not yet used - waiting for previous CL to clear)

R=rsc
CC=golang-dev
https://golang.org/cl/1040041
parent 05cf357d
...@@ -310,6 +310,28 @@ func subVW_g(z, x *Word, y Word, n int) (c Word) { ...@@ -310,6 +310,28 @@ func subVW_g(z, x *Word, y Word, n int) (c Word) {
} }
func shlVW(z, x *Word, s Word, n int) (c Word)
func shlVW_g(z, x *Word, s Word, n int) (c Word) {
ŝ := _W - s
for i := 0; i < n; i++ {
w := *x.at(i)
c, *z.at(i) = w>>ŝ, w<<s|c
}
return
}
func shrVW(z, x *Word, s Word, n int) (c Word)
func shrVW_g(z, x *Word, s Word, n int) (c Word) {
ŝ := _W - s
for i := n - 1; i >= 0; i-- {
w := *x.at(i)
c, *z.at(i) = w<<ŝ, w>>s|c
}
return
}
func mulAddVWW(z, x *Word, y, r Word, n int) (c Word) func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
func mulAddVWW_g(z, x *Word, y, r Word, n int) (c Word) { func mulAddVWW_g(z, x *Word, y, r Word, n int) (c Word) {
c = r c = r
......
...@@ -99,6 +99,20 @@ E4: CMPL BX, BP // i < n ...@@ -99,6 +99,20 @@ E4: CMPL BX, BP // i < n
RET RET
// func shlVW(z, x *Word, s Word, n int) (c Word)
// TODO(gri) implement this routine
TEXT ·shlVW(SB),7,$0
NOP // work around bug in linker
JMP ·shlVW_g(SB)
// func shrVW(z, x *Word, s Word, n int) (c Word)
// TODO(gri) implement this routine
TEXT ·shrVW(SB),7,$0
NOP // work around bug in linker
JMP ·shrVW_g(SB)
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word) // func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
TEXT ·mulAddVWW(SB),7,$0 TEXT ·mulAddVWW(SB),7,$0
MOVL z+0(FP), DI MOVL z+0(FP), DI
......
...@@ -101,6 +101,56 @@ E4: CMPQ BX, R11 // i < n ...@@ -101,6 +101,56 @@ E4: CMPQ BX, R11 // i < n
RET RET
// func shlVW(z, x *Word, s Word, n int) (c Word)
TEXT ·shlVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ s+16(FP), CX
MOVL n+24(FP), R11
MOVQ $0, AX // c = 0
MOVQ $0, BX // i = 0
JMP E8
L8: MOVQ (R8)(BX*8), DX
MOVQ DX, R12
SHLQ CX, DX:AX
MOVQ DX, (R10)(BX*8)
MOVQ R12, AX
ADDL $1, BX // i++
E8: CMPQ BX, R11 // i < n
JL L8
MOVQ $0, DX
SHLQ CX, DX:AX
MOVQ DX, c+32(FP)
RET
// func shrVW(z, x *Word, s Word, n int) (c Word)
TEXT ·shrVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ s+16(FP), CX
MOVL n+24(FP), BX // i = n
MOVQ $0, AX // c = 0
JMP E9
L9: MOVQ (R8)(BX*8), DX
MOVQ DX, R12
SHRQ CX, DX:AX
MOVQ DX, (R10)(BX*8)
MOVQ R12, AX
E9: SUBL $1, BX // i--
JGE L9
MOVQ $0, DX
SHRQ CX, DX:AX
MOVQ DX, c+32(FP)
RET
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word) // func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
TEXT ·mulAddVWW(SB),7,$0 TEXT ·mulAddVWW(SB),7,$0
MOVQ z+0(FP), R10 MOVQ z+0(FP), R10
......
...@@ -18,6 +18,12 @@ TEXT ·addVW(SB),7,$0 ...@@ -18,6 +18,12 @@ TEXT ·addVW(SB),7,$0
TEXT ·subVW(SB),7,$0 TEXT ·subVW(SB),7,$0
B ·subVW_g(SB) B ·subVW_g(SB)
TEXT ·shlVW(SB),7,$0
B ·shlVW_g(SB)
TEXT ·shrVW(SB),7,$0
B ·shrVW_g(SB)
TEXT ·mulAddVWW(SB),7,$0 TEXT ·mulAddVWW(SB),7,$0
B ·mulAddVWW_g(SB) B ·mulAddVWW_g(SB)
......
...@@ -147,6 +147,36 @@ var prodVW = []argVW{ ...@@ -147,6 +147,36 @@ var prodVW = []argVW{
argVW{nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, _M >> (_W - 7)}, argVW{nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, _M >> (_W - 7)},
} }
var lshVW = []argVW{
argVW{},
argVW{nat{0}, nat{0}, 0, 0},
argVW{nat{0}, nat{0}, 1, 0},
argVW{nat{0}, nat{0}, 20, 0},
argVW{nat{_M}, nat{_M}, 0, 0},
argVW{nat{_M << 1 & _M}, nat{_M}, 1, 1},
argVW{nat{_M << 20 & _M}, nat{_M}, 20, _M >> (_W - 20)},
argVW{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
argVW{nat{_M << 1 & _M, _M, _M}, nat{_M, _M, _M}, 1, 1},
argVW{nat{_M << 20 & _M, _M, _M}, nat{_M, _M, _M}, 20, _M >> (_W - 20)},
}
var rshVW = []argVW{
argVW{},
argVW{nat{0}, nat{0}, 0, 0},
argVW{nat{0}, nat{0}, 1, 0},
argVW{nat{0}, nat{0}, 20, 0},
argVW{nat{_M}, nat{_M}, 0, 0},
argVW{nat{_M >> 1}, nat{_M}, 1, _M << (_W - 1) & _M},
argVW{nat{_M >> 20}, nat{_M}, 20, _M << (_W - 20) & _M},
argVW{nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0},
argVW{nat{_M, _M, _M >> 1}, nat{_M, _M, _M}, 1, _M << (_W - 1) & _M},
argVW{nat{_M, _M, _M >> 20}, nat{_M, _M, _M}, 20, _M << (_W - 20) & _M},
}
func testFunVW(t *testing.T, msg string, f funVW, a argVW) { func testFunVW(t *testing.T, msg string, f funVW, a argVW) {
n := len(a.z) n := len(a.z)
...@@ -174,6 +204,18 @@ func TestFunVW(t *testing.T) { ...@@ -174,6 +204,18 @@ func TestFunVW(t *testing.T) {
testFunVW(t, "subVW_g", subVW_g, arg) testFunVW(t, "subVW_g", subVW_g, arg)
testFunVW(t, "subVW", subVW, arg) testFunVW(t, "subVW", subVW, arg)
} }
for _, a := range lshVW {
arg := a
testFunVW(t, "shlVW_g", shlVW_g, arg)
testFunVW(t, "shlVW", shlVW, arg)
}
for _, a := range rshVW {
arg := a
testFunVW(t, "shrVW_g", shrVW_g, arg)
testFunVW(t, "shrVW", shrVW, arg)
}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment