Commit b55791e2 authored by Russ Cox

[dev.power64] cmd/5a, cmd/6a, cmd/8a, cmd/9a: make labels function-scoped

I removed support for jumping between functions years ago,
as part of doing the instruction layout for each function separately.

Given that, it makes sense to treat labels as function-scoped.
This lets each function have its own 'loop' label, for example.

Makes the assembly much cleaner and removes the last
reason anyone would reach for the 123(PC) form instead.

Note that this is on the dev.power64 branch, but it changes all
the assemblers. The change will ship in Go 1.5 (perhaps after
being ported into the new assembler).

Came up as part of CL 167730043.

LGTM=r
R=r
CC=austin, dave, golang-codereviews, minux
https://golang.org/cl/159670043
parent 87b4149b
......@@ -67,6 +67,7 @@ struct Sym
int32 value;
ushort type;
char *name;
char* labelname;
char sym;
};
#define S ((Sym*)0)
......@@ -136,6 +137,8 @@ void newio(void);
void newfile(char*, int);
Sym* slookup(char*);
Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*);
int32 yylex(void);
int getc(void);
......
......@@ -73,15 +73,11 @@ prog:
line
line:
LLAB ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
LNAME ':'
{
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB;
$1->value = pc;
}
......@@ -218,18 +214,21 @@ inst:
*/
| LTYPEB name ',' imm
{
settext($2.sym);
$4.type = D_CONST2;
$4.offset2 = ArgsSizeUnknown;
outcode($1, Always, &$2, 0, &$4);
}
| LTYPEB name ',' con ',' imm
{
settext($2.sym);
$6.type = D_CONST2;
$6.offset2 = ArgsSizeUnknown;
outcode($1, Always, &$2, $4, &$6);
}
| LTYPEB name ',' con ',' imm '-' con
{
settext($2.sym);
$6.type = D_CONST2;
$6.offset2 = $8;
outcode($1, Always, &$2, $4, &$6);
......@@ -373,15 +372,10 @@ rel:
}
| LNAME offset
{
$1 = labellookup($1);
$$ = nullgen;
if(pass == 2)
yyerror("undefined label: %s", $1->name);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $1->value + $2;
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* A Bison parser, made by GNU Bison 2.7.12-4996. */
/* A Bison parser, made by GNU Bison 2.3. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
/* Skeleton interface for Bison's Yacc-like parsers in C
Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
......@@ -26,20 +29,10 @@
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
#ifndef YY_YY_Y_TAB_H_INCLUDED
# define YY_YY_Y_TAB_H_INCLUDED
/* Enabling traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Tokens. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
......@@ -148,41 +141,24 @@ extern int yydebug;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
{
/* Line 2053 of yacc.c */
#line 39 "a.y"
{
Sym *sym;
int32 lval;
double dval;
char sval[8];
Addr addr;
/* Line 2053 of yacc.c */
#line 166 "y.tab.h"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
}
/* Line 1529 of yacc.c. */
#line 157 "y.tab.h"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
#endif
extern YYSTYPE yylval;
#ifdef YYPARSE_PARAM
#if defined __STDC__ || defined __cplusplus
int yyparse (void *YYPARSE_PARAM);
#else
int yyparse ();
#endif
#else /* ! YYPARSE_PARAM */
#if defined __STDC__ || defined __cplusplus
int yyparse (void);
#else
int yyparse ();
#endif
#endif /* ! YYPARSE_PARAM */
#endif /* !YY_YY_Y_TAB_H_INCLUDED */
......@@ -70,6 +70,7 @@ struct Sym
vlong value;
ushort type;
char *name;
char* labelname;
char sym;
};
#define S ((Sym*)0)
......@@ -148,6 +149,8 @@ void newio(void);
void newfile(char*, int);
Sym* slookup(char*);
Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*);
int32 yylex(void);
int getc(void);
......
......@@ -71,15 +71,11 @@ prog:
line
line:
LLAB ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
LNAME ':'
{
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s (%s)", $1->labelname, $1->name);
$1->type = LLAB;
$1->value = pc;
}
......@@ -197,11 +193,13 @@ spec1: /* DATA */
spec2: /* TEXT */
mem ',' imm2
{
settext($1.sym);
$$.from = $1;
$$.to = $3;
}
| mem ',' con ',' imm2
{
settext($1.sym);
$$.from = $1;
$$.from.scale = $3;
$$.to = $5;
......@@ -363,15 +361,10 @@ rel:
}
| LNAME offset
{
$1 = labellookup($1);
$$ = nullgen;
if(pass == 2)
yyerror("undefined label: %s", $1->name);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $1->value + $2;
}
......
This diff is collapsed.
......@@ -70,6 +70,7 @@ struct Sym
int32 value;
ushort type;
char *name;
char* labelname;
char sym;
};
#define S ((Sym*)0)
......@@ -148,6 +149,8 @@ void newio(void);
void newfile(char*, int);
Sym* slookup(char*);
Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*);
int32 yylex(void);
int getc(void);
......
......@@ -74,15 +74,11 @@ prog:
line
line:
LLAB ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
LNAME ':'
{
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB;
$1->value = pc;
}
......@@ -199,11 +195,13 @@ spec1: /* DATA */
spec2: /* TEXT */
mem ',' imm2
{
settext($1.sym);
$$.from = $1;
$$.to = $3;
}
| mem ',' con ',' imm2
{
settext($1.sym);
$$.from = $1;
$$.from.scale = $3;
$$.to = $5;
......@@ -362,15 +360,10 @@ rel:
}
| LNAME offset
{
$1 = labellookup($1);
$$ = nullgen;
if(pass == 2)
yyerror("undefined label: %s", $1->name);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $1->value + $2;
}
......
This diff is collapsed.
......@@ -68,6 +68,7 @@ struct Sym
vlong value;
ushort type;
char *name;
char* labelname;
char sym;
};
#define S ((Sym*)0)
......@@ -135,6 +136,8 @@ void newio(void);
void newfile(char*, int);
Sym* slookup(char*);
Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*);
int32 yylex(void);
int getc(void);
......
......@@ -67,15 +67,11 @@ prog:
| prog line
line:
LLAB ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
LNAME ':'
{
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB;
$1->value = pc;
}
......@@ -623,16 +619,19 @@ inst:
*/
| LTEXT name ',' imm
{
settext($2.sym);
outcode($1, &$2, NREG, &$4);
}
| LTEXT name ',' con ',' imm
{
settext($2.sym);
$6.offset &= 0xffffffffull;
$6.offset |= (vlong)ArgsSizeUnknown << 32;
outcode($1, &$2, $4, &$6);
}
| LTEXT name ',' con ',' imm '-' con
{
settext($2.sym);
$6.offset &= 0xffffffffull;
$6.offset |= ($8 & 0xffffffffull) << 32;
outcode($1, &$2, $4, &$6);
......@@ -669,15 +668,10 @@ rel:
}
| LNAME offset
{
$1 = labellookup($1);
$$ = nullgen;
if(pass == 2)
yyerror("undefined label: %s", $1->name);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $1->value + $2;
}
......
This diff is collapsed.
/* A Bison parser, made by GNU Bison 2.5. */
/* A Bison parser, made by GNU Bison 2.3. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
/* Skeleton interface for Bison's Yacc-like parsers in C
Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
......@@ -26,11 +29,10 @@
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
/* Tokens. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
......@@ -166,27 +168,21 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
{
/* Line 2068 of yacc.c */
#line 38 "a.y"
{
Sym *sym;
vlong lval;
double dval;
char sval[8];
Addr addr;
/* Line 2068 of yacc.c */
#line 184 "y.tab.h"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
}
/* Line 1529 of yacc.c. */
#line 181 "y.tab.h"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
#endif
extern YYSTYPE yylval;
......@@ -220,6 +220,31 @@ slookup(char *s)
return lookup();
}
/*
 * thetext is the symbol most recently handed to settext.
 * It stays nil until settext is first called; labellookup
 * reads it to decide which name prefix a label receives.
 */
LSym *thetext;
/*
 * settext records s as the current text symbol.
 * The grammar actions for TEXT directives call it with the
 * symbol named by the directive.
 */
void
settext(LSym *s)
{
thetext = s;
}
/*
 * labellookup returns the symbol that stands for label s within
 * the current text symbol (thetext).
 *
 * When thetext is nil, s itself is used.  Otherwise the symbol is
 * obtained by passing the name "<thetext name>.<label name>" to
 * slookup, so identical label names under different text symbols
 * map to different names.  In both cases the returned symbol's
 * labelname is set to the label's name as written (s->name), which
 * callers use when reporting errors.
 */
Sym*
labellookup(Sym *s)
{
	char *scoped;
	Sym *lab;

	if(thetext != nil) {
		/* Build the prefixed name, look it up, then drop the temporary. */
		scoped = smprint("%s.%s", thetext->name, s->name);
		lab = slookup(scoped);
		free(scoped);
	} else
		lab = s;

	lab->labelname = s->name;
	return lab;
}
Sym*
lookup(void)
{
......
......@@ -486,11 +486,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-21
MOVL new_hi+16(FP), CX
LOCK
CMPXCHG8B 0(BP)
JNZ cas64_fail
JNZ fail
MOVL $1, AX
MOVB AX, ret+20(FP)
RET
cas64_fail:
fail:
MOVL $0, AX
MOVB AX, ret+20(FP)
RET
......@@ -1342,29 +1342,29 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPL SI, DI
JEQ cmp_allsame
JEQ allsame
CMPL BX, DX
MOVL DX, BP
CMOVLLT BX, BP // BP = min(alen, blen)
CMPL BP, $4
JB cmp_small
JB small
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
JE cmp_mediumloop
cmp_largeloop:
JE mediumloop
largeloop:
CMPL BP, $16
JB cmp_mediumloop
JB mediumloop
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORL $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal
JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI
ADDL $16, DI
SUBL $16, BP
JMP cmp_largeloop
JMP largeloop
cmp_diff16:
diff16:
BSFL AX, BX // index of first byte that differs
XORL AX, AX
MOVB (SI)(BX*1), CX
......@@ -1373,25 +1373,25 @@ cmp_diff16:
LEAL -1(AX*2), AX // convert 1/0 to +1/-1
RET
cmp_mediumloop:
mediumloop:
CMPL BP, $4
JBE cmp_0through4
JBE _0through4
MOVL (SI), AX
MOVL (DI), CX
CMPL AX, CX
JNE cmp_diff4
JNE diff4
ADDL $4, SI
ADDL $4, DI
SUBL $4, BP
JMP cmp_mediumloop
JMP mediumloop
cmp_0through4:
_0through4:
MOVL -4(SI)(BP*1), AX
MOVL -4(DI)(BP*1), CX
CMPL AX, CX
JEQ cmp_allsame
JEQ allsame
cmp_diff4:
diff4:
BSWAPL AX // reverse order of bytes
BSWAPL CX
XORL AX, CX // find bit differences
......@@ -1402,37 +1402,37 @@ cmp_diff4:
RET
// 0-3 bytes in common
cmp_small:
small:
LEAL (BP*8), CX
NEGL CX
JEQ cmp_allsame
JEQ allsame
// load si
CMPB SI, $0xfc
JA cmp_si_high
JA si_high
MOVL (SI), SI
JMP cmp_si_finish
cmp_si_high:
JMP si_finish
si_high:
MOVL -4(SI)(BP*1), SI
SHRL CX, SI
cmp_si_finish:
si_finish:
SHLL CX, SI
// same for di
CMPB DI, $0xfc
JA cmp_di_high
JA di_high
MOVL (DI), DI
JMP cmp_di_finish
cmp_di_high:
JMP di_finish
di_high:
MOVL -4(DI)(BP*1), DI
SHRL CX, DI
cmp_di_finish:
di_finish:
SHLL CX, DI
BSWAPL SI // reverse order of bytes
BSWAPL DI
XORL SI, DI // find bit differences
JEQ cmp_allsame
JEQ allsame
BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit
......@@ -1441,7 +1441,7 @@ cmp_di_finish:
// all the bytes in common are the same, so we just need
// to compare the lengths.
cmp_allsame:
allsame:
XORL AX, AX
XORL CX, CX
CMPL BX, DX
......
......@@ -461,11 +461,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX
LOCK
CMPXCHGQ CX, 0(BX)
JNZ cas64_fail
JNZ fail
MOVL $1, AX
MOVB AX, ret+24(FP)
RET
cas64_fail:
fail:
MOVL $0, AX
MOVB AX, ret+24(FP)
RET
......@@ -876,24 +876,24 @@ TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
MOVO runtime·aeskeysched+0(SB), X2
MOVO runtime·aeskeysched+16(SB), X3
CMPQ CX, $16
JB aessmall
aesloop:
JB small
loop:
CMPQ CX, $16
JBE aesloopend
JBE loopend
MOVOU (AX), X1
AESENC X2, X0
AESENC X1, X0
SUBQ $16, CX
ADDQ $16, AX
JMP aesloop
JMP loop
// 1-16 bytes remaining
aesloopend:
loopend:
// This load may overlap with the previous load above.
// We'll hash some bytes twice, but that's ok.
MOVOU -16(AX)(CX*1), X1
JMP partial
// 0-15 bytes
aessmall:
small:
TESTQ CX, CX
JE finalize // 0 bytes
......@@ -1036,18 +1036,18 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33
MOVQ s1len+8(FP), AX
MOVQ s2len+24(FP), BX
CMPQ AX, BX
JNE different
JNE noteq
MOVQ s1str+0(FP), SI
MOVQ s2str+16(FP), DI
CMPQ SI, DI
JEQ same
JEQ eq
CALL runtime·memeqbody(SB)
MOVB AX, v+32(FP)
RET
same:
eq:
MOVB $1, v+32(FP)
RET
different:
noteq:
MOVB $0, v+32(FP)
RET
......@@ -1170,29 +1170,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ cmp_allsame
JEQ allsame
CMPQ BX, DX
MOVQ DX, BP
CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare
CMPQ BP, $8
JB cmp_small
JB small
cmp_loop:
loop:
CMPQ BP, $16
JBE cmp_0through16
JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, BP
JMP cmp_loop
JMP loop
// AX = bit mask of differences
cmp_diff16:
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
MOVB (SI)(BX*1), CX
......@@ -1202,21 +1202,21 @@ cmp_diff16:
RET
// 0 through 16 bytes left, alen>=8, blen>=8
cmp_0through16:
_0through16:
CMPQ BP, $8
JBE cmp_0through8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE cmp_diff8
cmp_0through8:
JNE diff8
_0through8:
MOVQ -8(SI)(BP*1), AX
MOVQ -8(DI)(BP*1), CX
CMPQ AX, CX
JEQ cmp_allsame
JEQ allsame
// AX and CX contain parts of a and b that differ.
cmp_diff8:
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
......@@ -1227,44 +1227,44 @@ cmp_diff8:
RET
// 0-7 bytes in common
cmp_small:
small:
LEAQ (BP*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64)
JEQ cmp_allsame
JEQ allsame
// load bytes of a into high bytes of AX
CMPB SI, $0xf8
JA cmp_si_high
JA si_high
MOVQ (SI), SI
JMP cmp_si_finish
cmp_si_high:
JMP si_finish
si_high:
MOVQ -8(SI)(BP*1), SI
SHRQ CX, SI
cmp_si_finish:
si_finish:
SHLQ CX, SI
// load bytes of b in to high bytes of BX
CMPB DI, $0xf8
JA cmp_di_high
JA di_high
MOVQ (DI), DI
JMP cmp_di_finish
cmp_di_high:
JMP di_finish
di_high:
MOVQ -8(DI)(BP*1), DI
SHRQ CX, DI
cmp_di_finish:
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ cmp_allsame
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
cmp_allsame:
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
......@@ -1299,7 +1299,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVQ SI, DI
CMPQ BX, $16
JLT indexbyte_small
JLT small
// round up to first 16-byte boundary
TESTQ $15, SI
......@@ -1357,7 +1357,7 @@ failure:
RET
// handle for lengths < 16
indexbyte_small:
small:
MOVQ BX, CX
REPN; SCASB
JZ success
......
......@@ -444,11 +444,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX
LOCK
CMPXCHGQ CX, 0(BX)
JNZ cas64_fail
JNZ fail
MOVL $1, AX
MOVB AX, ret+24(FP)
RET
cas64_fail:
fail:
MOVL $0, AX
MOVB AX, ret+24(FP)
RET
......@@ -834,29 +834,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ cmp_allsame
JEQ allsame
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
JB cmp_small
JB small
cmp_loop:
loop:
CMPQ R8, $16
JBE cmp_0through16
JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP cmp_loop
JMP loop
// AX = bit mask of differences
cmp_diff16:
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
ADDQ BX, SI
......@@ -868,23 +868,23 @@ cmp_diff16:
RET
// 0 through 16 bytes left, alen>=8, blen>=8
cmp_0through16:
_0through16:
CMPQ R8, $8
JBE cmp_0through8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE cmp_diff8
cmp_0through8:
JNE diff8
_0through8:
ADDQ R8, SI
ADDQ R8, DI
MOVQ -8(SI), AX
MOVQ -8(DI), CX
CMPQ AX, CX
JEQ cmp_allsame
JEQ allsame
// AX and CX contain parts of a and b that differ.
cmp_diff8:
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
......@@ -895,46 +895,46 @@ cmp_diff8:
RET
// 0-7 bytes in common
cmp_small:
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64)
JEQ cmp_allsame
JEQ allsame
// load bytes of a into high bytes of AX
CMPB SI, $0xf8
JA cmp_si_high
JA si_high
MOVQ (SI), SI
JMP cmp_si_finish
cmp_si_high:
JMP si_finish
si_high:
ADDQ R8, SI
MOVQ -8(SI), SI
SHRQ CX, SI
cmp_si_finish:
si_finish:
SHLQ CX, SI
// load bytes of b in to high bytes of BX
CMPB DI, $0xf8
JA cmp_di_high
JA di_high
MOVQ (DI), DI
JMP cmp_di_finish
cmp_di_high:
JMP di_finish
di_high:
ADDQ R8, DI
MOVQ -8(DI), DI
SHRQ CX, DI
cmp_di_finish:
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ cmp_allsame
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
cmp_allsame:
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
......@@ -969,7 +969,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVL SI, DI
CMPL BX, $16
JLT indexbyte_small
JLT small
// round up to first 16-byte boundary
TESTL $15, SI
......@@ -1027,7 +1027,7 @@ failure:
RET
// handle for lengths < 16
indexbyte_small:
small:
MOVL BX, CX
REPN; SCASB
JZ success
......
......@@ -492,7 +492,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW g_m(g), R8
MOVW m_g0(R8), R3
CMP R3, g
BEQ asmcgocall_g0
BEQ g0
BL gosave<>(SB)
MOVW R0, R5
MOVW R3, R0
......@@ -501,7 +501,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW (g_sched+gobuf_sp)(g), R13
// Now on a scheduling stack (a pthread-created stack).
asmcgocall_g0:
g0:
SUB $24, R13
BIC $0x7, R13 // alignment for gcc ABI
MOVW R4, 20(R13) // save old g
......@@ -736,13 +736,13 @@ TEXT runtime·memeq(SB),NOSPLIT,$-4-13
ADD R1, R3, R6
MOVW $1, R0
MOVB R0, ret+12(FP)
_next2:
loop:
CMP R1, R6
RET.EQ
MOVBU.P 1(R1), R4
MOVBU.P 1(R2), R5
CMP R4, R5
BEQ _next2
BEQ loop
MOVW $0, R0
MOVB R0, ret+12(FP)
......@@ -765,13 +765,13 @@ TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
CMP R2, R3
RET.EQ
ADD R2, R0, R6
_eqnext:
loop:
CMP R2, R6
RET.EQ
MOVBU.P 1(R2), R4
MOVBU.P 1(R3), R5
CMP R4, R5
BEQ _eqnext
BEQ loop
MOVB R7, v+16(FP)
RET
......@@ -786,26 +786,26 @@ TEXT bytes·Equal(SB),NOSPLIT,$0
MOVW b_len+16(FP), R3
CMP R1, R3 // unequal lengths are not equal
B.NE _notequal
B.NE notequal
MOVW a+0(FP), R0
MOVW b+12(FP), R2
ADD R0, R1 // end
_byteseq_next:
loop:
CMP R0, R1
B.EQ _equal // reached the end
B.EQ equal // reached the end
MOVBU.P 1(R0), R4
MOVBU.P 1(R2), R5
CMP R4, R5
B.EQ _byteseq_next
B.EQ loop
_notequal:
notequal:
MOVW $0, R0
MOVBU R0, ret+24(FP)
RET
_equal:
equal:
MOVW $1, R0
MOVBU R0, ret+24(FP)
RET
......
......@@ -699,7 +699,7 @@ TEXT runtime·memeq(SB),NOSPLIT,$-8-25
SUB $1, R3
SUB $1, R4
ADD R3, R5, R8
_next:
loop:
CMP R3, R8
BNE 4(PC)
MOVD $1, R3
......@@ -708,7 +708,7 @@ _next:
MOVBZU 1(R3), R6
MOVBZU 1(R4), R7
CMP R6, R7
BEQ _next
BEQ loop
MOVB R0, ret+24(FP)
RETURN
......@@ -720,14 +720,14 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33
MOVD s1len+8(FP), R4
MOVD s2len+24(FP), R5
CMP R4, R5
BNE str_noteq
BNE noteq
MOVD s1str+0(FP), R3
MOVD s2str+16(FP), R4
SUB $1, R3
SUB $1, R4
ADD R3, R5, R8
eq_next:
loop:
CMP R3, R8
BNE 4(PC)
MOVD $1, R3
......@@ -736,8 +736,8 @@ eq_next:
MOVBZU 1(R3), R6
MOVBZU 1(R4), R7
CMP R6, R7
BEQ eq_next
str_noteq:
BEQ loop
noteq:
MOVB R0, ret+32(FP)
RETURN
......@@ -747,7 +747,7 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49
MOVD b_len+32(FP), R4
CMP R3, R4 // unequal lengths are not equal
BNE _notequal
BNE noteq
MOVD a+0(FP), R5
MOVD b+24(FP), R6
......@@ -755,19 +755,19 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49
SUB $1, R6
ADD R5, R3 // end-1
_byteseq_next:
loop:
CMP R5, R3
BEQ _equal // reached the end
BEQ equal // reached the end
MOVBZU 1(R5), R4
MOVBZU 1(R6), R7
CMP R4, R7
BEQ _byteseq_next
BEQ loop
_notequal:
noteq:
MOVBZ R0, ret+48(FP)
RETURN
_equal:
equal:
MOVD $1, R3
MOVBZ R3, ret+48(FP)
RETURN
......@@ -780,18 +780,18 @@ TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
SUB $1, R3
ADD R3, R4 // end-1
_index_loop:
loop:
CMP R3, R4
BEQ _index_notfound
BEQ notfound
MOVBZU 1(R3), R7
CMP R7, R5
BNE _index_loop
BNE loop
SUB R6, R3 // remove base
MOVD R3, ret+32(FP)
RETURN
_index_notfound:
notfound:
MOVD $-1, R3
MOVD R3, ret+32(FP)
RETURN
......@@ -804,18 +804,18 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0
SUB $1, R3
ADD R3, R4 // end-1
_index2_loop:
loop:
CMP R3, R4
BEQ _index2_notfound
BEQ notfound
MOVBZU 1(R3), R7
CMP R7, R5
BNE _index2_loop
BNE loop
SUB R6, R3 // remove base
MOVD R3, ret+24(FP)
RETURN
_index2_notfound:
notfound:
MOVD $-1, R3
MOVD R3, ret+24(FP)
RETURN
......
......@@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
XORL AX, AX
// MOVOU seems always faster than REP STOSL.
clr_tail:
tail:
TESTL BX, BX
JEQ clr_0
JEQ _0
CMPL BX, $2
JBE clr_1or2
JBE _1or2
CMPL BX, $4
JBE clr_3or4
JBE _3or4
CMPL BX, $8
JBE clr_5through8
JBE _5through8
CMPL BX, $16
JBE clr_9through16
JBE _9through16
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
JEQ nosse2
PXOR X0, X0
CMPL BX, $32
JBE clr_17through32
JBE _17through32
CMPL BX, $64
JBE clr_33through64
JBE _33through64
CMPL BX, $128
JBE clr_65through128
JBE _65through128
CMPL BX, $256
JBE clr_129through256
JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch
clr_loop:
loop:
MOVOU X0, 0(DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......@@ -59,40 +59,40 @@ clr_loop:
SUBL $256, BX
ADDL $256, DI
CMPL BX, $256
JAE clr_loop
JMP clr_tail
JAE loop
JMP tail
clr_1or2:
_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
clr_0:
_0:
RET
clr_3or4:
_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
clr_5through8:
_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
clr_9through16:
_9through16:
MOVL AX, (DI)
MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1)
MOVL AX, -4(DI)(BX*1)
RET
clr_17through32:
_17through32:
MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1)
RET
clr_33through64:
_33through64:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_65through128:
_65through128:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......@@ -102,7 +102,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_129through256:
_129through256:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......@@ -126,5 +126,5 @@ nosse2:
REP
STOSL
ANDL $3, BX
JNE clr_tail
JNE tail
RET
......@@ -15,30 +15,30 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-16
XORQ AX, AX
// MOVOU seems always faster than REP STOSQ.
clr_tail:
tail:
TESTQ BX, BX
JEQ clr_0
JEQ _0
CMPQ BX, $2
JBE clr_1or2
JBE _1or2
CMPQ BX, $4
JBE clr_3or4
JBE _3or4
CMPQ BX, $8
JBE clr_5through8
JBE _5through8
CMPQ BX, $16
JBE clr_9through16
JBE _9through16
PXOR X0, X0
CMPQ BX, $32
JBE clr_17through32
JBE _17through32
CMPQ BX, $64
JBE clr_33through64
JBE _33through64
CMPQ BX, $128
JBE clr_65through128
JBE _65through128
CMPQ BX, $256
JBE clr_129through256
JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch
// TODO: for really big clears, use MOVNTDQ.
clr_loop:
loop:
MOVOU X0, 0(DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......@@ -58,38 +58,38 @@ clr_loop:
SUBQ $256, BX
ADDQ $256, DI
CMPQ BX, $256
JAE clr_loop
JMP clr_tail
JAE loop
JMP tail
clr_1or2:
_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
clr_0:
_0:
RET
clr_3or4:
_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
clr_5through8:
_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
clr_9through16:
_9through16:
MOVQ AX, (DI)
MOVQ AX, -8(DI)(BX*1)
RET
clr_17through32:
_17through32:
MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1)
RET
clr_33through64:
_33through64:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_65through128:
_65through128:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......@@ -99,7 +99,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
clr_129through256:
_129through256:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
......
......@@ -10,40 +10,40 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
MOVL n+4(FP), BX
XORL AX, AX
clr_tail:
tail:
TESTL BX, BX
JEQ clr_0
JEQ _0
CMPL BX, $2
JBE clr_1or2
JBE _1or2
CMPL BX, $4
JBE clr_3or4
JBE _3or4
CMPL BX, $8
JBE clr_5through8
JBE _5through8
CMPL BX, $16
JBE clr_9through16
JBE _9through16
MOVL BX, CX
SHRL $2, CX
REP
STOSL
ANDL $3, BX
JNE clr_tail
JNE tail
RET
clr_1or2:
_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
clr_0:
_0:
RET
clr_3or4:
_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
clr_5through8:
_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
clr_9through16:
_9through16:
MOVL AX, (DI)
MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1)
......
......@@ -140,20 +140,20 @@ TEXT racecalladdr<>(SB), NOSPLIT, $0-0
MOVQ g_racectx(R14), RARG0 // goroutine context
// Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
CMPQ RARG1, runtime·racearenastart(SB)
JB racecalladdr_data
JB data
CMPQ RARG1, runtime·racearenaend(SB)
JB racecalladdr_call
racecalladdr_data:
JB call
data:
MOVQ $runtime·noptrdata(SB), R13
CMPQ RARG1, R13
JB racecalladdr_ret
JB ret
MOVQ $runtime·enoptrbss(SB), R13
CMPQ RARG1, R13
JAE racecalladdr_ret
racecalladdr_call:
JAE ret
call:
MOVQ AX, AX // w/o this 6a miscompiles this function
JMP racecall<>(SB)
racecalladdr_ret:
ret:
RET
// func runtime·racefuncenter(pc uintptr)
......@@ -335,9 +335,9 @@ TEXT racecall<>(SB), NOSPLIT, $0-0
MOVQ SP, R12 // callee-saved, preserved across the CALL
MOVQ m_g0(R13), R10
CMPQ R10, R14
JE racecall_cont // already on g0
JE call // already on g0
MOVQ (g_sched+gobuf_sp)(R10), SP
racecall_cont:
call:
ANDQ $~15, SP // alignment for gcc ABI
CALL AX
MOVQ R12, SP
......
......@@ -248,7 +248,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
JMP sigtramp_ret
JMP ret
// save g
MOVL DI, 20(SP)
......@@ -275,7 +275,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL 20(SP), DI
MOVL DI, g(CX)
sigtramp_ret:
ret:
// call sigreturn
MOVL context+16(FP), CX
MOVL style+4(FP), BX
......
......@@ -211,7 +211,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVL DX, 0(SP)
MOVQ $runtime·badsignal(SB), AX
CALL AX
JMP sigtramp_ret
JMP ret
// save g
MOVQ R10, 48(SP)
......@@ -233,7 +233,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ 48(SP), R10
MOVQ R10, g(BX)
sigtramp_ret:
ret:
// call sigreturn
MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle)
MOVQ 32(SP), DI // saved ucontext
......
......@@ -217,7 +217,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
JMP sigtramp_ret
JMP ret
// save g
MOVL DI, 20(SP)
......@@ -243,7 +243,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX
MOVL BX, g(CX)
sigtramp_ret:
ret:
// call sigreturn
MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap
......
......@@ -197,7 +197,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
JMP sigtramp_ret
JMP ret
// save g
MOVL DI, 20(SP)
......@@ -223,7 +223,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX
MOVL BX, g(CX)
sigtramp_ret:
ret:
// call sigreturn
MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment