Commit b55791e2 authored by Russ Cox

[dev.power64] cmd/5a, cmd/6a, cmd/8a, cmd/9a: make labels function-scoped

I removed support for jumping between functions years ago,
as part of doing the instruction layout for each function separately.

Given that, it makes sense to treat labels as function-scoped.
This lets each function have its own 'loop' label, for example.

Makes the assembly much cleaner and removes the last
reason anyone would reach for the 123(PC) form instead.

Note that this is on the dev.power64 branch, but it changes all
the assemblers. The change will ship in Go 1.5 (perhaps after
being ported into the new assembler).

Came up as part of CL 167730043.

LGTM=r
R=r
CC=austin, dave, golang-codereviews, minux
https://golang.org/cl/159670043
parent 87b4149b
...@@ -67,6 +67,7 @@ struct Sym ...@@ -67,6 +67,7 @@ struct Sym
int32 value; int32 value;
ushort type; ushort type;
char *name; char *name;
char* labelname;
char sym; char sym;
}; };
#define S ((Sym*)0) #define S ((Sym*)0)
...@@ -136,6 +137,8 @@ void newio(void); ...@@ -136,6 +137,8 @@ void newio(void);
void newfile(char*, int); void newfile(char*, int);
Sym* slookup(char*); Sym* slookup(char*);
Sym* lookup(void); Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*); void syminit(Sym*);
int32 yylex(void); int32 yylex(void);
int getc(void); int getc(void);
......
...@@ -73,15 +73,11 @@ prog: ...@@ -73,15 +73,11 @@ prog:
line line
line: line:
LLAB ':' LNAME ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
{ {
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB; $1->type = LLAB;
$1->value = pc; $1->value = pc;
} }
...@@ -218,18 +214,21 @@ inst: ...@@ -218,18 +214,21 @@ inst:
*/ */
| LTYPEB name ',' imm | LTYPEB name ',' imm
{ {
settext($2.sym);
$4.type = D_CONST2; $4.type = D_CONST2;
$4.offset2 = ArgsSizeUnknown; $4.offset2 = ArgsSizeUnknown;
outcode($1, Always, &$2, 0, &$4); outcode($1, Always, &$2, 0, &$4);
} }
| LTYPEB name ',' con ',' imm | LTYPEB name ',' con ',' imm
{ {
settext($2.sym);
$6.type = D_CONST2; $6.type = D_CONST2;
$6.offset2 = ArgsSizeUnknown; $6.offset2 = ArgsSizeUnknown;
outcode($1, Always, &$2, $4, &$6); outcode($1, Always, &$2, $4, &$6);
} }
| LTYPEB name ',' con ',' imm '-' con | LTYPEB name ',' con ',' imm '-' con
{ {
settext($2.sym);
$6.type = D_CONST2; $6.type = D_CONST2;
$6.offset2 = $8; $6.offset2 = $8;
outcode($1, Always, &$2, $4, &$6); outcode($1, Always, &$2, $4, &$6);
...@@ -373,15 +372,10 @@ rel: ...@@ -373,15 +372,10 @@ rel:
} }
| LNAME offset | LNAME offset
{ {
$1 = labellookup($1);
$$ = nullgen; $$ = nullgen;
if(pass == 2) if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->name); yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
$$.type = D_BRANCH; $$.type = D_BRANCH;
$$.offset = $1->value + $2; $$.offset = $1->value + $2;
} }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* A Bison parser, made by GNU Bison 2.7.12-4996. */ /* A Bison parser, made by GNU Bison 2.3. */
/* Bison interface for Yacc-like parsers in C /* Skeleton interface for Bison's Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc. Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation; either version 2, or (at your option)
(at your option) any later version. any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */ along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
/* As a special exception, you may create a larger work that contains /* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work part or all of the Bison parser skeleton and distribute that work
...@@ -26,20 +29,10 @@ ...@@ -26,20 +29,10 @@
special exception, which will cause the skeleton and the resulting special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public Bison output files to be licensed under the GNU General Public
License without this special exception. License without this special exception.
This special exception was added by the Free Software Foundation in This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */ version 2.2 of Bison. */
#ifndef YY_YY_Y_TAB_H_INCLUDED
# define YY_YY_Y_TAB_H_INCLUDED
/* Enabling traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
/* Tokens. */ /* Tokens. */
#ifndef YYTOKENTYPE #ifndef YYTOKENTYPE
# define YYTOKENTYPE # define YYTOKENTYPE
...@@ -148,41 +141,24 @@ extern int yydebug; ...@@ -148,41 +141,24 @@ extern int yydebug;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE typedef union YYSTYPE
{
/* Line 2053 of yacc.c */
#line 39 "a.y" #line 39 "a.y"
{
Sym *sym; Sym *sym;
int32 lval; int32 lval;
double dval; double dval;
char sval[8]; char sval[8];
Addr addr; Addr addr;
}
/* Line 1529 of yacc.c. */
/* Line 2053 of yacc.c */ #line 157 "y.tab.h"
#line 166 "y.tab.h" YYSTYPE;
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1 # define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
#endif #endif
extern YYSTYPE yylval; extern YYSTYPE yylval;
#ifdef YYPARSE_PARAM
#if defined __STDC__ || defined __cplusplus
int yyparse (void *YYPARSE_PARAM);
#else
int yyparse ();
#endif
#else /* ! YYPARSE_PARAM */
#if defined __STDC__ || defined __cplusplus
int yyparse (void);
#else
int yyparse ();
#endif
#endif /* ! YYPARSE_PARAM */
#endif /* !YY_YY_Y_TAB_H_INCLUDED */
...@@ -70,6 +70,7 @@ struct Sym ...@@ -70,6 +70,7 @@ struct Sym
vlong value; vlong value;
ushort type; ushort type;
char *name; char *name;
char* labelname;
char sym; char sym;
}; };
#define S ((Sym*)0) #define S ((Sym*)0)
...@@ -148,6 +149,8 @@ void newio(void); ...@@ -148,6 +149,8 @@ void newio(void);
void newfile(char*, int); void newfile(char*, int);
Sym* slookup(char*); Sym* slookup(char*);
Sym* lookup(void); Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*); void syminit(Sym*);
int32 yylex(void); int32 yylex(void);
int getc(void); int getc(void);
......
...@@ -71,15 +71,11 @@ prog: ...@@ -71,15 +71,11 @@ prog:
line line
line: line:
LLAB ':' LNAME ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
{ {
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s (%s)", $1->labelname, $1->name);
$1->type = LLAB; $1->type = LLAB;
$1->value = pc; $1->value = pc;
} }
...@@ -197,11 +193,13 @@ spec1: /* DATA */ ...@@ -197,11 +193,13 @@ spec1: /* DATA */
spec2: /* TEXT */ spec2: /* TEXT */
mem ',' imm2 mem ',' imm2
{ {
settext($1.sym);
$$.from = $1; $$.from = $1;
$$.to = $3; $$.to = $3;
} }
| mem ',' con ',' imm2 | mem ',' con ',' imm2
{ {
settext($1.sym);
$$.from = $1; $$.from = $1;
$$.from.scale = $3; $$.from.scale = $3;
$$.to = $5; $$.to = $5;
...@@ -363,15 +361,10 @@ rel: ...@@ -363,15 +361,10 @@ rel:
} }
| LNAME offset | LNAME offset
{ {
$1 = labellookup($1);
$$ = nullgen; $$ = nullgen;
if(pass == 2) if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->name); yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
$$.type = D_BRANCH; $$.type = D_BRANCH;
$$.offset = $1->value + $2; $$.offset = $1->value + $2;
} }
......
This diff is collapsed.
...@@ -70,6 +70,7 @@ struct Sym ...@@ -70,6 +70,7 @@ struct Sym
int32 value; int32 value;
ushort type; ushort type;
char *name; char *name;
char* labelname;
char sym; char sym;
}; };
#define S ((Sym*)0) #define S ((Sym*)0)
...@@ -148,6 +149,8 @@ void newio(void); ...@@ -148,6 +149,8 @@ void newio(void);
void newfile(char*, int); void newfile(char*, int);
Sym* slookup(char*); Sym* slookup(char*);
Sym* lookup(void); Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*); void syminit(Sym*);
int32 yylex(void); int32 yylex(void);
int getc(void); int getc(void);
......
...@@ -74,15 +74,11 @@ prog: ...@@ -74,15 +74,11 @@ prog:
line line
line: line:
LLAB ':' LNAME ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
{ {
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB; $1->type = LLAB;
$1->value = pc; $1->value = pc;
} }
...@@ -199,11 +195,13 @@ spec1: /* DATA */ ...@@ -199,11 +195,13 @@ spec1: /* DATA */
spec2: /* TEXT */ spec2: /* TEXT */
mem ',' imm2 mem ',' imm2
{ {
settext($1.sym);
$$.from = $1; $$.from = $1;
$$.to = $3; $$.to = $3;
} }
| mem ',' con ',' imm2 | mem ',' con ',' imm2
{ {
settext($1.sym);
$$.from = $1; $$.from = $1;
$$.from.scale = $3; $$.from.scale = $3;
$$.to = $5; $$.to = $5;
...@@ -362,15 +360,10 @@ rel: ...@@ -362,15 +360,10 @@ rel:
} }
| LNAME offset | LNAME offset
{ {
$1 = labellookup($1);
$$ = nullgen; $$ = nullgen;
if(pass == 2) if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->name); yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
$$.type = D_BRANCH; $$.type = D_BRANCH;
$$.offset = $1->value + $2; $$.offset = $1->value + $2;
} }
......
This diff is collapsed.
...@@ -68,6 +68,7 @@ struct Sym ...@@ -68,6 +68,7 @@ struct Sym
vlong value; vlong value;
ushort type; ushort type;
char *name; char *name;
char* labelname;
char sym; char sym;
}; };
#define S ((Sym*)0) #define S ((Sym*)0)
...@@ -135,6 +136,8 @@ void newio(void); ...@@ -135,6 +136,8 @@ void newio(void);
void newfile(char*, int); void newfile(char*, int);
Sym* slookup(char*); Sym* slookup(char*);
Sym* lookup(void); Sym* lookup(void);
Sym* labellookup(Sym*);
void settext(LSym*);
void syminit(Sym*); void syminit(Sym*);
int32 yylex(void); int32 yylex(void);
int getc(void); int getc(void);
......
...@@ -67,15 +67,11 @@ prog: ...@@ -67,15 +67,11 @@ prog:
| prog line | prog line
line: line:
LLAB ':' LNAME ':'
{
if($1->value != pc)
yyerror("redeclaration of %s", $1->name);
$1->value = pc;
}
line
| LNAME ':'
{ {
$1 = labellookup($1);
if($1->type == LLAB && $1->value != pc)
yyerror("redeclaration of %s", $1->labelname);
$1->type = LLAB; $1->type = LLAB;
$1->value = pc; $1->value = pc;
} }
...@@ -623,16 +619,19 @@ inst: ...@@ -623,16 +619,19 @@ inst:
*/ */
| LTEXT name ',' imm | LTEXT name ',' imm
{ {
settext($2.sym);
outcode($1, &$2, NREG, &$4); outcode($1, &$2, NREG, &$4);
} }
| LTEXT name ',' con ',' imm | LTEXT name ',' con ',' imm
{ {
settext($2.sym);
$6.offset &= 0xffffffffull; $6.offset &= 0xffffffffull;
$6.offset |= (vlong)ArgsSizeUnknown << 32; $6.offset |= (vlong)ArgsSizeUnknown << 32;
outcode($1, &$2, $4, &$6); outcode($1, &$2, $4, &$6);
} }
| LTEXT name ',' con ',' imm '-' con | LTEXT name ',' con ',' imm '-' con
{ {
settext($2.sym);
$6.offset &= 0xffffffffull; $6.offset &= 0xffffffffull;
$6.offset |= ($8 & 0xffffffffull) << 32; $6.offset |= ($8 & 0xffffffffull) << 32;
outcode($1, &$2, $4, &$6); outcode($1, &$2, $4, &$6);
...@@ -669,15 +668,10 @@ rel: ...@@ -669,15 +668,10 @@ rel:
} }
| LNAME offset | LNAME offset
{ {
$1 = labellookup($1);
$$ = nullgen; $$ = nullgen;
if(pass == 2) if(pass == 2 && $1->type != LLAB)
yyerror("undefined label: %s", $1->name); yyerror("undefined label: %s", $1->labelname);
$$.type = D_BRANCH;
$$.offset = $2;
}
| LLAB offset
{
$$ = nullgen;
$$.type = D_BRANCH; $$.type = D_BRANCH;
$$.offset = $1->value + $2; $$.offset = $1->value + $2;
} }
......
This diff is collapsed.
/* A Bison parser, made by GNU Bison 2.5. */ /* A Bison parser, made by GNU Bison 2.3. */
/* Bison interface for Yacc-like parsers in C /* Skeleton interface for Bison's Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation; either version 2, or (at your option)
(at your option) any later version. any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */ along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
/* As a special exception, you may create a larger work that contains /* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work part or all of the Bison parser skeleton and distribute that work
...@@ -26,11 +29,10 @@ ...@@ -26,11 +29,10 @@
special exception, which will cause the skeleton and the resulting special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public Bison output files to be licensed under the GNU General Public
License without this special exception. License without this special exception.
This special exception was added by the Free Software Foundation in This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */ version 2.2 of Bison. */
/* Tokens. */ /* Tokens. */
#ifndef YYTOKENTYPE #ifndef YYTOKENTYPE
# define YYTOKENTYPE # define YYTOKENTYPE
...@@ -166,27 +168,21 @@ ...@@ -166,27 +168,21 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE typedef union YYSTYPE
{
/* Line 2068 of yacc.c */
#line 38 "a.y" #line 38 "a.y"
{
Sym *sym; Sym *sym;
vlong lval; vlong lval;
double dval; double dval;
char sval[8]; char sval[8];
Addr addr; Addr addr;
}
/* Line 1529 of yacc.c. */
#line 181 "y.tab.h"
/* Line 2068 of yacc.c */ YYSTYPE;
#line 184 "y.tab.h"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1 # define YYSTYPE_IS_DECLARED 1
# define YYSTYPE_IS_TRIVIAL 1
#endif #endif
extern YYSTYPE yylval; extern YYSTYPE yylval;
...@@ -220,6 +220,31 @@ slookup(char *s) ...@@ -220,6 +220,31 @@ slookup(char *s)
return lookup(); return lookup();
} }
/*
 * thetext is the function symbol most recently passed to settext.
 * labellookup reads it to build function-scoped label names, and
 * treats nil as "no enclosing function".
 */
LSym *thetext;
/*
 * settext records s as the current function symbol.
 * The assembler grammars call it from their TEXT rules, before the
 * instruction is emitted, so that labels seen afterwards are scoped
 * to that function.
 */
void
settext(LSym *s)
{
thetext = s;
}
/*
 * labellookup returns the symbol that represents label s within the
 * current function.
 *
 * When a current function has been recorded (thetext != nil), the
 * label is looked up under the qualified name "<function>.<label>",
 * so the same label text in different functions maps to distinct
 * symbols.  Otherwise s itself is used.
 *
 * In both cases the returned symbol's labelname is set to the
 * unqualified name of s, which is what diagnostics print.
 */
Sym*
labellookup(Sym *s)
{
	char *qualified;
	Sym *scoped;

	if(thetext != nil) {
		/* Build the temporary "function.label" key; only needed for the lookup. */
		qualified = smprint("%s.%s", thetext->name, s->name);
		scoped = slookup(qualified);
		free(qualified);
	} else {
		/* No enclosing function: the label keeps its own symbol. */
		scoped = s;
	}
	scoped->labelname = s->name;
	return scoped;
}
Sym* Sym*
lookup(void) lookup(void)
{ {
......
...@@ -486,11 +486,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-21 ...@@ -486,11 +486,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-21
MOVL new_hi+16(FP), CX MOVL new_hi+16(FP), CX
LOCK LOCK
CMPXCHG8B 0(BP) CMPXCHG8B 0(BP)
JNZ cas64_fail JNZ fail
MOVL $1, AX MOVL $1, AX
MOVB AX, ret+20(FP) MOVB AX, ret+20(FP)
RET RET
cas64_fail: fail:
MOVL $0, AX MOVL $0, AX
MOVB AX, ret+20(FP) MOVB AX, ret+20(FP)
RET RET
...@@ -1342,29 +1342,29 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0 ...@@ -1342,29 +1342,29 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0
// AX = 1/0/-1 // AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPL SI, DI CMPL SI, DI
JEQ cmp_allsame JEQ allsame
CMPL BX, DX CMPL BX, DX
MOVL DX, BP MOVL DX, BP
CMOVLLT BX, BP // BP = min(alen, blen) CMOVLLT BX, BP // BP = min(alen, blen)
CMPL BP, $4 CMPL BP, $4
JB cmp_small JB small
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
JE cmp_mediumloop JE mediumloop
cmp_largeloop: largeloop:
CMPL BP, $16 CMPL BP, $16
JB cmp_mediumloop JB mediumloop
MOVOU (SI), X0 MOVOU (SI), X0
MOVOU (DI), X1 MOVOU (DI), X1
PCMPEQB X0, X1 PCMPEQB X0, X1
PMOVMSKB X1, AX PMOVMSKB X1, AX
XORL $0xffff, AX // convert EQ to NE XORL $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI ADDL $16, SI
ADDL $16, DI ADDL $16, DI
SUBL $16, BP SUBL $16, BP
JMP cmp_largeloop JMP largeloop
cmp_diff16: diff16:
BSFL AX, BX // index of first byte that differs BSFL AX, BX // index of first byte that differs
XORL AX, AX XORL AX, AX
MOVB (SI)(BX*1), CX MOVB (SI)(BX*1), CX
...@@ -1373,25 +1373,25 @@ cmp_diff16: ...@@ -1373,25 +1373,25 @@ cmp_diff16:
LEAL -1(AX*2), AX // convert 1/0 to +1/-1 LEAL -1(AX*2), AX // convert 1/0 to +1/-1
RET RET
cmp_mediumloop: mediumloop:
CMPL BP, $4 CMPL BP, $4
JBE cmp_0through4 JBE _0through4
MOVL (SI), AX MOVL (SI), AX
MOVL (DI), CX MOVL (DI), CX
CMPL AX, CX CMPL AX, CX
JNE cmp_diff4 JNE diff4
ADDL $4, SI ADDL $4, SI
ADDL $4, DI ADDL $4, DI
SUBL $4, BP SUBL $4, BP
JMP cmp_mediumloop JMP mediumloop
cmp_0through4: _0through4:
MOVL -4(SI)(BP*1), AX MOVL -4(SI)(BP*1), AX
MOVL -4(DI)(BP*1), CX MOVL -4(DI)(BP*1), CX
CMPL AX, CX CMPL AX, CX
JEQ cmp_allsame JEQ allsame
cmp_diff4: diff4:
BSWAPL AX // reverse order of bytes BSWAPL AX // reverse order of bytes
BSWAPL CX BSWAPL CX
XORL AX, CX // find bit differences XORL AX, CX // find bit differences
...@@ -1402,37 +1402,37 @@ cmp_diff4: ...@@ -1402,37 +1402,37 @@ cmp_diff4:
RET RET
// 0-3 bytes in common // 0-3 bytes in common
cmp_small: small:
LEAL (BP*8), CX LEAL (BP*8), CX
NEGL CX NEGL CX
JEQ cmp_allsame JEQ allsame
// load si // load si
CMPB SI, $0xfc CMPB SI, $0xfc
JA cmp_si_high JA si_high
MOVL (SI), SI MOVL (SI), SI
JMP cmp_si_finish JMP si_finish
cmp_si_high: si_high:
MOVL -4(SI)(BP*1), SI MOVL -4(SI)(BP*1), SI
SHRL CX, SI SHRL CX, SI
cmp_si_finish: si_finish:
SHLL CX, SI SHLL CX, SI
// same for di // same for di
CMPB DI, $0xfc CMPB DI, $0xfc
JA cmp_di_high JA di_high
MOVL (DI), DI MOVL (DI), DI
JMP cmp_di_finish JMP di_finish
cmp_di_high: di_high:
MOVL -4(DI)(BP*1), DI MOVL -4(DI)(BP*1), DI
SHRL CX, DI SHRL CX, DI
cmp_di_finish: di_finish:
SHLL CX, DI SHLL CX, DI
BSWAPL SI // reverse order of bytes BSWAPL SI // reverse order of bytes
BSWAPL DI BSWAPL DI
XORL SI, DI // find bit differences XORL SI, DI // find bit differences
JEQ cmp_allsame JEQ allsame
BSRL DI, CX // index of highest bit difference BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit ANDL $1, SI // mask bit
...@@ -1441,7 +1441,7 @@ cmp_di_finish: ...@@ -1441,7 +1441,7 @@ cmp_di_finish:
// all the bytes in common are the same, so we just need // all the bytes in common are the same, so we just need
// to compare the lengths. // to compare the lengths.
cmp_allsame: allsame:
XORL AX, AX XORL AX, AX
XORL CX, CX XORL CX, CX
CMPL BX, DX CMPL BX, DX
......
...@@ -461,11 +461,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25 ...@@ -461,11 +461,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX MOVQ new+16(FP), CX
LOCK LOCK
CMPXCHGQ CX, 0(BX) CMPXCHGQ CX, 0(BX)
JNZ cas64_fail JNZ fail
MOVL $1, AX MOVL $1, AX
MOVB AX, ret+24(FP) MOVB AX, ret+24(FP)
RET RET
cas64_fail: fail:
MOVL $0, AX MOVL $0, AX
MOVB AX, ret+24(FP) MOVB AX, ret+24(FP)
RET RET
...@@ -876,24 +876,24 @@ TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32 ...@@ -876,24 +876,24 @@ TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
MOVO runtime·aeskeysched+0(SB), X2 MOVO runtime·aeskeysched+0(SB), X2
MOVO runtime·aeskeysched+16(SB), X3 MOVO runtime·aeskeysched+16(SB), X3
CMPQ CX, $16 CMPQ CX, $16
JB aessmall JB small
aesloop: loop:
CMPQ CX, $16 CMPQ CX, $16
JBE aesloopend JBE loopend
MOVOU (AX), X1 MOVOU (AX), X1
AESENC X2, X0 AESENC X2, X0
AESENC X1, X0 AESENC X1, X0
SUBQ $16, CX SUBQ $16, CX
ADDQ $16, AX ADDQ $16, AX
JMP aesloop JMP loop
// 1-16 bytes remaining // 1-16 bytes remaining
aesloopend: loopend:
// This load may overlap with the previous load above. // This load may overlap with the previous load above.
// We'll hash some bytes twice, but that's ok. // We'll hash some bytes twice, but that's ok.
MOVOU -16(AX)(CX*1), X1 MOVOU -16(AX)(CX*1), X1
JMP partial JMP partial
// 0-15 bytes // 0-15 bytes
aessmall: small:
TESTQ CX, CX TESTQ CX, CX
JE finalize // 0 bytes JE finalize // 0 bytes
...@@ -1036,18 +1036,18 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33 ...@@ -1036,18 +1036,18 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33
MOVQ s1len+8(FP), AX MOVQ s1len+8(FP), AX
MOVQ s2len+24(FP), BX MOVQ s2len+24(FP), BX
CMPQ AX, BX CMPQ AX, BX
JNE different JNE noteq
MOVQ s1str+0(FP), SI MOVQ s1str+0(FP), SI
MOVQ s2str+16(FP), DI MOVQ s2str+16(FP), DI
CMPQ SI, DI CMPQ SI, DI
JEQ same JEQ eq
CALL runtime·memeqbody(SB) CALL runtime·memeqbody(SB)
MOVB AX, v+32(FP) MOVB AX, v+32(FP)
RET RET
same: eq:
MOVB $1, v+32(FP) MOVB $1, v+32(FP)
RET RET
different: noteq:
MOVB $0, v+32(FP) MOVB $0, v+32(FP)
RET RET
...@@ -1170,29 +1170,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56 ...@@ -1170,29 +1170,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
// AX = 1/0/-1 // AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI CMPQ SI, DI
JEQ cmp_allsame JEQ allsame
CMPQ BX, DX CMPQ BX, DX
MOVQ DX, BP MOVQ DX, BP
CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare
CMPQ BP, $8 CMPQ BP, $8
JB cmp_small JB small
cmp_loop: loop:
CMPQ BP, $16 CMPQ BP, $16
JBE cmp_0through16 JBE _0through16
MOVOU (SI), X0 MOVOU (SI), X0
MOVOU (DI), X1 MOVOU (DI), X1
PCMPEQB X0, X1 PCMPEQB X0, X1
PMOVMSKB X1, AX PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE XORQ $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI ADDQ $16, SI
ADDQ $16, DI ADDQ $16, DI
SUBQ $16, BP SUBQ $16, BP
JMP cmp_loop JMP loop
// AX = bit mask of differences // AX = bit mask of differences
cmp_diff16: diff16:
BSFQ AX, BX // index of first byte that differs BSFQ AX, BX // index of first byte that differs
XORQ AX, AX XORQ AX, AX
MOVB (SI)(BX*1), CX MOVB (SI)(BX*1), CX
...@@ -1202,21 +1202,21 @@ cmp_diff16: ...@@ -1202,21 +1202,21 @@ cmp_diff16:
RET RET
// 0 through 16 bytes left, alen>=8, blen>=8 // 0 through 16 bytes left, alen>=8, blen>=8
cmp_0through16: _0through16:
CMPQ BP, $8 CMPQ BP, $8
JBE cmp_0through8 JBE _0through8
MOVQ (SI), AX MOVQ (SI), AX
MOVQ (DI), CX MOVQ (DI), CX
CMPQ AX, CX CMPQ AX, CX
JNE cmp_diff8 JNE diff8
cmp_0through8: _0through8:
MOVQ -8(SI)(BP*1), AX MOVQ -8(SI)(BP*1), AX
MOVQ -8(DI)(BP*1), CX MOVQ -8(DI)(BP*1), CX
CMPQ AX, CX CMPQ AX, CX
JEQ cmp_allsame JEQ allsame
// AX and CX contain parts of a and b that differ. // AX and CX contain parts of a and b that differ.
cmp_diff8: diff8:
BSWAPQ AX // reverse order of bytes BSWAPQ AX // reverse order of bytes
BSWAPQ CX BSWAPQ CX
XORQ AX, CX XORQ AX, CX
...@@ -1227,44 +1227,44 @@ cmp_diff8: ...@@ -1227,44 +1227,44 @@ cmp_diff8:
RET RET
// 0-7 bytes in common // 0-7 bytes in common
cmp_small: small:
LEAQ (BP*8), CX // bytes left -> bits left LEAQ (BP*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64) NEGQ CX // - bits lift (== 64 - bits left mod 64)
JEQ cmp_allsame JEQ allsame
// load bytes of a into high bytes of AX // load bytes of a into high bytes of AX
CMPB SI, $0xf8 CMPB SI, $0xf8
JA cmp_si_high JA si_high
MOVQ (SI), SI MOVQ (SI), SI
JMP cmp_si_finish JMP si_finish
cmp_si_high: si_high:
MOVQ -8(SI)(BP*1), SI MOVQ -8(SI)(BP*1), SI
SHRQ CX, SI SHRQ CX, SI
cmp_si_finish: si_finish:
SHLQ CX, SI SHLQ CX, SI
// load bytes of b in to high bytes of BX // load bytes of b in to high bytes of BX
CMPB DI, $0xf8 CMPB DI, $0xf8
JA cmp_di_high JA di_high
MOVQ (DI), DI MOVQ (DI), DI
JMP cmp_di_finish JMP di_finish
cmp_di_high: di_high:
MOVQ -8(DI)(BP*1), DI MOVQ -8(DI)(BP*1), DI
SHRQ CX, DI SHRQ CX, DI
cmp_di_finish: di_finish:
SHLQ CX, DI SHLQ CX, DI
BSWAPQ SI // reverse order of bytes BSWAPQ SI // reverse order of bytes
BSWAPQ DI BSWAPQ DI
XORQ SI, DI // find bit differences XORQ SI, DI // find bit differences
JEQ cmp_allsame JEQ allsame
BSRQ DI, CX // index of highest bit difference BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1 LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET RET
cmp_allsame: allsame:
XORQ AX, AX XORQ AX, AX
XORQ CX, CX XORQ CX, CX
CMPQ BX, DX CMPQ BX, DX
...@@ -1299,7 +1299,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0 ...@@ -1299,7 +1299,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVQ SI, DI MOVQ SI, DI
CMPQ BX, $16 CMPQ BX, $16
JLT indexbyte_small JLT small
// round up to first 16-byte boundary // round up to first 16-byte boundary
TESTQ $15, SI TESTQ $15, SI
...@@ -1357,7 +1357,7 @@ failure: ...@@ -1357,7 +1357,7 @@ failure:
RET RET
// handle for lengths < 16 // handle for lengths < 16
indexbyte_small: small:
MOVQ BX, CX MOVQ BX, CX
REPN; SCASB REPN; SCASB
JZ success JZ success
......
...@@ -444,11 +444,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25 ...@@ -444,11 +444,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX MOVQ new+16(FP), CX
LOCK LOCK
CMPXCHGQ CX, 0(BX) CMPXCHGQ CX, 0(BX)
JNZ cas64_fail JNZ fail
MOVL $1, AX MOVL $1, AX
MOVB AX, ret+24(FP) MOVB AX, ret+24(FP)
RET RET
cas64_fail: fail:
MOVL $0, AX MOVL $0, AX
MOVB AX, ret+24(FP) MOVB AX, ret+24(FP)
RET RET
...@@ -834,29 +834,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28 ...@@ -834,29 +834,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
// AX = 1/0/-1 // AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI CMPQ SI, DI
JEQ cmp_allsame JEQ allsame
CMPQ BX, DX CMPQ BX, DX
MOVQ DX, R8 MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8 CMPQ R8, $8
JB cmp_small JB small
cmp_loop: loop:
CMPQ R8, $16 CMPQ R8, $16
JBE cmp_0through16 JBE _0through16
MOVOU (SI), X0 MOVOU (SI), X0
MOVOU (DI), X1 MOVOU (DI), X1
PCMPEQB X0, X1 PCMPEQB X0, X1
PMOVMSKB X1, AX PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE XORQ $0xffff, AX // convert EQ to NE
JNE cmp_diff16 // branch if at least one byte is not equal JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI ADDQ $16, SI
ADDQ $16, DI ADDQ $16, DI
SUBQ $16, R8 SUBQ $16, R8
JMP cmp_loop JMP loop
// AX = bit mask of differences // AX = bit mask of differences
cmp_diff16: diff16:
BSFQ AX, BX // index of first byte that differs BSFQ AX, BX // index of first byte that differs
XORQ AX, AX XORQ AX, AX
ADDQ BX, SI ADDQ BX, SI
...@@ -868,23 +868,23 @@ cmp_diff16: ...@@ -868,23 +868,23 @@ cmp_diff16:
RET RET
// 0 through 16 bytes left, alen>=8, blen>=8 // 0 through 16 bytes left, alen>=8, blen>=8
cmp_0through16: _0through16:
CMPQ R8, $8 CMPQ R8, $8
JBE cmp_0through8 JBE _0through8
MOVQ (SI), AX MOVQ (SI), AX
MOVQ (DI), CX MOVQ (DI), CX
CMPQ AX, CX CMPQ AX, CX
JNE cmp_diff8 JNE diff8
cmp_0through8: _0through8:
ADDQ R8, SI ADDQ R8, SI
ADDQ R8, DI ADDQ R8, DI
MOVQ -8(SI), AX MOVQ -8(SI), AX
MOVQ -8(DI), CX MOVQ -8(DI), CX
CMPQ AX, CX CMPQ AX, CX
JEQ cmp_allsame JEQ allsame
// AX and CX contain parts of a and b that differ. // AX and CX contain parts of a and b that differ.
cmp_diff8: diff8:
BSWAPQ AX // reverse order of bytes BSWAPQ AX // reverse order of bytes
BSWAPQ CX BSWAPQ CX
XORQ AX, CX XORQ AX, CX
...@@ -895,46 +895,46 @@ cmp_diff8: ...@@ -895,46 +895,46 @@ cmp_diff8:
RET RET
// 0-7 bytes in common // 0-7 bytes in common
cmp_small: small:
LEAQ (R8*8), CX // bytes left -> bits left LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64) NEGQ CX // - bits lift (== 64 - bits left mod 64)
JEQ cmp_allsame JEQ allsame
// load bytes of a into high bytes of AX // load bytes of a into high bytes of AX
CMPB SI, $0xf8 CMPB SI, $0xf8
JA cmp_si_high JA si_high
MOVQ (SI), SI MOVQ (SI), SI
JMP cmp_si_finish JMP si_finish
cmp_si_high: si_high:
ADDQ R8, SI ADDQ R8, SI
MOVQ -8(SI), SI MOVQ -8(SI), SI
SHRQ CX, SI SHRQ CX, SI
cmp_si_finish: si_finish:
SHLQ CX, SI SHLQ CX, SI
// load bytes of b in to high bytes of BX // load bytes of b in to high bytes of BX
CMPB DI, $0xf8 CMPB DI, $0xf8
JA cmp_di_high JA di_high
MOVQ (DI), DI MOVQ (DI), DI
JMP cmp_di_finish JMP di_finish
cmp_di_high: di_high:
ADDQ R8, DI ADDQ R8, DI
MOVQ -8(DI), DI MOVQ -8(DI), DI
SHRQ CX, DI SHRQ CX, DI
cmp_di_finish: di_finish:
SHLQ CX, DI SHLQ CX, DI
BSWAPQ SI // reverse order of bytes BSWAPQ SI // reverse order of bytes
BSWAPQ DI BSWAPQ DI
XORQ SI, DI // find bit differences XORQ SI, DI // find bit differences
JEQ cmp_allsame JEQ allsame
BSRQ DI, CX // index of highest bit difference BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1 LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET RET
cmp_allsame: allsame:
XORQ AX, AX XORQ AX, AX
XORQ CX, CX XORQ CX, CX
CMPQ BX, DX CMPQ BX, DX
...@@ -969,7 +969,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0 ...@@ -969,7 +969,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVL SI, DI MOVL SI, DI
CMPL BX, $16 CMPL BX, $16
JLT indexbyte_small JLT small
// round up to first 16-byte boundary // round up to first 16-byte boundary
TESTL $15, SI TESTL $15, SI
...@@ -1027,7 +1027,7 @@ failure: ...@@ -1027,7 +1027,7 @@ failure:
RET RET
// handle for lengths < 16 // handle for lengths < 16
indexbyte_small: small:
MOVL BX, CX MOVL BX, CX
REPN; SCASB REPN; SCASB
JZ success JZ success
......
...@@ -492,7 +492,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0 ...@@ -492,7 +492,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW g_m(g), R8 MOVW g_m(g), R8
MOVW m_g0(R8), R3 MOVW m_g0(R8), R3
CMP R3, g CMP R3, g
BEQ asmcgocall_g0 BEQ g0
BL gosave<>(SB) BL gosave<>(SB)
MOVW R0, R5 MOVW R0, R5
MOVW R3, R0 MOVW R3, R0
...@@ -501,7 +501,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0 ...@@ -501,7 +501,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW (g_sched+gobuf_sp)(g), R13 MOVW (g_sched+gobuf_sp)(g), R13
// Now on a scheduling stack (a pthread-created stack). // Now on a scheduling stack (a pthread-created stack).
asmcgocall_g0: g0:
SUB $24, R13 SUB $24, R13
BIC $0x7, R13 // alignment for gcc ABI BIC $0x7, R13 // alignment for gcc ABI
MOVW R4, 20(R13) // save old g MOVW R4, 20(R13) // save old g
...@@ -736,13 +736,13 @@ TEXT runtime·memeq(SB),NOSPLIT,$-4-13 ...@@ -736,13 +736,13 @@ TEXT runtime·memeq(SB),NOSPLIT,$-4-13
ADD R1, R3, R6 ADD R1, R3, R6
MOVW $1, R0 MOVW $1, R0
MOVB R0, ret+12(FP) MOVB R0, ret+12(FP)
_next2: loop:
CMP R1, R6 CMP R1, R6
RET.EQ RET.EQ
MOVBU.P 1(R1), R4 MOVBU.P 1(R1), R4
MOVBU.P 1(R2), R5 MOVBU.P 1(R2), R5
CMP R4, R5 CMP R4, R5
BEQ _next2 BEQ loop
MOVW $0, R0 MOVW $0, R0
MOVB R0, ret+12(FP) MOVB R0, ret+12(FP)
...@@ -765,13 +765,13 @@ TEXT runtime·eqstring(SB),NOSPLIT,$-4-17 ...@@ -765,13 +765,13 @@ TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
CMP R2, R3 CMP R2, R3
RET.EQ RET.EQ
ADD R2, R0, R6 ADD R2, R0, R6
_eqnext: loop:
CMP R2, R6 CMP R2, R6
RET.EQ RET.EQ
MOVBU.P 1(R2), R4 MOVBU.P 1(R2), R4
MOVBU.P 1(R3), R5 MOVBU.P 1(R3), R5
CMP R4, R5 CMP R4, R5
BEQ _eqnext BEQ loop
MOVB R7, v+16(FP) MOVB R7, v+16(FP)
RET RET
...@@ -786,26 +786,26 @@ TEXT bytes·Equal(SB),NOSPLIT,$0 ...@@ -786,26 +786,26 @@ TEXT bytes·Equal(SB),NOSPLIT,$0
MOVW b_len+16(FP), R3 MOVW b_len+16(FP), R3
CMP R1, R3 // unequal lengths are not equal CMP R1, R3 // unequal lengths are not equal
B.NE _notequal B.NE notequal
MOVW a+0(FP), R0 MOVW a+0(FP), R0
MOVW b+12(FP), R2 MOVW b+12(FP), R2
ADD R0, R1 // end ADD R0, R1 // end
_byteseq_next: loop:
CMP R0, R1 CMP R0, R1
B.EQ _equal // reached the end B.EQ equal // reached the end
MOVBU.P 1(R0), R4 MOVBU.P 1(R0), R4
MOVBU.P 1(R2), R5 MOVBU.P 1(R2), R5
CMP R4, R5 CMP R4, R5
B.EQ _byteseq_next B.EQ loop
_notequal: notequal:
MOVW $0, R0 MOVW $0, R0
MOVBU R0, ret+24(FP) MOVBU R0, ret+24(FP)
RET RET
_equal: equal:
MOVW $1, R0 MOVW $1, R0
MOVBU R0, ret+24(FP) MOVBU R0, ret+24(FP)
RET RET
......
...@@ -699,7 +699,7 @@ TEXT runtime·memeq(SB),NOSPLIT,$-8-25 ...@@ -699,7 +699,7 @@ TEXT runtime·memeq(SB),NOSPLIT,$-8-25
SUB $1, R3 SUB $1, R3
SUB $1, R4 SUB $1, R4
ADD R3, R5, R8 ADD R3, R5, R8
_next: loop:
CMP R3, R8 CMP R3, R8
BNE 4(PC) BNE 4(PC)
MOVD $1, R3 MOVD $1, R3
...@@ -708,7 +708,7 @@ _next: ...@@ -708,7 +708,7 @@ _next:
MOVBZU 1(R3), R6 MOVBZU 1(R3), R6
MOVBZU 1(R4), R7 MOVBZU 1(R4), R7
CMP R6, R7 CMP R6, R7
BEQ _next BEQ loop
MOVB R0, ret+24(FP) MOVB R0, ret+24(FP)
RETURN RETURN
...@@ -720,14 +720,14 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33 ...@@ -720,14 +720,14 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33
MOVD s1len+8(FP), R4 MOVD s1len+8(FP), R4
MOVD s2len+24(FP), R5 MOVD s2len+24(FP), R5
CMP R4, R5 CMP R4, R5
BNE str_noteq BNE noteq
MOVD s1str+0(FP), R3 MOVD s1str+0(FP), R3
MOVD s2str+16(FP), R4 MOVD s2str+16(FP), R4
SUB $1, R3 SUB $1, R3
SUB $1, R4 SUB $1, R4
ADD R3, R5, R8 ADD R3, R5, R8
eq_next: loop:
CMP R3, R8 CMP R3, R8
BNE 4(PC) BNE 4(PC)
MOVD $1, R3 MOVD $1, R3
...@@ -736,8 +736,8 @@ eq_next: ...@@ -736,8 +736,8 @@ eq_next:
MOVBZU 1(R3), R6 MOVBZU 1(R3), R6
MOVBZU 1(R4), R7 MOVBZU 1(R4), R7
CMP R6, R7 CMP R6, R7
BEQ eq_next BEQ loop
str_noteq: noteq:
MOVB R0, ret+32(FP) MOVB R0, ret+32(FP)
RETURN RETURN
...@@ -747,7 +747,7 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49 ...@@ -747,7 +747,7 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49
MOVD b_len+32(FP), R4 MOVD b_len+32(FP), R4
CMP R3, R4 // unequal lengths are not equal CMP R3, R4 // unequal lengths are not equal
BNE _notequal BNE noteq
MOVD a+0(FP), R5 MOVD a+0(FP), R5
MOVD b+24(FP), R6 MOVD b+24(FP), R6
...@@ -755,19 +755,19 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49 ...@@ -755,19 +755,19 @@ TEXT bytes·Equal(SB),NOSPLIT,$0-49
SUB $1, R6 SUB $1, R6
ADD R5, R3 // end-1 ADD R5, R3 // end-1
_byteseq_next: loop:
CMP R5, R3 CMP R5, R3
BEQ _equal // reached the end BEQ equal // reached the end
MOVBZU 1(R5), R4 MOVBZU 1(R5), R4
MOVBZU 1(R6), R7 MOVBZU 1(R6), R7
CMP R4, R7 CMP R4, R7
BEQ _byteseq_next BEQ loop
_notequal: noteq:
MOVBZ R0, ret+48(FP) MOVBZ R0, ret+48(FP)
RETURN RETURN
_equal: equal:
MOVD $1, R3 MOVD $1, R3
MOVBZ R3, ret+48(FP) MOVBZ R3, ret+48(FP)
RETURN RETURN
...@@ -780,18 +780,18 @@ TEXT bytes·IndexByte(SB),NOSPLIT,$0-40 ...@@ -780,18 +780,18 @@ TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
SUB $1, R3 SUB $1, R3
ADD R3, R4 // end-1 ADD R3, R4 // end-1
_index_loop: loop:
CMP R3, R4 CMP R3, R4
BEQ _index_notfound BEQ notfound
MOVBZU 1(R3), R7 MOVBZU 1(R3), R7
CMP R7, R5 CMP R7, R5
BNE _index_loop BNE loop
SUB R6, R3 // remove base SUB R6, R3 // remove base
MOVD R3, ret+32(FP) MOVD R3, ret+32(FP)
RETURN RETURN
_index_notfound: notfound:
MOVD $-1, R3 MOVD $-1, R3
MOVD R3, ret+32(FP) MOVD R3, ret+32(FP)
RETURN RETURN
...@@ -804,18 +804,18 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0 ...@@ -804,18 +804,18 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0
SUB $1, R3 SUB $1, R3
ADD R3, R4 // end-1 ADD R3, R4 // end-1
_index2_loop: loop:
CMP R3, R4 CMP R3, R4
BEQ _index2_notfound BEQ notfound
MOVBZU 1(R3), R7 MOVBZU 1(R3), R7
CMP R7, R5 CMP R7, R5
BNE _index2_loop BNE loop
SUB R6, R3 // remove base SUB R6, R3 // remove base
MOVD R3, ret+24(FP) MOVD R3, ret+24(FP)
RETURN RETURN
_index2_notfound: notfound:
MOVD $-1, R3 MOVD $-1, R3
MOVD R3, ret+24(FP) MOVD R3, ret+24(FP)
RETURN RETURN
......
...@@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8 ...@@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
XORL AX, AX XORL AX, AX
// MOVOU seems always faster than REP STOSL. // MOVOU seems always faster than REP STOSL.
clr_tail: tail:
TESTL BX, BX TESTL BX, BX
JEQ clr_0 JEQ _0
CMPL BX, $2 CMPL BX, $2
JBE clr_1or2 JBE _1or2
CMPL BX, $4 CMPL BX, $4
JBE clr_3or4 JBE _3or4
CMPL BX, $8 CMPL BX, $8
JBE clr_5through8 JBE _5through8
CMPL BX, $16 CMPL BX, $16
JBE clr_9through16 JBE _9through16
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
JEQ nosse2 JEQ nosse2
PXOR X0, X0 PXOR X0, X0
CMPL BX, $32 CMPL BX, $32
JBE clr_17through32 JBE _17through32
CMPL BX, $64 CMPL BX, $64
JBE clr_33through64 JBE _33through64
CMPL BX, $128 CMPL BX, $128
JBE clr_65through128 JBE _65through128
CMPL BX, $256 CMPL BX, $256
JBE clr_129through256 JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch // TODO: use branch table and BSR to make this just a single dispatch
clr_loop: loop:
MOVOU X0, 0(DI) MOVOU X0, 0(DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
...@@ -59,40 +59,40 @@ clr_loop: ...@@ -59,40 +59,40 @@ clr_loop:
SUBL $256, BX SUBL $256, BX
ADDL $256, DI ADDL $256, DI
CMPL BX, $256 CMPL BX, $256
JAE clr_loop JAE loop
JMP clr_tail JMP tail
clr_1or2: _1or2:
MOVB AX, (DI) MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1) MOVB AX, -1(DI)(BX*1)
RET RET
clr_0: _0:
RET RET
clr_3or4: _3or4:
MOVW AX, (DI) MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1) MOVW AX, -2(DI)(BX*1)
RET RET
clr_5through8: _5through8:
MOVL AX, (DI) MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1) MOVL AX, -4(DI)(BX*1)
RET RET
clr_9through16: _9through16:
MOVL AX, (DI) MOVL AX, (DI)
MOVL AX, 4(DI) MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1) MOVL AX, -8(DI)(BX*1)
MOVL AX, -4(DI)(BX*1) MOVL AX, -4(DI)(BX*1)
RET RET
clr_17through32: _17through32:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_33through64: _33through64:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1) MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_65through128: _65through128:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
...@@ -102,7 +102,7 @@ clr_65through128: ...@@ -102,7 +102,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1) MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_129through256: _129through256:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
...@@ -126,5 +126,5 @@ nosse2: ...@@ -126,5 +126,5 @@ nosse2:
REP REP
STOSL STOSL
ANDL $3, BX ANDL $3, BX
JNE clr_tail JNE tail
RET RET
...@@ -15,30 +15,30 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-16 ...@@ -15,30 +15,30 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-16
XORQ AX, AX XORQ AX, AX
// MOVOU seems always faster than REP STOSQ. // MOVOU seems always faster than REP STOSQ.
clr_tail: tail:
TESTQ BX, BX TESTQ BX, BX
JEQ clr_0 JEQ _0
CMPQ BX, $2 CMPQ BX, $2
JBE clr_1or2 JBE _1or2
CMPQ BX, $4 CMPQ BX, $4
JBE clr_3or4 JBE _3or4
CMPQ BX, $8 CMPQ BX, $8
JBE clr_5through8 JBE _5through8
CMPQ BX, $16 CMPQ BX, $16
JBE clr_9through16 JBE _9through16
PXOR X0, X0 PXOR X0, X0
CMPQ BX, $32 CMPQ BX, $32
JBE clr_17through32 JBE _17through32
CMPQ BX, $64 CMPQ BX, $64
JBE clr_33through64 JBE _33through64
CMPQ BX, $128 CMPQ BX, $128
JBE clr_65through128 JBE _65through128
CMPQ BX, $256 CMPQ BX, $256
JBE clr_129through256 JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch // TODO: use branch table and BSR to make this just a single dispatch
// TODO: for really big clears, use MOVNTDQ. // TODO: for really big clears, use MOVNTDQ.
clr_loop: loop:
MOVOU X0, 0(DI) MOVOU X0, 0(DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
...@@ -58,38 +58,38 @@ clr_loop: ...@@ -58,38 +58,38 @@ clr_loop:
SUBQ $256, BX SUBQ $256, BX
ADDQ $256, DI ADDQ $256, DI
CMPQ BX, $256 CMPQ BX, $256
JAE clr_loop JAE loop
JMP clr_tail JMP tail
clr_1or2: _1or2:
MOVB AX, (DI) MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1) MOVB AX, -1(DI)(BX*1)
RET RET
clr_0: _0:
RET RET
clr_3or4: _3or4:
MOVW AX, (DI) MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1) MOVW AX, -2(DI)(BX*1)
RET RET
clr_5through8: _5through8:
MOVL AX, (DI) MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1) MOVL AX, -4(DI)(BX*1)
RET RET
clr_9through16: _9through16:
MOVQ AX, (DI) MOVQ AX, (DI)
MOVQ AX, -8(DI)(BX*1) MOVQ AX, -8(DI)(BX*1)
RET RET
clr_17through32: _17through32:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_33through64: _33through64:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1) MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_65through128: _65through128:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
...@@ -99,7 +99,7 @@ clr_65through128: ...@@ -99,7 +99,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1) MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1) MOVOU X0, -16(DI)(BX*1)
RET RET
clr_129through256: _129through256:
MOVOU X0, (DI) MOVOU X0, (DI)
MOVOU X0, 16(DI) MOVOU X0, 16(DI)
MOVOU X0, 32(DI) MOVOU X0, 32(DI)
......
...@@ -10,40 +10,40 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8 ...@@ -10,40 +10,40 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
MOVL n+4(FP), BX MOVL n+4(FP), BX
XORL AX, AX XORL AX, AX
clr_tail: tail:
TESTL BX, BX TESTL BX, BX
JEQ clr_0 JEQ _0
CMPL BX, $2 CMPL BX, $2
JBE clr_1or2 JBE _1or2
CMPL BX, $4 CMPL BX, $4
JBE clr_3or4 JBE _3or4
CMPL BX, $8 CMPL BX, $8
JBE clr_5through8 JBE _5through8
CMPL BX, $16 CMPL BX, $16
JBE clr_9through16 JBE _9through16
MOVL BX, CX MOVL BX, CX
SHRL $2, CX SHRL $2, CX
REP REP
STOSL STOSL
ANDL $3, BX ANDL $3, BX
JNE clr_tail JNE tail
RET RET
clr_1or2: _1or2:
MOVB AX, (DI) MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1) MOVB AX, -1(DI)(BX*1)
RET RET
clr_0: _0:
RET RET
clr_3or4: _3or4:
MOVW AX, (DI) MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1) MOVW AX, -2(DI)(BX*1)
RET RET
clr_5through8: _5through8:
MOVL AX, (DI) MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1) MOVL AX, -4(DI)(BX*1)
RET RET
clr_9through16: _9through16:
MOVL AX, (DI) MOVL AX, (DI)
MOVL AX, 4(DI) MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1) MOVL AX, -8(DI)(BX*1)
......
...@@ -140,20 +140,20 @@ TEXT racecalladdr<>(SB), NOSPLIT, $0-0 ...@@ -140,20 +140,20 @@ TEXT racecalladdr<>(SB), NOSPLIT, $0-0
MOVQ g_racectx(R14), RARG0 // goroutine context MOVQ g_racectx(R14), RARG0 // goroutine context
// Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss). // Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
CMPQ RARG1, runtime·racearenastart(SB) CMPQ RARG1, runtime·racearenastart(SB)
JB racecalladdr_data JB data
CMPQ RARG1, runtime·racearenaend(SB) CMPQ RARG1, runtime·racearenaend(SB)
JB racecalladdr_call JB call
racecalladdr_data: data:
MOVQ $runtime·noptrdata(SB), R13 MOVQ $runtime·noptrdata(SB), R13
CMPQ RARG1, R13 CMPQ RARG1, R13
JB racecalladdr_ret JB ret
MOVQ $runtime·enoptrbss(SB), R13 MOVQ $runtime·enoptrbss(SB), R13
CMPQ RARG1, R13 CMPQ RARG1, R13
JAE racecalladdr_ret JAE ret
racecalladdr_call: call:
MOVQ AX, AX // w/o this 6a miscompiles this function MOVQ AX, AX // w/o this 6a miscompiles this function
JMP racecall<>(SB) JMP racecall<>(SB)
racecalladdr_ret: ret:
RET RET
// func runtime·racefuncenter(pc uintptr) // func runtime·racefuncenter(pc uintptr)
...@@ -335,9 +335,9 @@ TEXT racecall<>(SB), NOSPLIT, $0-0 ...@@ -335,9 +335,9 @@ TEXT racecall<>(SB), NOSPLIT, $0-0
MOVQ SP, R12 // callee-saved, preserved across the CALL MOVQ SP, R12 // callee-saved, preserved across the CALL
MOVQ m_g0(R13), R10 MOVQ m_g0(R13), R10
CMPQ R10, R14 CMPQ R10, R14
JE racecall_cont // already on g0 JE call // already on g0
MOVQ (g_sched+gobuf_sp)(R10), SP MOVQ (g_sched+gobuf_sp)(R10), SP
racecall_cont: call:
ANDQ $~15, SP // alignment for gcc ABI ANDQ $~15, SP // alignment for gcc ABI
CALL AX CALL AX
MOVQ R12, SP MOVQ R12, SP
......
...@@ -248,7 +248,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40 ...@@ -248,7 +248,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL BX, 0(SP) MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX MOVL $runtime·badsignal(SB), AX
CALL AX CALL AX
JMP sigtramp_ret JMP ret
// save g // save g
MOVL DI, 20(SP) MOVL DI, 20(SP)
...@@ -275,7 +275,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40 ...@@ -275,7 +275,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL 20(SP), DI MOVL 20(SP), DI
MOVL DI, g(CX) MOVL DI, g(CX)
sigtramp_ret: ret:
// call sigreturn // call sigreturn
MOVL context+16(FP), CX MOVL context+16(FP), CX
MOVL style+4(FP), BX MOVL style+4(FP), BX
......
...@@ -211,7 +211,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -211,7 +211,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVL DX, 0(SP) MOVL DX, 0(SP)
MOVQ $runtime·badsignal(SB), AX MOVQ $runtime·badsignal(SB), AX
CALL AX CALL AX
JMP sigtramp_ret JMP ret
// save g // save g
MOVQ R10, 48(SP) MOVQ R10, 48(SP)
...@@ -233,7 +233,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64 ...@@ -233,7 +233,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ 48(SP), R10 MOVQ 48(SP), R10
MOVQ R10, g(BX) MOVQ R10, g(BX)
sigtramp_ret: ret:
// call sigreturn // call sigreturn
MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle) MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle)
MOVQ 32(SP), DI // saved ucontext MOVQ 32(SP), DI // saved ucontext
......
...@@ -217,7 +217,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 ...@@ -217,7 +217,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP) MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX MOVL $runtime·badsignal(SB), AX
CALL AX CALL AX
JMP sigtramp_ret JMP ret
// save g // save g
MOVL DI, 20(SP) MOVL DI, 20(SP)
...@@ -243,7 +243,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 ...@@ -243,7 +243,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX MOVL 20(SP), BX
MOVL BX, g(CX) MOVL BX, g(CX)
sigtramp_ret: ret:
// call sigreturn // call sigreturn
MOVL context+8(FP), AX MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap MOVL $0, 0(SP) // syscall gap
......
...@@ -197,7 +197,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 ...@@ -197,7 +197,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP) MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX MOVL $runtime·badsignal(SB), AX
CALL AX CALL AX
JMP sigtramp_ret JMP ret
// save g // save g
MOVL DI, 20(SP) MOVL DI, 20(SP)
...@@ -223,7 +223,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44 ...@@ -223,7 +223,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX MOVL 20(SP), BX
MOVL BX, g(CX) MOVL BX, g(CX)
sigtramp_ret: ret:
// call sigreturn // call sigreturn
MOVL context+8(FP), AX MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap MOVL $0, 0(SP) // syscall gap
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment