@ -40,11 +40,12 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
TCG_REG_X16, TCG_REG_X17,
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
/* X16 reserved as temporary */
/* X17 reserved as temporary */
/* X18 reserved by system */
/* X19 reserved for AREG0 */
/* X29 reserved as fp */
@ -71,8 +72,10 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_X0 + slot;
}
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31
#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
@ -129,14 +132,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
(ALL_GENERAL_REGS & ~((1 < < TCG_REG_X0 ) | ( 1 < < TCG_REG_X1 ) | \
(1 < < TCG_REG_X2 ) | ( 1 < < TCG_REG_X3 ) ) )
#else
#define ALL_QLDST_REGS ALL_GENERAL_REGS
#endif
/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
@ -390,6 +385,10 @@ typedef enum {
I3305_LDR_v64 = 0x5c000000,
I3305_LDR_v128 = 0x9c000000,
/* Load/store exclusive. */
I3306_LDXP = 0xc8600000,
I3306_STXP = 0xc8200000,
/* Load/store register. Described here as 3.3.12, but the helper
that emits them can transform to 3.3.10 or 3.3.13. */
I3312_STRB = 0x38000000 | LDST_ST < < 22 | MO_8 < < 30 ,
@ -454,6 +453,9 @@ typedef enum {
I3406_ADR = 0x10000000,
I3406_ADRP = 0x90000000,
/* Add/subtract extended register instructions. */
I3501_ADD = 0x0b200000,
/* Add/subtract shifted register instructions (without a shift). */
I3502_ADD = 0x0b000000,
I3502_ADDS = 0x2b000000,
@ -624,6 +626,12 @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | (imm19 & 0x7ffff) < < 5 | rt ) ;
}
/*
 * Emit a 3.3.6 load/store exclusive pair insn (LDXP/STXP).
 * Field layout: Rs (store-exclusive status reg) [20:16], Rt2 [14:10],
 * Rn (base address) [9:5], Rt [4:0].
 */
static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    /* Original text had extraction-garbled "< <"; these are left shifts. */
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
TCGReg rt, int imm19)
{
@ -706,6 +714,14 @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | (disp & 3) < < 29 | ( disp & 0x1ffffc ) < < ( 5 - 2 ) | rd ) ;
}
/*
 * Emit a 3.5.1 add/subtract (extended register) insn.
 * Field layout: sf (32/64-bit) [31], Rm [20:16], option (extend type,
 * e.g. UXTW) [15:13], imm3 (shift amount 0-4) [12:10], Rn [9:5], Rd [4:0].
 */
static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    /* Original text had extraction-garbled "< <"; these are left shifts. */
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
@ -984,7 +1000,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
TCGReg temp = TCG_REG_TMP;
TCGReg temp = TCG_REG_TMP0 ;
if (offset < -0xffffff | | offset > 0xffffff) {
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
@ -1136,8 +1152,8 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
}
/* Worst-case scenario, move offset to temp register, use reg offset. */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0 , offset);
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0 );
}
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
@ -1353,8 +1369,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, BL, offset);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0 , (intptr_t)target);
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0 );
}
}
@ -1491,7 +1507,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
AArch64Insn insn;
if (rl == ah || (!const_bh & & rl == bh)) {
rl = TCG_REG_TMP;
rl = TCG_REG_TMP0 ;
}
if (const_bl) {
@ -1508,7 +1524,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
possibility of adding 0+const in the low part, and the
immediate add instructions encode XSP not XZR. Don't try
anything more elaborate here than loading another zero. */
al = TCG_REG_TMP;
al = TCG_REG_TMP0 ;
tcg_out_movi(s, ext, al, 0);
}
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
@ -1549,7 +1565,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
{
TCGReg a1 = a0;
if (is_ctz) {
a1 = TCG_REG_TMP;
a1 = TCG_REG_TMP0 ;
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
}
if (const_b & & b == (ext ? 64 : 32)) {
@ -1558,7 +1574,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
AArch64Insn sel = I3506_CSEL;
tcg_out_cmp(s, ext, a0, 0, 1);
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0 , a1);
if (const_b) {
if (b == -1) {
@ -1571,7 +1587,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
b = d;
}
}
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0 , b, TCG_COND_NE);
}
}
@ -1588,7 +1604,7 @@ bool tcg_target_has_memory_bswap(MemOp memop)
}
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_TMP }
.ntmp = 1, .tmp = { TCG_REG_TMP0 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@ -1633,19 +1649,19 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGType addr_type = s->addr_type;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
h->aa = atom_and_align_for_opc(s, opc,
have_lse2 ? MO_ATOM_WITHIN16
: MO_ATOM_IFALIGN,
false );
s_bits == MO_128 );
a_mask = (1 < < h- > aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1u < < s_bits ) - 1 ;
unsigned mem_index = get_mmuidx(oi);
TCGReg x3 ;
TCGReg addr_adj ;
TCGType mask_type;
uint64_t compare_mask;
@ -1657,27 +1673,27 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512 ) ;
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X 1, TCG_AREG0,
tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP 1, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
TCG_REG_X0, TCG_REG_X 0, addr_reg,
TCG_REG_TMP0, TCG_REG_TMP 0, addr_reg,
s->page_bits - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X 0);
/* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP 0);
/* Load the tlb comparator into X0, and the fast path addend into X1. */
tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X 1,
/* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP 1,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X 1,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP 1,
offsetof(CPUTLBEntry, addend));
/*
@ -1686,25 +1702,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* cross pages using the address of the last byte of the access.
*/
if (a_mask >= s_mask) {
x3 = addr_reg;
addr_adj = addr_reg;
} else {
addr_adj = TCG_REG_TMP2;
tcg_out_insn(s, 3401, ADDI, addr_type,
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
addr_adj, addr_reg, s_mask - a_mask);
}
compare_mask = (uint64_t)s->page_mask | a_mask;
/* Store the page mask part of the address into X3. */
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
/* Store the page mask part of the address into TMP2. */
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
addr_adj, compare_mask);
/* Perform the address comparison. */
tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3 , 0);
tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2 , 0);
/* If not equal, we jump to the slow path. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
h->base = TCG_REG_X1,
h->base = TCG_REG_TMP1;
h->index = addr_reg;
h->index_ext = addr_type;
#else
@ -1822,6 +1839,108 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
}
}
/*
 * Emit a 128-bit guest memory load or store.
 *
 * @datalo, @datahi: low/high 64-bit halves of the data.
 * @addr_reg: guest address register.
 * @oi: memory operation index (memop + mmu index).
 * @is_ld: true for load, false for store.
 *
 * When single-copy atomicity for the full 128 bits is not required
 * (h.aa.atom < MO_128) or the cpu has LSE2, a plain LDP/STP suffices.
 * Otherwise an LDXP/STXP loop provides atomicity; if 16-byte alignment
 * was not already established, a runtime alignment test selects between
 * the exclusive-pair loop and the LDP/STP path.
 *
 * Garbled "& h", "| |", "& &" from the extracted patch text have been
 * restored to "&h", "||", "&&".
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing. */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             * 1: ldxp t0, t1, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair. */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
@ -1847,7 +1966,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
set_jmp_insn_offset(s, which);
tcg_out32(s, I3206_B);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0 );
set_jmp_reset_offset(s, which);
}
@ -1866,7 +1985,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
ptrdiff_t i_offset = i_addr - jmp_rx;
/* Note that we asserted this in range in tcg_out_goto_tb. */
insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
insn = deposit32(I3305_LDR | TCG_REG_TMP0 , 5, 19, i_offset >> 2);
}
qatomic_set((uint32_t *)jmp_rw, insn);
flush_idcache_range(jmp_rx, jmp_rw, 4);
@ -2060,13 +2179,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_rem_i64:
case INDEX_op_rem_i32:
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0 , a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0 , a2, a1);
break;
case INDEX_op_remu_i64:
case INDEX_op_remu_i32:
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0 , a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0 , a2, a1);
break;
case INDEX_op_shl_i64:
@ -2110,8 +2229,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0 , TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0 );
}
break;
@ -2161,6 +2280,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
break;
case INDEX_op_bswap64_i64:
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
@ -2517,8 +2644,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
}
}
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0 , 0);
a2 = TCG_VEC_TMP0 ;
}
if (is_scalar) {
insn = cmp_scalar_insn[cond];
@ -2799,12 +2926,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, l);
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(r, r, r);
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(lZ, l);
return C_O0_I2(rZ, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(rZ, rZ, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
@ -2900,9 +3033,11 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */