diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 4fa629cb14..54dbee2554 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -417,7 +417,18 @@ static void loongarch_max_initfn(Object *obj)
     if (tcg_enabled()) {
+        uint32_t data;
+
         cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, 1);
         cpu->msgint = ON_OFF_AUTO_AUTO;
-        cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, HPTW, 1);
+
+        data = cpu->env.cpucfg[2];
+        data = FIELD_DP32(data, CPUCFG2, HPTW, 1);
+        /* Enable LA v1.1 instructions */
+        data = FIELD_DP32(data, CPUCFG2, FRECIPE, 1);
+        data = FIELD_DP32(data, CPUCFG2, LAM_BH, 1);
+        data = FIELD_DP32(data, CPUCFG2, LAMCAS, 1);
+        data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
+        data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
+        cpu->env.cpucfg[2] = data;
     }
 }
 
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 0485cdbda0..8c198fa584 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -138,6 +138,11 @@ FIELD(CPUCFG2, LBT_ALL, 18, 3)
 FIELD(CPUCFG2, LSPW, 21, 1)
 FIELD(CPUCFG2, LAM, 22, 1)
 FIELD(CPUCFG2, HPTW, 24, 1)
+FIELD(CPUCFG2, FRECIPE, 25, 1)
+FIELD(CPUCFG2, LAM_BH, 27, 1)
+FIELD(CPUCFG2, LAMCAS, 28, 1)
+FIELD(CPUCFG2, LLACQ_SCREL, 29, 1)
+FIELD(CPUCFG2, SCQ, 30, 1)
 
 /* cpucfg[3] bits */
 FIELD(CPUCFG3, CCDMA, 0, 1)
@@ -377,6 +382,8 @@ typedef struct CPUArchState {
     uint32_t fcsr0_mask;
     uint64_t lladdr; /* LL virtual address compared against SC */
     uint64_t llval;
+    uint64_t llval_high; /* For 128-bit atomic SC.Q */
+    uint64_t llbit_scq; /* Potential LL.D+LD.D+SC.Q sequence in effect */
 #endif
 #ifndef CONFIG_USER_ONLY
 #ifdef CONFIG_TCG
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 63989a6282..3249ab7ac6 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -478,6 +478,10 @@ INSN(frecip_s, ff)
 INSN(frecip_d, ff)
 INSN(frsqrt_s, ff)
 INSN(frsqrt_d, ff)
+INSN(frecipe_s, ff)
+INSN(frecipe_d, ff)
+INSN(frsqrte_s, ff)
+INSN(frsqrte_d, ff)
 INSN(fmov_s, ff)
 INSN(fmov_d, ff)
 INSN(movgr2fr_w, fr)
@@ -580,6 +584,27 @@ INSN(fldx_s, frr)
 INSN(fldx_d, frr)
 INSN(fstx_s, frr)
 INSN(fstx_d, frr)
+INSN(sc_q, rrr)
+INSN(llacq_w, rr_i)
+INSN(screl_w, rr_i)
+INSN(llacq_d, rr_i)
+INSN(screl_d, rr_i)
+INSN(amcas_b, rrr)
+INSN(amcas_h, rrr)
+INSN(amcas_w, rrr)
+INSN(amcas_d, rrr)
+INSN(amcas_db_b, rrr)
+INSN(amcas_db_h, rrr)
+INSN(amcas_db_w, rrr)
+INSN(amcas_db_d, rrr)
+INSN(amswap_b, rrr)
+INSN(amswap_h, rrr)
+INSN(amadd_b, rrr)
+INSN(amadd_h, rrr)
+INSN(amswap_db_b, rrr)
+INSN(amswap_db_h, rrr)
+INSN(amadd_db_b, rrr)
+INSN(amadd_db_h, rrr)
 INSN(amswap_w, rrr)
 INSN(amswap_d, rrr)
 INSN(amadd_w, rrr)
@@ -1413,6 +1438,10 @@ INSN_LSX(vfrecip_s, vv)
 INSN_LSX(vfrecip_d, vv)
 INSN_LSX(vfrsqrt_s, vv)
 INSN_LSX(vfrsqrt_d, vv)
+INSN_LSX(vfrecipe_s, vv)
+INSN_LSX(vfrecipe_d, vv)
+INSN_LSX(vfrsqrte_s, vv)
+INSN_LSX(vfrsqrte_d, vv)
 
 INSN_LSX(vfcvtl_s_h, vv)
 INSN_LSX(vfcvth_s_h, vv)
@@ -2327,6 +2356,10 @@ INSN_LASX(xvfrecip_s, vv)
 INSN_LASX(xvfrecip_d, vv)
 INSN_LASX(xvfrsqrt_s, vv)
 INSN_LASX(xvfrsqrt_d, vv)
+INSN_LASX(xvfrecipe_s, vv)
+INSN_LASX(xvfrecipe_d, vv)
+INSN_LASX(xvfrsqrte_s, vv)
+INSN_LASX(xvfrsqrte_d, vv)
 
 INSN_LASX(xvfcvtl_s_h, vv)
 INSN_LASX(xvfcvth_s_h, vv)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 62f58cc541..3089d42044 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -69,6 +69,7 @@
 @rr_i14s2         .... ....  .............. rj:5 rd:5    &rr_i imm=%i14s2
 @rr_i16                     .... .. imm:s16 rj:5 rd:5    &rr_i
 @rr_i16s2         .... ..  ................ rj:5 rd:5    &rr_i imm=%offs16
+@rr_i0            .... ..  ................ rj:5 rd:5    &rr_i imm=0
 @hint_r_i12           .... ...... imm:s12 rj:5 hint:5    &hint_r_i
 @hint_rr         .... ........ ..... rk:5 rj:5 hint:5    &hint_rr
 @rrr_sa2p1        .... ........ ... .. rk:5 rj:5 rd:5    &rrr_sa  sa=%sa2p1
@@ -261,6 +262,27 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
 sc_w            0010 0001 .............. ..... .....     @rr_i14s2
 ll_d            0010 0010 .............. ..... .....     @rr_i14s2
 sc_d            0010 0011 .............. ..... .....     @rr_i14s2
+sc_q            0011 10000101 01110 ..... ..... .....    @rrr
+llacq_w         0011 10000101 01111 00000 ..... .....    @rr_i0
+screl_w         0011 10000101 01111 00001 ..... .....    @rr_i0
+llacq_d         0011 10000101 01111 00010 ..... .....    @rr_i0
+screl_d         0011 10000101 01111 00011 ..... .....    @rr_i0
+amcas_b         0011 10000101 10000 ..... ..... .....    @rrr
+amcas_h         0011 10000101 10001 ..... ..... .....    @rrr
+amcas_w         0011 10000101 10010 ..... ..... .....    @rrr
+amcas_d         0011 10000101 10011 ..... ..... .....    @rrr
+amcas_db_b      0011 10000101 10100 ..... ..... .....    @rrr
+amcas_db_h      0011 10000101 10101 ..... ..... .....    @rrr
+amcas_db_w      0011 10000101 10110 ..... ..... .....    @rrr
+amcas_db_d      0011 10000101 10111 ..... ..... .....    @rrr
+amswap_b        0011 10000101 11000 ..... ..... .....    @rrr
+amswap_h        0011 10000101 11001 ..... ..... .....    @rrr
+amadd_b         0011 10000101 11010 ..... ..... .....    @rrr
+amadd_h         0011 10000101 11011 ..... ..... .....    @rrr
+amswap_db_b     0011 10000101 11100 ..... ..... .....    @rrr
+amswap_db_h     0011 10000101 11101 ..... ..... .....    @rrr
+amadd_db_b      0011 10000101 11110 ..... ..... .....    @rrr
+amadd_db_h      0011 10000101 11111 ..... ..... .....    @rrr
 amswap_w        0011 10000110 00000 ..... ..... .....    @rrr
 amswap_d        0011 10000110 00001 ..... ..... .....    @rrr
 amadd_w         0011 10000110 00010 ..... ..... .....    @rrr
@@ -355,6 +377,10 @@ frecip_s 0000 00010001 01000 10101 ..... ..... @ff
 frecip_d        0000 00010001 01000 10110 ..... .....    @ff
 frsqrt_s        0000 00010001 01000 11001 ..... .....    @ff
 frsqrt_d        0000 00010001 01000 11010 ..... .....    @ff
+frecipe_s       0000 00010001 01000 11101 ..... .....    @ff
+frecipe_d       0000 00010001 01000 11110 ..... .....    @ff
+frsqrte_s       0000 00010001 01001 00001 ..... .....    @ff
+frsqrte_d       0000 00010001 01001 00010 ..... .....    @ff
 fscaleb_s       0000 00010001 00001 ..... ..... .....    @fff
 fscaleb_d       0000 00010001 00010 ..... ..... .....    @fff
 flogb_s         0000 00010001 01000 01001 ..... .....    @ff
@@ -1099,6 +1125,10 @@ vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv
 vfrecip_d        0111 00101001 11001 11110 ..... .....    @vv
 vfrsqrt_s        0111 00101001 11010 00001 ..... .....    @vv
 vfrsqrt_d        0111 00101001 11010 00010 ..... .....    @vv
+vfrecipe_s       0111 00101001 11010 00101 ..... .....    @vv
+vfrecipe_d       0111 00101001 11010 00110 ..... .....    @vv
+vfrsqrte_s       0111 00101001 11010 01001 ..... .....    @vv
+vfrsqrte_d       0111 00101001 11010 01010 ..... .....    @vv
 
 vfcvtl_s_h       0111 00101001 11011 11010 ..... .....    @vv
 vfcvth_s_h       0111 00101001 11011 11011 ..... .....    @vv
@@ -1863,6 +1893,10 @@ xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv
 xvfrecip_d       0111 01101001 11001 11110 ..... .....    @vv
 xvfrsqrt_s       0111 01101001 11010 00001 ..... .....    @vv
 xvfrsqrt_d       0111 01101001 11010 00010 ..... .....    @vv
+xvfrecipe_s      0111 01101001 11010 00101 ..... .....    @vv
+xvfrecipe_d      0111 01101001 11010 00110 ..... .....    @vv
+xvfrsqrte_s      0111 01101001 11010 01001 ..... .....    @vv
+xvfrsqrte_d      0111 01101001 11010 01010 ..... .....    @vv
 
 xvfcvtl_s_h      0111 01101001 11011 11010 ..... .....    @vv
 xvfcvth_s_h      0111 01101001 11011 11011 ..... .....    @vv
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index a1de389e51..4bf8dab780 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -3,32 +3,97 @@
  * Copyright (c) 2021 Loongson Technology Corporation Limited
  */
 
-static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+/*
+ * LL.{W,D} / LLACQ.{W,D}: load the value at rj + imm into rd and record the
+ * address and the loaded value in lladdr/llval for a later SC.
+ * When SC.Q is available, a 16-byte aligned LL.D loads the whole 16-byte
+ * block so that a following LD.D of the upper half and the SC.Q work on
+ * one snapshot (llval/llval_high, flagged by llbit_scq).
+ * @acq: emit a barrier after the load (LLACQ.* variants).
+ */
+static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool acq)
 {
     TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
     TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
     TCGv t0 = make_address_i(ctx, src1, a->imm);
+    TCGv_i128 t16 = tcg_temp_new_i128();
+    TCGv mask = tcg_constant_tl(0xf);
+    TCGv one = tcg_constant_tl(1);
+    TCGv zero = tcg_constant_tl(0);
+    TCGLabel *l1 = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    if (avail_SCQ(ctx) && mop == MO_LEUQ) {
+        /*
+         * The LL.D+LD.D may be paired with SC.Q,
+         * load 128-bit if aligned: (t0 & 0xf) == 0
+         */
+        tcg_gen_and_tl(t1, t0, mask);
+        tcg_gen_brcond_tl(TCG_COND_EQ, t1, zero, l1);
+        /* fallthrough if not aligned to 16B */
+    }
 
-    tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop);
+    tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop | MO_ALIGN);
     tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
     tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
+    if (avail_SCQ(ctx)) {
+        /* Not an SC.Q candidate: invalidate any stale llval_high snapshot */
+        tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+    }
     gen_set_gpr(a->rd, t1, EXT_NONE);
 
+    if (avail_SCQ(ctx) && mop == MO_LEUQ) {
+        tcg_gen_br(done);
+
+        gen_set_label(l1);
+
+        /* Load 16B data and save into llval/llval_high */
+        tcg_gen_qemu_ld_i128(t16, t0, ctx->mem_idx, MO_128 | MO_ALIGN);
+        tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
+        tcg_gen_extr_i128_i64(t1, t2, t16);
+        tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
+        tcg_gen_st_tl(t2, tcg_env, offsetof(CPULoongArchState, llval_high));
+        tcg_gen_st_tl(one, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+        gen_set_gpr(a->rd, t1, EXT_NONE);
+
+        gen_set_label(done);
+    }
+
+    if (acq) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    }
+
     return true;
 }
 
-static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+/*
+ * SC.{W,D} / SCREL.{W,D}: store rd to rj + imm if the address matches lladdr
+ * and memory still holds llval; rd receives 1 on success and 0 on failure.
+ * @rel: emit a barrier before the store (SCREL.* variants).
+ */
+static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
 {
     TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
     TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
     TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
     TCGv t0 = tcg_temp_new();
     TCGv val = tcg_temp_new();
+    TCGv zero = tcg_constant_tl(0);
 
     TCGLabel *l1 = gen_new_label();
     TCGLabel *done = gen_new_label();
 
     tcg_gen_addi_tl(t0, src1, a->imm);
+
+    if (rel) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    }
+
+    if (avail_SCQ(ctx)) {
+        tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+    }
+
     tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
     tcg_gen_movi_tl(dest, 0);
     tcg_gen_br(done);
@@ -37,7 +102,7 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
     tcg_gen_mov_tl(val, src2);
     /* generate cmpxchg */
     tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval,
-                              val, ctx->mem_idx, mop);
+                              val, ctx->mem_idx, mop | MO_ALIGN);
     tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval);
     gen_set_label(done);
     gen_set_gpr(a->rd, dest, EXT_NONE);
@@ -45,6 +110,74 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
     return true;
 }
 
+/*
+ * SC.Q: 128-bit store-conditional completing an LL.D + LD.D sequence.
+ * Stores {rk (high), rd (low)} to the address in rj if it matches lladdr
+ * and memory still holds llval/llval_high; rd receives 1 on success and
+ * 0 on failure.
+ */
+static bool gen_sc_q(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+    TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
+    TCGv src3 = gpr_src(ctx, a->rk, EXT_NONE);
+    TCGv_i128 t16 = tcg_temp_new_i128();
+    TCGv_i128 c16 = tcg_temp_new_i128();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    TCGv zero = tcg_constant_tl(0);
+
+    TCGLabel *l1 = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+    tcg_gen_brcond_tl(TCG_COND_EQ, src1, cpu_lladdr, l1);
+    tcg_gen_movi_tl(dest, 0);
+    tcg_gen_br(done);
+
+    gen_set_label(l1);
+    tcg_gen_concat_i64_i128(t16, src2, src3);
+    tcg_gen_concat_i64_i128(c16, cpu_llval,
+                            cpu_llval_high);
+
+    /* generate cmpxchg */
+    tcg_gen_atomic_cmpxchg_i128(t16, cpu_lladdr, c16,
+                                t16, ctx->mem_idx, mop | MO_ALIGN);
+
+    /* check if success */
+    tcg_gen_extr_i128_i64(t1, t2, t16);
+    tcg_gen_xor_i64(t1, t1, cpu_llval);
+    tcg_gen_xor_i64(t2, t2, cpu_llval_high);
+    tcg_gen_or_i64(t1, t1, t2);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, dest, t1, 0);
+    gen_set_label(done);
+    gen_set_gpr(a->rd, dest, EXT_NONE);
+
+    return true;
+}
+
+/*
+ * AMCAS[_DB].{B,H,W,D}: atomically compare the memory at rj with rd and,
+ * if they are equal, store rk there; rd receives the old memory value.
+ */
+static bool gen_cas(DisasContext *ctx, arg_rrr *a,
+                    void (*func)(TCGv, TCGv, TCGv, TCGv, TCGArg, MemOp),
+                    MemOp mop)
+{
+    TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+    TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv val = gpr_src(ctx, a->rk, EXT_NONE);
+    TCGv old = gpr_src(ctx, a->rd, EXT_NONE);
+
+    addr = make_address_i(ctx, addr, 0);
+
+    func(dest, addr, old, val, ctx->mem_idx, mop | MO_ALIGN);
+    gen_set_gpr(a->rd, dest, EXT_NONE);
+
+    return true;
+}
+
 static bool gen_am(DisasContext *ctx, arg_rrr *a,
                    void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp),
                    MemOp mop)
@@ -63,16 +196,37 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
 
     addr = make_address_i(ctx, addr, 0);
 
-    func(dest, addr, val, ctx->mem_idx, mop);
+    func(dest, addr, val, ctx->mem_idx, mop | MO_ALIGN);
     gen_set_gpr(a->rd, dest, EXT_NONE);
 
     return true;
 }
 
-TRANS(ll_w, ALL, gen_ll, MO_LESL)
-TRANS(sc_w, ALL, gen_sc, MO_LESL)
-TRANS(ll_d, 64, gen_ll, MO_LEUQ)
-TRANS(sc_d, 64, gen_sc, MO_LEUQ)
+TRANS(ll_w, ALL, gen_ll, MO_LESL, false)
+TRANS(sc_w, ALL, gen_sc, MO_LESL, false)
+TRANS(ll_d, 64, gen_ll, MO_LEUQ, false)
+TRANS(sc_d, 64, gen_sc, MO_LEUQ, false)
+TRANS64(sc_q, SCQ, gen_sc_q, MO_128)
+TRANS(llacq_w, LLACQ_SCREL, gen_ll, MO_LESL, true)
+TRANS(screl_w, LLACQ_SCREL, gen_sc, MO_LESL, true)
+TRANS(llacq_d, LLACQ_SCREL_64, gen_ll, MO_LEUQ, true)
+TRANS(screl_d, LLACQ_SCREL_64, gen_sc, MO_LEUQ, true)
+TRANS(amcas_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LESW)
+TRANS(amcas_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LESL)
+TRANS64(amcas_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LEUQ)
+TRANS(amcas_db_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_db_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LESW)
+TRANS(amcas_db_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LESL)
+TRANS64(amcas_db_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_LEUQ)
+TRANS(amswap_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_LESW)
+TRANS(amadd_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_LESW)
+TRANS(amswap_db_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_db_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_LESW)
+TRANS(amadd_db_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_db_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_LESW)
 TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_LESL)
 TRANS64(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_LEUQ)
 TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_LESL)
diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
index ff6cf3448e..eed6ab7312 100644
--- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
@@ -191,6 +191,10 @@ TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s)
 TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d)
 TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s)
 TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d)
+TRANS(frecipe_s, FRECIPE_FP_SP, gen_ff, gen_helper_frecip_s)
+TRANS(frecipe_d, FRECIPE_FP_DP, gen_ff, gen_helper_frecip_d)
+TRANS(frsqrte_s, FRECIPE_FP_SP, gen_ff, gen_helper_frsqrt_s)
+TRANS(frsqrte_d, FRECIPE_FP_DP, gen_ff, gen_helper_frsqrt_d)
 TRANS64(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s)
 TRANS64(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d)
 TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s)
diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
index 90bb0815ff..e287d46363 100644
--- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
@@ -7,11 +7,38 @@ static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop)
 {
     TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
     TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv t1 = tcg_temp_new();
+    TCGv mask = tcg_constant_tl(0x8);
+    TCGv zero = tcg_constant_tl(0);
+    TCGLabel *done = gen_new_label();
+    TCGLabel *l1 = gen_new_label();
 
     addr = make_address_i(ctx, addr, a->imm);
 
+    if (avail_SCQ(ctx) && mop == MO_LEUQ) {
+        /*
+         * The LL.D+LD.D may be paired with SC.Q,
+         * use llval_high if llbit_scq && (addr == lladdr ^ 0x8)
+         *
+         * NOTE(review): llbit_scq stays set when an aligned LL.D is not
+         * followed by LD.D or SC; confirm it is cleared on exception return.
+         */
+        tcg_gen_brcond_tl(TCG_COND_EQ, cpu_llbit_scq, zero, l1);
+        tcg_gen_xor_tl(t1, addr, mask);
+        tcg_gen_brcond_tl(TCG_COND_NE, cpu_lladdr, t1, l1);
+        gen_set_gpr(a->rd, cpu_llval_high, EXT_NONE);
+        /* The snapshot is single-use: later loads must read memory again */
+        tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+        tcg_gen_br(done);
+        gen_set_label(l1);
+    }
+
     tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
     gen_set_gpr(a->rd, dest, EXT_NONE);
+
+    if (avail_SCQ(ctx) && mop == MO_LEUQ) {
+        gen_set_label(done);
+    }
     return true;
 }
 
diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
index ea7e705bab..195b89f788 100644
--- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
@@ -4407,12 +4407,20 @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
 TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
 TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
 TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(vfrecipe_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecipe_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrte_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrte_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
 TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
 TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
 TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
 TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
 TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
 TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfrecipe_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecipe_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrte_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrte_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
 
 TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
 TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index c23d2a614a..30f375b33f 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -24,7 +24,7 @@
 
 /* Global register indices */
 TCGv cpu_gpr[32], cpu_pc;
-static TCGv cpu_lladdr, cpu_llval;
+static TCGv cpu_lladdr, cpu_llval, cpu_llval_high, cpu_llbit_scq;
 
 #define HELPER_H "helper.h"
 #include "exec/helper-info.c.inc"
@@ -361,6 +361,10 @@ void loongarch_translate_init(void)
                     offsetof(CPULoongArchState, lladdr), "lladdr");
     cpu_llval = tcg_global_mem_new(tcg_env,
                     offsetof(CPULoongArchState, llval), "llval");
+    cpu_llval_high = tcg_global_mem_new(tcg_env,
+                    offsetof(CPULoongArchState, llval_high), "llval_high");
+    cpu_llbit_scq = tcg_global_mem_new(tcg_env,
+                    offsetof(CPULoongArchState, llbit_scq), "llbit_scq");
 
 #ifndef CONFIG_USER_ONLY
     loongarch_csr_translate_init();
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index bbe015ba57..ba1c89e57b 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -21,15 +21,27 @@
 #define avail_ALL(C)   true
 #define avail_64(C)    (FIELD_EX32((C)->cpucfg1, CPUCFG1, ARCH) == \
                         CPUCFG1_ARCH_LA64)
-#define avail_FP(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
-#define avail_FP_SP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
-#define avail_FP_DP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
-#define avail_LSPW(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
-#define avail_LAM(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
-#define avail_LSX(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
-#define avail_LASX(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
-#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
-#define avail_CRC(C)   (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+#define avail_FP(C)     (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
+#define avail_FP_SP(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
+#define avail_FP_DP(C)  (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
+#define avail_LSPW(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
+#define avail_LAM(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
+#define avail_LAM_BH(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM_BH))
+#define avail_LAMCAS(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAMCAS))
+#define avail_LSX(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C)   (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
+#define avail_SCQ(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, SCQ))
+#define avail_IOCSR(C)  (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
+#define avail_CRC(C)    (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+
+#define avail_FRECIPE(C)       (FIELD_EX32((C)->cpucfg2, CPUCFG2, FRECIPE))
+#define avail_FRECIPE_FP_SP(C) (avail_FRECIPE(C) && avail_FP_SP(C))
+#define avail_FRECIPE_FP_DP(C) (avail_FRECIPE(C) && avail_FP_DP(C))
+#define avail_FRECIPE_LSX(C)   (avail_FRECIPE(C) && avail_LSX(C))
+#define avail_FRECIPE_LASX(C)  (avail_FRECIPE(C) && avail_LASX(C))
+
+#define avail_LLACQ_SCREL(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
+#define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
 
 /*
  * If an operation is being performed on less than TARGET_LONG_BITS,