mirror of
https://github.com/yuzu-emu/unicorn
synced 2024-11-25 07:28:59 +00:00
arm/translate-a64: add FP16 pairwise ops simd_three_reg_same_fp16
This includes FMAXNMP, FADDP, FMAXP, FMINNMP, FMINP. Backports commit 7a2c6e618156674cf9eac8bf36e79f674fbf974e from qemu
This commit is contained in:
parent
4b2577537b
commit
c6fda07628
1 changed files with 132 additions and 75 deletions
|
@ -10399,6 +10399,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
|
|||
int datasize, elements;
|
||||
int pass;
|
||||
TCGv_ptr fpst;
|
||||
bool pairwise = false;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
|
||||
unallocated_encoding(s);
|
||||
|
@ -10424,91 +10425,147 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
|
|||
datasize = is_q ? 128 : 64;
|
||||
elements = datasize / 16;
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x10: /* FMAXNMP */
|
||||
case 0x12: /* FADDP */
|
||||
case 0x16: /* FMAXP */
|
||||
case 0x18: /* FMINNMP */
|
||||
case 0x1e: /* FMINP */
|
||||
pairwise = true;
|
||||
break;
|
||||
}
|
||||
|
||||
fpst = get_fpstatus_ptr(tcg_ctx, true);
|
||||
|
||||
for (pass = 0; pass < elements; pass++) {
|
||||
if (pairwise) {
|
||||
int maxpass = is_q ? 8 : 4;
|
||||
TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
|
||||
TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
|
||||
TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
|
||||
TCGv_i32 tcg_res[8];
|
||||
|
||||
read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
|
||||
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
|
||||
for (pass = 0; pass < maxpass; pass++) {
|
||||
int passreg = pass < (maxpass / 2) ? rn : rm;
|
||||
int passelt = (pass << 1) & (maxpass - 1);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x0: /* FMAXNM */
|
||||
gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1: /* FMLA */
|
||||
read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res,
|
||||
fpst);
|
||||
break;
|
||||
case 0x2: /* FADD */
|
||||
gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x3: /* FMULX */
|
||||
gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x4: /* FCMEQ */
|
||||
gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x6: /* FMAX */
|
||||
gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x7: /* FRECPS */
|
||||
gen_helper_recpsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x8: /* FMINNM */
|
||||
gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x9: /* FMLS */
|
||||
/* As usual for ARM, separate negation for fused multiply-add */
|
||||
tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000);
|
||||
read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res,
|
||||
fpst);
|
||||
break;
|
||||
case 0xa: /* FSUB */
|
||||
gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0xe: /* FMIN */
|
||||
gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0xf: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x13: /* FMUL */
|
||||
gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x14: /* FCMGE */
|
||||
gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x15: /* FACGE */
|
||||
gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x17: /* FDIV */
|
||||
gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1a: /* FABD */
|
||||
gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_res, 0x7fff);
|
||||
break;
|
||||
case 0x1c: /* FCMGT */
|
||||
gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1d: /* FACGT */
|
||||
gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
|
||||
__func__, insn, fpopcode, s->pc);
|
||||
g_assert_not_reached();
|
||||
read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
|
||||
read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
|
||||
tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x10: /* FMAXNMP */
|
||||
gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2,
|
||||
fpst);
|
||||
break;
|
||||
case 0x12: /* FADDP */
|
||||
gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x16: /* FMAXP */
|
||||
gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x18: /* FMINNMP */
|
||||
gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2,
|
||||
fpst);
|
||||
break;
|
||||
case 0x1e: /* FMINP */
|
||||
gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
for (pass = 0; pass < maxpass; pass++) {
|
||||
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
|
||||
}
|
||||
|
||||
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_res);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_op1);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_op2);
|
||||
} else {
|
||||
for (pass = 0; pass < elements; pass++) {
|
||||
TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
|
||||
TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
|
||||
TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
|
||||
|
||||
read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
|
||||
read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
|
||||
|
||||
switch (fpopcode) {
|
||||
case 0x0: /* FMAXNM */
|
||||
gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1: /* FMLA */
|
||||
read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res,
|
||||
fpst);
|
||||
break;
|
||||
case 0x2: /* FADD */
|
||||
gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x3: /* FMULX */
|
||||
gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x4: /* FCMEQ */
|
||||
gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x6: /* FMAX */
|
||||
gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x7: /* FRECPS */
|
||||
gen_helper_recpsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x8: /* FMINNM */
|
||||
gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x9: /* FMLS */
|
||||
/* As usual for ARM, separate negation for fused multiply-add */
|
||||
tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000);
|
||||
read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res,
|
||||
fpst);
|
||||
break;
|
||||
case 0xa: /* FSUB */
|
||||
gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0xe: /* FMIN */
|
||||
gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0xf: /* FRSQRTS */
|
||||
gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x13: /* FMUL */
|
||||
gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x14: /* FCMGE */
|
||||
gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x15: /* FACGE */
|
||||
gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x17: /* FDIV */
|
||||
gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1a: /* FABD */
|
||||
gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_res, 0x7fff);
|
||||
break;
|
||||
case 0x1c: /* FCMGT */
|
||||
gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
case 0x1d: /* FACGT */
|
||||
gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
|
||||
__func__, insn, fpopcode, s->pc);
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_res);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_op1);
|
||||
tcg_temp_free_i32(tcg_ctx, tcg_op2);
|
||||
}
|
||||
}
|
||||
|
||||
tcg_temp_free_ptr(tcg_ctx, fpst);
|
||||
|
|
Loading…
Reference in a new issue