target/i386: optimize indirect branches

Speed up indirect branches by jumping to the target if it is valid.

Softmmu measurements (see later commit for user-mode numbers):

Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0.

- SPECint06 (test set), x86_64-softmmu (Ubuntu 16.04 guest). Host: Intel i7-4790K @ 4.00GHz

2.4x +-+--------------------------------------------------------------------------------------------------------------+-+
| |
| cross |
2.2x +cross+jr..........................................................................+++...........................+-+
| | |
| +++ | |
2x +-+..............................................................................|..|............................+-+
| | | |
| | | |
1.8x +-+..............................................................................|####...........................+-+
| |# |# |
| **** |# |
1.6x +-+............................................................................*.|*.|#...........................+-+
| * |* |# |
| * |* |# |
1.4x +-+.......................................................................+++..*.|*.|#...........................+-+
| ++++++ #### * |*++# +++ |
| +++ | | #++# *++* # +++ | |
1.2x +-+......................###.....####....+++............|..|...........****..#.*..*..#....####...|.###.....####..+-+
| +++ **** # **** # #### ***### *++* # * * # #++# ****|# +++#++# |
| ****### +++ *++* # *++* # ++# # #### *|* |# +++ * * # * * # *** # *| *|# **** # |
1x +-++-*++*++#++***###++*++*+#++*+-*++#+****++#++***++#+-*+*++#-+****##++*++*-+#+*++*-+#++*+*++#++*-+*+#++*++*++#-++-+
| * * # * * # * * # * * # * * # * * # *|* |# *++* # * * # * * # * * # * * # * * # |
| * * # * * # * * # * * # * * # * * # *+*++# * * # * * # * * # * * # * * # * * # |
0.8x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+
astar bzip2 gcc gobmk h264ref hmmer libquantum mcf omnetpp perlbench sjeng xalancbmk hmean
png: http://imgur.com/DU36YFU

NB. 'cross' represents the previous commit; 'cross+jr' represents the previous commit plus this one.

Backports commit b4aa297781ceddef79deb0e99da7817551fa89f8 from qemu
This commit is contained in:
Emilio G. Cota 2018-03-03 14:10:11 -05:00 committed by Lioncash
parent 3895eea3b4
commit 2d16da435e
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7

View file

@@ -5642,7 +5642,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_push_v(s, cpu_T1); gen_push_v(s, cpu_T1);
gen_op_jmp_v(tcg_ctx, cpu_T0); gen_op_jmp_v(tcg_ctx, cpu_T0);
gen_bnd_jmp(s); gen_bnd_jmp(s);
gen_eob(s); gen_jr(s, cpu_T0);
break; break;
case 3: /* lcall Ev */ case 3: /* lcall Ev */
gen_op_ld_v(s, ot, cpu_T1, cpu_A0); gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -5660,7 +5660,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_const_i32(tcg_ctx, dflag - 1), tcg_const_i32(tcg_ctx, dflag - 1),
tcg_const_i32(tcg_ctx, s->pc - s->cs_base)); tcg_const_i32(tcg_ctx, s->pc - s->cs_base));
} }
gen_eob(s); tcg_gen_ld_tl(tcg_ctx, cpu_tmp4, tcg_ctx->cpu_env, offsetof(CPUX86State, eip));
gen_jr(s, cpu_tmp4);
break; break;
case 4: /* jmp Ev */ case 4: /* jmp Ev */
if (dflag == MO_16) { if (dflag == MO_16) {
@@ -5668,7 +5669,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
} }
gen_op_jmp_v(tcg_ctx, cpu_T0); gen_op_jmp_v(tcg_ctx, cpu_T0);
gen_bnd_jmp(s); gen_bnd_jmp(s);
gen_eob(s); gen_jr(s, cpu_T0);
break; break;
case 5: /* ljmp Ev */ case 5: /* ljmp Ev */
gen_op_ld_v(s, ot, cpu_T1, cpu_A0); gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
@@ -5683,7 +5684,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_movl_seg_T0_vm(tcg_ctx, R_CS); gen_op_movl_seg_T0_vm(tcg_ctx, R_CS);
gen_op_jmp_v(tcg_ctx, cpu_T1); gen_op_jmp_v(tcg_ctx, cpu_T1);
} }
gen_eob(s); tcg_gen_ld_tl(tcg_ctx, cpu_tmp4, tcg_ctx->cpu_env, offsetof(CPUX86State, eip));
gen_jr(s, cpu_tmp4);
break; break;
case 6: /* push Ev */ case 6: /* push Ev */
gen_push_v(s, cpu_T0); gen_push_v(s, cpu_T0);
@@ -7081,7 +7083,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* Note that gen_pop_T0 uses a zero-extending load. */ /* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(tcg_ctx, cpu_T0); gen_op_jmp_v(tcg_ctx, cpu_T0);
gen_bnd_jmp(s); gen_bnd_jmp(s);
gen_eob(s); gen_jr(s, cpu_T0);
break; break;
case 0xc3: /* ret */ case 0xc3: /* ret */
ot = gen_pop_T0(s); ot = gen_pop_T0(s);
@@ -7089,7 +7091,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* Note that gen_pop_T0 uses a zero-extending load. */ /* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(tcg_ctx, cpu_T0); gen_op_jmp_v(tcg_ctx, cpu_T0);
gen_bnd_jmp(s); gen_bnd_jmp(s);
gen_eob(s); gen_jr(s, cpu_T0);
break; break;
case 0xca: /* lret im */ case 0xca: /* lret im */
val = cpu_ldsw_code(env, s->pc); val = cpu_ldsw_code(env, s->pc);