Re: [PATCH 1/8] target-arm: A64: Implement plain vector SIMD indexed element insns

Peter Maydell <peter.maydell@xxxxxxxxxx> · Tue, 11 Feb 2014 14:52:47 +0000

On 7 February 2014 21:49, Peter Maydell <peter.maydell@xxxxxxxxxx> wrote:
> Implement all the SIMD vector x indexed element instructions
> in the subcategory which are not 'long' ops.
>
> Signed-off-by: Peter Maydell <peter.maydell@xxxxxxxxxx>
> ---
>  target-arm/helper-a64.c    |  26 +++++
>  target-arm/helper-a64.h    |   2 +
>  target-arm/translate-a64.c | 245 ++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 272 insertions(+), 1 deletion(-)
>
> diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
> index 6ca958a..fe90a5c 100644
> --- a/target-arm/helper-a64.c
> +++ b/target-arm/helper-a64.c
> @@ -123,6 +123,32 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
>      return float_rel_to_flags(float64_compare(x, y, fp_status));
>  }
>
> +float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
> +{
> +    float_status *fpst = fpstp;
> +
> +    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
> +        (float32_is_infinity(a) && float32_is_zero(b))) {
> +        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
> +        return make_float32((1U << 30) |
> +                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
> +    }
> +    return float32_mul(a, b, fpst);
> +}
> +
> +float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
> +{
> +    float_status *fpst = fpstp;
> +
> +    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
> +        (float64_is_infinity(a) && float64_is_zero(b))) {
> +        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
> +        return make_float64((1ULL << 62) |
> +                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
> +    }
> +    return float64_mul(a, b, fpst);
> +}
> +
>  uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
>                            uint32_t rn, uint32_t numregs)
>  {
> diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
> index 99832ee..84310e8 100644
> --- a/target-arm/helper-a64.h
> +++ b/target-arm/helper-a64.h
> @@ -27,3 +27,5 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
>  DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
>  DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
>  DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
> +DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
> +DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
> diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
> index d60223a..b7f1ecf 100644
> --- a/target-arm/translate-a64.c
> +++ b/target-arm/translate-a64.c
> @@ -7813,7 +7813,250 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
>   */
>  static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
>  {
> -    unsupported_encoding(s, insn);
> +    /* This encoding has two kinds of instruction:
> +     *  normal, where we perform elt x idxelt => elt for each
> +     *     element in the vector
> +     *  long, where we perform elt x idxelt and generate a result of
> +     *     double the width of the input element
> +     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
> +     */
> +    bool is_q = extract32(insn, 30, 1);
> +    bool u = extract32(insn, 29, 1);
> +    int size = extract32(insn, 22, 2);
> +    int l = extract32(insn, 21, 1);
> +    int m = extract32(insn, 20, 1);
> +    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
> +    int rm = extract32(insn, 16, 4);
> +    int opcode = extract32(insn, 12, 4);
> +    int h = extract32(insn, 11, 1);
> +    int rn = extract32(insn, 5, 5);
> +    int rd = extract32(insn, 0, 5);
> +    bool is_long = false;
> +    bool is_fp = false;
> +    int index;
> +    TCGv_ptr fpst;
> +
> +    switch (opcode) {
> +    case 0x0: /* MLA */
> +    case 0x4: /* MLS */
> +        if (!u) {
> +            unallocated_encoding(s);
> +            return;
> +        }
> +        break;
> +    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
> +    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
> +    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
> +        is_long = true;
> +        break;
> +    case 0x3: /* SQDMLAL, SQDMLAL2 */
> +    case 0x7: /* SQDMLSL, SQDMLSL2 */
> +    case 0xb: /* SQDMULL, SQDMULL2 */
> +        is_long = true;
> +        /* fall through */
> +    case 0xc: /* SQDMULH */
> +    case 0xd: /* SQRDMULH */
> +    case 0x8: /* MUL */
> +        if (u) {
> +            unallocated_encoding(s);
> +            return;
> +        }
> +        break;
> +    case 0x1: /* FMLA */
> +    case 0x5: /* FMLS */
> +        if (u) {
> +            unallocated_encoding(s);
> +            return;
> +        }
> +        /* fall through */
> +    case 0x9: /* FMUL, FMULX */
> +        if (!extract32(size, 1, 1)) {
> +            unallocated_encoding(s);
> +            return;
> +        }
> +        is_fp = true;
> +        break;
> +    }

This switch (opcode) is missing the
    default:
        unallocated_encoding(s);
        return;

case, so the unallocated opcodes will fall through and
assert later. I think this is a trivial enough fixup
that I'm going to just fix it in target-arm.next,
since I had already put this patch in there before I
noticed, unless anybody disagrees.

thanks
-- PMM
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm