target-arm: Implement VFPv4 fused multiply-accumulate insns
Implement the fused multiply-accumulate instructions (VFMA, VFMS, VFNMA, VFNMS), which are new in VFPv4.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
		
							parent
							
								
									369be8f618
								
							
						
					
					
						commit
						da97f52cb3
					
				| 
						 | 
				
			
			@ -376,6 +376,7 @@ enum arm_features {
 | 
			
		|||
    ARM_FEATURE_STRONGARM,
 | 
			
		||||
    ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
 | 
			
		||||
    ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
 | 
			
		||||
    ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static inline int arm_feature(CPUARMState *env, int feature)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -204,6 +204,7 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
 | 
			
		|||
        set_feature(env, ARM_FEATURE_THUMB2);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_VFP);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_VFP3);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_VFP4);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_VFP_FP16);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_NEON);
 | 
			
		||||
        set_feature(env, ARM_FEATURE_THUMB2EE);
 | 
			
		||||
| 
						 | 
				
			
			@ -3084,6 +3085,19 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)
 | 
			
		|||
    return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* VFPv4 fused multiply-accumulate */
 | 
			
		||||
/*
 * Single-precision fused multiply-accumulate helper.
 *
 * a, b  - the two float32 operands handed to float32_muladd as its first
 *         two arguments (the translator passes the Fn/Fm operands here).
 * c     - the third float32_muladd operand (the translator passes the
 *         possibly negated destination register value here).
 * fpstp - untyped pointer to the float_status to operate on.
 *
 * Returns whatever float32_muladd produces for (a, b, c) with a flags
 * argument of 0.
 */
float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
{
    /* The helper ABI delivers the status block as a plain pointer. */
    float_status *status = (float_status *)fpstp;
    float32 fused = float32_muladd(a, b, c, 0, status);

    return fused;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Double-precision fused multiply-accumulate helper.
 *
 * a, b  - the two float64 operands handed to float64_muladd as its first
 *         two arguments (the translator passes the Fn/Fm operands here).
 * c     - the third float64_muladd operand (the translator passes the
 *         possibly negated destination register value here).
 * fpstp - untyped pointer to the float_status to operate on.
 *
 * Returns whatever float64_muladd produces for (a, b, c) with a flags
 * argument of 0.
 */
float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
{
    /* The helper ABI delivers the status block as a plain pointer. */
    float_status *status = (float_status *)fpstp;
    float64 fused = float64_muladd(a, b, c, 0, status);

    return fused;
}
 | 
			
		||||
 | 
			
		||||
void HELPER(set_teecr)(CPUState *env, uint32_t val)
 | 
			
		||||
{
 | 
			
		||||
    val &= 1;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -132,6 +132,9 @@ DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
 | 
			
		|||
DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
 | 
			
		||||
DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
 | 
			
		||||
 | 
			
		||||
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 | 
			
		||||
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
 | 
			
		||||
 | 
			
		||||
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 | 
			
		||||
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
 | 
			
		||||
DEF_HELPER_2(recpe_f32, f32, f32, env)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3141,6 +3141,57 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
 | 
			
		|||
                case 8: /* div: fn / fm */
 | 
			
		||||
                    gen_vfp_div(dp);
 | 
			
		||||
                    break;
 | 
			
		||||
                case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
 | 
			
		||||
                case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
 | 
			
		||||
                case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
 | 
			
		||||
                case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
 | 
			
		||||
                    /* These are fused multiply-add, and must be done as one
 | 
			
		||||
                     * floating point operation with no rounding between the
 | 
			
		||||
                     * multiplication and addition steps.
 | 
			
		||||
                     * NB that doing the negations here as separate steps is
 | 
			
		||||
                     * correct : an input NaN should come out with its sign bit
 | 
			
		||||
                     * flipped if it is a negated-input.
 | 
			
		||||
                     */
 | 
			
		||||
                    if (!arm_feature(env, ARM_FEATURE_VFP4)) {
 | 
			
		||||
                        return 1;
 | 
			
		||||
                    }
 | 
			
		||||
                    if (dp) {
 | 
			
		||||
                        TCGv_ptr fpst;
 | 
			
		||||
                        TCGv_i64 frd;
 | 
			
		||||
                        if (op & 1) {
 | 
			
		||||
                            /* VFNMS, VFMS */
 | 
			
		||||
                            gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
 | 
			
		||||
                        }
 | 
			
		||||
                        frd = tcg_temp_new_i64();
 | 
			
		||||
                        tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
 | 
			
		||||
                        if (op & 2) {
 | 
			
		||||
                            /* VFNMA, VFNMS */
 | 
			
		||||
                            gen_helper_vfp_negd(frd, frd);
 | 
			
		||||
                        }
 | 
			
		||||
                        fpst = get_fpstatus_ptr(0);
 | 
			
		||||
                        gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
 | 
			
		||||
                                               cpu_F1d, frd, fpst);
 | 
			
		||||
                        tcg_temp_free_ptr(fpst);
 | 
			
		||||
                        tcg_temp_free_i64(frd);
 | 
			
		||||
                    } else {
 | 
			
		||||
                        TCGv_ptr fpst;
 | 
			
		||||
                        TCGv_i32 frd;
 | 
			
		||||
                        if (op & 1) {
 | 
			
		||||
                            /* VFNMS, VFMS */
 | 
			
		||||
                            gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
 | 
			
		||||
                        }
 | 
			
		||||
                        frd = tcg_temp_new_i32();
 | 
			
		||||
                        tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
 | 
			
		||||
                        if (op & 2) {
 | 
			
		||||
                            gen_helper_vfp_negs(frd, frd);
 | 
			
		||||
                        }
 | 
			
		||||
                        fpst = get_fpstatus_ptr(0);
 | 
			
		||||
                        gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
 | 
			
		||||
                                               cpu_F1s, frd, fpst);
 | 
			
		||||
                        tcg_temp_free_ptr(fpst);
 | 
			
		||||
                        tcg_temp_free_i32(frd);
 | 
			
		||||
                    }
 | 
			
		||||
                    break;
 | 
			
		||||
                case 14: /* fconst */
 | 
			
		||||
                    if (!arm_feature(env, ARM_FEATURE_VFP3))
 | 
			
		||||
                      return 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -4417,6 +4468,7 @@ static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
 | 
			
		|||
#define NEON_3R_VPMIN 21
 | 
			
		||||
#define NEON_3R_VQDMULH_VQRDMULH 22
 | 
			
		||||
#define NEON_3R_VPADD 23
 | 
			
		||||
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
 | 
			
		||||
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
 | 
			
		||||
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
 | 
			
		||||
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
 | 
			
		||||
| 
						 | 
				
			
			@ -4449,6 +4501,7 @@ static const uint8_t neon_3r_sizes[] = {
 | 
			
		|||
    [NEON_3R_VPMIN] = 0x7,
 | 
			
		||||
    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
 | 
			
		||||
    [NEON_3R_VPADD] = 0x7,
 | 
			
		||||
    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
 | 
			
		||||
    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
 | 
			
		||||
    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
 | 
			
		||||
    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
 | 
			
		||||
| 
						 | 
				
			
			@ -4726,6 +4779,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
 | 
			
		|||
                return 1;
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case NEON_3R_VFM:
 | 
			
		||||
            if (!arm_feature(env, ARM_FEATURE_VFP4) || u) {
 | 
			
		||||
                return 1;
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			@ -5006,6 +5064,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
 | 
			
		|||
            else
 | 
			
		||||
                gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
 | 
			
		||||
            break;
 | 
			
		||||
        case NEON_3R_VFM:
 | 
			
		||||
        {
 | 
			
		||||
            /* VFMA, VFMS: fused multiply-add */
 | 
			
		||||
            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
 | 
			
		||||
            TCGv_i32 tmp3 = neon_load_reg(rd, pass);
 | 
			
		||||
            if (size) {
 | 
			
		||||
                /* VFMS */
 | 
			
		||||
                gen_helper_vfp_negs(tmp, tmp);
 | 
			
		||||
            }
 | 
			
		||||
            gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
 | 
			
		||||
            tcg_temp_free_i32(tmp3);
 | 
			
		||||
            tcg_temp_free_ptr(fpstatus);
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        default:
 | 
			
		||||
            abort();
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue