cutils: Remove aarch64 buffer zero checking
The revised integer version is 4 times faster than the NEON version on an AppliedMicro Mustang. Even with hand scheduling and additional unrolling, I cannot make any NEON version run as fast as the integer version.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
		
							parent
							
								
									5e33a87222
								
							
						
					
					
						commit
						2250d3a293
					
				| 
						 | 
				
			
			@ -200,21 +200,6 @@ static bool select_accel_fn(const void *buf, size_t len)
 | 
			
		|||
    return buffer_zero_int(buf, len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#elif defined(__aarch64__)
 | 
			
		||||
#include "arm_neon.h"
 | 
			
		||||
 | 
			
		||||
#define DO_NONZERO(X)  (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1))
 | 
			
		||||
ACCEL_BUFFER_ZERO(buffer_zero_neon, 128, uint64x2_t, DO_NONZERO)
 | 
			
		||||
 | 
			
		||||
static bool select_accel_fn(const void *buf, size_t len)
 | 
			
		||||
{
 | 
			
		||||
    uintptr_t ibuf = (uintptr_t)buf;
 | 
			
		||||
    if (len % 128 == 0 && ibuf % sizeof(uint64x2_t) == 0) {
 | 
			
		||||
        return buffer_zero_neon(buf, len);
 | 
			
		||||
    }
 | 
			
		||||
    return buffer_zero_int(buf, len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
#define select_accel_fn  buffer_zero_int
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue