host-utils: Improve mulu64 and muls64
The new formulation makes better use of add-with-carry style instructions that the host may have. Use gcc's sign adjustment trick to avoid having to perform a 128-bit negation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
This commit is contained in:
		
							parent
							
								
									f540166b7d
								
							
						
					
					
						commit
						ff7a1eb0a1
					
				| 
						 | 
				
			
			@ -27,79 +27,63 @@
 | 
			
		|||
#include <stdint.h>
 | 
			
		||||
#include "qemu/host-utils.h"
 | 
			
		||||
 | 
			
		||||
//#define DEBUG_MULDIV
 | 
			
		||||
 | 
			
		||||
/* Long integer helpers */
 | 
			
		||||
#ifndef CONFIG_INT128
 | 
			
		||||
/*
 * Unsigned 64x64 -> 128 bit multiplication built from four 32x32 -> 64
 * partial products.
 *
 * plow:  receives bits 0..63 of a * b
 * phigh: receives bits 64..127 of a * b
 *
 * The operands are split with shifts rather than through a union, so the
 * result does not depend on host byte order.
 */
static inline void mul64(uint64_t *plow, uint64_t *phigh,
                         uint64_t a, uint64_t b)
{
    const uint64_t a_lo = (uint32_t)a;
    const uint64_t a_hi = a >> 32;
    const uint64_t b_lo = (uint32_t)b;
    const uint64_t b_hi = b >> 32;

    const uint64_t ll = a_lo * b_lo;    /* weight 2^0  */
    const uint64_t lh = a_lo * b_hi;    /* weight 2^32 */
    const uint64_t hl = a_hi * b_lo;    /* weight 2^32 */
    const uint64_t hh = a_hi * b_hi;    /* weight 2^64 */

    /*
     * Sum of everything that lands in bits 32..63.  Three 32-bit values
     * cannot overflow 64 bits; the upper half of the sum is the carry
     * into the high word.
     */
    const uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;

    *plow = (mid << 32) | (uint32_t)ll;
    /* Cannot wrap: the full product always fits in 128 bits. */
    *phigh = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Unsigned 64x64 -> 128 multiplication.
 *
 * Forwards to mul64(), which writes the two halves of a * b through
 * plow and phigh.  With DEBUG_MULDIV defined the operands and the
 * result are also printed in hex.
 */
void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
{
    mul64(plow, phigh, a, b);
#if defined(DEBUG_MULDIV)
    /*
     * uint64_t is not necessarily unsigned long long; cast so the
     * arguments match the %llx conversions.
     */
    printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
           (unsigned long long)a, (unsigned long long)b,
           (unsigned long long)*phigh, (unsigned long long)*plow);
#endif
}
 | 
			
		||||
 | 
			
		||||
/*
 * Signed 64x64 -> 128 multiplication.
 *
 * The operands are multiplied as unsigned bit patterns via mul64() and
 * the high word is then corrected for the signs.  Reading a negative x
 * as unsigned adds 2^64 to it, so the unsigned product exceeds the signed
 * one by 2^64 * b when a < 0 and by 2^64 * a when b < 0 (modulo 2^128).
 * Subtracting those terms from the high word yields the signed result
 * without negating any operand, so INT64_MIN needs no special case.
 *
 * plow/phigh receive the low and high 64 bits of the two's-complement
 * 128-bit product.
 */
void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
{
    uint64_t rh;

    mul64(plow, &rh, a, b);

    /* Adjust for signs.  */
    if (b < 0) {
        rh -= a;
    }
    if (a < 0) {
        rh -= b;
    }
    *phigh = rh;

#if defined(DEBUG_MULDIV)
    /* Printed only after *phigh holds the final value. */
    printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
           (unsigned long long)a, (unsigned long long)b,
           (unsigned long long)*phigh, (unsigned long long)*plow);
#endif
}
 | 
			
		||||
#endif /* !CONFIG_INT128 */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue