tcg/arm: fix TLB access in qemu-ld/st ops
The TCG arm backend considers likely that the offset to the TLB entries does not exceed 12 bits for mem_index = 0. In practice this is not true for at least the MIPS target. The current patch fixes that by loading the bits 23-12 with a separate instruction, and using loads with address writeback, independently of the value of mem_idx. In total this allow a 24-bit offset, which is a lot more than needed. Cc: Andrzej Zaborowski <balrogg@gmail.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: qemu-stable@nongnu.org Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
		
							parent
							
								
									1ccbc28512
								
							
						
					
					
						commit
						d17bd1d8cc
					
				| 
						 | 
				
			
			@ -639,6 +639,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int cond,
 | 
			
		|||
                        (rn << 16) | (rd << 12) | ((-im) & 0xfff));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Offset pre-increment with base writeback.  */
 | 
			
		||||
static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
 | 
			
		||||
                                     int rd, int rn, tcg_target_long im)
 | 
			
		||||
{
 | 
			
		||||
    /* ldr with writeback and both register equals is UNPREDICTABLE */
 | 
			
		||||
    assert(rd != rn);
 | 
			
		||||
 | 
			
		||||
    if (im >= 0) {
 | 
			
		||||
        tcg_out32(s, (cond << 28) | 0x05b00000 |
 | 
			
		||||
                        (rn << 16) | (rd << 12) | (im & 0xfff));
 | 
			
		||||
    } else {
 | 
			
		||||
        tcg_out32(s, (cond << 28) | 0x05300000 |
 | 
			
		||||
                        (rn << 16) | (rd << 12) | ((-im) & 0xfff));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void tcg_out_st32_12(TCGContext *s, int cond,
 | 
			
		||||
                int rd, int rn, tcg_target_long im)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1071,7 +1087,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
{
 | 
			
		||||
    int addr_reg, data_reg, data_reg2, bswap;
 | 
			
		||||
#ifdef CONFIG_SOFTMMU
 | 
			
		||||
    int mem_index, s_bits;
 | 
			
		||||
    int mem_index, s_bits, tlb_offset;
 | 
			
		||||
    TCGReg argreg;
 | 
			
		||||
# if TARGET_LONG_BITS == 64
 | 
			
		||||
    int addr_reg2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1111,19 +1127,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
 | 
			
		||||
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
 | 
			
		||||
    /* In the
 | 
			
		||||
     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
 | 
			
		||||
     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
 | 
			
		||||
     * not exceed otherwise, so use an
 | 
			
		||||
     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
 | 
			
		||||
     * before.
 | 
			
		||||
     */
 | 
			
		||||
    if (mem_index)
 | 
			
		||||
    /* We assume that the offset is contained within 20 bits.  */
 | 
			
		||||
    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
 | 
			
		||||
    assert(tlb_offset & ~0xfffff == 0);
 | 
			
		||||
    if (tlb_offset > 0xfff) {
 | 
			
		||||
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
 | 
			
		||||
                        (mem_index << (TLB_SHIFT & 1)) |
 | 
			
		||||
                        ((16 - (TLB_SHIFT >> 1)) << 8));
 | 
			
		||||
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addr_read));
 | 
			
		||||
                        0xa00 | (tlb_offset >> 12));
 | 
			
		||||
        tlb_offset &= 0xfff;
 | 
			
		||||
    }
 | 
			
		||||
    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
 | 
			
		||||
                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
 | 
			
		||||
    /* Check alignment.  */
 | 
			
		||||
| 
						 | 
				
			
			@ -1131,15 +1143,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
 | 
			
		||||
                        0, addr_reg, (1 << s_bits) - 1);
 | 
			
		||||
#  if TARGET_LONG_BITS == 64
 | 
			
		||||
    /* XXX: possibly we could use a block data load or writeback in
 | 
			
		||||
     * the first access.  */
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
 | 
			
		||||
    /* XXX: possibly we could use a block data load in the first access.  */
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
 | 
			
		||||
                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 | 
			
		||||
#  endif
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addend));
 | 
			
		||||
                    offsetof(CPUTLBEntry, addend)
 | 
			
		||||
                    - offsetof(CPUTLBEntry, addr_read));
 | 
			
		||||
 | 
			
		||||
    switch (opc) {
 | 
			
		||||
    case 0:
 | 
			
		||||
| 
						 | 
				
			
			@ -1288,7 +1299,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
{
 | 
			
		||||
    int addr_reg, data_reg, data_reg2, bswap;
 | 
			
		||||
#ifdef CONFIG_SOFTMMU
 | 
			
		||||
    int mem_index, s_bits;
 | 
			
		||||
    int mem_index, s_bits, tlb_offset;
 | 
			
		||||
    TCGReg argreg;
 | 
			
		||||
# if TARGET_LONG_BITS == 64
 | 
			
		||||
    int addr_reg2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1325,19 +1336,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
 | 
			
		||||
                    TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
 | 
			
		||||
    /* In the
 | 
			
		||||
     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
 | 
			
		||||
     * below, the offset is likely to exceed 12 bits if mem_index != 0 and
 | 
			
		||||
     * not exceed otherwise, so use an
 | 
			
		||||
     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
 | 
			
		||||
     * before.
 | 
			
		||||
     */
 | 
			
		||||
    if (mem_index)
 | 
			
		||||
    /* We assume that the offset is contained within 20 bits.  */
 | 
			
		||||
    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
 | 
			
		||||
    assert(tlb_offset & ~0xfffff == 0);
 | 
			
		||||
    if (tlb_offset > 0xfff) {
 | 
			
		||||
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
 | 
			
		||||
                        (mem_index << (TLB_SHIFT & 1)) |
 | 
			
		||||
                        ((16 - (TLB_SHIFT >> 1)) << 8));
 | 
			
		||||
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addr_write));
 | 
			
		||||
                        0xa00 | (tlb_offset >> 12));
 | 
			
		||||
        tlb_offset &= 0xfff;
 | 
			
		||||
    }
 | 
			
		||||
    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
 | 
			
		||||
                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
 | 
			
		||||
    /* Check alignment.  */
 | 
			
		||||
| 
						 | 
				
			
			@ -1345,15 +1352,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 | 
			
		|||
        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
 | 
			
		||||
                        0, addr_reg, (1 << s_bits) - 1);
 | 
			
		||||
#  if TARGET_LONG_BITS == 64
 | 
			
		||||
    /* XXX: possibly we could use a block data load or writeback in
 | 
			
		||||
     * the first access.  */
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
 | 
			
		||||
    /* XXX: possibly we could use a block data load in the first access.  */
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
 | 
			
		||||
    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
 | 
			
		||||
                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 | 
			
		||||
#  endif
 | 
			
		||||
    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
 | 
			
		||||
                    offsetof(CPUArchState, tlb_table[0][0].addend));
 | 
			
		||||
                    offsetof(CPUTLBEntry, addend)
 | 
			
		||||
                    - offsetof(CPUTLBEntry, addr_write));
 | 
			
		||||
 | 
			
		||||
    switch (opc) {
 | 
			
		||||
    case 0:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue