From 03a96b83b539498510e22aab585e41015ba18247 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 27 Apr 2015 18:59:04 +0200 Subject: [PATCH 1/9] kvm: Silence warning from valgrind valgrind complains here about uninitialized bytes with the following message: ==17814== Syscall param ioctl(generic) points to uninitialised byte(s) ==17814== at 0x466A780: ioctl (in /usr/lib64/power8/libc-2.17.so) ==17814== by 0x100735B7: kvm_vm_ioctl (kvm-all.c:1920) ==17814== by 0x10074583: kvm_set_ioeventfd_mmio (kvm-all.c:574) Let's fix it by using a proper struct initializer in kvm_set_ioeventfd_mmio(). Signed-off-by: Thomas Huth Message-Id: <1430153944-24368-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- kvm-all.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 3f7061a180..28f4589219 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -552,13 +552,13 @@ static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val, bool assign, uint32_t size, bool datamatch) { int ret; - struct kvm_ioeventfd iofd; - - iofd.datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0; - iofd.addr = addr; - iofd.len = size; - iofd.flags = 0; - iofd.fd = fd; + struct kvm_ioeventfd iofd = { + .datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0, + .addr = addr, + .len = size, + .flags = 0, + .fd = fd, + }; if (!kvm_enabled()) { return -ENOSYS; From 813297541196698f60525d611dd09007fa60b45b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Apr 2015 16:53:52 +0300 Subject: [PATCH 2/9] apic_common: improve readability of apic_reset_common MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace call of cpu_is_bsp(s->cpu) which really returns !!(s->apicbase & MSR_IA32_APICBASE_BSP) with directly collected value. Due to this the tracepoint trace_cpu_get_apic_base((uint64_t)s->apicbase); will not be hit anymore in apic_reset_common. Signed-off-by: Denis V. 
Lunev CC: Andreas Färber CC: Paolo Bonzini Message-Id: <1428414832-3104-1-git-send-email-den@openvz.org> Signed-off-by: Paolo Bonzini --- hw/intc/apic_common.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index d38d24b814..d595d63a51 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -233,11 +233,10 @@ static void apic_reset_common(DeviceState *dev) { APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info = APIC_COMMON_GET_CLASS(s); - bool bsp; + uint32_t bsp; - bsp = cpu_is_bsp(s->cpu); - s->apicbase = APIC_DEFAULT_ADDRESS | - (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; + bsp = s->apicbase & MSR_IA32_APICBASE_BSP; + s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; s->vapic_paddr = 0; info->vapic_base_update(s); From e48816aac6eef50c851e3833add886f0403b6f11 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 8 Apr 2015 12:53:47 +0200 Subject: [PATCH 3/9] mtree: tag & indent a bit better Signed-off-by: Gerd Hoffmann Signed-off-by: Paolo Bonzini --- memory.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/memory.c b/memory.c index 0f6cb812db..8fe6d79c1d 100644 --- a/memory.c +++ b/memory.c @@ -2185,15 +2185,16 @@ void mtree_info(fprintf_function mon_printf, void *f) QTAILQ_INIT(&ml_head); QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - mon_printf(f, "%s\n", as->name); - mtree_print_mr(mon_printf, f, as->root, 0, 0, &ml_head); + mon_printf(f, "address-space: %s\n", as->name); + mtree_print_mr(mon_printf, f, as->root, 1, 0, &ml_head); + mon_printf(f, "\n"); } - mon_printf(f, "aliases\n"); /* print aliased regions */ QTAILQ_FOREACH(ml, &ml_head, queue) { - mon_printf(f, "%s\n", memory_region_name(ml->mr)); - mtree_print_mr(mon_printf, f, ml->mr, 0, 0, &ml_head); + mon_printf(f, "memory-region: %s\n", memory_region_name(ml->mr)); + mtree_print_mr(mon_printf, f, ml->mr, 1, 0, &ml_head); + 
mon_printf(f, "\n"); } QTAILQ_FOREACH_SAFE(ml, &ml_head, queue, ml2) { From f8a9f720dd2fa5c1560838c26c6dad396a0cef5b Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 8 Apr 2015 12:57:11 +0200 Subject: [PATCH 4/9] mtree: also print disabled regions Signed-off-by: Gerd Hoffmann Signed-off-by: Paolo Bonzini --- memory.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/memory.c b/memory.c index 8fe6d79c1d..03c536b857 100644 --- a/memory.c +++ b/memory.c @@ -2089,7 +2089,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, const MemoryRegion *submr; unsigned int i; - if (!mr || !mr->enabled) { + if (!mr) { return; } @@ -2115,7 +2115,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, } mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): alias %s @%s " TARGET_FMT_plx - "-" TARGET_FMT_plx "\n", + "-" TARGET_FMT_plx "%s\n", base + mr->addr, base + mr->addr + (int128_nz(mr->size) ? @@ -2131,10 +2131,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, mr->alias_offset + (int128_nz(mr->size) ? (hwaddr)int128_get64(int128_sub(mr->size, - int128_one())) : 0)); + int128_one())) : 0), + mr->enabled ? "" : " [disabled]"); } else { mon_printf(f, - TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s\n", + TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s%s\n", base + mr->addr, base + mr->addr + (int128_nz(mr->size) ? @@ -2144,7 +2145,8 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, mr->romd_mode ? 'R' : '-', !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W' : '-', - memory_region_name(mr)); + memory_region_name(mr), + mr->enabled ? 
"" : " [disabled]"); } QTAILQ_INIT(&submr_print_queue); From 4c6637525290dc863a00be7f58fc11d07b780bd4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 13:30:58 +0200 Subject: [PATCH 5/9] kvm: add support for memory transaction attributes Let kvm_arch_post_run convert fields in the kvm_run struct to MemTxAttrs. These are then passed to address_space_rw. Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 3 ++- kvm-all.c | 21 ++++++++++++--------- target-arm/kvm.c | 4 +++- target-i386/kvm.c | 4 +++- target-mips/kvm.c | 4 +++- target-ppc/kvm.c | 4 +++- target-s390x/kvm.c | 4 +++- 7 files changed, 29 insertions(+), 15 deletions(-) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 197e6c0214..4878959404 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -18,6 +18,7 @@ #include "config-host.h" #include "qemu/queue.h" #include "qom/cpu.h" +#include "exec/memattrs.h" #ifdef CONFIG_KVM #include @@ -254,7 +255,7 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); -void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); diff --git a/kvm-all.c b/kvm-all.c index 28f4589219..17a3771efe 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1669,14 +1669,14 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len) s->sigmask_len = sigmask_len; } -static void kvm_handle_io(uint16_t port, void *data, int direction, int size, - uint32_t count) +static void kvm_handle_io(uint16_t port, MemTxAttrs attrs, void *data, int direction, + int size, uint32_t count) { int i; uint8_t *ptr = data; for (i = 0; i < count; i++) { - address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED, + address_space_rw(&address_space_io, port, attrs, ptr, size, direction == KVM_EXIT_IO_OUT); 
ptr += size; @@ -1796,6 +1796,8 @@ int kvm_cpu_exec(CPUState *cpu) } do { + MemTxAttrs attrs; + if (cpu->kvm_vcpu_dirty) { kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE); cpu->kvm_vcpu_dirty = false; @@ -1816,7 +1818,7 @@ int kvm_cpu_exec(CPUState *cpu) run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0); qemu_mutex_lock_iothread(); - kvm_arch_post_run(cpu, run); + attrs = kvm_arch_post_run(cpu, run); if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { @@ -1834,7 +1836,7 @@ int kvm_cpu_exec(CPUState *cpu) switch (run->exit_reason) { case KVM_EXIT_IO: DPRINTF("handle_io\n"); - kvm_handle_io(run->io.port, + kvm_handle_io(run->io.port, attrs, (uint8_t *)run + run->io.data_offset, run->io.direction, run->io.size, @@ -1843,10 +1845,11 @@ int kvm_cpu_exec(CPUState *cpu) break; case KVM_EXIT_MMIO: DPRINTF("handle_mmio\n"); - cpu_physical_memory_rw(run->mmio.phys_addr, - run->mmio.data, - run->mmio.len, - run->mmio.is_write); + address_space_rw(&address_space_memory, + run->mmio.phys_addr, attrs, + run->mmio.data, + run->mmio.len, + run->mmio.is_write); ret = 0; break; case KVM_EXIT_IRQ_WINDOW_OPEN: diff --git a/target-arm/kvm.c b/target-arm/kvm.c index fdd9ba3f1d..16abbf198c 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -23,6 +23,7 @@ #include "cpu.h" #include "internals.h" #include "hw/arm/arm.h" +#include "exec/memattrs.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO @@ -506,8 +507,9 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } -void kvm_arch_post_run(CPUState *cs, struct kvm_run *run) +MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) { + return MEMTXATTRS_UNSPECIFIED; } int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 41d09e52de..a26d25a81f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -37,6 +37,7 @@ #include "hw/pci/pci.h" #include "migration/migration.h" #include "qapi/qmp/qerror.h" +#include 
"exec/memattrs.h" //#define DEBUG_KVM @@ -2246,7 +2247,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } -void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; @@ -2258,6 +2259,7 @@ void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) } cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); + return MEMTXATTRS_UNSPECIFIED; } int kvm_arch_process_async_events(CPUState *cs) diff --git a/target-mips/kvm.c b/target-mips/kvm.c index 4d1f7ead81..59eb11105a 100644 --- a/target-mips/kvm.c +++ b/target-mips/kvm.c @@ -23,6 +23,7 @@ #include "cpu.h" #include "sysemu/cpus.h" #include "kvm_mips.h" +#include "exec/memattrs.h" #define DEBUG_KVM 0 @@ -110,9 +111,10 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) } } -void kvm_arch_post_run(CPUState *cs, struct kvm_run *run) +MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) { DPRINTF("%s\n", __func__); + return MEMTXATTRS_UNSPECIFIED; } int kvm_arch_process_async_events(CPUState *cs) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 12328a4027..1da9ea81e5 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -39,6 +39,7 @@ #include "sysemu/watchdog.h" #include "trace.h" #include "exec/gdbstub.h" +#include "exec/memattrs.h" //#define DEBUG_KVM @@ -1270,8 +1271,9 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) * anyways, so we will get a chance to deliver the rest. 
*/ } -void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) +MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) { + return MEMTXATTRS_UNSPECIFIED; } int kvm_arch_process_async_events(CPUState *cs) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 8e65e43f02..08cbffbe36 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -45,6 +45,7 @@ #include "hw/s390x/s390-pci-bus.h" #include "hw/s390x/ipl.h" #include "hw/s390x/ebcdic.h" +#include "exec/memattrs.h" /* #define DEBUG_KVM */ @@ -769,8 +770,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) { } -void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) +MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) { + return MEMTXATTRS_UNSPECIFIED; } int kvm_arch_process_async_events(CPUState *cs) From 41063e1e7afcb2f13e103720fe96221657f5dbbc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 18 Mar 2015 14:21:43 +0100 Subject: [PATCH 6/9] exec: move rcu_read_lock/unlock to address_space_translate callers Once address_space_translate will be called outside the BQL, the returned MemoryRegion might disappear as soon as the RCU read-side critical section ends. Avoid this by moving the critical section to the callers. 
Signed-off-by: Paolo Bonzini Message-Id: <1426684909-95030-3-git-send-email-pbonzini@redhat.com> --- exec.c | 33 +++++++++++++++++++++++++++++---- hw/vfio/common.c | 7 +++++-- include/exec/memory.h | 4 +++- translate-all.c | 3 +++ 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/exec.c b/exec.c index ae37b98e8c..e19ab22cd6 100644 --- a/exec.c +++ b/exec.c @@ -373,6 +373,7 @@ static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) return false; } +/* Called from RCU critical section */ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, hwaddr *xlat, hwaddr *plen, bool is_write) @@ -381,7 +382,6 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, MemoryRegionSection *section; MemoryRegion *mr; - rcu_read_lock(); for (;;) { AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); section = address_space_translate_internal(d, addr, &addr, plen, true); @@ -409,7 +409,6 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, } *xlat = addr; - rcu_read_unlock(); return mr; } @@ -2329,6 +2328,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, MemoryRegion *mr; MemTxResult result = MEMTX_OK; + rcu_read_lock(); while (len > 0) { l = len; mr = address_space_translate(as, addr, &addr1, &l, is_write); @@ -2415,6 +2415,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, buf += l; addr += l; } + rcu_read_unlock(); return result; } @@ -2452,6 +2453,7 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, hwaddr addr1; MemoryRegion *mr; + rcu_read_lock(); while (len > 0) { l = len; mr = address_space_translate(as, addr, &addr1, &l, true); @@ -2477,6 +2479,7 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, buf += l; addr += l; } + rcu_read_unlock(); } /* used for ROM loading : can write in RAM and ROM */ @@ -2585,6 +2588,7 @@ bool address_space_access_valid(AddressSpace *as, 
hwaddr addr, int len, bool is_ MemoryRegion *mr; hwaddr l, xlat; + rcu_read_lock(); while (len > 0) { l = len; mr = address_space_translate(as, addr, &xlat, &l, is_write); @@ -2598,6 +2602,7 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_ len -= l; addr += l; } + rcu_read_unlock(); return true; } @@ -2624,9 +2629,12 @@ void *address_space_map(AddressSpace *as, } l = len; + rcu_read_lock(); mr = address_space_translate(as, addr, &xlat, &l, is_write); + if (!memory_access_is_direct(mr, is_write)) { if (atomic_xchg(&bounce.in_use, true)) { + rcu_read_unlock(); return NULL; } /* Avoid unbounded allocations */ @@ -2642,6 +2650,7 @@ void *address_space_map(AddressSpace *as, bounce.buffer, l); } + rcu_read_unlock(); *plen = l; return bounce.buffer; } @@ -2665,6 +2674,7 @@ void *address_space_map(AddressSpace *as, } memory_region_ref(mr); + rcu_read_unlock(); *plen = done; return qemu_ram_ptr_length(raddr + base, plen); } @@ -2728,6 +2738,7 @@ static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, false); if (l < 4 || !memory_access_is_direct(mr, false)) { /* I/O case */ @@ -2762,6 +2773,7 @@ static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr, if (result) { *result = r; } + rcu_read_unlock(); return val; } @@ -2814,6 +2826,7 @@ static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, false); if (l < 8 || !memory_access_is_direct(mr, false)) { @@ -2849,6 +2862,7 @@ static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr, if (result) { *result = r; } + rcu_read_unlock(); return val; } @@ -2921,6 +2935,7 @@ static inline uint32_t address_space_lduw_internal(AddressSpace *as, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = 
address_space_translate(as, addr, &addr1, &l, false); if (l < 2 || !memory_access_is_direct(mr, false)) { @@ -2956,6 +2971,7 @@ static inline uint32_t address_space_lduw_internal(AddressSpace *as, if (result) { *result = r; } + rcu_read_unlock(); return val; } @@ -3007,6 +3023,7 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, true); if (l < 4 || !memory_access_is_direct(mr, true)) { @@ -3029,6 +3046,7 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, if (result) { *result = r; } + rcu_read_unlock(); } void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val) @@ -3049,6 +3067,7 @@ static inline void address_space_stl_internal(AddressSpace *as, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, true); if (l < 4 || !memory_access_is_direct(mr, true)) { @@ -3083,6 +3102,7 @@ static inline void address_space_stl_internal(AddressSpace *as, if (result) { *result = r; } + rcu_read_unlock(); } void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val, @@ -3152,6 +3172,7 @@ static inline void address_space_stw_internal(AddressSpace *as, hwaddr addr1; MemTxResult r; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, true); if (l < 2 || !memory_access_is_direct(mr, true)) { #if defined(TARGET_WORDS_BIGENDIAN) @@ -3185,6 +3206,7 @@ static inline void address_space_stw_internal(AddressSpace *as, if (result) { *result = r; } + rcu_read_unlock(); } void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val, @@ -3322,12 +3344,15 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr) { MemoryRegion*mr; hwaddr l = 1; + bool res; + rcu_read_lock(); mr = address_space_translate(&address_space_memory, phys_addr, &phys_addr, &l, false); - return !(memory_region_is_ram(mr) || - memory_region_is_romd(mr)); + res = !(memory_region_is_ram(mr) 
|| memory_region_is_romd(mr)); + rcu_read_unlock(); + return res; } void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b01262063d..b1045da857 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -270,13 +270,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) * this IOMMU to its immediate target. We need to translate * it the rest of the way through to memory. */ + rcu_read_lock(); mr = address_space_translate(&address_space_memory, iotlb->translated_addr, &xlat, &len, iotlb->perm & IOMMU_WO); if (!memory_region_is_ram(mr)) { error_report("iommu map to non memory area %"HWADDR_PRIx"", xlat); - return; + goto out; } /* * Translation truncates length to the IOMMU page size, @@ -284,7 +285,7 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) */ if (len & iotlb->addr_mask) { error_report("iommu has granularity incompatible with target AS"); - return; + goto out; } if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { @@ -307,6 +308,8 @@ static void vfio_iommu_map_notify(Notifier *n, void *data) iotlb->addr_mask + 1, ret); } } +out: + rcu_read_unlock(); } static void vfio_listener_region_add(MemoryListener *listener, diff --git a/include/exec/memory.h b/include/exec/memory.h index 0ccfd3b42a..b61c84f62a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1233,7 +1233,9 @@ void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val, #endif /* address_space_translate: translate an address range into an address space - * into a MemoryRegion and an address range into that section + * into a MemoryRegion and an address range into that section. Should be + * called from an RCU critical section, to avoid that the last reference + * to the returned region disappears after address_space_translate returns. 
* * @as: #AddressSpace to be accessed * @addr: address within that address space diff --git a/translate-all.c b/translate-all.c index 65a76c5b9f..536008f52d 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1416,14 +1416,17 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) MemoryRegion *mr; hwaddr l = 1; + rcu_read_lock(); mr = address_space_translate(as, addr, &addr, &l, false); if (!(memory_region_is_ram(mr) || memory_region_is_romd(mr))) { + rcu_read_unlock(); return; } ram_addr = (memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK) + addr; tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); + rcu_read_unlock(); } #endif /* !defined(CONFIG_USER_ONLY) */ From 768b7855c86c4f46b605183ae9451e9af64ca288 Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Wed, 29 Apr 2015 13:09:02 +0200 Subject: [PATCH 7/9] configure: require __thread support The codebase doesn't build without __thread support. Formalise this requirement by adding a check for it in the configure script. Signed-off-by: Emilio G. Cota Signed-off-by: Paolo Bonzini --- configure | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/configure b/configure index 75a4def8b5..40bc49a29f 100755 --- a/configure +++ b/configure @@ -1556,6 +1556,17 @@ if test "$static" = "yes" ; then fi fi +# Unconditional check for compiler __thread support + cat > $TMPC << EOF +static __thread int tls_var; +int main(void) { return tls_var; } +EOF + +if ! compile_prog "-Werror" "" ; then + error_exit "Your compiler does not support the __thread specifier for " \ + "Thread-Local Storage (TLS). Please upgrade to a version that does." 
+fi + if test "$pie" = ""; then case "$cpu-$targetos" in i386-Linux|x86_64-Linux|x32-Linux|i386-OpenBSD|x86_64-OpenBSD) From d24697e1824467f3921c84a94f011f43d6466403 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 7 May 2015 14:55:15 +0800 Subject: [PATCH 8/9] rules.mak: Force CFLAGS for all objects in DSO Because of the trick of process-archive-undefs, all .mo objects, even with --enable-modules, are dependencies of executables. This breaks CFLAGS propagation because the compiling of module object will happen too early before building for DSO. With GCC 5, the linking would fail because .o doesn't have -fPIC. Also, BUILD_DSO will be missed. (module-common.o will have it, so the stamp symbol was still linked in .so). Fix the problem by forcing the CFLAGS on individual .o-cflags during unnest-vars. Reported-by: Alexander Graf Signed-off-by: Fam Zheng Cc: qemu-stable@nongnu.org # 2.3 Message-Id: <1430981715-31465-1-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- rules.mak | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rules.mak b/rules.mak index 3a056272e2..aec27f81db 100644 --- a/rules.mak +++ b/rules.mak @@ -102,7 +102,8 @@ endif %.o: %.dtrace $(call quiet-command,dtrace -o $@ -G -s $<, " GEN $(TARGET_DIR)$@") -%$(DSOSUF): CFLAGS += -fPIC -DBUILD_DSO +DSO_OBJ_CFLAGS := -fPIC -DBUILD_DSO +module-common.o: CFLAGS += $(DSO_OBJ_CFLAGS) %$(DSOSUF): LDFLAGS += $(LDFLAGS_SHARED) %$(DSOSUF): %.mo $(call LINK,$^) @@ -351,6 +352,7 @@ define unnest-vars # For non-module build, add -m to -y $(if $(CONFIG_MODULES), $(foreach o,$($v), + $(eval $($o-objs): CFLAGS += $(DSO_OBJ_CFLAGS)) $(eval $o: $($o-objs))) $(eval $(patsubst %-m,%-y,$v) += $($v)) $(eval modules: $($v:%.mo=%$(DSOSUF))), From ca4414804114fd0095b317785bc0b51862e62ebb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 May 2015 17:25:10 +0200 Subject: [PATCH 9/9] qemu-nbd: only send a limited number of errno codes on the wire Right now, NBD includes potentially
platform-specific error values in the wire protocol. Luckily, most common error values are more or less universal: in particular, of all errno values <= 34 (up to ERANGE), they are all the same on supported platforms except for 11 (which is EAGAIN on Windows and Linux, but EDEADLK on Darwin and the *BSDs). So, in order to guarantee some portability, only keep a handful of possible error codes and squash everything else to EINVAL. This patch defines a limited set of errno values that are valid for the NBD protocol, and specifies recommendations for what error to return in specific corner cases. The set of errno values is roughly based on the errors listed in the read(2) and write(2) man pages, with some exceptions: - ENOMEM is added for servers that implement copy-on-write or other formats that require dynamic allocation. - EDQUOT is not part of the universal set of errors; it can be changed to ENOSPC on the wire format. - EFBIG is part of the universal set of errors, but it is also changed to ENOSPC because it is pretty similar to ENOSPC or EDQUOT. Incoming values will in general match system errno values, but not on the Hurd which has different errno values (they have a "subsystem code" equal to 0x10 in bits 24-31). The Hurd is probably not something to which QEMU has been ported, but still do the right thing and reverse-map the NBD errno values to the system errno values. The corresponding patch to the NBD protocol description can be found at http://article.gmane.org/gmane.linux.drivers.nbd.general/3154. Signed-off-by: Paolo Bonzini --- nbd.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/nbd.c b/nbd.c index cb1b9bbf7c..06b501ba67 100644 --- a/nbd.c +++ b/nbd.c @@ -86,6 +86,59 @@ #define NBD_OPT_ABORT (2) #define NBD_OPT_LIST (3) +/* NBD errors are based on errno numbers, so there is a 1:1 mapping, + * but only a limited set of errno values is specified in the protocol. 
+ * Everything else is squashed to EINVAL. + */ +#define NBD_SUCCESS 0 +#define NBD_EPERM 1 +#define NBD_EIO 5 +#define NBD_ENOMEM 12 +#define NBD_EINVAL 22 +#define NBD_ENOSPC 28 + +static int system_errno_to_nbd_errno(int err) +{ + switch (err) { + case 0: + return NBD_SUCCESS; + case EPERM: + return NBD_EPERM; + case EIO: + return NBD_EIO; + case ENOMEM: + return NBD_ENOMEM; +#ifdef EDQUOT + case EDQUOT: +#endif + case EFBIG: + case ENOSPC: + return NBD_ENOSPC; + case EINVAL: + default: + return NBD_EINVAL; + } +} + +static int nbd_errno_to_system_errno(int err) +{ + switch (err) { + case NBD_SUCCESS: + return 0; + case NBD_EPERM: + return EPERM; + case NBD_EIO: + return EIO; + case NBD_ENOMEM: + return ENOMEM; + case NBD_ENOSPC: + return ENOSPC; + case NBD_EINVAL: + default: + return EINVAL; + } +} + /* Definitions for opaque data types */ typedef struct NBDRequest NBDRequest; @@ -856,6 +909,8 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) reply->error = be32_to_cpup((uint32_t*)(buf + 4)); reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); + reply->error = nbd_errno_to_system_errno(reply->error); + TRACE("Got reply: " "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", magic, reply->error, reply->handle); @@ -872,6 +927,8 @@ static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; + reply->error = system_errno_to_nbd_errno(reply->error); + /* Reply [ 0 .. 3] magic (NBD_REPLY_MAGIC) [ 4 .. 7] error (0 == no error)