numa: introduce machine callback for VCPU to node mapping
The current default round-robin way of distributing VCPUs among NUMA nodes might be wrong in the case of multi-core/multi-threaded CPUs, making guests confused about a topology where cores from the same socket end up on different nodes. Allow a machine to override the default mapping by providing a MachineClass::cpu_index_to_socket_id() callback, which lets it group VCPUs from the same socket on the same NUMA node. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Andreas Färber <afaerber@suse.de> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
parent
3ef7197505
commit
57924bcd87
|
@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine);
|
||||||
* of HotplugHandler object, which handles hotplug operation
|
* of HotplugHandler object, which handles hotplug operation
|
||||||
* for a given @dev. It may return NULL if @dev doesn't require
|
* for a given @dev. It may return NULL if @dev doesn't require
|
||||||
* any actions to be performed by hotplug handler.
|
* any actions to be performed by hotplug handler.
|
||||||
|
* @cpu_index_to_socket_id:
|
||||||
|
* used to provide @cpu_index to socket number mapping, allowing
|
||||||
|
* a machine to group CPU threads belonging to the same socket/package
|
||||||
|
* Returns: socket number given cpu_index belongs to.
|
||||||
*/
|
*/
|
||||||
struct MachineClass {
|
struct MachineClass {
|
||||||
/*< private >*/
|
/*< private >*/
|
||||||
|
@ -118,6 +122,7 @@ struct MachineClass {
|
||||||
|
|
||||||
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
|
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
|
||||||
DeviceState *dev);
|
DeviceState *dev);
|
||||||
|
unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "qemu/option.h"
|
#include "qemu/option.h"
|
||||||
#include "sysemu/sysemu.h"
|
#include "sysemu/sysemu.h"
|
||||||
#include "sysemu/hostmem.h"
|
#include "sysemu/hostmem.h"
|
||||||
|
#include "hw/boards.h"
|
||||||
|
|
||||||
extern int nb_numa_nodes; /* Number of NUMA nodes */
|
extern int nb_numa_nodes; /* Number of NUMA nodes */
|
||||||
|
|
||||||
|
@ -16,7 +17,7 @@ typedef struct node_info {
|
||||||
bool present;
|
bool present;
|
||||||
} NodeInfo;
|
} NodeInfo;
|
||||||
extern NodeInfo numa_info[MAX_NODES];
|
extern NodeInfo numa_info[MAX_NODES];
|
||||||
void parse_numa_opts(void);
|
void parse_numa_opts(MachineClass *mc);
|
||||||
void numa_post_machine_init(void);
|
void numa_post_machine_init(void);
|
||||||
void query_numa_node_mem(uint64_t node_mem[]);
|
void query_numa_node_mem(uint64_t node_mem[]);
|
||||||
extern QemuOptsList qemu_numa_opts;
|
extern QemuOptsList qemu_numa_opts;
|
||||||
|
|
18
numa.c
18
numa.c
|
@ -202,7 +202,7 @@ static void validate_numa_cpus(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_numa_opts(void)
|
void parse_numa_opts(MachineClass *mc)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -270,13 +270,21 @@ void parse_numa_opts(void)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* assigning the VCPUs round-robin is easier to implement, guest OSes
|
/* Historically VCPUs were assigned in round-robin order to NUMA
|
||||||
* must cope with this anyway, because there are BIOSes out there in
|
* nodes. However it causes issues with guest not handling it nice
|
||||||
* real machines which also use this scheme.
|
* in case where cores/threads from a multicore CPU appear on
|
||||||
|
* different nodes. So allow boards to override default distribution
|
||||||
|
* rule grouping VCPUs by socket so that VCPUs from the same socket
|
||||||
|
* would be on the same node.
|
||||||
*/
|
*/
|
||||||
if (i == nb_numa_nodes) {
|
if (i == nb_numa_nodes) {
|
||||||
for (i = 0; i < max_cpus; i++) {
|
for (i = 0; i < max_cpus; i++) {
|
||||||
set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
|
unsigned node_id = i % nb_numa_nodes;
|
||||||
|
if (mc->cpu_index_to_socket_id) {
|
||||||
|
node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_bit(i, numa_info[node_id].node_cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
2
vl.c
2
vl.c
|
@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp)
|
||||||
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
|
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
|
||||||
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
|
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
|
||||||
|
|
||||||
parse_numa_opts();
|
parse_numa_opts(machine_class);
|
||||||
|
|
||||||
if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
|
if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|
Loading…
Reference in New Issue