numa: introduce machine callback for VCPU to node mapping
The current default round-robin way of distributing VCPUs among NUMA nodes might be wrong in the case of multi-core/multi-threaded CPUs, making guests confused wrt topology when cores from the same socket end up on different nodes. Allow a machine to override the default mapping by providing a MachineClass::cpu_index_to_socket_id() callback, which would allow it to group VCPUs from a socket on the same NUMA node. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Andreas Färber <afaerber@suse.de> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
		
							parent
							
								
									3ef7197505
								
							
						
					
					
						commit
						57924bcd87
					
				| 
						 | 
				
			
			@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine);
 | 
			
		|||
 *    of HotplugHandler object, which handles hotplug operation
 | 
			
		||||
 *    for a given @dev. It may return NULL if @dev doesn't require
 | 
			
		||||
 *    any actions to be performed by hotplug handler.
 | 
			
		||||
 * @cpu_index_to_socket_id:
 | 
			
		||||
 *    used to provide @cpu_index to socket number mapping, allowing
 | 
			
		||||
 *    a machine to group CPU threads belonging to the same socket/package.
 | 
			
		||||
 *    Returns: the socket number that the given cpu_index belongs to.
 | 
			
		||||
 */
 | 
			
		||||
struct MachineClass {
 | 
			
		||||
    /*< private >*/
 | 
			
		||||
| 
						 | 
				
			
			@ -118,6 +122,7 @@ struct MachineClass {
 | 
			
		|||
 | 
			
		||||
    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
 | 
			
		||||
                                           DeviceState *dev);
 | 
			
		||||
    unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,6 +6,7 @@
 | 
			
		|||
#include "qemu/option.h"
 | 
			
		||||
#include "sysemu/sysemu.h"
 | 
			
		||||
#include "sysemu/hostmem.h"
 | 
			
		||||
#include "hw/boards.h"
 | 
			
		||||
 | 
			
		||||
extern int nb_numa_nodes;   /* Number of NUMA nodes */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +17,7 @@ typedef struct node_info {
 | 
			
		|||
    bool present;
 | 
			
		||||
} NodeInfo;
 | 
			
		||||
extern NodeInfo numa_info[MAX_NODES];
 | 
			
		||||
void parse_numa_opts(void);
 | 
			
		||||
void parse_numa_opts(MachineClass *mc);
 | 
			
		||||
void numa_post_machine_init(void);
 | 
			
		||||
void query_numa_node_mem(uint64_t node_mem[]);
 | 
			
		||||
extern QemuOptsList qemu_numa_opts;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										18
									
								
								numa.c
								
								
								
								
							
							
						
						
									
										18
									
								
								numa.c
								
								
								
								
							| 
						 | 
				
			
			@ -202,7 +202,7 @@ static void validate_numa_cpus(void)
 | 
			
		|||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void parse_numa_opts(void)
 | 
			
		||||
void parse_numa_opts(MachineClass *mc)
 | 
			
		||||
{
 | 
			
		||||
    int i;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -270,13 +270,21 @@ void parse_numa_opts(void)
 | 
			
		|||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        /* assigning the VCPUs round-robin is easier to implement, guest OSes
 | 
			
		||||
         * must cope with this anyway, because there are BIOSes out there in
 | 
			
		||||
         * real machines which also use this scheme.
 | 
			
		||||
        /* Historically VCPUs were assigned in round-robin order to NUMA
 | 
			
		||||
         * nodes. However, it causes issues with guests not handling it nicely
 | 
			
		||||
         * in case where cores/threads from a multicore CPU appear on
 | 
			
		||||
         * different nodes. So allow boards to override default distribution
 | 
			
		||||
         * rule grouping VCPUs by socket so that VCPUs from the same socket
 | 
			
		||||
         * would be on the same node.
 | 
			
		||||
         */
 | 
			
		||||
        if (i == nb_numa_nodes) {
 | 
			
		||||
            for (i = 0; i < max_cpus; i++) {
 | 
			
		||||
                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
 | 
			
		||||
                unsigned node_id = i % nb_numa_nodes;
 | 
			
		||||
                if (mc->cpu_index_to_socket_id) {
 | 
			
		||||
                    node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                set_bit(i, numa_info[node_id].node_cpu);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										2
									
								
								vl.c
								
								
								
								
							
							
						
						
									
										2
									
								
								vl.c
								
								
								
								
							| 
						 | 
				
			
			@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp)
 | 
			
		|||
    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
 | 
			
		||||
    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 | 
			
		||||
 | 
			
		||||
    parse_numa_opts();
 | 
			
		||||
    parse_numa_opts(machine_class);
 | 
			
		||||
 | 
			
		||||
    if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
 | 
			
		||||
        exit(1);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue