qcow2: Order concurrent AIO requests on the same unallocated cluster
When two AIO requests write to the same cluster, and this cluster is unallocated, currently both requests allocate a new cluster and the second one merges the first one when it is completed. This means an cluster allocation, a read and a cluster deallocation which cause some overhead. If we simply let the second request wait until the first one is done, we improve overall performance with AIO requests (specifially, qcow2/virtio combinations). This patch maintains a list of in-flight requests that have allocated new clusters. A second request touching the same cluster is limited so that it either doesn't touch the allocation of the first request (so it can have a non-overlapping allocation) or it waits for the first request to complete. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
		
							parent
							
								
									ea80b906f4
								
							
						
					
					
						commit
						f214978a42
					
				| 
						 | 
				
			
			@ -684,6 +684,7 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
 | 
			
		|||
    int l2_index, ret;
 | 
			
		||||
    uint64_t l2_offset, *l2_table, cluster_offset;
 | 
			
		||||
    int nb_clusters, i = 0;
 | 
			
		||||
    QCowL2Meta *old_alloc;
 | 
			
		||||
 | 
			
		||||
    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
 | 
			
		||||
    if (ret == 0)
 | 
			
		||||
| 
						 | 
				
			
			@ -732,6 +733,44 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
 | 
			
		|||
    }
 | 
			
		||||
    nb_clusters = i;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Check if there already is an AIO write request in flight which allocates
 | 
			
		||||
     * the same cluster. In this case we need to wait until the previous
 | 
			
		||||
     * request has completed and updated the L2 table accordingly.
 | 
			
		||||
     */
 | 
			
		||||
    LIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
 | 
			
		||||
 | 
			
		||||
        uint64_t end_offset = offset + nb_clusters * s->cluster_size;
 | 
			
		||||
        uint64_t old_offset = old_alloc->offset;
 | 
			
		||||
        uint64_t old_end_offset = old_alloc->offset +
 | 
			
		||||
            old_alloc->nb_clusters * s->cluster_size;
 | 
			
		||||
 | 
			
		||||
        if (end_offset < old_offset || offset > old_end_offset) {
 | 
			
		||||
            /* No intersection */
 | 
			
		||||
        } else {
 | 
			
		||||
            if (offset < old_offset) {
 | 
			
		||||
                /* Stop at the start of a running allocation */
 | 
			
		||||
                nb_clusters = (old_offset - offset) >> s->cluster_bits;
 | 
			
		||||
            } else {
 | 
			
		||||
                nb_clusters = 0;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (nb_clusters == 0) {
 | 
			
		||||
                /* Set dependency and wait for a callback */
 | 
			
		||||
                m->depends_on = old_alloc;
 | 
			
		||||
                m->nb_clusters = 0;
 | 
			
		||||
                *num = 0;
 | 
			
		||||
                return 0;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!nb_clusters) {
 | 
			
		||||
        abort();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
 | 
			
		||||
 | 
			
		||||
    /* allocate a new cluster */
 | 
			
		||||
 | 
			
		||||
    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -219,6 +219,8 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
    if (qcow2_refcount_init(bs) < 0)
 | 
			
		||||
        goto fail;
 | 
			
		||||
 | 
			
		||||
    LIST_INIT(&s->cluster_allocs);
 | 
			
		||||
 | 
			
		||||
    /* read qcow2 extensions */
 | 
			
		||||
    if (header.backing_file_offset)
 | 
			
		||||
        ext_end = header.backing_file_offset;
 | 
			
		||||
| 
						 | 
				
			
			@ -338,6 +340,7 @@ typedef struct QCowAIOCB {
 | 
			
		|||
    QEMUIOVector hd_qiov;
 | 
			
		||||
    QEMUBH *bh;
 | 
			
		||||
    QCowL2Meta l2meta;
 | 
			
		||||
    LIST_ENTRY(QCowAIOCB) next_depend;
 | 
			
		||||
} QCowAIOCB;
 | 
			
		||||
 | 
			
		||||
static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
 | 
			
		||||
| 
						 | 
				
			
			@ -500,6 +503,7 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
 | 
			
		|||
    acb->n = 0;
 | 
			
		||||
    acb->cluster_offset = 0;
 | 
			
		||||
    acb->l2meta.nb_clusters = 0;
 | 
			
		||||
    LIST_INIT(&acb->l2meta.dependent_requests);
 | 
			
		||||
    return acb;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -517,6 +521,33 @@ static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
 | 
			
		|||
    return &acb->common;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void qcow_aio_write_cb(void *opaque, int ret);
 | 
			
		||||
 | 
			
		||||
static void run_dependent_requests(QCowL2Meta *m)
 | 
			
		||||
{
 | 
			
		||||
    QCowAIOCB *req;
 | 
			
		||||
    QCowAIOCB *next;
 | 
			
		||||
 | 
			
		||||
    /* Take the request off the list of running requests */
 | 
			
		||||
    if (m->nb_clusters != 0) {
 | 
			
		||||
        LIST_REMOVE(m, next_in_flight);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Restart all dependent requests.
 | 
			
		||||
     * Can't use LIST_FOREACH here - the next link might not be the same
 | 
			
		||||
     * any more after the callback  (request could depend on a different
 | 
			
		||||
     * request now)
 | 
			
		||||
     */
 | 
			
		||||
    for (req = m->dependent_requests.lh_first; req != NULL; req = next) {
 | 
			
		||||
        next = req->next_depend.le_next;
 | 
			
		||||
        qcow_aio_write_cb(req, 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /* Empty the list for the next part of the request */
 | 
			
		||||
    LIST_INIT(&m->dependent_requests);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void qcow_aio_write_cb(void *opaque, int ret)
 | 
			
		||||
{
 | 
			
		||||
    QCowAIOCB *acb = opaque;
 | 
			
		||||
| 
						 | 
				
			
			@ -528,14 +559,15 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 | 
			
		|||
 | 
			
		||||
    acb->hd_aiocb = NULL;
 | 
			
		||||
 | 
			
		||||
    if (ret >= 0) {
 | 
			
		||||
        ret = qcow2_alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    run_dependent_requests(&acb->l2meta);
 | 
			
		||||
 | 
			
		||||
    if (ret < 0)
 | 
			
		||||
        goto done;
 | 
			
		||||
 | 
			
		||||
    if (qcow2_alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
 | 
			
		||||
        qcow2_free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
 | 
			
		||||
        goto done;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    acb->nb_sectors -= acb->n;
 | 
			
		||||
    acb->sector_num += acb->n;
 | 
			
		||||
    acb->buf += acb->n * 512;
 | 
			
		||||
| 
						 | 
				
			
			@ -555,6 +587,14 @@ static void qcow_aio_write_cb(void *opaque, int ret)
 | 
			
		|||
    acb->cluster_offset = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
 | 
			
		||||
                                          index_in_cluster,
 | 
			
		||||
                                          n_end, &acb->n, &acb->l2meta);
 | 
			
		||||
 | 
			
		||||
    /* Need to wait for another request? If so, we are done for now. */
 | 
			
		||||
    if (!acb->cluster_offset && acb->l2meta.depends_on != NULL) {
 | 
			
		||||
        LIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
 | 
			
		||||
            acb, next_depend);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
 | 
			
		||||
        ret = -EIO;
 | 
			
		||||
        goto done;
 | 
			
		||||
| 
						 | 
				
			
			@ -650,6 +690,7 @@ static int preallocate(BlockDriverState *bs)
 | 
			
		|||
 | 
			
		||||
    nb_sectors = bdrv_getlength(bs) >> 9;
 | 
			
		||||
    offset = 0;
 | 
			
		||||
    LIST_INIT(&meta.dependent_requests);
 | 
			
		||||
 | 
			
		||||
    while (nb_sectors) {
 | 
			
		||||
        num = MIN(nb_sectors, INT_MAX >> 9);
 | 
			
		||||
| 
						 | 
				
			
			@ -665,6 +706,10 @@ static int preallocate(BlockDriverState *bs)
 | 
			
		|||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        /* There are no dependent requests, but we need to remove our request
 | 
			
		||||
         * from the list of in-flight requests */
 | 
			
		||||
        run_dependent_requests(&meta);
 | 
			
		||||
 | 
			
		||||
        /* TODO Preallocate data if requested */
 | 
			
		||||
 | 
			
		||||
        nb_sectors -= num;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -98,6 +98,7 @@ typedef struct BDRVQcowState {
 | 
			
		|||
    uint8_t *cluster_cache;
 | 
			
		||||
    uint8_t *cluster_data;
 | 
			
		||||
    uint64_t cluster_cache_offset;
 | 
			
		||||
    LIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
 | 
			
		||||
 | 
			
		||||
    uint64_t *refcount_table;
 | 
			
		||||
    uint64_t refcount_table_offset;
 | 
			
		||||
| 
						 | 
				
			
			@ -128,6 +129,8 @@ typedef struct QCowCreateState {
 | 
			
		|||
    int64_t refcount_block_offset;
 | 
			
		||||
} QCowCreateState;
 | 
			
		||||
 | 
			
		||||
struct QCowAIOCB;
 | 
			
		||||
 | 
			
		||||
/* XXX This could be private for qcow2-cluster.c */
 | 
			
		||||
typedef struct QCowL2Meta
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -135,6 +138,10 @@ typedef struct QCowL2Meta
 | 
			
		|||
    int n_start;
 | 
			
		||||
    int nb_available;
 | 
			
		||||
    int nb_clusters;
 | 
			
		||||
    struct QCowL2Meta *depends_on;
 | 
			
		||||
    LIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
 | 
			
		||||
 | 
			
		||||
    LIST_ENTRY(QCowL2Meta) next_in_flight;
 | 
			
		||||
} QCowL2Meta;
 | 
			
		||||
 | 
			
		||||
static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue