Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4599 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
		
							parent
							
								
									0ac087f1f3
								
							
						
					
					
						commit
						bed5cc5207
					
				| 
						 | 
				
			
			@ -70,6 +70,8 @@
 | 
			
		|||
#define FTYPE_CD     1
 | 
			
		||||
#define FTYPE_FD     2
 | 
			
		||||
 | 
			
		||||
#define ALIGNED_BUFFER_SIZE (32 * 512)
 | 
			
		||||
 | 
			
		||||
/* if the FD is not accessed during that time (in ms), we try to
 | 
			
		||||
   reopen it to see if the disk has been changed */
 | 
			
		||||
#define FD_OPEN_TIMEOUT 1000
 | 
			
		||||
| 
						 | 
				
			
			@ -86,6 +88,9 @@ typedef struct BDRVRawState {
 | 
			
		|||
    int fd_got_error;
 | 
			
		||||
    int fd_media_changed;
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
    uint8_t* aligned_buf;
 | 
			
		||||
#endif
 | 
			
		||||
} BDRVRawState;
 | 
			
		||||
 | 
			
		||||
static int fd_open(BlockDriverState *bs);
 | 
			
		||||
| 
						 | 
				
			
			@ -121,6 +126,17 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    s->fd = fd;
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
    s->aligned_buf = NULL;
 | 
			
		||||
    if (flags & BDRV_O_DIRECT) {
 | 
			
		||||
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
 | 
			
		||||
        if (s->aligned_buf == NULL) {
 | 
			
		||||
            ret = -errno;
 | 
			
		||||
            close(fd);
 | 
			
		||||
            return ret;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -141,7 +157,14 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
#endif
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
static int raw_pread(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
/*
 | 
			
		||||
 * offset and count are in bytes, but must be multiples of 512 for files
 | 
			
		||||
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
 | 
			
		||||
 *
 | 
			
		||||
 * This function may be called without alignment if the caller ensures
 | 
			
		||||
 * that O_DIRECT is not in effect.
 | 
			
		||||
 */
 | 
			
		||||
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
                     uint8_t *buf, int count)
 | 
			
		||||
{
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
| 
						 | 
				
			
			@ -194,7 +217,14 @@ label__raw_read__success:
 | 
			
		|||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
/*
 | 
			
		||||
 * offset and count are in bytes, but must be multiples of 512 for files
 | 
			
		||||
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
 | 
			
		||||
 *
 | 
			
		||||
 * This function may be called without alignment if the caller ensures
 | 
			
		||||
 * that O_DIRECT is not in effect.
 | 
			
		||||
 */
 | 
			
		||||
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
                      const uint8_t *buf, int count)
 | 
			
		||||
{
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
| 
						 | 
				
			
			@ -230,6 +260,164 @@ label__raw_write__success:
 | 
			
		|||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
/*
 | 
			
		||||
 * offset and count are in bytes and possibly not aligned. For files opened
 | 
			
		||||
 * with O_DIRECT, necessary alignments are ensured before calling
 | 
			
		||||
 * raw_pread_aligned to do the actual read.
 | 
			
		||||
 */
 | 
			
		||||
static int raw_pread(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
                     uint8_t *buf, int count)
 | 
			
		||||
{
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
    int size, ret, shift, sum;
 | 
			
		||||
 | 
			
		||||
    sum = 0;
 | 
			
		||||
 | 
			
		||||
    if (s->aligned_buf != NULL)  {
 | 
			
		||||
 | 
			
		||||
        if (offset & 0x1ff) {
 | 
			
		||||
            /* align offset on a 512 bytes boundary */
 | 
			
		||||
 | 
			
		||||
            shift = offset & 0x1ff;
 | 
			
		||||
            size = (shift + count + 0x1ff) & ~0x1ff;
 | 
			
		||||
            if (size > ALIGNED_BUFFER_SIZE)
 | 
			
		||||
                size = ALIGNED_BUFFER_SIZE;
 | 
			
		||||
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
 | 
			
		||||
            if (ret < 0)
 | 
			
		||||
                return ret;
 | 
			
		||||
 | 
			
		||||
            size = 512 - shift;
 | 
			
		||||
            if (size > count)
 | 
			
		||||
                size = count;
 | 
			
		||||
            memcpy(buf, s->aligned_buf + shift, size);
 | 
			
		||||
 | 
			
		||||
            buf += size;
 | 
			
		||||
            offset += size;
 | 
			
		||||
            count -= size;
 | 
			
		||||
            sum += size;
 | 
			
		||||
 | 
			
		||||
            if (count == 0)
 | 
			
		||||
                return sum;
 | 
			
		||||
        }
 | 
			
		||||
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
 | 
			
		||||
 | 
			
		||||
            /* read on aligned buffer */
 | 
			
		||||
 | 
			
		||||
            while (count) {
 | 
			
		||||
 | 
			
		||||
                size = (count + 0x1ff) & ~0x1ff;
 | 
			
		||||
                if (size > ALIGNED_BUFFER_SIZE)
 | 
			
		||||
                    size = ALIGNED_BUFFER_SIZE;
 | 
			
		||||
 | 
			
		||||
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
 | 
			
		||||
                if (ret < 0)
 | 
			
		||||
                    return ret;
 | 
			
		||||
 | 
			
		||||
                size = ret;
 | 
			
		||||
                if (size > count)
 | 
			
		||||
                    size = count;
 | 
			
		||||
 | 
			
		||||
                memcpy(buf, s->aligned_buf, size);
 | 
			
		||||
 | 
			
		||||
                buf += size;
 | 
			
		||||
                offset += size;
 | 
			
		||||
                count -= size;
 | 
			
		||||
                sum += size;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            return sum;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return raw_pread_aligned(bs, offset, buf, count) + sum;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * offset and count are in bytes and possibly not aligned. For files opened
 | 
			
		||||
 * with O_DIRECT, necessary alignments are ensured before calling
 | 
			
		||||
 * raw_pwrite_aligned to do the actual write.
 | 
			
		||||
 */
 | 
			
		||||
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
 | 
			
		||||
                      const uint8_t *buf, int count)
 | 
			
		||||
{
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
    int size, ret, shift, sum;
 | 
			
		||||
 | 
			
		||||
    sum = 0;
 | 
			
		||||
 | 
			
		||||
    if (s->aligned_buf != NULL) {
 | 
			
		||||
 | 
			
		||||
        if (offset & 0x1ff) {
 | 
			
		||||
            /* align offset on a 512 bytes boundary */
 | 
			
		||||
            shift = offset & 0x1ff;
 | 
			
		||||
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
 | 
			
		||||
            if (ret < 0)
 | 
			
		||||
                return ret;
 | 
			
		||||
 | 
			
		||||
            size = 512 - shift;
 | 
			
		||||
            if (size > count)
 | 
			
		||||
                size = count;
 | 
			
		||||
            memcpy(s->aligned_buf + shift, buf, size);
 | 
			
		||||
 | 
			
		||||
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
 | 
			
		||||
            if (ret < 0)
 | 
			
		||||
                return ret;
 | 
			
		||||
 | 
			
		||||
            buf += size;
 | 
			
		||||
            offset += size;
 | 
			
		||||
            count -= size;
 | 
			
		||||
            sum += size;
 | 
			
		||||
 | 
			
		||||
            if (count == 0)
 | 
			
		||||
                return sum;
 | 
			
		||||
        }
 | 
			
		||||
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
 | 
			
		||||
 | 
			
		||||
            while ((size = (count & ~0x1ff)) != 0) {
 | 
			
		||||
 | 
			
		||||
                if (size > ALIGNED_BUFFER_SIZE)
 | 
			
		||||
                    size = ALIGNED_BUFFER_SIZE;
 | 
			
		||||
 | 
			
		||||
                memcpy(s->aligned_buf, buf, size);
 | 
			
		||||
 | 
			
		||||
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
 | 
			
		||||
                if (ret < 0)
 | 
			
		||||
                    return ret;
 | 
			
		||||
 | 
			
		||||
                buf += ret;
 | 
			
		||||
                offset += ret;
 | 
			
		||||
                count -= ret;
 | 
			
		||||
                sum += ret;
 | 
			
		||||
            }
 | 
			
		||||
            /* here, count < 512 because (count & ~0x1ff) == 0 */
 | 
			
		||||
            if (count) {
 | 
			
		||||
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
 | 
			
		||||
                if (ret < 0)
 | 
			
		||||
                    return ret;
 | 
			
		||||
                 memcpy(s->aligned_buf, buf, count);
 | 
			
		||||
 | 
			
		||||
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
 | 
			
		||||
                 if (ret < 0)
 | 
			
		||||
                     return ret;
 | 
			
		||||
                 if (count < ret)
 | 
			
		||||
                     ret = count;
 | 
			
		||||
 | 
			
		||||
                 sum += ret;
 | 
			
		||||
            }
 | 
			
		||||
            return sum;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
#define raw_pread raw_pread_aligned
 | 
			
		||||
#define raw_pwrite raw_pwrite_aligned
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/***********************************************************/
 | 
			
		||||
/* Unix AIO using POSIX AIO */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -237,6 +425,7 @@ typedef struct RawAIOCB {
 | 
			
		|||
    BlockDriverAIOCB common;
 | 
			
		||||
    struct aiocb aiocb;
 | 
			
		||||
    struct RawAIOCB *next;
 | 
			
		||||
    int ret;
 | 
			
		||||
} RawAIOCB;
 | 
			
		||||
 | 
			
		||||
static int aio_sig_num = SIGUSR2;
 | 
			
		||||
| 
						 | 
				
			
			@ -397,12 +586,38 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
 | 
			
		|||
    return acb;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifndef QEMU_IMG
 | 
			
		||||
static void raw_aio_em_cb(void* opaque)
 | 
			
		||||
{
 | 
			
		||||
    RawAIOCB *acb = opaque;
 | 
			
		||||
    acb->common.cb(acb->common.opaque, acb->ret);
 | 
			
		||||
    qemu_aio_release(acb);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
 | 
			
		||||
        int64_t sector_num, uint8_t *buf, int nb_sectors,
 | 
			
		||||
        BlockDriverCompletionFunc *cb, void *opaque)
 | 
			
		||||
{
 | 
			
		||||
    RawAIOCB *acb;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * If O_DIRECT is used and the buffer is not aligned fall back
 | 
			
		||||
     * to synchronous IO.
 | 
			
		||||
     */
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
 | 
			
		||||
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
 | 
			
		||||
        QEMUBH *bh;
 | 
			
		||||
        acb = qemu_aio_get(bs, cb, opaque);
 | 
			
		||||
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
 | 
			
		||||
        bh = qemu_bh_new(raw_aio_em_cb, acb);
 | 
			
		||||
        qemu_bh_schedule(bh);
 | 
			
		||||
        return &acb->common;
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
 | 
			
		||||
    if (!acb)
 | 
			
		||||
        return NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -419,6 +634,23 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
 | 
			
		|||
{
 | 
			
		||||
    RawAIOCB *acb;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * If O_DIRECT is used and the buffer is not aligned fall back
 | 
			
		||||
     * to synchronous IO.
 | 
			
		||||
     */
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
 | 
			
		||||
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
 | 
			
		||||
        QEMUBH *bh;
 | 
			
		||||
        acb = qemu_aio_get(bs, cb, opaque);
 | 
			
		||||
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
 | 
			
		||||
        bh = qemu_bh_new(raw_aio_em_cb, acb);
 | 
			
		||||
        qemu_bh_schedule(bh);
 | 
			
		||||
        return &acb->common;
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
 | 
			
		||||
    if (!acb)
 | 
			
		||||
        return NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -462,6 +694,10 @@ static void raw_close(BlockDriverState *bs)
 | 
			
		|||
    if (s->fd >= 0) {
 | 
			
		||||
        close(s->fd);
 | 
			
		||||
        s->fd = -1;
 | 
			
		||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
 | 
			
		||||
        if (s->aligned_buf != NULL)
 | 
			
		||||
            qemu_free(s->aligned_buf);
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue