Expand cache= option and use write-through caching by default
This patch changes the cache= option to accept none, writeback, or writethrough to control the host page cache behavior. By default, writethrough caching is now used which internally is implemented by using O_DSYNC to open the disk images. When using -snapshot, writeback is used by default since data integrity is not at all an issue. cache=none has the same behavior as cache=off previously. The latter syntax is still supported but now deprecated. I also cleaned up the O_DIRECT implementation to avoid many of the #ifdefs. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
		
							parent
							
								
									eeb438c1b8
								
							
						
					
					
						commit
						9f7965c7e9
					
				| 
						 | 
				
			
			@ -73,6 +73,11 @@
 | 
			
		|||
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
 | 
			
		||||
#ifndef O_DIRECT
 | 
			
		||||
#define O_DIRECT O_DSYNC
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define FTYPE_FILE   0
 | 
			
		||||
#define FTYPE_CD     1
 | 
			
		||||
#define FTYPE_FD     2
 | 
			
		||||
| 
						 | 
				
			
			@ -101,9 +106,7 @@ typedef struct BDRVRawState {
 | 
			
		|||
    int fd_got_error;
 | 
			
		||||
    int fd_media_changed;
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
    uint8_t* aligned_buf;
 | 
			
		||||
#endif
 | 
			
		||||
} BDRVRawState;
 | 
			
		||||
 | 
			
		||||
static int posix_aio_init(void);
 | 
			
		||||
| 
						 | 
				
			
			@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
    }
 | 
			
		||||
    if (flags & BDRV_O_CREAT)
 | 
			
		||||
        open_flags |= O_CREAT | O_TRUNC;
 | 
			
		||||
#ifdef O_DIRECT
 | 
			
		||||
    if (flags & BDRV_O_DIRECT)
 | 
			
		||||
 | 
			
		||||
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
 | 
			
		||||
     * and O_DIRECT for no caching. */
 | 
			
		||||
    if ((flags & BDRV_O_NOCACHE))
 | 
			
		||||
        open_flags |= O_DIRECT;
 | 
			
		||||
#endif
 | 
			
		||||
    else if (!(flags & BDRV_O_CACHE_WB))
 | 
			
		||||
        open_flags |= O_DSYNC;
 | 
			
		||||
 | 
			
		||||
    s->type = FTYPE_FILE;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
    s->fd = fd;
 | 
			
		||||
    for (i = 0; i < RAW_FD_POOL_SIZE; i++)
 | 
			
		||||
        s->fd_pool[i] = -1;
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
    s->aligned_buf = NULL;
 | 
			
		||||
    if (flags & BDRV_O_DIRECT) {
 | 
			
		||||
    if ((flags & BDRV_O_NOCACHE)) {
 | 
			
		||||
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
 | 
			
		||||
        if (s->aligned_buf == NULL) {
 | 
			
		||||
            ret = -errno;
 | 
			
		||||
| 
						 | 
				
			
			@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
            return ret;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -281,7 +285,6 @@ label__raw_write__success:
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
/*
 | 
			
		||||
 * offset and count are in bytes and possibly not aligned. For files opened
 | 
			
		||||
 * with O_DIRECT, necessary alignments are ensured before calling
 | 
			
		||||
| 
						 | 
				
			
			@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
 | 
			
		|||
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
#define raw_pread raw_pread_aligned
 | 
			
		||||
#define raw_pwrite raw_pwrite_aligned
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_AIO
 | 
			
		||||
/***********************************************************/
 | 
			
		||||
/* Unix AIO using POSIX AIO */
 | 
			
		||||
| 
						 | 
				
			
			@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
 | 
			
		|||
     * If O_DIRECT is used and the buffer is not aligned fall back
 | 
			
		||||
     * to synchronous IO.
 | 
			
		||||
     */
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
 | 
			
		||||
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
 | 
			
		||||
| 
						 | 
				
			
			@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
 | 
			
		|||
        qemu_bh_schedule(bh);
 | 
			
		||||
        return &acb->common;
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
 | 
			
		||||
    if (!acb)
 | 
			
		||||
| 
						 | 
				
			
			@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
 | 
			
		|||
     * If O_DIRECT is used and the buffer is not aligned fall back
 | 
			
		||||
     * to synchronous IO.
 | 
			
		||||
     */
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
    BDRVRawState *s = bs->opaque;
 | 
			
		||||
 | 
			
		||||
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
 | 
			
		||||
| 
						 | 
				
			
			@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
 | 
			
		|||
        qemu_bh_schedule(bh);
 | 
			
		||||
        return &acb->common;
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
 | 
			
		||||
    if (!acb)
 | 
			
		||||
| 
						 | 
				
			
			@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs)
 | 
			
		|||
    if (s->fd >= 0) {
 | 
			
		||||
        close(s->fd);
 | 
			
		||||
        s->fd = -1;
 | 
			
		||||
#if defined(O_DIRECT)
 | 
			
		||||
        if (s->aligned_buf != NULL)
 | 
			
		||||
            qemu_free(s->aligned_buf);
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
    raw_close_fd_pool(s);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
        open_flags |= O_RDONLY;
 | 
			
		||||
        bs->read_only = 1;
 | 
			
		||||
    }
 | 
			
		||||
#ifdef O_DIRECT
 | 
			
		||||
    if (flags & BDRV_O_DIRECT)
 | 
			
		||||
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
 | 
			
		||||
     * and O_DIRECT for no caching. */
 | 
			
		||||
    if ((flags & BDRV_O_NOCACHE))
 | 
			
		||||
        open_flags |= O_DIRECT;
 | 
			
		||||
#endif
 | 
			
		||||
    else if (!(flags & BDRV_O_CACHE_WB))
 | 
			
		||||
        open_flags |= O_DSYNC;
 | 
			
		||||
 | 
			
		||||
    s->type = FTYPE_FILE;
 | 
			
		||||
#if defined(__linux__)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
#else
 | 
			
		||||
    overlapped = FILE_ATTRIBUTE_NORMAL;
 | 
			
		||||
#endif
 | 
			
		||||
    if (flags & BDRV_O_DIRECT)
 | 
			
		||||
    if ((flags & BDRV_O_NOCACHE))
 | 
			
		||||
        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
 | 
			
		||||
    else if (!(flags & BDRV_O_CACHE_WB))
 | 
			
		||||
        overlapped |= FILE_FLAG_WRITE_THROUGH;
 | 
			
		||||
    s->hfile = CreateFile(filename, access_flags,
 | 
			
		||||
                          FILE_SHARE_READ, NULL,
 | 
			
		||||
                          create_flags, overlapped, NULL);
 | 
			
		||||
| 
						 | 
				
			
			@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
 | 
			
		|||
#else
 | 
			
		||||
    overlapped = FILE_ATTRIBUTE_NORMAL;
 | 
			
		||||
#endif
 | 
			
		||||
    if (flags & BDRV_O_DIRECT)
 | 
			
		||||
    if ((flags & BDRV_O_NOCACHE))
 | 
			
		||||
        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
 | 
			
		||||
    else if (!(flags & BDRV_O_CACHE_WB))
 | 
			
		||||
        overlapped |= FILE_FLAG_WRITE_THROUGH;
 | 
			
		||||
    s->hfile = CreateFile(filename, access_flags,
 | 
			
		||||
                          FILE_SHARE_READ, NULL,
 | 
			
		||||
                          create_flags, overlapped, NULL);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										6
									
								
								block.c
								
								
								
								
							
							
						
						
									
										6
									
								
								block.c
								
								
								
								
							| 
						 | 
				
			
			@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
 | 
			
		|||
    /* Note: for compatibility, we open disk image files as RDWR, and
 | 
			
		||||
       RDONLY as fallback */
 | 
			
		||||
    if (!(flags & BDRV_O_FILE))
 | 
			
		||||
        open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
 | 
			
		||||
        open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
 | 
			
		||||
    else
 | 
			
		||||
        open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
 | 
			
		||||
    ret = drv->bdrv_open(bs, filename, open_flags);
 | 
			
		||||
    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
 | 
			
		||||
        ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
 | 
			
		||||
        ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
 | 
			
		||||
        bs->read_only = 1;
 | 
			
		||||
    }
 | 
			
		||||
    if (ret < 0) {
 | 
			
		||||
| 
						 | 
				
			
			@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
 | 
			
		|||
        }
 | 
			
		||||
        path_combine(backing_filename, sizeof(backing_filename),
 | 
			
		||||
                     filename, bs->backing_file);
 | 
			
		||||
        if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
 | 
			
		||||
        if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
 | 
			
		||||
            goto fail;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										5
									
								
								block.h
								
								
								
								
							
							
						
						
									
										5
									
								
								block.h
								
								
								
								
							| 
						 | 
				
			
			@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo {
 | 
			
		|||
                                     use a disk image format on top of
 | 
			
		||||
                                     it (default for
 | 
			
		||||
                                     bdrv_file_open()) */
 | 
			
		||||
#define BDRV_O_DIRECT      0x0020
 | 
			
		||||
#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
 | 
			
		||||
#define BDRV_O_CACHE_WB    0x0040 /* use write-back caching */
 | 
			
		||||
 | 
			
		||||
#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
 | 
			
		||||
 | 
			
		||||
void bdrv_info(void);
 | 
			
		||||
void bdrv_info_stats(void);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}.
 | 
			
		|||
@item snapshot=@var{snapshot}
 | 
			
		||||
@var{snapshot} is "on" or "off" and enables or disables snapshot mode for the given drive (see @option{-snapshot}).
 | 
			
		||||
@item cache=@var{cache}
 | 
			
		||||
@var{cache} is "on" or "off" and allows to disable host cache to access data.
 | 
			
		||||
@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
 | 
			
		||||
@item format=@var{format}
 | 
			
		||||
Specify which disk @var{format} will be used rather than detecting
 | 
			
		||||
the format.  Can be used to specify format=raw to avoid interpreting
 | 
			
		||||
an untrusted format header.
 | 
			
		||||
@end table
 | 
			
		||||
 | 
			
		||||
By default, writethrough caching is used for all block devices.  This means that
 | 
			
		||||
the host page cache will be used to read and write data but write notification
 | 
			
		||||
will be sent to the guest only when the data has been reported as written by
 | 
			
		||||
the storage subsystem.
 | 
			
		||||
 | 
			
		||||
Writeback caching will report data writes as completed as soon as the data is
 | 
			
		||||
present in the host page cache.  This is safe as long as you trust your host.
 | 
			
		||||
If your host crashes or loses power, then the guest may experience data
 | 
			
		||||
corruption.  When using the @option{-snapshot} option, writeback caching is
 | 
			
		||||
used by default.
 | 
			
		||||
 | 
			
		||||
The host page cache can be avoided entirely with @option{cache=none}.  This will
 | 
			
		||||
attempt to do disk IO directly to the guest's memory.  QEMU may still perform
 | 
			
		||||
an internal copy of the data.
 | 
			
		||||
 | 
			
		||||
Instead of @option{-cdrom} you can use:
 | 
			
		||||
@example
 | 
			
		||||
qemu -drive file=file,index=2,media=cdrom
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -232,7 +232,7 @@ int main(int argc, char **argv)
 | 
			
		|||
            flags |= BDRV_O_SNAPSHOT;
 | 
			
		||||
            break;
 | 
			
		||||
        case 'n':
 | 
			
		||||
            flags |= BDRV_O_DIRECT;
 | 
			
		||||
            flags |= BDRV_O_NOCACHE;
 | 
			
		||||
            break;
 | 
			
		||||
        case 'b':
 | 
			
		||||
            bindto = optarg;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										18
									
								
								vl.c
								
								
								
								
							
							
						
						
									
										18
									
								
								vl.c
								
								
								
								
							| 
						 | 
				
			
			@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot,
 | 
			
		|||
    }
 | 
			
		||||
 | 
			
		||||
    if (get_param_value(buf, sizeof(buf), "cache", str)) {
 | 
			
		||||
        if (!strcmp(buf, "off"))
 | 
			
		||||
        if (!strcmp(buf, "off") || !strcmp(buf, "none"))
 | 
			
		||||
            cache = 0;
 | 
			
		||||
        else if (!strcmp(buf, "on"))
 | 
			
		||||
        else if (!strcmp(buf, "writethrough"))
 | 
			
		||||
            cache = 1;
 | 
			
		||||
        else if (!strcmp(buf, "writeback"))
 | 
			
		||||
            cache = 2;
 | 
			
		||||
        else {
 | 
			
		||||
           fprintf(stderr, "qemu: invalid cache option\n");
 | 
			
		||||
           return -1;
 | 
			
		||||
| 
						 | 
				
			
			@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot,
 | 
			
		|||
    if (!file[0])
 | 
			
		||||
        return 0;
 | 
			
		||||
    bdrv_flags = 0;
 | 
			
		||||
    if (snapshot)
 | 
			
		||||
    if (snapshot) {
 | 
			
		||||
        bdrv_flags |= BDRV_O_SNAPSHOT;
 | 
			
		||||
    if (!cache)
 | 
			
		||||
        bdrv_flags |= BDRV_O_DIRECT;
 | 
			
		||||
        cache = 2; /* always use write-back with snapshot */
 | 
			
		||||
    }
 | 
			
		||||
    if (cache == 0) /* no caching */
 | 
			
		||||
        bdrv_flags |= BDRV_O_NOCACHE;
 | 
			
		||||
    else if (cache == 2) /* write-back */
 | 
			
		||||
        bdrv_flags |= BDRV_O_CACHE_WB;
 | 
			
		||||
    if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
 | 
			
		||||
        fprintf(stderr, "qemu: could not open disk image %s\n",
 | 
			
		||||
                        file);
 | 
			
		||||
| 
						 | 
				
			
			@ -8145,7 +8151,7 @@ static void help(int exitcode)
 | 
			
		|||
           "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
 | 
			
		||||
	   "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
 | 
			
		||||
           "       [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
 | 
			
		||||
           "       [,cache=on|off][,format=f]\n"
 | 
			
		||||
           "       [,cache=writethrough|writeback|none][,format=f]\n"
 | 
			
		||||
	   "                use 'file' as a drive image\n"
 | 
			
		||||
           "-mtdblock file  use 'file' as on-board Flash memory image\n"
 | 
			
		||||
           "-sd file        use 'file' as SecureDigital card image\n"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue