MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License version |
| 6 | * 2 as published by the Free Software Foundation. |
| 7 | * |
| 8 | * You should have received a copy of the GNU General Public License |
| 9 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
Paolo Bonzini | 6b620ca | 2012-01-13 17:44:23 +0100 | [diff] [blame] | 10 | * |
| 11 | * Contributions after 2012-01-13 are licensed under the terms of the |
| 12 | * GNU GPL, version 2 or (at your option) any later version. |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 13 | */ |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 14 | |
| 15 | #include "qemu-common.h" |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 16 | #include "qemu/uri.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame] | 17 | #include "qemu/error-report.h" |
| 18 | #include "qemu/sockets.h" |
Paolo Bonzini | 737e150 | 2012-12-17 18:19:44 +0100 | [diff] [blame] | 19 | #include "block/block_int.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame] | 20 | #include "qemu/bitops.h" |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 21 | |
/* Protocol version */
#define SD_PROTO_VER 0x01

/* Default server address and port */
#define SD_DEFAULT_ADDR "localhost"
#define SD_DEFAULT_PORT 7000

/* Opcodes: object operations */
#define SD_OP_CREATE_AND_WRITE_OBJ  0x01
#define SD_OP_READ_OBJ              0x02
#define SD_OP_WRITE_OBJ             0x03

/* Opcodes: vdi operations */
#define SD_OP_NEW_VDI        0x11
#define SD_OP_LOCK_VDI       0x12
#define SD_OP_RELEASE_VDI    0x13
#define SD_OP_GET_VDI_INFO   0x14
#define SD_OP_READ_VDIS      0x15
#define SD_OP_FLUSH_VDI      0x16

/* Request flags */
#define SD_FLAG_CMD_WRITE    0x01
#define SD_FLAG_CMD_COW      0x02
#define SD_FLAG_CMD_CACHE    0x04 /* Writeback mode for cache */
#define SD_FLAG_CMD_DIRECT   0x08 /* Don't use cache */

/* Result codes; sd_strerror() maps each of them to a message */
#define SD_RES_SUCCESS       0x00 /* Success */
#define SD_RES_UNKNOWN       0x01 /* Unknown error */
#define SD_RES_NO_OBJ        0x02 /* No object found */
#define SD_RES_EIO           0x03 /* I/O error */
#define SD_RES_VDI_EXIST     0x04 /* Vdi exists already */
#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */
#define SD_RES_SYSTEM_ERROR  0x06 /* System error */
#define SD_RES_VDI_LOCKED    0x07 /* Vdi is locked */
#define SD_RES_NO_VDI        0x08 /* No vdi found */
#define SD_RES_NO_BASE_VDI   0x09 /* No base vdi found */
#define SD_RES_VDI_READ      0x0A /* Cannot read requested vdi */
#define SD_RES_VDI_WRITE     0x0B /* Cannot write requested vdi */
#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */
#define SD_RES_BASE_VDI_WRITE   0x0D /* Cannot write base vdi */
#define SD_RES_NO_TAG        0x0E /* Requested tag is not found */
#define SD_RES_STARTUP       0x0F /* Sheepdog is on starting up */
#define SD_RES_VDI_NOT_LOCKED   0x10 /* Vdi is not locked */
#define SD_RES_SHUTDOWN      0x11 /* Sheepdog is shutting down */
#define SD_RES_NO_MEM        0x12 /* Cannot allocate memory */
#define SD_RES_FULL_VDI      0x13 /* we already have the maximum vdis */
#define SD_RES_VER_MISMATCH  0x14 /* Protocol version mismatch */
#define SD_RES_NO_SPACE      0x15 /* Server has no room for new objects */
#define SD_RES_WAIT_FOR_FORMAT  0x16 /* Waiting for a format operation */
#define SD_RES_WAIT_FOR_JOIN    0x17 /* Waiting for other nodes joining */
#define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT          0x19 /* Sheepdog is stopped serving IO request */
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 69 | |
/*
 * Object ID rules
 *
 *  0 - 19 (20 bits): data object space
 * 20 - 31 (12 bits): reserved data object space
 * 32 - 55 (24 bits): vdi object space
 * 56 - 59 ( 4 bits): reserved vdi object space
 * 60 - 63 ( 4 bits): object type identifier space
 */

#define VDI_SPACE_SHIFT   32
#define VDI_BIT (UINT64_C(1) << 63)
#define VMSTATE_BIT (UINT64_C(1) << 62)
#define MAX_DATA_OBJS (UINT64_C(1) << 20)   /* 20-bit data object space */
#define MAX_CHILDREN 1024
#define SD_MAX_VDI_LEN 256
#define SD_MAX_VDI_TAG_LEN 256
#define SD_NR_VDIS   (1U << 24)             /* 24-bit vdi object space */
#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
/* 2^22 bytes per object times 2^20 objects */
#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
#define SECTOR_SIZE 512

#define SD_INODE_SIZE (sizeof(SheepdogInode))
#define CURRENT_VDI_ID 0
| 94 | |
/*
 * Generic request header.
 *
 * The leading proto_ver/opcode/flags/epoch/id/data_length fields are
 * repeated at the start of every other request and response layout in
 * this file; opcode_specific is the untyped view of the remaining bytes.
 */
typedef struct SheepdogReq {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    opcode_specific[8];
} SheepdogReq;
| 104 | |
/*
 * Generic response header: same leading fields as SheepdogReq, followed
 * by a result word (see the SD_RES_* codes) and an untyped remainder.
 */
typedef struct SheepdogRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    result;
    uint32_t    opcode_specific[7];
} SheepdogRsp;
| 115 | |
/*
 * Object request: the common header fields followed by the
 * object-specific ones (object id, copy-on-write source object id,
 * number of copies, and byte offset).
 */
typedef struct SheepdogObjReq {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint64_t    oid;
    uint64_t    cow_oid;
    uint32_t    copies;
    uint32_t    rsvd;
    uint64_t    offset;
} SheepdogObjReq;
| 129 | |
/*
 * Object response: the common header fields, the result code, the
 * number of copies, and padding.
 */
typedef struct SheepdogObjRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    result;
    uint32_t    copies;
    uint32_t    pad[6];
} SheepdogObjRsp;
| 141 | |
/*
 * VDI request: the common header fields followed by the vdi-specific
 * ones (size, vdi id, number of copies, snapshot id) and padding.
 */
typedef struct SheepdogVdiReq {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint64_t    vdi_size;
    uint32_t    vdi_id;
    uint32_t    copies;
    uint32_t    snapid;
    uint32_t    pad[3];
} SheepdogVdiReq;
| 155 | |
/*
 * VDI response: the common header fields, the result code, a reserved
 * word, the vdi id, and padding.
 */
typedef struct SheepdogVdiRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    result;
    uint32_t    rsvd;
    uint32_t    vdi_id;
    uint32_t    pad[5];
} SheepdogVdiRsp;
| 168 | |
| 169 | typedef struct SheepdogInode { |
| 170 | char name[SD_MAX_VDI_LEN]; |
| 171 | char tag[SD_MAX_VDI_TAG_LEN]; |
| 172 | uint64_t ctime; |
| 173 | uint64_t snap_ctime; |
| 174 | uint64_t vm_clock_nsec; |
| 175 | uint64_t vdi_size; |
| 176 | uint64_t vm_state_size; |
| 177 | uint16_t copy_policy; |
| 178 | uint8_t nr_copies; |
| 179 | uint8_t block_size_shift; |
| 180 | uint32_t snap_id; |
| 181 | uint32_t vdi_id; |
| 182 | uint32_t parent_vdi_id; |
| 183 | uint32_t child_vdi_id[MAX_CHILDREN]; |
| 184 | uint32_t data_vdi_id[MAX_DATA_OBJS]; |
| 185 | } SheepdogInode; |
| 186 | |
/*
 * 64 bit FNV-1a non-zero initial basis
 */
#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)

/*
 * 64 bit Fowler/Noll/Vo FNV-1a hash code
 *
 * Folds the `len' bytes at `buf' into the running hash `hval' and
 * returns the updated hash.  Pass FNV1A_64_INIT as `hval' for the first
 * buffer, or a previous return value to continue hashing.
 */
static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
    /* 64 bit FNV prime: 2^40 + 2^8 + 0xb3 */
    const uint64_t fnv_prime = UINT64_C(0x100000001b3);
    const unsigned char *bytes = buf;
    size_t i;

    for (i = 0; i < len; i++) {
        /* FNV-1a: xor the byte in first, then multiply (mod 2^64) */
        hval ^= (uint64_t)bytes[i];
        hval *= fnv_prime;
    }

    return hval;
}
| 206 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 207 | static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 208 | { |
| 209 | return inode->vdi_id == inode->data_vdi_id[idx]; |
| 210 | } |
| 211 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 212 | static inline bool is_data_obj(uint64_t oid) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 213 | { |
| 214 | return !(VDI_BIT & oid); |
| 215 | } |
| 216 | |
| 217 | static inline uint64_t data_oid_to_idx(uint64_t oid) |
| 218 | { |
| 219 | return oid & (MAX_DATA_OBJS - 1); |
| 220 | } |
| 221 | |
| 222 | static inline uint64_t vid_to_vdi_oid(uint32_t vid) |
| 223 | { |
| 224 | return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); |
| 225 | } |
| 226 | |
| 227 | static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx) |
| 228 | { |
| 229 | return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; |
| 230 | } |
| 231 | |
| 232 | static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) |
| 233 | { |
| 234 | return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; |
| 235 | } |
| 236 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 237 | static inline bool is_snapshot(struct SheepdogInode *inode) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 238 | { |
| 239 | return !!inode->snap_ctime; |
| 240 | } |
| 241 | |
/*
 * Debug trace macro.  When DEBUG_SDOG is defined it prints the calling
 * function name and line number followed by the formatted message to
 * stdout; otherwise it expands to nothing.
 */
#undef dprintf
#ifdef DEBUG_SDOG
#define dprintf(fmt, ...)                                                   \
    do {                                                                    \
        fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##__VA_ARGS__);  \
    } while (0)
#else
#define dprintf(fmt, ...)
#endif
| 251 | |
| 252 | typedef struct SheepdogAIOCB SheepdogAIOCB; |
| 253 | |
| 254 | typedef struct AIOReq { |
| 255 | SheepdogAIOCB *aiocb; |
| 256 | unsigned int iov_offset; |
| 257 | |
| 258 | uint64_t oid; |
| 259 | uint64_t base_oid; |
| 260 | uint64_t offset; |
| 261 | unsigned int data_len; |
| 262 | uint8_t flags; |
| 263 | uint32_t id; |
| 264 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 265 | QLIST_ENTRY(AIOReq) aio_siblings; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 266 | } AIOReq; |
| 267 | |
/* Kind of operation a SheepdogAIOCB carries (stored in its aiocb_type). */
enum AIOCBState {
    AIOCB_WRITE_UDATA = 0,
    AIOCB_READ_UDATA  = 1,
    AIOCB_FLUSH_CACHE = 2,
};
| 273 | |
| 274 | struct SheepdogAIOCB { |
| 275 | BlockDriverAIOCB common; |
| 276 | |
| 277 | QEMUIOVector *qiov; |
| 278 | |
| 279 | int64_t sector_num; |
| 280 | int nb_sectors; |
| 281 | |
| 282 | int ret; |
| 283 | enum AIOCBState aiocb_type; |
| 284 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 285 | Coroutine *coroutine; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 286 | void (*aio_done_func)(SheepdogAIOCB *); |
| 287 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 288 | bool canceled; |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 289 | int nr_pending; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 290 | }; |
| 291 | |
| 292 | typedef struct BDRVSheepdogState { |
| 293 | SheepdogInode inode; |
| 294 | |
| 295 | uint32_t min_dirty_data_idx; |
| 296 | uint32_t max_dirty_data_idx; |
| 297 | |
| 298 | char name[SD_MAX_VDI_LEN]; |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 299 | bool is_snapshot; |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 300 | uint32_t cache_flags; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 301 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 302 | char *host_spec; |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 303 | bool is_unix; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 304 | int fd; |
| 305 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 306 | CoMutex lock; |
| 307 | Coroutine *co_send; |
| 308 | Coroutine *co_recv; |
| 309 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 310 | uint32_t aioreq_seq_num; |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 311 | QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; |
| 312 | QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 313 | } BDRVSheepdogState; |
| 314 | |
| 315 | static const char * sd_strerror(int err) |
| 316 | { |
| 317 | int i; |
| 318 | |
| 319 | static const struct { |
| 320 | int err; |
| 321 | const char *desc; |
| 322 | } errors[] = { |
| 323 | {SD_RES_SUCCESS, "Success"}, |
| 324 | {SD_RES_UNKNOWN, "Unknown error"}, |
| 325 | {SD_RES_NO_OBJ, "No object found"}, |
| 326 | {SD_RES_EIO, "I/O error"}, |
| 327 | {SD_RES_VDI_EXIST, "VDI exists already"}, |
| 328 | {SD_RES_INVALID_PARMS, "Invalid parameters"}, |
| 329 | {SD_RES_SYSTEM_ERROR, "System error"}, |
| 330 | {SD_RES_VDI_LOCKED, "VDI is already locked"}, |
| 331 | {SD_RES_NO_VDI, "No vdi found"}, |
| 332 | {SD_RES_NO_BASE_VDI, "No base VDI found"}, |
| 333 | {SD_RES_VDI_READ, "Failed read the requested VDI"}, |
| 334 | {SD_RES_VDI_WRITE, "Failed to write the requested VDI"}, |
| 335 | {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"}, |
| 336 | {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"}, |
| 337 | {SD_RES_NO_TAG, "Failed to find the requested tag"}, |
| 338 | {SD_RES_STARTUP, "The system is still booting"}, |
| 339 | {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"}, |
| 340 | {SD_RES_SHUTDOWN, "The system is shutting down"}, |
| 341 | {SD_RES_NO_MEM, "Out of memory on the server"}, |
| 342 | {SD_RES_FULL_VDI, "We already have the maximum vdis"}, |
| 343 | {SD_RES_VER_MISMATCH, "Protocol version mismatch"}, |
| 344 | {SD_RES_NO_SPACE, "Server has no space for new objects"}, |
| 345 | {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"}, |
| 346 | {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"}, |
| 347 | {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"}, |
Liu Yuan | fca23f0 | 2013-03-18 14:27:55 +0800 | [diff] [blame] | 348 | {SD_RES_HALT, "Sheepdog is stopped serving IO request"}, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 349 | }; |
| 350 | |
| 351 | for (i = 0; i < ARRAY_SIZE(errors); ++i) { |
| 352 | if (errors[i].err == err) { |
| 353 | return errors[i].desc; |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | return "Invalid error code"; |
| 358 | } |
| 359 | |
/*
 * Sheepdog I/O handling:
 *
 * 1. In sd_co_rw_vector, we send the I/O requests to the server and
 *    link the requests to the inflight_aio_head list in the
 *    BDRVSheepdogState.  The function returns without waiting for
 *    the responses.
 *
 * 2. We receive the responses in aio_read_response, the fd handler for
 *    the sheepdog connection.  If a metadata update is needed, we send
 *    the write request to the vdi object in sd_write_done, the write
 *    completion function.  We switch back to sd_co_readv/writev after
 *    all the requests belonging to the AIOCB are finished.
 */
| 374 | |
| 375 | static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, |
| 376 | uint64_t oid, unsigned int data_len, |
| 377 | uint64_t offset, uint8_t flags, |
| 378 | uint64_t base_oid, unsigned int iov_offset) |
| 379 | { |
| 380 | AIOReq *aio_req; |
| 381 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 382 | aio_req = g_malloc(sizeof(*aio_req)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 383 | aio_req->aiocb = acb; |
| 384 | aio_req->iov_offset = iov_offset; |
| 385 | aio_req->oid = oid; |
| 386 | aio_req->base_oid = base_oid; |
| 387 | aio_req->offset = offset; |
| 388 | aio_req->data_len = data_len; |
| 389 | aio_req->flags = flags; |
| 390 | aio_req->id = s->aioreq_seq_num++; |
| 391 | |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 392 | acb->nr_pending++; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 393 | return aio_req; |
| 394 | } |
| 395 | |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 396 | static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 397 | { |
| 398 | SheepdogAIOCB *acb = aio_req->aiocb; |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 399 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 400 | QLIST_REMOVE(aio_req, aio_siblings); |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 401 | g_free(aio_req); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 402 | |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 403 | acb->nr_pending--; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 404 | } |
| 405 | |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 406 | static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 407 | { |
| 408 | if (!acb->canceled) { |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 409 | qemu_coroutine_enter(acb->coroutine, NULL); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 410 | } |
| 411 | qemu_aio_release(acb); |
| 412 | } |
| 413 | |
| 414 | static void sd_aio_cancel(BlockDriverAIOCB *blockacb) |
| 415 | { |
| 416 | SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; |
| 417 | |
| 418 | /* |
| 419 | * Sheepdog cannot cancel the requests which are already sent to |
| 420 | * the servers, so we just complete the request with -EIO here. |
| 421 | */ |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 422 | acb->ret = -EIO; |
| 423 | qemu_coroutine_enter(acb->coroutine, NULL); |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 424 | acb->canceled = true; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 425 | } |
| 426 | |
Stefan Hajnoczi | d7331be | 2012-10-31 16:34:37 +0100 | [diff] [blame] | 427 | static const AIOCBInfo sd_aiocb_info = { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 428 | .aiocb_size = sizeof(SheepdogAIOCB), |
| 429 | .cancel = sd_aio_cancel, |
| 430 | }; |
| 431 | |
| 432 | static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, |
Liu Yuan | f700f8e | 2013-01-14 14:01:03 +0800 | [diff] [blame] | 433 | int64_t sector_num, int nb_sectors) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 434 | { |
| 435 | SheepdogAIOCB *acb; |
| 436 | |
Liu Yuan | f700f8e | 2013-01-14 14:01:03 +0800 | [diff] [blame] | 437 | acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 438 | |
| 439 | acb->qiov = qiov; |
| 440 | |
| 441 | acb->sector_num = sector_num; |
| 442 | acb->nb_sectors = nb_sectors; |
| 443 | |
| 444 | acb->aio_done_func = NULL; |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 445 | acb->canceled = false; |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 446 | acb->coroutine = qemu_coroutine_self(); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 447 | acb->ret = 0; |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 448 | acb->nr_pending = 0; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 449 | return acb; |
| 450 | } |
| 451 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 452 | static int connect_to_sdog(BDRVSheepdogState *s) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 453 | { |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 454 | int fd; |
| 455 | Error *err = NULL; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 456 | |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 457 | if (s->is_unix) { |
| 458 | fd = unix_connect(s->host_spec, &err); |
| 459 | } else { |
| 460 | fd = inet_connect(s->host_spec, &err); |
| 461 | |
| 462 | if (err == NULL) { |
| 463 | int ret = socket_set_nodelay(fd); |
| 464 | if (ret < 0) { |
| 465 | error_report("%s", strerror(errno)); |
| 466 | } |
| 467 | } |
| 468 | } |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 469 | |
| 470 | if (err != NULL) { |
| 471 | qerror_report_err(err); |
| 472 | error_free(err); |
MORITA Kazutaka | 0d6db30 | 2013-03-12 16:05:42 +0900 | [diff] [blame] | 473 | } else { |
Stefan Hajnoczi | f9e8cac | 2013-03-27 10:10:43 +0100 | [diff] [blame] | 474 | qemu_set_nonblock(fd); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 475 | } |
| 476 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 477 | return fd; |
| 478 | } |
| 479 | |
MORITA Kazutaka | e0d93a8 | 2012-05-30 09:03:55 +0900 | [diff] [blame] | 480 | static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, |
| 481 | unsigned int *wlen) |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 482 | { |
| 483 | int ret; |
| 484 | |
| 485 | ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); |
| 486 | if (ret < sizeof(*hdr)) { |
| 487 | error_report("failed to send a req, %s", strerror(errno)); |
Liu Yuan | eb09218 | 2012-04-03 18:04:21 +0800 | [diff] [blame] | 488 | return ret; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 489 | } |
| 490 | |
| 491 | ret = qemu_co_send(sockfd, data, *wlen); |
| 492 | if (ret < *wlen) { |
| 493 | error_report("failed to send a req, %s", strerror(errno)); |
| 494 | } |
| 495 | |
| 496 | return ret; |
| 497 | } |
MORITA Kazutaka | e0d93a8 | 2012-05-30 09:03:55 +0900 | [diff] [blame] | 498 | |
MORITA Kazutaka | 2dfcca3 | 2012-06-27 07:26:19 +0900 | [diff] [blame] | 499 | static void restart_co_req(void *opaque) |
| 500 | { |
| 501 | Coroutine *co = opaque; |
| 502 | |
| 503 | qemu_coroutine_enter(co, NULL); |
| 504 | } |
| 505 | |
/*
 * Flush callback registered together with restart_co_req().  The
 * handler is installed only while a request is pending, so this
 * unconditionally reports 1; `opaque' is unused.
 */
static int have_co_req(void *opaque)
{
    return 1;
}
| 512 | |
MORITA Kazutaka | cddd4ac | 2012-07-05 01:41:06 +0900 | [diff] [blame] | 513 | typedef struct SheepdogReqCo { |
| 514 | int sockfd; |
| 515 | SheepdogReq *hdr; |
| 516 | void *data; |
| 517 | unsigned int *wlen; |
| 518 | unsigned int *rlen; |
| 519 | int ret; |
| 520 | bool finished; |
| 521 | } SheepdogReqCo; |
| 522 | |
| 523 | static coroutine_fn void do_co_req(void *opaque) |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 524 | { |
| 525 | int ret; |
MORITA Kazutaka | 2dfcca3 | 2012-06-27 07:26:19 +0900 | [diff] [blame] | 526 | Coroutine *co; |
MORITA Kazutaka | cddd4ac | 2012-07-05 01:41:06 +0900 | [diff] [blame] | 527 | SheepdogReqCo *srco = opaque; |
| 528 | int sockfd = srco->sockfd; |
| 529 | SheepdogReq *hdr = srco->hdr; |
| 530 | void *data = srco->data; |
| 531 | unsigned int *wlen = srco->wlen; |
| 532 | unsigned int *rlen = srco->rlen; |
MORITA Kazutaka | 2dfcca3 | 2012-06-27 07:26:19 +0900 | [diff] [blame] | 533 | |
| 534 | co = qemu_coroutine_self(); |
MORITA Kazutaka | ed9ba72 | 2013-03-12 16:05:43 +0900 | [diff] [blame] | 535 | qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co); |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 536 | |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 537 | ret = send_co_req(sockfd, hdr, data, wlen); |
| 538 | if (ret < 0) { |
| 539 | goto out; |
| 540 | } |
| 541 | |
MORITA Kazutaka | ed9ba72 | 2013-03-12 16:05:43 +0900 | [diff] [blame] | 542 | qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co); |
MORITA Kazutaka | 2dfcca3 | 2012-06-27 07:26:19 +0900 | [diff] [blame] | 543 | |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 544 | ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); |
| 545 | if (ret < sizeof(*hdr)) { |
| 546 | error_report("failed to get a rsp, %s", strerror(errno)); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 547 | ret = -errno; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 548 | goto out; |
| 549 | } |
| 550 | |
| 551 | if (*rlen > hdr->data_length) { |
| 552 | *rlen = hdr->data_length; |
| 553 | } |
| 554 | |
| 555 | if (*rlen) { |
| 556 | ret = qemu_co_recv(sockfd, data, *rlen); |
| 557 | if (ret < *rlen) { |
| 558 | error_report("failed to get the data, %s", strerror(errno)); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 559 | ret = -errno; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 560 | goto out; |
| 561 | } |
| 562 | } |
| 563 | ret = 0; |
| 564 | out: |
MORITA Kazutaka | ed9ba72 | 2013-03-12 16:05:43 +0900 | [diff] [blame] | 565 | /* there is at most one request for this sockfd, so it is safe to |
| 566 | * set each handler to NULL. */ |
MORITA Kazutaka | 2dfcca3 | 2012-06-27 07:26:19 +0900 | [diff] [blame] | 567 | qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL); |
MORITA Kazutaka | cddd4ac | 2012-07-05 01:41:06 +0900 | [diff] [blame] | 568 | |
| 569 | srco->ret = ret; |
| 570 | srco->finished = true; |
| 571 | } |
| 572 | |
| 573 | static int do_req(int sockfd, SheepdogReq *hdr, void *data, |
| 574 | unsigned int *wlen, unsigned int *rlen) |
| 575 | { |
| 576 | Coroutine *co; |
| 577 | SheepdogReqCo srco = { |
| 578 | .sockfd = sockfd, |
| 579 | .hdr = hdr, |
| 580 | .data = data, |
| 581 | .wlen = wlen, |
| 582 | .rlen = rlen, |
| 583 | .ret = 0, |
| 584 | .finished = false, |
| 585 | }; |
| 586 | |
| 587 | if (qemu_in_coroutine()) { |
| 588 | do_co_req(&srco); |
| 589 | } else { |
| 590 | co = qemu_coroutine_create(do_co_req); |
| 591 | qemu_coroutine_enter(co, &srco); |
| 592 | while (!srco.finished) { |
| 593 | qemu_aio_wait(); |
| 594 | } |
| 595 | } |
| 596 | |
| 597 | return srco.ret; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 598 | } |
| 599 | |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 600 | static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 601 | struct iovec *iov, int niov, bool create, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 602 | enum AIOCBState aiocb_type); |
| 603 | |
MORITA Kazutaka | 7dc1cde | 2012-06-27 07:26:23 +0900 | [diff] [blame] | 604 | |
| 605 | static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) |
| 606 | { |
| 607 | AIOReq *aio_req; |
| 608 | |
| 609 | QLIST_FOREACH(aio_req, &s->pending_aio_head, aio_siblings) { |
| 610 | if (aio_req->oid == oid) { |
| 611 | return aio_req; |
| 612 | } |
| 613 | } |
| 614 | |
| 615 | return NULL; |
| 616 | } |
| 617 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 618 | /* |
| 619 | * This function searchs pending requests to the object `oid', and |
| 620 | * sends them. |
| 621 | */ |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 622 | static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 623 | { |
MORITA Kazutaka | 7dc1cde | 2012-06-27 07:26:23 +0900 | [diff] [blame] | 624 | AIOReq *aio_req; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 625 | SheepdogAIOCB *acb; |
| 626 | int ret; |
| 627 | |
MORITA Kazutaka | 7dc1cde | 2012-06-27 07:26:23 +0900 | [diff] [blame] | 628 | while ((aio_req = find_pending_req(s, oid)) != NULL) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 629 | acb = aio_req->aiocb; |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 630 | /* move aio_req from pending list to inflight one */ |
| 631 | QLIST_REMOVE(aio_req, aio_siblings); |
| 632 | QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 633 | ret = add_aio_request(s, aio_req, acb->qiov->iov, |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 634 | acb->qiov->niov, false, acb->aiocb_type); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 635 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 636 | error_report("add_aio_request is failed"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 637 | free_aio_req(s, aio_req); |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 638 | if (!acb->nr_pending) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 639 | sd_finish_aiocb(acb); |
| 640 | } |
| 641 | } |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | /* |
| 646 | * Receive responses of the I/O requests. |
| 647 | * |
| 648 | * This function is registered as a fd handler, and called from the |
| 649 | * main loop when s->fd is ready for reading responses. |
| 650 | */ |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 651 | static void coroutine_fn aio_read_response(void *opaque) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 652 | { |
| 653 | SheepdogObjRsp rsp; |
| 654 | BDRVSheepdogState *s = opaque; |
| 655 | int fd = s->fd; |
| 656 | int ret; |
| 657 | AIOReq *aio_req = NULL; |
| 658 | SheepdogAIOCB *acb; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 659 | unsigned long idx; |
| 660 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 661 | if (QLIST_EMPTY(&s->inflight_aio_head)) { |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 662 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 663 | } |
| 664 | |
| 665 | /* read a header */ |
Paolo Bonzini | 8c5135f | 2011-09-08 13:46:25 +0200 | [diff] [blame] | 666 | ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); |
| 667 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 668 | error_report("failed to get the header, %s", strerror(errno)); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 669 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 670 | } |
| 671 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 672 | /* find the right aio_req from the inflight aio list */ |
| 673 | QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 674 | if (aio_req->id == rsp.id) { |
| 675 | break; |
| 676 | } |
| 677 | } |
| 678 | if (!aio_req) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 679 | error_report("cannot find aio_req %x", rsp.id); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 680 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 681 | } |
| 682 | |
| 683 | acb = aio_req->aiocb; |
| 684 | |
| 685 | switch (acb->aiocb_type) { |
| 686 | case AIOCB_WRITE_UDATA: |
MORITA Kazutaka | 6d1acda | 2012-01-31 02:10:06 +0900 | [diff] [blame] | 687 | /* this coroutine context is no longer suitable for co_recv |
| 688 | * because we may send data to update vdi objects */ |
| 689 | s->co_recv = NULL; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 690 | if (!is_data_obj(aio_req->oid)) { |
| 691 | break; |
| 692 | } |
| 693 | idx = data_oid_to_idx(aio_req->oid); |
| 694 | |
| 695 | if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) { |
| 696 | /* |
| 697 | * If the object is newly created one, we need to update |
| 698 | * the vdi object (metadata object). min_dirty_data_idx |
| 699 | * and max_dirty_data_idx are changed to include updated |
| 700 | * index between them. |
| 701 | */ |
Liu Yuan | bd751f2 | 2012-12-17 14:17:26 +0800 | [diff] [blame] | 702 | if (rsp.result == SD_RES_SUCCESS) { |
| 703 | s->inode.data_vdi_id[idx] = s->inode.vdi_id; |
| 704 | s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); |
| 705 | s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); |
| 706 | } |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 707 | /* |
| 708 | * Some requests may be blocked because simultaneous |
| 709 | * create requests are not allowed, so we search the |
| 710 | * pending requests here. |
| 711 | */ |
Liu Yuan | d6b1ef8 | 2012-12-17 14:17:27 +0800 | [diff] [blame] | 712 | send_pending_req(s, aio_req->oid); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 713 | } |
| 714 | break; |
| 715 | case AIOCB_READ_UDATA: |
Michael Tokarev | 2fc8ae1 | 2012-06-07 20:22:46 +0400 | [diff] [blame] | 716 | ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, |
| 717 | aio_req->iov_offset, rsp.data_length); |
Paolo Bonzini | 8c5135f | 2011-09-08 13:46:25 +0200 | [diff] [blame] | 718 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 719 | error_report("failed to get the data, %s", strerror(errno)); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 720 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 721 | } |
| 722 | break; |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 723 | case AIOCB_FLUSH_CACHE: |
| 724 | if (rsp.result == SD_RES_INVALID_PARMS) { |
| 725 | dprintf("disable cache since the server doesn't support it\n"); |
| 726 | s->cache_flags = SD_FLAG_CMD_DIRECT; |
| 727 | rsp.result = SD_RES_SUCCESS; |
| 728 | } |
| 729 | break; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 730 | } |
| 731 | |
| 732 | if (rsp.result != SD_RES_SUCCESS) { |
| 733 | acb->ret = -EIO; |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 734 | error_report("%s", sd_strerror(rsp.result)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 735 | } |
| 736 | |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 737 | free_aio_req(s, aio_req); |
| 738 | if (!acb->nr_pending) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 739 | /* |
| 740 | * We've finished all requests which belong to the AIOCB, so |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 741 | * we can switch back to sd_co_readv/writev now. |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 742 | */ |
| 743 | acb->aio_done_func(acb); |
| 744 | } |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 745 | out: |
| 746 | s->co_recv = NULL; |
| 747 | } |
| 748 | |
| 749 | static void co_read_response(void *opaque) |
| 750 | { |
| 751 | BDRVSheepdogState *s = opaque; |
| 752 | |
| 753 | if (!s->co_recv) { |
| 754 | s->co_recv = qemu_coroutine_create(aio_read_response); |
| 755 | } |
| 756 | |
| 757 | qemu_coroutine_enter(s->co_recv, opaque); |
| 758 | } |
| 759 | |
| 760 | static void co_write_request(void *opaque) |
| 761 | { |
| 762 | BDRVSheepdogState *s = opaque; |
| 763 | |
| 764 | qemu_coroutine_enter(s->co_send, NULL); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 765 | } |
| 766 | |
| 767 | static int aio_flush_request(void *opaque) |
| 768 | { |
| 769 | BDRVSheepdogState *s = opaque; |
| 770 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 771 | return !QLIST_EMPTY(&s->inflight_aio_head) || |
| 772 | !QLIST_EMPTY(&s->pending_aio_head); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 773 | } |
| 774 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 775 | /* |
| 776 | * Return a socket discriptor to read/write objects. |
| 777 | * |
| 778 | * We cannot use this discriptor for other operations because |
| 779 | * the block driver may be on waiting response from the server. |
| 780 | */ |
| 781 | static int get_sheep_fd(BDRVSheepdogState *s) |
| 782 | { |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 783 | int fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 784 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 785 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 786 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 787 | return fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 788 | } |
| 789 | |
Paolo Bonzini | bafbd6a | 2012-04-12 14:00:54 +0200 | [diff] [blame] | 790 | qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 791 | return fd; |
| 792 | } |
| 793 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 794 | static int sd_parse_uri(BDRVSheepdogState *s, const char *filename, |
| 795 | char *vdi, uint32_t *snapid, char *tag) |
| 796 | { |
| 797 | URI *uri; |
| 798 | QueryParams *qp = NULL; |
| 799 | int ret = 0; |
| 800 | |
| 801 | uri = uri_parse(filename); |
| 802 | if (!uri) { |
| 803 | return -EINVAL; |
| 804 | } |
| 805 | |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 806 | /* transport */ |
| 807 | if (!strcmp(uri->scheme, "sheepdog")) { |
| 808 | s->is_unix = false; |
| 809 | } else if (!strcmp(uri->scheme, "sheepdog+tcp")) { |
| 810 | s->is_unix = false; |
| 811 | } else if (!strcmp(uri->scheme, "sheepdog+unix")) { |
| 812 | s->is_unix = true; |
| 813 | } else { |
| 814 | ret = -EINVAL; |
| 815 | goto out; |
| 816 | } |
| 817 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 818 | if (uri->path == NULL || !strcmp(uri->path, "/")) { |
| 819 | ret = -EINVAL; |
| 820 | goto out; |
| 821 | } |
| 822 | pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1); |
| 823 | |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 824 | qp = query_params_parse(uri->query); |
| 825 | if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) { |
| 826 | ret = -EINVAL; |
| 827 | goto out; |
| 828 | } |
| 829 | |
| 830 | if (s->is_unix) { |
| 831 | /* sheepdog+unix:///vdiname?socket=path */ |
| 832 | if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) { |
| 833 | ret = -EINVAL; |
| 834 | goto out; |
| 835 | } |
| 836 | s->host_spec = g_strdup(qp->p[0].value); |
| 837 | } else { |
| 838 | /* sheepdog[+tcp]://[host:port]/vdiname */ |
| 839 | s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR, |
| 840 | uri->port ?: SD_DEFAULT_PORT); |
| 841 | } |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 842 | |
| 843 | /* snapshot tag */ |
| 844 | if (uri->fragment) { |
| 845 | *snapid = strtoul(uri->fragment, NULL, 10); |
| 846 | if (*snapid == 0) { |
| 847 | pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment); |
| 848 | } |
| 849 | } else { |
| 850 | *snapid = CURRENT_VDI_ID; /* search current vdi */ |
| 851 | } |
| 852 | |
| 853 | out: |
| 854 | if (qp) { |
| 855 | query_params_free(qp); |
| 856 | } |
| 857 | uri_free(uri); |
| 858 | return ret; |
| 859 | } |
| 860 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 861 | /* |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 862 | * Parse a filename (old syntax) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 863 | * |
| 864 | * filename must be one of the following formats: |
| 865 | * 1. [vdiname] |
| 866 | * 2. [vdiname]:[snapid] |
| 867 | * 3. [vdiname]:[tag] |
| 868 | * 4. [hostname]:[port]:[vdiname] |
| 869 | * 5. [hostname]:[port]:[vdiname]:[snapid] |
| 870 | * 6. [hostname]:[port]:[vdiname]:[tag] |
| 871 | * |
| 872 | * You can boot from the snapshot images by specifying `snapid` or |
| 873 | * `tag'. |
| 874 | * |
| 875 | * You can run VMs outside the Sheepdog cluster by specifying |
| 876 | * `hostname' and `port' (experimental). |
| 877 | */ |
| 878 | static int parse_vdiname(BDRVSheepdogState *s, const char *filename, |
| 879 | char *vdi, uint32_t *snapid, char *tag) |
| 880 | { |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 881 | char *p, *q, *uri; |
| 882 | const char *host_spec, *vdi_spec; |
| 883 | int nr_sep, ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 884 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 885 | strstart(filename, "sheepdog:", (const char **)&filename); |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 886 | p = q = g_strdup(filename); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 887 | |
| 888 | /* count the number of separators */ |
| 889 | nr_sep = 0; |
| 890 | while (*p) { |
| 891 | if (*p == ':') { |
| 892 | nr_sep++; |
| 893 | } |
| 894 | p++; |
| 895 | } |
| 896 | p = q; |
| 897 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 898 | /* use the first two tokens as host_spec. */ |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 899 | if (nr_sep >= 2) { |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 900 | host_spec = p; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 901 | p = strchr(p, ':'); |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 902 | p++; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 903 | p = strchr(p, ':'); |
| 904 | *p++ = '\0'; |
| 905 | } else { |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 906 | host_spec = ""; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 907 | } |
| 908 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 909 | vdi_spec = p; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 910 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 911 | p = strchr(vdi_spec, ':'); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 912 | if (p) { |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 913 | *p++ = '#'; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 914 | } |
| 915 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 916 | uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 917 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 918 | ret = sd_parse_uri(s, uri, vdi, snapid, tag); |
| 919 | |
| 920 | g_free(q); |
| 921 | g_free(uri); |
| 922 | |
| 923 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 924 | } |
| 925 | |
| 926 | static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, |
| 927 | char *tag, uint32_t *vid, int for_snapshot) |
| 928 | { |
| 929 | int ret, fd; |
| 930 | SheepdogVdiReq hdr; |
| 931 | SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; |
| 932 | unsigned int wlen, rlen = 0; |
| 933 | char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; |
| 934 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 935 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 936 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 937 | return fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 938 | } |
| 939 | |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 940 | /* This pair of strncpy calls ensures that the buffer is zero-filled, |
| 941 | * which is desirable since we'll soon be sending those bytes, and |
| 942 | * don't want the send_req to read uninitialized data. |
| 943 | */ |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 944 | strncpy(buf, filename, SD_MAX_VDI_LEN); |
| 945 | strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); |
| 946 | |
| 947 | memset(&hdr, 0, sizeof(hdr)); |
| 948 | if (for_snapshot) { |
| 949 | hdr.opcode = SD_OP_GET_VDI_INFO; |
| 950 | } else { |
| 951 | hdr.opcode = SD_OP_LOCK_VDI; |
| 952 | } |
| 953 | wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; |
| 954 | hdr.proto_ver = SD_PROTO_VER; |
| 955 | hdr.data_length = wlen; |
| 956 | hdr.snapid = snapid; |
| 957 | hdr.flags = SD_FLAG_CMD_WRITE; |
| 958 | |
| 959 | ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); |
| 960 | if (ret) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 961 | goto out; |
| 962 | } |
| 963 | |
| 964 | if (rsp->result != SD_RES_SUCCESS) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 965 | error_report("cannot get vdi info, %s, %s %d %s", |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 966 | sd_strerror(rsp->result), filename, snapid, tag); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 967 | if (rsp->result == SD_RES_NO_VDI) { |
| 968 | ret = -ENOENT; |
| 969 | } else { |
| 970 | ret = -EIO; |
| 971 | } |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 972 | goto out; |
| 973 | } |
| 974 | *vid = rsp->vdi_id; |
| 975 | |
| 976 | ret = 0; |
| 977 | out: |
| 978 | closesocket(fd); |
| 979 | return ret; |
| 980 | } |
| 981 | |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 982 | static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 983 | struct iovec *iov, int niov, bool create, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 984 | enum AIOCBState aiocb_type) |
| 985 | { |
| 986 | int nr_copies = s->inode.nr_copies; |
| 987 | SheepdogObjReq hdr; |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 988 | unsigned int wlen = 0; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 989 | int ret; |
| 990 | uint64_t oid = aio_req->oid; |
| 991 | unsigned int datalen = aio_req->data_len; |
| 992 | uint64_t offset = aio_req->offset; |
| 993 | uint8_t flags = aio_req->flags; |
| 994 | uint64_t old_oid = aio_req->base_oid; |
| 995 | |
| 996 | if (!nr_copies) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 997 | error_report("bug"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 998 | } |
| 999 | |
| 1000 | memset(&hdr, 0, sizeof(hdr)); |
| 1001 | |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1002 | switch (aiocb_type) { |
| 1003 | case AIOCB_FLUSH_CACHE: |
| 1004 | hdr.opcode = SD_OP_FLUSH_VDI; |
| 1005 | break; |
| 1006 | case AIOCB_READ_UDATA: |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1007 | hdr.opcode = SD_OP_READ_OBJ; |
| 1008 | hdr.flags = flags; |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1009 | break; |
| 1010 | case AIOCB_WRITE_UDATA: |
| 1011 | if (create) { |
| 1012 | hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; |
| 1013 | } else { |
| 1014 | hdr.opcode = SD_OP_WRITE_OBJ; |
| 1015 | } |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1016 | wlen = datalen; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1017 | hdr.flags = SD_FLAG_CMD_WRITE | flags; |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1018 | break; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1019 | } |
| 1020 | |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1021 | if (s->cache_flags) { |
| 1022 | hdr.flags |= s->cache_flags; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1023 | } |
| 1024 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1025 | hdr.oid = oid; |
| 1026 | hdr.cow_oid = old_oid; |
| 1027 | hdr.copies = s->inode.nr_copies; |
| 1028 | |
| 1029 | hdr.data_length = datalen; |
| 1030 | hdr.offset = offset; |
| 1031 | |
| 1032 | hdr.id = aio_req->id; |
| 1033 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1034 | qemu_co_mutex_lock(&s->lock); |
| 1035 | s->co_send = qemu_coroutine_self(); |
| 1036 | qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, |
Paolo Bonzini | bafbd6a | 2012-04-12 14:00:54 +0200 | [diff] [blame] | 1037 | aio_flush_request, s); |
Paolo Bonzini | 128aa58 | 2011-09-21 12:36:48 +0200 | [diff] [blame] | 1038 | socket_set_cork(s->fd, 1); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1039 | |
| 1040 | /* send a header */ |
Paolo Bonzini | 8c5135f | 2011-09-08 13:46:25 +0200 | [diff] [blame] | 1041 | ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); |
| 1042 | if (ret < 0) { |
Dong Xu Wang | c3fecea | 2011-11-22 10:56:58 +0800 | [diff] [blame] | 1043 | qemu_co_mutex_unlock(&s->lock); |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1044 | error_report("failed to send a req, %s", strerror(errno)); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1045 | return -errno; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1046 | } |
| 1047 | |
| 1048 | if (wlen) { |
Michael Tokarev | 2fc8ae1 | 2012-06-07 20:22:46 +0400 | [diff] [blame] | 1049 | ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); |
Paolo Bonzini | 8c5135f | 2011-09-08 13:46:25 +0200 | [diff] [blame] | 1050 | if (ret < 0) { |
Dong Xu Wang | c3fecea | 2011-11-22 10:56:58 +0800 | [diff] [blame] | 1051 | qemu_co_mutex_unlock(&s->lock); |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1052 | error_report("failed to send a data, %s", strerror(errno)); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1053 | return -errno; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1054 | } |
| 1055 | } |
| 1056 | |
Paolo Bonzini | 128aa58 | 2011-09-21 12:36:48 +0200 | [diff] [blame] | 1057 | socket_set_cork(s->fd, 0); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1058 | qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, |
Paolo Bonzini | bafbd6a | 2012-04-12 14:00:54 +0200 | [diff] [blame] | 1059 | aio_flush_request, s); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1060 | qemu_co_mutex_unlock(&s->lock); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1061 | |
| 1062 | return 0; |
| 1063 | } |
| 1064 | |
| 1065 | static int read_write_object(int fd, char *buf, uint64_t oid, int copies, |
| 1066 | unsigned int datalen, uint64_t offset, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1067 | bool write, bool create, uint32_t cache_flags) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1068 | { |
| 1069 | SheepdogObjReq hdr; |
| 1070 | SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; |
| 1071 | unsigned int wlen, rlen; |
| 1072 | int ret; |
| 1073 | |
| 1074 | memset(&hdr, 0, sizeof(hdr)); |
| 1075 | |
| 1076 | if (write) { |
| 1077 | wlen = datalen; |
| 1078 | rlen = 0; |
| 1079 | hdr.flags = SD_FLAG_CMD_WRITE; |
| 1080 | if (create) { |
| 1081 | hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; |
| 1082 | } else { |
| 1083 | hdr.opcode = SD_OP_WRITE_OBJ; |
| 1084 | } |
| 1085 | } else { |
| 1086 | wlen = 0; |
| 1087 | rlen = datalen; |
| 1088 | hdr.opcode = SD_OP_READ_OBJ; |
| 1089 | } |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1090 | |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1091 | hdr.flags |= cache_flags; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1092 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1093 | hdr.oid = oid; |
| 1094 | hdr.data_length = datalen; |
| 1095 | hdr.offset = offset; |
| 1096 | hdr.copies = copies; |
| 1097 | |
| 1098 | ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); |
| 1099 | if (ret) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1100 | error_report("failed to send a request to the sheep"); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1101 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1102 | } |
| 1103 | |
| 1104 | switch (rsp->result) { |
| 1105 | case SD_RES_SUCCESS: |
| 1106 | return 0; |
| 1107 | default: |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1108 | error_report("%s", sd_strerror(rsp->result)); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1109 | return -EIO; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1110 | } |
| 1111 | } |
| 1112 | |
/*
 * Read datalen bytes at offset from object oid into buf.
 * Convenience wrapper around read_write_object() with write and create
 * both false; the return value is passed through.
 */
static int read_object(int fd, char *buf, uint64_t oid, int copies,
                       unsigned int datalen, uint64_t offset,
                       uint32_t cache_flags)
{
    const bool is_write = false;
    const bool is_create = false;

    return read_write_object(fd, buf, oid, copies, datalen, offset,
                             is_write, is_create, cache_flags);
}
| 1120 | |
/*
 * Write datalen bytes from buf at offset into object oid, creating the
 * object first when create is true.  Convenience wrapper around
 * read_write_object() with write set; the return value is passed
 * through.
 */
static int write_object(int fd, char *buf, uint64_t oid, int copies,
                        unsigned int datalen, uint64_t offset, bool create,
                        uint32_t cache_flags)
{
    const bool is_write = true;

    return read_write_object(fd, buf, oid, copies, datalen, offset,
                             is_write, create, cache_flags);
}
| 1128 | |
Kevin Wolf | c8c9635 | 2013-04-12 18:10:49 +0200 | [diff] [blame] | 1129 | /* TODO Convert to fine grained options */ |
| 1130 | static QemuOptsList runtime_opts = { |
| 1131 | .name = "sheepdog", |
| 1132 | .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), |
| 1133 | .desc = { |
| 1134 | { |
| 1135 | .name = "filename", |
| 1136 | .type = QEMU_OPT_STRING, |
| 1137 | .help = "URL to the sheepdog image", |
| 1138 | }, |
| 1139 | { /* end of list */ } |
| 1140 | }, |
| 1141 | }; |
| 1142 | |
Kevin Wolf | 56d1b4d | 2013-04-12 20:02:37 +0200 | [diff] [blame^] | 1143 | static int sd_open(BlockDriverState *bs, QDict *options, int flags) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1144 | { |
| 1145 | int ret, fd; |
| 1146 | uint32_t vid = 0; |
| 1147 | BDRVSheepdogState *s = bs->opaque; |
| 1148 | char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; |
| 1149 | uint32_t snapid; |
| 1150 | char *buf = NULL; |
Kevin Wolf | c8c9635 | 2013-04-12 18:10:49 +0200 | [diff] [blame] | 1151 | QemuOpts *opts; |
| 1152 | Error *local_err = NULL; |
| 1153 | const char *filename; |
| 1154 | |
| 1155 | opts = qemu_opts_create_nofail(&runtime_opts); |
| 1156 | qemu_opts_absorb_qdict(opts, options, &local_err); |
| 1157 | if (error_is_set(&local_err)) { |
| 1158 | qerror_report_err(local_err); |
| 1159 | error_free(local_err); |
| 1160 | ret = -EINVAL; |
| 1161 | goto out; |
| 1162 | } |
| 1163 | |
| 1164 | filename = qemu_opt_get(opts, "filename"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1165 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1166 | QLIST_INIT(&s->inflight_aio_head); |
| 1167 | QLIST_INIT(&s->pending_aio_head); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1168 | s->fd = -1; |
| 1169 | |
| 1170 | memset(vdi, 0, sizeof(vdi)); |
| 1171 | memset(tag, 0, sizeof(tag)); |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 1172 | |
| 1173 | if (strstr(filename, "://")) { |
| 1174 | ret = sd_parse_uri(s, filename, vdi, &snapid, tag); |
| 1175 | } else { |
| 1176 | ret = parse_vdiname(s, filename, vdi, &snapid, tag); |
| 1177 | } |
| 1178 | if (ret < 0) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1179 | goto out; |
| 1180 | } |
| 1181 | s->fd = get_sheep_fd(s); |
| 1182 | if (s->fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1183 | ret = s->fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1184 | goto out; |
| 1185 | } |
| 1186 | |
| 1187 | ret = find_vdi_name(s, vdi, snapid, tag, &vid, 0); |
| 1188 | if (ret) { |
| 1189 | goto out; |
| 1190 | } |
| 1191 | |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1192 | /* |
| 1193 | * QEMU block layer emulates writethrough cache as 'writeback + flush', so |
| 1194 | * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. |
| 1195 | */ |
| 1196 | s->cache_flags = SD_FLAG_CMD_CACHE; |
| 1197 | if (flags & BDRV_O_NOCACHE) { |
| 1198 | s->cache_flags = SD_FLAG_CMD_DIRECT; |
| 1199 | } |
| 1200 | |
MORITA Kazutaka | 622b605 | 2012-05-17 03:15:31 +0900 | [diff] [blame] | 1201 | if (snapid || tag[0] != '\0') { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1202 | dprintf("%" PRIx32 " snapshot inode was open.\n", vid); |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1203 | s->is_snapshot = true; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1204 | } |
| 1205 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1206 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1207 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1208 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1209 | goto out; |
| 1210 | } |
| 1211 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1212 | buf = g_malloc(SD_INODE_SIZE); |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1213 | ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1214 | s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1215 | |
| 1216 | closesocket(fd); |
| 1217 | |
| 1218 | if (ret) { |
| 1219 | goto out; |
| 1220 | } |
| 1221 | |
| 1222 | memcpy(&s->inode, buf, sizeof(s->inode)); |
| 1223 | s->min_dirty_data_idx = UINT32_MAX; |
| 1224 | s->max_dirty_data_idx = 0; |
| 1225 | |
| 1226 | bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE; |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1227 | pstrcpy(s->name, sizeof(s->name), vdi); |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1228 | qemu_co_mutex_init(&s->lock); |
Kevin Wolf | c8c9635 | 2013-04-12 18:10:49 +0200 | [diff] [blame] | 1229 | qemu_opts_del(opts); |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1230 | g_free(buf); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1231 | return 0; |
| 1232 | out: |
Paolo Bonzini | bafbd6a | 2012-04-12 14:00:54 +0200 | [diff] [blame] | 1233 | qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1234 | if (s->fd >= 0) { |
| 1235 | closesocket(s->fd); |
| 1236 | } |
Kevin Wolf | c8c9635 | 2013-04-12 18:10:49 +0200 | [diff] [blame] | 1237 | qemu_opts_del(opts); |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1238 | g_free(buf); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1239 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1240 | } |
| 1241 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1242 | static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, |
| 1243 | uint32_t base_vid, uint32_t *vdi_id, int snapshot) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1244 | { |
| 1245 | SheepdogVdiReq hdr; |
| 1246 | SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; |
| 1247 | int fd, ret; |
| 1248 | unsigned int wlen, rlen = 0; |
| 1249 | char buf[SD_MAX_VDI_LEN]; |
| 1250 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1251 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1252 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1253 | return fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1254 | } |
| 1255 | |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1256 | /* FIXME: would it be better to fail (e.g., return -EIO) when filename |
| 1257 | * does not fit in buf? For now, just truncate and avoid buffer overrun. |
| 1258 | */ |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1259 | memset(buf, 0, sizeof(buf)); |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1260 | pstrcpy(buf, sizeof(buf), filename); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1261 | |
| 1262 | memset(&hdr, 0, sizeof(hdr)); |
| 1263 | hdr.opcode = SD_OP_NEW_VDI; |
Liu Yuan | 6f74c26 | 2013-01-29 17:14:16 +0800 | [diff] [blame] | 1264 | hdr.vdi_id = base_vid; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1265 | |
| 1266 | wlen = SD_MAX_VDI_LEN; |
| 1267 | |
| 1268 | hdr.flags = SD_FLAG_CMD_WRITE; |
| 1269 | hdr.snapid = snapshot; |
| 1270 | |
| 1271 | hdr.data_length = wlen; |
| 1272 | hdr.vdi_size = vdi_size; |
| 1273 | |
| 1274 | ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); |
| 1275 | |
| 1276 | closesocket(fd); |
| 1277 | |
| 1278 | if (ret) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1279 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1280 | } |
| 1281 | |
| 1282 | if (rsp->result != SD_RES_SUCCESS) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1283 | error_report("%s, %s", sd_strerror(rsp->result), filename); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1284 | return -EIO; |
| 1285 | } |
| 1286 | |
| 1287 | if (vdi_id) { |
| 1288 | *vdi_id = rsp->vdi_id; |
| 1289 | } |
| 1290 | |
| 1291 | return 0; |
| 1292 | } |
| 1293 | |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1294 | static int sd_prealloc(const char *filename) |
| 1295 | { |
| 1296 | BlockDriverState *bs = NULL; |
| 1297 | uint32_t idx, max_idx; |
| 1298 | int64_t vdi_size; |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1299 | void *buf = g_malloc0(SD_DATA_OBJ_SIZE); |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1300 | int ret; |
| 1301 | |
Kevin Wolf | 787e4a8 | 2013-03-06 11:52:48 +0100 | [diff] [blame] | 1302 | ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR); |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1303 | if (ret < 0) { |
| 1304 | goto out; |
| 1305 | } |
| 1306 | |
| 1307 | vdi_size = bdrv_getlength(bs); |
| 1308 | if (vdi_size < 0) { |
| 1309 | ret = vdi_size; |
| 1310 | goto out; |
| 1311 | } |
| 1312 | max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); |
| 1313 | |
| 1314 | for (idx = 0; idx < max_idx; idx++) { |
| 1315 | /* |
| 1316 | * The created image can be a cloned image, so we need to read |
| 1317 | * a data from the source image. |
| 1318 | */ |
| 1319 | ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); |
| 1320 | if (ret < 0) { |
| 1321 | goto out; |
| 1322 | } |
| 1323 | ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); |
| 1324 | if (ret < 0) { |
| 1325 | goto out; |
| 1326 | } |
| 1327 | } |
| 1328 | out: |
| 1329 | if (bs) { |
| 1330 | bdrv_delete(bs); |
| 1331 | } |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1332 | g_free(buf); |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1333 | |
| 1334 | return ret; |
| 1335 | } |
| 1336 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1337 | static int sd_create(const char *filename, QEMUOptionParameter *options) |
| 1338 | { |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1339 | int ret = 0; |
MORITA Kazutaka | b444736 | 2011-01-28 01:33:10 +0900 | [diff] [blame] | 1340 | uint32_t vid = 0, base_vid = 0; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1341 | int64_t vdi_size = 0; |
| 1342 | char *backing_file = NULL; |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1343 | BDRVSheepdogState *s; |
MORITA Kazutaka | b444736 | 2011-01-28 01:33:10 +0900 | [diff] [blame] | 1344 | char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; |
| 1345 | uint32_t snapid; |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1346 | bool prealloc = false; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1347 | |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1348 | s = g_malloc0(sizeof(BDRVSheepdogState)); |
| 1349 | |
MORITA Kazutaka | b444736 | 2011-01-28 01:33:10 +0900 | [diff] [blame] | 1350 | memset(vdi, 0, sizeof(vdi)); |
| 1351 | memset(tag, 0, sizeof(tag)); |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 1352 | if (strstr(filename, "://")) { |
| 1353 | ret = sd_parse_uri(s, filename, vdi, &snapid, tag); |
| 1354 | } else { |
| 1355 | ret = parse_vdiname(s, filename, vdi, &snapid, tag); |
| 1356 | } |
| 1357 | if (ret < 0) { |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1358 | goto out; |
MORITA Kazutaka | b444736 | 2011-01-28 01:33:10 +0900 | [diff] [blame] | 1359 | } |
| 1360 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1361 | while (options && options->name) { |
| 1362 | if (!strcmp(options->name, BLOCK_OPT_SIZE)) { |
| 1363 | vdi_size = options->value.n; |
| 1364 | } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { |
| 1365 | backing_file = options->value.s; |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1366 | } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { |
| 1367 | if (!options->value.s || !strcmp(options->value.s, "off")) { |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1368 | prealloc = false; |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1369 | } else if (!strcmp(options->value.s, "full")) { |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1370 | prealloc = true; |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1371 | } else { |
| 1372 | error_report("Invalid preallocation mode: '%s'", |
| 1373 | options->value.s); |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1374 | ret = -EINVAL; |
| 1375 | goto out; |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1376 | } |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1377 | } |
| 1378 | options++; |
| 1379 | } |
| 1380 | |
| 1381 | if (vdi_size > SD_MAX_VDI_SIZE) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1382 | error_report("too big image size"); |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1383 | ret = -EINVAL; |
| 1384 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1385 | } |
| 1386 | |
| 1387 | if (backing_file) { |
| 1388 | BlockDriverState *bs; |
| 1389 | BDRVSheepdogState *s; |
| 1390 | BlockDriver *drv; |
| 1391 | |
| 1392 | /* Currently, only Sheepdog backing image is supported. */ |
| 1393 | drv = bdrv_find_protocol(backing_file); |
| 1394 | if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1395 | error_report("backing_file must be a sheepdog image"); |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1396 | ret = -EINVAL; |
| 1397 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1398 | } |
| 1399 | |
Kevin Wolf | 787e4a8 | 2013-03-06 11:52:48 +0100 | [diff] [blame] | 1400 | ret = bdrv_file_open(&bs, backing_file, NULL, 0); |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1401 | if (ret < 0) { |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1402 | goto out; |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1403 | } |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1404 | |
| 1405 | s = bs->opaque; |
| 1406 | |
| 1407 | if (!is_snapshot(&s->inode)) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1408 | error_report("cannot clone from a non snapshot vdi"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1409 | bdrv_delete(bs); |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1410 | ret = -EINVAL; |
| 1411 | goto out; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1412 | } |
| 1413 | |
MORITA Kazutaka | b444736 | 2011-01-28 01:33:10 +0900 | [diff] [blame] | 1414 | base_vid = s->inode.vdi_id; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1415 | bdrv_delete(bs); |
| 1416 | } |
| 1417 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1418 | ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1419 | if (!prealloc || ret) { |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1420 | goto out; |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 1421 | } |
| 1422 | |
MORITA Kazutaka | b6fc824 | 2012-05-17 03:15:34 +0900 | [diff] [blame] | 1423 | ret = sd_prealloc(filename); |
| 1424 | out: |
| 1425 | g_free(s); |
| 1426 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1427 | } |
| 1428 | |
| 1429 | static void sd_close(BlockDriverState *bs) |
| 1430 | { |
| 1431 | BDRVSheepdogState *s = bs->opaque; |
| 1432 | SheepdogVdiReq hdr; |
| 1433 | SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; |
| 1434 | unsigned int wlen, rlen = 0; |
| 1435 | int fd, ret; |
| 1436 | |
| 1437 | dprintf("%s\n", s->name); |
| 1438 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1439 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1440 | if (fd < 0) { |
| 1441 | return; |
| 1442 | } |
| 1443 | |
| 1444 | memset(&hdr, 0, sizeof(hdr)); |
| 1445 | |
| 1446 | hdr.opcode = SD_OP_RELEASE_VDI; |
Liu Yuan | 6f74c26 | 2013-01-29 17:14:16 +0800 | [diff] [blame] | 1447 | hdr.vdi_id = s->inode.vdi_id; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1448 | wlen = strlen(s->name) + 1; |
| 1449 | hdr.data_length = wlen; |
| 1450 | hdr.flags = SD_FLAG_CMD_WRITE; |
| 1451 | |
| 1452 | ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); |
| 1453 | |
| 1454 | closesocket(fd); |
| 1455 | |
| 1456 | if (!ret && rsp->result != SD_RES_SUCCESS && |
| 1457 | rsp->result != SD_RES_VDI_NOT_LOCKED) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1458 | error_report("%s, %s", sd_strerror(rsp->result), s->name); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1459 | } |
| 1460 | |
Paolo Bonzini | bafbd6a | 2012-04-12 14:00:54 +0200 | [diff] [blame] | 1461 | qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1462 | closesocket(s->fd); |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1463 | g_free(s->host_spec); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1464 | } |
| 1465 | |
| 1466 | static int64_t sd_getlength(BlockDriverState *bs) |
| 1467 | { |
| 1468 | BDRVSheepdogState *s = bs->opaque; |
| 1469 | |
| 1470 | return s->inode.vdi_size; |
| 1471 | } |
| 1472 | |
| 1473 | static int sd_truncate(BlockDriverState *bs, int64_t offset) |
| 1474 | { |
| 1475 | BDRVSheepdogState *s = bs->opaque; |
| 1476 | int ret, fd; |
| 1477 | unsigned int datalen; |
| 1478 | |
| 1479 | if (offset < s->inode.vdi_size) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1480 | error_report("shrinking is not supported"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1481 | return -EINVAL; |
| 1482 | } else if (offset > SD_MAX_VDI_SIZE) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1483 | error_report("too big image size"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1484 | return -EINVAL; |
| 1485 | } |
| 1486 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1487 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1488 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1489 | return fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1490 | } |
| 1491 | |
| 1492 | /* we don't need to update entire object */ |
| 1493 | datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); |
| 1494 | s->inode.vdi_size = offset; |
| 1495 | ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1496 | s->inode.nr_copies, datalen, 0, false, s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1497 | close(fd); |
| 1498 | |
| 1499 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1500 | error_report("failed to update an inode."); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1501 | } |
| 1502 | |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1503 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1504 | } |
| 1505 | |
| 1506 | /* |
| 1507 | * This function is called after writing data objects. If we need to |
| 1508 | * update metadata, this sends a write request to the vdi object. |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1509 | * Otherwise, this switches back to sd_co_readv/writev. |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1510 | */ |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 1511 | static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1512 | { |
| 1513 | int ret; |
| 1514 | BDRVSheepdogState *s = acb->common.bs->opaque; |
| 1515 | struct iovec iov; |
| 1516 | AIOReq *aio_req; |
| 1517 | uint32_t offset, data_len, mn, mx; |
| 1518 | |
| 1519 | mn = s->min_dirty_data_idx; |
| 1520 | mx = s->max_dirty_data_idx; |
| 1521 | if (mn <= mx) { |
| 1522 | /* we need to update the vdi object. */ |
| 1523 | offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + |
| 1524 | mn * sizeof(s->inode.data_vdi_id[0]); |
| 1525 | data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); |
| 1526 | |
| 1527 | s->min_dirty_data_idx = UINT32_MAX; |
| 1528 | s->max_dirty_data_idx = 0; |
| 1529 | |
| 1530 | iov.iov_base = &s->inode; |
| 1531 | iov.iov_len = sizeof(s->inode); |
| 1532 | aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), |
| 1533 | data_len, offset, 0, 0, offset); |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1534 | QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1535 | ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1536 | if (ret) { |
| 1537 | free_aio_req(s, aio_req); |
| 1538 | acb->ret = -EIO; |
| 1539 | goto out; |
| 1540 | } |
| 1541 | |
| 1542 | acb->aio_done_func = sd_finish_aiocb; |
| 1543 | acb->aiocb_type = AIOCB_WRITE_UDATA; |
| 1544 | return; |
| 1545 | } |
| 1546 | out: |
| 1547 | sd_finish_aiocb(acb); |
| 1548 | } |
| 1549 | |
| 1550 | /* |
| 1551 | * Create a writable VDI from a snapshot |
| 1552 | */ |
| 1553 | static int sd_create_branch(BDRVSheepdogState *s) |
| 1554 | { |
| 1555 | int ret, fd; |
| 1556 | uint32_t vid; |
| 1557 | char *buf; |
| 1558 | |
| 1559 | dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id); |
| 1560 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1561 | buf = g_malloc(SD_INODE_SIZE); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1562 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1563 | ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1564 | if (ret) { |
| 1565 | goto out; |
| 1566 | } |
| 1567 | |
| 1568 | dprintf("%" PRIx32 " is created.\n", vid); |
| 1569 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1570 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1571 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1572 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1573 | goto out; |
| 1574 | } |
| 1575 | |
| 1576 | ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1577 | SD_INODE_SIZE, 0, s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1578 | |
| 1579 | closesocket(fd); |
| 1580 | |
| 1581 | if (ret < 0) { |
| 1582 | goto out; |
| 1583 | } |
| 1584 | |
| 1585 | memcpy(&s->inode, buf, sizeof(s->inode)); |
| 1586 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1587 | s->is_snapshot = false; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1588 | ret = 0; |
| 1589 | dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id); |
| 1590 | |
| 1591 | out: |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1592 | g_free(buf); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1593 | |
| 1594 | return ret; |
| 1595 | } |
| 1596 | |
| 1597 | /* |
| 1598 | * Send I/O requests to the server. |
| 1599 | * |
| 1600 | * This function sends requests to the server, links the requests to |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1601 | * the inflight_list in BDRVSheepdogState, and exits without |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1602 | * waiting the response. The responses are received in the |
| 1603 | * `aio_read_response' function which is called from the main loop as |
| 1604 | * a fd handler. |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1605 | * |
| 1606 | * Returns 1 when we need to wait a response, 0 when there is no sent |
| 1607 | * request and -errno in error cases. |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1608 | */ |
Paolo Bonzini | d8716b4 | 2011-10-05 09:17:31 +0200 | [diff] [blame] | 1609 | static int coroutine_fn sd_co_rw_vector(void *p) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1610 | { |
| 1611 | SheepdogAIOCB *acb = p; |
| 1612 | int ret = 0; |
| 1613 | unsigned long len, done = 0, total = acb->nb_sectors * SECTOR_SIZE; |
| 1614 | unsigned long idx = acb->sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE; |
| 1615 | uint64_t oid; |
| 1616 | uint64_t offset = (acb->sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE; |
| 1617 | BDRVSheepdogState *s = acb->common.bs->opaque; |
| 1618 | SheepdogInode *inode = &s->inode; |
| 1619 | AIOReq *aio_req; |
| 1620 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1621 | if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { |
| 1622 | /* |
| 1623 | * In the case we open the snapshot VDI, Sheepdog creates the |
| 1624 | * writable VDI when we do a write operation first. |
| 1625 | */ |
| 1626 | ret = sd_create_branch(s); |
| 1627 | if (ret) { |
| 1628 | acb->ret = -EIO; |
| 1629 | goto out; |
| 1630 | } |
| 1631 | } |
| 1632 | |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 1633 | /* |
| 1634 | * Make sure we don't free the aiocb before we are done with all requests. |
| 1635 | * This additional reference is dropped at the end of this function. |
| 1636 | */ |
| 1637 | acb->nr_pending++; |
| 1638 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1639 | while (done != total) { |
| 1640 | uint8_t flags = 0; |
| 1641 | uint64_t old_oid = 0; |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1642 | bool create = false; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1643 | |
| 1644 | oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); |
| 1645 | |
| 1646 | len = MIN(total - done, SD_DATA_OBJ_SIZE - offset); |
| 1647 | |
Christoph Hellwig | 19db9b9 | 2012-07-10 16:12:27 +0200 | [diff] [blame] | 1648 | switch (acb->aiocb_type) { |
| 1649 | case AIOCB_READ_UDATA: |
| 1650 | if (!inode->data_vdi_id[idx]) { |
| 1651 | qemu_iovec_memset(acb->qiov, done, 0, len); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1652 | goto done; |
| 1653 | } |
Christoph Hellwig | 19db9b9 | 2012-07-10 16:12:27 +0200 | [diff] [blame] | 1654 | break; |
| 1655 | case AIOCB_WRITE_UDATA: |
| 1656 | if (!inode->data_vdi_id[idx]) { |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1657 | create = true; |
Christoph Hellwig | 19db9b9 | 2012-07-10 16:12:27 +0200 | [diff] [blame] | 1658 | } else if (!is_data_obj_writable(inode, idx)) { |
| 1659 | /* Copy-On-Write */ |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1660 | create = true; |
Christoph Hellwig | 19db9b9 | 2012-07-10 16:12:27 +0200 | [diff] [blame] | 1661 | old_oid = oid; |
| 1662 | flags = SD_FLAG_CMD_COW; |
| 1663 | } |
| 1664 | break; |
| 1665 | default: |
| 1666 | break; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1667 | } |
| 1668 | |
| 1669 | if (create) { |
MORITA Kazutaka | 1b6ac99 | 2012-06-27 07:26:18 +0900 | [diff] [blame] | 1670 | dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n", |
| 1671 | inode->vdi_id, oid, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1672 | vid_to_data_oid(inode->data_vdi_id[idx], idx), idx); |
| 1673 | oid = vid_to_data_oid(inode->vdi_id, idx); |
MORITA Kazutaka | 1b6ac99 | 2012-06-27 07:26:18 +0900 | [diff] [blame] | 1674 | dprintf("new oid %" PRIx64 "\n", oid); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1675 | } |
| 1676 | |
| 1677 | aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); |
| 1678 | |
| 1679 | if (create) { |
| 1680 | AIOReq *areq; |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1681 | QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1682 | if (areq->oid == oid) { |
| 1683 | /* |
| 1684 | * Sheepdog cannot handle simultaneous create |
| 1685 | * requests to the same object. So we cannot send |
| 1686 | * the request until the previous request |
| 1687 | * finishes. |
| 1688 | */ |
| 1689 | aio_req->flags = 0; |
| 1690 | aio_req->base_oid = 0; |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1691 | QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, |
| 1692 | aio_siblings); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1693 | goto done; |
| 1694 | } |
| 1695 | } |
| 1696 | } |
| 1697 | |
MORITA Kazutaka | c292ee6 | 2012-06-27 07:26:22 +0900 | [diff] [blame] | 1698 | QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1699 | ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, |
| 1700 | create, acb->aiocb_type); |
| 1701 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1702 | error_report("add_aio_request is failed"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1703 | free_aio_req(s, aio_req); |
| 1704 | acb->ret = -EIO; |
| 1705 | goto out; |
| 1706 | } |
| 1707 | done: |
| 1708 | offset = 0; |
| 1709 | idx++; |
| 1710 | done += len; |
| 1711 | } |
| 1712 | out: |
MORITA Kazutaka | 1d732d7 | 2012-06-27 07:26:21 +0900 | [diff] [blame] | 1713 | if (!--acb->nr_pending) { |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1714 | return acb->ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1715 | } |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1716 | return 1; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1717 | } |
| 1718 | |
Dong Xu Wang | a968168 | 2011-11-10 16:23:22 +0800 | [diff] [blame] | 1719 | static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1720 | int nb_sectors, QEMUIOVector *qiov) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1721 | { |
| 1722 | SheepdogAIOCB *acb; |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1723 | int ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1724 | |
| 1725 | if (bs->growable && sector_num + nb_sectors > bs->total_sectors) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1726 | ret = sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE); |
| 1727 | if (ret < 0) { |
| 1728 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1729 | } |
| 1730 | bs->total_sectors = sector_num + nb_sectors; |
| 1731 | } |
| 1732 | |
Liu Yuan | f700f8e | 2013-01-14 14:01:03 +0800 | [diff] [blame] | 1733 | acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1734 | acb->aio_done_func = sd_write_done; |
| 1735 | acb->aiocb_type = AIOCB_WRITE_UDATA; |
| 1736 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1737 | ret = sd_co_rw_vector(acb); |
| 1738 | if (ret <= 0) { |
| 1739 | qemu_aio_release(acb); |
| 1740 | return ret; |
| 1741 | } |
| 1742 | |
| 1743 | qemu_coroutine_yield(); |
| 1744 | |
| 1745 | return acb->ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1746 | } |
| 1747 | |
Dong Xu Wang | a968168 | 2011-11-10 16:23:22 +0800 | [diff] [blame] | 1748 | static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1749 | int nb_sectors, QEMUIOVector *qiov) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1750 | { |
| 1751 | SheepdogAIOCB *acb; |
Christoph Hellwig | 19db9b9 | 2012-07-10 16:12:27 +0200 | [diff] [blame] | 1752 | int ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1753 | |
Liu Yuan | f700f8e | 2013-01-14 14:01:03 +0800 | [diff] [blame] | 1754 | acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1755 | acb->aiocb_type = AIOCB_READ_UDATA; |
| 1756 | acb->aio_done_func = sd_finish_aiocb; |
| 1757 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 1758 | ret = sd_co_rw_vector(acb); |
| 1759 | if (ret <= 0) { |
| 1760 | qemu_aio_release(acb); |
| 1761 | return ret; |
| 1762 | } |
| 1763 | |
| 1764 | qemu_coroutine_yield(); |
| 1765 | |
| 1766 | return acb->ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1767 | } |
| 1768 | |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1769 | static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) |
| 1770 | { |
| 1771 | BDRVSheepdogState *s = bs->opaque; |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1772 | SheepdogAIOCB *acb; |
| 1773 | AIOReq *aio_req; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1774 | int ret; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1775 | |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1776 | if (s->cache_flags != SD_FLAG_CMD_CACHE) { |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1777 | return 0; |
| 1778 | } |
| 1779 | |
Liu Yuan | f700f8e | 2013-01-14 14:01:03 +0800 | [diff] [blame] | 1780 | acb = sd_aio_setup(bs, NULL, 0, 0); |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1781 | acb->aiocb_type = AIOCB_FLUSH_CACHE; |
| 1782 | acb->aio_done_func = sd_finish_aiocb; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1783 | |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1784 | aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), |
| 1785 | 0, 0, 0, 0, 0); |
| 1786 | QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); |
| 1787 | ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); |
| 1788 | if (ret < 0) { |
| 1789 | error_report("add_aio_request is failed"); |
| 1790 | free_aio_req(s, aio_req); |
| 1791 | qemu_aio_release(acb); |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1792 | return ret; |
| 1793 | } |
| 1794 | |
Liu Yuan | 4778307 | 2013-01-15 16:28:55 +0800 | [diff] [blame] | 1795 | qemu_coroutine_yield(); |
| 1796 | return acb->ret; |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 1797 | } |
| 1798 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1799 | static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) |
| 1800 | { |
| 1801 | BDRVSheepdogState *s = bs->opaque; |
| 1802 | int ret, fd; |
| 1803 | uint32_t new_vid; |
| 1804 | SheepdogInode *inode; |
| 1805 | unsigned int datalen; |
| 1806 | |
MORITA Kazutaka | 1b6ac99 | 2012-06-27 07:26:18 +0900 | [diff] [blame] | 1807 | dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1808 | "is_snapshot %d\n", sn_info->name, sn_info->id_str, |
| 1809 | s->name, sn_info->vm_state_size, s->is_snapshot); |
| 1810 | |
| 1811 | if (s->is_snapshot) { |
| 1812 | error_report("You can't create a snapshot of a snapshot VDI, " |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1813 | "%s (%" PRIu32 ").", s->name, s->inode.vdi_id); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1814 | |
| 1815 | return -EINVAL; |
| 1816 | } |
| 1817 | |
| 1818 | dprintf("%s %s\n", sn_info->name, sn_info->id_str); |
| 1819 | |
| 1820 | s->inode.vm_state_size = sn_info->vm_state_size; |
| 1821 | s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1822 | /* It appears that inode.tag does not require a NUL terminator, |
| 1823 | * which means this use of strncpy is ok. |
| 1824 | */ |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1825 | strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); |
| 1826 | /* we don't need to update entire object */ |
| 1827 | datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); |
| 1828 | |
| 1829 | /* refresh inode. */ |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1830 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1831 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1832 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1833 | goto cleanup; |
| 1834 | } |
| 1835 | |
| 1836 | ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1837 | s->inode.nr_copies, datalen, 0, false, s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1838 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1839 | error_report("failed to write snapshot's inode."); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1840 | goto cleanup; |
| 1841 | } |
| 1842 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1843 | ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, |
| 1844 | 1); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1845 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1846 | error_report("failed to create inode for snapshot. %s", |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1847 | strerror(errno)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1848 | goto cleanup; |
| 1849 | } |
| 1850 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1851 | inode = (SheepdogInode *)g_malloc(datalen); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1852 | |
| 1853 | ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1854 | s->inode.nr_copies, datalen, 0, s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1855 | |
| 1856 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1857 | error_report("failed to read new inode info. %s", strerror(errno)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1858 | goto cleanup; |
| 1859 | } |
| 1860 | |
| 1861 | memcpy(&s->inode, inode, datalen); |
| 1862 | dprintf("s->inode: name %s snap_id %x oid %x\n", |
| 1863 | s->inode.name, s->inode.snap_id, s->inode.vdi_id); |
| 1864 | |
| 1865 | cleanup: |
| 1866 | closesocket(fd); |
| 1867 | return ret; |
| 1868 | } |
| 1869 | |
| 1870 | static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) |
| 1871 | { |
| 1872 | BDRVSheepdogState *s = bs->opaque; |
| 1873 | BDRVSheepdogState *old_s; |
| 1874 | char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; |
| 1875 | char *buf = NULL; |
| 1876 | uint32_t vid; |
| 1877 | uint32_t snapid = 0; |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1878 | int ret = 0, fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1879 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1880 | old_s = g_malloc(sizeof(BDRVSheepdogState)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1881 | |
| 1882 | memcpy(old_s, s, sizeof(BDRVSheepdogState)); |
| 1883 | |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1884 | pstrcpy(vdi, sizeof(vdi), s->name); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1885 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1886 | snapid = strtoul(snapshot_id, NULL, 10); |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 1887 | if (snapid) { |
| 1888 | tag[0] = 0; |
| 1889 | } else { |
| 1890 | pstrcpy(tag, sizeof(tag), s->name); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1891 | } |
| 1892 | |
| 1893 | ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1); |
| 1894 | if (ret) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1895 | error_report("Failed to find_vdi_name"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1896 | goto out; |
| 1897 | } |
| 1898 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1899 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1900 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1901 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1902 | goto out; |
| 1903 | } |
| 1904 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1905 | buf = g_malloc(SD_INODE_SIZE); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1906 | ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 1907 | SD_INODE_SIZE, 0, s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1908 | |
| 1909 | closesocket(fd); |
| 1910 | |
| 1911 | if (ret) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1912 | goto out; |
| 1913 | } |
| 1914 | |
| 1915 | memcpy(&s->inode, buf, sizeof(s->inode)); |
| 1916 | |
| 1917 | if (!s->inode.vm_state_size) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1918 | error_report("Invalid snapshot"); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1919 | ret = -ENOENT; |
| 1920 | goto out; |
| 1921 | } |
| 1922 | |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 1923 | s->is_snapshot = true; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1924 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1925 | g_free(buf); |
| 1926 | g_free(old_s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1927 | |
| 1928 | return 0; |
| 1929 | out: |
| 1930 | /* recover bdrv_sd_state */ |
| 1931 | memcpy(s, old_s, sizeof(BDRVSheepdogState)); |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1932 | g_free(buf); |
| 1933 | g_free(old_s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1934 | |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 1935 | error_report("failed to open. recover old bdrv_sd_state."); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1936 | |
| 1937 | return ret; |
| 1938 | } |
| 1939 | |
| 1940 | static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) |
| 1941 | { |
| 1942 | /* FIXME: Delete specified snapshot id. */ |
| 1943 | return 0; |
| 1944 | } |
| 1945 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1946 | static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) |
| 1947 | { |
| 1948 | BDRVSheepdogState *s = bs->opaque; |
| 1949 | SheepdogReq req; |
| 1950 | int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long); |
| 1951 | QEMUSnapshotInfo *sn_tab = NULL; |
| 1952 | unsigned wlen, rlen; |
| 1953 | int found = 0; |
| 1954 | static SheepdogInode inode; |
| 1955 | unsigned long *vdi_inuse; |
| 1956 | unsigned int start_nr; |
| 1957 | uint64_t hval; |
| 1958 | uint32_t vid; |
| 1959 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1960 | vdi_inuse = g_malloc(max); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1961 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1962 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1963 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1964 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1965 | goto out; |
| 1966 | } |
| 1967 | |
| 1968 | rlen = max; |
| 1969 | wlen = 0; |
| 1970 | |
| 1971 | memset(&req, 0, sizeof(req)); |
| 1972 | |
| 1973 | req.opcode = SD_OP_READ_VDIS; |
| 1974 | req.data_length = max; |
| 1975 | |
| 1976 | ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen); |
| 1977 | |
| 1978 | closesocket(fd); |
| 1979 | if (ret) { |
| 1980 | goto out; |
| 1981 | } |
| 1982 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 1983 | sn_tab = g_malloc0(nr * sizeof(*sn_tab)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1984 | |
| 1985 | /* calculate a vdi id with hash function */ |
| 1986 | hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); |
| 1987 | start_nr = hval & (SD_NR_VDIS - 1); |
| 1988 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 1989 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1990 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 1991 | ret = fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 1992 | goto out; |
| 1993 | } |
| 1994 | |
| 1995 | for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) { |
| 1996 | if (!test_bit(vid, vdi_inuse)) { |
| 1997 | break; |
| 1998 | } |
| 1999 | |
| 2000 | /* we don't need to read entire object */ |
| 2001 | ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 2002 | 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 2003 | s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2004 | |
| 2005 | if (ret) { |
| 2006 | continue; |
| 2007 | } |
| 2008 | |
| 2009 | if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) { |
| 2010 | sn_tab[found].date_sec = inode.snap_ctime >> 32; |
| 2011 | sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff; |
| 2012 | sn_tab[found].vm_state_size = inode.vm_state_size; |
| 2013 | sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; |
| 2014 | |
| 2015 | snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", |
| 2016 | inode.snap_id); |
Jim Meyering | 3178e27 | 2012-10-04 13:09:47 +0200 | [diff] [blame] | 2017 | pstrcpy(sn_tab[found].name, |
| 2018 | MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)), |
| 2019 | inode.tag); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2020 | found++; |
| 2021 | } |
| 2022 | } |
| 2023 | |
| 2024 | closesocket(fd); |
| 2025 | out: |
| 2026 | *psn_tab = sn_tab; |
| 2027 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 2028 | g_free(vdi_inuse); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2029 | |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 2030 | if (ret < 0) { |
| 2031 | return ret; |
| 2032 | } |
| 2033 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2034 | return found; |
| 2035 | } |
| 2036 | |
| 2037 | static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, |
| 2038 | int64_t pos, int size, int load) |
| 2039 | { |
MORITA Kazutaka | 2f53680 | 2012-10-07 01:57:14 +0900 | [diff] [blame] | 2040 | bool create; |
| 2041 | int fd, ret = 0, remaining = size; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2042 | unsigned int data_len; |
| 2043 | uint64_t vmstate_oid; |
| 2044 | uint32_t vdi_index; |
| 2045 | uint64_t offset; |
| 2046 | |
MORITA Kazutaka | 25af257 | 2013-02-22 12:39:52 +0900 | [diff] [blame] | 2047 | fd = connect_to_sdog(s); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2048 | if (fd < 0) { |
MORITA Kazutaka | cb59588 | 2012-05-17 03:15:33 +0900 | [diff] [blame] | 2049 | return fd; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2050 | } |
| 2051 | |
MORITA Kazutaka | 6f3c714 | 2012-05-30 01:05:15 +0900 | [diff] [blame] | 2052 | while (remaining) { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2053 | vdi_index = pos / SD_DATA_OBJ_SIZE; |
| 2054 | offset = pos % SD_DATA_OBJ_SIZE; |
| 2055 | |
MORITA Kazutaka | 1f7a48d | 2012-08-30 03:39:45 +0900 | [diff] [blame] | 2056 | data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2057 | |
| 2058 | vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index); |
| 2059 | |
| 2060 | create = (offset == 0); |
| 2061 | if (load) { |
| 2062 | ret = read_object(fd, (char *)data, vmstate_oid, |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 2063 | s->inode.nr_copies, data_len, offset, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 2064 | s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2065 | } else { |
| 2066 | ret = write_object(fd, (char *)data, vmstate_oid, |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 2067 | s->inode.nr_copies, data_len, offset, create, |
Liu Yuan | 0e7106d | 2013-01-10 16:03:47 +0800 | [diff] [blame] | 2068 | s->cache_flags); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2069 | } |
| 2070 | |
| 2071 | if (ret < 0) { |
Markus Armbruster | 6daf194 | 2011-06-22 14:03:54 +0200 | [diff] [blame] | 2072 | error_report("failed to save vmstate %s", strerror(errno)); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2073 | goto cleanup; |
| 2074 | } |
| 2075 | |
| 2076 | pos += data_len; |
MORITA Kazutaka | 1f7a48d | 2012-08-30 03:39:45 +0900 | [diff] [blame] | 2077 | data += data_len; |
MORITA Kazutaka | 6f3c714 | 2012-05-30 01:05:15 +0900 | [diff] [blame] | 2078 | remaining -= data_len; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2079 | } |
MORITA Kazutaka | 6f3c714 | 2012-05-30 01:05:15 +0900 | [diff] [blame] | 2080 | ret = size; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2081 | cleanup: |
| 2082 | closesocket(fd); |
| 2083 | return ret; |
| 2084 | } |
| 2085 | |
Kevin Wolf | cf8074b | 2013-04-05 21:27:53 +0200 | [diff] [blame] | 2086 | static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, |
| 2087 | int64_t pos) |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2088 | { |
| 2089 | BDRVSheepdogState *s = bs->opaque; |
Kevin Wolf | cf8074b | 2013-04-05 21:27:53 +0200 | [diff] [blame] | 2090 | void *buf; |
| 2091 | int ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2092 | |
Kevin Wolf | cf8074b | 2013-04-05 21:27:53 +0200 | [diff] [blame] | 2093 | buf = qemu_blockalign(bs, qiov->size); |
| 2094 | qemu_iovec_to_buf(qiov, 0, buf, qiov->size); |
| 2095 | ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0); |
| 2096 | qemu_vfree(buf); |
| 2097 | |
| 2098 | return ret; |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2099 | } |
| 2100 | |
| 2101 | static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data, |
| 2102 | int64_t pos, int size) |
| 2103 | { |
| 2104 | BDRVSheepdogState *s = bs->opaque; |
| 2105 | |
| 2106 | return do_load_save_vmstate(s, data, pos, size, 1); |
| 2107 | } |
| 2108 | |
| 2109 | |
| 2110 | static QEMUOptionParameter sd_create_options[] = { |
| 2111 | { |
| 2112 | .name = BLOCK_OPT_SIZE, |
| 2113 | .type = OPT_SIZE, |
| 2114 | .help = "Virtual disk size" |
| 2115 | }, |
| 2116 | { |
| 2117 | .name = BLOCK_OPT_BACKING_FILE, |
| 2118 | .type = OPT_STRING, |
| 2119 | .help = "File name of a base image" |
| 2120 | }, |
MORITA Kazutaka | a8e0fdd | 2011-07-06 03:38:48 +0900 | [diff] [blame] | 2121 | { |
| 2122 | .name = BLOCK_OPT_PREALLOC, |
| 2123 | .type = OPT_STRING, |
| 2124 | .help = "Preallocation mode (allowed values: off, full)" |
| 2125 | }, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2126 | { NULL } |
| 2127 | }; |
| 2128 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 2129 | static BlockDriver bdrv_sheepdog = { |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2130 | .format_name = "sheepdog", |
| 2131 | .protocol_name = "sheepdog", |
| 2132 | .instance_size = sizeof(BDRVSheepdogState), |
| 2133 | .bdrv_file_open = sd_open, |
| 2134 | .bdrv_close = sd_close, |
| 2135 | .bdrv_create = sd_create, |
| 2136 | .bdrv_getlength = sd_getlength, |
| 2137 | .bdrv_truncate = sd_truncate, |
| 2138 | |
MORITA Kazutaka | 2df4624 | 2011-08-12 21:33:15 +0900 | [diff] [blame] | 2139 | .bdrv_co_readv = sd_co_readv, |
| 2140 | .bdrv_co_writev = sd_co_writev, |
Liu Yuan | 47622c4 | 2012-04-04 04:03:58 +0800 | [diff] [blame] | 2141 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2142 | |
| 2143 | .bdrv_snapshot_create = sd_snapshot_create, |
| 2144 | .bdrv_snapshot_goto = sd_snapshot_goto, |
| 2145 | .bdrv_snapshot_delete = sd_snapshot_delete, |
| 2146 | .bdrv_snapshot_list = sd_snapshot_list, |
| 2147 | |
| 2148 | .bdrv_save_vmstate = sd_save_vmstate, |
| 2149 | .bdrv_load_vmstate = sd_load_vmstate, |
| 2150 | |
| 2151 | .create_options = sd_create_options, |
| 2152 | }; |
| 2153 | |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 2154 | static BlockDriver bdrv_sheepdog_tcp = { |
| 2155 | .format_name = "sheepdog", |
| 2156 | .protocol_name = "sheepdog+tcp", |
| 2157 | .instance_size = sizeof(BDRVSheepdogState), |
| 2158 | .bdrv_file_open = sd_open, |
| 2159 | .bdrv_close = sd_close, |
| 2160 | .bdrv_create = sd_create, |
| 2161 | .bdrv_getlength = sd_getlength, |
| 2162 | .bdrv_truncate = sd_truncate, |
| 2163 | |
| 2164 | .bdrv_co_readv = sd_co_readv, |
| 2165 | .bdrv_co_writev = sd_co_writev, |
| 2166 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, |
| 2167 | |
| 2168 | .bdrv_snapshot_create = sd_snapshot_create, |
| 2169 | .bdrv_snapshot_goto = sd_snapshot_goto, |
| 2170 | .bdrv_snapshot_delete = sd_snapshot_delete, |
| 2171 | .bdrv_snapshot_list = sd_snapshot_list, |
| 2172 | |
| 2173 | .bdrv_save_vmstate = sd_save_vmstate, |
| 2174 | .bdrv_load_vmstate = sd_load_vmstate, |
| 2175 | |
| 2176 | .create_options = sd_create_options, |
| 2177 | }; |
| 2178 | |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 2179 | static BlockDriver bdrv_sheepdog_unix = { |
| 2180 | .format_name = "sheepdog", |
| 2181 | .protocol_name = "sheepdog+unix", |
| 2182 | .instance_size = sizeof(BDRVSheepdogState), |
| 2183 | .bdrv_file_open = sd_open, |
| 2184 | .bdrv_close = sd_close, |
| 2185 | .bdrv_create = sd_create, |
| 2186 | .bdrv_getlength = sd_getlength, |
| 2187 | .bdrv_truncate = sd_truncate, |
| 2188 | |
| 2189 | .bdrv_co_readv = sd_co_readv, |
| 2190 | .bdrv_co_writev = sd_co_writev, |
| 2191 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, |
| 2192 | |
| 2193 | .bdrv_snapshot_create = sd_snapshot_create, |
| 2194 | .bdrv_snapshot_goto = sd_snapshot_goto, |
| 2195 | .bdrv_snapshot_delete = sd_snapshot_delete, |
| 2196 | .bdrv_snapshot_list = sd_snapshot_list, |
| 2197 | |
| 2198 | .bdrv_save_vmstate = sd_save_vmstate, |
| 2199 | .bdrv_load_vmstate = sd_load_vmstate, |
| 2200 | |
| 2201 | .create_options = sd_create_options, |
| 2202 | }; |
| 2203 | |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2204 | static void bdrv_sheepdog_init(void) |
| 2205 | { |
| 2206 | bdrv_register(&bdrv_sheepdog); |
MORITA Kazutaka | 5d6768e | 2013-02-22 12:39:51 +0900 | [diff] [blame] | 2207 | bdrv_register(&bdrv_sheepdog_tcp); |
MORITA Kazutaka | 1b8bbb4 | 2013-02-22 12:39:53 +0900 | [diff] [blame] | 2208 | bdrv_register(&bdrv_sheepdog_unix); |
MORITA Kazutaka | 33b1db1 | 2010-06-21 05:01:00 +0900 | [diff] [blame] | 2209 | } |
| 2210 | block_init(bdrv_sheepdog_init); |