blob: 38f74c94da4b6c7b37c10c36dfd8e3edf7caa993 [file] [log] [blame]
Max Reitz0c9b70d2020-10-27 20:05:42 +01001/*
2 * Present a block device as a raw image through FUSE
3 *
4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 or later of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19#define FUSE_USE_VERSION 31
20
21#include "qemu/osdep.h"
22#include "block/aio.h"
23#include "block/block.h"
24#include "block/export.h"
25#include "block/fuse.h"
26#include "block/qapi.h"
27#include "qapi/error.h"
28#include "qapi/qapi-commands-block.h"
29#include "sysemu/block-backend.h"
30
31#include <fuse.h>
32#include <fuse_lowlevel.h>
33
34
/*
 * Upper bound for a single read request, and therefore for the bounce
 * buffer allocated to serve it: the block layer's per-request maximum,
 * but never more than 64 MiB.
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
37
38
39typedef struct FuseExport {
40 BlockExport common;
41
42 struct fuse_session *fuse_session;
43 struct fuse_buf fuse_buf;
44 bool mounted, fd_handler_set_up;
45
46 char *mountpoint;
47 bool writable;
Max Reitz4fba06d2020-10-27 20:05:44 +010048 bool growable;
Max Reitz0c9b70d2020-10-27 20:05:42 +010049} FuseExport;
50
51static GHashTable *exports;
52static const struct fuse_lowlevel_ops fuse_ops;
53
54static void fuse_export_shutdown(BlockExport *exp);
55static void fuse_export_delete(BlockExport *exp);
56
57static void init_exports_table(void);
58
59static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
60 Error **errp);
61static void read_from_fuse_export(void *opaque);
62
63static bool is_regular_file(const char *path, Error **errp);
64
65
66static int fuse_export_create(BlockExport *blk_exp,
67 BlockExportOptions *blk_exp_args,
68 Error **errp)
69{
70 FuseExport *exp = container_of(blk_exp, FuseExport, common);
71 BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
72 int ret;
73
74 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
75
Max Reitz4fba06d2020-10-27 20:05:44 +010076 /* For growable exports, take the RESIZE permission */
77 if (args->growable) {
78 uint64_t blk_perm, blk_shared_perm;
79
80 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
81
82 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
83 blk_shared_perm, errp);
84 if (ret < 0) {
85 return ret;
86 }
87 }
88
Max Reitz0c9b70d2020-10-27 20:05:42 +010089 init_exports_table();
90
91 /*
92 * It is important to do this check before calling is_regular_file() --
93 * that function will do a stat(), which we would have to handle if we
94 * already exported something on @mountpoint. But we cannot, because
95 * we are currently caught up here.
96 * (Note that ideally we would want to resolve relative paths here,
97 * but bdrv_make_absolute_filename() might do the wrong thing for
98 * paths that contain colons, and realpath() would resolve symlinks,
99 * which we do not want: The mount point is not going to be the
100 * symlink's destination, but the link itself.)
101 * So this will not catch all potential clashes, but hopefully at
102 * least the most common one of specifying exactly the same path
103 * string twice.
104 */
105 if (g_hash_table_contains(exports, args->mountpoint)) {
106 error_setg(errp, "There already is a FUSE export on '%s'",
107 args->mountpoint);
108 ret = -EEXIST;
109 goto fail;
110 }
111
112 if (!is_regular_file(args->mountpoint, errp)) {
113 ret = -EINVAL;
114 goto fail;
115 }
116
117 exp->mountpoint = g_strdup(args->mountpoint);
118 exp->writable = blk_exp_args->writable;
Max Reitz4fba06d2020-10-27 20:05:44 +0100119 exp->growable = args->growable;
Max Reitz0c9b70d2020-10-27 20:05:42 +0100120
121 ret = setup_fuse_export(exp, args->mountpoint, errp);
122 if (ret < 0) {
123 goto fail;
124 }
125
126 return 0;
127
128fail:
129 fuse_export_delete(blk_exp);
130 return ret;
131}
132
133/**
134 * Allocates the global @exports hash table.
135 */
136static void init_exports_table(void)
137{
138 if (exports) {
139 return;
140 }
141
142 exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
143}
144
145/**
146 * Create exp->fuse_session and mount it.
147 */
148static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
149 Error **errp)
150{
151 const char *fuse_argv[4];
152 char *mount_opts;
153 struct fuse_args fuse_args;
154 int ret;
155
156 /* Needs to match what fuse_init() sets. Only max_read must be supplied. */
157 mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES);
158
159 fuse_argv[0] = ""; /* Dummy program name */
160 fuse_argv[1] = "-o";
161 fuse_argv[2] = mount_opts;
162 fuse_argv[3] = NULL;
163 fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
164
165 exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
166 sizeof(fuse_ops), exp);
167 g_free(mount_opts);
168 if (!exp->fuse_session) {
169 error_setg(errp, "Failed to set up FUSE session");
170 ret = -EIO;
171 goto fail;
172 }
173
174 ret = fuse_session_mount(exp->fuse_session, mountpoint);
175 if (ret < 0) {
176 error_setg(errp, "Failed to mount FUSE session to export");
177 ret = -EIO;
178 goto fail;
179 }
180 exp->mounted = true;
181
182 g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
183
184 aio_set_fd_handler(exp->common.ctx,
185 fuse_session_fd(exp->fuse_session), true,
186 read_from_fuse_export, NULL, NULL, exp);
187 exp->fd_handler_set_up = true;
188
189 return 0;
190
191fail:
192 fuse_export_shutdown(&exp->common);
193 return ret;
194}
195
196/**
197 * Callback to be invoked when the FUSE session FD can be read from.
198 * (This is basically the FUSE event loop.)
199 */
200static void read_from_fuse_export(void *opaque)
201{
202 FuseExport *exp = opaque;
203 int ret;
204
205 blk_exp_ref(&exp->common);
206
207 do {
208 ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
209 } while (ret == -EINTR);
210 if (ret < 0) {
211 goto out;
212 }
213
214 fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
215
216out:
217 blk_exp_unref(&exp->common);
218}
219
220static void fuse_export_shutdown(BlockExport *blk_exp)
221{
222 FuseExport *exp = container_of(blk_exp, FuseExport, common);
223
224 if (exp->fuse_session) {
225 fuse_session_exit(exp->fuse_session);
226
227 if (exp->fd_handler_set_up) {
228 aio_set_fd_handler(exp->common.ctx,
229 fuse_session_fd(exp->fuse_session), true,
230 NULL, NULL, NULL, NULL);
231 exp->fd_handler_set_up = false;
232 }
233 }
234
235 if (exp->mountpoint) {
236 /*
237 * Safe to drop now, because we will not handle any requests
238 * for this export anymore anyway.
239 */
240 g_hash_table_remove(exports, exp->mountpoint);
241 }
242}
243
244static void fuse_export_delete(BlockExport *blk_exp)
245{
246 FuseExport *exp = container_of(blk_exp, FuseExport, common);
247
248 if (exp->fuse_session) {
249 if (exp->mounted) {
250 fuse_session_unmount(exp->fuse_session);
251 }
252
253 fuse_session_destroy(exp->fuse_session);
254 }
255
256 free(exp->fuse_buf.mem);
257 g_free(exp->mountpoint);
258}
259
260/**
261 * Check whether @path points to a regular file. If not, put an
262 * appropriate message into *errp.
263 */
264static bool is_regular_file(const char *path, Error **errp)
265{
266 struct stat statbuf;
267 int ret;
268
269 ret = stat(path, &statbuf);
270 if (ret < 0) {
271 error_setg_errno(errp, errno, "Failed to stat '%s'", path);
272 return false;
273 }
274
275 if (!S_ISREG(statbuf.st_mode)) {
276 error_setg(errp, "'%s' is not a regular file", path);
277 return false;
278 }
279
280 return true;
281}
282
283/**
284 * A chance to set change some parameters supplied to FUSE_INIT.
285 */
286static void fuse_init(void *userdata, struct fuse_conn_info *conn)
287{
288 /*
289 * MIN_NON_ZERO() would not be wrong here, but what we set here
290 * must equal what has been passed to fuse_session_new().
291 * Therefore, as long as max_read must be passed as a mount option
292 * (which libfuse claims will be changed at some point), we have
293 * to set max_read to a fixed value here.
294 */
295 conn->max_read = FUSE_MAX_BOUNCE_BYTES;
296
297 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
298}
299
Max Reitz41429e32020-10-27 20:05:43 +0100300/**
301 * Let clients look up files. Always return ENOENT because we only
302 * care about the mountpoint itself.
303 */
304static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
305{
306 fuse_reply_err(req, ENOENT);
307}
308
309/**
310 * Let clients get file attributes (i.e., stat() the file).
311 */
312static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
313 struct fuse_file_info *fi)
314{
315 struct stat statbuf;
316 int64_t length, allocated_blocks;
317 time_t now = time(NULL);
318 FuseExport *exp = fuse_req_userdata(req);
319 mode_t mode;
320
321 length = blk_getlength(exp->common.blk);
322 if (length < 0) {
323 fuse_reply_err(req, -length);
324 return;
325 }
326
327 allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
328 if (allocated_blocks <= 0) {
329 allocated_blocks = DIV_ROUND_UP(length, 512);
330 } else {
331 allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
332 }
333
334 mode = S_IFREG | S_IRUSR;
335 if (exp->writable) {
336 mode |= S_IWUSR;
337 }
338
339 statbuf = (struct stat) {
340 .st_ino = inode,
341 .st_mode = mode,
342 .st_nlink = 1,
343 .st_uid = getuid(),
344 .st_gid = getgid(),
345 .st_size = length,
346 .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
347 .st_blocks = allocated_blocks,
348 .st_atime = now,
349 .st_mtime = now,
350 .st_ctime = now,
351 };
352
353 fuse_reply_attr(req, &statbuf, 1.);
354}
355
356static int fuse_do_truncate(const FuseExport *exp, int64_t size,
357 bool req_zero_write, PreallocMode prealloc)
358{
359 uint64_t blk_perm, blk_shared_perm;
360 BdrvRequestFlags truncate_flags = 0;
361 int ret;
362
363 if (req_zero_write) {
364 truncate_flags |= BDRV_REQ_ZERO_WRITE;
365 }
366
Max Reitz4fba06d2020-10-27 20:05:44 +0100367 /* Growable exports have a permanent RESIZE permission */
368 if (!exp->growable) {
369 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
Max Reitz41429e32020-10-27 20:05:43 +0100370
Max Reitz4fba06d2020-10-27 20:05:44 +0100371 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
372 blk_shared_perm, NULL);
373 if (ret < 0) {
374 return ret;
375 }
Max Reitz41429e32020-10-27 20:05:43 +0100376 }
377
378 ret = blk_truncate(exp->common.blk, size, true, prealloc,
379 truncate_flags, NULL);
380
Max Reitz4fba06d2020-10-27 20:05:44 +0100381 if (!exp->growable) {
382 /* Must succeed, because we are only giving up the RESIZE permission */
383 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
384 }
Max Reitz41429e32020-10-27 20:05:43 +0100385
386 return ret;
387}
388
389/**
390 * Let clients set file attributes. Only resizing is supported.
391 */
392static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
393 int to_set, struct fuse_file_info *fi)
394{
395 FuseExport *exp = fuse_req_userdata(req);
396 int ret;
397
398 if (!exp->writable) {
399 fuse_reply_err(req, EACCES);
400 return;
401 }
402
403 if (to_set & ~FUSE_SET_ATTR_SIZE) {
404 fuse_reply_err(req, ENOTSUP);
405 return;
406 }
407
408 ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
409 if (ret < 0) {
410 fuse_reply_err(req, -ret);
411 return;
412 }
413
414 fuse_getattr(req, inode, fi);
415}
416
417/**
418 * Let clients open a file (i.e., the exported image).
419 */
420static void fuse_open(fuse_req_t req, fuse_ino_t inode,
421 struct fuse_file_info *fi)
422{
423 fuse_reply_open(req, fi);
424}
425
426/**
427 * Handle client reads from the exported image.
428 */
429static void fuse_read(fuse_req_t req, fuse_ino_t inode,
430 size_t size, off_t offset, struct fuse_file_info *fi)
431{
432 FuseExport *exp = fuse_req_userdata(req);
433 int64_t length;
434 void *buf;
435 int ret;
436
437 /* Limited by max_read, should not happen */
438 if (size > FUSE_MAX_BOUNCE_BYTES) {
439 fuse_reply_err(req, EINVAL);
440 return;
441 }
442
443 /**
444 * Clients will expect short reads at EOF, so we have to limit
445 * offset+size to the image length.
446 */
447 length = blk_getlength(exp->common.blk);
448 if (length < 0) {
449 fuse_reply_err(req, -length);
450 return;
451 }
452
453 if (offset + size > length) {
454 size = length - offset;
455 }
456
457 buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
458 if (!buf) {
459 fuse_reply_err(req, ENOMEM);
460 return;
461 }
462
463 ret = blk_pread(exp->common.blk, offset, buf, size);
464 if (ret >= 0) {
465 fuse_reply_buf(req, buf, size);
466 } else {
467 fuse_reply_err(req, -ret);
468 }
469
470 qemu_vfree(buf);
471}
472
473/**
474 * Handle client writes to the exported image.
475 */
476static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
477 size_t size, off_t offset, struct fuse_file_info *fi)
478{
479 FuseExport *exp = fuse_req_userdata(req);
480 int64_t length;
481 int ret;
482
483 /* Limited by max_write, should not happen */
484 if (size > BDRV_REQUEST_MAX_BYTES) {
485 fuse_reply_err(req, EINVAL);
486 return;
487 }
488
489 if (!exp->writable) {
490 fuse_reply_err(req, EACCES);
491 return;
492 }
493
494 /**
495 * Clients will expect short writes at EOF, so we have to limit
496 * offset+size to the image length.
497 */
498 length = blk_getlength(exp->common.blk);
499 if (length < 0) {
500 fuse_reply_err(req, -length);
501 return;
502 }
503
504 if (offset + size > length) {
Max Reitz4fba06d2020-10-27 20:05:44 +0100505 if (exp->growable) {
506 ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
507 if (ret < 0) {
508 fuse_reply_err(req, -ret);
509 return;
510 }
511 } else {
512 size = length - offset;
513 }
Max Reitz41429e32020-10-27 20:05:43 +0100514 }
515
516 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
517 if (ret >= 0) {
518 fuse_reply_write(req, size);
519 } else {
520 fuse_reply_err(req, -ret);
521 }
522}
523
524/**
Max Reitz4ca37a92020-10-27 20:05:45 +0100525 * Let clients perform various fallocate() operations.
526 */
527static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
528 off_t offset, off_t length,
529 struct fuse_file_info *fi)
530{
531 FuseExport *exp = fuse_req_userdata(req);
532 int64_t blk_len;
533 int ret;
534
535 if (!exp->writable) {
536 fuse_reply_err(req, EACCES);
537 return;
538 }
539
540 blk_len = blk_getlength(exp->common.blk);
541 if (blk_len < 0) {
542 fuse_reply_err(req, -blk_len);
543 return;
544 }
545
546 if (mode & FALLOC_FL_KEEP_SIZE) {
547 length = MIN(length, blk_len - offset);
548 }
549
550 if (mode & FALLOC_FL_PUNCH_HOLE) {
551 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
552 fuse_reply_err(req, EINVAL);
553 return;
554 }
555
556 do {
557 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
558
559 ret = blk_pdiscard(exp->common.blk, offset, size);
560 offset += size;
561 length -= size;
562 } while (ret == 0 && length > 0);
563 } else if (mode & FALLOC_FL_ZERO_RANGE) {
564 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
565 /* No need for zeroes, we are going to write them ourselves */
566 ret = fuse_do_truncate(exp, offset + length, false,
567 PREALLOC_MODE_OFF);
568 if (ret < 0) {
569 fuse_reply_err(req, -ret);
570 return;
571 }
572 }
573
574 do {
575 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
576
577 ret = blk_pwrite_zeroes(exp->common.blk,
578 offset, size, 0);
579 offset += size;
580 length -= size;
581 } while (ret == 0 && length > 0);
582 } else if (!mode) {
583 /* We can only fallocate at the EOF with a truncate */
584 if (offset < blk_len) {
585 fuse_reply_err(req, EOPNOTSUPP);
586 return;
587 }
588
589 if (offset > blk_len) {
590 /* No preallocation needed here */
591 ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
592 if (ret < 0) {
593 fuse_reply_err(req, -ret);
594 return;
595 }
596 }
597
598 ret = fuse_do_truncate(exp, offset + length, true,
599 PREALLOC_MODE_FALLOC);
600 } else {
601 ret = -EOPNOTSUPP;
602 }
603
604 fuse_reply_err(req, ret < 0 ? -ret : 0);
605}
606
607/**
Max Reitz41429e32020-10-27 20:05:43 +0100608 * Let clients fsync the exported image.
609 */
610static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
611 struct fuse_file_info *fi)
612{
613 FuseExport *exp = fuse_req_userdata(req);
614 int ret;
615
616 ret = blk_flush(exp->common.blk);
617 fuse_reply_err(req, ret < 0 ? -ret : 0);
618}
619
620/**
621 * Called before an FD to the exported image is closed. (libfuse
622 * notes this to be a way to return last-minute errors.)
623 */
624static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
625 struct fuse_file_info *fi)
626{
627 fuse_fsync(req, inode, 1, fi);
628}
629
#ifdef CONFIG_FUSE_LSEEK
/**
 * Handle SEEK_DATA/SEEK_HOLE requests, i.e. let clients inquire the
 * allocation status of the image.
 *
 * Starting at @offset, the block status is walked extent by extent
 * until one of the requested kind is found, whose start is then
 * returned.  Any other @whence is rejected with EINVAL; running into
 * the end of the image yields ENXIO for SEEK_DATA.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);

    if (!(whence == SEEK_HOLE || whence == SEEK_DATA)) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    for (;;) {
        int64_t pnum;
        bool is_data;
        int ret;

        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                      offset, INT64_MAX, &pnum, NULL, NULL);
        if (ret < 0) {
            fuse_reply_err(req, -ret);
            return;
        }

        if (pnum == 0 && (ret & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * blk_getlength() may round (e.g. to sectors), in which
             * case the export length is rounded as well, whereas
             * bdrv_block_status_above() may report EOF at an unaligned
             * offset.  That must not become visible to the client, so
             * a hole is always simulated between @offset (the real
             * EOF) and @blk_len (the client-visible EOF).
             */
            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (offset > blk_len || whence == SEEK_DATA) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /* Done once the extent at @offset is of the kind asked for */
        is_data = !!(ret & BDRV_BLOCK_DATA);
        if (is_data == (whence == SEEK_DATA)) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Without progress, we would loop forever */
        if (pnum == 0) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += pnum;
    }
}
#endif
703
Max Reitz0c9b70d2020-10-27 20:05:42 +0100704static const struct fuse_lowlevel_ops fuse_ops = {
705 .init = fuse_init,
Max Reitz41429e32020-10-27 20:05:43 +0100706 .lookup = fuse_lookup,
707 .getattr = fuse_getattr,
708 .setattr = fuse_setattr,
709 .open = fuse_open,
710 .read = fuse_read,
711 .write = fuse_write,
Max Reitz4ca37a92020-10-27 20:05:45 +0100712 .fallocate = fuse_fallocate,
Max Reitz41429e32020-10-27 20:05:43 +0100713 .flush = fuse_flush,
714 .fsync = fuse_fsync,
Max Reitzdf4ea702020-10-27 20:05:46 +0100715#ifdef CONFIG_FUSE_LSEEK
716 .lseek = fuse_lseek,
717#endif
Max Reitz0c9b70d2020-10-27 20:05:42 +0100718};
719
720const BlockExportDriver blk_exp_fuse = {
721 .type = BLOCK_EXPORT_TYPE_FUSE,
722 .instance_size = sizeof(FuseExport),
723 .create = fuse_export_create,
724 .delete = fuse_export_delete,
725 .request_shutdown = fuse_export_shutdown,
726};