Skip to content

Commit

Permalink
Merge tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/ker…
Browse files Browse the repository at this point in the history
…nel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Add passthrough mode for regular file I/O.

   This allows performing read and write (also via memory maps) on a
   backing file without incurring the overhead of roundtrips to
   userspace. For now this is only allowed to privileged servers, but
   this limitation will go away in the future (Amir Goldstein)

 - Fix interaction of direct I/O mode with memory maps (Bernd Schubert)

 - Export filesystem tags through sysfs for virtiofs (Stefan Hajnoczi)

 - Allow resending queued requests for server crash recovery (Zhao Chen)

 - Misc fixes and cleanups

* tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (38 commits)
  fuse: get rid of ff->readdir.lock
  fuse: remove unneeded lock which protecting update of congestion_threshold
  fuse: Fix missing FOLL_PIN for direct-io
  fuse: remove an unnecessary if statement
  fuse: Track process write operations in both direct and writethrough modes
  fuse: Use the high bit of request ID for indicating resend requests
  fuse: Introduce a new notification type for resend pending requests
  fuse: add support for explicit export disabling
  fuse: __kuid_val/__kgid_val helpers in fuse_fill_attr_from_inode()
  fuse: fix typo for fuse_permission comment
  fuse: Convert fuse_writepage_locked to take a folio
  fuse: Remove fuse_writepage
  virtio_fs: remove duplicate check if queue is broken
  fuse: use FUSE_ROOT_ID in fuse_get_root_inode()
  fuse: don't unhash root
  fuse: fix root lookup with nonzero generation
  fuse: replace remaining make_bad_inode() with fuse_make_bad()
  virtiofs: drop __exit from virtio_fs_sysfs_exit()
  fuse: implement passthrough for mmap
  fuse: implement splice read/write passthrough
  ...
  • Loading branch information
torvalds committed Mar 15, 2024
2 parents 68bf6bf + cdf6ac2 commit 6ce8b2c
Show file tree
Hide file tree
Showing 14 changed files with 1,422 additions and 277 deletions.
11 changes: 11 additions & 0 deletions Documentation/ABI/testing/sysfs-fs-virtiofs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
What: /sys/fs/virtiofs/<n>/tag
Date: Feb 2024
Contact: [email protected]
Description:
[RO] The mount "tag" that can be used to mount this filesystem.

What: /sys/fs/virtiofs/<n>/device
Date: Feb 2024
Contact: [email protected]
Description:
Symlink to the virtio device that exports this filesystem.
11 changes: 11 additions & 0 deletions fs/fuse/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ config FUSE_DAX

If you want to allow mounting a Virtio Filesystem with the "dax"
option, answer Y.

config FUSE_PASSTHROUGH
bool "FUSE passthrough operations support"
default y
depends on FUSE_FS
select FS_STACK
help
This allows bypassing FUSE server by mapping specific FUSE operations
to be performed directly on a backing file.

If you want to allow passthrough operations, answer Y.
2 changes: 2 additions & 0 deletions fs/fuse/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o

fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o

virtiofs-y := virtio_fs.o
6 changes: 1 addition & 5 deletions fs/fuse/control.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
if (!fc)
goto out;

down_read(&fc->killsb);
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
spin_unlock(&fc->bg_lock);
up_read(&fc->killsb);
WRITE_ONCE(fc->congestion_threshold, val);
fuse_conn_put(fc);
out:
return ret;
Expand Down
156 changes: 131 additions & 25 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1775,6 +1775,61 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
return err;
}

/*
 * Resend all requests that are currently on the processing queues.
 *
 * When a FUSE daemon panics and fails over, some in-flight requests may be
 * lost and never answered. Applications waiting for those replies would then
 * be stuck forever. To address this, the daemon can send a notification that
 * makes the kernel queue these requests again, so that they are delivered to
 * the daemon once more.
 *
 * Please note that this strategy is applicable only to idempotent requests or
 * if the FUSE daemon takes careful measures to avoid processing duplicated
 * non-idempotent requests.
 *
 * Does nothing if the connection is no longer connected.
 */
static void fuse_resend(struct fuse_conn *fc)
{
	struct fuse_dev *fud;
	struct fuse_req *req;
	struct fuse_iqueue *fiq = &fc->iq;
	LIST_HEAD(to_queue);
	unsigned int i;

	spin_lock(&fc->lock);
	if (!fc->connected) {
		spin_unlock(&fc->lock);
		return;
	}

	/* Drain every hash bucket of every device's processing queue. */
	list_for_each_entry(fud, &fc->devices, entry) {
		struct fuse_pqueue *fpq = &fud->pq;

		spin_lock(&fpq->lock);
		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
			list_splice_tail_init(&fpq->processing[i], &to_queue);
		spin_unlock(&fpq->lock);
	}
	spin_unlock(&fc->lock);

	/* Nothing is unlinked while walking, so the plain iterator suffices. */
	list_for_each_entry(req, &to_queue, list) {
		/*
		 * No lock is held here and the request is still reachable by
		 * other contexts, so req->flags must be updated atomically;
		 * a non-atomic read-modify-write could drop a bit set
		 * concurrently.
		 * NOTE(review): e.g. a waiter marking the request as
		 * interrupted - confirm against the wait/interrupt path.
		 */
		set_bit(FR_PENDING, &req->flags);
		/*
		 * The request goes back to the pending queue, so it must not
		 * keep looking like one that was already sent.
		 * NOTE(review): presumably FR_SENT was set when the daemon
		 * read the request - confirm against the device read path.
		 */
		clear_bit(FR_SENT, &req->flags);
		/* mark the request as resend request */
		req->in.h.unique |= FUSE_UNIQUE_RESEND;
	}

	spin_lock(&fiq->lock);
	/* iq and pq requests are both oldest to newest */
	list_splice(&to_queue, &fiq->pending);
	/* As the name says, this callback is expected to drop fiq->lock. */
	fiq->ops->wake_pending_and_unlock(fiq);
}

/*
 * Notification handler that asks the kernel to resend requests.
 *
 * Delegates all of the work to fuse_resend(), which returns nothing, so
 * this handler always reports success (0).
 */
static int fuse_notify_resend(struct fuse_conn *fc)
{
	fuse_resend(fc);

	return 0;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
Expand All @@ -1800,6 +1855,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_DELETE:
return fuse_notify_delete(fc, size, cs);

case FUSE_NOTIFY_RESEND:
return fuse_notify_resend(fc);

default:
fuse_copy_finish(cs);
return -EINVAL;
Expand Down Expand Up @@ -2251,43 +2309,91 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
return 0;
}

static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
/*
 * FUSE_DEV_IOC_CLONE: attach @file to the connection of another fuse device.
 *
 * @argp points to a user-space integer holding the file descriptor of the
 * device to clone from.
 *
 * Returns -EFAULT if the descriptor cannot be read from user space, -EINVAL
 * if it does not refer to an open file, refers to a file with different file
 * operations, or has no fuse device; otherwise the result of
 * fuse_device_clone().
 */
static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
{
	struct fuse_dev *src_fud;
	struct fd src;
	long ret = -EINVAL;
	int src_fd;

	if (get_user(src_fd, argp))
		return -EFAULT;

	src = fdget(src_fd);
	if (!src.file)
		return -EINVAL;

	/*
	 * Check against file->f_op because CUSE
	 * uses the same ioctl handler.
	 */
	if (src.file->f_op != file->f_op)
		goto out_put;

	src_fud = fuse_get_dev(src.file);
	if (!src_fud)
		goto out_put;

	/* The clone itself runs under fuse_mutex. */
	mutex_lock(&fuse_mutex);
	ret = fuse_device_clone(src_fud->fc, file);
	mutex_unlock(&fuse_mutex);

out_put:
	fdput(src);
	return ret;
}

/*
 * FUSE_DEV_IOC_BACKING_OPEN: hand a backing-file mapping to the connection.
 *
 * Copies the struct fuse_backing_map at @argp from user space and passes it
 * to fuse_backing_open() for the connection owning @file.
 *
 * Returns -EPERM if @file has no fuse device, -EOPNOTSUPP if
 * CONFIG_FUSE_PASSTHROUGH is disabled, -EFAULT if the argument cannot be
 * copied; otherwise the result of fuse_backing_open().
 */
static long fuse_dev_ioctl_backing_open(struct file *file,
					struct fuse_backing_map __user *argp)
{
	struct fuse_backing_map kmap;
	struct fuse_dev *fud;

	fud = fuse_get_dev(file);
	if (fud == NULL)
		return -EPERM;

	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		return -EOPNOTSUPP;

	if (copy_from_user(&kmap, argp, sizeof(kmap)) != 0)
		return -EFAULT;

	return fuse_backing_open(fud->fc, &kmap);
}

/*
 * FUSE_DEV_IOC_BACKING_CLOSE: release a backing file by its id.
 *
 * Reads the integer backing id at @argp from user space and passes it to
 * fuse_backing_close() for the connection owning @file.
 *
 * Returns -EPERM if @file has no fuse device, -EOPNOTSUPP if
 * CONFIG_FUSE_PASSTHROUGH is disabled, -EFAULT if the id cannot be read;
 * otherwise the result of fuse_backing_close().
 */
static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
{
	struct fuse_dev *fud;
	int id;

	fud = fuse_get_dev(file);
	if (fud == NULL)
		return -EPERM;

	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		return -EOPNOTSUPP;

	if (get_user(id, argp) != 0)
		return -EFAULT;

	return fuse_backing_close(fud->fc, id);
}

static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
void __user *argp = (void __user *)arg;

switch (cmd) {
case FUSE_DEV_IOC_CLONE:
if (get_user(oldfd, (__u32 __user *)arg))
return -EFAULT;
return fuse_dev_ioctl_clone(file, argp);

f = fdget(oldfd);
if (!f.file)
return -EINVAL;
case FUSE_DEV_IOC_BACKING_OPEN:
return fuse_dev_ioctl_backing_open(file, argp);

case FUSE_DEV_IOC_BACKING_CLOSE:
return fuse_dev_ioctl_backing_close(file, argp);

/*
* Check against file->f_op because CUSE
* uses the same ioctl handler.
*/
if (f.file->f_op == file->f_op)
fud = fuse_get_dev(f.file);

res = -EINVAL;
if (fud) {
mutex_lock(&fuse_mutex);
res = fuse_device_clone(fud->fc, file);
mutex_unlock(&fuse_mutex);
}
fdput(f);
break;
default:
res = -ENOTTY;
break;
return -ENOTTY;
}
return res;
}

const struct file_operations fuse_dev_operations = {
Expand Down
55 changes: 43 additions & 12 deletions fs/fuse/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
err = -EIO;
if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
pr_warn_once("root generation should be zero\n");
outarg->generation = 0;
}

*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, ATTR_TIMEOUT(outarg),
Expand Down Expand Up @@ -615,7 +619,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
struct fuse_open_out *outopenp;
struct fuse_entry_out outentry;
struct fuse_inode *fi;
struct fuse_file *ff;
Expand All @@ -630,7 +634,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
goto out_err;

err = -ENOMEM;
ff = fuse_file_alloc(fm);
ff = fuse_file_alloc(fm, true);
if (!ff)
goto out_put_forget_req;

Expand Down Expand Up @@ -659,8 +663,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out_numargs = 2;
args.out_args[0].size = sizeof(outentry);
args.out_args[0].value = &outentry;
args.out_args[1].size = sizeof(outopen);
args.out_args[1].value = &outopen;
/* Store outarg for fuse_finish_open() */
outopenp = &ff->args->open_outarg;
args.out_args[1].size = sizeof(*outopenp);
args.out_args[1].value = outopenp;

err = get_create_ext(&args, dir, entry, mode);
if (err)
Expand All @@ -676,9 +682,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
fuse_invalid_attr(&outentry.attr))
goto out_free_ff;

ff->fh = outopen.fh;
ff->fh = outopenp->fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, ATTR_TIMEOUT(&outentry), 0);
if (!inode) {
Expand All @@ -692,13 +698,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_dir_changed(dir);
err = finish_open(file, entry, generic_file_open);
err = generic_file_open(inode, file);
if (!err) {
file->private_data = ff;
err = finish_open(file, entry, fuse_finish_open);
}
if (err) {
fi = get_fuse_inode(inode);
fuse_sync_release(fi, ff, flags);
} else {
file->private_data = ff;
fuse_finish_open(inode, file);
if (fm->fc->atomic_o_trunc && trunc)
truncate_pagecache(inode, 0);
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
Expand Down Expand Up @@ -1210,7 +1218,7 @@ static int fuse_do_statx(struct inode *inode, struct file *file,
if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
inode_wrong_type(inode, sx->mode)))) {
make_bad_inode(inode);
fuse_make_bad(inode);
return -EIO;
}

Expand Down Expand Up @@ -1485,7 +1493,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
*
* 1) Local access checking ('default_permissions' mount option) based
* on file mode. This is the plain old disk filesystem permission
* modell.
* model.
*
* 2) "Remote" access checking, where server is responsible for
* checking permission in each inode operation. An exception to this
Expand Down Expand Up @@ -1630,7 +1638,30 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,

static int fuse_dir_open(struct inode *inode, struct file *file)
{
return fuse_open_common(inode, file, true);
struct fuse_mount *fm = get_fuse_mount(inode);
int err;

if (fuse_is_bad(inode))
return -EIO;

err = generic_file_open(inode, file);
if (err)
return err;

err = fuse_do_open(fm, get_node_id(inode), file, true);
if (!err) {
struct fuse_file *ff = file->private_data;

/*
* Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
* directories for backward compatibility, though it's unlikely
* to be useful.
*/
if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
nonseekable_open(inode, file);
}

return err;
}

static int fuse_dir_release(struct inode *inode, struct file *file)
Expand Down
Loading

0 comments on commit 6ce8b2c

Please sign in to comment.