From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:13:09 -0400 Subject: stop passing nameidata to ->lookup() Just the flags; only NFS cares even about that, but there are legitimate uses for such argument. And getting rid of that completely would require splitting ->lookup() into a couple of methods (at least), so let's leave that alone for now... Signed-off-by: Al Viro --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b303dfc7dce0..0cd1314acdaf 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); -static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = { .rename = cgroup_rename, }; -static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); -- cgit v1.2.3 From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Jun 2012 03:01:42 +0400 Subject: get rid of kern_path_parent() all callers want the same thing, actually - a kinda-sorta analog of kern_path_create(). I.e. they want parent vfsmount/dentry (with ->i_mutex held, to make sure the child dentry is still their child) + the child dentry. Signed-off-by Al Viro --- drivers/base/devtmpfs.c | 98 +++++++++++++++++++++---------------------------- fs/namei.c | 22 ++++++++++- include/linux/namei.h | 2 +- kernel/audit_watch.c | 25 ++----------- 4 files changed, 65 insertions(+), 82 deletions(-) (limited to 'kernel') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 765c3a28077a..d91a3a0b2325 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -227,33 +227,24 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev) static int dev_rmdir(const char *name) { - struct nameidata nd; + struct path parent; struct dentry *dentry; int err; - err = kern_path_parent(name, &nd); - if (err) - return err; - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - if (dentry->d_inode->i_private == &thread) - err = vfs_rmdir(nd.path.dentry->d_inode, - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } - dput(dentry); + dentry = kern_path_locked(name, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (dentry->d_inode) { + if (dentry->d_inode->i_private == &thread) + err = vfs_rmdir(parent.dentry->d_inode, dentry); + else + err = -EPERM; } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); + path_put(&parent); return err; } @@ -305,50 +296,43 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta static int handle_remove(const char *nodename, struct device *dev) { - struct nameidata nd; + struct path parent; struct dentry *dentry; - struct kstat stat; int deleted = 1; int err; - err = kern_path_parent(nodename, &nd); - if (err) - return err; + dentry = kern_path_locked(nodename, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - err = vfs_getattr(nd.path.mnt, dentry, &stat); - if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = 0; - newattrs.ia_gid = 0; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &newattrs); - mutex_unlock(&dentry->d_inode->i_mutex); - err = vfs_unlink(nd.path.dentry->d_inode, - dentry); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + if (dentry->d_inode) { + struct kstat stat; + err = vfs_getattr(parent.mnt, dentry, &stat); + if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = 0; + newattrs.ia_gid = 0; + newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + mutex_lock(&dentry->d_inode->i_mutex); + notify_change(dentry, &newattrs); + mutex_unlock(&dentry->d_inode->i_mutex); + err = vfs_unlink(parent.dentry->d_inode, dentry); + if (!err || err == -ENOENT) + deleted = 1; } - dput(dentry); } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); - path_put(&nd.path); + path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; diff --git a/fs/namei.c b/fs/namei.c index 5abab9176903..6b29a51bef5d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1814,9 +1814,27 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int kern_path_parent(const char *name, struct nameidata *nd) +/* does lookup, returns the object with parent locked */ +struct dentry *kern_path_locked(const char *name, struct path *path) { - return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); + struct nameidata nd; + struct dentry *d; + int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); + if (err) + return ERR_PTR(err); + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return ERR_PTR(-EINVAL); + } + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return d; + } + *path = nd.path; + return d; } int kern_path(const char *name, unsigned int flags, struct path *path) diff --git a/include/linux/namei.h b/include/linux/namei.h index 23d859879210..f5931489e150 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -67,7 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, int); extern struct dentry *user_path_create(int, const char __user *, struct path *, int); -extern int kern_path_parent(const char *, struct nameidata *); +extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e683869365d9..3823281401b5 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. */ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct nameidata nd; - struct dentry *d; - int err; - - err = kern_path_parent(watch->path, &nd); - if (err) - return err; - - if (nd.last_type != LAST_NORM) { - path_put(&nd.path); - return -EINVAL; - } - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (IS_ERR(d)) { - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + struct dentry *d = kern_path_locked(watch->path, parent); + if (IS_ERR(d)) return PTR_ERR(d); - } + mutex_unlock(&parent->dentry->d_inode->i_mutex); if (d->d_inode) { /* update watch filter fields */ watch->dev = d->d_inode->i_sb->s_dev; watch->ino = d->d_inode->i_ino; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - - *parent = nd.path; dput(d); return 0; } -- cgit v1.2.3 From be34d1a3bc4b6f357a49acb55ae870c81337e4f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:18 +0100 Subject: VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors copy_tree() can theoretically fail in a case other than ENOMEM, but always returns NULL which is interpreted by callers as -ENOMEM. Change it to return an explicit error. Also change clone_mnt() for consistency and because union mounts will add new error cases. Thanks to Andreas Gruenbacher for a bug fix. [AV: folded braino fix by Dan Carpenter] Original-author: Valerie Aurora Signed-off-by: David Howells Cc: Valerie Aurora Cc: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/namespace.c | 120 ++++++++++++++++++++++++++++------------------------ fs/pnode.c | 5 ++- kernel/audit_tree.c | 10 ++--- 3 files changed, 73 insertions(+), 62 deletions(-) (limited to 'kernel') diff --git a/fs/namespace.c b/fs/namespace.c index 8f412abcb67f..be1b07a774f1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -708,56 +708,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { struct super_block *sb = old->mnt.mnt_sb; - struct mount *mnt = alloc_vfsmnt(old->mnt_devname); + struct mount *mnt; + int err; - if (mnt) { - if (flag & (CL_SLAVE | CL_PRIVATE)) - mnt->mnt_group_id = 0; /* not a peer of original */ - else - mnt->mnt_group_id = old->mnt_group_id; - - if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { - int err = mnt_alloc_group_id(mnt); - if (err) - goto out_free; - } + mnt = alloc_vfsmnt(old->mnt_devname); + if (!mnt) + return ERR_PTR(-ENOMEM); - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; - atomic_inc(&sb->s_active); - mnt->mnt.mnt_sb = sb; - mnt->mnt.mnt_root = dget(root); - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); - list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + if (flag & (CL_SLAVE | CL_PRIVATE)) + mnt->mnt_group_id = 0; /* not a peer of original */ + else + mnt->mnt_group_id = old->mnt_group_id; - if (flag & CL_SLAVE) { - list_add(&mnt->mnt_slave, &old->mnt_slave_list); - mnt->mnt_master = old; - CLEAR_MNT_SHARED(mnt); - } else if (!(flag & CL_PRIVATE)) { - if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) - list_add(&mnt->mnt_share, &old->mnt_share); - if (IS_MNT_SLAVE(old)) - list_add(&mnt->mnt_slave, &old->mnt_slave); - mnt->mnt_master = old->mnt_master; - } - if (flag & CL_MAKE_SHARED) - set_mnt_shared(mnt); - - /* stick the duplicate mount on the same expiry list - * as the original if that was on one */ - if (flag & CL_EXPIRE) { - if (!list_empty(&old->mnt_expire)) - list_add(&mnt->mnt_expire, &old->mnt_expire); - } + if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { + err = mnt_alloc_group_id(mnt); + if (err) + goto out_free; + } + + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + atomic_inc(&sb->s_active); + mnt->mnt.mnt_sb = sb; + mnt->mnt.mnt_root = dget(root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + br_write_lock(&vfsmount_lock); + list_add_tail(&mnt->mnt_instance, &sb->s_mounts); + br_write_unlock(&vfsmount_lock); + + if (flag & CL_SLAVE) { + list_add(&mnt->mnt_slave, &old->mnt_slave_list); + mnt->mnt_master = old; + CLEAR_MNT_SHARED(mnt); + } else if (!(flag & CL_PRIVATE)) { + if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) + list_add(&mnt->mnt_share, &old->mnt_share); + if (IS_MNT_SLAVE(old)) + list_add(&mnt->mnt_slave, &old->mnt_slave); + mnt->mnt_master = old->mnt_master; + } + if (flag & CL_MAKE_SHARED) + set_mnt_shared(mnt); + + /* stick the duplicate mount on the same expiry list + * as the original if that was on one */ + if (flag & CL_EXPIRE) { + if (!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); } + return mnt; out_free: free_vfsmnt(mnt); - return NULL; + return ERR_PTR(err); } static inline void mntfree(struct mount *mnt) @@ -1242,11 +1246,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, struct path path; if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) - return NULL; + return ERR_PTR(-EINVAL); res = q = clone_mnt(mnt, dentry, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + return q; + q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; @@ -1268,8 +1273,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, path.mnt = &q->mnt; path.dentry = p->mnt_mountpoint; q = clone_mnt(p, p->mnt.mnt_root, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + goto out; br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); @@ -1277,7 +1282,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, } } return res; -Enomem: +out: if (res) { LIST_HEAD(umount_list); br_write_lock(&vfsmount_lock); @@ -1285,9 +1290,11 @@ Enomem: br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } - return NULL; + return q; } +/* Caller should check returned pointer for errors */ + struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; @@ -1295,7 +1302,9 @@ struct vfsmount *collect_mounts(struct path *path) tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); - return tree ? &tree->mnt : NULL; + if (IS_ERR(tree)) + return NULL; + return &tree->mnt; } void drop_collected_mounts(struct vfsmount *mnt) @@ -1590,14 +1599,15 @@ static int do_loopback(struct path *path, char *old_name, if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) goto out2; - err = -ENOMEM; if (recurse) mnt = copy_tree(old, old_path.dentry, 0); else mnt = clone_mnt(old, old_path.dentry, 0); - if (!mnt) - goto out2; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out; + } err = graft_tree(mnt, path); if (err) { @@ -2211,10 +2221,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, down_write(&namespace_sem); /* First pass: copy the tree topology */ new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); - if (!new) { + if (IS_ERR(new)) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM); + return ERR_CAST(new); } new_ns->root = new; br_write_lock(&vfsmount_lock); diff --git a/fs/pnode.c b/fs/pnode.c index bed378db0758..3e000a51ac0d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - if (!(child = copy_tree(source, source->mnt.mnt_root, type))) { - ret = -ENOMEM; + child = copy_tree(source, source->mnt.mnt_root, type); + if (IS_ERR(child)) { + ret = PTR_ERR(child); list_splice(tree_list, tmp_list.prev); goto out; } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5bf0790497e7..3a5ca582ba1e 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -595,7 +595,7 @@ void audit_trim_trees(void) root_mnt = collect_mounts(&path); path_put(&path); - if (!root_mnt) + if (IS_ERR(root_mnt)) goto skip_it; spin_lock(&hash_lock); @@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule) goto Err; mnt = collect_mounts(&path); path_put(&path); - if (!mnt) { - err = -ENOMEM; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); goto Err; } @@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new) return err; tagged = collect_mounts(&path2); path_put(&path2); - if (!tagged) - return -ENOMEM; + if (IS_ERR(tagged)) + return PTR_ERR(tagged); err = kern_path(old, 0, &path1); if (err) { -- cgit v1.2.3 From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:37 +0100 Subject: VFS: Pass mount flags to sget() Pass mount flags to sget() so that it can use them in initialising a new superblock before the set function is called. They could also be passed to the compare function. Signed-off-by: David Howells Signed-off-by: Al Viro --- drivers/mtd/mtdsuper.c | 4 +--- fs/9p/vfs_super.c | 4 ++-- fs/afs/super.c | 3 +-- fs/btrfs/super.c | 4 ++-- fs/ceph/super.c | 2 +- fs/cifs/cifsfs.c | 9 ++++----- fs/devpts/inode.c | 6 +++--- fs/ecryptfs/main.c | 3 +-- fs/gfs2/ops_fstype.c | 5 ++--- fs/libfs.c | 4 ++-- fs/logfs/super.c | 3 +-- fs/nfs/super.c | 2 +- fs/nilfs2/super.c | 4 ++-- fs/proc/root.c | 3 +-- fs/reiserfs/procfs.c | 2 +- fs/super.c | 22 +++++++++++----------- fs/sysfs/mount.c | 3 +-- fs/ubifs/super.c | 3 +-- include/linux/fs.h | 2 +- kernel/cgroup.c | 2 +- 20 files changed, 40 insertions(+), 50 deletions(-) (limited to 'kernel') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index a90bfe79916d..334da5f583c0 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -63,7 +63,7 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, struct super_block *sb; int ret; - sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, mtd); + sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, flags, mtd); if (IS_ERR(sb)) goto out_error; @@ -74,8 +74,6 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - sb->s_flags = flags; - ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8c92a9ba8330..137d50396898 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; - sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; + sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; if (!v9ses->cache) sb->s_flags |= MS_SYNCHRONOUS; @@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto close_session; } - sb = sget(fs_type, NULL, v9fs_set_super, v9ses); + sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); goto clunk_fid; diff --git a/fs/afs/super.c b/fs/afs/super.c index f02b31e7e648..df8c6047c2a1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, as->volume = vol; /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, afs_set_super, as); + sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); afs_put_volume(vol); @@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); - sb->s_flags = flags; ret = afs_fill_super(sb, ¶ms); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e23991574fdf..b19d75567728 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, + fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); goto error_close_devices; @@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7305a4..7076109f014d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, if (ceph_test_opt(fsc->client, NOSHARE)) compare_super = NULL; - sb = sget(fs_type, compare_super, ceph_set_super, fsc); + sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); if (IS_ERR(sb)) { res = ERR_CAST(sb); goto out; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c0c2751a7573..a7610cfedf0a 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -637,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type, mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; - sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); + /* BB should we make this contingent on mount parm? */ + flags |= MS_NODIRATIME | MS_NOATIME; + + sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); if (IS_ERR(sb)) { root = ERR_CAST(sb); cifs_umount(cifs_sb); @@ -648,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Use existing superblock"); cifs_umount(cifs_sb); } else { - sb->s_flags = flags; - /* BB should we make this contingent on mount parm? */ - sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - rc = cifs_read_super(sb); if (rc) { root = ERR_PTR(rc); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 979c1e309c73..14afbabe6546 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, return ERR_PTR(error); if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); + s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, + NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) goto out_undo_sget; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 68954937a071..7edeb3d893c1 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { rc = PTR_ERR(s); goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b8c250fc4922..6c906078f657 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1286,7 +1286,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); error = PTR_ERR(s); if (IS_ERR(s)) @@ -1316,7 +1316,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1360,7 +1359,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, dev_name, error); return ERR_PTR(error); } - s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { diff --git a/fs/libfs.c b/fs/libfs.c index ebd03f6910d5..a74cb1725ac6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, const struct super_operations *ops, const struct dentry_operations *dops, unsigned long magic) { - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s; struct dentry *dentry; struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); + s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 97bca623d893..345c24b8a6f8 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, log_super("LogFS: Start mount %x\n", mount_count++); err = -EINVAL; - sb = sget(type, logfs_sb_test, logfs_sb_set, super); + sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super); if (IS_ERR(sb)) { super->s_devops->put_device(super); kfree(super); @@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, sb->s_maxbytes = (1ull << 43) - 1; sb->s_max_links = LOGFS_LINK_MAX; sb->s_op = &logfs_super_operations; - sb->s_flags = flags | MS_NOATIME; err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY); if (err) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 06228192f64e..8b2a2977b720 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1099a76cee59..d57c42f974ea 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags, err = -EBUSY; goto failed; } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, + sd.bdev); mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); @@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); diff --git a/fs/proc/root.c b/fs/proc/root.c index 568b20290c75..9a2d9fd7cadd 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, options = data; } - sb = sget(fs_type, proc_test_super, proc_set_super, ns); + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); if (IS_ERR(sb)) return ERR_CAST(sb); @@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } if (!sb->s_root) { - sb->s_flags = flags; err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 2c1ade692cc8..e60e87035bb3 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -403,7 +403,7 @@ static void *r_start(struct seq_file *m, loff_t * pos) if (l) return NULL; - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s))) + if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s))) return NULL; up_write(&s->s_umount); diff --git a/fs/super.c b/fs/super.c index cf001775617f..c743fb3be4b8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to + * @flags: the mount flags * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(struct file_system_type *type) +static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; @@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type) #else INIT_LIST_HEAD(&s->s_files); #endif + s->s_flags = flags; s->s_bdi = &default_backing_dev_info; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); @@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super); * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback + * @flags: mount flags * @data: argument to each of them */ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), + int flags, void *data) { struct super_block *s = NULL; @@ -450,7 +454,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type); + s = alloc_super(type, flags); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags, { struct super_block *sb; - sb = sget(fs_type, ns_test_super, ns_set_super, data); + sb = sget(fs_type, ns_test_super, ns_set_super, flags, data); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { int err; - sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { deactivate_locked_super(sb); @@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC, + bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)) { int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = flags; - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); @@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type, struct super_block *s; int error; - s = sget(fs_type, compare_single, set_anon_super, NULL); + s = sget(fs_type, compare_single, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index c15a7a3572e9..71eb7e253927 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -118,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) info->ns[type] = kobj_ns_grab_current(type); - sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); + sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); if (IS_ERR(sb) || sb->s_fs_info != info) free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { - sb->s_flags = flags; error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(sb); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5862dd9d2784..1c766c39c038 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, sb_test, sb_set, c); + sb = sget(fs_type, sb_test, sb_set, flags, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); @@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, goto out_deact; } } else { - sb->s_flags = flags; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) goto out_deact; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2f857e9eeb3a..48548bdd7722 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1914,7 +1914,7 @@ void free_anon_bdev(dev_t); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), - void *data); + int flags, void *data); extern struct dentry *mount_pseudo(struct file_system_type *, char *, const struct super_operations *ops, const struct dentry_operations *dops, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0cd1314acdaf..af2b5641fc8b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, opts.new_root = new_root; /* Locate an existing or new sb for this hierarchy */ - sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); + sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); if (IS_ERR(sb)) { ret = PTR_ERR(sb); cgroup_drop_root(opts.new_root); -- cgit v1.2.3 From 7266702805f9d824f92ce5c4069eca65d0f21d28 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Jul 2012 14:10:52 +0400 Subject: signal: make sure we don't get stopped with pending task_work Signed-off-by: Al Viro --- kernel/signal.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 677102789cf2..be4f856d52f8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); + if (unlikely(current->task_works)) { + if (test_and_clear_ti_thread_flag(current_thread_info(), + TIF_NOTIFY_RESUME)) { + smp_mb__after_clear_bit(); + task_work_run(); + } + } spin_lock_irq(¤t->sighand->siglock); ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); @@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; + if (unlikely(current->task_works)) { + if (test_and_clear_ti_thread_flag(current_thread_info(), + TIF_NOTIFY_RESUME)) { + smp_mb__after_clear_bit(); + task_work_run(); + } + } + if (unlikely(uprobe_deny_signal())) return 0; -- cgit v1.2.3 From 41f9d29f09ca0b22c3631e8a39676e74cda9bcc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jun 2012 22:10:04 +0400 Subject: trimming task_work: kill ->data get rid of the only user of ->data; this is _not_ the final variant - in the end we'll have task_work and rcu_head identical and just use cred->rcu, at which point the separate allocation will be gone completely. Signed-off-by: Al Viro --- include/linux/task_work.h | 4 +--- kernel/irq/manage.c | 2 +- security/keys/internal.h | 4 ++++ security/keys/keyctl.c | 14 ++++++++------ security/keys/process_keys.c | 5 +++-- 5 files changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 294d5d5e90b1..627421c0e108 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -10,14 +10,12 @@ typedef void (*task_work_func_t)(struct task_work *); struct task_work { struct hlist_node hlist; task_work_func_t func; - void *data; }; static inline void -init_task_work(struct task_work *twork, task_work_func_t func, void *data) +init_task_work(struct task_work *twork, task_work_func_t func) { twork->func = func; - twork->data = data; } int task_work_add(struct task_struct *task, struct task_work *twork, bool); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 8c548232ba39..d1dd54734ce7 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -830,7 +830,7 @@ static int irq_thread(void *data) sched_setscheduler(current, SCHED_FIFO, ¶m); - init_task_work(&on_exit_work, irq_thread_dtor, NULL); + init_task_work(&on_exit_work, irq_thread_dtor); task_work_add(current, &on_exit_work, false); while (!irq_wait_for_interrupt(action)) { diff --git a/security/keys/internal.h b/security/keys/internal.h index 3dcbf86b0d31..b510a316874a 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -148,6 +148,10 @@ extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 +struct kludge { /* this will die off very soon */ + struct task_work twork; + struct cred *cred; +}; extern long join_session_keyring(const char *name); extern void key_change_session_keyring(struct task_work *twork); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0f5b3f027299..26723caaad05 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1456,7 +1456,8 @@ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; - struct task_work *newwork, *oldwork; + struct kludge *newwork; + struct task_work *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; @@ -1466,7 +1467,7 @@ long keyctl_session_to_parent(void) return PTR_ERR(keyring_r); ret = -ENOMEM; - newwork = kmalloc(sizeof(struct task_work), GFP_KERNEL); + newwork = kmalloc(sizeof(struct kludge), GFP_KERNEL); if (!newwork) goto error_keyring; @@ -1478,7 +1479,8 @@ long keyctl_session_to_parent(void) goto error_newwork; cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); - init_task_work(newwork, key_change_session_keyring, cred); + init_task_work(&newwork->twork, key_change_session_keyring); + newwork->cred = cred; me = current; rcu_read_lock(); @@ -1527,18 +1529,18 @@ long keyctl_session_to_parent(void) /* the replacement session keyring is applied just prior to userspace * restarting */ - ret = task_work_add(parent, newwork, true); + ret = task_work_add(parent, &newwork->twork, true); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) { - put_cred(oldwork->data); + put_cred(container_of(oldwork, struct kludge, twork)->cred); kfree(oldwork); } if (newwork) { - put_cred(newwork->data); + put_cred(newwork->cred); kfree(newwork); } return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 4ad54eea1ea4..c9b07c97d7f2 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -837,9 +837,10 @@ error: void key_change_session_keyring(struct task_work *twork) { const struct cred *old = current_cred(); - struct cred *new = twork->data; + struct kludge *p = container_of(twork, struct kludge, twork); + struct cred *new = p->cred; - kfree(twork); + kfree(p); if (unlikely(current->flags & PF_EXITING)) { put_cred(new); return; -- cgit v1.2.3 From 158e1645e07f3e9f7e4962d7a0997f5c3b98311b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 09:24:13 +0400 Subject: trim task_work: get rid of hlist layout based on Oleg's suggestion; single-linked list, task->task_works points to the last element, forward pointer from said last element points to head. I'd still prefer much more regular scheme with two pointers in task_work, but... Signed-off-by: Al Viro --- include/linux/sched.h | 2 +- include/linux/task_work.h | 4 +-- include/linux/tracehook.h | 2 +- kernel/fork.c | 2 +- kernel/task_work.c | 64 ++++++++++++++++++++++++----------------------- 5 files changed, 38 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f07..b9216ebc2789 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,7 +1405,7 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - struct hlist_head task_works; + void *task_works; struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 627421c0e108..3b3e2c8d037b 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -8,7 +8,7 @@ struct task_work; typedef void (*task_work_func_t)(struct task_work *); struct task_work { - struct hlist_node hlist; + struct task_work *next; task_work_func_t func; }; @@ -24,7 +24,7 @@ void task_work_run(void); static inline void exit_task_work(struct task_struct *task) { - if (unlikely(!hlist_empty(&task->task_works))) + if (unlikely(task->task_works)) task_work_run(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6a4d82bedb03..1e98b5530425 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -192,7 +192,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) * hlist_add_head(task->task_works); */ smp_mb__after_clear_bit(); - if (unlikely(!hlist_empty(¤t->task_works))) + if (unlikely(current->task_works)) task_work_run(); } diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b9e622..bebabad59202 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1415,7 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_HLIST_HEAD(&p->task_works); + p->task_works = NULL; /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible diff --git a/kernel/task_work.c b/kernel/task_work.c index 82d1c794066d..9b8948dbdc60 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -19,7 +19,12 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) */ raw_spin_lock_irqsave(&task->pi_lock, flags); if (likely(!(task->flags & PF_EXITING))) { - hlist_add_head(&twork->hlist, &task->task_works); + struct task_work *last = task->task_works; + struct task_work *first = last ? last->next : twork; + twork->next = first; + if (last) + last->next = twork; + task->task_works = twork; err = 0; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); @@ -34,51 +39,48 @@ struct task_work * task_work_cancel(struct task_struct *task, task_work_func_t func) { unsigned long flags; - struct task_work *twork; - struct hlist_node *pos; + struct task_work *last, *res = NULL; raw_spin_lock_irqsave(&task->pi_lock, flags); - hlist_for_each_entry(twork, pos, &task->task_works, hlist) { - if (twork->func == func) { - hlist_del(&twork->hlist); - goto found; + last = task->task_works; + if (last) { + struct task_work *q = last, *p = q->next; + while (1) { + if (p->func == func) { + q->next = p->next; + if (p == last) + task->task_works = q == p ? NULL : q; + res = p; + break; + } + if (p == last) + break; + q = p; + p = q->next; } } - twork = NULL; - found: raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - return twork; + return res; } void task_work_run(void) { struct task_struct *task = current; - struct hlist_head task_works; - struct hlist_node *pos; + struct task_work *p, *q; raw_spin_lock_irq(&task->pi_lock); - hlist_move_list(&task->task_works, &task_works); + p = task->task_works; + task->task_works = NULL; raw_spin_unlock_irq(&task->pi_lock); - if (unlikely(hlist_empty(&task_works))) + if (unlikely(!p)) return; - /* - * We use hlist to save the space in task_struct, but we want fifo. - * Find the last entry, the list should be short, then process them - * in reverse order. - */ - for (pos = task_works.first; pos->next; pos = pos->next) - ; - - for (;;) { - struct hlist_node **pprev = pos->pprev; - struct task_work *twork = container_of(pos, struct task_work, - hlist); - twork->func(twork); - if (pprev == &task_works.first) - break; - pos = container_of(pprev, struct hlist_node, next); + q = p->next; /* head */ + p->next = NULL; /* cut it */ + while (q) { + p = q->next; + q->func(q); + q = p; } } -- cgit v1.2.3 From 67d1214551e800f9fe7dc7c47a346d2df0fafed5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:07:19 +0400 Subject: merge task_work and rcu_head, get rid of separate allocation for keyring case task_work and rcu_head are identical now; merge them (calling the result struct callback_head, rcu_head #define'd to it), kill separate allocation in security/keys since we can just use cred->rcu now. Signed-off-by: Al Viro --- include/linux/sched.h | 2 +- include/linux/task_work.h | 14 ++++---------- include/linux/types.h | 9 +++++---- kernel/irq/manage.c | 4 ++-- kernel/task_work.c | 14 +++++++------- security/keys/internal.h | 6 +----- security/keys/keyctl.c | 28 +++++++++------------------- security/keys/process_keys.c | 6 ++---- 8 files changed, 31 insertions(+), 52 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index b9216ebc2789..af3555cc760f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,7 +1405,7 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - void *task_works; + struct callback_head *task_works; struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 3b3e2c8d037b..fb46b03b1852 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -4,22 +4,16 @@ #include #include -struct task_work; -typedef void (*task_work_func_t)(struct task_work *); - -struct task_work { - struct task_work *next; - task_work_func_t func; -}; +typedef void (*task_work_func_t)(struct callback_head *); static inline void -init_task_work(struct task_work *twork, task_work_func_t func) +init_task_work(struct callback_head *twork, task_work_func_t func) { twork->func = func; } -int task_work_add(struct task_struct *task, struct task_work *twork, bool); -struct task_work *task_work_cancel(struct task_struct *, task_work_func_t); +int task_work_add(struct task_struct *task, struct callback_head *twork, bool); +struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); static inline void exit_task_work(struct task_struct *task) diff --git a/include/linux/types.h b/include/linux/types.h index 9c1bd539ea70..bf0dd7524b2a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -246,14 +246,15 @@ struct ustat { }; /** - * struct rcu_head - callback structure for use with RCU + * struct callback_head - callback structure for use with RCU and task_work * @next: next update requests in a list * @func: actual update function to call after the grace period. */ -struct rcu_head { - struct rcu_head *next; - void (*func)(struct rcu_head *head); +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); }; +#define rcu_head callback_head #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d1dd54734ce7..814c9ef6bba1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc) wake_up(&desc->wait_for_threads); } -static void irq_thread_dtor(struct task_work *unused) +static void irq_thread_dtor(struct callback_head *unused) { struct task_struct *tsk = current; struct irq_desc *desc; @@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused) */ static int irq_thread(void *data) { - struct task_work on_exit_work; + struct callback_head on_exit_work; static const struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; diff --git a/kernel/task_work.c b/kernel/task_work.c index 9b8948dbdc60..76266fb665dc 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -3,7 +3,7 @@ #include int -task_work_add(struct task_struct *task, struct task_work *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) { unsigned long flags; int err = -ESRCH; @@ -19,8 +19,8 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) */ raw_spin_lock_irqsave(&task->pi_lock, flags); if (likely(!(task->flags & PF_EXITING))) { - struct task_work *last = task->task_works; - struct task_work *first = last ? last->next : twork; + struct callback_head *last = task->task_works; + struct callback_head *first = last ? last->next : twork; twork->next = first; if (last) last->next = twork; @@ -35,16 +35,16 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) return err; } -struct task_work * +struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { unsigned long flags; - struct task_work *last, *res = NULL; + struct callback_head *last, *res = NULL; raw_spin_lock_irqsave(&task->pi_lock, flags); last = task->task_works; if (last) { - struct task_work *q = last, *p = q->next; + struct callback_head *q = last, *p = q->next; while (1) { if (p->func == func) { q->next = p->next; @@ -66,7 +66,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) void task_work_run(void) { struct task_struct *task = current; - struct task_work *p, *q; + struct callback_head *p, *q; raw_spin_lock_irq(&task->pi_lock); p = task->task_works; diff --git a/security/keys/internal.h b/security/keys/internal.h index b510a316874a..c246ba5d43ab 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -148,12 +148,8 @@ extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 -struct kludge { /* this will die off very soon */ - struct task_work twork; - struct cred *cred; -}; extern long join_session_keyring(const char *name); -extern void key_change_session_keyring(struct task_work *twork); +extern void key_change_session_keyring(struct callback_head *twork); extern struct work_struct key_gc_work; extern unsigned key_gc_delay; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 26723caaad05..0291b3f9397c 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1456,8 +1456,7 @@ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; - struct kludge *newwork; - struct task_work *oldwork; + struct callback_head *newwork, *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; @@ -1467,20 +1466,17 @@ long keyctl_session_to_parent(void) return PTR_ERR(keyring_r); ret = -ENOMEM; - newwork = kmalloc(sizeof(struct kludge), GFP_KERNEL); - if (!newwork) - goto error_keyring; /* our parent is going to need a new cred struct, a new tgcred struct * and new security data, so we allocate them here to prevent ENOMEM in * our parent */ cred = cred_alloc_blank(); if (!cred) - goto error_newwork; + goto error_keyring; + newwork = &cred->rcu; cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); - init_task_work(&newwork->twork, key_change_session_keyring); - newwork->cred = cred; + init_task_work(newwork, key_change_session_keyring); me = current; rcu_read_lock(); @@ -1529,24 +1525,18 @@ long keyctl_session_to_parent(void) /* the replacement session keyring is applied just prior to userspace * restarting */ - ret = task_work_add(parent, &newwork->twork, true); + ret = task_work_add(parent, newwork, true); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); - if (oldwork) { - put_cred(container_of(oldwork, struct kludge, twork)->cred); - kfree(oldwork); - } - if (newwork) { - put_cred(newwork->cred); - kfree(newwork); - } + if (oldwork) + put_cred(container_of(oldwork, struct cred, rcu)); + if (newwork) + put_cred(cred); return ret; -error_newwork: - kfree(newwork); error_keyring: key_ref_put(keyring_r); return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index c9b07c97d7f2..54339cfd6734 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -834,13 +834,11 @@ error: * Replace a process's session keyring on behalf of one of its children when * the target process is about to resume userspace execution. */ -void key_change_session_keyring(struct task_work *twork) +void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); - struct kludge *p = container_of(twork, struct kludge, twork); - struct cred *new = p->cred; + struct cred *new = container_of(twork, struct cred, rcu); - kfree(p); if (unlikely(current->flags & PF_EXITING)) { put_cred(new); return; -- cgit v1.2.3 From ed3e694d78cc75fa79bf29698631b146fd27aa35 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:31:24 +0400 Subject: move exit_task_work() past exit_files() et.al. ... and get rid of PF_EXITING check in task_work_add(). Signed-off-by: Al Viro --- kernel/exit.c | 6 ++---- kernel/task_work.c | 30 +++++++++++------------------- 2 files changed, 13 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 2f59cc334516..d17f6c4ddfa9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,14 +953,11 @@ void do_exit(long code) exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against - * an exiting task cleaning up the robust pi futexes, and in - * task_work_add() to avoid the race with exit_task_work(). + * an exiting task cleaning up the robust pi futexes. */ smp_mb(); raw_spin_unlock_wait(&tsk->pi_lock); - exit_task_work(tsk); - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, task_pid_nr(current), @@ -995,6 +992,7 @@ void do_exit(long code) exit_shm(tsk); exit_files(tsk); exit_fs(tsk); + exit_task_work(tsk); check_stack_usage(); exit_thread(); diff --git a/kernel/task_work.c b/kernel/task_work.c index 76266fb665dc..fb396089f66a 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,34 +5,26 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) { + struct callback_head *last, *first; unsigned long flags; - int err = -ESRCH; -#ifndef TIF_NOTIFY_RESUME - if (notify) - return -ENOTSUPP; -#endif /* - * We must not insert the new work if the task has already passed - * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() - * and check PF_EXITING under pi_lock. + * Not inserting the new work if the task has already passed + * exit_task_work() is the responisbility of callers. */ raw_spin_lock_irqsave(&task->pi_lock, flags); - if (likely(!(task->flags & PF_EXITING))) { - struct callback_head *last = task->task_works; - struct callback_head *first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - err = 0; - } + last = task->task_works; + first = last ? last->next : twork; + twork->next = first; + if (last) + last->next = twork; + task->task_works = twork; raw_spin_unlock_irqrestore(&task->pi_lock, flags); /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ - if (likely(!err) && notify) + if (notify) set_notify_resume(task); - return err; + return 0; } struct callback_head * -- cgit v1.2.3 From a2d4c71d1559426155e5da8db3265bfa0d8d398d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:33:29 +0400 Subject: deal with task_work callbacks adding more work It doesn't matter on normal return to userland path (we'll recheck the NOTIFY_RESUME flag anyway), but in case of exit_task_work() we'll need that as soon as we get callbacks capable of triggering more task_work_add(). Signed-off-by: Al Viro --- kernel/task_work.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/task_work.c b/kernel/task_work.c index fb396089f66a..91d4e1742a0c 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -60,19 +60,21 @@ void task_work_run(void) struct task_struct *task = current; struct callback_head *p, *q; - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + while (1) { + raw_spin_lock_irq(&task->pi_lock); + p = task->task_works; + task->task_works = NULL; + raw_spin_unlock_irq(&task->pi_lock); - if (unlikely(!p)) - return; + if (unlikely(!p)) + return; - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + q = p->next; /* head */ + p->next = NULL; /* cut it */ + while (q) { + p = q->next; + q->func(q); + q = p; + } } } -- cgit v1.2.3