From 57eb6e3dbdd3389a4325b8d5a42203591f64153d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 27 May 2016 14:50:05 -0500 Subject: mnt: fs_fully_visible test the proper mount for MNT_LOCKED commit d71ed6c930ac7d8f88f3cef6624a7e826392d61f upstream. MNT_LOCKED implies on a child mount implies the child is locked to the parent. So while looping through the children the children should be tested (not their parent). Typically an unshare of a mount namespace locks all mounts together making both the parent and the slave as locked but there are a few corner cases where other things work. Fixes: ceeb0e5d39fc ("vfs: Ignore unlocked mounts in fs_fully_visible") Reported-by: Seth Forshee Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 0570729c87fd..efb396f41fcd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3262,7 +3262,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) -- cgit v1.2.3 From 6256d2f541fc893083ebebb1369ff41803f6b129 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jun 2016 12:21:40 -0500 Subject: mnt: Account for MS_RDONLY in fs_fully_visible commit 695e9df010e40f407f4830dc11d53dce957710ba upstream. In rare cases it is possible for s_flags & MS_RDONLY to be set but MNT_READONLY to be clear. This starting combination can cause fs_fully_visible to fail to ensure that the new mount is readonly. Therefore force MNT_LOCK_READONLY in the new mount if MS_RDONLY is set on the source filesystem of the mount. In general both MS_RDONLY and MNT_READONLY are set at the same for mounts so I don't expect any programs to care. Nor do I expect MS_RDONLY to be set on proc or sysfs in the initial user namespace, which further decreases the likelyhood of problems. Which means this change should only affect system configurations by paranoid sysadmins who should welcome the additional protection as it keeps people from wriggling out of their policies. Fixes: 8c6cf9cc829f ("mnt: Modify fs_fully_visible to deal with locked ro nodev and atime") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index efb396f41fcd..3bab9dae798a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3236,6 +3236,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC) mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC); + /* Don't miss readonly hidden in the superblock flags */ + if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ -- cgit v1.2.3 From a400a79321cdaf6cebf74666f975d6c8a3ee995f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Jun 2016 15:36:07 -0500 Subject: mnt: If fs_fully_visible fails call put_filesystem. commit 97c1df3e54e811aed484a036a798b4b25d002ecf upstream. Add this trivial missing error handling. Fixes: 1b852bceb0d1 ("mnt: Refactor the logic for mounting sysfs and proc in a user namespace") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 3bab9dae798a..33064fcbfff9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2401,8 +2401,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } -- cgit v1.2.3 From 58e9e70ab997977776ab7b2d6b6ca3d8802d52b4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 16 May 2016 17:03:42 -0400 Subject: nfsd4/rpc: move backchannel create logic into rpc code commit d50039ea5ee63c589b0434baa5ecf6e5075bb6f9 upstream. Also simplify the logic a bit. Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 18 +----------------- include/linux/sunrpc/clnt.h | 2 -- net/sunrpc/clnt.c | 12 ++++++++++-- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e7f50c4081d6..15bdc2d48cfe 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } -static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) -{ - struct rpc_xprt *xprt; - - if (args->protocol != XPRT_TRANSPORT_BC_TCP) - return rpc_create(args); - - xprt = args->bc_xprt->xpt_bc_xprt; - if (xprt) { - xprt_get(xprt); - return rpc_create_xprt(args, xprt); - } - - return rpc_create(args); -} - static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { int maxtime = max_cb_time(clp->net); @@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = create_backchannel_client(&args); + client = rpc_create(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 131032f15cc1..9b6027c51736 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -135,8 +135,6 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, - struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 23608eb0ded2..7a93922457ff 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -442,7 +442,7 @@ out_no_rpciod: return ERR_PTR(err); } -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, +static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; @@ -474,7 +474,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, return clnt; } -EXPORT_SYMBOL_GPL(rpc_create_xprt); /** * rpc_create - create an RPC client and transport with one call @@ -500,6 +499,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->bc_xprt) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + } + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) -- cgit v1.2.3 From 087f8fe0074874a6072bdabbdf6dad03e0fbba2a Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 14 Jun 2016 23:28:04 -0400 Subject: nfsd: Always lock state exclusively. commit feb9dad5209280085d5b0c094fa67e7a8d75c81a upstream. It used to be the case that state had an rwlock that was locked for write by downgrades, but for read for upgrades (opens). Well, the problem is if there are two competing opens for the same state, they step on each other toes potentially leading to leaking file descriptors from the state structure, since access mode is a bitmap only set once. Signed-off-by: Oleg Drokin Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 40 ++++++++++++++++++++-------------------- fs/nfsd/state.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6b800b5b8fed..025803c3b997 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3467,7 +3467,7 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4300,10 +4300,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } } else { @@ -4313,19 +4313,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (swapstp) { nfs4_put_stid(&stp->st_stid); stp = swapstp; - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto out; } goto upgrade_out; } - down_read(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); release_open_stateid(stp); goto out; } @@ -4337,7 +4337,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } upgrade_out: nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); - up_read(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4950,12 +4950,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; - down_write(&stp->st_rwsem); + mutex_lock(&stp->st_mutex); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); return status; } @@ -5003,7 +5003,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -5035,12 +5035,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -5116,7 +5116,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5169,7 +5169,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfsd4_close_open_stateid(stp); @@ -5395,7 +5395,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - init_rwsem(&stp->st_rwsem); + mutex_init(&stp->st_mutex); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5564,7 +5564,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; - up_write(&open_stp->st_rwsem); + mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5573,7 +5573,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); if (status == nfs_ok) - down_write(&lock_stp->st_rwsem); + mutex_lock(&lock_stp->st_mutex); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5677,7 +5677,7 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; - up_write(&lock_stp->st_rwsem); + mutex_unlock(&lock_stp->st_mutex); /* * If this is a new, never-before-used stateid, and we are @@ -5847,7 +5847,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: - up_write(&stp->st_rwsem); + mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 77fdf4de91ba..77860b75da9d 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -535,7 +535,7 @@ struct nfs4_ol_stateid { unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; - struct rw_semaphore st_rwsem; + struct mutex st_mutex; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) -- cgit v1.2.3 From f78ffdc2bb3c88792636f14d732cc1fce4073685 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 14 Jun 2016 23:28:05 -0400 Subject: nfsd: Extend the mutex holding region around in nfsd4_process_open2() commit 5cc1fb2a093e254b656c64ff24b0b76bed1d34d9 upstream. To avoid racing entry into nfs4_get_vfs_file(). Make init_open_stateid() return with locked stateid to be unlocked by the caller. Signed-off-by: Oleg Drokin Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 025803c3b997..ed2f64ca49de 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3452,6 +3452,10 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfs4_openowner *oo = open->op_openowner; struct nfs4_ol_stateid *retstp = NULL; + /* We are moving these outside of the spinlocks to avoid the warnings */ + mutex_init(&stp->st_mutex); + mutex_lock(&stp->st_mutex); + spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); @@ -3467,13 +3471,17 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - mutex_init(&stp->st_mutex); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + if (retstp) { + mutex_lock(&retstp->st_mutex); + /* Not that we need to, just for neatness */ + mutex_unlock(&stp->st_mutex); + } return retstp; } @@ -4309,11 +4317,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } else { stp = open->op_stp; open->op_stp = NULL; + /* + * init_open_stateid() either returns a locked stateid + * it found, or initializes and locks the new one we passed in + */ swapstp = init_open_stateid(stp, fp, open); if (swapstp) { nfs4_put_stid(&stp->st_stid); stp = swapstp; - mutex_lock(&stp->st_mutex); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) { @@ -4322,7 +4333,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } goto upgrade_out; } - mutex_lock(&stp->st_mutex); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { mutex_unlock(&stp->st_mutex); -- cgit v1.2.3 From c3fa141c1f288ac785c82ead9d06d1b5acd76d60 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2016 23:57:25 +0200 Subject: posix_acl: Add set_posix_acl commit 485e71e8fb6356c08c7fc6bcce4bf02c9a9a663f upstream. Factor out part of posix_acl_xattr_set into a common function that takes a posix_acl, which nfsd can also call. The prototype already exists in include/linux/posix_acl.h. Signed-off-by: Andreas Gruenbacher Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/posix_acl.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4adde1e2cbec..34bd1bd354e6 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -788,6 +788,28 @@ posix_acl_xattr_get(const struct xattr_handler *handler, return error; } +int +set_posix_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + if (!IS_POSIXACL(inode)) + return -EOPNOTSUPP; + if (!inode->i_op->set_acl) + return -EOPNOTSUPP; + + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (acl) { + int ret = posix_acl_valid(acl); + if (ret) + return ret; + } + return inode->i_op->set_acl(inode, acl, type); +} +EXPORT_SYMBOL(set_posix_acl); + static int posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, @@ -799,30 +821,13 @@ posix_acl_xattr_set(const struct xattr_handler *handler, if (strcmp(name, "") != 0) return -EINVAL; - if (!IS_POSIXACL(inode)) - return -EOPNOTSUPP; - if (!inode->i_op->set_acl) - return -EOPNOTSUPP; - - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if (!inode_owner_or_capable(inode)) - return -EPERM; if (value) { acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); - - if (acl) { - ret = posix_acl_valid(acl); - if (ret) - goto out; - } } - - ret = inode->i_op->set_acl(inode, acl, handler->flags); -out: + ret = set_posix_acl(inode, handler->flags, acl); posix_acl_release(acl); return ret; } -- cgit v1.2.3 From 412cfeec2f4851f266b20a12b3a81dcef94cb979 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 22 Jun 2016 19:43:35 +0100 Subject: nfsd: check permissions when setting ACLs commit 999653786df6954a31044528ac3f7a5dadca08f4 upstream. Use set_posix_acl, which includes proper permission checks, instead of calling ->set_acl directly. Without this anyone may be able to grant themselves permissions to a file by setting the ACL. Lock the inode to make the new checks atomic with respect to set_acl. (Also, nfsd was the only caller of set_acl not locking the inode, so I suspect this may fix other races.) This also simplifies the code, and ensures our ACLs are checked by posix_acl_valid. The permission checks and the inode locking were lost with commit 4ac7249e, which changed nfsd to use the set_acl inode operation directly instead of going through xattr handlers. Reported-by: David Sinquin [agreunba@redhat.com: use set_posix_acl] Fixes: 4ac7249e Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs2acl.c | 20 ++++++++++---------- fs/nfsd/nfs3acl.c | 16 +++++++--------- fs/nfsd/nfs4acl.c | 16 ++++++++-------- 3 files changed, 25 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1580ea6fd64d..d08cd88155c7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); if (error) - goto out_drop_write; + goto out_drop_lock; + + fh_unlock(fh); fh_drop_write(fh); @@ -131,7 +130,8 @@ out: posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 01df4cd7c753..0c890347cde3 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, goto out; inode = d_inode(fh->fh_dentry); - if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { - error = -EOPNOTSUPP; - goto out_errno; - } error = fh_want_write(fh); if (error) goto out_errno; - error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + fh_lock(fh); + + error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) - goto out_drop_write; - error = inode->i_op->set_acl(inode, argp->acl_default, - ACL_TYPE_DEFAULT); + goto out_drop_lock; + error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); -out_drop_write: +out_drop_lock: + fh_unlock(fh); fh_drop_write(fh); out_errno: nfserr = nfserrno(error); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6adabd6049b7..71292a0d6f09 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode = d_inode(dentry); - if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) - return nfserr_attrnotsupp; - if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; @@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_error < 0) goto out_nfserr; - host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + fh_lock(fhp); + + host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); if (host_error < 0) - goto out_release; + goto out_drop_lock; if (S_ISDIR(inode->i_mode)) { - host_error = inode->i_op->set_acl(inode, dpacl, - ACL_TYPE_DEFAULT); + host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); } -out_release: +out_drop_lock: + fh_unlock(fhp); + posix_acl_release(pacl); posix_acl_release(dpacl); out_nfserr: -- cgit v1.2.3 From 44d86dbf9af1f5d328057eb1137265231da17540 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jun 2016 13:14:36 -0400 Subject: make nfs_atomic_open() call d_drop() on all ->open_context() errors. commit d20cb71dbf3487f24549ede1a8e2d67579b4632e upstream. In "NFSv4: Move dentry instantiation into the NFSv4-specific atomic open code" unconditional d_drop() after the ->open_context() had been removed. It had been correct for success cases (there ->open_context() itself had been doing dcache manipulations), but not for error ones. Only one of those (ENOENT) got a compensatory d_drop() added in that commit, but in fact it should've been done for all errors. As it is, the case of O_CREAT non-exclusive open on a hashed negative dentry racing with e.g. symlink creation from another client ended up with ->open_context() getting an error and proceeding to call nfs_lookup(). On a hashed dentry, which would've instantly triggered BUG_ON() in d_materialise_unique() (or, these days, its equivalent in d_splice_alias()). Tested-by: Oleg Drokin Signed-off-by: Al Viro Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5fc2162afb67..46cfed63d229 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1531,9 +1531,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); + d_drop(dentry); switch (err) { case -ENOENT: - d_drop(dentry); d_add(dentry, NULL); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; -- cgit v1.2.3 From b5d4a793312969e4dcb0156d4a29bb5f2f5c15f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 25 Jun 2016 19:19:28 -0400 Subject: NFS: Fix another OPEN_DOWNGRADE bug commit e547f2628327fec6afd2e03b46f113f614cca05b upstream. Olga Kornievskaia reports that the following test fails to trigger an OPEN_DOWNGRADE on the wire, and only triggers the final CLOSE. fd0 = open(foo, RDRW) -- should be open on the wire for "both" fd1 = open(foo, RDONLY) -- should be open on the wire for "read" close(fd0) -- should trigger an open_downgrade read(fd1) close(fd1) The issue is that we're missing a check for whether or not the current state transitioned from an O_RDWR state as opposed to having transitioned from a combination of O_RDONLY and O_WRONLY. Reported-by: Olga Kornievskaia Fixes: cd9288ffaea4 ("NFSv4: Fix another bug in the close/open_downgrade code") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 98a44157353a..fc215ab4dcd5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2854,12 +2854,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) call_close |= is_wronly; else if (is_wronly) calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (calldata->arg.fmode == 0) - call_close |= is_rdwr; - if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); -- cgit v1.2.3 From 1e1f4ff765cb0cd011f30ed11a4ef18903329013 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 16 Jun 2016 23:26:15 +0200 Subject: UBIFS: Implement ->migratepage() commit 4ac1c17b2044a1b4b2fbed74451947e905fc2992 upstream. During page migrations UBIFS might get confused and the following assert triggers: [ 213.480000] UBIFS assert failed in ubifs_set_page_dirty at 1451 (pid 436) [ 213.490000] CPU: 0 PID: 436 Comm: drm-stress-test Not tainted 4.4.4-00176-geaa802524636-dirty #1008 [ 213.490000] Hardware name: Allwinner sun4i/sun5i Families [ 213.490000] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 213.490000] [] (show_stack) from [] (dump_stack+0x8c/0xa0) [ 213.490000] [] (dump_stack) from [] (ubifs_set_page_dirty+0x44/0x50) [ 213.490000] [] (ubifs_set_page_dirty) from [] (try_to_unmap_one+0x10c/0x3a8) [ 213.490000] [] (try_to_unmap_one) from [] (rmap_walk+0xb4/0x290) [ 213.490000] [] (rmap_walk) from [] (try_to_unmap+0x64/0x80) [ 213.490000] [] (try_to_unmap) from [] (migrate_pages+0x328/0x7a0) [ 213.490000] [] (migrate_pages) from [] (alloc_contig_range+0x168/0x2f4) [ 213.490000] [] (alloc_contig_range) from [] (cma_alloc+0x170/0x2c0) [ 213.490000] [] (cma_alloc) from [] (__alloc_from_contiguous+0x38/0xd8) [ 213.490000] [] (__alloc_from_contiguous) from [] (__dma_alloc+0x23c/0x274) [ 213.490000] [] (__dma_alloc) from [] (arm_dma_alloc+0x54/0x5c) [ 213.490000] [] (arm_dma_alloc) from [] (drm_gem_cma_create+0xb8/0xf0) [ 213.490000] [] (drm_gem_cma_create) from [] (drm_gem_cma_create_with_handle+0x1c/0xe8) [ 213.490000] [] (drm_gem_cma_create_with_handle) from [] (drm_gem_cma_dumb_create+0x3c/0x48) [ 213.490000] [] (drm_gem_cma_dumb_create) from [] (drm_ioctl+0x12c/0x444) [ 213.490000] [] (drm_ioctl) from [] (do_vfs_ioctl+0x3f4/0x614) [ 213.490000] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 213.490000] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) UBIFS is using PagePrivate() which can have different meanings across filesystems. Therefore the generic page migration code cannot handle this case correctly. We have to implement our own migration function which basically does a plain copy but also duplicates the page private flag. UBIFS is not a block device filesystem and cannot use buffer_migrate_page(). Signed-off-by: Kirill A. Shutemov [rw: Massaged changelog, build fixes, etc...] Signed-off-by: Richard Weinberger Acked-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/file.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'fs') diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0edc12856147..b895af7d8d80 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -52,6 +52,7 @@ #include "ubifs.h" #include #include +#include static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page) return ret; } +#ifdef CONFIG_MIGRATION +static int ubifs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc; + + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + SetPagePrivate(newpage); + } + + migrate_page_copy(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +#endif + static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) { /* @@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidatepage = ubifs_invalidatepage, .set_page_dirty = ubifs_set_page_dirty, +#ifdef CONFIG_MIGRATION + .migratepage = ubifs_migrate_page, +#endif .releasepage = ubifs_releasepage, }; -- cgit v1.2.3 From 13226e1ed87182883f877f6021c2b4a2f019c363 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 8 Jun 2016 00:36:38 -0400 Subject: btrfs: account for non-CoW'd blocks in btrfs_abort_transaction commit 64c12921e11b3a0c10d088606e328c58e29274d8 upstream. The test for !trans->blocks_used in btrfs_abort_transaction is insufficient to determine whether it's safe to drop the transaction handle on the floor. btrfs_cow_block, informed by should_cow_block, can return blocks that have already been CoW'd in the current transaction. trans->blocks_used is only incremented for new block allocations. If an operation overlaps the blocks in the current transaction entirely and must abort the transaction, we'll happily let it clean up the trans handle even though it may have modified the blocks and will commit an incomplete operation. In the long-term, I'd like to do closer tracking of when the fs is actually modified so we can still recover as gracefully as possible, but that approach will need some discussion. In the short term, since this is the only code using trans->blocks_used, let's just switch it to a bool indicating whether any blocks were used and set it when should_cow_block returns false. Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 5 ++++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.h | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5b8e235c4b6d..0f2b7c622ce3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1551,6 +1551,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { + trans->dirty = true; *cow_ret = buf; return 0; } @@ -2773,8 +2774,10 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (!should_cow_block(trans, root, b)) + if (!should_cow_block(trans, root, b)) { + trans->dirty = true; goto cow_done; + } /* * must have write locks on this node and the diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2368cac1115a..47cdc6f3390b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7856,7 +7856,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } - trans->blocks_used++; + trans->dirty = true; /* this returns a buffer locked for blocking */ return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fe609b81dd1b..5d34a062ca4f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -239,7 +239,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ - if (!trans->blocks_used && list_empty(&trans->new_bgs)) { + if (!trans->dirty && list_empty(&trans->new_bgs)) { const char *errstr; errstr = btrfs_decode_error(errno); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 64c8221b6165..1e872923ec2c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -110,7 +110,6 @@ struct btrfs_trans_handle { u64 chunk_bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; - unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; @@ -121,6 +120,7 @@ struct btrfs_trans_handle { bool can_flush_pending_bgs; bool reloc_reserved; bool sync; + bool dirty; unsigned int type; /* * this root is only needed to validate that the root passed to -- cgit v1.2.3 From c12dada5f28a4894b81df2666c060f5cecc02cf9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 1 Jul 2016 16:34:25 -0400 Subject: ovl: Copy up underlying inode's ->i_mode to overlay inode commit 07a2daab49c549a37b5b744cbebb6e3f445f12bc upstream. Right now when a new overlay inode is created, we initialize overlay inode's ->i_mode from underlying inode ->i_mode but we retain only file type bits (S_IFMT) and discard permission bits. This patch changes it and retains permission bits too. This should allow overlay to do permission checks on overlay inode itself in task context. [SzM] It also fixes clearing suid/sgid bits on write. Signed-off-by: Vivek Goyal Reported-by: Eryu Guan Signed-off-by: Miklos Szeredi Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 3 +-- fs/overlayfs/overlayfs.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 05ac9a95e881..0597820f5d9d 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -412,12 +412,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, if (!inode) return NULL; - mode &= S_IFMT; - inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_flags |= S_NOATIME | S_NOCMTIME; + mode &= S_IFMT; switch (mode) { case S_IFDIR: inode->i_private = oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e17154aeaae4..735e1d49b301 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -181,6 +181,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; to->i_gid = from->i_gid; + to->i_mode = from->i_mode; } /* dir.c */ -- cgit v1.2.3 From cf2a2c6de0516f595825c9c46eb8ebc12a48dd31 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Jul 2016 18:24:26 -0700 Subject: ovl: verify upper dentry in ovl_remove_and_whiteout() commit cfc9fde0b07c3b44b570057c5f93dda59dca1c94 upstream. The upper dentry may become stale before we call ovl_lock_rename_workdir. For example, someone could (mistakenly or maliciously) manually unlink(2) it directly from upperdir. To ensure it is not stale, let's lookup it after ovl_lock_rename_workdir and and check if it matches the upper dentry. Essentially, it is the same problem and similar solution as in commit 11f3710417d0 ("ovl: verify upper dentry before unlink and rename"). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index a2b1d7ce3e1a..977236a46aa2 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -511,6 +511,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *upper; struct dentry *opaquedir = NULL; int err; + int flags = 0; if (WARN_ON(!workdir)) return -EROFS; @@ -540,46 +541,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) if (err) goto out_dput; - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) goto out_unlock; - upper = ovl_dentry_upper(dentry); - if (!upper) { - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto kill_whiteout; - - err = ovl_do_rename(wdir, whiteout, udir, upper, 0); - dput(upper); - if (err) - goto kill_whiteout; - } else { - int flags = 0; + err = -ESTALE; + if ((opaquedir && upper != opaquedir) || + (!opaquedir && ovl_dentry_upper(dentry) && + upper != ovl_dentry_upper(dentry))) { + goto out_dput_upper; + } - if (opaquedir) - upper = opaquedir; - err = -ESTALE; - if (upper->d_parent != upperdir) - goto kill_whiteout; + whiteout = ovl_whiteout(workdir, dentry); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto out_dput_upper; - if (is_dir) - flags |= RENAME_EXCHANGE; + if (d_is_dir(upper)) + flags = RENAME_EXCHANGE; - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); - if (err) - goto kill_whiteout; + err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, upper); - if (is_dir) - ovl_cleanup(wdir, upper); - } ovl_dentry_version_inc(dentry->d_parent); out_d_drop: d_drop(dentry); dput(whiteout); +out_dput_upper: + dput(upper); out_unlock: unlock_rename(workdir, upperdir); out_dput: -- cgit v1.2.3 From 4ce7aa4e44d88ce64ea8ae2337b8910f3670b0ba Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 22 Jun 2016 20:12:05 -0500 Subject: Fix reconnect to not defer smb3 session reconnect long after socket reconnect commit 4fcd1813e6404dd4420c7d12fb483f9320f0bf93 upstream. Azure server blocks clients that open a socket and don't do anything on it. In our reconnect scenarios, we can reconnect the tcp session and detect the socket is available but we defer the negprot and SMB3 session setup and tree connect reconnection until the next i/o is requested, but this looks suspicous to some servers who expect SMB3 negprog and session setup soon after a socket is created. In the echo thread, reconnect SMB3 sessions and tree connections that are disconnected. A later patch will replay persistent (and resilient) handle opens. Signed-off-by: Steve French Acked-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 4 +++- fs/cifs/smb2pdu.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3c194ff0d2f0..5481a6eb9a95 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -425,7 +425,9 @@ cifs_echo_request(struct work_struct *work) * server->ops->need_neg() == true. Also, no need to ping if * we got a response recently. */ - if (!server->ops->need_neg || server->ops->need_neg(server) || + + if (server->tcpStatus == CifsNeedReconnect || + server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 82c5f57382b2..169ed83d46fe 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1820,6 +1820,33 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + if (server->tcpStatus == CifsNeedNegotiate) { + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + /* add check for persistent handle reconnect */ + if (tcon && tcon->need_reconnect) { + spin_unlock(&cifs_tcp_ses_lock); + rc = smb2_reconnect(SMB2_ECHO, tcon); + spin_lock(&cifs_tcp_ses_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + } + + /* if no session, renegotiate failed above */ + if (server->tcpStatus == CifsNeedNegotiate) + return -EIO; + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; -- cgit v1.2.3 From 1422b6b926f3634cddf449d65ce976f397726c63 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Thu, 26 May 2016 11:52:25 +0200 Subject: cifs: dynamic allocation of ntlmssp blob commit b8da344b74c822e966c6d19d6b2321efe82c5d97 upstream. In sess_auth_rawntlmssp_authenticate(), the ntlmssp blob is allocated statically and its size is an "empirical" 5*sizeof(struct _AUTHENTICATE_MESSAGE) (320B on x86_64). I don't know where this value comes from or if it was ever appropriate, but it is currently insufficient: the user and domain name in UTF16 could take 1kB by themselves. Because of that, build_ntlmssp_auth_blob() might corrupt memory (out-of-bounds write). The size of ntlmssp_blob in SMB2_sess_setup() is too small too (sizeof(struct _NEGOTIATE_MESSAGE) + 500). This patch allocates the blob dynamically in build_ntlmssp_auth_blob(). Signed-off-by: Jerome Marchand Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ntlmssp.h | 2 +- fs/cifs/sess.c | 76 ++++++++++++++++++++++++++++++------------------------- fs/cifs/smb2pdu.c | 10 ++------ 3 files changed, 45 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 848249fa120f..3079b38f0afb 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE { int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses); void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses); -int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen, +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index af0ec2d5ad0e..e88ffe1da045 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, sec_blob->DomainName.MaximumLength = 0; } -/* We do not malloc the blob, it is passed in pbuffer, because its - maximum possible size is fixed and small, making this approach cleaner. - This function returns the length of the data in the blob */ -int build_ntlmssp_auth_blob(unsigned char *pbuffer, +static int size_of_ntlmssp_blob(struct cifs_ses *ses) +{ + int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len + - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2; + + if (ses->domainName) + sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + else + sz += 2; + + if (ses->user_name) + sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); + else + sz += 2; + + return sz; +} + +int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, const struct nls_table *nls_cp) { int rc; - AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; + AUTHENTICATE_MESSAGE *sec_blob; __u32 flags; unsigned char *tmp; + rc = setup_ntlmv2_rsp(ses, nls_cp); + if (rc) { + cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); + *buflen = 0; + goto setup_ntlmv2_ret; + } + *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL); + sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer; + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } - tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); + tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->LmChallengeResponse.BufferOffset = @@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.BufferOffset = + cpu_to_le32(tmp - *pbuffer); if (ses->user_name != NULL) { - rc = setup_ntlmv2_rsp(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); - goto setup_ntlmv2_ret; - } memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE, ses->auth_key.len - CIFS_SESS_KEY_SIZE); tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE; @@ -423,7 +443,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } if (ses->domainName == NULL) { - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = 0; sec_blob->DomainName.MaximumLength = 0; tmp += 2; @@ -432,14 +452,14 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); tmp += len; } if (ses->user_name == NULL) { - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = 0; sec_blob->UserName.MaximumLength = 0; tmp += 2; @@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ - sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); tmp += len; } - sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->WorkstationName.Length = 0; sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; @@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) && !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); sec_blob->SessionKey.MaximumLength = cpu_to_le16(CIFS_CPHTXT_SIZE); tmp += CIFS_CPHTXT_SIZE; } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = 0; sec_blob->SessionKey.MaximumLength = 0; } + *buflen = tmp - *pbuffer; setup_ntlmv2_ret: - *buflen = tmp - pbuffer; return rc; } @@ -1266,7 +1286,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; __u16 bytes_remaining; char *bcc_ptr; - char *ntlmsspblob = NULL; + unsigned char *ntlmsspblob = NULL; u16 blob_len; cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n"); @@ -1279,19 +1299,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) /* Build security blob before we assemble the request */ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; smb_buf = (struct smb_hdr *)pSMB; - /* - * 5 is an empirical value, large enough to hold - * authenticate message plus max 10 of av paris, - * domain, user, workstation names, flags, etc. - */ - ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - rc = -ENOMEM; - goto out; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, + rc = build_ntlmssp_auth_blob(&ntlmsspblob, &blob_len, ses, sess_data->nls_cp); if (rc) goto out_free_ntlmsspblob; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 169ed83d46fe..0b6dc1942bdc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -591,7 +591,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, u16 blob_length = 0; struct key *spnego_key = NULL; char *security_blob = NULL; - char *ntlmssp_blob = NULL; + unsigned char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ cifs_dbg(FYI, "Session Setup\n"); @@ -716,13 +716,7 @@ ssetup_ntlmssp_authenticate: iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; - ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, - GFP_KERNEL); - if (ntlmssp_blob == NULL) { - rc = -ENOMEM; - goto ssetup_exit; - } - rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, + rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, nls_cp); if (rc) { cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", -- cgit v1.2.3 From 12f2f04e0376453bab82a2d6aa51cb94f4f65d33 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 22 Jun 2016 21:07:32 -0500 Subject: File names with trailing period or space need special case conversion commit 45e8a2583d97ca758a55c608f78c4cef562644d1 upstream. POSIX allows files with trailing spaces or a trailing period but SMB3 does not, so convert these using the normal Services For Mac mapping as we do for other reserved characters such as : < > | ? * This is similar to what Macs do for the same problem over SMB3. Signed-off-by: Steve French Acked-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.c | 33 +++++++++++++++++++++++++++++---- fs/cifs/cifs_unicode.h | 2 ++ 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 5a53ac6b1e02..02b071bf3732 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_SLASH: *target = '\\'; break; + case SFM_SPACE: + *target = ' '; + break; + case SFM_PERIOD: + *target = '.'; + break; default: return false; } @@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char) return dest_char; } -static __le16 convert_to_sfm_char(char src_char) +static __le16 convert_to_sfm_char(char src_char, bool end_of_string) { __le16 dest_char; @@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char) case '|': dest_char = cpu_to_le16(SFM_PIPE); break; + case '.': + if (end_of_string) + dest_char = cpu_to_le16(SFM_PERIOD); + else + dest_char = 0; + break; + case ' ': + if (end_of_string) + dest_char = cpu_to_le16(SFM_SPACE); + else + dest_char = 0; + break; default: dest_char = 0; } @@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, /* see if we must remap this char */ if (map_chars == SFU_MAP_UNI_RSVD) dst_char = convert_to_sfu_char(src_char); - else if (map_chars == SFM_MAP_UNI_RSVD) - dst_char = convert_to_sfm_char(src_char); - else + else if (map_chars == SFM_MAP_UNI_RSVD) { + bool end_of_string; + + if (i == srclen - 1) + end_of_string = true; + else + end_of_string = false; + + dst_char = convert_to_sfm_char(src_char, end_of_string); + } else dst_char = 0; /* * FIXME: We can not handle remapping backslash (UNI_SLASH) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index bdc52cb9a676..479bc0a941f3 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,6 +64,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) +#define SFM_PERIOD ((__u16) 0xF028) +#define SFM_SPACE ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is -- cgit v1.2.3 From dbf72a4d4531e7e5fb28e76d902d66f05c1bfe12 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Mar 2016 17:31:44 +0100 Subject: ovl: verify upper dentry before unlink and rename commit 11f3710417d026ea2f4fcf362d866342c5274185 upstream. Unlink and rename in overlayfs checked the upper dentry for staleness by verifying upper->d_parent against upperdir. However the dentry can go stale also by being unhashed, for example. Expand the verification to actually look up the name again (under parent lock) and check if it matches the upper dentry. This matches what the VFS does before passing the dentry to filesytem's unlink/rename methods, which excludes any inconsistency caused by overlayfs. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 59 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 977236a46aa2..ba5ef733951f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -590,21 +590,25 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) { struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; - struct dentry *upper = ovl_dentry_upper(dentry); + struct dentry *upper; int err; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) + goto out_unlock; + err = -ESTALE; - if (upper->d_parent == upperdir) { - /* Don't let d_delete() think it can reset d_inode */ - dget(upper); + if (upper == ovl_dentry_upper(dentry)) { if (is_dir) err = vfs_rmdir(dir, upper); else err = vfs_unlink(dir, upper, NULL); - dput(upper); ovl_dentry_version_inc(dentry->d_parent); } + dput(upper); /* * Keeping this dentry hashed would mean having to release @@ -614,6 +618,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) */ if (!err) d_drop(dentry); +out_unlock: mutex_unlock(&dir->i_mutex); return err; @@ -834,29 +839,39 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, trap = lock_rename(new_upperdir, old_upperdir); - olddentry = ovl_dentry_upper(old); - newdentry = ovl_dentry_upper(new); - if (newdentry) { + + olddentry = lookup_one_len(old->d_name.name, old_upperdir, + old->d_name.len); + err = PTR_ERR(olddentry); + if (IS_ERR(olddentry)) + goto out_unlock; + + err = -ESTALE; + if (olddentry != ovl_dentry_upper(old)) + goto out_dput_old; + + newdentry = lookup_one_len(new->d_name.name, new_upperdir, + new->d_name.len); + err = PTR_ERR(newdentry); + if (IS_ERR(newdentry)) + goto out_dput_old; + + err = -ESTALE; + if (ovl_dentry_upper(new)) { if (opaquedir) { - newdentry = opaquedir; - opaquedir = NULL; + if (newdentry != opaquedir) + goto out_dput; } else { - dget(newdentry); + if (newdentry != ovl_dentry_upper(new)) + goto out_dput; } } else { new_create = true; - newdentry = lookup_one_len(new->d_name.name, new_upperdir, - new->d_name.len); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out_unlock; + if (!d_is_negative(newdentry) && + (!new_opaque || !ovl_is_whiteout(newdentry))) + goto out_dput; } - err = -ESTALE; - if (olddentry->d_parent != old_upperdir) - goto out_dput; - if (newdentry->d_parent != new_upperdir) - goto out_dput; if (olddentry == trap) goto out_dput; if (newdentry == trap) @@ -919,6 +934,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, out_dput: dput(newdentry); +out_dput_old: + dput(olddentry); out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: -- cgit v1.2.3