summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-13 16:46:18 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-13 16:46:18 -0700
commit16cefa8c3863721fd40445a1b34dea18cd16ccfe (patch)
treec8e58ca06e2edfd667d3e6062a642b80cc58e5e7
parent4fbef206daead133085fe33905f5e842d38fb8da (diff)
parentd8558f99fbc5ef5d4ae76b893784005056450f82 (diff)
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (122 commits) sunrpc: drop BKL around wrap and unwrap NFSv4: Make sure unlock is really an unlock when cancelling a lock NLM: fix source address of callback to client SUNRPC client: add interface for binding to a local address SUNRPC server: record the destination address of a request SUNRPC: cleanup transport creation argument passing NFSv4: Make the NFS state model work with the nosharedcache mount option NFS: Error when mounting the same filesystem with different options NFS: Add the mount option "nosharecache" NFS: Add support for mounting NFSv4 file systems with string options NFS: Add final pieces to support in-kernel mount option parsing NFS: Introduce generic mount client API NFS: Add enums and match tables for mount option parsing NFS: Improve debugging output in NFS in-kernel mount client NFS: Clean up in-kernel NFS mount NFS: Remake nfsroot_mount as a permanent part of NFS client SUNRPC: Add a convenient default for the hostname when calling rpc_create() SUNRPC: Rename rpcb_getport to be consistent with new rpcb_getport_sync name SUNRPC: Rename rpcb_getport_external routine SUNRPC: Allow rpcbind requests to be interrupted by a signal. ...
-rw-r--r--fs/lockd/host.c39
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c6
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/delegation.c186
-rw-r--r--fs/nfs/delegation.h26
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c73
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/mount_clnt.c169
-rw-r--r--fs/nfs/nfs2xdr.c6
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c8
-rw-r--r--fs/nfs/nfs4_fs.h40
-rw-r--r--fs/nfs/nfs4proc.c760
-rw-r--r--fs/nfs/nfs4state.c310
-rw-r--r--fs/nfs/nfs4xdr.c126
-rw-r--r--fs/nfs/nfsroot.c5
-rw-r--r--fs/nfs/pagelist.c60
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c1189
-rw-r--r--fs/nfs/write.c149
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4state.c1
-rw-r--r--include/linux/lockd/lockd.h1
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/linux/nfs4_mount.h3
-rw-r--r--include/linux/nfs_fs.h28
-rw-r--r--include/linux/nfs_fs_sb.h8
-rw-r--r--include/linux/nfs_mount.h3
-rw-r--r--include/linux/nfs_page.h25
-rw-r--r--include/linux/nfs_xdr.h5
-rw-r--r--include/linux/sunrpc/auth.h48
-rw-r--r--include/linux/sunrpc/auth_gss.h6
-rw-r--r--include/linux/sunrpc/clnt.h33
-rw-r--r--include/linux/sunrpc/gss_api.h2
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h2
-rw-r--r--include/linux/sunrpc/sched.h6
-rw-r--r--include/linux/sunrpc/svcsock.h1
-rw-r--r--include/linux/sunrpc/xprt.h16
-rw-r--r--net/sunrpc/auth.c370
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c349
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c2
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c2
-rw-r--r--net/sunrpc/auth_null.c10
-rw-r--r--net/sunrpc/auth_unix.c54
-rw-r--r--net/sunrpc/clnt.c371
-rw-r--r--net/sunrpc/rpc_pipe.c80
-rw-r--r--net/sunrpc/rpcb_clnt.c65
-rw-r--r--net/sunrpc/sched.c209
-rw-r--r--net/sunrpc/sunrpc_syms.c8
-rw-r--r--net/sunrpc/svcsock.c20
-rw-r--r--net/sunrpc/xprt.c19
-rw-r--r--net/sunrpc/xprtsock.c81
56 files changed, 3321 insertions, 1810 deletions
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93fc..572601e98dcd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
*/
static struct nlm_host *
nlm_lookup_host(int server, const struct sockaddr_in *sin,
- int proto, int version,
- const char *hostname,
- int hostname_len)
+ int proto, int version, const char *hostname,
+ int hostname_len, const struct sockaddr_in *ssin)
{
struct hlist_head *chain;
struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
struct nsm_handle *nsm = NULL;
int hash;
- dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+ dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
+ ", p=%d, v=%d, my role=%s, name=%.*s)\n",
+ NIPQUAD(ssin->sin_addr.s_addr),
NIPQUAD(sin->sin_addr.s_addr), proto, version,
server? "server" : "client",
hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
continue;
if (host->h_server != server)
continue;
+ if (!nlm_cmp_addr(&host->h_saddr, ssin))
+ continue;
/* Move to head of hash chain. */
hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
host->h_name = nsm->sm_name;
host->h_addr = *sin;
host->h_addr.sin_port = 0; /* ouch! */
+ host->h_saddr = *ssin;
host->h_version = version;
host->h_proto = proto;
host->h_rpcclnt = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
*/
nsm_unmonitor(host);
- if ((clnt = host->h_rpcclnt) != NULL) {
- if (atomic_read(&clnt->cl_users)) {
- printk(KERN_WARNING
- "lockd: active RPC handle\n");
- clnt->cl_dead = 1;
- } else {
- rpc_destroy_client(host->h_rpcclnt);
- }
- }
+ clnt = host->h_rpcclnt;
+ if (clnt != NULL)
+ rpc_shutdown_client(clnt);
kfree(host);
}
@@ -180,8 +178,10 @@ struct nlm_host *
nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
const char *hostname, int hostname_len)
{
+ struct sockaddr_in ssin = {0};
+
return nlm_lookup_host(0, sin, proto, version,
- hostname, hostname_len);
+ hostname, hostname_len, &ssin);
}
/*
@@ -191,9 +191,12 @@ struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len)
{
+ struct sockaddr_in ssin = {0};
+
+ ssin.sin_addr = rqstp->rq_daddr.addr;
return nlm_lookup_host(1, svc_addr_in(rqstp),
rqstp->rq_prot, rqstp->rq_vers,
- hostname, hostname_len);
+ hostname, hostname_len, &ssin);
}
/*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
{
struct rpc_clnt *clnt;
- dprintk("lockd: nlm_bind_host(%08x)\n",
- (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
+ dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
+ NIPQUAD(host->h_saddr.sin_addr),
+ NIPQUAD(host->h_addr.sin_addr));
/* Lock host handle */
mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
.protocol = host->h_proto,
.address = (struct sockaddr *)&host->h_addr,
.addrsize = sizeof(host->h_addr),
+ .saddress = (struct sockaddr *)&host->h_saddr,
.timeout = &timeparms,
.servername = host->h_name,
.program = &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134d..3353ed8421a7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
status);
else
status = 0;
+ rpc_shutdown_client(clnt);
out:
return status;
}
@@ -138,7 +139,6 @@ nsm_create(void)
.program = &nsm_program,
.version = SM_VERSION,
.authflavor = RPC_AUTH_NULL,
- .flags = (RPC_CLNT_CREATE_ONESHOT),
};
return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0e..26809325469c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
/* Process request with signals blocked, but allow SIGKILL. */
allow_signal(SIGKILL);
- /* kick rpciod */
- rpciod_up();
-
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
/* Exit the RPC thread */
svc_exit_thread(rqstp);
- /* release rpciod */
- rpciod_down();
-
/* Release module */
unlock_kernel();
module_put_and_exit(0);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef4..b55cb236cf74 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
pagelist.o proc.o read.o symlink.o unlink.o \
- write.o namespace.o
-nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
+ write.o namespace.o mount_clnt.o
+nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa4900923..ccb455053ee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
int nfsversion)
{
struct nfs_client *clp;
- int error;
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
- error = rpciod_up();
- if (error < 0) {
- dprintk("%s: couldn't start rpciod! Error = %d\n",
- __FUNCTION__, error);
- goto error_1;
- }
- __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-
if (nfsversion == 4) {
if (nfs_callback_up() < 0)
goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
#ifdef CONFIG_NFS_V4
init_rwsem(&clp->cl_sem);
INIT_LIST_HEAD(&clp->cl_delegations);
- INIT_LIST_HEAD(&clp->cl_state_owners);
- INIT_LIST_HEAD(&clp->cl_unused);
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
nfs_callback_down();
error_2:
- rpciod_down();
- __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-error_1:
kfree(clp);
error_0:
return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
#ifdef CONFIG_NFS_V4
if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
nfs4_kill_renewd(clp);
- while (!list_empty(&clp->cl_unused)) {
- struct nfs4_state_owner *sp;
-
- sp = list_entry(clp->cl_unused.next,
- struct nfs4_state_owner,
- so_list);
- list_del(&sp->so_list);
- kfree(sp);
- }
- BUG_ON(!list_empty(&clp->cl_state_owners));
+ BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
#endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
nfs_callback_down();
- if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
- rpciod_down();
-
kfree(clp->cl_hostname);
kfree(clp);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83f..20ac403469a0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
kfree(delegation);
}
+static void nfs_free_delegation_callback(struct rcu_head *head)
+{
+ struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
+
+ nfs_free_delegation(delegation);
+}
+
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
{
struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
return status;
}
-static void nfs_delegation_claim_opens(struct inode *inode)
+static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
+ if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+ continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
- err = nfs4_open_delegation_recall(ctx->dentry, state);
+ err = nfs4_open_delegation_recall(ctx, state, stateid);
if (err >= 0)
err = nfs_delegation_claim_locks(ctx, state);
put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
struct nfs_delegation *delegation;
int status = 0;
- /* Ensure we first revalidate the attributes and page cache! */
- if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
- __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
if (delegation == NULL)
return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation->inode = inode;
spin_lock(&clp->cl_lock);
- if (nfsi->delegation == NULL) {
- list_add(&delegation->super_list, &clp->cl_delegations);
- nfsi->delegation = delegation;
+ if (rcu_dereference(nfsi->delegation) == NULL) {
+ list_add_rcu(&delegation->super_list, &clp->cl_delegations);
nfsi->delegation_state = delegation->type;
+ rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
} else {
if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
status = -EIO;
}
}
+
+ /* Ensure we revalidate the attributes and page cache! */
+ spin_lock(&inode->i_lock);
+ nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+ spin_unlock(&inode->i_lock);
+
spin_unlock(&clp->cl_lock);
kfree(delegation);
return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
int res = 0;
res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
- nfs_free_delegation(delegation);
+ call_rcu(&delegation->rcu, nfs_free_delegation_callback);
return res;
}
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
/*
* Basic procedure for returning a delegation to the server
*/
-int __nfs_inode_return_delegation(struct inode *inode)
+static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_delegation *delegation;
- int res = 0;
nfs_msync_inode(inode);
down_read(&clp->cl_sem);
/* Guard against new delegated open calls */
down_write(&nfsi->rwsem);
- spin_lock(&clp->cl_lock);
- delegation = nfsi->delegation;
- if (delegation != NULL) {
- list_del_init(&delegation->super_list);
- nfsi->delegation = NULL;
- nfsi->delegation_state = 0;
- }
- spin_unlock(&clp->cl_lock);
- nfs_delegation_claim_opens(inode);
+ nfs_delegation_claim_opens(inode, &delegation->stateid);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
- if (delegation != NULL)
- res = nfs_do_return_delegation(inode, delegation);
- return res;
+ return nfs_do_return_delegation(inode, delegation);
+}
+
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+ struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+ if (delegation == NULL)
+ goto nomatch;
+ if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+ sizeof(delegation->stateid.data)) != 0)
+ goto nomatch;
+ list_del_rcu(&delegation->super_list);
+ nfsi->delegation_state = 0;
+ rcu_assign_pointer(nfsi->delegation, NULL);
+ return delegation;
+nomatch:
+ return NULL;
+}
+
+int nfs_inode_return_delegation(struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int err = 0;
+
+ if (rcu_dereference(nfsi->delegation) != NULL) {
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(nfsi, NULL);
+ spin_unlock(&clp->cl_lock);
+ if (delegation != NULL)
+ err = __nfs_inode_return_delegation(inode, delegation);
+ }
+ return err;
}
/*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
if (clp == NULL)
return;
restart:
- spin_lock(&clp->cl_lock);
- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if (delegation->inode->i_sb != sb)
continue;
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
- nfs_inode_return_delegation(inode);
+ rcu_read_unlock();
+ if (delegation != NULL)
+ __nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
}
static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
allow_signal(SIGKILL);
restart:
- spin_lock(&clp->cl_lock);
if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
goto out;
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
goto out;
- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
- nfs_inode_return_delegation(inode);
+ rcu_read_unlock();
+ if (delegation)
+ __nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
+ rcu_read_unlock();
out:
- spin_unlock(&clp->cl_lock);
nfs_put_client(clp);
module_put_and_exit(0);
}
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
if (clp == NULL)
return;
restart:
- spin_lock(&clp->cl_lock);
- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
inode = igrab(delegation->inode);
if (inode == NULL)
continue;
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
spin_unlock(&clp->cl_lock);
- nfs_inode_return_delegation(inode);
+ rcu_read_unlock();
+ if (delegation != NULL)
+ __nfs_inode_return_delegation(inode, delegation);
iput(inode);
goto restart;
}
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
}
struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
down_read(&clp->cl_sem);
down_write(&nfsi->rwsem);
spin_lock(&clp->cl_lock);
- delegation = nfsi->delegation;
- if (delegation != NULL && memcmp(delegation->stateid.data,
- args->stateid->data,
- sizeof(delegation->stateid.data)) == 0) {
- list_del_init(&delegation->super_list);
- nfsi->delegation = NULL;
- nfsi->delegation_state = 0;
+ delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
+ if (delegation != NULL)
args->result = 0;
- } else {
- delegation = NULL;
+ else
args->result = -ENOENT;
- }
spin_unlock(&clp->cl_lock);
complete(&args->started);
- nfs_delegation_claim_opens(inode);
+ nfs_delegation_claim_opens(inode, args->stateid);
up_write(&nfsi->rwsem);
up_read(&clp->cl_sem);
nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
- spin_lock(&clp->cl_lock);
- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
res = igrab(delegation->inode);
break;
}
}
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
return res;
}
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
- spin_lock(&clp->cl_lock);
- list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
- spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
}
/*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
- struct nfs_delegation *delegation, *n;
- LIST_HEAD(head);
- spin_lock(&clp->cl_lock);
- list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+ struct nfs_delegation *delegation;
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
continue;
- list_move(&delegation->super_list, &head);
- NFS_I(delegation->inode)->delegation = NULL;
- NFS_I(delegation->inode)->delegation_state = 0;
- }
- spin_unlock(&clp->cl_lock);
- while(!list_empty(&head)) {
- delegation = list_entry(head.next, struct nfs_delegation, super_list);
- list_del(&delegation->super_list);
- nfs_free_delegation(delegation);
+ spin_lock(&clp->cl_lock);
+ delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
+ spin_unlock(&clp->cl_lock);
+ rcu_read_unlock();
+ if (delegation != NULL)
+ call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+ goto restart;
}
+ rcu_read_unlock();
}
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
- int res = 0;
+ int ret = 0;
- if (nfsi->delegation_state == 0)
- return 0;
- spin_lock(&clp->cl_lock);
- delegation = nfsi->delegation;
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
if (delegation != NULL) {
memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
- res = 1;
+ ret = 1;
}
- spin_unlock(&clp->cl_lock);
- return res;
+ rcu_read_unlock();
+ return ret;
}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7fe..5874ce7fdbae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
long flags;
loff_t maxsize;
__u64 change_attr;
+ struct rcu_head rcu;
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int __nfs_inode_return_delegation(struct inode *inode);
+int nfs_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
+ struct nfs_delegation *delegation;
+ int ret = 0;
+
flags &= FMODE_READ|FMODE_WRITE;
- smp_rmb();
- if ((NFS_I(inode)->delegation_state & flags) == flags)
- return 1;
- return 0;
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL && (delegation->type & flags) == flags)
+ ret = 1;
+ rcu_read_unlock();
+ return ret;
}
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
- int err = 0;
-
- if (NFS_I(inode)->delegation != NULL)
- err = __nfs_inode_return_delegation(inode);
- return err;
-}
#else
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e1..322141f4ab48 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
return (nd->intent.open.flags & O_EXCL) != 0;
}
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
- /* Revalidate fsid on root dir */
- return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+ /* Revalidate fsid using the parent directory */
+ return __nfs_revalidate_inode(server, dir);
return 0;
}
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+ error = nfs_reval_fsid(dir, &fattr);
if (error < 0) {
res = ERR_PTR(error);
goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
- if (nd && (nd->flags & LOOKUP_CREATE))
+ if ((nd->flags & LOOKUP_CREATE) != 0)
open_flags = nd->intent.open.flags;
lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
lock_kernel();
- page = alloc_page(GFP_KERNEL);
+ page = alloc_page(GFP_HIGHUSER);
if (!page) {
unlock_kernel();
return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
struct nfs_inode *nfsi;
struct nfs_access_entry *cache;
- spin_lock(&nfs_access_lru_lock);
restart:
+ spin_lock(&nfs_access_lru_lock);
list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
struct inode *inode;
@@ -1770,6 +1769,7 @@ remove_lru_entry:
clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
}
spin_unlock(&inode->i_lock);
+ spin_unlock(&nfs_access_lru_lock);
iput(inode);
goto restart;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510fe..a5c82b6f3b45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
{
struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
+ struct inode *inode = ctx->path.dentry->d_inode;
size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int pgbase;
int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
break;
}
if ((unsigned)result < data->npages) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_readdata_release(data);
- break;
+ bytes = result * PAGE_SIZE;
+ if (bytes <= pgbase) {
+ nfs_direct_release_pages(data->pagevec, result);
+ nfs_readdata_release(data);
+ break;
+ }
+ bytes -= pgbase;
+ data->npages = result;
}
get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
{
struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
+ struct inode *inode = ctx->path.dentry->d_inode;
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
break;
}
if ((unsigned)result < data->npages) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_writedata_release(data);
- break;
+ bytes = result * PAGE_SIZE;
+ if (bytes <= pgbase) {
+ nfs_direct_release_pages(data->pagevec, result);
+ nfs_writedata_release(data);
+ break;
+ }
+ bytes -= pgbase;
+ data->npages = result;
}
get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
(unsigned long) count, (long long) pos);
if (nr_segs != 1)
- return -EINVAL;
-
- if (count < 0)
goto out;
+
retval = -EFAULT;
if (!access_ok(VERIFY_WRITE, buf, count))
goto out;
@@ -814,7 +822,7 @@ out:
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- ssize_t retval;
+ ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
/* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
(unsigned long) count, (long long) pos);
if (nr_segs != 1)
- return -EINVAL;
+ goto out;
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a836592..3d9fccf4ef93 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
- atomic_set(&ctx->count, 1);
- ctx->dentry = dget(dentry);
- ctx->vfsmnt = mntget(mnt);
+ ctx->path.dentry = dget(dentry);
+ ctx->path.mnt = mntget(mnt);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->lockowner = current->files;
ctx->error = 0;
ctx->dir_cookie = 0;
+ kref_init(&ctx->kref);
}
return ctx;
}
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
if (ctx != NULL)
- atomic_inc(&ctx->count);
+ kref_get(&ctx->kref);
return ctx;
}
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void nfs_free_open_context(struct kref *kref)
{
- if (atomic_dec_and_test(&ctx->count)) {
- if (!list_empty(&ctx->list)) {
- struct inode *inode = ctx->dentry->d_inode;
- spin_lock(&inode->i_lock);
- list_del(&ctx->list);
- spin_unlock(&inode->i_lock);
- }
- if (ctx->state != NULL)
- nfs4_close_state(ctx->state, ctx->mode);
- if (ctx->cred != NULL)
- put_rpccred(ctx->cred);
- dput(ctx->dentry);
- mntput(ctx->vfsmnt);
- kfree(ctx);
+ struct nfs_open_context *ctx = container_of(kref,
+ struct nfs_open_context, kref);
+
+ if (!list_empty(&ctx->list)) {
+ struct inode *inode = ctx->path.dentry->d_inode;
+ spin_lock(&inode->i_lock);
+ list_del(&ctx->list);
+ spin_unlock(&inode->i_lock);
}
+ if (ctx->state != NULL)
+ nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+ if (ctx->cred != NULL)
+ put_rpccred(ctx->cred);
+ dput(ctx->path.dentry);
+ mntput(ctx->path.mnt);
+ kfree(ctx);
+}
+
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+ kref_put(&ctx->kref, nfs_free_open_context);
}
/*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
goto out_changed;
server = NFS_SERVER(inode);
- /* Update the fsid if and only if this is the root directory */
- if (inode == inode->i_sb->s_root->d_inode
+ /* Update the fsid? */
+ if (S_ISDIR(inode->i_mode)
&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
server->fsid = fattr->fsid;
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid &= ~NFS_INO_INVALID_DATA;
if (data_stable)
invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
- if (!nfs_have_delegation(inode, FMODE_READ))
+ if (!nfs_have_delegation(inode, FMODE_READ) ||
+ (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
nfsi->cache_validity |= invalid;
+ nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
return 0;
out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
void nfs4_clear_inode(struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
-
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation(inode);
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
- /* Now clear out any remaining state */
- while (!list_empty(&nfsi->open_states)) {
- struct nfs4_state *state;
-
- state = list_entry(nfsi->open_states.next,
- struct nfs4_state,
- inode_states);
- dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
- __FUNCTION__,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- state);
- BUG_ON(atomic_read(&state->count) != 1);
- nfs4_close_state(state, state->state);
- }
}
#endif
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
struct nfs_inode *nfsi = (struct nfs_inode *) foo;
inode_init_once(&nfsi->vfs_inode);
- spin_lock_init(&nfsi->req_lock);
- INIT_LIST_HEAD(&nfsi->dirty);
- INIT_LIST_HEAD(&nfsi->commit);
INIT_LIST_HEAD(&nfsi->open_files);
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
atomic_set(&nfsi->data_updates, 0);
- nfsi->ndirty = 0;
nfsi->ncommit = 0;
nfsi->npages = 0;
nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e65..76cf55d57101 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
/*
* Calculate the number of 512byte blocks used.
*/
-static inline unsigned long nfs_calc_block_size(u64 tsize)
+static inline blkcnt_t nfs_calc_block_size(u64 tsize)
{
- loff_t used = (tsize + 511) >> 9;
+ blkcnt_t used = (tsize + 511) >> 9;
return (used > ULONG_MAX) ? ULONG_MAX : used;
}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a3140..8afd9f7e7a97 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
/*
- * linux/fs/nfs/mount_clnt.c
- *
- * MOUNT client to support NFSroot.
+ * In-kernel MOUNT protocol client
*
* Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
*/
@@ -18,33 +16,31 @@
#include <linux/nfs_fs.h>
#ifdef RPC_DEBUG
-# define NFSDBG_FACILITY NFSDBG_ROOT
+# define NFSDBG_FACILITY NFSDBG_MOUNT
#endif
-/*
-#define MOUNT_PROGRAM 100005
-#define MOUNT_VERSION 1
-#define MOUNT_MNT 1
-#define MOUNT_UMNT 3
- */
-
-static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
- int, int);
static struct rpc_program mnt_program;
struct mnt_fhstatus {
- unsigned int status;
- struct nfs_fh * fh;
+ u32 status;
+ struct nfs_fh *fh;
};
-/*
- * Obtain an NFS file handle for the given host and path
+/**
+ * nfs_mount - Obtain an NFS file handle for the given host and path
+ * @addr: pointer to server's address
+ * @len: size of server's address
+ * @hostname: name of server host, or NULL
+ * @path: pointer to string containing export path to mount
+ * @version: mount version to use for this request
+ * @protocol: transport protocol to use for thie request
+ * @fh: pointer to location to place returned file handle
+ *
+ * Uses default timeout parameters specified by underlying transport.
*/
-int
-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
- int version, int protocol)
+int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
+ int version, int protocol, struct nfs_fh *fh)
{
- struct rpc_clnt *mnt_clnt;
struct mnt_fhstatus result = {
.fh = fh
};
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
.rpc_argp = path,
.rpc_resp = &result,
};
- char hostname[32];
+ struct rpc_create_args args = {
+ .protocol = protocol,
+ .address = addr,
+ .addrsize = len,
+ .servername = hostname,
+ .program = &mnt_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = RPC_CLNT_CREATE_INTR,
+ };
+ struct rpc_clnt *mnt_clnt;
int status;
- dprintk("NFS: nfs_mount(%08x:%s)\n",
- (unsigned)ntohl(addr->sin_addr.s_addr), path);
+ dprintk("NFS: sending MNT request for %s:%s\n",
+ (hostname ? hostname : "server"), path);
- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
- mnt_clnt = mnt_create(hostname, addr, version, protocol);
+ mnt_clnt = rpc_create(&args);
if (IS_ERR(mnt_clnt))
- return PTR_ERR(mnt_clnt);
+ goto out_clnt_err;
if (version == NFS_MNT3_VERSION)
msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
status = rpc_call_sync(mnt_clnt, &msg, 0);
- return status < 0? status : (result.status? -EACCES : 0);
-}
+ rpc_shutdown_client(mnt_clnt);
-static struct rpc_clnt *
-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
- int protocol)
-{
- struct rpc_create_args args = {
- .protocol = protocol,
- .address = (struct sockaddr *)srvaddr,
- .addrsize = sizeof(*srvaddr),
- .servername = hostname,
- .program = &mnt_program,
- .version = version,
- .authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_ONESHOT |
- RPC_CLNT_CREATE_INTR),
- };
+ if (status < 0)
+ goto out_call_err;
+ if (result.status != 0)
+ goto out_mnt_err;
+
+ dprintk("NFS: MNT request succeeded\n");
+ status = 0;
+
+out:
+ return status;
+
+out_clnt_err:
+ status = PTR_ERR(mnt_clnt);
+ dprintk("NFS: failed to create RPC client, status=%d\n", status);
+ goto out;
+
+out_call_err:
+ dprintk("NFS: failed to start MNT request, status=%d\n", status);
+ goto out;
- return rpc_create(&args);
+out_mnt_err:
+ dprintk("NFS: MNT server returned result %d\n", result.status);
+ status = -EACCES;
+ goto out;
}
/*
* XDR encode/decode functions for MOUNT
*/
-static int
-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
+ const char *path)
{
p = xdr_encode_string(p, path);
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
return 0;
}
-static int
-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
+ struct mnt_fhstatus *res)
{
struct nfs_fh *fh = res->fh;
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
return 0;
}
-static int
-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
+ struct mnt_fhstatus *res)
{
struct nfs_fh *fh = res->fh;
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
#define MNT_fhstatus_sz (1 + 8)
#define MNT_fhstatus3_sz (1 + 16)
-static struct rpc_procinfo mnt_procedures[] = {
-[MNTPROC_MNT] = {
- .p_proc = MNTPROC_MNT,
- .p_encode = (kxdrproc_t) xdr_encode_dirpath,
- .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
- .p_arglen = MNT_dirpath_sz,
- .p_replen = MNT_fhstatus_sz,
- .p_statidx = MNTPROC_MNT,
- .p_name = "MOUNT",
+static struct rpc_procinfo mnt_procedures[] = {
+ [MNTPROC_MNT] = {
+ .p_proc = MNTPROC_MNT,
+ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+ .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
+ .p_arglen = MNT_dirpath_sz,
+ .p_replen = MNT_fhstatus_sz,
+ .p_statidx = MNTPROC_MNT,
+ .p_name = "MOUNT",
},
};
static struct rpc_procinfo mnt3_procedures[] = {
-[MOUNTPROC3_MNT] = {
- .p_proc = MOUNTPROC3_MNT,
- .p_encode = (kxdrproc_t) xdr_encode_dirpath,
- .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
- .p_arglen = MNT_dirpath_sz,
- .p_replen = MNT_fhstatus3_sz,
- .p_statidx = MOUNTPROC3_MNT,
- .p_name = "MOUNT",
+ [MOUNTPROC3_MNT] = {
+ .p_proc = MOUNTPROC3_MNT,
+ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+ .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
+ .p_arglen = MNT_dirpath_sz,
+ .p_replen = MNT_fhstatus3_sz,
+ .p_statidx = MOUNTPROC3_MNT,
+ .p_name = "MOUNT",
},
};
-static struct rpc_version mnt_version1 = {
- .number = 1,
- .nrprocs = 2,
- .procs = mnt_procedures
+static struct rpc_version mnt_version1 = {
+ .number = 1,
+ .nrprocs = 2,
+ .procs = mnt_procedures,
};
-static struct rpc_version mnt_version3 = {
- .number = 3,
- .nrprocs = 2,
- .procs = mnt3_procedures
+static struct rpc_version mnt_version3 = {
+ .number = 3,
+ .nrprocs = 2,
+ .procs = mnt3_procedures,
};
-static struct rpc_version * mnt_version[] = {
+static struct rpc_version *mnt_version[] = {
NULL,
&mnt_version1,
NULL,
&mnt_version3,
};
-static struct rpc_stat mnt_stats;
+static struct rpc_stat mnt_stats;
-static struct rpc_program mnt_program = {
+static struct rpc_program mnt_program = {
.name = "mount",
.number = NFS_MNT_PROGRAM,
.nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3db..7fcc78f2aa71 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
static int
nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
u32 offset = (u32)args->offset;
u32 count = args->count;
@@ -380,7 +380,7 @@ static int
nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
{
struct rpc_task *task = req->rq_task;
- struct rpc_auth *auth = task->tk_auth;
+ struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
u32 count = args->count;
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
static int
nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2e..814d886b6aa4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
* not sure this buys us anything (and I'd have
* to revamp the NFSv3 XDR code) */
status = nfs3_proc_setattr(dentry, &fattr, sattr);
- if (status == 0)
- nfs_setattr_update_inode(dentry->d_inode, sattr);
- nfs_refresh_inode(dentry->d_inode, &fattr);
+ nfs_post_op_update_inode(dentry->d_inode, &fattr);
dprintk("NFS reply setattr (post-create): %d\n", status);
}
if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f01..b4647a22f349 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
static int
nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
u32 count = args->count;
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
static int
nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
u32 count = args->count;
@@ -643,7 +643,7 @@ static int
nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
struct nfs3_getaclargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
static int
nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c09..6c028e734fe6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
seqid->flags |= NFS_SEQID_CONFIRMED;
}
+struct nfs_unique_id {
+ struct rb_node rb_node;
+ __u64 id;
+};
+
/*
* NFS4 state_owners and lock_owners are simply labels for ordered
* sequences of RPC calls. Their sole purpose is to provide once-only
* semantics by allowing the server to identify replayed requests.
*/
struct nfs4_state_owner {
- spinlock_t so_lock;
- struct list_head so_list; /* per-clientid list of state_owners */
+ struct nfs_unique_id so_owner_id;
struct nfs_client *so_client;
- u32 so_id; /* 32-bit identifier, unique */
- atomic_t so_count;
+ struct nfs_server *so_server;
+ struct rb_node so_client_node;
struct rpc_cred *so_cred; /* Associated cred */
+
+ spinlock_t so_lock;
+ atomic_t so_count;
struct list_head so_states;
struct list_head so_delegations;
struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
#define NFS_LOCK_INITIALIZED 1
int ls_flags;
struct nfs_seqid_counter ls_seqid;
- u32 ls_id;
+ struct nfs_unique_id ls_id;
nfs4_stateid ls_stateid;
atomic_t ls_count;
};
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
/* bits for nfs4_state->flags */
enum {
LK_STATE_IN_USE,
- NFS_DELEGATED_STATE,
+ NFS_DELEGATED_STATE, /* Current stateid is delegation */
+ NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
+ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
+ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
};
struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
unsigned long flags; /* Do we hold any locks? */
spinlock_t state_lock; /* Protects the lock_states list */
- nfs4_stateid stateid;
+ seqlock_t seqlock; /* Protects the stateid/open_stateid */
+ nfs4_stateid stateid; /* Current stateid: may be delegation */
+ nfs4_stateid open_stateid; /* OPEN stateid */
- unsigned int n_rdonly;
- unsigned int n_wronly;
- unsigned int n_rdwr;
+ /* The following 3 fields are protected by owner->so_lock */
+ unsigned int n_rdonly; /* Number of read-only references */
+ unsigned int n_wronly; /* Number of write-only references */
+ unsigned int n_rdwr; /* Number of read/write references */
int state; /* State on the server (R,W, or RW) */
atomic_t count;
};
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
/* nfs4state.c */
struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
extern void nfs4_schedule_state_recovery(struct nfs_client *);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
#else
-#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_state(a, b, c) do { } while (0)
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90e..fee2da856c95 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
/* Prevent leaks of NFSv4 errors into userland */
int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
}
struct nfs4_opendata {
- atomic_t count;
+ struct kref kref;
struct nfs_openargs o_arg;
struct nfs_openres o_res;
struct nfs_open_confirmargs c_arg;
struct nfs_open_confirmres c_res;
struct nfs_fattr f_attr;
struct nfs_fattr dir_attr;
- struct dentry *dentry;
+ struct path path;
struct dentry *dir;
struct nfs4_state_owner *owner;
+ struct nfs4_state *state;
struct iattr attrs;
unsigned long timestamp;
+ unsigned int rpc_done : 1;
int rpc_status;
int cancelled;
};
-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+
+static void nfs4_init_opendata_res(struct nfs4_opendata *p)
+{
+ p->o_res.f_attr = &p->f_attr;
+ p->o_res.dir_attr = &p->dir_attr;
+ p->o_res.server = p->o_arg.server;
+ nfs_fattr_init(&p->f_attr);
+ nfs_fattr_init(&p->dir_attr);
+}
+
+static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
struct nfs4_state_owner *sp, int flags,
const struct iattr *attrs)
{
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dget_parent(path->dentry);
struct inode *dir = parent->d_inode;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
if (p->o_arg.seqid == NULL)
goto err_free;
- atomic_set(&p->count, 1);
- p->dentry = dget(dentry);
+ p->path.mnt = mntget(path->mnt);
+ p->path.dentry = dget(path->dentry);
p->dir = parent;
p->owner = sp;
atomic_inc(&sp->so_count);
p->o_arg.fh = NFS_FH(dir);
p->o_arg.open_flags = flags,
p->o_arg.clientid = server->nfs_client->cl_clientid;
- p->o_arg.id = sp->so_id;
- p->o_arg.name = &dentry->d_name;
+ p->o_arg.id = sp->so_owner_id.id;
+ p->o_arg.name = &p->path.dentry->d_name;
p->o_arg.server = server;
p->o_arg.bitmask = server->attr_bitmask;
p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
- p->o_res.f_attr = &p->f_attr;
- p->o_res.dir_attr = &p->dir_attr;
- p->o_res.server = server;
- nfs_fattr_init(&p->f_attr);
- nfs_fattr_init(&p->dir_attr);
if (flags & O_EXCL) {
u32 *s = (u32 *) p->o_arg.u.verifier.data;
s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->c_arg.fh = &p->o_res.fh;
p->c_arg.stateid = &p->o_res.stateid;
p->c_arg.seqid = p->o_arg.seqid;
+ nfs4_init_opendata_res(p);
+ kref_init(&p->kref);
return p;
err_free:
kfree(p);
@@ -282,27 +292,25 @@ err:
return NULL;
}
-static void nfs4_opendata_free(struct nfs4_opendata *p)
+static void nfs4_opendata_free(struct kref *kref)
{
- if (p != NULL && atomic_dec_and_test(&p->count)) {
- nfs_free_seqid(p->o_arg.seqid);
- nfs4_put_state_owner(p->owner);
- dput(p->dir);
- dput(p->dentry);
- kfree(p);
- }
+ struct nfs4_opendata *p = container_of(kref,
+ struct nfs4_opendata, kref);
+
+ nfs_free_seqid(p->o_arg.seqid);
+ if (p->state != NULL)
+ nfs4_put_open_state(p->state);
+ nfs4_put_state_owner(p->owner);
+ dput(p->dir);
+ dput(p->path.dentry);
+ mntput(p->path.mnt);
+ kfree(p);
}
-/* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt,
- const struct rpc_call_ops *tk_ops, void *calldata)
+static void nfs4_opendata_put(struct nfs4_opendata *p)
{
- struct rpc_task *task;
-
- if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
- return -ENOMEM;
- rpc_execute(task);
- return 0;
+ if (p != NULL)
+ kref_put(&p->kref, nfs4_opendata_free);
}
static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
return ret;
}
-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static int can_open_cached(struct nfs4_state *state, int mode)
+{
+ int ret = 0;
+ switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+ case FMODE_READ:
+ ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
+ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+ break;
+ case FMODE_WRITE:
+ ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
+ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+ break;
+ case FMODE_READ|FMODE_WRITE:
+ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+ }
+ return ret;
+}
+
+static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+{
+ if ((delegation->type & open_flags) != open_flags)
+ return 0;
+ if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
+ return 0;
+ return 1;
+}
+
+static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
{
switch (open_flags) {
case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
case FMODE_READ|FMODE_WRITE:
state->n_rdwr++;
}
+ nfs4_state_set_mode_locked(state, state->state | open_flags);
}
-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
{
- struct inode *inode = state->inode;
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+ memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+ switch (open_flags) {
+ case FMODE_READ:
+ set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_WRITE:
+ set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ|FMODE_WRITE:
+ set_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
+}
+
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+ write_seqlock(&state->seqlock);
+ nfs_set_open_stateid_locked(state, stateid, open_flags);
+ write_sequnlock(&state->seqlock);
+}
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
+{
open_flags &= (FMODE_READ|FMODE_WRITE);
- /* Protect against nfs4_find_state_byowner() */
+ /*
+ * Protect the call to nfs4_state_set_mode_locked and
+ * serialise the stateid update
+ */
+ write_seqlock(&state->seqlock);
+ if (deleg_stateid != NULL) {
+ memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ }
+ if (open_stateid != NULL)
+ nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+ write_sequnlock(&state->seqlock);
spin_lock(&state->owner->so_lock);
- spin_lock(&inode->i_lock);
- memcpy(&state->stateid, stateid, sizeof(state->stateid));
update_open_stateflags(state, open_flags);
- nfs4_state_set_mode_locked(state, state->state | open_flags);
- spin_unlock(&inode->i_lock);
spin_unlock(&state->owner->so_lock);
}
+static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+{
+ struct nfs_delegation *delegation;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ nfs_inode_return_delegation(inode);
+}
+
+static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
+{
+ struct nfs4_state *state = opendata->state;
+ struct nfs_inode *nfsi = NFS_I(state->inode);
+ struct nfs_delegation *delegation;
+ int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+ nfs4_stateid stateid;
+ int ret = -EAGAIN;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
+ for (;;) {
+ if (can_open_cached(state, open_mode)) {
+ spin_lock(&state->owner->so_lock);
+ if (can_open_cached(state, open_mode)) {
+ update_open_stateflags(state, open_mode);
+ spin_unlock(&state->owner->so_lock);
+ rcu_read_unlock();
+ goto out_return_state;
+ }
+ spin_unlock(&state->owner->so_lock);
+ }
+ if (delegation == NULL)
+ break;
+ if (!can_open_delegated(delegation, open_mode))
+ break;
+ /* Save the delegation */
+ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+ rcu_read_unlock();
+ lock_kernel();
+ ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+ unlock_kernel();
+ if (ret != 0)
+ goto out;
+ ret = -EAGAIN;
+ rcu_read_lock();
+ delegation = rcu_dereference(nfsi->delegation);
+ /* If no delegation, try a cached open */
+ if (delegation == NULL)
+ continue;
+ /* Is the delegation still valid? */
+ if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
+ continue;
+ rcu_read_unlock();
+ update_open_stateid(state, NULL, &stateid, open_mode);
+ goto out_return_state;
+ }
+ rcu_read_unlock();
+out:
+ return ERR_PTR(ret);
+out_return_state:
+ atomic_inc(&state->count);
+ return state;
+}
+
static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
{
struct inode *inode;
struct nfs4_state *state = NULL;
+ struct nfs_delegation *delegation;
+ nfs4_stateid *deleg_stateid = NULL;
+ int ret;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+ if (!data->rpc_done) {
+ state = nfs4_try_open_cached(data);
goto out;
+ }
+
+ ret = -EAGAIN;
+ if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+ goto err;
inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+ ret = PTR_ERR(inode);
if (IS_ERR(inode))
- goto out;
+ goto err;
+ ret = -ENOMEM;
state = nfs4_get_open_state(inode, data->owner);
if (state == NULL)
- goto put_inode;
- update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
-put_inode:
+ goto err_put_inode;
+ if (data->o_res.delegation_type != 0) {
+ int delegation_flags = 0;
+
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation)
+ delegation_flags = delegation->flags;
+ rcu_read_unlock();
+ if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+ nfs_inode_set_delegation(state->inode,
+ data->owner->so_cred,
+ &data->o_res);
+ else
+ nfs_inode_reclaim_delegation(state->inode,
+ data->owner->so_cred,
+ &data->o_res);
+ }
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL)
+ deleg_stateid = &delegation->stateid;
+ update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
+ rcu_read_unlock();
iput(inode);
out:
return state;
+err_put_inode:
+ iput(inode);
+err:
+ return ERR_PTR(ret);
}
static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
return ERR_PTR(-ENOENT);
}
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
{
+ struct nfs4_state *newstate;
int ret;
opendata->o_arg.open_flags = openflags;
+ memset(&opendata->o_res, 0, sizeof(opendata->o_res));
+ memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+ nfs4_init_opendata_res(opendata);
ret = _nfs4_proc_open(opendata);
if (ret != 0)
return ret;
- memcpy(stateid->data, opendata->o_res.stateid.data,
- sizeof(stateid->data));
+ newstate = nfs4_opendata_to_nfs4_state(opendata);
+ if (IS_ERR(newstate))
+ return PTR_ERR(newstate);
+ nfs4_close_state(&opendata->path, newstate, openflags);
+ *res = newstate;
return 0;
}
static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
{
- nfs4_stateid stateid;
struct nfs4_state *newstate;
- int mode = 0;
- int delegation = 0;
int ret;
/* memory barrier prior to reading state->n_* */
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
smp_rmb();
if (state->n_rdwr != 0) {
- ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+ ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
- mode |= FMODE_READ|FMODE_WRITE;
- if (opendata->o_res.delegation_type != 0)
- delegation = opendata->o_res.delegation_type;
- smp_rmb();
+ if (newstate != state)
+ return -ESTALE;
}
if (state->n_wronly != 0) {
- ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+ ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
- mode |= FMODE_WRITE;
- if (opendata->o_res.delegation_type != 0)
- delegation = opendata->o_res.delegation_type;
- smp_rmb();
+ if (newstate != state)
+ return -ESTALE;
}
if (state->n_rdonly != 0) {
- ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+ ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
if (ret != 0)
return ret;
- mode |= FMODE_READ;
+ if (newstate != state)
+ return -ESTALE;
}
- clear_bit(NFS_DELEGATED_STATE, &state->flags);
- if (mode == 0)
- return 0;
- if (opendata->o_res.delegation_type == 0)
- opendata->o_res.delegation_type = delegation;
- opendata->o_arg.open_flags |= mode;
- newstate = nfs4_opendata_to_nfs4_state(opendata);
- if (newstate != NULL) {
- if (opendata->o_res.delegation_type != 0) {
- struct nfs_inode *nfsi = NFS_I(newstate->inode);
- int delegation_flags = 0;
- if (nfsi->delegation)
- delegation_flags = nfsi->delegation->flags;
- if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
- nfs_inode_set_delegation(newstate->inode,
- opendata->owner->so_cred,
- &opendata->o_res);
- else
- nfs_inode_reclaim_delegation(newstate->inode,
- opendata->owner->so_cred,
- &opendata->o_res);
- }
- nfs4_close_state(newstate, opendata->o_arg.open_flags);
+ /*
+ * We may have performed cached opens for all three recoveries.
+ * Check if we need to update the current stateid.
+ */
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+ memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+ write_seqlock(&state->seqlock);
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+ write_sequnlock(&state->seqlock);
}
- if (newstate != state)
- return -ESTALE;
return 0;
}
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
* OPEN_RECLAIM:
* reclaim state on the server after a reboot.
*/
-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
{
- struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+ struct nfs_delegation *delegation;
struct nfs4_opendata *opendata;
int delegation_type = 0;
int status;
- if (delegation != NULL) {
- if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
- memcpy(&state->stateid, &delegation->stateid,
- sizeof(state->stateid));
- set_bit(NFS_DELEGATED_STATE, &state->flags);
- return 0;
- }
- delegation_type = delegation->type;
- }
- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
if (opendata == NULL)
return -ENOMEM;
opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
opendata->o_arg.fh = NFS_FH(state->inode);
nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+ if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
+ delegation_type = delegation->flags;
+ rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
- nfs4_opendata_free(opendata);
+ nfs4_opendata_put(opendata);
return status;
}
-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_do_open_reclaim(sp, state, dentry);
+ err = _nfs4_do_open_reclaim(ctx, state);
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
ctx = nfs4_state_find_open_context(state);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+ ret = nfs4_do_open_reclaim(ctx, state);
put_nfs_open_context(ctx);
return ret;
}
-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct nfs4_state_owner *sp = state->owner;
struct nfs4_opendata *opendata;
int ret;
- if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
- return 0;
- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+ opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
if (opendata == NULL)
return -ENOMEM;
opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
- memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+ memcpy(opendata->o_arg.u.delegation.data, stateid->data,
sizeof(opendata->o_arg.u.delegation.data));
ret = nfs4_open_recover(opendata, state);
- nfs4_opendata_free(opendata);
+ nfs4_opendata_put(opendata);
return ret;
}
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct nfs4_exception exception = { };
- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ struct nfs_server *server = NFS_SERVER(state->inode);
int err;
do {
- err = _nfs4_open_delegation_recall(dentry, state);
+ err = _nfs4_open_delegation_recall(ctx, state, stateid);
switch (err) {
case 0:
return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
sizeof(data->o_res.stateid.data));
renew_lease(data->o_res.server, data->timestamp);
+ data->rpc_done = 1;
}
- nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+ nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
}
static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
if (data->cancelled == 0)
goto out_free;
/* In case of error, no cleanup! */
- if (data->rpc_status != 0)
+ if (!data->rpc_done)
goto out_free;
nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
- if (state != NULL)
- nfs4_close_state(state, data->o_arg.open_flags);
+ if (!IS_ERR(state))
+ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
out_free:
- nfs4_opendata_free(data);
+ nfs4_opendata_put(data);
}
static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
struct rpc_task *task;
int status;
- atomic_inc(&data->count);
- /*
- * If rpc_run_task() ends up calling ->rpc_release(), we
- * want to ensure that it takes the 'error' code path.
- */
- data->rpc_status = -ENOMEM;
+ kref_get(&data->kref);
+ data->rpc_done = 0;
+ data->rpc_status = 0;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
return;
+ /*
+ * Check if we still need to send an OPEN call, or if we can use
+ * a delegation instead.
+ */
+ if (data->state != NULL) {
+ struct nfs_delegation *delegation;
+
+ if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+ goto out_no_action;
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+ if (delegation != NULL &&
+ (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
+ rcu_read_unlock();
+ goto out_no_action;
+ }
+ rcu_read_unlock();
+ }
/* Update sequence id. */
- data->o_arg.id = sp->so_id;
+ data->o_arg.id = sp->so_owner_id.id;
data->o_arg.clientid = sp->so_client->cl_clientid;
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
data->timestamp = jiffies;
rpc_call_setup(task, &msg, 0);
+ return;
+out_no_action:
+ task->tk_action = NULL;
+
}
static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
data->rpc_status = -ENOTDIR;
}
renew_lease(data->o_res.server, data->timestamp);
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
}
nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+ data->rpc_done = 1;
}
static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
if (data->cancelled == 0)
goto out_free;
/* In case of error, no cleanup! */
- if (data->rpc_status != 0)
+ if (data->rpc_status != 0 || !data->rpc_done)
goto out_free;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
- if (state != NULL)
- nfs4_close_state(state, data->o_arg.open_flags);
+ if (!IS_ERR(state))
+ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
out_free:
- nfs4_opendata_free(data);
+ nfs4_opendata_put(data);
}
static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
struct rpc_task *task;
int status;
- atomic_inc(&data->count);
- /*
- * If rpc_run_task() ends up calling ->rpc_release(), we
- * want to ensure that it takes the 'error' code path.
- */
- data->rpc_status = -ENOMEM;
+ kref_get(&data->kref);
+ data->rpc_done = 0;
+ data->rpc_status = 0;
+ data->cancelled = 0;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
} else
status = data->rpc_status;
rpc_put_task(task);
- if (status != 0)
+ if (status != 0 || !data->rpc_done)
return status;
if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
if (status != 0)
return status;
}
- nfs_confirm_seqid(&data->owner->so_seqid, 0);
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
mask |= MAY_READ;
if (openflags & FMODE_WRITE)
mask |= MAY_WRITE;
+ if (openflags & FMODE_EXEC)
+ mask |= MAY_EXEC;
status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0)
goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
* reclaim state on the server after a network partition.
* Assumes caller holds the appropriate lock
*/
-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
{
- struct inode *inode = state->inode;
- struct nfs_delegation *delegation = NFS_I(inode)->delegation;
struct nfs4_opendata *opendata;
- int openflags = state->state & (FMODE_READ|FMODE_WRITE);
int ret;
- if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
- ret = _nfs4_do_access(inode, sp->so_cred, openflags);
- if (ret < 0)
- return ret;
- memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
- set_bit(NFS_DELEGATED_STATE, &state->flags);
- return 0;
- }
- opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
+ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
if (opendata == NULL)
return -ENOMEM;
ret = nfs4_open_recover(opendata, state);
if (ret == -ESTALE) {
/* Invalidate the state owner so we don't ever use it again */
- nfs4_drop_state_owner(sp);
- d_drop(dentry);
+ nfs4_drop_state_owner(state->owner);
+ d_drop(ctx->path.dentry);
}
- nfs4_opendata_free(opendata);
+ nfs4_opendata_put(opendata);
return ret;
}
-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
{
- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_open_expired(sp, state, dentry);
+ err = _nfs4_open_expired(ctx, state);
if (err == -NFS4ERR_DELAY)
nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
ctx = nfs4_state_find_open_context(state);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+ ret = nfs4_do_open_expired(ctx, state);
put_nfs_open_context(ctx);
return ret;
}
/*
- * Returns a referenced nfs4_state if there is an open delegation on the file
+ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
+ * fields corresponding to attributes that were used to store the verifier.
+ * Make sure we clobber those fields in the later setattr call
*/
-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
-{
- struct nfs_delegation *delegation;
- struct nfs_server *server = NFS_SERVER(inode);
- struct nfs_client *clp = server->nfs_client;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs4_state_owner *sp = NULL;
- struct nfs4_state *state = NULL;
- int open_flags = flags & (FMODE_READ|FMODE_WRITE);
- int err;
-
- err = -ENOMEM;
- if (!(sp = nfs4_get_state_owner(server, cred))) {
- dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
- return err;
- }
- err = nfs4_recover_expired_lease(server);
- if (err != 0)
- goto out_put_state_owner;
- /* Protect against reboot recovery - NOTE ORDER! */
- down_read(&clp->cl_sem);
- /* Protect against delegation recall */
- down_read(&nfsi->rwsem);
- delegation = NFS_I(inode)->delegation;
- err = -ENOENT;
- if (delegation == NULL || (delegation->type & open_flags) != open_flags)
- goto out_err;
- err = -ENOMEM;
- state = nfs4_get_open_state(inode, sp);
- if (state == NULL)
- goto out_err;
-
- err = -ENOENT;
- if ((state->state & open_flags) == open_flags) {
- spin_lock(&inode->i_lock);
- update_open_stateflags(state, open_flags);
- spin_unlock(&inode->i_lock);
- goto out_ok;
- } else if (state->state != 0)
- goto out_put_open_state;
-
- lock_kernel();
- err = _nfs4_do_access(inode, cred, open_flags);
- unlock_kernel();
- if (err != 0)
- goto out_put_open_state;
- set_bit(NFS_DELEGATED_STATE, &state->flags);
- update_open_stateid(state, &delegation->stateid, open_flags);
-out_ok:
- nfs4_put_state_owner(sp);
- up_read(&nfsi->rwsem);
- up_read(&clp->cl_sem);
- *res = state;
- return 0;
-out_put_open_state:
- nfs4_put_open_state(state);
-out_err:
- up_read(&nfsi->rwsem);
- up_read(&clp->cl_sem);
- if (err != -EACCES)
- nfs_inode_return_delegation(inode);
-out_put_state_owner:
- nfs4_put_state_owner(sp);
- return err;
-}
-
-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
{
- struct nfs4_exception exception = { };
- struct nfs4_state *res = ERR_PTR(-EIO);
- int err;
+ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
+ !(sattr->ia_valid & ATTR_ATIME_SET))
+ sattr->ia_valid |= ATTR_ATIME;
- do {
- err = _nfs4_open_delegated(inode, flags, cred, &res);
- if (err == 0)
- break;
- res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
- err, &exception));
- } while (exception.retry);
- return res;
+ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
+ !(sattr->ia_valid & ATTR_MTIME_SET))
+ sattr->ia_valid |= ATTR_MTIME;
}
/*
* Returns a referenced nfs4_state
*/
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_client *clp = server->nfs_client;
struct nfs4_opendata *opendata;
- int status;
+ int status;
/* Protect against reboot recovery conflicts */
status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
status = nfs4_recover_expired_lease(server);
if (status != 0)
goto err_put_state_owner;
+ if (path->dentry->d_inode != NULL)
+ nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
down_read(&clp->cl_sem);
status = -ENOMEM;
- opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+ opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
if (opendata == NULL)
goto err_release_rwsem;
+ if (path->dentry->d_inode != NULL)
+ opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
+
status = _nfs4_proc_open(opendata);
if (status != 0)
- goto err_opendata_free;
+ goto err_opendata_put;
+
+ if (opendata->o_arg.open_flags & O_EXCL)
+ nfs4_exclusive_attrset(opendata, sattr);
- status = -ENOMEM;
state = nfs4_opendata_to_nfs4_state(opendata);
- if (state == NULL)
- goto err_opendata_free;
- if (opendata->o_res.delegation_type != 0)
- nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
- nfs4_opendata_free(opendata);
+ status = PTR_ERR(state);
+ if (IS_ERR(state))
+ goto err_opendata_put;
+ nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
up_read(&clp->cl_sem);
*res = state;
return 0;
-err_opendata_free:
- nfs4_opendata_free(opendata);
+err_opendata_put:
+ nfs4_opendata_put(opendata);
err_release_rwsem:
up_read(&clp->cl_sem);
err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
}
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
- status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+ status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
* the user though...
*/
if (status == -NFS4ERR_BAD_SEQID) {
- printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+ printk(KERN_WARNING "NFS: v4 server %s "
+ " returned a bad sequence-id error!\n",
+ NFS_SERVER(dir)->nfs_client->cl_hostname);
exception.retry = 1;
continue;
}
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
exception.retry = 1;
continue;
}
+ if (status == -EAGAIN) {
+ /* We must have found a delegation */
+ exception.retry = 1;
+ continue;
+ }
res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
status, &exception));
} while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
}
struct nfs4_closedata {
+ struct path path;
struct inode *inode;
struct nfs4_state *state;
struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
+ dput(calldata->path.dentry);
+ mntput(calldata->path.mnt);
kfree(calldata);
}
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
switch (task->tk_status) {
case 0:
- memcpy(&state->stateid, &calldata->res.stateid,
- sizeof(state->stateid));
+ nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
renew_lease(server, calldata->timestamp);
break;
case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
.rpc_resp = &calldata->res,
.rpc_cred = state->owner->so_cred,
};
- int mode = 0, old_mode;
+ int clear_rd, clear_wr, clear_rdwr;
+ int mode;
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
- /* Recalculate the new open mode in case someone reopened the file
- * while we were waiting in line to be scheduled.
- */
+
+ mode = FMODE_READ|FMODE_WRITE;
+ clear_rd = clear_wr = clear_rdwr = 0;
spin_lock(&state->owner->so_lock);
- spin_lock(&calldata->inode->i_lock);
- mode = old_mode = state->state;
+ /* Calculate the change in open mode */
if (state->n_rdwr == 0) {
- if (state->n_rdonly == 0)
+ if (state->n_rdonly == 0) {
mode &= ~FMODE_READ;
- if (state->n_wronly == 0)
+ clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
+ if (state->n_wronly == 0) {
mode &= ~FMODE_WRITE;
+ clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
}
- nfs4_state_set_mode_locked(state, mode);
- spin_unlock(&calldata->inode->i_lock);
spin_unlock(&state->owner->so_lock);
- if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+ if (!clear_rd && !clear_wr && !clear_rdwr) {
/* Note: exit _without_ calling nfs4_close_done */
task->tk_action = NULL;
return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
+int nfs4_do_close(struct path *path, struct nfs4_state *state)
{
- struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
+ struct nfs4_state_owner *sp = state->owner;
+ struct rpc_task *task;
int status = -ENOMEM;
calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
if (calldata == NULL)
goto out;
- calldata->inode = inode;
+ calldata->inode = state->inode;
calldata->state = state;
- calldata->arg.fh = NFS_FH(inode);
- calldata->arg.stateid = &state->stateid;
+ calldata->arg.fh = NFS_FH(state->inode);
+ calldata->arg.stateid = &state->open_stateid;
/* Serialization for the sequence id */
calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
calldata->arg.bitmask = server->attr_bitmask;
calldata->res.fattr = &calldata->fattr;
calldata->res.server = server;
+ calldata->path.mnt = mntget(path->mnt);
+ calldata->path.dentry = dget(path->dentry);
- status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
- if (status == 0)
- goto out;
-
- nfs_free_seqid(calldata->arg.seqid);
+ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
out_free_calldata:
kfree(calldata);
out:
+ nfs4_put_open_state(state);
+ nfs4_put_state_owner(sp);
return status;
}
-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
{
struct file *filp;
+ int ret;
- filp = lookup_instantiate_filp(nd, dentry, NULL);
+ /* If the open_intent is for execute, we have an extra check to make */
+ if (nd->intent.open.flags & FMODE_EXEC) {
+ ret = _nfs4_do_access(state->inode,
+ state->owner->so_cred,
+ nd->intent.open.flags);
+ if (ret < 0)
+ goto out_close;
+ }
+ filp = lookup_instantiate_filp(nd, path->dentry, NULL);
if (!IS_ERR(filp)) {
struct nfs_open_context *ctx;
ctx = (struct nfs_open_context *)filp->private_data;
ctx->state = state;
return 0;
}
- nfs4_close_state(state, nd->intent.open.flags);
- return PTR_ERR(filp);
+ ret = PTR_ERR(filp);
+out_close:
+ nfs4_close_state(path, state, nd->intent.open.flags);
+ return ret;
}
struct dentry *
nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
+ struct path path = {
+ .mnt = nd->mnt,
+ .dentry = dentry,
+ };
struct iattr attr;
struct rpc_cred *cred;
struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred))
return (struct dentry *)cred;
- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+ state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
res = d_add_unique(dentry, igrab(state->inode));
if (res != NULL)
dentry = res;
- nfs4_intent_set_file(nd, dentry, state);
+ nfs4_intent_set_file(nd, &path, state);
return res;
}
int
nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
{
+ struct path path = {
+ .mnt = nd->mnt,
+ .dentry = dentry,
+ };
struct rpc_cred *cred;
struct nfs4_state *state;
cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
- state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
- if (IS_ERR(state))
- state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+ state = nfs4_do_open(dir, &path, openflags, NULL, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
}
}
if (state->inode == dentry->d_inode) {
- nfs4_intent_set_file(nd, dentry, state);
+ nfs4_intent_set_file(nd, &path, state);
return 1;
}
- nfs4_close_state(state, openflags);
+ nfs4_close_state(&path, state, openflags);
out_drop:
d_drop(dentry);
return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
dprintk("NFS call lookupfh %s\n", name->name);
status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply lookupfh: %d\n", status);
- if (status == -NFS4ERR_MOVED)
- status = -EREMOTE;
return status;
}
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_proc_lookupfh(server, dirfh, name,
- fhandle, fattr),
- &exception);
+ err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+ /* FIXME: !!!! */
+ if (err == -NFS4ERR_MOVED) {
+ err = -EREMOTE;
+ break;
+ }
+ err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
- int status;
- struct nfs_server *server = NFS_SERVER(dir);
- struct nfs4_lookup_arg args = {
- .bitmask = server->attr_bitmask,
- .dir_fh = NFS_FH(dir),
- .name = name,
- };
- struct nfs4_lookup_res res = {
- .server = server,
- .fattr = fattr,
- .fh = fhandle,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
- .rpc_argp = &args,
- .rpc_resp = &res,
- };
-
- nfs_fattr_init(fattr);
+ int status;
dprintk("NFS call lookup %s\n", name->name);
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
if (status == -NFS4ERR_MOVED)
status = nfs4_get_referral(dir, name, fattr, fhandle);
dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags, struct nameidata *nd)
{
+ struct path path = {
+ .mnt = nd->mnt,
+ .dentry = dentry,
+ };
struct nfs4_state *state;
struct rpc_cred *cred;
int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = PTR_ERR(cred);
goto out;
}
- state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+ state = nfs4_do_open(dir, &path, flags, sattr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(state->inode, sattr);
+ nfs_post_op_update_inode(state->inode, &fattr);
}
- if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
- status = nfs4_intent_set_file(nd, dentry, state);
+ if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+ status = nfs4_intent_set_file(nd, &path, state);
else
- nfs4_close_state(state, flags);
+ nfs4_close_state(&path, state, flags);
out:
return status;
}
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
if (status != 0)
goto out;
lsp = request->fl_u.nfs4_fl.owner;
- arg.lock_owner.id = lsp->ls_id;
+ arg.lock_owner.id = lsp->ls_id.id;
status = rpc_call_sync(server->client, &msg, 0);
switch (status) {
case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
{
struct nfs4_unlockdata *data;
+ /* Ensure this is an unlock - when canceling a lock, the
+ * canceled lock is passed in, and it won't be an unlock.
+ */
+ fl->fl_type = F_UNLCK;
+
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
goto out_free;
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
- p->arg.lock_owner.id = lsp->ls_id;
+ p->arg.lock_owner.id = lsp->ls_id.id;
p->lsp = lsp;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
sizeof(data->lsp->ls_stateid.data));
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
- renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+ renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
}
nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f9..e9662ba81d86 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
* subsequent patch.
*/
+#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
#include <linux/kthread.h>
#include <linux/module.h>
+#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
return status;
}
-u32
-nfs4_alloc_lockowner_id(struct nfs_client *clp)
-{
- return clp->cl_lockowner_id ++;
-}
-
-static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
-{
- struct nfs4_state_owner *sp = NULL;
-
- if (!list_empty(&clp->cl_unused)) {
- sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
- atomic_inc(&sp->so_count);
- sp->so_cred = cred;
- list_move(&sp->so_list, &clp->cl_state_owners);
- clp->cl_nunused--;
- }
- return sp;
-}
-
struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
+ struct rb_node *pos;
struct rpc_cred *cred = NULL;
- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
if (list_empty(&sp->so_states))
continue;
cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
+ struct rb_node *pos;
- if (!list_empty(&clp->cl_state_owners)) {
- sp = list_entry(clp->cl_state_owners.next,
- struct nfs4_state_owner, so_list);
+ pos = rb_first(&clp->cl_state_owners);
+ if (pos != NULL) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
return get_rpccred(sp->so_cred);
}
return NULL;
}
+static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
+ __u64 minval, int maxbits)
+{
+ struct rb_node **p, *parent;
+ struct nfs_unique_id *pos;
+ __u64 mask = ~0ULL;
+
+ if (maxbits < 64)
+ mask = (1ULL << maxbits) - 1ULL;
+
+ /* Ensure distribution is more or less flat */
+ get_random_bytes(&new->id, sizeof(new->id));
+ new->id &= mask;
+ if (new->id < minval)
+ new->id += minval;
+retry:
+ p = &root->rb_node;
+ parent = NULL;
+
+ while (*p != NULL) {
+ parent = *p;
+ pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+
+ if (new->id < pos->id)
+ p = &(*p)->rb_left;
+ else if (new->id > pos->id)
+ p = &(*p)->rb_right;
+ else
+ goto id_exists;
+ }
+ rb_link_node(&new->rb_node, parent, p);
+ rb_insert_color(&new->rb_node, root);
+ return;
+id_exists:
+ for (;;) {
+ new->id++;
+ if (new->id < minval || (new->id & mask) != new->id) {
+ new->id = minval;
+ break;
+ }
+ parent = rb_next(parent);
+ if (parent == NULL)
+ break;
+ pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+ if (new->id < pos->id)
+ break;
+ }
+ goto retry;
+}
+
+static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
+{
+ rb_erase(&id->rb_node, root);
+}
+
static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
{
+ struct nfs_client *clp = server->nfs_client;
+ struct rb_node **p = &clp->cl_state_owners.rb_node,
+ *parent = NULL;
struct nfs4_state_owner *sp, *res = NULL;
- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
- if (sp->so_cred != cred)
+ while (*p != NULL) {
+ parent = *p;
+ sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+ if (server < sp->so_server) {
+ p = &parent->rb_left;
continue;
- atomic_inc(&sp->so_count);
- /* Move to the head of the list */
- list_move(&sp->so_list, &clp->cl_state_owners);
- res = sp;
- break;
+ }
+ if (server > sp->so_server) {
+ p = &parent->rb_right;
+ continue;
+ }
+ if (cred < sp->so_cred)
+ p = &parent->rb_left;
+ else if (cred > sp->so_cred)
+ p = &parent->rb_right;
+ else {
+ atomic_inc(&sp->so_count);
+ res = sp;
+ break;
+ }
}
return res;
}
+static struct nfs4_state_owner *
+nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+{
+ struct rb_node **p = &clp->cl_state_owners.rb_node,
+ *parent = NULL;
+ struct nfs4_state_owner *sp;
+
+ while (*p != NULL) {
+ parent = *p;
+ sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+ if (new->so_server < sp->so_server) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (new->so_server > sp->so_server) {
+ p = &parent->rb_right;
+ continue;
+ }
+ if (new->so_cred < sp->so_cred)
+ p = &parent->rb_left;
+ else if (new->so_cred > sp->so_cred)
+ p = &parent->rb_right;
+ else {
+ atomic_inc(&sp->so_count);
+ return sp;
+ }
+ }
+ nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
+ rb_link_node(&new->so_client_node, parent, p);
+ rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+ return new;
+}
+
+static void
+nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+{
+ if (!RB_EMPTY_NODE(&sp->so_client_node))
+ rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+ nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+}
+
/*
* nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
* create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- struct nfs_client *clp = sp->so_client;
- spin_lock(&clp->cl_lock);
- list_del_init(&sp->so_list);
- spin_unlock(&clp->cl_lock);
+ if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+ struct nfs_client *clp = sp->so_client;
+
+ spin_lock(&clp->cl_lock);
+ rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+ RB_CLEAR_NODE(&sp->so_client_node);
+ spin_unlock(&clp->cl_lock);
+ }
}
/*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *new;
- get_rpccred(cred);
- new = nfs4_alloc_state_owner();
spin_lock(&clp->cl_lock);
- sp = nfs4_find_state_owner(clp, cred);
- if (sp == NULL)
- sp = nfs4_client_grab_unused(clp, cred);
- if (sp == NULL && new != NULL) {
- list_add(&new->so_list, &clp->cl_state_owners);
- new->so_client = clp;
- new->so_id = nfs4_alloc_lockowner_id(clp);
- new->so_cred = cred;
- sp = new;
- new = NULL;
- }
+ sp = nfs4_find_state_owner(server, cred);
spin_unlock(&clp->cl_lock);
- kfree(new);
if (sp != NULL)
return sp;
- put_rpccred(cred);
- return NULL;
+ new = nfs4_alloc_state_owner();
+ if (new == NULL)
+ return NULL;
+ new->so_client = clp;
+ new->so_server = server;
+ new->so_cred = cred;
+ spin_lock(&clp->cl_lock);
+ sp = nfs4_insert_state_owner(clp, new);
+ spin_unlock(&clp->cl_lock);
+ if (sp == new)
+ get_rpccred(cred);
+ else
+ kfree(new);
+ return sp;
}
/*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
- if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
- goto out_free;
- if (list_empty(&sp->so_list))
- goto out_free;
- list_move(&sp->so_list, &clp->cl_unused);
- clp->cl_nunused++;
- spin_unlock(&clp->cl_lock);
- put_rpccred(cred);
- cred = NULL;
- return;
-out_free:
- list_del(&sp->so_list);
+ nfs4_remove_state_owner(clp, sp);
spin_unlock(&clp->cl_lock);
put_rpccred(cred);
kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
atomic_set(&state->count, 1);
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
+ seqlock_init(&state->seqlock);
return state;
}
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
struct nfs4_state *state;
list_for_each_entry(state, &nfsi->open_states, inode_states) {
- /* Is this in the process of being freed? */
- if (state->state == 0)
+ if (state->owner != owner)
continue;
- if (state->owner == owner) {
- atomic_inc(&state->count);
+ if (atomic_inc_not_zero(&state->count))
return state;
- }
}
return NULL;
}
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
/*
* Close the current file.
*/
-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
{
- struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
- int oldstate, newstate = 0;
+ int call_close = 0;
+ int newstate;
atomic_inc(&owner->so_count);
/* Protect against nfs4_find_state() */
spin_lock(&owner->so_lock);
- spin_lock(&inode->i_lock);
switch (mode & (FMODE_READ | FMODE_WRITE)) {
case FMODE_READ:
state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
case FMODE_READ|FMODE_WRITE:
state->n_rdwr--;
}
- oldstate = newstate = state->state;
+ newstate = FMODE_READ|FMODE_WRITE;
if (state->n_rdwr == 0) {
- if (state->n_rdonly == 0)
+ if (state->n_rdonly == 0) {
newstate &= ~FMODE_READ;
- if (state->n_wronly == 0)
+ call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
+ call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
+ if (state->n_wronly == 0) {
newstate &= ~FMODE_WRITE;
+ call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
+ call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
+ if (newstate == 0)
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
}
- if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
- nfs4_state_set_mode_locked(state, newstate);
- oldstate = newstate;
- }
- spin_unlock(&inode->i_lock);
+ nfs4_state_set_mode_locked(state, newstate);
spin_unlock(&owner->so_lock);
- if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
- return;
- nfs4_put_open_state(state);
- nfs4_put_state_owner(owner);
+ if (!call_close) {
+ nfs4_put_open_state(state);
+ nfs4_put_state_owner(owner);
+ } else
+ nfs4_do_close(path, state);
}
/*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
atomic_set(&lsp->ls_count, 1);
lsp->ls_owner = fl_owner;
spin_lock(&clp->cl_lock);
- lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+ nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
spin_unlock(&clp->cl_lock);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
}
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+{
+ struct nfs_client *clp = lsp->ls_state->owner->so_client;
+
+ spin_lock(&clp->cl_lock);
+ nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+ spin_unlock(&clp->cl_lock);
+ kfree(lsp);
+}
+
/*
* Return a compatible lock_state. If no initialized lock_state structure
* exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
return NULL;
}
spin_unlock(&state->state_lock);
- kfree(new);
+ if (new != NULL)
+ nfs4_free_lock_state(new);
return lsp;
}
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags);
spin_unlock(&state->state_lock);
- kfree(lsp);
+ nfs4_free_lock_state(lsp);
}
static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
+ int seq;
- memcpy(dst, &state->stateid, sizeof(*dst));
+ do {
+ seq = read_seqbegin(&state->seqlock);
+ memcpy(dst, &state->stateid, sizeof(*dst));
+ } while (read_seqretry(&state->seqlock, seq));
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
return;
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
* failed with a seqid incrementing error -
* see comments nfs_fs.h:seqid_mutating_error()
*/
-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
{
switch (status) {
case 0:
break;
case -NFS4ERR_BAD_SEQID:
+ if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+ return;
+ printk(KERN_WARNING "NFS: v4 server returned a bad"
+ "sequence-id error on an"
+ "unconfirmed sequence %p!\n",
+ seqid->sequence);
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
struct nfs4_state_owner, so_seqid);
nfs4_drop_state_owner(sp);
}
- return nfs_increment_seqid(status, seqid);
+ nfs_increment_seqid(status, seqid);
}
/*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
*/
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
- return nfs_increment_seqid(status, seqid);
+ nfs_increment_seqid(status, seqid);
}
int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
static void nfs4_state_mark_reclaim(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
+ struct rb_node *pos;
struct nfs4_state *state;
struct nfs4_lock_state *lock;
/* Reset all sequence ids to zero */
- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
sp->so_seqid.counter = 0;
sp->so_seqid.flags = 0;
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
lock->ls_seqid.counter = 0;
lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
{
struct nfs_client *clp = ptr;
struct nfs4_state_owner *sp;
+ struct rb_node *pos;
struct nfs4_state_recovery_ops *ops;
struct rpc_cred *cred;
int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
/* Mark all delegations for reclaim */
nfs_delegation_mark_reclaim(clp);
/* Note: list is protected by exclusive lock on cl->cl_sem */
- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
status = nfs4_reclaim_open_state(ops, sp);
if (status < 0) {
if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9a..c08738441f73 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
#endif
/* lock,open owner id:
- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2)
+ * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
-#define owner_id_maxsz (1 + 1)
+#define open_owner_id_maxsz (1 + 4)
+#define lock_owner_id_maxsz (1 + 4)
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define op_encode_hdr_maxsz (1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
/* This is based on getfattr, which uses the most attributes: */
#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
- 3 + 3 + 3 + 2 * nfs4_name_maxsz))
+ 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
nfs4_fattr_value_maxsz)
#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
3 + (NFS4_VERIFIER_SIZE >> 2))
#define decode_setclientid_confirm_maxsz \
(op_decode_hdr_maxsz)
-#define encode_lookup_maxsz (op_encode_hdr_maxsz + \
- 1 + ((3 + NFS4_FHSIZE) >> 2))
+#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
+#define decode_lookup_maxsz (op_decode_hdr_maxsz)
+#define encode_share_access_maxsz \
+ (2)
+#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
+#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
+#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
+#define encode_open_maxsz (op_encode_hdr_maxsz + \
+ 2 + encode_share_access_maxsz + 2 + \
+ open_owner_id_maxsz + \
+ encode_opentype_maxsz + \
+ encode_claim_null_maxsz)
+#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
+#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
+ decode_ace_maxsz)
+#define decode_change_info_maxsz (5)
+#define decode_open_maxsz (op_decode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ decode_change_info_maxsz + 1 + \
+ nfs4_fattr_bitmap_maxsz + \
+ decode_delegation_maxsz)
#define encode_remove_maxsz (op_encode_hdr_maxsz + \
nfs4_name_maxsz)
#define encode_rename_maxsz (op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
#define encode_create_maxsz (op_encode_hdr_maxsz + \
2 + nfs4_name_maxsz + \
nfs4_fattr_maxsz)
-#define decode_create_maxsz (op_decode_hdr_maxsz + 8)
+#define decode_create_maxsz (op_decode_hdr_maxsz + \
+ decode_change_info_maxsz + \
+ nfs4_fattr_bitmap_maxsz)
#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define encode_fs_locations_maxsz \
+ (encode_getattr_maxsz)
+#define decode_fs_locations_maxsz \
+ (0)
#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
op_decode_hdr_maxsz + 2 + \
decode_getattr_maxsz)
#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
- encode_putfh_maxsz + \
- op_encode_hdr_maxsz + \
- 13 + 3 + 2 + 64 + \
- encode_getattr_maxsz + \
- encode_getfh_maxsz)
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_open_maxsz + \
+ encode_getfh_maxsz + \
+ encode_getattr_maxsz + \
+ encode_restorefh_maxsz + \
+ encode_getattr_maxsz)
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
- decode_putfh_maxsz + \
- op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
- decode_getattr_maxsz + \
- decode_getfh_maxsz)
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_open_maxsz + \
+ decode_getfh_maxsz + \
+ decode_getattr_maxsz + \
+ decode_restorefh_maxsz + \
+ decode_getattr_maxsz)
#define NFS4_enc_open_confirm_sz \
(compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
op_decode_hdr_maxsz + 4)
#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
- op_encode_hdr_maxsz + \
- 11)
+ encode_open_maxsz + \
+ encode_getattr_maxsz)
#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
- op_decode_hdr_maxsz + \
- 4 + 5 + 2 + 3)
+ decode_open_maxsz + \
+ decode_getattr_maxsz)
#define NFS4_enc_open_downgrade_sz \
(compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
op_encode_hdr_maxsz + \
1 + 1 + 2 + 2 + \
1 + 4 + 1 + 2 + \
- owner_id_maxsz)
+ lock_owner_id_maxsz)
#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_getattr_maxsz + \
op_decode_hdr_maxsz + \
2 + 2 + 1 + 2 + \
- owner_id_maxsz)
+ lock_owner_id_maxsz)
#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_getattr_maxsz + \
op_encode_hdr_maxsz + \
1 + 2 + 2 + 2 + \
- owner_id_maxsz)
+ lock_owner_id_maxsz)
#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
encode_getfh_maxsz)
#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
- op_decode_hdr_maxsz + \
+ decode_lookup_maxsz + \
decode_getattr_maxsz + \
decode_getfh_maxsz)
#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_fs_locations_sz \
(compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
- encode_getattr_maxsz)
+ encode_lookup_maxsz + \
+ encode_fs_locations_maxsz)
#define NFS4_dec_fs_locations_sz \
(compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
- op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz)
+ decode_lookup_maxsz + \
+ decode_fs_locations_maxsz)
static struct {
unsigned int mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
WRITE64(nfs4_lock_length(args->fl));
WRITE32(args->new_lock_owner);
if (args->new_lock_owner){
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
+ RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
WRITE32(args->open_seqid->sequence->counter);
WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
WRITE32(args->lock_seqid->sequence->counter);
WRITE64(args->lock_owner.clientid);
- WRITE32(4);
- WRITE32(args->lock_owner.id);
+ WRITE32(16);
+ WRITEMEM("lock id:", 8);
+ WRITE64(args->lock_owner.id);
}
else {
RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
{
__be32 *p;
- RESERVE_SPACE(40);
+ RESERVE_SPACE(52);
WRITE32(OP_LOCKT);
WRITE32(nfs4_lock_type(args->fl, 0));
WRITE64(args->fl->fl_start);
WRITE64(nfs4_lock_length(args->fl));
WRITE64(args->lock_owner.clientid);
- WRITE32(4);
- WRITE32(args->lock_owner.id);
+ WRITE32(16);
+ WRITEMEM("lock id:", 8);
+ WRITE64(args->lock_owner.id);
return 0;
}
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
WRITE32(OP_OPEN);
WRITE32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->open_flags);
- RESERVE_SPACE(16);
+ RESERVE_SPACE(28);
WRITE64(arg->clientid);
- WRITE32(4);
- WRITE32(arg->id);
+ WRITE32(16);
+ WRITEMEM("open id:", 8);
+ WRITE64(arg->id);
}
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
uint32_t attrs[2] = {
FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
unsigned int replen;
__be32 *p;
@@ -1735,7 +1772,7 @@ out:
*/
static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
{
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
struct xdr_stream xdr;
struct compound_hdr hdr = {
.nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
struct nfs_getaclargs *args)
{
struct xdr_stream xdr;
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
struct compound_hdr hdr = {
.nops = 2,
};
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
struct compound_hdr hdr = {
.nops = 3,
};
- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
int replen;
int status;
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
{
__be32 *p;
- uint32_t bmlen;
+ uint32_t savewords, bmlen, i;
int status;
status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
goto xdr_error;
READ_BUF(bmlen << 2);
- p += bmlen;
+ savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
+ for (i = 0; i < savewords; ++i)
+ READ32(res->attrset[i]);
+ for (; i < NFS4_BITMAP_SIZE; i++)
+ res->attrset[i] = 0;
+
return decode_delegation(xdr, res);
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d7..3490322d1145 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
program, version, NIPQUAD(servaddr));
set_sockaddr(&sin, servaddr, 0);
- return rpcb_getport_external(&sin, program, version, proto);
+ return rpcb_getport_sync(&sin, program, version, proto);
}
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
NFS_MNT3_VERSION : NFS_MNT_VERSION;
set_sockaddr(&sin, servaddr, htons(mount_port));
- status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+ status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
+ nfs_path, version, protocol, &fh);
if (status < 0)
printk(KERN_ERR "Root-NFS: Server returned error %d "
"while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e80..f56dae5216f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
- atomic_set(&req->wb_count, 1);
req->wb_context = get_nfs_open_context(ctx);
-
+ kref_init(&req->wb_kref);
return req;
}
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
}
/**
- * nfs_set_page_writeback_locked - Lock a request for writeback
+ * nfs_set_page_tag_locked - Tag a request as locked
* @req:
*/
-int nfs_set_page_writeback_locked(struct nfs_page *req)
+static int nfs_set_page_tag_locked(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
if (!nfs_lock_request(req))
return 0;
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
/**
- * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
*/
-void nfs_clear_page_writeback(struct nfs_page *req)
+void nfs_clear_page_tag_locked(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
if (req->wb_page != NULL) {
- spin_lock(&nfsi->req_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
- spin_unlock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ spin_unlock(&inode->i_lock);
}
nfs_unlock_request(req);
}
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
*
* Note: Should never be called with the spinlock held!
*/
-void
-nfs_release_request(struct nfs_page *req)
+static void nfs_free_request(struct kref *kref)
{
- if (!atomic_dec_and_test(&req->wb_count))
- return;
+ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
/* Release struct file or cached credential */
nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
nfs_page_free(req);
}
+void nfs_release_request(struct nfs_page *req)
+{
+ kref_put(&req->wb_kref, nfs_free_request);
+}
+
static int nfs_wait_bit_interruptible(void *word)
{
int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
int
nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+ struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
sigset_t oldmask;
int ret = 0;
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
/**
* nfs_scan_list - Scan a list for matching requests
* @nfsi: NFS inode
- * @head: One of the NFS inode request lists
* @dst: Destination list
* @idx_start: lower bound of page->index to scan
* @npages: idx_start + npages sets the upper bound to scan.
+ * @tag: tag to scan for
*
* Moves elements from one of the inode request lists.
* If the number of requests is set to 0, the entire address_space
* starting at index idx_start, is scanned.
* The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's req_lock when calling this function
+ * You must be holding the inode's i_lock when calling this function
*/
-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+int nfs_scan_list(struct nfs_inode *nfsi,
struct list_head *dst, pgoff_t idx_start,
- unsigned int npages)
+ unsigned int npages, int tag)
{
struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
idx_end = idx_start + npages - 1;
for (;;) {
- found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
+ found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
(void **)&pgvec[0], idx_start,
- NFS_SCAN_MAXENTRIES);
+ NFS_SCAN_MAXENTRIES, tag);
if (found <= 0)
break;
for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
if (req->wb_index > idx_end)
goto out;
idx_start = req->wb_index + 1;
- if (req->wb_list_head != head)
- continue;
- if (nfs_set_page_writeback_locked(req)) {
+ if (nfs_set_page_tag_locked(req)) {
nfs_list_remove_request(req);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ req->wb_index, tag);
nfs_list_add_request(req, dst);
res++;
+ if (res == INT_MAX)
+ goto out;
}
}
-
+ /* for latency reduction */
+ cond_resched_lock(&nfsi->vfs_inode.i_lock);
}
out:
return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c034..6ae2e58ed05a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
unlock_page(req->wb_page);
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_context->path.dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
int flags;
data->req = req;
- data->inode = inode = req->wb_context->dentry->d_inode;
+ data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = req->wb_context->cred;
data->args.fh = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
*/
error = nfs_wb_page(inode, page);
if (error)
- goto out_error;
+ goto out_unlock;
+ if (PageUptodate(page))
+ goto out_unlock;
error = -ESTALE;
if (NFS_STALE(inode))
- goto out_error;
+ goto out_unlock;
if (file == NULL) {
error = -EBADF;
ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (ctx == NULL)
- goto out_error;
+ goto out_unlock;
} else
ctx = get_nfs_open_context((struct nfs_open_context *)
file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
put_nfs_open_context(ctx);
return error;
-
-out_error:
+out_unlock:
unlock_page(page);
return error;
}
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
struct inode *inode = page->mapping->host;
struct nfs_page *new;
unsigned int len;
+ int error;
+
+ error = nfs_wb_page(inode, page);
+ if (error)
+ goto out_unlock;
+ if (PageUptodate(page))
+ goto out_unlock;
- nfs_wb_page(inode, page);
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
+
new = nfs_create_request(desc->ctx, inode, page, 0, len);
- if (IS_ERR(new)) {
- SetPageError(page);
- unlock_page(page);
- return PTR_ERR(new);
- }
+ if (IS_ERR(new))
+ goto out_error;
+
if (len < PAGE_CACHE_SIZE)
zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
nfs_pageio_add_request(desc->pgio, new);
return 0;
+out_error:
+ error = PTR_ERR(new);
+ SetPageError(page);
+out_unlock:
+ unlock_page(page);
+ return error;
}
int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc2609..a2b1af89ca1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/magic.h>
+#include <linux/parser.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -57,6 +58,167 @@
#define NFSDBG_FACILITY NFSDBG_VFS
+
+struct nfs_parsed_mount_data {
+ int flags;
+ int rsize, wsize;
+ int timeo, retrans;
+ int acregmin, acregmax,
+ acdirmin, acdirmax;
+ int namlen;
+ unsigned int bsize;
+ unsigned int auth_flavor_len;
+ rpc_authflavor_t auth_flavors[1];
+ char *client_address;
+
+ struct {
+ struct sockaddr_in address;
+ unsigned int program;
+ unsigned int version;
+ unsigned short port;
+ int protocol;
+ } mount_server;
+
+ struct {
+ struct sockaddr_in address;
+ char *hostname;
+ char *export_path;
+ unsigned int program;
+ int protocol;
+ } nfs_server;
+};
+
+enum {
+ /* Mount options that take no arguments */
+ Opt_soft, Opt_hard,
+ Opt_intr, Opt_nointr,
+ Opt_posix, Opt_noposix,
+ Opt_cto, Opt_nocto,
+ Opt_ac, Opt_noac,
+ Opt_lock, Opt_nolock,
+ Opt_v2, Opt_v3,
+ Opt_udp, Opt_tcp,
+ Opt_acl, Opt_noacl,
+ Opt_rdirplus, Opt_nordirplus,
+ Opt_sharecache, Opt_nosharecache,
+
+ /* Mount options that take integer arguments */
+ Opt_port,
+ Opt_rsize, Opt_wsize, Opt_bsize,
+ Opt_timeo, Opt_retrans,
+ Opt_acregmin, Opt_acregmax,
+ Opt_acdirmin, Opt_acdirmax,
+ Opt_actimeo,
+ Opt_namelen,
+ Opt_mountport,
+ Opt_mountprog, Opt_mountvers,
+ Opt_nfsprog, Opt_nfsvers,
+
+ /* Mount options that take string arguments */
+ Opt_sec, Opt_proto, Opt_mountproto,
+ Opt_addr, Opt_mounthost, Opt_clientaddr,
+
+ /* Mount options that are ignored */
+ Opt_userspace, Opt_deprecated,
+
+ Opt_err
+};
+
+static match_table_t nfs_mount_option_tokens = {
+ { Opt_userspace, "bg" },
+ { Opt_userspace, "fg" },
+ { Opt_soft, "soft" },
+ { Opt_hard, "hard" },
+ { Opt_intr, "intr" },
+ { Opt_nointr, "nointr" },
+ { Opt_posix, "posix" },
+ { Opt_noposix, "noposix" },
+ { Opt_cto, "cto" },
+ { Opt_nocto, "nocto" },
+ { Opt_ac, "ac" },
+ { Opt_noac, "noac" },
+ { Opt_lock, "lock" },
+ { Opt_nolock, "nolock" },
+ { Opt_v2, "v2" },
+ { Opt_v3, "v3" },
+ { Opt_udp, "udp" },
+ { Opt_tcp, "tcp" },
+ { Opt_acl, "acl" },
+ { Opt_noacl, "noacl" },
+ { Opt_rdirplus, "rdirplus" },
+ { Opt_nordirplus, "nordirplus" },
+ { Opt_sharecache, "sharecache" },
+ { Opt_nosharecache, "nosharecache" },
+
+ { Opt_port, "port=%u" },
+ { Opt_rsize, "rsize=%u" },
+ { Opt_wsize, "wsize=%u" },
+ { Opt_bsize, "bsize=%u" },
+ { Opt_timeo, "timeo=%u" },
+ { Opt_retrans, "retrans=%u" },
+ { Opt_acregmin, "acregmin=%u" },
+ { Opt_acregmax, "acregmax=%u" },
+ { Opt_acdirmin, "acdirmin=%u" },
+ { Opt_acdirmax, "acdirmax=%u" },
+ { Opt_actimeo, "actimeo=%u" },
+ { Opt_userspace, "retry=%u" },
+ { Opt_namelen, "namlen=%u" },
+ { Opt_mountport, "mountport=%u" },
+ { Opt_mountprog, "mountprog=%u" },
+ { Opt_mountvers, "mountvers=%u" },
+ { Opt_nfsprog, "nfsprog=%u" },
+ { Opt_nfsvers, "nfsvers=%u" },
+ { Opt_nfsvers, "vers=%u" },
+
+ { Opt_sec, "sec=%s" },
+ { Opt_proto, "proto=%s" },
+ { Opt_mountproto, "mountproto=%s" },
+ { Opt_addr, "addr=%s" },
+ { Opt_clientaddr, "clientaddr=%s" },
+ { Opt_mounthost, "mounthost=%s" },
+
+ { Opt_err, NULL }
+};
+
+enum {
+ Opt_xprt_udp, Opt_xprt_tcp,
+
+ Opt_xprt_err
+};
+
+static match_table_t nfs_xprt_protocol_tokens = {
+ { Opt_xprt_udp, "udp" },
+ { Opt_xprt_tcp, "tcp" },
+
+ { Opt_xprt_err, NULL }
+};
+
+enum {
+ Opt_sec_none, Opt_sec_sys,
+ Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
+ Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
+ Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
+
+ Opt_sec_err
+};
+
+static match_table_t nfs_secflavor_tokens = {
+ { Opt_sec_none, "none" },
+ { Opt_sec_none, "null" },
+ { Opt_sec_sys, "sys" },
+
+ { Opt_sec_krb5, "krb5" },
+ { Opt_sec_krb5i, "krb5i" },
+ { Opt_sec_krb5p, "krb5p" },
+
+ { Opt_sec_lkey, "lkey" },
+ { Opt_sec_lkeyi, "lkeyi" },
+ { Opt_sec_lkeyp, "lkeyp" },
+
+ { Opt_sec_err, NULL }
+};
+
+
static void nfs_umount_begin(struct vfsmount *, int);
static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
{ RPC_AUTH_GSS_SPKM, "spkm" },
{ RPC_AUTH_GSS_SPKMI, "spkmi" },
{ RPC_AUTH_GSS_SPKMP, "spkmp" },
- { -1, "unknown" }
+ { UINT_MAX, "unknown" }
};
int i;
- for (i=0; sec_flavours[i].flavour != -1; i++) {
+ for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
if (sec_flavours[i].flavour == flavour)
break;
}
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
{ NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+ { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
{ 0, NULL, NULL }
};
const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
*/
static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
{
+ struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
+ struct rpc_clnt *rpc;
+
shrink_submounts(vfsmnt, &nfs_automount_list);
+
+ if (!(flags & MNT_FORCE))
+ return;
+ /* -EIO all pending I/O */
+ rpc = server->client_acl;
+ if (!IS_ERR(rpc))
+ rpc_killall_tasks(rpc);
+ rpc = server->client;
+ if (!IS_ERR(rpc))
+ rpc_killall_tasks(rpc);
}
/*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
+ * Sanity-check a server address provided by the mount command
*/
-static int nfs_validate_mount_data(struct nfs_mount_data *data,
- struct nfs_fh *mntfh)
+static int nfs_verify_server_address(struct sockaddr *addr)
{
- if (data == NULL) {
- dprintk("%s: missing data argument\n", __FUNCTION__);
- return -EINVAL;
+ switch (addr->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sa = (struct sockaddr_in *) addr;
+ if (sa->sin_addr.s_addr != INADDR_ANY)
+ return 1;
+ break;
+ }
}
- if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
- dprintk("%s: bad mount version\n", __FUNCTION__);
- return -EINVAL;
+ return 0;
+}
+
+/*
+ * Error-check and convert a string of mount options from user space into
+ * a data structure
+ */
+static int nfs_parse_mount_options(char *raw,
+ struct nfs_parsed_mount_data *mnt)
+{
+ char *p, *string;
+
+ if (!raw) {
+ dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
+ return 1;
}
+ dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
- switch (data->version) {
- case 1:
- data->namlen = 0;
- case 2:
- data->bsize = 0;
- case 3:
- if (data->flags & NFS_MOUNT_VER3) {
- dprintk("%s: mount structure version %d does not support NFSv3\n",
- __FUNCTION__,
- data->version);
- return -EINVAL;
+ while ((p = strsep(&raw, ",")) != NULL) {
+ substring_t args[MAX_OPT_ARGS];
+ int option, token;
+
+ if (!*p)
+ continue;
+
+ dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
+
+ token = match_token(p, nfs_mount_option_tokens, args);
+ switch (token) {
+ case Opt_soft:
+ mnt->flags |= NFS_MOUNT_SOFT;
+ break;
+ case Opt_hard:
+ mnt->flags &= ~NFS_MOUNT_SOFT;
+ break;
+ case Opt_intr:
+ mnt->flags |= NFS_MOUNT_INTR;
+ break;
+ case Opt_nointr:
+ mnt->flags &= ~NFS_MOUNT_INTR;
+ break;
+ case Opt_posix:
+ mnt->flags |= NFS_MOUNT_POSIX;
+ break;
+ case Opt_noposix:
+ mnt->flags &= ~NFS_MOUNT_POSIX;
+ break;
+ case Opt_cto:
+ mnt->flags &= ~NFS_MOUNT_NOCTO;
+ break;
+ case Opt_nocto:
+ mnt->flags |= NFS_MOUNT_NOCTO;
+ break;
+ case Opt_ac:
+ mnt->flags &= ~NFS_MOUNT_NOAC;
+ break;
+ case Opt_noac:
+ mnt->flags |= NFS_MOUNT_NOAC;
+ break;
+ case Opt_lock:
+ mnt->flags &= ~NFS_MOUNT_NONLM;
+ break;
+ case Opt_nolock:
+ mnt->flags |= NFS_MOUNT_NONLM;
+ break;
+ case Opt_v2:
+ mnt->flags &= ~NFS_MOUNT_VER3;
+ break;
+ case Opt_v3:
+ mnt->flags |= NFS_MOUNT_VER3;
+ break;
+ case Opt_udp:
+ mnt->flags &= ~NFS_MOUNT_TCP;
+ mnt->nfs_server.protocol = IPPROTO_UDP;
+ mnt->timeo = 7;
+ mnt->retrans = 5;
+ break;
+ case Opt_tcp:
+ mnt->flags |= NFS_MOUNT_TCP;
+ mnt->nfs_server.protocol = IPPROTO_TCP;
+ mnt->timeo = 600;
+ mnt->retrans = 2;
+ break;
+ case Opt_acl:
+ mnt->flags &= ~NFS_MOUNT_NOACL;
+ break;
+ case Opt_noacl:
+ mnt->flags |= NFS_MOUNT_NOACL;
+ break;
+ case Opt_rdirplus:
+ mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_nordirplus:
+ mnt->flags |= NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_sharecache:
+ mnt->flags &= ~NFS_MOUNT_UNSHARED;
+ break;
+ case Opt_nosharecache:
+ mnt->flags |= NFS_MOUNT_UNSHARED;
+ break;
+
+ case Opt_port:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0 || option > 65535)
+ return 0;
+ mnt->nfs_server.address.sin_port = htonl(option);
+ break;
+ case Opt_rsize:
+ if (match_int(args, &mnt->rsize))
+ return 0;
+ break;
+ case Opt_wsize:
+ if (match_int(args, &mnt->wsize))
+ return 0;
+ break;
+ case Opt_bsize:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ mnt->bsize = option;
+ break;
+ case Opt_timeo:
+ if (match_int(args, &mnt->timeo))
+ return 0;
+ break;
+ case Opt_retrans:
+ if (match_int(args, &mnt->retrans))
+ return 0;
+ break;
+ case Opt_acregmin:
+ if (match_int(args, &mnt->acregmin))
+ return 0;
+ break;
+ case Opt_acregmax:
+ if (match_int(args, &mnt->acregmax))
+ return 0;
+ break;
+ case Opt_acdirmin:
+ if (match_int(args, &mnt->acdirmin))
+ return 0;
+ break;
+ case Opt_acdirmax:
+ if (match_int(args, &mnt->acdirmax))
+ return 0;
+ break;
+ case Opt_actimeo:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ mnt->acregmin =
+ mnt->acregmax =
+ mnt->acdirmin =
+ mnt->acdirmax = option;
+ break;
+ case Opt_namelen:
+ if (match_int(args, &mnt->namlen))
+ return 0;
+ break;
+ case Opt_mountport:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0 || option > 65535)
+ return 0;
+ mnt->mount_server.port = option;
+ break;
+ case Opt_mountprog:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ mnt->mount_server.program = option;
+ break;
+ case Opt_mountvers:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ mnt->mount_server.version = option;
+ break;
+ case Opt_nfsprog:
+ if (match_int(args, &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ mnt->nfs_server.program = option;
+ break;
+ case Opt_nfsvers:
+ if (match_int(args, &option))
+ return 0;
+ switch (option) {
+ case 2:
+ mnt->flags &= ~NFS_MOUNT_VER3;
+ break;
+ case 3:
+ mnt->flags |= NFS_MOUNT_VER3;
+ break;
+ default:
+ goto out_unrec_vers;
}
- data->root.size = NFS2_FHSIZE;
- memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
- case 4:
- if (data->flags & NFS_MOUNT_SECFLAVOUR) {
- dprintk("%s: mount structure version %d does not support strong security\n",
- __FUNCTION__,
- data->version);
- return -EINVAL;
+ break;
+
+ case Opt_sec:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ token = match_token(string, nfs_secflavor_tokens, args);
+ kfree(string);
+
+ /*
+ * The flags setting is for v2/v3. The flavor_len
+ * setting is for v4. v2/v3 also need to know the
+ * difference between NULL and UNIX.
+ */
+ switch (token) {
+ case Opt_sec_none:
+ mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 0;
+ mnt->auth_flavors[0] = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 0;
+ mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+ mnt->auth_flavor_len = 1;
+ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ goto out_unrec_sec;
}
- case 5:
- memset(data->context, 0, sizeof(data->context));
- }
+ break;
+ case Opt_proto:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ token = match_token(string,
+ nfs_xprt_protocol_tokens, args);
+ kfree(string);
+
+ switch (token) {
+ case Opt_udp:
+ mnt->flags &= ~NFS_MOUNT_TCP;
+ mnt->nfs_server.protocol = IPPROTO_UDP;
+ mnt->timeo = 7;
+ mnt->retrans = 5;
+ break;
+ case Opt_tcp:
+ mnt->flags |= NFS_MOUNT_TCP;
+ mnt->nfs_server.protocol = IPPROTO_TCP;
+ mnt->timeo = 600;
+ mnt->retrans = 2;
+ break;
+ default:
+ goto out_unrec_xprt;
+ }
+ break;
+ case Opt_mountproto:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ token = match_token(string,
+ nfs_xprt_protocol_tokens, args);
+ kfree(string);
+
+ switch (token) {
+ case Opt_udp:
+ mnt->mount_server.protocol = IPPROTO_UDP;
+ break;
+ case Opt_tcp:
+ mnt->mount_server.protocol = IPPROTO_TCP;
+ break;
+ default:
+ goto out_unrec_xprt;
+ }
+ break;
+ case Opt_addr:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ mnt->nfs_server.address.sin_family = AF_INET;
+ mnt->nfs_server.address.sin_addr.s_addr =
+ in_aton(string);
+ kfree(string);
+ break;
+ case Opt_clientaddr:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ mnt->client_address = string;
+ break;
+ case Opt_mounthost:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ mnt->mount_server.address.sin_family = AF_INET;
+ mnt->mount_server.address.sin_addr.s_addr =
+ in_aton(string);
+ kfree(string);
+ break;
- /* Set the pseudoflavor */
- if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
- data->pseudoflavor = RPC_AUTH_UNIX;
+ case Opt_userspace:
+ case Opt_deprecated:
+ break;
-#ifndef CONFIG_NFS_V3
- /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
- if (data->flags & NFS_MOUNT_VER3) {
- dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
- return -EPROTONOSUPPORT;
+ default:
+ goto out_unknown;
+ }
}
-#endif /* CONFIG_NFS_V3 */
- /* We now require that the mount process passes the remote address */
- if (data->addr.sin_addr.s_addr == INADDR_ANY) {
- dprintk("%s: mount program didn't pass remote address!\n",
- __FUNCTION__);
- return -EINVAL;
+ return 1;
+
+out_nomem:
+ printk(KERN_INFO "NFS: not enough memory to parse option\n");
+ return 0;
+
+out_unrec_vers:
+ printk(KERN_INFO "NFS: unrecognized NFS version number\n");
+ return 0;
+
+out_unrec_xprt:
+ printk(KERN_INFO "NFS: unrecognized transport protocol\n");
+ return 0;
+
+out_unrec_sec:
+ printk(KERN_INFO "NFS: unrecognized security flavor\n");
+ return 0;
+
+out_unknown:
+ printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
+ return 0;
+}
+
+/*
+ * Use the remote server's MOUNT service to request the NFS file handle
+ * corresponding to the provided path.
+ */
+static int nfs_try_mount(struct nfs_parsed_mount_data *args,
+ struct nfs_fh *root_fh)
+{
+ struct sockaddr_in sin;
+ int status;
+
+ if (args->mount_server.version == 0) {
+ if (args->flags & NFS_MOUNT_VER3)
+ args->mount_server.version = NFS_MNT3_VERSION;
+ else
+ args->mount_server.version = NFS_MNT_VERSION;
}
- /* Prepare the root filehandle */
- if (data->flags & NFS_MOUNT_VER3)
- mntfh->size = data->root.size;
+ /*
+ * Construct the mount server's address.
+ */
+ if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
+ sin = args->mount_server.address;
else
- mntfh->size = NFS2_FHSIZE;
+ sin = args->nfs_server.address;
+ if (args->mount_server.port == 0) {
+ status = rpcb_getport_sync(&sin,
+ args->mount_server.program,
+ args->mount_server.version,
+ args->mount_server.protocol);
+ if (status < 0)
+ goto out_err;
+ sin.sin_port = htons(status);
+ } else
+ sin.sin_port = htons(args->mount_server.port);
+
+ /*
+ * Now ask the mount server to map our export path
+ * to a file handle.
+ */
+ status = nfs_mount((struct sockaddr *) &sin,
+ sizeof(sin),
+ args->nfs_server.hostname,
+ args->nfs_server.export_path,
+ args->mount_server.version,
+ args->mount_server.protocol,
+ root_fh);
+ if (status < 0)
+ goto out_err;
+
+ return status;
- if (mntfh->size > sizeof(mntfh->data)) {
- dprintk("%s: invalid root filehandle\n", __FUNCTION__);
- return -EINVAL;
+out_err:
+ dfprintk(MOUNT, "NFS: unable to contact server on host "
+ NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+ return status;
+}
+
+/*
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ * immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ * mountproto=tcp after mountproto=udp, and so on
+ *
+ * XXX: as far as I can tell, changing the NFS program number is not
+ * supported in the NFS client.
+ */
+static int nfs_validate_mount_data(struct nfs_mount_data **options,
+ struct nfs_fh *mntfh,
+ const char *dev_name)
+{
+ struct nfs_mount_data *data = *options;
+
+ if (data == NULL)
+ goto out_no_data;
+
+ switch (data->version) {
+ case 1:
+ data->namlen = 0;
+ case 2:
+ data->bsize = 0;
+ case 3:
+ if (data->flags & NFS_MOUNT_VER3)
+ goto out_no_v3;
+ data->root.size = NFS2_FHSIZE;
+ memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+ case 4:
+ if (data->flags & NFS_MOUNT_SECFLAVOUR)
+ goto out_no_sec;
+ case 5:
+ memset(data->context, 0, sizeof(data->context));
+ case 6:
+ if (data->flags & NFS_MOUNT_VER3)
+ mntfh->size = data->root.size;
+ else
+ mntfh->size = NFS2_FHSIZE;
+
+ if (mntfh->size > sizeof(mntfh->data))
+ goto out_invalid_fh;
+
+ memcpy(mntfh->data, data->root.data, mntfh->size);
+ if (mntfh->size < sizeof(mntfh->data))
+ memset(mntfh->data + mntfh->size, 0,
+ sizeof(mntfh->data) - mntfh->size);
+ break;
+ default: {
+ unsigned int len;
+ char *c;
+ int status;
+ struct nfs_parsed_mount_data args = {
+ .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
+ .rsize = NFS_MAX_FILE_IO_SIZE,
+ .wsize = NFS_MAX_FILE_IO_SIZE,
+ .timeo = 600,
+ .retrans = 2,
+ .acregmin = 3,
+ .acregmax = 60,
+ .acdirmin = 30,
+ .acdirmax = 60,
+ .mount_server.protocol = IPPROTO_UDP,
+ .mount_server.program = NFS_MNT_PROGRAM,
+ .nfs_server.protocol = IPPROTO_TCP,
+ .nfs_server.program = NFS_PROGRAM,
+ };
+
+ if (nfs_parse_mount_options((char *) *options, &args) == 0)
+ return -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ /*
+ * NB: after this point, caller will free "data"
+ * if we return an error
+ */
+ *options = data;
+
+ c = strchr(dev_name, ':');
+ if (c == NULL)
+ return -EINVAL;
+ len = c - dev_name - 1;
+ if (len > sizeof(data->hostname))
+ return -EINVAL;
+ strncpy(data->hostname, dev_name, len);
+ args.nfs_server.hostname = data->hostname;
+
+ c++;
+ if (strlen(c) > NFS_MAXPATHLEN)
+ return -EINVAL;
+ args.nfs_server.export_path = c;
+
+ status = nfs_try_mount(&args, mntfh);
+ if (status)
+ return -EINVAL;
+
+ /*
+ * Translate to nfs_mount_data, which nfs_fill_super
+ * can deal with.
+ */
+ data->version = 6;
+ data->flags = args.flags;
+ data->rsize = args.rsize;
+ data->wsize = args.wsize;
+ data->timeo = args.timeo;
+ data->retrans = args.retrans;
+ data->acregmin = args.acregmin;
+ data->acregmax = args.acregmax;
+ data->acdirmin = args.acdirmin;
+ data->acdirmax = args.acdirmax;
+ data->addr = args.nfs_server.address;
+ data->namlen = args.namlen;
+ data->bsize = args.bsize;
+ data->pseudoflavor = args.auth_flavors[0];
+
+ break;
+ }
}
- memcpy(mntfh->data, data->root.data, mntfh->size);
- if (mntfh->size < sizeof(mntfh->data))
- memset(mntfh->data + mntfh->size, 0,
- sizeof(mntfh->data) - mntfh->size);
+ if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+ data->pseudoflavor = RPC_AUTH_UNIX;
+
+#ifndef CONFIG_NFS_V3
+ if (data->flags & NFS_MOUNT_VER3)
+ goto out_v3_not_compiled;
+#endif /* !CONFIG_NFS_V3 */
+
+ if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+ goto out_no_address;
return 0;
+
+out_no_data:
+ dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
+ return -EINVAL;
+
+out_no_v3:
+ dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
+ data->version);
+ return -EINVAL;
+
+out_no_sec:
+ dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
+ return -EINVAL;
+
+#ifndef CONFIG_NFS_V3
+out_v3_not_compiled:
+ dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
+ return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V3 */
+
+out_no_address:
+ dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+ return -EINVAL;
+
+out_invalid_fh:
+ dfprintk(MOUNT, "NFS: invalid root filehandle\n");
+ return -EINVAL;
}
/*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
{
struct nfs_server *server = data, *old = NFS_SB(sb);
- if (old->nfs_client != server->nfs_client)
+ if (memcmp(&old->nfs_client->cl_addr,
+ &server->nfs_client->cl_addr,
+ sizeof(old->nfs_client->cl_addr)) != 0)
+ return 0;
+ /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
+ if (old->flags & NFS_MOUNT_UNSHARED)
return 0;
if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
return 0;
return 1;
}
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+{
+ const struct nfs_server *a = s->s_fs_info;
+ const struct rpc_clnt *clnt_a = a->client;
+ const struct rpc_clnt *clnt_b = b->client;
+
+ if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+ goto Ebusy;
+ if (a->nfs_client != b->nfs_client)
+ goto Ebusy;
+ if (a->flags != b->flags)
+ goto Ebusy;
+ if (a->wsize != b->wsize)
+ goto Ebusy;
+ if (a->rsize != b->rsize)
+ goto Ebusy;
+ if (a->acregmin != b->acregmin)
+ goto Ebusy;
+ if (a->acregmax != b->acregmax)
+ goto Ebusy;
+ if (a->acdirmin != b->acdirmin)
+ goto Ebusy;
+ if (a->acdirmax != b->acdirmax)
+ goto Ebusy;
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ goto Ebusy;
+ return 0;
+Ebusy:
+ return -EBUSY;
+}
+
static int nfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
struct nfs_fh mntfh;
struct nfs_mount_data *data = raw_data;
struct dentry *mntroot;
+ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
int error;
/* Validate the mount data */
- error = nfs_validate_mount_data(data, &mntfh);
+ error = nfs_validate_mount_data(&data, &mntfh, dev_name);
if (error < 0)
- return error;
+ goto out;
/* Get a volume representation */
server = nfs_create_server(data, &mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
- goto out_err_noserver;
+ goto out;
}
+ if (server->flags & NFS_MOUNT_UNSHARED)
+ compare_super = NULL;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(fs_type, compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
}
if (s->s_fs_info != server) {
+ error = nfs_compare_mount_options(s, server, flags);
nfs_free_server(server);
server = NULL;
+ if (error < 0)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
mnt->mnt_root = mntroot;
- return 0;
+ error = 0;
+
+out:
+ if (data != raw_data)
+ kfree(data);
+ return error;
out_err_nosb:
nfs_free_server(server);
-out_err_noserver:
- return error;
+ goto out;
error_splat_super:
up_write(&s->s_umount);
deactivate_super(s);
- return error;
+ goto out;
}
/*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
struct super_block *s;
struct nfs_server *server;
struct dentry *mntroot;
+ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
int error;
dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
goto out_err_noserver;
}
+ if (server->flags & NFS_MOUNT_UNSHARED)
+ compare_super = NULL;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
}
if (s->s_fs_info != server) {
+ error = nfs_compare_mount_options(s, server, flags);
nfs_free_server(server);
server = NULL;
+ if (error < 0)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
nfs_initialise_sb(sb);
}
-static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
+ const char *dev_name,
+ struct sockaddr_in *addr,
+ rpc_authflavor_t *authflavour,
+ char **hostname,
+ char **mntpath,
+ char **ip_addr)
{
- void *p = NULL;
-
- if (!src->len)
- return ERR_PTR(-EINVAL);
- if (src->len < maxlen)
- maxlen = src->len;
- if (dst == NULL) {
- p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
- if (p == NULL)
- return ERR_PTR(-ENOMEM);
- }
- if (copy_from_user(dst, src->data, maxlen)) {
- kfree(p);
- return ERR_PTR(-EFAULT);
+ struct nfs4_mount_data *data = *options;
+ char *c;
+
+ if (data == NULL)
+ goto out_no_data;
+
+ switch (data->version) {
+ case 1:
+ if (data->host_addrlen != sizeof(*addr))
+ goto out_no_address;
+ if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
+ return -EFAULT;
+ if (addr->sin_port == 0)
+ addr->sin_port = htons(NFS_PORT);
+ if (!nfs_verify_server_address((struct sockaddr *) addr))
+ goto out_no_address;
+
+ switch (data->auth_flavourlen) {
+ case 0:
+ *authflavour = RPC_AUTH_UNIX;
+ break;
+ case 1:
+ if (copy_from_user(authflavour, data->auth_flavours,
+ sizeof(*authflavour)))
+ return -EFAULT;
+ break;
+ default:
+ goto out_inval_auth;
+ }
+
+ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ *hostname = c;
+
+ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ *mntpath = c;
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+
+ c = strndup_user(data->client_addr.data, 16);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ *ip_addr = c;
+
+ break;
+ default: {
+ unsigned int len;
+ struct nfs_parsed_mount_data args = {
+ .rsize = NFS_MAX_FILE_IO_SIZE,
+ .wsize = NFS_MAX_FILE_IO_SIZE,
+ .timeo = 600,
+ .retrans = 2,
+ .acregmin = 3,
+ .acregmax = 60,
+ .acdirmin = 30,
+ .acdirmax = 60,
+ .nfs_server.protocol = IPPROTO_TCP,
+ };
+
+ if (nfs_parse_mount_options((char *) *options, &args) == 0)
+ return -EINVAL;
+
+ if (!nfs_verify_server_address((struct sockaddr *)
+ &args.nfs_server.address))
+ return -EINVAL;
+ *addr = args.nfs_server.address;
+
+ switch (args.auth_flavor_len) {
+ case 0:
+ *authflavour = RPC_AUTH_UNIX;
+ break;
+ case 1:
+ *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
+ break;
+ default:
+ goto out_inval_auth;
+ }
+
+ /*
+ * Translate to nfs4_mount_data, which nfs4_fill_super
+ * can deal with.
+ */
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+ *options = data;
+
+ data->version = 1;
+ data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
+ data->rsize = args.rsize;
+ data->wsize = args.wsize;
+ data->timeo = args.timeo;
+ data->retrans = args.retrans;
+ data->acregmin = args.acregmin;
+ data->acregmax = args.acregmax;
+ data->acdirmin = args.acdirmin;
+ data->acdirmax = args.acdirmax;
+ data->proto = args.nfs_server.protocol;
+
+ /*
+ * Split "dev_name" into "hostname:mntpath".
+ */
+ c = strchr(dev_name, ':');
+ if (c == NULL)
+ return -EINVAL;
+ /* while calculating len, pretend ':' is '\0' */
+ len = c - dev_name;
+ if (len > NFS4_MAXNAMLEN)
+ return -EINVAL;
+ *hostname = kzalloc(len, GFP_KERNEL);
+ if (*hostname == NULL)
+ return -ENOMEM;
+ strncpy(*hostname, dev_name, len - 1);
+
+ c++; /* step over the ':' */
+ len = strlen(c);
+ if (len > NFS4_MAXPATHLEN)
+ return -EINVAL;
+ *mntpath = kzalloc(len + 1, GFP_KERNEL);
+ if (*mntpath == NULL)
+ return -ENOMEM;
+ strncpy(*mntpath, c, len);
+
+ dprintk("MNTPATH: %s\n", *mntpath);
+
+ *ip_addr = args.client_address;
+
+ break;
+ }
}
- dst[maxlen] = '\0';
- return dst;
+
+ return 0;
+
+out_no_data:
+ dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
+ return -EINVAL;
+
+out_inval_auth:
+ dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
+ data->auth_flavourlen);
+ return -EINVAL;
+
+out_no_address:
+ dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
+ return -EINVAL;
}
/*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
rpc_authflavor_t authflavour;
struct nfs_fh mntfh;
struct dentry *mntroot;
- char *mntpath = NULL, *hostname = NULL, ip_addr[16];
- void *p;
+ char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
+ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
int error;
- if (data == NULL) {
- dprintk("%s: missing data argument\n", __FUNCTION__);
- return -EINVAL;
- }
- if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
- dprintk("%s: bad mount version\n", __FUNCTION__);
- return -EINVAL;
- }
-
- /* We now require that the mount process passes the remote address */
- if (data->host_addrlen != sizeof(addr))
- return -EINVAL;
-
- if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
- return -EFAULT;
-
- if (addr.sin_family != AF_INET ||
- addr.sin_addr.s_addr == INADDR_ANY
- ) {
- dprintk("%s: mount program didn't pass remote IP address!\n",
- __FUNCTION__);
- return -EINVAL;
- }
- /* RFC3530: The default port for NFS is 2049 */
- if (addr.sin_port == 0)
- addr.sin_port = htons(NFS_PORT);
-
- /* Grab the authentication type */
- authflavour = RPC_AUTH_UNIX;
- if (data->auth_flavourlen != 0) {
- if (data->auth_flavourlen != 1) {
- dprintk("%s: Invalid number of RPC auth flavours %d.\n",
- __FUNCTION__, data->auth_flavourlen);
- error = -EINVAL;
- goto out_err_noserver;
- }
-
- if (copy_from_user(&authflavour, data->auth_flavours,
- sizeof(authflavour))) {
- error = -EFAULT;
- goto out_err_noserver;
- }
- }
-
- p = nfs_copy_user_string(NULL, &data->hostname, 256);
- if (IS_ERR(p))
- goto out_err;
- hostname = p;
-
- p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
- if (IS_ERR(p))
- goto out_err;
- mntpath = p;
-
- dprintk("MNTPATH: %s\n", mntpath);
-
- p = nfs_copy_user_string(ip_addr, &data->client_addr,
- sizeof(ip_addr) - 1);
- if (IS_ERR(p))
- goto out_err;
+ /* Validate the mount data */
+ error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
+ &hostname, &mntpath, &ip_addr);
+ if (error < 0)
+ goto out;
/* Get a volume representation */
server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
authflavour, &mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
- goto out_err_noserver;
+ goto out;
}
+ if (server->flags & NFS4_MOUNT_UNSHARED)
+ compare_super = NULL;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(fs_type, compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
mnt->mnt_root = mntroot;
+ error = 0;
+
+out:
+ kfree(ip_addr);
kfree(mntpath);
kfree(hostname);
- return 0;
-
-out_err:
- error = PTR_ERR(p);
- goto out_err_noserver;
+ return error;
out_free:
nfs_free_server(server);
-out_err_noserver:
- kfree(mntpath);
- kfree(hostname);
- return error;
+ goto out;
error_splat_super:
up_write(&s->s_umount);
deactivate_super(s);
- goto out_err_noserver;
+ goto out;
}
static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
struct super_block *s;
struct nfs_server *server;
struct dentry *mntroot;
+ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
int error;
dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
goto out_err_noserver;
}
+ if (server->flags & NFS4_MOUNT_UNSHARED)
+ compare_super = NULL;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
struct nfs_server *server;
struct dentry *mntroot;
struct nfs_fh mntfh;
+ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
int error;
dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
goto out_err_noserver;
}
+ if (server->flags & NFS4_MOUNT_UNSHARED)
+ compare_super = NULL;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e01..73ac992ece85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
if (PagePrivate(page)) {
req = (struct nfs_page *)page_private(page);
if (req != NULL)
- atomic_inc(&req->wb_count);
+ kref_get(&req->wb_kref);
}
return req;
}
static struct nfs_page *nfs_page_find_request(struct page *page)
{
+ struct inode *inode = page->mapping->host;
struct nfs_page *req = NULL;
- spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
- spin_lock(req_lock);
+ spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
return req;
}
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
}
/* Update file length */
nfs_grow_file(page, offset, count);
- /* Set the PG_uptodate flag? */
- nfs_mark_uptodate(page, offset, count);
nfs_unlock_request(req);
return 0;
}
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page)
{
+ struct inode *inode = page->mapping->host;
+ struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
- struct nfs_inode *nfsi = NFS_I(page->mapping->host);
- spinlock_t *req_lock = &nfsi->req_lock;
int ret;
- spin_lock(req_lock);
+ spin_lock(&inode->i_lock);
for(;;) {
req = nfs_page_find_request_locked(page);
if (req == NULL) {
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
return 1;
}
if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
* succeed provided that someone hasn't already marked the
* request as dirty (in which case we don't care).
*/
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
ret = nfs_wait_on_request(req);
nfs_release_request(req);
if (ret != 0)
return ret;
- spin_lock(req_lock);
+ spin_lock(&inode->i_lock);
}
if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
/* This request is marked for commit */
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
nfs_unlock_request(req);
nfs_pageio_complete(pgio);
return 1;
}
if (nfs_set_page_writeback(page) != 0) {
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
BUG();
}
radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
- NFS_PAGE_TAG_WRITEBACK);
+ NFS_PAGE_TAG_LOCKED);
ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
nfs_pageio_add_request(pgio, req);
return ret;
}
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (PageDirty(req->wb_page))
set_bit(PG_NEED_FLUSH, &req->wb_flags);
nfsi->npages++;
- atomic_inc(&req->wb_count);
+ kref_get(&req->wb_kref);
return 0;
}
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
*/
static void nfs_inode_remove_request(struct nfs_page *req)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
BUG_ON (!NFS_WBACK_BUSY(req));
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
set_page_private(req->wb_page, 0);
ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
__set_page_dirty_nobuffers(req->wb_page);
nfsi->npages--;
if (!nfsi->npages) {
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
nfs_end_data_update(inode);
iput(inode);
} else
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
nfs_clear_request(req);
nfs_release_request(req);
}
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
static void
nfs_mark_request_commit(struct nfs_page *req)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
- spin_lock(&nfsi->req_lock);
- nfs_list_add_request(req, &nfsi->commit);
+ spin_lock(&inode->i_lock);
nfsi->ncommit++;
set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
- spin_unlock(&nfsi->req_lock);
+ radix_tree_tag_set(&nfsi->nfs_page_tree,
+ req->wb_index,
+ NFS_PAGE_TAG_COMMIT);
+ spin_unlock(&inode->i_lock);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
idx_end = idx_start + npages - 1;
next = idx_start;
- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
+ while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
if (req->wb_index > idx_end)
break;
next = req->wb_index + 1;
BUG_ON(!NFS_WBACK_BUSY(req));
- atomic_inc(&req->wb_count);
- spin_unlock(&nfsi->req_lock);
+ kref_get(&req->wb_kref);
+ spin_unlock(&inode->i_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
if (error < 0)
return error;
res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
int res = 0;
if (nfsi->ncommit != 0) {
- res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
+ res = nfs_scan_list(nfsi, dst, idx_start, npages,
+ NFS_PAGE_TAG_COMMIT);
nfsi->ncommit -= res;
- if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
- printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
}
return res;
}
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
{
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req, *new = NULL;
pgoff_t rqend, end;
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
/* Loop over all inode entries and see if we find
* A request for the page we wish to update
*/
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req) {
if (!nfs_lock_request_dontget(req)) {
int error;
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
}
continue;
}
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
if (new)
nfs_release_request(new);
break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
nfs_lock_request_dontget(new);
error = nfs_inode_add_request(inode, new);
if (error) {
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
nfs_unlock_request(new);
return ERR_PTR(error);
}
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
return new;
}
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
new = nfs_create_request(ctx, inode, page, offset, bytes);
if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
static void nfs_writepage_release(struct nfs_page *req)
{
- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+ if (PageError(req->wb_page)) {
+ nfs_end_page_writeback(req->wb_page);
+ nfs_inode_remove_request(req);
+ } else if (!nfs_reschedule_unstable_write(req)) {
+ /* Set the PG_uptodate flag */
+ nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
nfs_end_page_writeback(req->wb_page);
nfs_inode_remove_request(req);
} else
nfs_end_page_writeback(req->wb_page);
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
}
static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
* NB: take care not to mess about with data->commit et al. */
data->req = req;
- data->inode = inode = req->wb_context->dentry->d_inode;
+ data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = req->wb_context->cred;
data->args.fh = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
}
nfs_redirty_request(req);
nfs_end_page_writeback(req->wb_page);
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
return -ENOMEM;
}
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
nfs_list_remove_request(req);
nfs_redirty_request(req);
nfs_end_page_writeback(req->wb_page);
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
}
return -ENOMEM;
}
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
struct page *page = req->wb_page;
dprintk("NFS: write (%s/%Ld %d@%Ld)",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_context->path.dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
}
if (nfs_write_need_commit(data)) {
- spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+ struct inode *inode = page->mapping->host;
- spin_lock(req_lock);
+ spin_lock(&inode->i_lock);
if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
/* Do nothing we need to resend the writes */
} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
clear_bit(PG_NEED_COMMIT, &req->wb_flags);
dprintk(" server reboot detected\n");
}
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
} else
dprintk(" OK\n");
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
page = req->wb_page;
dprintk("NFS: write (%s/%Ld %d@%Ld)",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_context->path.dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
dprintk(" marked for commit\n");
goto next;
}
+ /* Set the PG_uptodate flag? */
+ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
dprintk(" OK\n");
remove_request:
nfs_end_page_writeback(page);
nfs_inode_remove_request(req);
next:
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
}
}
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
list_splice_init(head, &data->pages);
first = nfs_list_entry(data->pages.next);
- inode = first->wb_context->dentry->d_inode;
+ inode = first->wb_context->path.dentry->d_inode;
data->inode = inode;
data->cred = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
nfs_list_remove_request(req);
nfs_mark_request_commit(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
}
return -ENOMEM;
}
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
dprintk("NFS: commit (%s/%Ld %d@%Ld)",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_context->path.dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
* returned by the server against all stored verfs. */
if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
/* We have a match */
+ /* Set the PG_uptodate flag */
+ nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
+ req->wb_bytes);
nfs_inode_remove_request(req);
dprintk(" OK\n");
goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
dprintk(" mismatch\n");
nfs_redirty_request(req);
next:
- nfs_clear_page_writeback(req);
+ nfs_clear_page_tag_locked(req);
}
}
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
int nfs_commit_inode(struct inode *inode, int how)
{
- struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
int res;
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
if (res) {
int error = nfs_commit_list(inode, &head, how);
if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
struct inode *inode = mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
pgoff_t idx_start, idx_end;
unsigned int npages = 0;
LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
}
}
how &= ~FLUSH_NOCOMMIT;
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
do {
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
if (pages == 0)
break;
if (how & FLUSH_INVALIDATE) {
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
nfs_cancel_commit_list(&head);
ret = pages;
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
continue;
}
pages += nfs_scan_commit(inode, &head, 0, 0);
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
ret = nfs_commit_list(inode, &head, how);
- spin_lock(&nfsi->req_lock);
+ spin_lock(&inode->i_lock);
+
} while (ret >= 0);
- spin_unlock(&nfsi->req_lock);
+ spin_unlock(&inode->i_lock);
return ret;
}
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
- spinlock_t *req_lock;
struct nfs_page *req;
int ret;
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
inode = mapping->host;
if (!inode)
goto out_raced;
- req_lock = &NFS_I(inode)->req_lock;
- spin_lock(req_lock);
+ spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req != NULL) {
/* Mark any existing write requests for flushing */
ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
nfs_release_request(req);
return ret;
}
ret = __set_page_dirty_nobuffers(page);
- spin_unlock(req_lock);
+ spin_unlock(&inode->i_lock);
return ret;
out_raced:
return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28b..5443c52b57aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
- char clientname[16];
int status;
if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
memset(program->stats, 0, sizeof(cb->cb_stat));
program->stats->program = program;
- /* Just here to make some printk's more useful: */
- snprintf(clientname, sizeof(clientname),
- "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
- args.servername = clientname;
-
/* Create RPC client */
cb->cb_client = rpc_create(&args);
if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
goto out_err;
}
- /* Kick rpciod, put the call on the wire. */
- if (rpciod_up() != 0)
- goto out_clnt;
-
/* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
msg.rpc_cred = nfsd4_lookupcred(clp,0);
if (IS_ERR(msg.rpc_cred))
- goto out_rpciod;
+ goto out_release_clp;
status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
put_rpccred(msg.rpc_cred);
if (status != 0) {
dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
- goto out_rpciod;
+ goto out_release_clp;
}
return;
-out_rpciod:
+out_release_clp:
atomic_dec(&clp->cl_count);
- rpciod_down();
-out_clnt:
rpc_shutdown_client(cb->cb_client);
out_err:
cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab1..8c52913d7cb6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
if (clnt) {
clp->cl_callback.cb_client = NULL;
rpc_shutdown_client(clnt);
- rpciod_down();
}
}
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 05707e2fccae..e2d1ce36b367 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -39,6 +39,7 @@
struct nlm_host {
struct hlist_node h_hash; /* doubly linked list */
struct sockaddr_in h_addr; /* peer address */
+ struct sockaddr_in h_saddr; /* our address (optional) */
struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
char * h_name; /* remote hostname */
u32 h_version; /* interface version */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7e7f33a38fc0..8726491de154 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -15,6 +15,7 @@
#include <linux/types.h>
+#define NFS4_BITMAP_SIZE 2
#define NFS4_VERIFIER_SIZE 8
#define NFS4_STATEID_SIZE 16
#define NFS4_FHSIZE 128
diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
index 26b4c83f831d..a0dcf6655657 100644
--- a/include/linux/nfs4_mount.h
+++ b/include/linux/nfs4_mount.h
@@ -65,6 +65,7 @@ struct nfs4_mount_data {
#define NFS4_MOUNT_NOCTO 0x0010 /* 1 */
#define NFS4_MOUNT_NOAC 0x0020 /* 1 */
#define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */
-#define NFS4_MOUNT_FLAGMASK 0xFFFF
+#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */
+#define NFS4_MOUNT_FLAGMASK 0x9033
#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0543439a97af..c098ae194f79 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -30,7 +30,9 @@
#ifdef __KERNEL__
#include <linux/in.h>
+#include <linux/kref.h>
#include <linux/mm.h>
+#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
@@ -69,9 +71,8 @@ struct nfs_access_entry {
struct nfs4_state;
struct nfs_open_context {
- atomic_t count;
- struct vfsmount *vfsmnt;
- struct dentry *dentry;
+ struct kref kref;
+ struct path path;
struct rpc_cred *cred;
struct nfs4_state *state;
fl_owner_t lockowner;
@@ -155,13 +156,9 @@ struct nfs_inode {
/*
* This is the list of dirty unwritten pages.
*/
- spinlock_t req_lock;
- struct list_head dirty;
- struct list_head commit;
struct radix_tree_root nfs_page_tree;
- unsigned int ndirty,
- ncommit,
+ unsigned long ncommit,
npages;
/* Open contexts for shared mmap writes */
@@ -187,6 +184,7 @@ struct nfs_inode {
#define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */
#define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */
#define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */
+#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */
/*
* Bit offsets in flags field
@@ -496,21 +494,18 @@ static inline void nfs3_forget_cached_acls(struct inode *inode)
/*
* linux/fs/mount_clnt.c
- * (Used only by nfsroot module)
*/
-extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *,
- int, int);
+extern int nfs_mount(struct sockaddr *, size_t, char *, char *,
+ int, int, struct nfs_fh *);
/*
* inline functions
*/
-static inline loff_t
-nfs_size_to_loff_t(__u64 size)
+static inline loff_t nfs_size_to_loff_t(__u64 size)
{
- loff_t maxsz = (((loff_t) ULONG_MAX) << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE - 1;
- if (size > maxsz)
- return maxsz;
+ if (size > (__u64) OFFSET_MAX - 1)
+ return OFFSET_MAX - 1;
return (loff_t) size;
}
@@ -557,6 +552,7 @@ extern void * nfs_root_data(void);
#define NFSDBG_ROOT 0x0080
#define NFSDBG_CALLBACK 0x0100
#define NFSDBG_CLIENT 0x0200
+#define NFSDBG_MOUNT 0x0400
#define NFSDBG_ALL 0xFFFF
#ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 52b4378311c8..0cac49bc0955 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -16,7 +16,6 @@ struct nfs_client {
#define NFS_CS_INITING 1 /* busy initialising */
int cl_nfsversion; /* NFS protocol version */
unsigned long cl_res_state; /* NFS resources state */
-#define NFS_CS_RPCIOD 0 /* - rpciod started */
#define NFS_CS_CALLBACK 1 /* - callback started */
#define NFS_CS_IDMAP 2 /* - idmap started */
#define NFS_CS_RENEWD 3 /* - renewd started */
@@ -35,7 +34,8 @@ struct nfs_client {
nfs4_verifier cl_confirm;
unsigned long cl_state;
- u32 cl_lockowner_id;
+ struct rb_root cl_openowner_id;
+ struct rb_root cl_lockowner_id;
/*
* The following rwsem ensures exclusive access to the server
@@ -44,9 +44,7 @@ struct nfs_client {
struct rw_semaphore cl_sem;
struct list_head cl_delegations;
- struct list_head cl_state_owners;
- struct list_head cl_unused;
- int cl_nunused;
+ struct rb_root cl_state_owners;
spinlock_t cl_lock;
unsigned long cl_lease_time;
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index cc8b9c59acb8..a3ade89a64d2 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -37,7 +37,7 @@ struct nfs_mount_data {
int acdirmin; /* 1 */
int acdirmax; /* 1 */
struct sockaddr_in addr; /* 1 */
- char hostname[256]; /* 1 */
+ char hostname[NFS_MAXNAMLEN + 1]; /* 1 */
int namlen; /* 2 */
unsigned int bsize; /* 3 */
struct nfs3_fh root; /* 4 */
@@ -62,6 +62,7 @@ struct nfs_mount_data {
#define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */
#define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */
#define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
+#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
#define NFS_MOUNT_FLAGMASK 0xFFFF
#endif
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index bd193af80162..78e60798d10e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -16,12 +16,13 @@
#include <linux/sunrpc/auth.h>
#include <linux/nfs_xdr.h>
-#include <asm/atomic.h>
+#include <linux/kref.h>
/*
* Valid flags for the radix tree
*/
-#define NFS_PAGE_TAG_WRITEBACK 0
+#define NFS_PAGE_TAG_LOCKED 0
+#define NFS_PAGE_TAG_COMMIT 1
/*
* Valid flags for a dirty buffer
@@ -33,8 +34,7 @@
struct nfs_inode;
struct nfs_page {
- struct list_head wb_list, /* Defines state of page: */
- *wb_list_head; /* read/write/commit */
+ struct list_head wb_list; /* Defines state of page: */
struct page *wb_page; /* page to read in/write out */
struct nfs_open_context *wb_context; /* File state context info */
atomic_t wb_complete; /* i/os we're waiting for */
@@ -42,7 +42,7 @@ struct nfs_page {
unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
wb_pgbase, /* Start of page data */
wb_bytes; /* Length of request */
- atomic_t wb_count; /* reference count */
+ struct kref wb_kref; /* reference count */
unsigned long wb_flags;
struct nfs_writeverf wb_verf; /* Commit cookie */
};
@@ -71,8 +71,8 @@ extern void nfs_clear_request(struct nfs_page *req);
extern void nfs_release_request(struct nfs_page *req);
-extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
- pgoff_t idx_start, unsigned int npages);
+extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+ pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
@@ -84,12 +84,11 @@ extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
-extern int nfs_set_page_writeback_locked(struct nfs_page *req);
-extern void nfs_clear_page_writeback(struct nfs_page *req);
+extern void nfs_clear_page_tag_locked(struct nfs_page *req);
/*
- * Lock the page of an asynchronous request without incrementing the wb_count
+ * Lock the page of an asynchronous request without getting a new reference
*/
static inline int
nfs_lock_request_dontget(struct nfs_page *req)
@@ -98,14 +97,14 @@ nfs_lock_request_dontget(struct nfs_page *req)
}
/*
- * Lock the page of an asynchronous request
+ * Lock the page of an asynchronous request and take a reference
*/
static inline int
nfs_lock_request(struct nfs_page *req)
{
if (test_and_set_bit(PG_BUSY, &req->wb_flags))
return 0;
- atomic_inc(&req->wb_count);
+ kref_get(&req->wb_kref);
return 1;
}
@@ -118,7 +117,6 @@ static inline void
nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
list_add_tail(&req->wb_list, head);
- req->wb_list_head = head;
}
@@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req)
if (list_empty(&req->wb_list))
return;
list_del_init(&req->wb_list);
- req->wb_list_head = NULL;
}
static inline struct nfs_page *
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 10c26ed0db71..38d77681cf27 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -119,7 +119,7 @@ struct nfs_openargs {
struct nfs_seqid * seqid;
int open_flags;
__u64 clientid;
- __u32 id;
+ __u64 id;
union {
struct iattr * attrs; /* UNCHECKED, GUARDED */
nfs4_verifier verifier; /* EXCLUSIVE */
@@ -144,6 +144,7 @@ struct nfs_openres {
nfs4_stateid delegation;
__u32 do_recall;
__u64 maxsize;
+ __u32 attrset[NFS4_BITMAP_SIZE];
};
/*
@@ -180,7 +181,7 @@ struct nfs_closeres {
* */
struct nfs_lowner {
__u64 clientid;
- u32 id;
+ __u64 id;
};
struct nfs_lock_args {
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 534cdc7be58d..7a69ca3bebaf 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -16,6 +16,7 @@
#include <linux/sunrpc/xdr.h>
#include <asm/atomic.h>
+#include <linux/rcupdate.h>
/* size of the nodename buffer */
#define UNX_MAXNODENAME 32
@@ -30,22 +31,28 @@ struct auth_cred {
/*
* Client user credentials
*/
+struct rpc_auth;
+struct rpc_credops;
struct rpc_cred {
struct hlist_node cr_hash; /* hash chain */
- struct rpc_credops * cr_ops;
- unsigned long cr_expire; /* when to gc */
- atomic_t cr_count; /* ref count */
- unsigned short cr_flags; /* various flags */
+ struct list_head cr_lru; /* lru garbage collection */
+ struct rcu_head cr_rcu;
+ struct rpc_auth * cr_auth;
+ const struct rpc_credops *cr_ops;
#ifdef RPC_DEBUG
unsigned long cr_magic; /* 0x0f4aa4f0 */
#endif
+ unsigned long cr_expire; /* when to gc */
+ unsigned long cr_flags; /* various flags */
+ atomic_t cr_count; /* ref count */
uid_t cr_uid;
/* per-flavor data */
};
-#define RPCAUTH_CRED_NEW 0x0001
-#define RPCAUTH_CRED_UPTODATE 0x0002
+#define RPCAUTH_CRED_NEW 0
+#define RPCAUTH_CRED_UPTODATE 1
+#define RPCAUTH_CRED_HASHED 2
#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
@@ -56,10 +63,10 @@ struct rpc_cred {
#define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1)
struct rpc_cred_cache {
struct hlist_head hashtable[RPC_CREDCACHE_NR];
- unsigned long nextgc; /* next garbage collection */
- unsigned long expire; /* cache expiry interval */
+ spinlock_t lock;
};
+struct rpc_authops;
struct rpc_auth {
unsigned int au_cslack; /* call cred size estimate */
/* guess at number of u32's auth adds before
@@ -69,7 +76,7 @@ struct rpc_auth {
unsigned int au_verfsize;
unsigned int au_flags; /* various flags */
- struct rpc_authops * au_ops; /* operations */
+ const struct rpc_authops *au_ops; /* operations */
rpc_authflavor_t au_flavor; /* pseudoflavor (note may
* differ from the flavor in
* au_ops->au_flavor in gss
@@ -115,17 +122,19 @@ struct rpc_credops {
void *, __be32 *, void *);
};
-extern struct rpc_authops authunix_ops;
-extern struct rpc_authops authnull_ops;
-#ifdef CONFIG_SUNRPC_SECURE
-extern struct rpc_authops authdes_ops;
-#endif
+extern const struct rpc_authops authunix_ops;
+extern const struct rpc_authops authnull_ops;
+
+void __init rpc_init_authunix(void);
+void __init rpcauth_init_module(void);
+void __exit rpcauth_remove_module(void);
-int rpcauth_register(struct rpc_authops *);
-int rpcauth_unregister(struct rpc_authops *);
+int rpcauth_register(const struct rpc_authops *);
+int rpcauth_unregister(const struct rpc_authops *);
struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
-void rpcauth_destroy(struct rpc_auth *);
+void rpcauth_release(struct rpc_auth *);
struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
+void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
struct rpc_cred * rpcauth_bindcred(struct rpc_task *);
void rpcauth_holdcred(struct rpc_task *);
@@ -138,8 +147,9 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *);
-int rpcauth_init_credcache(struct rpc_auth *, unsigned long);
-void rpcauth_free_credcache(struct rpc_auth *);
+int rpcauth_init_credcache(struct rpc_auth *);
+void rpcauth_destroy_credcache(struct rpc_auth *);
+void rpcauth_clear_credcache(struct rpc_cred_cache *);
static inline
struct rpc_cred * get_rpccred(struct rpc_cred *cred)
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 2db2fbf34947..67658e17a375 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -75,6 +75,7 @@ struct gss_cl_ctx {
struct xdr_netobj gc_wire_ctx;
u32 gc_win;
unsigned long gc_expiry;
+ struct rcu_head gc_rcu;
};
struct gss_upcall_msg;
@@ -85,11 +86,6 @@ struct gss_cred {
struct gss_upcall_msg *gc_upcall;
};
-#define gc_uid gc_base.cr_uid
-#define gc_count gc_base.cr_count
-#define gc_flags gc_base.cr_flags
-#define gc_expire gc_base.cr_expire
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_AUTH_GSS_H */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 66611423c8ee..c0d9d14983b3 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -24,8 +24,10 @@ struct rpc_inode;
* The high-level client handle
*/
struct rpc_clnt {
- atomic_t cl_count; /* Number of clones */
- atomic_t cl_users; /* number of references */
+ struct kref cl_kref; /* Number of references */
+ struct list_head cl_clients; /* Global list of clients */
+ struct list_head cl_tasks; /* List of tasks */
+ spinlock_t cl_lock; /* spinlock */
struct rpc_xprt * cl_xprt; /* transport */
struct rpc_procinfo * cl_procinfo; /* procedure info */
u32 cl_prog, /* RPC program number */
@@ -41,9 +43,7 @@ struct rpc_clnt {
unsigned int cl_softrtry : 1,/* soft timeouts */
cl_intr : 1,/* interruptible */
cl_discrtry : 1,/* disconnect before retry */
- cl_autobind : 1,/* use getport() */
- cl_oneshot : 1,/* dispose after use */
- cl_dead : 1;/* abandoned */
+ cl_autobind : 1;/* use getport() */
struct rpc_rtt * cl_rtt; /* RTO estimator data */
@@ -98,6 +98,7 @@ struct rpc_create_args {
int protocol;
struct sockaddr *address;
size_t addrsize;
+ struct sockaddr *saddress;
struct rpc_timeout *timeout;
char *servername;
struct rpc_program *program;
@@ -110,20 +111,20 @@ struct rpc_create_args {
#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0)
#define RPC_CLNT_CREATE_INTR (1UL << 1)
#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2)
-#define RPC_CLNT_CREATE_ONESHOT (1UL << 3)
-#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4)
-#define RPC_CLNT_CREATE_NOPING (1UL << 5)
-#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6)
+#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3)
+#define RPC_CLNT_CREATE_NOPING (1UL << 4)
+#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
struct rpc_program *, int);
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
-int rpc_shutdown_client(struct rpc_clnt *);
-int rpc_destroy_client(struct rpc_clnt *);
+void rpc_shutdown_client(struct rpc_clnt *);
void rpc_release_client(struct rpc_clnt *);
+
int rpcb_register(u32, u32, int, unsigned short, int *);
-void rpcb_getport(struct rpc_task *);
+int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
+void rpcb_getport_async(struct rpc_task *);
void rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
@@ -132,20 +133,16 @@ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
void *calldata);
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
int flags);
+struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
+ int flags);
void rpc_restart_call(struct rpc_task *);
void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
size_t rpc_max_payload(struct rpc_clnt *);
void rpc_force_rebind(struct rpc_clnt *);
-int rpc_ping(struct rpc_clnt *clnt, int flags);
size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
-/*
- * Helper function for NFSroot support
- */
-int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int);
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 5eca9e442051..bbac101ac372 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -77,7 +77,7 @@ struct gss_api_mech {
struct module *gm_owner;
struct xdr_netobj gm_oid;
char *gm_name;
- struct gss_api_ops *gm_ops;
+ const struct gss_api_ops *gm_ops;
/* pseudoflavors supported by this mechanism: */
int gm_pf_num;
struct pf_desc * gm_pfs;
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index ad293760f6eb..51b977a4ca20 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -23,9 +23,11 @@ struct rpc_inode {
void *private;
struct list_head pipe;
struct list_head in_upcall;
+ struct list_head in_downcall;
int pipelen;
int nreaders;
int nwriters;
+ int nkern_readwriters;
wait_queue_head_t waitq;
#define RPC_PIPE_WAIT_FOR_OPEN 1
int flags;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 2047fb202a13..8ea077db0099 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -98,7 +98,6 @@ struct rpc_task {
unsigned short tk_pid; /* debugging aid */
#endif
};
-#define tk_auth tk_client->cl_auth
#define tk_xprt tk_client->cl_xprt
/* support walking a list of tasks on a wait queue */
@@ -110,11 +109,6 @@ struct rpc_task {
if (!list_empty(head) && \
((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
-/* .. and walking list of all tasks */
-#define alltask_for_each(task, pos, head) \
- list_for_each(pos, head) \
- if ((task=list_entry(pos, struct rpc_task, tk_task)),1)
-
typedef void (*rpc_action)(struct rpc_task *);
struct rpc_call_ops {
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index e21dd93ac4b7..a53e0fa855d2 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -59,6 +59,7 @@ struct svc_sock {
/* cache of various info for TCP sockets */
void *sk_info_authunix;
+ struct sockaddr_storage sk_local; /* local address */
struct sockaddr_storage sk_remote; /* remote peer's address */
int sk_remotelen; /* length of address */
};
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 34f7590506fa..d11cedd14f0f 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -17,6 +17,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
+#ifdef __KERNEL__
+
extern unsigned int xprt_udp_slot_table_entries;
extern unsigned int xprt_tcp_slot_table_entries;
@@ -194,7 +196,13 @@ struct rpc_xprt {
char * address_strings[RPC_DISPLAY_MAX];
};
-#ifdef __KERNEL__
+struct rpc_xprtsock_create {
+ int proto; /* IPPROTO_UDP or IPPROTO_TCP */
+ struct sockaddr * srcaddr; /* optional local address */
+ struct sockaddr * dstaddr; /* remote peer address */
+ size_t addrlen;
+ struct rpc_timeout * timeout; /* optional timeout parameters */
+};
/*
* Transport operations used by ULPs
@@ -204,7 +212,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
/*
* Generic internal transport functions
*/
-struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms);
+struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_task *task);
@@ -242,8 +250,8 @@ void xprt_disconnect(struct rpc_xprt *xprt);
/*
* Socket transport setup operations
*/
-struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
-struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
+struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args);
+struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args);
int init_socket_xprt(void);
void cleanup_socket_xprt(void);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 9527f2bb1744..aa55d0a03e6f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -13,17 +13,22 @@
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+static DEFINE_SPINLOCK(rpc_authflavor_lock);
+static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
&authnull_ops, /* AUTH_NULL */
&authunix_ops, /* AUTH_UNIX */
NULL, /* others can be loadable modules */
};
+static LIST_HEAD(cred_unused);
+static unsigned long number_cred_unused;
+
static u32
pseudoflavor_to_flavor(u32 flavor) {
if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -32,55 +37,67 @@ pseudoflavor_to_flavor(u32 flavor) {
}
int
-rpcauth_register(struct rpc_authops *ops)
+rpcauth_register(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
+ int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- if (auth_flavors[flavor] != NULL)
- return -EPERM; /* what else? */
- auth_flavors[flavor] = ops;
- return 0;
+ spin_lock(&rpc_authflavor_lock);
+ if (auth_flavors[flavor] == NULL) {
+ auth_flavors[flavor] = ops;
+ ret = 0;
+ }
+ spin_unlock(&rpc_authflavor_lock);
+ return ret;
}
int
-rpcauth_unregister(struct rpc_authops *ops)
+rpcauth_unregister(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
+ int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- if (auth_flavors[flavor] != ops)
- return -EPERM; /* what else? */
- auth_flavors[flavor] = NULL;
- return 0;
+ spin_lock(&rpc_authflavor_lock);
+ if (auth_flavors[flavor] == ops) {
+ auth_flavors[flavor] = NULL;
+ ret = 0;
+ }
+ spin_unlock(&rpc_authflavor_lock);
+ return ret;
}
struct rpc_auth *
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
- struct rpc_authops *ops;
+ const struct rpc_authops *ops;
u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
auth = ERR_PTR(-EINVAL);
if (flavor >= RPC_AUTH_MAXFLAVOR)
goto out;
- /* FIXME - auth_flavors[] really needs an rw lock,
- * and module refcounting. */
#ifdef CONFIG_KMOD
if ((ops = auth_flavors[flavor]) == NULL)
request_module("rpc-auth-%u", flavor);
#endif
- if ((ops = auth_flavors[flavor]) == NULL)
+ spin_lock(&rpc_authflavor_lock);
+ ops = auth_flavors[flavor];
+ if (ops == NULL || !try_module_get(ops->owner)) {
+ spin_unlock(&rpc_authflavor_lock);
goto out;
+ }
+ spin_unlock(&rpc_authflavor_lock);
auth = ops->create(clnt, pseudoflavor);
+ module_put(ops->owner);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
- rpcauth_destroy(clnt->cl_auth);
+ rpcauth_release(clnt->cl_auth);
clnt->cl_auth = auth;
out:
@@ -88,7 +105,7 @@ out:
}
void
-rpcauth_destroy(struct rpc_auth *auth)
+rpcauth_release(struct rpc_auth *auth)
{
if (!atomic_dec_and_test(&auth->au_count))
return;
@@ -97,11 +114,31 @@ rpcauth_destroy(struct rpc_auth *auth)
static DEFINE_SPINLOCK(rpc_credcache_lock);
+static void
+rpcauth_unhash_cred_locked(struct rpc_cred *cred)
+{
+ hlist_del_rcu(&cred->cr_hash);
+ smp_mb__before_clear_bit();
+ clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+}
+
+static void
+rpcauth_unhash_cred(struct rpc_cred *cred)
+{
+ spinlock_t *cache_lock;
+
+ cache_lock = &cred->cr_auth->au_credcache->lock;
+ spin_lock(cache_lock);
+ if (atomic_read(&cred->cr_count) == 0)
+ rpcauth_unhash_cred_locked(cred);
+ spin_unlock(cache_lock);
+}
+
/*
* Initialize RPC credential cache
*/
int
-rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
+rpcauth_init_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *new;
int i;
@@ -111,8 +148,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
return -ENOMEM;
for (i = 0; i < RPC_CREDCACHE_NR; i++)
INIT_HLIST_HEAD(&new->hashtable[i]);
- new->expire = expire;
- new->nextgc = jiffies + (expire >> 1);
+ spin_lock_init(&new->lock);
auth->au_credcache = new;
return 0;
}
@@ -121,13 +157,13 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
* Destroy a list of credentials
*/
static inline
-void rpcauth_destroy_credlist(struct hlist_head *head)
+void rpcauth_destroy_credlist(struct list_head *head)
{
struct rpc_cred *cred;
- while (!hlist_empty(head)) {
- cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
- hlist_del_init(&cred->cr_hash);
+ while (!list_empty(head)) {
+ cred = list_entry(head->next, struct rpc_cred, cr_lru);
+ list_del_init(&cred->cr_lru);
put_rpccred(cred);
}
}
@@ -137,58 +173,95 @@ void rpcauth_destroy_credlist(struct hlist_head *head)
* that are not referenced.
*/
void
-rpcauth_free_credcache(struct rpc_auth *auth)
+rpcauth_clear_credcache(struct rpc_cred_cache *cache)
{
- struct rpc_cred_cache *cache = auth->au_credcache;
- HLIST_HEAD(free);
- struct hlist_node *pos, *next;
+ LIST_HEAD(free);
+ struct hlist_head *head;
struct rpc_cred *cred;
int i;
spin_lock(&rpc_credcache_lock);
+ spin_lock(&cache->lock);
for (i = 0; i < RPC_CREDCACHE_NR; i++) {
- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
- __hlist_del(&cred->cr_hash);
- hlist_add_head(&cred->cr_hash, &free);
+ head = &cache->hashtable[i];
+ while (!hlist_empty(head)) {
+ cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
+ get_rpccred(cred);
+ if (!list_empty(&cred->cr_lru)) {
+ list_del(&cred->cr_lru);
+ number_cred_unused--;
+ }
+ list_add_tail(&cred->cr_lru, &free);
+ rpcauth_unhash_cred_locked(cred);
}
}
+ spin_unlock(&cache->lock);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
}
-static void
-rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
+/*
+ * Destroy the RPC credential cache
+ */
+void
+rpcauth_destroy_credcache(struct rpc_auth *auth)
{
- if (atomic_read(&cred->cr_count) != 1)
- return;
- if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
- if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
- __hlist_del(&cred->cr_hash);
- hlist_add_head(&cred->cr_hash, free);
+ struct rpc_cred_cache *cache = auth->au_credcache;
+
+ if (cache) {
+ auth->au_credcache = NULL;
+ rpcauth_clear_credcache(cache);
+ kfree(cache);
}
}
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
-static void
-rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
+static int
+rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
- struct rpc_cred_cache *cache = auth->au_credcache;
- struct hlist_node *pos, *next;
- struct rpc_cred *cred;
- int i;
+ spinlock_t *cache_lock;
+ struct rpc_cred *cred;
- dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
- for (i = 0; i < RPC_CREDCACHE_NR; i++) {
- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
- rpcauth_prune_expired(auth, cred, free);
+ while (!list_empty(&cred_unused)) {
+ cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
+ list_del_init(&cred->cr_lru);
+ number_cred_unused--;
+ if (atomic_read(&cred->cr_count) != 0)
+ continue;
+ cache_lock = &cred->cr_auth->au_credcache->lock;
+ spin_lock(cache_lock);
+ if (atomic_read(&cred->cr_count) == 0) {
+ get_rpccred(cred);
+ list_add_tail(&cred->cr_lru, free);
+ rpcauth_unhash_cred_locked(cred);
+ nr_to_scan--;
}
+ spin_unlock(cache_lock);
+ if (nr_to_scan == 0)
+ break;
}
- cache->nextgc = jiffies + cache->expire;
+ return nr_to_scan;
+}
+
+/*
+ * Run memory cache shrinker.
+ */
+static int
+rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+ LIST_HEAD(free);
+ int res;
+
+ if (list_empty(&cred_unused))
+ return 0;
+ spin_lock(&rpc_credcache_lock);
+ nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
+ res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ spin_unlock(&rpc_credcache_lock);
+ rpcauth_destroy_credlist(&free);
+ return res;
}
/*
@@ -198,53 +271,56 @@ struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
int flags)
{
+ LIST_HEAD(free);
struct rpc_cred_cache *cache = auth->au_credcache;
- HLIST_HEAD(free);
- struct hlist_node *pos, *next;
- struct rpc_cred *new = NULL,
- *cred = NULL;
+ struct hlist_node *pos;
+ struct rpc_cred *cred = NULL,
+ *entry, *new;
int nr = 0;
if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
nr = acred->uid & RPC_CREDCACHE_MASK;
-retry:
- spin_lock(&rpc_credcache_lock);
- if (time_before(cache->nextgc, jiffies))
- rpcauth_gc_credcache(auth, &free);
- hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
- struct rpc_cred *entry;
- entry = hlist_entry(pos, struct rpc_cred, cr_hash);
- if (entry->cr_ops->crmatch(acred, entry, flags)) {
- hlist_del(&entry->cr_hash);
- cred = entry;
- break;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
+ if (!entry->cr_ops->crmatch(acred, entry, flags))
+ continue;
+ spin_lock(&cache->lock);
+ if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
+ spin_unlock(&cache->lock);
+ continue;
}
- rpcauth_prune_expired(auth, entry, &free);
+ cred = get_rpccred(entry);
+ spin_unlock(&cache->lock);
+ break;
}
- if (new) {
- if (cred)
- hlist_add_head(&new->cr_hash, &free);
- else
- cred = new;
- }
- if (cred) {
- hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
- get_rpccred(cred);
- }
- spin_unlock(&rpc_credcache_lock);
+ rcu_read_unlock();
- rpcauth_destroy_credlist(&free);
+ if (cred != NULL)
+ goto found;
- if (!cred) {
- new = auth->au_ops->crcreate(auth, acred, flags);
- if (!IS_ERR(new)) {
-#ifdef RPC_DEBUG
- new->cr_magic = RPCAUTH_CRED_MAGIC;
-#endif
- goto retry;
- } else
- cred = new;
- } else if ((cred->cr_flags & RPCAUTH_CRED_NEW)
+ new = auth->au_ops->crcreate(auth, acred, flags);
+ if (IS_ERR(new)) {
+ cred = new;
+ goto out;
+ }
+
+ spin_lock(&cache->lock);
+ hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) {
+ if (!entry->cr_ops->crmatch(acred, entry, flags))
+ continue;
+ cred = get_rpccred(entry);
+ break;
+ }
+ if (cred == NULL) {
+ cred = new;
+ set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+ hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
+ } else
+ list_add_tail(&new->cr_lru, &free);
+ spin_unlock(&cache->lock);
+found:
+ if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)
&& cred->cr_ops->cr_init != NULL
&& !(flags & RPCAUTH_LOOKUP_NEW)) {
int res = cred->cr_ops->cr_init(auth, cred);
@@ -253,8 +329,9 @@ retry:
cred = ERR_PTR(res);
}
}
-
- return (struct rpc_cred *) cred;
+ rpcauth_destroy_credlist(&free);
+out:
+ return cred;
}
struct rpc_cred *
@@ -275,10 +352,27 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
return ret;
}
+void
+rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
+ struct rpc_auth *auth, const struct rpc_credops *ops)
+{
+ INIT_HLIST_NODE(&cred->cr_hash);
+ INIT_LIST_HEAD(&cred->cr_lru);
+ atomic_set(&cred->cr_count, 1);
+ cred->cr_auth = auth;
+ cred->cr_ops = ops;
+ cred->cr_expire = jiffies;
+#ifdef RPC_DEBUG
+ cred->cr_magic = RPCAUTH_CRED_MAGIC;
+#endif
+ cred->cr_uid = acred->uid;
+}
+EXPORT_SYMBOL(rpcauth_init_cred);
+
struct rpc_cred *
rpcauth_bindcred(struct rpc_task *task)
{
- struct rpc_auth *auth = task->tk_auth;
+ struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
.uid = current->fsuid,
.gid = current->fsgid,
@@ -288,7 +382,7 @@ rpcauth_bindcred(struct rpc_task *task)
int flags = 0;
dprintk("RPC: %5u looking up %s cred\n",
- task->tk_pid, task->tk_auth->au_ops->au_name);
+ task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
get_group_info(acred.group_info);
if (task->tk_flags & RPC_TASK_ROOTCREDS)
flags |= RPCAUTH_LOOKUP_ROOTCREDS;
@@ -304,19 +398,42 @@ rpcauth_bindcred(struct rpc_task *task)
void
rpcauth_holdcred(struct rpc_task *task)
{
- dprintk("RPC: %5u holding %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name,
- task->tk_msg.rpc_cred);
- if (task->tk_msg.rpc_cred)
- get_rpccred(task->tk_msg.rpc_cred);
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ if (cred != NULL) {
+ get_rpccred(cred);
+ dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
+ cred->cr_auth->au_ops->au_name, cred);
+ }
}
void
put_rpccred(struct rpc_cred *cred)
{
- cred->cr_expire = jiffies;
+ /* Fast path for unhashed credentials */
+ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+ goto need_lock;
+
if (!atomic_dec_and_test(&cred->cr_count))
return;
+ goto out_destroy;
+need_lock:
+ if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
+ return;
+ if (!list_empty(&cred->cr_lru)) {
+ number_cred_unused--;
+ list_del_init(&cred->cr_lru);
+ }
+ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
+ rpcauth_unhash_cred(cred);
+ else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+ cred->cr_expire = jiffies;
+ list_add_tail(&cred->cr_lru, &cred_unused);
+ number_cred_unused++;
+ spin_unlock(&rpc_credcache_lock);
+ return;
+ }
+ spin_unlock(&rpc_credcache_lock);
+out_destroy:
cred->cr_ops->crdestroy(cred);
}
@@ -326,7 +443,7 @@ rpcauth_unbindcred(struct rpc_task *task)
struct rpc_cred *cred = task->tk_msg.rpc_cred;
dprintk("RPC: %5u releasing %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
put_rpccred(cred);
task->tk_msg.rpc_cred = NULL;
@@ -338,7 +455,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p)
struct rpc_cred *cred = task->tk_msg.rpc_cred;
dprintk("RPC: %5u marshaling %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crmarshal(task, p);
}
@@ -349,7 +466,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
struct rpc_cred *cred = task->tk_msg.rpc_cred;
dprintk("RPC: %5u validating %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crvalidate(task, p);
}
@@ -359,13 +476,17 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ int ret;
dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
- return encode(rqstp, data, obj);
+ lock_kernel();
+ ret = encode(rqstp, data, obj);
+ unlock_kernel();
+ return ret;
}
int
@@ -373,6 +494,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ int ret;
dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -380,7 +502,10 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
- return decode(rqstp, data, obj);
+ lock_kernel();
+ ret = decode(rqstp, data, obj);
+ unlock_kernel();
+ return ret;
}
int
@@ -390,7 +515,7 @@ rpcauth_refreshcred(struct rpc_task *task)
int err;
dprintk("RPC: %5u refreshing %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
if (err < 0)
@@ -401,17 +526,34 @@ rpcauth_refreshcred(struct rpc_task *task)
void
rpcauth_invalcred(struct rpc_task *task)
{
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
dprintk("RPC: %5u invalidating %s cred %p\n",
- task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
- spin_lock(&rpc_credcache_lock);
- if (task->tk_msg.rpc_cred)
- task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
- spin_unlock(&rpc_credcache_lock);
+ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
+ if (cred)
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}
int
rpcauth_uptodatecred(struct rpc_task *task)
{
- return !(task->tk_msg.rpc_cred) ||
- (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+ return cred == NULL ||
+ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
+}
+
+
+static struct shrinker *rpc_cred_shrinker;
+
+void __init rpcauth_init_module(void)
+{
+ rpc_init_authunix();
+ rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker);
+}
+
+void __exit rpcauth_remove_module(void)
+{
+ if (rpc_cred_shrinker != NULL)
+ remove_shrinker(rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4e4ccc5b6fea..baf4096d52d4 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -54,9 +54,10 @@
#include <linux/sunrpc/gss_api.h>
#include <asm/uaccess.h>
-static struct rpc_authops authgss_ops;
+static const struct rpc_authops authgss_ops;
-static struct rpc_credops gss_credops;
+static const struct rpc_credops gss_credops;
+static const struct rpc_credops gss_nullops;
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -64,7 +65,6 @@ static struct rpc_credops gss_credops;
#define NFS_NGROUPS 16
-#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */
#define GSS_CRED_SLACK 1024 /* XXX: unused */
/* length of a krb5 verifier (48), plus data added before arguments when
* using integrity (two 4-byte integers): */
@@ -79,19 +79,16 @@ static struct rpc_credops gss_credops;
/* dump the buffer in `emacs-hexl' style */
#define isprint(c) ((c > 0x1f) && (c < 0x7f))
-static DEFINE_RWLOCK(gss_ctx_lock);
-
struct gss_auth {
+ struct kref kref;
struct rpc_auth rpc_auth;
struct gss_api_mech *mech;
enum rpc_gss_svc service;
- struct list_head upcalls;
struct rpc_clnt *client;
struct dentry *dentry;
- spinlock_t lock;
};
-static void gss_destroy_ctx(struct gss_cl_ctx *);
+static void gss_free_ctx(struct gss_cl_ctx *);
static struct rpc_pipe_ops gss_upcall_ops;
static inline struct gss_cl_ctx *
@@ -105,20 +102,24 @@ static inline void
gss_put_ctx(struct gss_cl_ctx *ctx)
{
if (atomic_dec_and_test(&ctx->count))
- gss_destroy_ctx(ctx);
+ gss_free_ctx(ctx);
}
+/* gss_cred_set_ctx:
+ * called by gss_upcall_callback and gss_create_upcall in order
+ * to set the gss context. The actual exchange of an old context
+ * and a new one is protected by the inode->i_lock.
+ */
static void
gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_cl_ctx *old;
- write_lock(&gss_ctx_lock);
+
old = gss_cred->gc_ctx;
- gss_cred->gc_ctx = ctx;
- cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
- cred->cr_flags &= ~RPCAUTH_CRED_NEW;
- write_unlock(&gss_ctx_lock);
+ rcu_assign_pointer(gss_cred->gc_ctx, ctx);
+ set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
if (old)
gss_put_ctx(old);
}
@@ -129,10 +130,10 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred)
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
int res = 0;
- read_lock(&gss_ctx_lock);
- if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
+ rcu_read_lock();
+ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx)
res = 1;
- read_unlock(&gss_ctx_lock);
+ rcu_read_unlock();
return res;
}
@@ -171,10 +172,10 @@ gss_cred_get_ctx(struct rpc_cred *cred)
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx = NULL;
- read_lock(&gss_ctx_lock);
+ rcu_read_lock();
if (gss_cred->gc_ctx)
ctx = gss_get_ctx(gss_cred->gc_ctx);
- read_unlock(&gss_ctx_lock);
+ rcu_read_unlock();
return ctx;
}
@@ -269,10 +270,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
}
static struct gss_upcall_msg *
-__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
+__gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
{
struct gss_upcall_msg *pos;
- list_for_each_entry(pos, &gss_auth->upcalls, list) {
+ list_for_each_entry(pos, &rpci->in_downcall, list) {
if (pos->uid != uid)
continue;
atomic_inc(&pos->count);
@@ -290,24 +291,24 @@ __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
static inline struct gss_upcall_msg *
gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
{
+ struct inode *inode = gss_auth->dentry->d_inode;
+ struct rpc_inode *rpci = RPC_I(inode);
struct gss_upcall_msg *old;
- spin_lock(&gss_auth->lock);
- old = __gss_find_upcall(gss_auth, gss_msg->uid);
+ spin_lock(&inode->i_lock);
+ old = __gss_find_upcall(rpci, gss_msg->uid);
if (old == NULL) {
atomic_inc(&gss_msg->count);
- list_add(&gss_msg->list, &gss_auth->upcalls);
+ list_add(&gss_msg->list, &rpci->in_downcall);
} else
gss_msg = old;
- spin_unlock(&gss_auth->lock);
+ spin_unlock(&inode->i_lock);
return gss_msg;
}
static void
__gss_unhash_msg(struct gss_upcall_msg *gss_msg)
{
- if (list_empty(&gss_msg->list))
- return;
list_del_init(&gss_msg->list);
rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
wake_up_all(&gss_msg->waitqueue);
@@ -318,10 +319,14 @@ static void
gss_unhash_msg(struct gss_upcall_msg *gss_msg)
{
struct gss_auth *gss_auth = gss_msg->auth;
+ struct inode *inode = gss_auth->dentry->d_inode;
- spin_lock(&gss_auth->lock);
- __gss_unhash_msg(gss_msg);
- spin_unlock(&gss_auth->lock);
+ if (list_empty(&gss_msg->list))
+ return;
+ spin_lock(&inode->i_lock);
+ if (!list_empty(&gss_msg->list))
+ __gss_unhash_msg(gss_msg);
+ spin_unlock(&inode->i_lock);
}
static void
@@ -330,16 +335,16 @@ gss_upcall_callback(struct rpc_task *task)
struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
+ struct inode *inode = gss_msg->auth->dentry->d_inode;
- BUG_ON(gss_msg == NULL);
+ spin_lock(&inode->i_lock);
if (gss_msg->ctx)
gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
else
task->tk_status = gss_msg->msg.errno;
- spin_lock(&gss_msg->auth->lock);
gss_cred->gc_upcall = NULL;
rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
- spin_unlock(&gss_msg->auth->lock);
+ spin_unlock(&inode->i_lock);
gss_release_msg(gss_msg);
}
@@ -386,11 +391,12 @@ static inline int
gss_refresh_upcall(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_msg.rpc_cred;
- struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
+ struct gss_auth *gss_auth = container_of(cred->cr_auth,
struct gss_auth, rpc_auth);
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg;
+ struct inode *inode = gss_auth->dentry->d_inode;
int err = 0;
dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
@@ -400,7 +406,7 @@ gss_refresh_upcall(struct rpc_task *task)
err = PTR_ERR(gss_msg);
goto out;
}
- spin_lock(&gss_auth->lock);
+ spin_lock(&inode->i_lock);
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
@@ -411,7 +417,7 @@ gss_refresh_upcall(struct rpc_task *task)
rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
} else
err = gss_msg->msg.errno;
- spin_unlock(&gss_auth->lock);
+ spin_unlock(&inode->i_lock);
gss_release_msg(gss_msg);
out:
dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
@@ -422,6 +428,7 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
+ struct inode *inode = gss_auth->dentry->d_inode;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
DEFINE_WAIT(wait);
@@ -435,12 +442,11 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
}
for (;;) {
prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
- spin_lock(&gss_auth->lock);
+ spin_lock(&inode->i_lock);
if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
- spin_unlock(&gss_auth->lock);
break;
}
- spin_unlock(&gss_auth->lock);
+ spin_unlock(&inode->i_lock);
if (signalled()) {
err = -ERESTARTSYS;
goto out_intr;
@@ -451,6 +457,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx));
else
err = gss_msg->msg.errno;
+ spin_unlock(&inode->i_lock);
out_intr:
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
@@ -489,12 +496,11 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
const void *p, *end;
void *buf;
struct rpc_clnt *clnt;
- struct gss_auth *gss_auth;
- struct rpc_cred *cred;
struct gss_upcall_msg *gss_msg;
+ struct inode *inode = filp->f_path.dentry->d_inode;
struct gss_cl_ctx *ctx;
uid_t uid;
- int err = -EFBIG;
+ ssize_t err = -EFBIG;
if (mlen > MSG_BUF_MAXSIZE)
goto out;
@@ -503,7 +509,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!buf)
goto out;
- clnt = RPC_I(filp->f_path.dentry->d_inode)->private;
+ clnt = RPC_I(inode)->private;
err = -EFAULT;
if (copy_from_user(buf, src, mlen))
goto err;
@@ -519,43 +525,38 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
ctx = gss_alloc_context();
if (ctx == NULL)
goto err;
- err = 0;
- gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
- p = gss_fill_context(p, end, ctx, gss_auth->mech);
+
+ err = -ENOENT;
+ /* Find a matching upcall */
+ spin_lock(&inode->i_lock);
+ gss_msg = __gss_find_upcall(RPC_I(inode), uid);
+ if (gss_msg == NULL) {
+ spin_unlock(&inode->i_lock);
+ goto err_put_ctx;
+ }
+ list_del_init(&gss_msg->list);
+ spin_unlock(&inode->i_lock);
+
+ p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
if (IS_ERR(p)) {
err = PTR_ERR(p);
- if (err != -EACCES)
- goto err_put_ctx;
- }
- spin_lock(&gss_auth->lock);
- gss_msg = __gss_find_upcall(gss_auth, uid);
- if (gss_msg) {
- if (err == 0 && gss_msg->ctx == NULL)
- gss_msg->ctx = gss_get_ctx(ctx);
- gss_msg->msg.errno = err;
- __gss_unhash_msg(gss_msg);
- spin_unlock(&gss_auth->lock);
- gss_release_msg(gss_msg);
- } else {
- struct auth_cred acred = { .uid = uid };
- spin_unlock(&gss_auth->lock);
- cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW);
- if (IS_ERR(cred)) {
- err = PTR_ERR(cred);
- goto err_put_ctx;
- }
- gss_cred_set_ctx(cred, gss_get_ctx(ctx));
+ gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN;
+ goto err_release_msg;
}
- gss_put_ctx(ctx);
- kfree(buf);
- dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen);
- return mlen;
+ gss_msg->ctx = gss_get_ctx(ctx);
+ err = mlen;
+
+err_release_msg:
+ spin_lock(&inode->i_lock);
+ __gss_unhash_msg(gss_msg);
+ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
err_put_ctx:
gss_put_ctx(ctx);
err:
kfree(buf);
out:
- dprintk("RPC: gss_pipe_downcall returning %d\n", err);
+ dprintk("RPC: gss_pipe_downcall returning %Zd\n", err);
return err;
}
@@ -563,27 +564,21 @@ static void
gss_pipe_release(struct inode *inode)
{
struct rpc_inode *rpci = RPC_I(inode);
- struct rpc_clnt *clnt;
- struct rpc_auth *auth;
- struct gss_auth *gss_auth;
+ struct gss_upcall_msg *gss_msg;
- clnt = rpci->private;
- auth = clnt->cl_auth;
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- spin_lock(&gss_auth->lock);
- while (!list_empty(&gss_auth->upcalls)) {
- struct gss_upcall_msg *gss_msg;
+ spin_lock(&inode->i_lock);
+ while (!list_empty(&rpci->in_downcall)) {
- gss_msg = list_entry(gss_auth->upcalls.next,
+ gss_msg = list_entry(rpci->in_downcall.next,
struct gss_upcall_msg, list);
gss_msg->msg.errno = -EPIPE;
atomic_inc(&gss_msg->count);
__gss_unhash_msg(gss_msg);
- spin_unlock(&gss_auth->lock);
+ spin_unlock(&inode->i_lock);
gss_release_msg(gss_msg);
- spin_lock(&gss_auth->lock);
+ spin_lock(&inode->i_lock);
}
- spin_unlock(&gss_auth->lock);
+ spin_unlock(&inode->i_lock);
}
static void
@@ -637,18 +632,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
- INIT_LIST_HEAD(&gss_auth->upcalls);
- spin_lock_init(&gss_auth->lock);
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
atomic_set(&auth->au_count, 1);
-
- err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
- if (err)
- goto err_put_mech;
+ kref_init(&gss_auth->kref);
gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
@@ -657,7 +647,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
goto err_put_mech;
}
+ err = rpcauth_init_credcache(auth);
+ if (err)
+ goto err_unlink_pipe;
+
return auth;
+err_unlink_pipe:
+ rpc_unlink(gss_auth->dentry);
err_put_mech:
gss_mech_put(gss_auth->mech);
err_free:
@@ -668,6 +664,25 @@ out_dec:
}
static void
+gss_free(struct gss_auth *gss_auth)
+{
+ rpc_unlink(gss_auth->dentry);
+ gss_auth->dentry = NULL;
+ gss_mech_put(gss_auth->mech);
+
+ kfree(gss_auth);
+ module_put(THIS_MODULE);
+}
+
+static void
+gss_free_callback(struct kref *kref)
+{
+ struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref);
+
+ gss_free(gss_auth);
+}
+
+static void
gss_destroy(struct rpc_auth *auth)
{
struct gss_auth *gss_auth;
@@ -675,23 +690,51 @@ gss_destroy(struct rpc_auth *auth)
dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
auth, auth->au_flavor);
+ rpcauth_destroy_credcache(auth);
+
gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- rpc_unlink(gss_auth->dentry);
- gss_auth->dentry = NULL;
- gss_mech_put(gss_auth->mech);
+ kref_put(&gss_auth->kref, gss_free_callback);
+}
- rpcauth_free_credcache(auth);
- kfree(gss_auth);
- module_put(THIS_MODULE);
+/*
+ * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * to the server with the GSS control procedure field set to
+ * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
+ * all RPCSEC_GSS state associated with that context.
+ */
+static int
+gss_destroying_context(struct rpc_cred *cred)
+{
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
+ struct rpc_task *task;
+
+ if (gss_cred->gc_ctx == NULL ||
+ gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY)
+ return 0;
+
+ gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY;
+ cred->cr_ops = &gss_nullops;
+
+ /* Take a reference to ensure the cred will be destroyed either
+ * by the RPC call or by the put_rpccred() below */
+ get_rpccred(cred);
+
+ task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
+
+ put_rpccred(cred);
+ return 1;
}
-/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure
+/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
* to create a new cred or context, so they check that things have been
* allocated before freeing them. */
static void
-gss_destroy_ctx(struct gss_cl_ctx *ctx)
+gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
- dprintk("RPC: gss_destroy_ctx\n");
+ dprintk("RPC: gss_free_ctx\n");
if (ctx->gc_gss_ctx)
gss_delete_sec_context(&ctx->gc_gss_ctx);
@@ -701,15 +744,46 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx)
}
static void
-gss_destroy_cred(struct rpc_cred *rc)
+gss_free_ctx_callback(struct rcu_head *head)
{
- struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
+ struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu);
+ gss_do_free_ctx(ctx);
+}
- dprintk("RPC: gss_destroy_cred \n");
+static void
+gss_free_ctx(struct gss_cl_ctx *ctx)
+{
+ call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
+}
- if (cred->gc_ctx)
- gss_put_ctx(cred->gc_ctx);
- kfree(cred);
+static void
+gss_free_cred(struct gss_cred *gss_cred)
+{
+ dprintk("RPC: gss_free_cred %p\n", gss_cred);
+ kfree(gss_cred);
+}
+
+static void
+gss_free_cred_callback(struct rcu_head *head)
+{
+ struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu);
+ gss_free_cred(gss_cred);
+}
+
+static void
+gss_destroy_cred(struct rpc_cred *cred)
+{
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
+ struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
+
+ if (gss_destroying_context(cred))
+ return;
+ rcu_assign_pointer(gss_cred->gc_ctx, NULL);
+ call_rcu(&cred->cr_rcu, gss_free_cred_callback);
+ if (ctx)
+ gss_put_ctx(ctx);
+ kref_put(&gss_auth->kref, gss_free_callback);
}
/*
@@ -734,16 +808,14 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
goto out_err;
- atomic_set(&cred->gc_count, 1);
- cred->gc_uid = acred->uid;
+ rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
/*
* Note: in order to force a call to call_refresh(), we deliberately
* fail to flag the credential as RPCAUTH_CRED_UPTODATE.
*/
- cred->gc_flags = 0;
- cred->gc_base.cr_ops = &gss_credops;
- cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
+ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
cred->gc_service = gss_auth->service;
+ kref_get(&gss_auth->kref);
return &cred->gc_base;
out_err:
@@ -774,7 +846,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
* we don't really care if the credential has expired or not,
* since the caller should be prepared to reinitialise it.
*/
- if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW))
+ if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
/* Don't match with creds that have expired. */
if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
@@ -830,7 +902,7 @@ gss_marshal(struct rpc_task *task, __be32 *p)
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
} else if (maj_stat != 0) {
printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
goto out_put_ctx;
@@ -855,6 +927,13 @@ gss_refresh(struct rpc_task *task)
return 0;
}
+/* Dummy refresh routine: used only when destroying the context */
+static int
+gss_refresh_null(struct rpc_task *task)
+{
+ return -EACCES;
+}
+
static __be32 *
gss_validate(struct rpc_task *task, __be32 *p)
{
@@ -883,12 +962,15 @@ gss_validate(struct rpc_task *task, __be32 *p)
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
- if (maj_stat)
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat) {
+ dprintk("RPC: %5u gss_validate: gss_verify_mic returned"
+ "error 0x%08x\n", task->tk_pid, maj_stat);
goto out_bad;
+ }
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
- task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
gss_put_ctx(ctx);
dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n",
task->tk_pid);
@@ -917,7 +999,9 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
+ lock_kernel();
status = encode(rqstp, p, obj);
+ unlock_kernel();
if (status)
return status;
@@ -937,7 +1021,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
status = -EIO; /* XXX? */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
return status;
q = xdr_encode_opaque(p, NULL, mic.len);
@@ -1011,7 +1095,9 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
+ lock_kernel();
status = encode(rqstp, p, obj);
+ unlock_kernel();
if (status)
return status;
@@ -1036,7 +1122,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
return status;
@@ -1070,12 +1156,16 @@ gss_wrap_req(struct rpc_task *task,
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
+ lock_kernel();
status = encode(rqstp, p, obj);
+ unlock_kernel();
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
+ lock_kernel();
status = encode(rqstp, p, obj);
+ unlock_kernel();
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1123,7 +1213,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
return status;
return 0;
@@ -1148,7 +1238,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
return status;
if (ntohl(*(*p)++) != rqstp->rq_seqno)
@@ -1188,10 +1278,12 @@ gss_unwrap_resp(struct rpc_task *task,
break;
}
/* take into account extra slack for integrity and privacy cases: */
- task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp)
+ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
+ (savedlen - head->iov_len);
out_decode:
+ lock_kernel();
status = decode(rqstp, p, obj);
+ unlock_kernel();
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
@@ -1199,7 +1291,7 @@ out:
return status;
}
-static struct rpc_authops authgss_ops = {
+static const struct rpc_authops authgss_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_GSS,
#ifdef RPC_DEBUG
@@ -1211,7 +1303,7 @@ static struct rpc_authops authgss_ops = {
.crcreate = gss_create_cred
};
-static struct rpc_credops gss_credops = {
+static const struct rpc_credops gss_credops = {
.cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_cred,
.cr_init = gss_cred_init,
@@ -1223,6 +1315,17 @@ static struct rpc_credops gss_credops = {
.crunwrap_resp = gss_unwrap_resp,
};
+static const struct rpc_credops gss_nullops = {
+ .cr_name = "AUTH_GSS",
+ .crdestroy = gss_destroy_cred,
+ .crmatch = gss_match,
+ .crmarshal = gss_marshal,
+ .crrefresh = gss_refresh_null,
+ .crvalidate = gss_validate,
+ .crwrap_req = gss_wrap_req,
+ .crunwrap_resp = gss_unwrap_resp,
+};
+
static struct rpc_pipe_ops gss_upcall_ops = {
.upcall = gss_pipe_upcall,
.downcall = gss_pipe_downcall,
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7b1943217053..71b9daefdff3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -201,7 +201,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
kfree(kctx);
}
-static struct gss_api_ops gss_kerberos_ops = {
+static const struct gss_api_ops gss_kerberos_ops = {
.gss_import_sec_context = gss_import_sec_context_kerberos,
.gss_get_mic = gss_get_mic_kerberos,
.gss_verify_mic = gss_verify_mic_kerberos,
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 7e15aa68ae64..577d590e755f 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -202,7 +202,7 @@ gss_get_mic_spkm3(struct gss_ctx *ctx,
return err;
}
-static struct gss_api_ops gss_spkm3_ops = {
+static const struct gss_api_ops gss_spkm3_ops = {
.gss_import_sec_context = gss_import_sec_context_spkm3,
.gss_get_mic = gss_get_mic_spkm3,
.gss_verify_mic = gss_verify_mic_spkm3,
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 3df9fccab2f8..537d0e8589dd 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -76,7 +76,7 @@ nul_marshal(struct rpc_task *task, __be32 *p)
static int
nul_refresh(struct rpc_task *task)
{
- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
return 0;
}
@@ -101,7 +101,7 @@ nul_validate(struct rpc_task *task, __be32 *p)
return p;
}
-struct rpc_authops authnull_ops = {
+const struct rpc_authops authnull_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_NULL,
#ifdef RPC_DEBUG
@@ -122,7 +122,7 @@ struct rpc_auth null_auth = {
};
static
-struct rpc_credops null_credops = {
+const struct rpc_credops null_credops = {
.cr_name = "AUTH_NULL",
.crdestroy = nul_destroy_cred,
.crmatch = nul_match,
@@ -133,9 +133,11 @@ struct rpc_credops null_credops = {
static
struct rpc_cred null_cred = {
+ .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
+ .cr_auth = &null_auth,
.cr_ops = &null_credops,
.cr_count = ATOMIC_INIT(1),
- .cr_flags = RPCAUTH_CRED_UPTODATE,
+ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
#ifdef RPC_DEBUG
.cr_magic = RPCAUTH_CRED_MAGIC,
#endif
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 4e7733aee36e..5ed91e5bcee4 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -20,11 +20,6 @@ struct unx_cred {
gid_t uc_gids[NFS_NGROUPS];
};
#define uc_uid uc_base.cr_uid
-#define uc_count uc_base.cr_count
-#define uc_flags uc_base.cr_flags
-#define uc_expire uc_base.cr_expire
-
-#define UNX_CRED_EXPIRE (60 * HZ)
#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
@@ -34,15 +29,14 @@ struct unx_cred {
static struct rpc_auth unix_auth;
static struct rpc_cred_cache unix_cred_cache;
-static struct rpc_credops unix_credops;
+static const struct rpc_credops unix_credops;
static struct rpc_auth *
unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
- if (atomic_inc_return(&unix_auth.au_count) == 0)
- unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
+ atomic_inc(&unix_auth.au_count);
return &unix_auth;
}
@@ -50,7 +44,7 @@ static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
- rpcauth_free_credcache(auth);
+ rpcauth_clear_credcache(auth->au_credcache);
}
/*
@@ -74,8 +68,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
return ERR_PTR(-ENOMEM);
- atomic_set(&cred->uc_count, 1);
- cred->uc_flags = RPCAUTH_CRED_UPTODATE;
+ rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
+ cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
cred->uc_uid = 0;
cred->uc_gid = 0;
@@ -85,22 +79,34 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
- cred->uc_uid = acred->uid;
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
if (i < NFS_NGROUPS)
cred->uc_gids[i] = NOGROUP;
}
- cred->uc_base.cr_ops = &unix_credops;
- return (struct rpc_cred *) cred;
+ return &cred->uc_base;
+}
+
+static void
+unx_free_cred(struct unx_cred *unx_cred)
+{
+ dprintk("RPC: unx_free_cred %p\n", unx_cred);
+ kfree(unx_cred);
+}
+
+static void
+unx_free_cred_callback(struct rcu_head *head)
+{
+ struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
+ unx_free_cred(unx_cred);
}
static void
unx_destroy_cred(struct rpc_cred *cred)
{
- kfree(cred);
+ call_rcu(&cred->cr_rcu, unx_free_cred_callback);
}
/*
@@ -111,7 +117,7 @@ unx_destroy_cred(struct rpc_cred *cred)
static int
unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
{
- struct unx_cred *cred = (struct unx_cred *) rcred;
+ struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
int i;
if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
@@ -142,7 +148,7 @@ static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
- struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
+ struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
__be32 *base, *hold;
int i;
@@ -175,7 +181,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
static int
unx_refresh(struct rpc_task *task)
{
- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
return 0;
}
@@ -198,13 +204,18 @@ unx_validate(struct rpc_task *task, __be32 *p)
printk("RPC: giant verf size: %u\n", size);
return NULL;
}
- task->tk_auth->au_rslack = (size >> 2) + 2;
+ task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
return p;
}
-struct rpc_authops authunix_ops = {
+void __init rpc_init_authunix(void)
+{
+ spin_lock_init(&unix_cred_cache.lock);
+}
+
+const struct rpc_authops authunix_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_UNIX,
#ifdef RPC_DEBUG
@@ -218,7 +229,6 @@ struct rpc_authops authunix_ops = {
static
struct rpc_cred_cache unix_cred_cache = {
- .expire = UNX_CRED_EXPIRE,
};
static
@@ -232,7 +242,7 @@ struct rpc_auth unix_auth = {
};
static
-struct rpc_credops unix_credops = {
+const struct rpc_credops unix_credops = {
.cr_name = "AUTH_UNIX",
.crdestroy = unx_destroy_cred,
.crmatch = unx_match,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d8fbee40a19c..52429b1ffcc1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -44,6 +44,12 @@
dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \
__FUNCTION__, t->tk_status)
+/*
+ * All RPC clients are linked into this list
+ */
+static LIST_HEAD(all_clients);
+static DEFINE_SPINLOCK(rpc_client_lock);
+
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
@@ -66,6 +72,21 @@ static void call_connect_status(struct rpc_task *task);
static __be32 * call_header(struct rpc_task *task);
static __be32 * call_verify(struct rpc_task *task);
+static int rpc_ping(struct rpc_clnt *clnt, int flags);
+
+static void rpc_register_client(struct rpc_clnt *clnt)
+{
+ spin_lock(&rpc_client_lock);
+ list_add(&clnt->cl_clients, &all_clients);
+ spin_unlock(&rpc_client_lock);
+}
+
+static void rpc_unregister_client(struct rpc_clnt *clnt)
+{
+ spin_lock(&rpc_client_lock);
+ list_del(&clnt->cl_clients);
+ spin_unlock(&rpc_client_lock);
+}
static int
rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
@@ -111,6 +132,9 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
dprintk("RPC: creating %s client for %s (xprt %p)\n",
program->name, servname, xprt);
+ err = rpciod_up();
+ if (err)
+ goto out_no_rpciod;
err = -EINVAL;
if (!xprt)
goto out_no_xprt;
@@ -121,8 +145,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
- atomic_set(&clnt->cl_users, 0);
- atomic_set(&clnt->cl_count, 1);
clnt->cl_parent = clnt;
clnt->cl_server = clnt->cl_inline_name;
@@ -148,6 +170,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
if (clnt->cl_metrics == NULL)
goto out_no_stats;
clnt->cl_program = program;
+ INIT_LIST_HEAD(&clnt->cl_tasks);
+ spin_lock_init(&clnt->cl_lock);
if (!xprt_bound(clnt->cl_xprt))
clnt->cl_autobind = 1;
@@ -155,6 +179,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+ kref_init(&clnt->cl_kref);
+
err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
if (err < 0)
goto out_no_path;
@@ -172,6 +198,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
+ rpc_register_client(clnt);
return clnt;
out_no_auth:
@@ -188,6 +215,8 @@ out_no_stats:
out_err:
xprt_put(xprt);
out_no_xprt:
+ rpciod_down();
+out_no_rpciod:
return ERR_PTR(err);
}
@@ -205,13 +234,32 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
+ struct rpc_xprtsock_create xprtargs = {
+ .proto = args->protocol,
+ .srcaddr = args->saddress,
+ .dstaddr = args->address,
+ .addrlen = args->addrsize,
+ .timeout = args->timeout
+ };
+ char servername[20];
- xprt = xprt_create_transport(args->protocol, args->address,
- args->addrsize, args->timeout);
+ xprt = xprt_create_transport(&xprtargs);
if (IS_ERR(xprt))
return (struct rpc_clnt *)xprt;
/*
+ * If the caller chooses not to specify a hostname, whip
+ * up a string representation of the passed-in address.
+ */
+ if (args->servername == NULL) {
+ struct sockaddr_in *addr =
+ (struct sockaddr_in *) &args->address;
+ snprintf(servername, sizeof(servername), NIPQUAD_FMT,
+ NIPQUAD(addr->sin_addr.s_addr));
+ args->servername = servername;
+ }
+
+ /*
* By default, kernel RPC client connects from a reserved port.
* CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
* but it is always enabled for rpciod, which handles the connect
@@ -245,8 +293,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
clnt->cl_intr = 1;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
- if (args->flags & RPC_CLNT_CREATE_ONESHOT)
- clnt->cl_oneshot = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
@@ -268,24 +314,25 @@ rpc_clone_client(struct rpc_clnt *clnt)
new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
if (!new)
goto out_no_clnt;
- atomic_set(&new->cl_count, 1);
- atomic_set(&new->cl_users, 0);
+ new->cl_parent = clnt;
+ /* Turn off autobind on clones */
+ new->cl_autobind = 0;
+ INIT_LIST_HEAD(&new->cl_tasks);
+ spin_lock_init(&new->cl_lock);
+ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
new->cl_metrics = rpc_alloc_iostats(clnt);
if (new->cl_metrics == NULL)
goto out_no_stats;
+ kref_init(&new->cl_kref);
err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
if (err != 0)
goto out_no_path;
- new->cl_parent = clnt;
- atomic_inc(&clnt->cl_count);
- new->cl_xprt = xprt_get(clnt->cl_xprt);
- /* Turn off autobind on clones */
- new->cl_autobind = 0;
- new->cl_oneshot = 0;
- new->cl_dead = 0;
- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
if (new->cl_auth)
atomic_inc(&new->cl_auth->au_count);
+ xprt_get(clnt->cl_xprt);
+ kref_get(&clnt->cl_kref);
+ rpc_register_client(new);
+ rpciod_up();
return new;
out_no_path:
rpc_free_iostats(new->cl_metrics);
@@ -298,86 +345,86 @@ out_no_clnt:
/*
* Properly shut down an RPC client, terminating all outstanding
- * requests. Note that we must be certain that cl_oneshot and
- * cl_dead are cleared, or else the client would be destroyed
- * when the last task releases it.
+ * requests.
*/
-int
-rpc_shutdown_client(struct rpc_clnt *clnt)
+void rpc_shutdown_client(struct rpc_clnt *clnt)
{
- dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
- clnt->cl_protname, clnt->cl_server,
- atomic_read(&clnt->cl_users));
-
- while (atomic_read(&clnt->cl_users) > 0) {
- /* Don't let rpc_release_client destroy us */
- clnt->cl_oneshot = 0;
- clnt->cl_dead = 0;
+ dprintk("RPC: shutting down %s client for %s\n",
+ clnt->cl_protname, clnt->cl_server);
+
+ while (!list_empty(&clnt->cl_tasks)) {
rpc_killall_tasks(clnt);
wait_event_timeout(destroy_wait,
- !atomic_read(&clnt->cl_users), 1*HZ);
- }
-
- if (atomic_read(&clnt->cl_users) < 0) {
- printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
- clnt, atomic_read(&clnt->cl_users));
-#ifdef RPC_DEBUG
- rpc_show_tasks();
-#endif
- BUG();
+ list_empty(&clnt->cl_tasks), 1*HZ);
}
- return rpc_destroy_client(clnt);
+ rpc_release_client(clnt);
}
/*
- * Delete an RPC client
+ * Free an RPC client
*/
-int
-rpc_destroy_client(struct rpc_clnt *clnt)
+static void
+rpc_free_client(struct kref *kref)
{
- if (!atomic_dec_and_test(&clnt->cl_count))
- return 1;
- BUG_ON(atomic_read(&clnt->cl_users) != 0);
+ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
dprintk("RPC: destroying %s client for %s\n",
clnt->cl_protname, clnt->cl_server);
- if (clnt->cl_auth) {
- rpcauth_destroy(clnt->cl_auth);
- clnt->cl_auth = NULL;
- }
if (!IS_ERR(clnt->cl_dentry)) {
rpc_rmdir(clnt->cl_dentry);
rpc_put_mount();
}
if (clnt->cl_parent != clnt) {
- rpc_destroy_client(clnt->cl_parent);
+ rpc_release_client(clnt->cl_parent);
goto out_free;
}
if (clnt->cl_server != clnt->cl_inline_name)
kfree(clnt->cl_server);
out_free:
+ rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
xprt_put(clnt->cl_xprt);
+ rpciod_down();
kfree(clnt);
- return 0;
}
/*
- * Release an RPC client
+ * Free an RPC client
+ */
+static void
+rpc_free_auth(struct kref *kref)
+{
+ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
+
+ if (clnt->cl_auth == NULL) {
+ rpc_free_client(kref);
+ return;
+ }
+
+ /*
+ * Note: RPCSEC_GSS may need to send NULL RPC calls in order to
+ * release remaining GSS contexts. This mechanism ensures
+ * that it can do so safely.
+ */
+ kref_init(kref);
+ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = NULL;
+ kref_put(kref, rpc_free_client);
+}
+
+/*
+ * Release reference to the RPC client
*/
void
rpc_release_client(struct rpc_clnt *clnt)
{
- dprintk("RPC: rpc_release_client(%p, %d)\n",
- clnt, atomic_read(&clnt->cl_users));
+ dprintk("RPC: rpc_release_client(%p)\n", clnt);
- if (!atomic_dec_and_test(&clnt->cl_users))
- return;
- wake_up(&destroy_wait);
- if (clnt->cl_oneshot || clnt->cl_dead)
- rpc_destroy_client(clnt);
+ if (list_empty(&clnt->cl_tasks))
+ wake_up(&destroy_wait);
+ kref_put(&clnt->cl_kref, rpc_free_auth);
}
/**
@@ -468,82 +515,96 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
rpc_restore_sigmask(oldset);
}
-/*
- * New rpc_call implementation
+static
+struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
+ struct rpc_message *msg,
+ int flags,
+ const struct rpc_call_ops *ops,
+ void *data)
+{
+ struct rpc_task *task, *ret;
+ sigset_t oldset;
+
+ task = rpc_new_task(clnt, flags, ops, data);
+ if (task == NULL) {
+ rpc_release_calldata(ops, data);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
+ rpc_task_sigmask(task, &oldset);
+ if (msg != NULL) {
+ rpc_call_setup(task, msg, 0);
+ if (task->tk_status != 0) {
+ ret = ERR_PTR(task->tk_status);
+ rpc_put_task(task);
+ goto out;
+ }
+ }
+ atomic_inc(&task->tk_count);
+ rpc_execute(task);
+ ret = task;
+out:
+ rpc_restore_sigmask(&oldset);
+ return ret;
+}
+
+/**
+ * rpc_call_sync - Perform a synchronous RPC call
+ * @clnt: pointer to RPC client
+ * @msg: RPC call parameters
+ * @flags: RPC call flags
*/
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
struct rpc_task *task;
- sigset_t oldset;
- int status;
-
- /* If this client is slain all further I/O fails */
- if (clnt->cl_dead)
- return -EIO;
+ int status;
BUG_ON(flags & RPC_TASK_ASYNC);
- task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
- if (task == NULL)
- return -ENOMEM;
-
- /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
- rpc_task_sigmask(task, &oldset);
-
- /* Set up the call info struct and execute the task */
- rpc_call_setup(task, msg, 0);
- if (task->tk_status == 0) {
- atomic_inc(&task->tk_count);
- rpc_execute(task);
- }
+ task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
- rpc_restore_sigmask(&oldset);
return status;
}
-/*
- * New rpc_call implementation
+/**
+ * rpc_call_async - Perform an asynchronous RPC call
+ * @clnt: pointer to RPC client
+ * @msg: RPC call parameters
+ * @flags: RPC call flags
+ * @ops: RPC call ops
+ * @data: user call data
*/
int
rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
- sigset_t oldset;
- int status;
-
- /* If this client is slain all further I/O fails */
- status = -EIO;
- if (clnt->cl_dead)
- goto out_release;
-
- flags |= RPC_TASK_ASYNC;
-
- /* Create/initialize a new RPC task */
- status = -ENOMEM;
- if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
- goto out_release;
-
- /* Mask signals on GSS_AUTH upcalls */
- rpc_task_sigmask(task, &oldset);
- rpc_call_setup(task, msg, 0);
-
- /* Set up the call info struct and execute the task */
- status = task->tk_status;
- if (status == 0)
- rpc_execute(task);
- else
- rpc_put_task(task);
-
- rpc_restore_sigmask(&oldset);
- return status;
-out_release:
- rpc_release_calldata(tk_ops, data);
- return status;
+ task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
}
+/**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @clnt: pointer to RPC client
+ * @flags: RPC flags
+ * @ops: RPC call ops
+ * @data: user call data
+ */
+struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+ const struct rpc_call_ops *tk_ops,
+ void *data)
+{
+ return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
+}
+EXPORT_SYMBOL(rpc_run_task);
void
rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
@@ -745,7 +806,7 @@ call_reserveresult(struct rpc_task *task)
static void
call_allocate(struct rpc_task *task)
{
- unsigned int slack = task->tk_auth->au_cslack;
+ unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -843,10 +904,8 @@ call_encode(struct rpc_task *task)
if (encode == NULL)
return;
- lock_kernel();
task->tk_status = rpcauth_wrap_req(task, encode, req, p,
task->tk_msg.rpc_argp);
- unlock_kernel();
if (task->tk_status == -ENOMEM) {
/* XXX: Is this sane? */
rpc_delay(task, 3*HZ);
@@ -1177,10 +1236,8 @@ call_decode(struct rpc_task *task)
task->tk_action = rpc_exit_task;
if (decode) {
- lock_kernel();
task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
task->tk_msg.rpc_resp);
- unlock_kernel();
}
dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
task->tk_status);
@@ -1273,9 +1330,9 @@ call_verify(struct rpc_task *task)
* - if it isn't pointer subtraction in the NFS client may give
* undefined results
*/
- printk(KERN_WARNING
- "call_verify: XDR representation not a multiple of"
- " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
+ dprintk("RPC: %5u %s: XDR representation not a multiple of"
+ " 4 bytes: 0x%x\n", task->tk_pid, __FUNCTION__,
+ task->tk_rqstp->rq_rcv_buf.len);
goto out_eio;
}
if ((len -= 3) < 0)
@@ -1283,7 +1340,8 @@ call_verify(struct rpc_task *task)
p += 1; /* skip XID */
if ((n = ntohl(*p++)) != RPC_REPLY) {
- printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
+ dprintk("RPC: %5u %s: not an RPC reply: %x\n",
+ task->tk_pid, __FUNCTION__, n);
goto out_garbage;
}
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
@@ -1334,7 +1392,8 @@ call_verify(struct rpc_task *task)
"authentication.\n", task->tk_client->cl_server);
break;
default:
- printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
+ dprintk("RPC: %5u %s: unknown auth error: %x\n",
+ task->tk_pid, __FUNCTION__, n);
error = -EIO;
}
dprintk("RPC: %5u %s: call rejected %d\n",
@@ -1342,7 +1401,8 @@ call_verify(struct rpc_task *task)
goto out_err;
}
if (!(p = rpcauth_checkverf(task, p))) {
- printk(KERN_WARNING "call_verify: auth check failed\n");
+ dprintk("RPC: %5u %s: auth check failed\n",
+ task->tk_pid, __FUNCTION__);
goto out_garbage; /* bad verifier, retry */
}
len = p - (__be32 *)iov->iov_base - 1;
@@ -1381,7 +1441,8 @@ call_verify(struct rpc_task *task)
task->tk_pid, __FUNCTION__);
break; /* retry */
default:
- printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
+ dprintk("RPC: %5u %s: server accept status: %x\n",
+ task->tk_pid, __FUNCTION__, n);
/* Also retry */
}
@@ -1395,14 +1456,16 @@ out_garbage:
out_retry:
return ERR_PTR(-EAGAIN);
}
- printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
out_eio:
error = -EIO;
out_err:
rpc_exit(task, error);
+ dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
+ __FUNCTION__, error);
return ERR_PTR(error);
out_overflow:
- printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
+ dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
+ __FUNCTION__);
goto out_garbage;
}
@@ -1421,7 +1484,7 @@ static struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
-int rpc_ping(struct rpc_clnt *clnt, int flags)
+static int rpc_ping(struct rpc_clnt *clnt, int flags)
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null,
@@ -1432,3 +1495,51 @@ int rpc_ping(struct rpc_clnt *clnt, int flags)
put_rpccred(msg.rpc_cred);
return err;
}
+
+struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null,
+ .rpc_cred = cred,
+ };
+ return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
+}
+EXPORT_SYMBOL(rpc_call_null);
+
+#ifdef RPC_DEBUG
+void rpc_show_tasks(void)
+{
+ struct rpc_clnt *clnt;
+ struct rpc_task *t;
+
+ spin_lock(&rpc_client_lock);
+ if (list_empty(&all_clients))
+ goto out;
+ printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+ "-rpcwait -action- ---ops--\n");
+ list_for_each_entry(clnt, &all_clients, cl_clients) {
+ if (list_empty(&clnt->cl_tasks))
+ continue;
+ spin_lock(&clnt->cl_lock);
+ list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
+ const char *rpc_waitq = "none";
+
+ if (RPC_IS_QUEUED(t))
+ rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+
+ printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
+ t->tk_pid,
+ (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+ t->tk_flags, t->tk_status,
+ t->tk_client,
+ (t->tk_client ? t->tk_client->cl_prog : 0),
+ t->tk_rqstp, t->tk_timeout,
+ rpc_waitq,
+ t->tk_action, t->tk_ops);
+ }
+ spin_unlock(&clnt->cl_lock);
+ }
+out:
+ spin_unlock(&rpc_client_lock);
+}
+#endif
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5887457dc936..e787b6a43eee 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -344,7 +344,7 @@ rpc_info_open(struct inode *inode, struct file *file)
mutex_lock(&inode->i_mutex);
clnt = RPC_I(inode)->private;
if (clnt) {
- atomic_inc(&clnt->cl_users);
+ kref_get(&clnt->cl_kref);
m->private = clnt;
} else {
single_release(inode, file);
@@ -448,6 +448,15 @@ void rpc_put_mount(void)
simple_release_fs(&rpc_mount, &rpc_mount_count);
}
+static int rpc_delete_dentry(struct dentry *dentry)
+{
+ return 1;
+}
+
+static struct dentry_operations rpc_dentry_operations = {
+ .d_delete = rpc_delete_dentry,
+};
+
static int
rpc_lookup_parent(char *path, struct nameidata *nd)
{
@@ -506,7 +515,7 @@ rpc_get_inode(struct super_block *sb, int mode)
* FIXME: This probably has races.
*/
static void
-rpc_depopulate(struct dentry *parent)
+rpc_depopulate(struct dentry *parent, int start, int eof)
{
struct inode *dir = parent->d_inode;
struct list_head *pos, *next;
@@ -518,6 +527,10 @@ repeat:
spin_lock(&dcache_lock);
list_for_each_safe(pos, next, &parent->d_subdirs) {
dentry = list_entry(pos, struct dentry, d_u.d_child);
+ if (!dentry->d_inode ||
+ dentry->d_inode->i_ino < start ||
+ dentry->d_inode->i_ino >= eof)
+ continue;
spin_lock(&dentry->d_lock);
if (!d_unhashed(dentry)) {
dget_locked(dentry);
@@ -533,11 +546,11 @@ repeat:
if (n) {
do {
dentry = dvec[--n];
- if (dentry->d_inode) {
- rpc_close_pipes(dentry->d_inode);
+ if (S_ISREG(dentry->d_inode->i_mode))
simple_unlink(dir, dentry);
- }
- inode_dir_notify(dir, DN_DELETE);
+ else if (S_ISDIR(dentry->d_inode->i_mode))
+ simple_rmdir(dir, dentry);
+ d_delete(dentry);
dput(dentry);
} while (n);
goto repeat;
@@ -560,6 +573,7 @@ rpc_populate(struct dentry *parent,
dentry = d_alloc_name(parent, files[i].name);
if (!dentry)
goto out_bad;
+ dentry->d_op = &rpc_dentry_operations;
mode = files[i].mode;
inode = rpc_get_inode(dir->i_sb, mode);
if (!inode) {
@@ -607,21 +621,14 @@ static int
__rpc_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
-
- shrink_dcache_parent(dentry);
- if (d_unhashed(dentry))
- return 0;
- if ((error = simple_rmdir(dir, dentry)) != 0)
- return error;
- if (!error) {
- inode_dir_notify(dir, DN_DELETE);
- d_drop(dentry);
- }
- return 0;
+ error = simple_rmdir(dir, dentry);
+ if (!error)
+ d_delete(dentry);
+ return error;
}
static struct dentry *
-rpc_lookup_create(struct dentry *parent, const char *name, int len)
+rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
@@ -630,7 +637,9 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len)
dentry = lookup_one_len(name, parent, len);
if (IS_ERR(dentry))
goto out_err;
- if (dentry->d_inode) {
+ if (!dentry->d_inode)
+ dentry->d_op = &rpc_dentry_operations;
+ else if (exclusive) {
dput(dentry);
dentry = ERR_PTR(-EEXIST);
goto out_err;
@@ -649,7 +658,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
if ((error = rpc_lookup_parent(path, nd)) != 0)
return ERR_PTR(error);
- dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
+ dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1);
if (IS_ERR(dentry))
rpc_release_path(nd);
return dentry;
@@ -681,7 +690,7 @@ out:
rpc_release_path(&nd);
return dentry;
err_depopulate:
- rpc_depopulate(dentry);
+ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
__rpc_rmdir(dir, dentry);
err_dput:
dput(dentry);
@@ -701,7 +710,7 @@ rpc_rmdir(struct dentry *dentry)
parent = dget_parent(dentry);
dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
- rpc_depopulate(dentry);
+ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
error = __rpc_rmdir(dir, dentry);
dput(dentry);
mutex_unlock(&dir->i_mutex);
@@ -716,10 +725,21 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
struct inode *dir, *inode;
struct rpc_inode *rpci;
- dentry = rpc_lookup_create(parent, name, strlen(name));
+ dentry = rpc_lookup_create(parent, name, strlen(name), 0);
if (IS_ERR(dentry))
return dentry;
dir = parent->d_inode;
+ if (dentry->d_inode) {
+ rpci = RPC_I(dentry->d_inode);
+ if (rpci->private != private ||
+ rpci->ops != ops ||
+ rpci->flags != flags) {
+ dput (dentry);
+ dentry = ERR_PTR(-EBUSY);
+ }
+ rpci->nkern_readwriters++;
+ goto out;
+ }
inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
if (!inode)
goto err_dput;
@@ -730,6 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
rpci->private = private;
rpci->flags = flags;
rpci->ops = ops;
+ rpci->nkern_readwriters = 1;
inode_dir_notify(dir, DN_CREATE);
dget(dentry);
out:
@@ -754,13 +775,11 @@ rpc_unlink(struct dentry *dentry)
parent = dget_parent(dentry);
dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
- if (!d_unhashed(dentry)) {
- d_drop(dentry);
- if (dentry->d_inode) {
- rpc_close_pipes(dentry->d_inode);
- error = simple_unlink(dir, dentry);
- }
- inode_dir_notify(dir, DN_DELETE);
+ if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) {
+ rpc_close_pipes(dentry->d_inode);
+ error = simple_unlink(dir, dentry);
+ if (!error)
+ d_delete(dentry);
}
dput(dentry);
mutex_unlock(&dir->i_mutex);
@@ -833,6 +852,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
rpci->nreaders = 0;
rpci->nwriters = 0;
INIT_LIST_HEAD(&rpci->in_upcall);
+ INIT_LIST_HEAD(&rpci->in_downcall);
INIT_LIST_HEAD(&rpci->pipe);
rpci->pipelen = 0;
init_waitqueue_head(&rpci->waitq);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 6c7aa8a1f0c6..d1740dbab991 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -12,6 +12,8 @@
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/module.h>
+
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
@@ -184,8 +186,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
.program = &rpcb_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_ONESHOT |
- RPC_CLNT_CREATE_NOPING),
+ .flags = (RPC_CLNT_CREATE_NOPING |
+ RPC_CLNT_CREATE_INTR),
};
((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
@@ -238,6 +240,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
error = rpc_call_sync(rpcb_clnt, &msg, 0);
+ rpc_shutdown_client(rpcb_clnt);
if (error < 0)
printk(KERN_WARNING "RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
@@ -246,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
return error;
}
-#ifdef CONFIG_ROOT_NFS
/**
- * rpcb_getport_external - obtain the port for an RPC service on a given host
+ * rpcb_getport_sync - obtain the port for an RPC service on a given host
* @sin: address of remote peer
* @prog: RPC program number to bind
* @vers: RPC version number to bind
* @prot: transport protocol to use to make this request
*
* Called from outside the RPC client in a synchronous task context.
+ * Uses default timeout parameters specified by underlying transport.
*
- * For now, this supports only version 2 queries, but is used only by
- * mount_clnt for NFS_ROOT.
+ * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
*/
-int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
- __u32 vers, int prot)
+int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
+ __u32 vers, int prot)
{
struct rpcbind_args map = {
.r_prog = prog,
@@ -277,15 +279,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
char hostname[40];
int status;
- dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
- NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+ dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
+ __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
+ sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
status = rpc_call_sync(rpcb_clnt, &msg, 0);
+ rpc_shutdown_client(rpcb_clnt);
if (status >= 0) {
if (map.r_port != 0)
@@ -294,16 +297,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
}
return status;
}
-#endif
+EXPORT_SYMBOL_GPL(rpcb_getport_sync);
/**
- * rpcb_getport - obtain the port for a given RPC service on a given host
+ * rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
*
* This one can be called for an ongoing RPC request, and can be used in
* an async (rpciod) context.
*/
-void rpcb_getport(struct rpc_task *task)
+void rpcb_getport_async(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
int bind_version;
@@ -314,17 +317,17 @@ void rpcb_getport(struct rpc_task *task)
struct sockaddr addr;
int status;
- dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n",
- task->tk_pid, clnt->cl_server,
- clnt->cl_prog, clnt->cl_vers, xprt->prot);
+ dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
+ task->tk_pid, __FUNCTION__,
+ clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
/* Autobind on cloned rpc clients is discouraged */
BUG_ON(clnt->cl_parent != clnt);
if (xprt_test_and_set_binding(xprt)) {
status = -EACCES; /* tell caller to check again */
- dprintk("RPC: %5u rpcb_getport waiting for another binder\n",
- task->tk_pid);
+ dprintk("RPC: %5u %s: waiting for another binder\n",
+ task->tk_pid, __FUNCTION__);
goto bailout_nowake;
}
@@ -335,27 +338,28 @@ void rpcb_getport(struct rpc_task *task)
/* Someone else may have bound if we slept */
if (xprt_bound(xprt)) {
status = 0;
- dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid);
+ dprintk("RPC: %5u %s: already bound\n",
+ task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
xprt->bind_index = 0;
status = -EACCES; /* tell caller to try again later */
- dprintk("RPC: %5u rpcb_getport no more getport versions "
- "available\n", task->tk_pid);
+ dprintk("RPC: %5u %s: no more getport versions available\n",
+ task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
- dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n",
- task->tk_pid, bind_version);
+ dprintk("RPC: %5u %s: trying rpcbind version %u\n",
+ task->tk_pid, __FUNCTION__, bind_version);
map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
if (!map) {
status = -ENOMEM;
- dprintk("RPC: %5u rpcb_getport no memory available\n",
- task->tk_pid);
+ dprintk("RPC: %5u %s: no memory available\n",
+ task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
map->r_prog = clnt->cl_prog;
@@ -373,16 +377,17 @@ void rpcb_getport(struct rpc_task *task)
rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
- dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n",
- task->tk_pid, PTR_ERR(rpcb_clnt));
+ dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
+ task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
goto bailout;
}
child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+ rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
- dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
- task->tk_pid);
+ dprintk("RPC: %5u %s: rpc_run_task failed\n",
+ task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
rpc_put_task(child);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 944d75396fb3..2ac43c41c3a9 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -25,7 +25,6 @@
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
#define RPC_TASK_MAGIC_ID 0xf00baa
-static int rpc_task_id;
#endif
/*
@@ -40,7 +39,6 @@ static mempool_t *rpc_task_mempool __read_mostly;
static mempool_t *rpc_buffer_mempool __read_mostly;
static void __rpc_default_timer(struct rpc_task *task);
-static void rpciod_killall(void);
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
@@ -50,23 +48,13 @@ static void rpc_release_task(struct rpc_task *task);
static RPC_WAITQ(delay_queue, "delayq");
/*
- * All RPC tasks are linked into this list
- */
-static LIST_HEAD(all_tasks);
-
-/*
* rpciod-related stuff
*/
static DEFINE_MUTEX(rpciod_mutex);
-static unsigned int rpciod_users;
+static atomic_t rpciod_users = ATOMIC_INIT(0);
struct workqueue_struct *rpciod_workqueue;
/*
- * Spinlock for other critical sections of code.
- */
-static DEFINE_SPINLOCK(rpc_sched_lock);
-
-/*
* Disable the timer for a given RPC task. Should be called with
* queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer().
@@ -267,18 +255,33 @@ static int rpc_wait_bit_interruptible(void *word)
return 0;
}
+#ifdef RPC_DEBUG
+static void rpc_task_set_debuginfo(struct rpc_task *task)
+{
+ static atomic_t rpc_pid;
+
+ task->tk_magic = RPC_TASK_MAGIC_ID;
+ task->tk_pid = atomic_inc_return(&rpc_pid);
+}
+#else
+static inline void rpc_task_set_debuginfo(struct rpc_task *task)
+{
+}
+#endif
+
static void rpc_set_active(struct rpc_task *task)
{
+ struct rpc_clnt *clnt;
if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
return;
- spin_lock(&rpc_sched_lock);
-#ifdef RPC_DEBUG
- task->tk_magic = RPC_TASK_MAGIC_ID;
- task->tk_pid = rpc_task_id++;
-#endif
+ rpc_task_set_debuginfo(task);
/* Add to global list of all tasks */
- list_add_tail(&task->tk_task, &all_tasks);
- spin_unlock(&rpc_sched_lock);
+ clnt = task->tk_client;
+ if (clnt != NULL) {
+ spin_lock(&clnt->cl_lock);
+ list_add_tail(&task->tk_task, &clnt->cl_tasks);
+ spin_unlock(&clnt->cl_lock);
+ }
}
/*
@@ -818,6 +821,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
if (tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
task->tk_calldata = calldata;
+ INIT_LIST_HEAD(&task->tk_task);
/* Initialize retry counters */
task->tk_garb_retry = 2;
@@ -830,7 +834,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
task->tk_workqueue = rpciod_workqueue;
if (clnt) {
- atomic_inc(&clnt->cl_users);
+ kref_get(&clnt->cl_kref);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (!clnt->cl_intr)
@@ -860,9 +864,7 @@ static void rpc_free_task(struct rcu_head *rcu)
}
/*
- * Create a new task for the specified client. We have to
- * clean up after an allocation failure, as the client may
- * have specified "oneshot".
+ * Create a new task for the specified client.
*/
struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
{
@@ -870,7 +872,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc
task = rpc_alloc_task();
if (!task)
- goto cleanup;
+ goto out;
rpc_init_task(task, clnt, flags, tk_ops, calldata);
@@ -878,16 +880,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc
task->tk_flags |= RPC_TASK_DYNAMIC;
out:
return task;
-
-cleanup:
- /* Check whether to release the client */
- if (clnt) {
- printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
- atomic_read(&clnt->cl_users), clnt->cl_oneshot);
- atomic_inc(&clnt->cl_users); /* pretend we were used ... */
- rpc_release_client(clnt);
- }
- goto out;
}
@@ -920,11 +912,13 @@ static void rpc_release_task(struct rpc_task *task)
#endif
dprintk("RPC: %5u release task\n", task->tk_pid);
- /* Remove from global task list */
- spin_lock(&rpc_sched_lock);
- list_del(&task->tk_task);
- spin_unlock(&rpc_sched_lock);
-
+ if (!list_empty(&task->tk_task)) {
+ struct rpc_clnt *clnt = task->tk_client;
+ /* Remove from client task list */
+ spin_lock(&clnt->cl_lock);
+ list_del(&task->tk_task);
+ spin_unlock(&clnt->cl_lock);
+ }
BUG_ON (RPC_IS_QUEUED(task));
/* Synchronously delete any running timer */
@@ -939,29 +933,6 @@ static void rpc_release_task(struct rpc_task *task)
rpc_put_task(task);
}
-/**
- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
- * @clnt: pointer to RPC client
- * @flags: RPC flags
- * @ops: RPC call ops
- * @data: user call data
- */
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
- const struct rpc_call_ops *ops,
- void *data)
-{
- struct rpc_task *task;
- task = rpc_new_task(clnt, flags, ops, data);
- if (task == NULL) {
- rpc_release_calldata(ops, data);
- return ERR_PTR(-ENOMEM);
- }
- atomic_inc(&task->tk_count);
- rpc_execute(task);
- return task;
-}
-EXPORT_SYMBOL(rpc_run_task);
-
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
@@ -969,44 +940,25 @@ EXPORT_SYMBOL(rpc_run_task);
void rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
- struct list_head *le;
- dprintk("RPC: killing all tasks for client %p\n", clnt);
+ if (list_empty(&clnt->cl_tasks))
+ return;
+ dprintk("RPC: killing all tasks for client %p\n", clnt);
/*
* Spin lock all_tasks to prevent changes...
*/
- spin_lock(&rpc_sched_lock);
- alltask_for_each(rovr, le, &all_tasks) {
+ spin_lock(&clnt->cl_lock);
+ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
if (! RPC_IS_ACTIVATED(rovr))
continue;
- if (!clnt || rovr->tk_client == clnt) {
+ if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
rpc_wake_up_task(rovr);
}
}
- spin_unlock(&rpc_sched_lock);
-}
-
-static void rpciod_killall(void)
-{
- unsigned long flags;
-
- while (!list_empty(&all_tasks)) {
- clear_thread_flag(TIF_SIGPENDING);
- rpc_killall_tasks(NULL);
- flush_workqueue(rpciod_workqueue);
- if (!list_empty(&all_tasks)) {
- dprintk("RPC: rpciod_killall: waiting for tasks "
- "to exit\n");
- yield();
- }
- }
-
- spin_lock_irqsave(&current->sighand->siglock, flags);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
+ spin_unlock(&clnt->cl_lock);
}
/*
@@ -1018,28 +970,27 @@ rpciod_up(void)
struct workqueue_struct *wq;
int error = 0;
+ if (atomic_inc_not_zero(&rpciod_users))
+ return 0;
+
mutex_lock(&rpciod_mutex);
- dprintk("RPC: rpciod_up: users %u\n", rpciod_users);
- rpciod_users++;
- if (rpciod_workqueue)
- goto out;
- /*
- * If there's no pid, we should be the first user.
- */
- if (rpciod_users > 1)
- printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users);
+
+ /* Guard against races with rpciod_down() */
+ if (rpciod_workqueue != NULL)
+ goto out_ok;
/*
* Create the rpciod thread and wait for it to start.
*/
+ dprintk("RPC: creating workqueue rpciod\n");
error = -ENOMEM;
wq = create_workqueue("rpciod");
- if (wq == NULL) {
- printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
- rpciod_users--;
+ if (wq == NULL)
goto out;
- }
+
rpciod_workqueue = wq;
error = 0;
+out_ok:
+ atomic_inc(&rpciod_users);
out:
mutex_unlock(&rpciod_mutex);
return error;
@@ -1048,59 +999,19 @@ out:
void
rpciod_down(void)
{
+ if (!atomic_dec_and_test(&rpciod_users))
+ return;
+
mutex_lock(&rpciod_mutex);
- dprintk("RPC: rpciod_down sema %u\n", rpciod_users);
- if (rpciod_users) {
- if (--rpciod_users)
- goto out;
- } else
- printk(KERN_WARNING "rpciod_down: no users??\n");
+ dprintk("RPC: destroying workqueue rpciod\n");
- if (!rpciod_workqueue) {
- dprintk("RPC: rpciod_down: Nothing to do!\n");
- goto out;
+ if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
+ destroy_workqueue(rpciod_workqueue);
+ rpciod_workqueue = NULL;
}
- rpciod_killall();
-
- destroy_workqueue(rpciod_workqueue);
- rpciod_workqueue = NULL;
- out:
mutex_unlock(&rpciod_mutex);
}
-#ifdef RPC_DEBUG
-void rpc_show_tasks(void)
-{
- struct list_head *le;
- struct rpc_task *t;
-
- spin_lock(&rpc_sched_lock);
- if (list_empty(&all_tasks)) {
- spin_unlock(&rpc_sched_lock);
- return;
- }
- printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
- "-rpcwait -action- ---ops--\n");
- alltask_for_each(t, le, &all_tasks) {
- const char *rpc_waitq = "none";
-
- if (RPC_IS_QUEUED(t))
- rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
-
- printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
- t->tk_pid,
- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
- t->tk_flags, t->tk_status,
- t->tk_client,
- (t->tk_client ? t->tk_client->cl_prog : 0),
- t->tk_rqstp, t->tk_timeout,
- rpc_waitq,
- t->tk_action, t->tk_ops);
- }
- spin_unlock(&rpc_sched_lock);
-}
-#endif
-
void
rpc_destroy_mempool(void)
{
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 73075dec83c0..384c4ad5ab86 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -28,15 +28,11 @@ EXPORT_SYMBOL(rpc_init_task);
EXPORT_SYMBOL(rpc_sleep_on);
EXPORT_SYMBOL(rpc_wake_up_next);
EXPORT_SYMBOL(rpc_wake_up_task);
-EXPORT_SYMBOL(rpciod_down);
-EXPORT_SYMBOL(rpciod_up);
-EXPORT_SYMBOL(rpc_new_task);
EXPORT_SYMBOL(rpc_wake_up_status);
/* RPC client functions */
EXPORT_SYMBOL(rpc_clone_client);
EXPORT_SYMBOL(rpc_bind_new_program);
-EXPORT_SYMBOL(rpc_destroy_client);
EXPORT_SYMBOL(rpc_shutdown_client);
EXPORT_SYMBOL(rpc_killall_tasks);
EXPORT_SYMBOL(rpc_call_sync);
@@ -61,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_unregister);
EXPORT_SYMBOL(rpcauth_create);
EXPORT_SYMBOL(rpcauth_lookupcred);
EXPORT_SYMBOL(rpcauth_lookup_credcache);
-EXPORT_SYMBOL(rpcauth_free_credcache);
+EXPORT_SYMBOL(rpcauth_destroy_credcache);
EXPORT_SYMBOL(rpcauth_init_credcache);
EXPORT_SYMBOL(put_rpccred);
@@ -156,6 +152,7 @@ init_sunrpc(void)
cache_register(&ip_map_cache);
cache_register(&unix_gid_cache);
init_socket_xprt();
+ rpcauth_init_module();
out:
return err;
}
@@ -163,6 +160,7 @@ out:
static void __exit
cleanup_sunrpc(void)
{
+ rpcauth_remove_module();
cleanup_socket_xprt();
unregister_rpc_pipefs();
rpc_destroy_mempool();
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5baf48de2558..64b9b8c743c4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -644,6 +644,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
struct msghdr msg = {
.msg_flags = MSG_DONTWAIT,
};
+ struct sockaddr *sin;
int len;
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
@@ -654,6 +655,19 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
rqstp->rq_addrlen = svsk->sk_remotelen;
+ /* Destination address in request is needed for binding the
+ * source address in RPC callbacks later.
+ */
+ sin = (struct sockaddr *)&svsk->sk_local;
+ switch (sin->sa_family) {
+ case AF_INET:
+ rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
+ break;
+ case AF_INET6:
+ rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
+ break;
+ }
+
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
@@ -1064,6 +1078,12 @@ svc_tcp_accept(struct svc_sock *svsk)
goto failed;
memcpy(&newsvsk->sk_remote, sin, slen);
newsvsk->sk_remotelen = slen;
+ err = kernel_getsockname(newsock, sin, &slen);
+ if (unlikely(err < 0)) {
+ dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
+ slen = offsetof(struct sockaddr, sa_data);
+ }
+ memcpy(&newsvsk->sk_local, sin, slen);
svc_sock_received(newsvsk);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5b05b73e4c1d..c8c2edccad7e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
} else
- schedule_work(&xprt->task_cleanup);
+ queue_work(rpciod_workqueue, &xprt->task_cleanup);
}
/*
@@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data)
if (xprt_connecting(xprt))
xprt_release_write(xprt, NULL);
else
- schedule_work(&xprt->task_cleanup);
+ queue_work(rpciod_workqueue, &xprt->task_cleanup);
return;
out_abort:
spin_unlock(&xprt->transport_lock);
@@ -886,27 +886,24 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
/**
* xprt_create_transport - create an RPC transport
- * @proto: requested transport protocol
- * @ap: remote peer address
- * @size: length of address
- * @to: timeout parameters
+ * @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
+struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
- switch (proto) {
+ switch (args->proto) {
case IPPROTO_UDP:
- xprt = xs_setup_udp(ap, size, to);
+ xprt = xs_setup_udp(args);
break;
case IPPROTO_TCP:
- xprt = xs_setup_tcp(ap, size, to);
+ xprt = xs_setup_tcp(args);
break;
default:
printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
- proto);
+ args->proto);
return ERR_PTR(-EIO);
}
if (IS_ERR(xprt)) {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cc33c5880abb..4ae7eed7f617 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -235,6 +235,7 @@ struct sock_xprt {
* Connection of transports
*/
struct delayed_work connect_worker;
+ struct sockaddr_storage addr;
unsigned short port;
/*
@@ -653,8 +654,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
dprintk("RPC: xs_destroy xprt %p\n", xprt);
- cancel_delayed_work(&transport->connect_worker);
- flush_scheduled_work();
+ cancel_rearming_delayed_work(&transport->connect_worker);
xprt_disconnect(xprt);
xs_close(xprt);
@@ -1001,7 +1001,7 @@ static void xs_tcp_state_change(struct sock *sk)
/* Try to schedule an autoclose RPC calls */
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- schedule_work(&xprt->task_cleanup);
+ queue_work(rpciod_workqueue, &xprt->task_cleanup);
default:
xprt_disconnect(xprt);
}
@@ -1146,31 +1146,36 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
sap->sin_port = htons(port);
}
-static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind(struct sock_xprt *transport, struct socket *sock)
{
struct sockaddr_in myaddr = {
.sin_family = AF_INET,
};
+ struct sockaddr_in *sa;
int err;
unsigned short port = transport->port;
+ if (!transport->xprt.resvport)
+ port = 0;
+ sa = (struct sockaddr_in *)&transport->addr;
+ myaddr.sin_addr = sa->sin_addr;
do {
myaddr.sin_port = htons(port);
err = kernel_bind(sock, (struct sockaddr *) &myaddr,
sizeof(myaddr));
+ if (!transport->xprt.resvport)
+ break;
if (err == 0) {
transport->port = port;
- dprintk("RPC: xs_bindresvport bound to port %u\n",
- port);
- return 0;
+ break;
}
if (port <= xprt_min_resvport)
port = xprt_max_resvport;
else
port--;
} while (err == -EADDRINUSE && port != transport->port);
-
- dprintk("RPC: can't bind to reserved port (%d).\n", -err);
+ dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
+ NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
return err;
}
@@ -1229,7 +1234,7 @@ static void xs_udp_connect_worker(struct work_struct *work)
}
xs_reclassify_socket(sock);
- if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
+ if (xs_bind(transport, sock)) {
sock_release(sock);
goto out;
}
@@ -1316,7 +1321,7 @@ static void xs_tcp_connect_worker(struct work_struct *work)
}
xs_reclassify_socket(sock);
- if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
+ if (xs_bind(transport, sock)) {
sock_release(sock);
goto out;
}
@@ -1410,18 +1415,16 @@ static void xs_connect(struct rpc_task *task)
dprintk("RPC: xs_connect delayed xprt %p for %lu "
"seconds\n",
xprt, xprt->reestablish_timeout / HZ);
- schedule_delayed_work(&transport->connect_worker,
- xprt->reestablish_timeout);
+ queue_delayed_work(rpciod_workqueue,
+ &transport->connect_worker,
+ xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
} else {
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
- schedule_delayed_work(&transport->connect_worker, 0);
-
- /* flush_scheduled_work can sleep... */
- if (!RPC_IS_ASYNC(task))
- flush_scheduled_work();
+ queue_delayed_work(rpciod_workqueue,
+ &transport->connect_worker, 0);
}
}
@@ -1476,7 +1479,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
- .rpcbind = rpcb_getport,
+ .rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1493,7 +1496,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
static struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
- .rpcbind = rpcb_getport,
+ .rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1505,12 +1508,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.print_stats = xs_tcp_print_stats,
};
-static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size)
+static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
{
struct rpc_xprt *xprt;
struct sock_xprt *new;
- if (addrlen > sizeof(xprt->addr)) {
+ if (args->addrlen > sizeof(xprt->addr)) {
dprintk("RPC: xs_setup_xprt: address too large\n");
return ERR_PTR(-EBADF);
}
@@ -1532,8 +1535,10 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns
return ERR_PTR(-ENOMEM);
}
- memcpy(&xprt->addr, addr, addrlen);
- xprt->addrlen = addrlen;
+ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+ xprt->addrlen = args->addrlen;
+ if (args->srcaddr)
+ memcpy(&new->addr, args->srcaddr, args->addrlen);
new->port = xs_get_random_port();
return xprt;
@@ -1541,22 +1546,20 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns
/**
* xs_setup_udp - Set up transport to use a UDP socket
- * @addr: address of remote server
- * @addrlen: length of address in bytes
- * @to: timeout parameters
+ * @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
{
struct rpc_xprt *xprt;
struct sock_xprt *transport;
- xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
+ if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
xprt_set_bound(xprt);
xprt->prot = IPPROTO_UDP;
@@ -1572,8 +1575,8 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_
xprt->ops = &xs_udp_ops;
- if (to)
- xprt->timeout = *to;
+ if (args->timeout)
+ xprt->timeout = *args->timeout;
else
xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
@@ -1586,22 +1589,20 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_
/**
* xs_setup_tcp - Set up transport to use a TCP socket
- * @addr: address of remote server
- * @addrlen: length of address in bytes
- * @to: timeout parameters
+ * @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
{
struct rpc_xprt *xprt;
struct sock_xprt *transport;
- xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
+ if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
xprt_set_bound(xprt);
xprt->prot = IPPROTO_TCP;
@@ -1616,8 +1617,8 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_
xprt->ops = &xs_tcp_ops;
- if (to)
- xprt->timeout = *to;
+ if (args->timeout)
+ xprt->timeout = *args->timeout;
else
xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);