From 19d97ac5aabec2e253fd40d110ecbe326040d917 Mon Sep 17 00:00:00 2001
From: Chen Hanxiao <chenhx.fnst@fujitsu.com>
Date: Fri, 22 Nov 2024 10:18:25 +0800
Subject: [PATCH 01/42] nfsd: trace: remove redundant stateid even deleg_recall

Since commit e56dc9e2949e ("nfsd: remove fault injection code") remove
all nfsd_recall_delegations codes,
we don't need trace_nfsd_deleg_recall any more.

Signed-off-by: Chen Hanxiao <chenhx.fnst@fujitsu.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 696c89f68a9e..ad2c0c432d08 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -626,7 +626,6 @@ DEFINE_STATEID_EVENT(open);
 DEFINE_STATEID_EVENT(deleg_read);
 DEFINE_STATEID_EVENT(deleg_write);
 DEFINE_STATEID_EVENT(deleg_return);
-DEFINE_STATEID_EVENT(deleg_recall);
 
 DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
 	TP_PROTO(u32 seqid, const stateid_t *stp),

From 935fee5d5ba8073b223a9cc9906a62950f0e13ed Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Dec 2024 13:55:52 +1100
Subject: [PATCH 02/42] nfsd: use new wake_up_var interfaces.

The wake_up_var interface is fragile as barriers are sometimes needed.
There are now new interfaces so that most wake-ups can use an interface
that is guaranteed to have all barriers needed.

This patch changes the wake up on cl_cb_inflight to use
atomic_dec_and_wake_up().

It also changes the wake up on rp_locked to use store_release_wake_up().
This involves changing rp_locked from atomic_t to int.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c |  3 +--
 fs/nfsd/nfs4state.c    | 16 ++++++----------
 fs/nfsd/state.h        |  2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c083e539e898..94479483c3d6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1036,8 +1036,7 @@ static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
 static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
 {
 
-	if (atomic_dec_and_test(&clp->cl_cb_inflight))
-		wake_up_var(&clp->cl_cb_inflight);
+	atomic_dec_and_wake_up(&clp->cl_cb_inflight);
 }
 
 static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 741b9449f727..9fbf7c8f0a3e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4739,7 +4739,7 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
 	rp->rp_status = nfserr_serverfault;
 	rp->rp_buflen = 0;
 	rp->rp_buf = rp->rp_ibuf;
-	atomic_set(&rp->rp_locked, RP_UNLOCKED);
+	rp->rp_locked = RP_UNLOCKED;
 }
 
 static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
@@ -4747,9 +4747,9 @@ static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
 {
 	if (!nfsd4_has_session(cstate)) {
 		wait_var_event(&so->so_replay.rp_locked,
-			       atomic_cmpxchg(&so->so_replay.rp_locked,
-					      RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
-		if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+			       cmpxchg(&so->so_replay.rp_locked,
+				       RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+		if (so->so_replay.rp_locked == RP_UNHASHED)
 			return -EAGAIN;
 		cstate->replay_owner = nfs4_get_stateowner(so);
 	}
@@ -4762,9 +4762,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
 
 	if (so != NULL) {
 		cstate->replay_owner = NULL;
-		atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
-		smp_mb__after_atomic();
-		wake_up_var(&so->so_replay.rp_locked);
+		store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
 		nfs4_put_stateowner(so);
 	}
 }
@@ -5069,9 +5067,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 	 * Some threads with a reference might be waiting for rp_locked,
 	 * so tell them to stop waiting.
 	 */
-	atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
-	smp_mb__after_atomic();
-	wake_up_var(&oo->oo_owner.so_replay.rp_locked);
+	store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
 	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
 
 	release_all_access(s);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e16bb3717fb9..ba30b2335b66 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -505,7 +505,7 @@ struct nfs4_replay {
 	unsigned int		rp_buflen;
 	char			*rp_buf;
 	struct knfsd_fh		rp_openfh;
-	atomic_t		rp_locked;
+	int			rp_locked;
 	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
 };
 

From 6e1d75f778d644d02147d8e61ca2cef033ce045d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Dec 2024 13:55:53 +1100
Subject: [PATCH 03/42] sunrpc/svc: use store_release_wake_up()

svc_thread_init_status() contains an open-coded
store_release_wake_up().  It is cleaner to use that function directly
rather than needing to remember the barrier.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e68fecf6eab5..e4f09f58d58c 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -327,12 +327,7 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
  */
 static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
 {
-	rqstp->rq_err = err;
-	/* memory barrier ensures assignment to error above is visible before
-	 * waitqueue_active() test below completes.
-	 */
-	smp_mb();
-	wake_up_var(&rqstp->rq_err);
+	store_release_wake_up(&rqstp->rq_err, err);
 	if (err)
 		kthread_exit(1);
 }

From 6f035c99acd6ef6b875ac4ac28e6117e60db8f89 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 6 Dec 2024 16:36:33 -0500
Subject: [PATCH 04/42] NFSD: Clean up unused variable

@sb should have been removed by commit 7e64c5bc497c ("NLM/NFSD: Fix
lock notifications for async-capable filesystems").

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9fbf7c8f0a3e..b9d0a300d376 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7962,7 +7962,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfsd4_blocked_lock *nbl = NULL;
 	struct file_lock *file_lock = NULL;
 	struct file_lock *conflock = NULL;
-	struct super_block *sb;
 	__be32 status = 0;
 	int lkflg;
 	int err;
@@ -7982,7 +7981,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
 	if (status != nfs_ok)
 		return status;
-	sb = cstate->current_fh.fh_dentry->d_sb;
 
 	if (lock->lk_is_new) {
 		if (nfsd4_has_session(cstate))

From de71d4e211eddb670b285a0ea477a299601ce1ca Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Tue, 10 Dec 2024 07:25:54 -0500
Subject: [PATCH 05/42] nfsd: fix legacy client tracking initialization

Get rid of the nfsd4_legacy_tracking_ops->init() call in
check_for_legacy_methods().  That will be handled in the caller
(nfsd4_client_tracking_init()).  Otherwise, we'll wind up calling
nfsd4_legacy_tracking_ops->init() twice, and the second time we'll
trigger the BUG_ON() in nfsd4_init_recdir().

Fixes: 74fd48739d04 ("nfsd: new Kconfig option for legacy client tracking")
Reported-by: Jur van der Burg <jur@avtware.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219580
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4recover.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 4a765555bf84..1c8fcb04b3cd 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -2052,7 +2052,6 @@ static inline int check_for_legacy_methods(int status, struct net *net)
 		path_put(&path);
 		if (status)
 			return -ENOTDIR;
-		status = nn->client_tracking_ops->init(net);
 	}
 	return status;
 }

From eccbbc7c00a5aae5e704d4002adfaf4c3fa4b30d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 9 Dec 2024 11:41:26 +1100
Subject: [PATCH 06/42] nfsd: don't use sv_nrthreads in connection limiting
 calculations.

The heuristic for limiting the number of incoming connections to nfsd
currently uses sv_nrthreads - allowing more connections if more threads
were configured.

A future patch will allow number of threads to grow dynamically so that
there will be no need to configure sv_nrthreads.  So we need a different
solution for limiting connections.

It isn't clear what problem is solved by limiting connections (as
mentioned in a code comment) but the most likely problem is a connection
storm - many connections that are not doing productive work.  These will
be closed after about 6 minutes already but it might help to slow down a
storm.

This patch adds a per-connection flag XPT_PEER_VALID which indicates
that the peer has presented a filehandle for which it has some sort of
access.  i.e the peer is known to be trusted in some way.  We now only
count connections which have NOT been determined to be valid.  There
should be relative few of these at any given time.

If the number of non-validated peer exceed a limit - currently 64 - we
close the oldest non-validated peer to avoid having too many of these
useless connections.

Note that this patch significantly changes the meaning of the various
configuration parameters for "max connections".  The next patch will
remove all of these.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/callback.c               |  4 ----
 fs/nfs/callback_xdr.c           |  1 +
 fs/nfsd/netns.h                 |  4 ++--
 fs/nfsd/nfsfh.c                 |  2 ++
 include/linux/sunrpc/svc.h      |  2 +-
 include/linux/sunrpc/svc_xprt.h | 16 ++++++++++++++++
 net/sunrpc/svc_xprt.c           | 32 ++++++++++++++++----------------
 7 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 6cf92498a5ac..86bdc7d23fb9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -211,10 +211,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		return ERR_PTR(-ENOMEM);
 	}
 	cb_info->serv = serv;
-	/* As there is only one thread we need to over-ride the
-	 * default maximum of 80 connections
-	 */
-	serv->sv_maxconn = 1024;
 	dprintk("nfs_callback_create_svc: service created\n");
 	return serv;
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index fdeb0b34a3d3..4254ba3ee7c5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -984,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 			nfs_put_client(cps.clp);
 			goto out_invalidcred;
 		}
+		svc_xprt_set_valid(rqstp->rq_xprt);
 	}
 
 	cps.minorversion = hdr_arg.minorversion;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 26f7b34d1a03..a05a45bb1978 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -129,8 +129,8 @@ struct nfsd_net {
 	unsigned char writeverf[8];
 
 	/*
-	 * Max number of connections this nfsd container will allow. Defaults
-	 * to '0' which is means that it bases this on the number of threads.
+	 * Max number of non-validated connections this nfsd container
+	 * will allow.  Defaults to '0' gets mapped to 64.
 	 */
 	unsigned int max_connections;
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 6a831cb242df..bf59f83c6224 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -382,6 +382,8 @@ __fh_verify(struct svc_rqst *rqstp,
 	if (error)
 		goto out;
 
+	svc_xprt_set_valid(rqstp->rq_xprt);
+
 	/* Finally, check access permissions. */
 	error = nfsd_permission(cred, exp, dentry, access);
 out:
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e4f09f58d58c..4f9418cbf8c9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -81,7 +81,7 @@ struct svc_serv {
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
 	struct list_head	sv_permsocks;	/* all permanent sockets */
 	struct list_head	sv_tempsocks;	/* all temporary sockets */
-	int			sv_tmpcnt;	/* count of temporary sockets */
+	int			sv_tmpcnt;	/* count of temporary "valid" sockets */
 	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */
 
 	char *			sv_name;	/* service name */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 0981e35a9fed..7064ebbd550b 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -99,8 +99,24 @@ enum {
 	XPT_HANDSHAKE,		/* xprt requests a handshake */
 	XPT_TLS_SESSION,	/* transport-layer security established */
 	XPT_PEER_AUTH,		/* peer has been authenticated */
+	XPT_PEER_VALID,		/* peer has presented a filehandle that
+				 * it has access to.  It is NOT counted
+				 * in ->sv_tmpcnt.
+				 */
 };
 
+static inline void svc_xprt_set_valid(struct svc_xprt *xpt)
+{
+	if (test_bit(XPT_TEMP, &xpt->xpt_flags) &&
+	    !test_and_set_bit(XPT_PEER_VALID, &xpt->xpt_flags)) {
+		struct svc_serv *serv = xpt->xpt_server;
+
+		spin_lock(&serv->sv_lock);
+		serv->sv_tmpcnt -= 1;
+		spin_unlock(&serv->sv_lock);
+	}
+}
+
 static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
 {
 	spin_lock(&xpt->xpt_lock);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 43c57124de52..dbd96b295dfa 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -606,7 +606,8 @@ int svc_port_is_privileged(struct sockaddr *sin)
 }
 
 /*
- * Make sure that we don't have too many active connections. If we have,
+ * Make sure that we don't have too many connections that have not yet
+ * demonstrated that they have access to the NFS server. If we have,
  * something must be dropped. It's not clear what will happen if we allow
  * "too many" connections, but when dealing with network-facing software,
  * we have to code defensively. Here we do that by imposing hard limits.
@@ -625,27 +626,25 @@ int svc_port_is_privileged(struct sockaddr *sin)
  */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
-				(serv->sv_nrthreads+3) * 20;
+	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : 64;
 
 	if (serv->sv_tmpcnt > limit) {
-		struct svc_xprt *xprt = NULL;
+		struct svc_xprt *xprt = NULL, *xprti;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
-			/* Try to help the admin */
-			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
-					       serv->sv_name, serv->sv_maxconn ?
-					       "max number of connections" :
-					       "number of threads");
 			/*
 			 * Always select the oldest connection. It's not fair,
-			 * but so is life
+			 * but nor is life.
 			 */
-			xprt = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_xprt,
-					  xpt_list);
-			set_bit(XPT_CLOSE, &xprt->xpt_flags);
-			svc_xprt_get(xprt);
+			list_for_each_entry_reverse(xprti, &serv->sv_tempsocks,
+						    xpt_list) {
+				if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) {
+					xprt = xprti;
+					set_bit(XPT_CLOSE, &xprt->xpt_flags);
+					svc_xprt_get(xprt);
+					break;
+				}
+			}
 		}
 		spin_unlock_bh(&serv->sv_lock);
 
@@ -1039,7 +1038,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
 
 	spin_lock_bh(&serv->sv_lock);
 	list_del_init(&xprt->xpt_list);
-	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+	if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
+	    !test_bit(XPT_PEER_VALID, &xprt->xpt_flags))
 		serv->sv_tmpcnt--;
 	spin_unlock_bh(&serv->sv_lock);
 

From a4b853f183a19a88ad635f9ae8ba97e7cb377a23 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 9 Dec 2024 11:41:27 +1100
Subject: [PATCH 07/42] sunrpc: remove all connection limit configuration

Now that the connection limit only apply to unconfirmed connections,
there is no need to configure it.  So remove all the configuration and
fix the number of unconfirmed connections as always 64 - which is
now given a name: XPT_MAX_TMP_CONN

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c                  |  8 -------
 fs/nfsd/netns.h                 |  6 -----
 fs/nfsd/nfsctl.c                | 42 ---------------------------------
 fs/nfsd/nfssvc.c                |  5 ----
 include/linux/sunrpc/svc.h      |  4 ----
 include/linux/sunrpc/svc_xprt.h |  6 +++++
 net/sunrpc/svc_xprt.c           |  8 +------
 7 files changed, 7 insertions(+), 72 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 4ec22c2f2ea3..7ded57ec3a60 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -70,9 +70,6 @@ static unsigned long		nlm_grace_period;
 unsigned long			nlm_timeout = LOCKD_DFLT_TIMEO;
 static int			nlm_udpport, nlm_tcpport;
 
-/* RLIM_NOFILE defaults to 1024. That seems like a reasonable default here. */
-static unsigned int		nlm_max_connections = 1024;
-
 /*
  * Constants needed for the sysctl interface.
  */
@@ -136,9 +133,6 @@ lockd(void *vrqstp)
 	 * NFS mount or NFS daemon has gone away.
 	 */
 	while (!svc_thread_should_stop(rqstp)) {
-		/* update sv_maxconn if it has changed */
-		rqstp->rq_server->sv_maxconn = nlm_max_connections;
-
 		nlmsvc_retry_blocked(rqstp);
 		svc_recv(rqstp);
 	}
@@ -340,7 +334,6 @@ static int lockd_get(void)
 		return -ENOMEM;
 	}
 
-	serv->sv_maxconn = nlm_max_connections;
 	error = svc_set_num_threads(serv, NULL, 1);
 	if (error < 0) {
 		svc_destroy(&serv);
@@ -542,7 +535,6 @@ module_param_call(nlm_udpport, param_set_port, param_get_int,
 module_param_call(nlm_tcpport, param_set_port, param_get_int,
 		  &nlm_tcpport, 0644);
 module_param(nsm_use_hostnames, bool, 0644);
-module_param(nlm_max_connections, uint, 0644);
 
 static int lockd_init_net(struct net *net)
 {
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index a05a45bb1978..4a07b8d0837b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -128,12 +128,6 @@ struct nfsd_net {
 	seqlock_t writeverf_lock;
 	unsigned char writeverf[8];
 
-	/*
-	 * Max number of non-validated connections this nfsd container
-	 * will allow.  Defaults to '0' gets mapped to 64.
-	 */
-	unsigned int max_connections;
-
 	u32 clientid_base;
 	u32 clientid_counter;
 	u32 clverifier_counter;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3adbc05ebaac..95ea4393305b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -48,7 +48,6 @@ enum {
 	NFSD_Versions,
 	NFSD_Ports,
 	NFSD_MaxBlkSize,
-	NFSD_MaxConnections,
 	NFSD_Filecache,
 	NFSD_Leasetime,
 	NFSD_Gracetime,
@@ -68,7 +67,6 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_versions(struct file *file, char *buf, size_t size);
 static ssize_t write_ports(struct file *file, char *buf, size_t size);
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
 #ifdef CONFIG_NFSD_V4
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -87,7 +85,6 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Versions] = write_versions,
 	[NFSD_Ports] = write_ports,
 	[NFSD_MaxBlkSize] = write_maxblksize,
-	[NFSD_MaxConnections] = write_maxconn,
 #ifdef CONFIG_NFSD_V4
 	[NFSD_Leasetime] = write_leasetime,
 	[NFSD_Gracetime] = write_gracetime,
@@ -902,44 +899,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 							nfsd_max_blksize);
 }
 
-/*
- * write_maxconn - Set or report the current max number of connections
- *
- * Input:
- *			buf:		ignored
- *			size:		zero
- * OR
- *
- * Input:
- *			buf:		C string containing an unsigned
- *					integer value representing the new
- *					number of max connections
- *			size:		non-zero length of C string in @buf
- * Output:
- *	On success:	passed-in buffer filled with '\n'-terminated C string
- *			containing numeric value of max_connections setting
- *			for this net namespace;
- *			return code is the size in bytes of the string
- *	On error:	return code is zero or a negative errno value
- */
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
-{
-	char *mesg = buf;
-	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
-	unsigned int maxconn = nn->max_connections;
-
-	if (size > 0) {
-		int rv = get_uint(&mesg, &maxconn);
-
-		if (rv)
-			return rv;
-		trace_nfsd_ctl_maxconn(netns(file), maxconn);
-		nn->max_connections = maxconn;
-	}
-
-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
-}
-
 #ifdef CONFIG_NFSD_V4
 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
 				  time64_t *time, struct nfsd_net *nn)
@@ -1372,7 +1331,6 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
 		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
-		[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
 		[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 49e2f32102ab..b77097de5936 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -668,7 +668,6 @@ int nfsd_create_serv(struct net *net)
 	if (serv == NULL)
 		return -ENOMEM;
 
-	serv->sv_maxconn = nn->max_connections;
 	error = svc_bind(serv, net);
 	if (error < 0) {
 		svc_destroy(&serv);
@@ -954,11 +953,7 @@ nfsd(void *vrqstp)
 	 * The main request loop
 	 */
 	while (!svc_thread_should_stop(rqstp)) {
-		/* Update sv_maxconn if it has changed */
-		rqstp->rq_server->sv_maxconn = nn->max_connections;
-
 		svc_recv(rqstp);
-
 		nfsd_file_net_dispose(nn);
 	}
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4f9418cbf8c9..74658cca0f38 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -72,10 +72,6 @@ struct svc_serv {
 	spinlock_t		sv_lock;
 	unsigned int		sv_nprogs;	/* Number of sv_programs */
 	unsigned int		sv_nrthreads;	/* # of server threads */
-	unsigned int		sv_maxconn;	/* max connections allowed or
-						 * '0' causing max to be based
-						 * on number of threads. */
-
 	unsigned int		sv_max_payload;	/* datagram payload size */
 	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 7064ebbd550b..72be60952579 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -105,6 +105,12 @@ enum {
 				 */
 };
 
+/*
+ * Maximum number of "tmp" connections - those without XPT_PEER_VALID -
+ * permitted on any service.
+ */
+#define XPT_MAX_TMP_CONN	64
+
 static inline void svc_xprt_set_valid(struct svc_xprt *xpt)
 {
 	if (test_bit(XPT_TEMP, &xpt->xpt_flags) &&
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dbd96b295dfa..06779b4cdd0a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -619,16 +619,10 @@ int svc_port_is_privileged(struct sockaddr *sin)
  * The only somewhat efficient mechanism would be if drop old
  * connections from the same IP first. But right now we don't even
  * record the client IP in svc_sock.
- *
- * single-threaded services that expect a lot of clients will probably
- * need to set sv_maxconn to override the default value which is based
- * on the number of threads
  */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : 64;
-
-	if (serv->sv_tmpcnt > limit) {
+	if (serv->sv_tmpcnt > XPT_MAX_TMP_CONN) {
 		struct svc_xprt *xprt = NULL, *xprti;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {

From 0b6e14242630ad5f65839b23400bd67c5166e2b4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:04 +1100
Subject: [PATCH 08/42] nfsd: use an xarray to store v4.1 session slots

Using an xarray to store session slots will make it easier to change the
number of active slots based on demand, and removes an unnecessary
limit.

To achieve good throughput with a high-latency server it can be helpful
to have hundreds of concurrent writes, which means hundreds of slots.
So increase the limit to 2048 (twice what the Linux client will
currently use).  This limit is only a sanity check, not a hard limit.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 28 ++++++++++++++++++----------
 fs/nfsd/state.h     |  9 ++++++---
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b9d0a300d376..aabe5223f95e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1915,8 +1915,11 @@ free_session_slots(struct nfsd4_session *ses)
 	int i;
 
 	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
-		free_svc_cred(&ses->se_slots[i]->sl_cred);
-		kfree(ses->se_slots[i]);
+		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+		xa_erase(&ses->se_slots, i);
+		free_svc_cred(&slot->sl_cred);
+		kfree(slot);
 	}
 }
 
@@ -1996,17 +1999,20 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	struct nfsd4_session *new;
 	int i;
 
-	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
-		     > PAGE_SIZE);
-
-	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return NULL;
+	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
 	for (i = 0; i < numslots; i++) {
-		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
-		if (!new->se_slots[i])
+		struct nfsd4_slot *slot;
+		slot = kzalloc(slotsize, GFP_KERNEL);
+		if (!slot)
 			goto out_free;
+		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
+			kfree(slot);
+			goto out_free;
+		}
 	}
 
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
@@ -2017,7 +2023,8 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	return new;
 out_free:
 	while (i--)
-		kfree(new->se_slots[i]);
+		kfree(xa_load(&new->se_slots, i));
+	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
 }
@@ -2124,6 +2131,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 static void __free_session(struct nfsd4_session *ses)
 {
 	free_session_slots(ses);
+	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
 
@@ -4278,7 +4286,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out_put_session;
 
-	slot = session->se_slots[seq->slotid];
+	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	/* We do not negotiate the number of slots yet, so set the
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ba30b2335b66..e2ddb25b8dfc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -227,8 +227,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 	return container_of(s, struct nfs4_delegation, dl_stid);
 }
 
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION     160
+/* Maximum number of slots per session.  This is for sanity-check only.
+ * It could be increased if we had a mechanism to shutdown misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION	2048
 /* Maximum  session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -327,7 +330,7 @@ struct nfsd4_session {
 	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+	struct xarray		se_slots;	/* forward channel slots */
 };
 
 /* formatted contents of nfs4_sessionid */

From b5fba969a2e445e2f36b699582d8410478a99374 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:05 +1100
Subject: [PATCH 09/42] nfsd: remove artificial limits on the session-based DRC

Rather than guessing how much space it might be safe to use for the DRC,
simply try allocating slots and be prepared to accept failure.

The first slot for each session is allocated with GFP_KERNEL which is
unlikely to fail.  Subsequent slots are allocated with the addition of
__GFP_NORETRY which is expected to fail if there isn't much free memory.

This is probably too aggressive but clears the way for adding a
shrinker interface to free extra slots when memory is tight.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 94 ++++++++-------------------------------------
 fs/nfsd/nfsd.h      |  3 --
 fs/nfsd/nfssvc.c    | 32 ---------------
 3 files changed, 16 insertions(+), 113 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aabe5223f95e..84d364975ae1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1938,65 +1938,13 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
 	return size + sizeof(struct nfsd4_slot);
 }
 
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
-{
-	u32 slotsize = slot_bytes(ca);
-	u32 num = ca->maxreqs;
-	unsigned long avail, total_avail;
-	unsigned int scale_factor;
-
-	spin_lock(&nfsd_drc_lock);
-	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
-		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
-	else
-		/* We have handed out more space than we chose in
-		 * set_max_drc() to allow.  That isn't really a
-		 * problem as long as that doesn't make us think we
-		 * have lots more due to integer overflow.
-		 */
-		total_avail = 0;
-	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
-	/*
-	 * Never use more than a fraction of the remaining memory,
-	 * unless it's the only way to give this client a slot.
-	 * The chosen fraction is either 1/8 or 1/number of threads,
-	 * whichever is smaller.  This ensures there are adequate
-	 * slots to support multiple clients per thread.
-	 * Give the client one slot even if that would require
-	 * over-allocation--it is better than failure.
-	 */
-	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
-	avail = clamp_t(unsigned long, avail, slotsize,
-			total_avail/scale_factor);
-	num = min_t(int, num, avail / slotsize);
-	num = max_t(int, num, 1);
-	nfsd_drc_mem_used += num * slotsize;
-	spin_unlock(&nfsd_drc_lock);
-
-	return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
-	int slotsize = slot_bytes(ca);
-
-	spin_lock(&nfsd_drc_lock);
-	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
-	spin_unlock(&nfsd_drc_lock);
-}
-
 static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 					   struct nfsd4_channel_attrs *battrs)
 {
 	int numslots = fattrs->maxreqs;
 	int slotsize = slot_bytes(fattrs);
 	struct nfsd4_session *new;
+	struct nfsd4_slot *slot;
 	int i;
 
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
@@ -2004,17 +1952,21 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 		return NULL;
 	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
-	for (i = 0; i < numslots; i++) {
-		struct nfsd4_slot *slot;
-		slot = kzalloc(slotsize, GFP_KERNEL);
+	slot = kzalloc(slotsize, GFP_KERNEL);
+	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+		goto out_free;
+
+	for (i = 1; i < numslots; i++) {
+		const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+		slot = kzalloc(slotsize, gfp);
 		if (!slot)
-			goto out_free;
-		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
+			break;
+		if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
 			kfree(slot);
-			goto out_free;
+			break;
 		}
 	}
-
+	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
@@ -2022,8 +1974,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	spin_lock_init(&new->se_lock);
 	return new;
 out_free:
-	while (i--)
-		kfree(xa_load(&new->se_slots, i));
+	kfree(slot);
 	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
@@ -2138,7 +2089,6 @@ static void __free_session(struct nfsd4_session *ses)
 static void free_session(struct nfsd4_session *ses)
 {
 	nfsd4_del_conns(ses);
-	nfsd4_put_drc_mem(&ses->se_fchannel);
 	__free_session(ses);
 }
 
@@ -3786,17 +3736,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
 	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
 			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
 	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
-	/*
-	 * Note decreasing slot size below client's request may make it
-	 * difficult for client to function correctly, whereas
-	 * decreasing the number of slots will (just?) affect
-	 * performance.  When short on memory we therefore prefer to
-	 * decrease number of slots instead of their size.  Clients that
-	 * request larger slots than they need will get poor results:
-	 * Note that we always allow at least one slot, because our
-	 * accounting is soft and provides no guarantees either way.
-	 */
-	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
 	return nfs_ok;
 }
@@ -3874,11 +3813,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		return status;
 	status = check_backchannel_attrs(&cr_ses->back_channel);
 	if (status)
-		goto out_release_drc_mem;
+		goto out_err;
 	status = nfserr_jukebox;
 	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
 	if (!new)
-		goto out_release_drc_mem;
+		goto out_err;
 	conn = alloc_conn_from_crses(rqstp, cr_ses);
 	if (!conn)
 		goto out_free_session;
@@ -3987,8 +3926,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	free_conn(conn);
 out_free_session:
 	__free_session(new);
-out_release_drc_mem:
-	nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
 	return status;
 }
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 4b56ba1e8e48..3eb21e63b921 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -88,9 +88,6 @@ struct nfsd_genl_rqstp {
 extern struct svc_program	nfsd_programs[];
 extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
 extern struct mutex		nfsd_mutex;
-extern spinlock_t		nfsd_drc_lock;
-extern unsigned long		nfsd_drc_max_mem;
-extern unsigned long		nfsd_drc_mem_used;
 extern atomic_t			nfsd_th_cnt;		/* number of available threads */
 
 extern const struct seq_operations nfs_exports_op;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b77097de5936..3f5104ed70bf 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -70,16 +70,6 @@ static __be32			nfsd_init_request(struct svc_rqst *,
  */
 DEFINE_MUTEX(nfsd_mutex);
 
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-DEFINE_SPINLOCK(nfsd_drc_lock);
-unsigned long	nfsd_drc_max_mem;
-unsigned long	nfsd_drc_mem_used;
-
 #if IS_ENABLED(CONFIG_NFS_LOCALIO)
 static const struct svc_version *localio_versions[] = {
 	[1] = &localio_version1,
@@ -575,27 +565,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
 		}
 }
 
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
-	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_drc_max_mem = (nr_free_buffer_pages()
-					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
-	nfsd_drc_mem_used = 0;
-	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
 static int nfsd_get_default_max_blksize(void)
 {
 	struct sysinfo i;
@@ -677,7 +646,6 @@ int nfsd_create_serv(struct net *net)
 	nn->nfsd_serv = serv;
 	spin_unlock(&nfsd_notifier_lock);
 
-	set_max_drc();
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
 		register_inetaddr_notifier(&nfsd_inetaddr_notifier);

From 601c8cb349c2d9a3a6cea6f53e0bf838e2e60893 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:06 +1100
Subject: [PATCH 10/42] nfsd: add session slot count to
 /proc/fs/nfsd/clients/*/info

Each client now reports the number of slots allocated in each session.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 84d364975ae1..bdfe791b02f1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2643,6 +2643,7 @@ static const char *cb_state2str(int state)
 static int client_info_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = file_inode(m->file);
+	struct nfsd4_session *ses;
 	struct nfs4_client *clp;
 	u64 clid;
 
@@ -2679,6 +2680,13 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
 	seq_printf(m, "admin-revoked states: %d\n",
 		   atomic_read(&clp->cl_admin_revoked));
+	spin_lock(&clp->cl_lock);
+	seq_printf(m, "session slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	spin_unlock(&clp->cl_lock);
+	seq_puts(m, "\n");
+
 	drop_client(clp);
 
 	return 0;

From 60aa6564317db29ea72b6db821b5bbb45f1e879d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:07 +1100
Subject: [PATCH 11/42] nfsd: allocate new session-based DRC slots on demand.

If a client ever uses the highest available slot for a given session,
attempt to allocate more slots so there is room for the client to use
them if wanted.  GFP_NOWAIT is used so if there is not plenty of
free memory, failure is expected - which is what we want.  It also
allows the allocation while holding a spinlock.

Each time we increase the number of slots by 20% (rounded up).  This
allows fairly quick growth while avoiding excessive over-shoot.

We would expect to stablise with around 10% more slots available than
the client actually uses.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bdfe791b02f1..fcc0153b6b90 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
-	/* We do not negotiate the number of slots yet, so set the
-	 * maxslots to the session maxreqs which is used to encode
-	 * sr_highest_slotid and the sr_target_slot id to maxslots */
-	seq->maxslots = session->se_fchannel.maxreqs;
-
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
 	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
 					slot->sl_flags & NFSD4_SLOT_INUSE);
@@ -4289,6 +4284,38 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	cstate->session = session;
 	cstate->clp = clp;
 
+	/*
+	 * If the client ever uses the highest available slot,
+	 * gently try to allocate another 20%.  This allows
+	 * fairly quick growth without grossly over-shooting what
+	 * the client might use.
+	 */
+	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+		int s = session->se_fchannel.maxreqs;
+		int cnt = DIV_ROUND_UP(s, 5);
+
+		do {
+			/*
+			 * GFP_NOWAIT both allows allocation under a
+			 * spinlock, and only succeeds if there is
+			 * plenty of memory.
+			 */
+			slot = kzalloc(slot_bytes(&session->se_fchannel),
+				       GFP_NOWAIT);
+			if (slot &&
+			    !xa_is_err(xa_store(&session->se_slots, s, slot,
+						GFP_NOWAIT))) {
+				s += 1;
+				session->se_fchannel.maxreqs = s;
+			} else {
+				kfree(slot);
+				slot = NULL;
+			}
+		} while (slot && --cnt > 0);
+	}
+	seq->maxslots = session->se_fchannel.maxreqs;
+
 out:
 	switch (clp->cl_cb_state) {
 	case NFSD4_CB_DOWN:

From fc8738c68d0bbf5033dd98b4f63d277ecbd49fd7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:08 +1100
Subject: [PATCH 12/42] nfsd: add support for freeing unused session-DRC slots

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 16 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

se_dead is moved in struct nfsd4_session to remove a hole.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   |  5 ++-
 fs/nfsd/state.h     |  6 ++-
 fs/nfsd/xdr4.h      |  2 -
 4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fcc0153b6b90..d29737018f04 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses:  The session to affect
+ * @dec:  how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock.  As shrinkers are
+ * best-effort, skiping a session is client_lock is already held has no
+ * great coast
+ *
+ * Return value:
+ *   The number of slots that the target was reduced by.
+ */
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
 				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2ddb25b8dfc..2b0e6148a87a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,10 +245,12 @@ struct nfsd4_slot {
 	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -321,7 +323,6 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	struct nfs4_client	*se_client;
@@ -331,6 +332,9 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 

From 35e34642b5996df91e2879d59f726df6072c82f9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Dec 2024 08:47:09 +1100
Subject: [PATCH 13/42] nfsd: add shrinker to reduce number of slots allocated
 per session

Add a shrinker which frees unused slots and may ask the clients to use
fewer slots on each session.

We keep a global count of the number of freeable slots, which is the sum
of one less than the current "target" slots in all sessions in all
clients in all net-namespaces. This number is reported by the shrinker.

When the shrinker is asked to free some, we call xxx on each session in
a round-robin asking each to reduce the slot count by 1.  This will
reduce the "target" so the number reported by the shrinker will reduce
immediately.  The memory will only be freed later when the client
confirmed that it is no longer needed.

We use a global list of sessions and move the "head" to after the last
session that we asked to reduce, so the next callback from the shrinker
will move on to the next session.  This pressure should be applied
"evenly" across all sessions over time.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 77 ++++++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/state.h     |  1 +
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d29737018f04..b38b3a1c0307 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
  */
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session.  The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed.  The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
 static void
 free_session_slots(struct nfsd4_session *ses, int from)
 {
@@ -1930,8 +1940,11 @@ free_session_slots(struct nfsd4_session *ses, int from)
 		kfree(slot);
 	}
 	ses->se_fchannel.maxreqs = from;
-	if (ses->se_target_maxslots > from)
-		ses->se_target_maxslots = from;
+	if (ses->se_target_maxslots > from) {
+		int new_target = from ?: 1;
+		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+		ses->se_target_maxslots = new_target;
+	}
 }
 
 /**
@@ -1949,7 +1962,7 @@ free_session_slots(struct nfsd4_session *ses, int from)
  * Return value:
  *   The number of slots that the target was reduced by.
  */
-static int __maybe_unused
+static int
 reduce_session_slots(struct nfsd4_session *ses, int dec)
 {
 	struct nfsd_net *nn = net_generic(ses->se_client->net,
@@ -1962,6 +1975,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
 		return ret;
 	ret = min(dec, ses->se_target_maxslots-1);
 	ses->se_target_maxslots -= ret;
+	atomic_sub(ret, &nfsd_total_target_slots);
 	ses->se_slot_gen += 1;
 	if (ses->se_slot_gen == 0) {
 		int i;
@@ -2021,6 +2035,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_target_maxslots = i;
+	atomic_add(i - 1, &nfsd_total_target_slots);
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2145,6 +2160,36 @@ static void free_session(struct nfsd4_session *ses)
 	__free_session(ses);
 }
 
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+	return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+	struct nfsd4_session *ses;
+	unsigned long scanned = 0;
+	unsigned long freed = 0;
+
+	spin_lock(&nfsd_session_list_lock);
+	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+		freed += reduce_session_slots(ses, 1);
+		scanned += 1;
+		if (scanned >= sc->nr_to_scan) {
+			/* Move starting point for next scan */
+			list_move(&nfsd_session_list, &ses->se_all_sessions);
+			break;
+		}
+	}
+	spin_unlock(&nfsd_session_list_lock);
+	sc->nr_scanned = scanned;
+	return freed;
+}
+
 static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
@@ -2169,6 +2214,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
 
+	spin_lock(&nfsd_session_list_lock);
+	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+	spin_unlock(&nfsd_session_list_lock);
+
 	{
 		struct sockaddr *sa = svc_addr(rqstp);
 		/*
@@ -2238,6 +2287,9 @@ unhash_session(struct nfsd4_session *ses)
 	spin_lock(&ses->se_client->cl_lock);
 	list_del(&ses->se_perclnt);
 	spin_unlock(&ses->se_client->cl_lock);
+	spin_lock(&nfsd_session_list_lock);
+	list_del(&ses->se_all_sessions);
+	spin_unlock(&nfsd_session_list_lock);
 }
 
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -2373,8 +2425,12 @@ unhash_client_locked(struct nfs4_client *clp)
 	}
 	list_del_init(&clp->cl_lru);
 	spin_lock(&clp->cl_lock);
-	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+	spin_lock(&nfsd_session_list_lock);
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
 		list_del_init(&ses->se_hash);
+		list_del_init(&ses->se_all_sessions);
+	}
+	spin_unlock(&nfsd_session_list_lock);
 	spin_unlock(&clp->cl_lock);
 }
 
@@ -4380,6 +4436,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				atomic_add(s - session->se_target_maxslots,
+					   &nfsd_total_target_slots);
 				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
@@ -8770,7 +8828,6 @@ nfs4_state_start_net(struct net *net)
 }
 
 /* initialization to perform when the nfsd service is started: */
-
 int
 nfs4_state_start(void)
 {
@@ -8780,6 +8837,15 @@ nfs4_state_start(void)
 	if (ret)
 		return ret;
 
+	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+	if (!nfsd_slot_shrinker) {
+		rhltable_destroy(&nfs4_file_rhltable);
+		return -ENOMEM;
+	}
+	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+	shrinker_register(nfsd_slot_shrinker);
+
 	set_max_delegations();
 	return 0;
 }
@@ -8821,6 +8887,7 @@ void
 nfs4_state_shutdown(void)
 {
 	rhltable_destroy(&nfs4_file_rhltable);
+	shrinker_free(nfsd_slot_shrinker);
 }
 
 static void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2b0e6148a87a..b31a8523c8e5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -325,6 +325,7 @@ struct nfsd4_session {
 	u32			se_cb_prog;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
+	struct list_head	se_all_sessions;/* global list of sessions */
 	struct nfs4_client	*se_client;
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;

From 1b3e26a5ccbfc2f85bda1930cc278e313165e353 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <okorniev@redhat.com>
Date: Thu, 19 Dec 2024 15:12:04 -0500
Subject: [PATCH 14/42] NFSD: fix decoding in nfs4_xdr_dec_cb_getattr

If a client were to send an error to a CB_GETATTR call, the code
erronously continues to try decode past the error code. It ends
up returning BAD_XDR error to the rpc layer and then in turn
trigger a WARN_ONCE in nfsd4_cb_done() function.

Fixes: 6487a13b5c6b ("NFSD: add support for CB_GETATTR callback")
Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 94479483c3d6..151de0285d22 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -647,7 +647,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
 		return status;
 
 	status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
-	if (status)
+	if (unlikely(status || cb->cb_seq_status))
 		return status;
 	if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
 		return -NFSERR_BAD_XDR;

From cb80ecf75ac38df30cea1163563391ef7e76a24e Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <okorniev@redhat.com>
Date: Thu, 19 Dec 2024 16:57:48 -0500
Subject: [PATCH 15/42] NFSD: add cb opcode to WARN_ONCE on failed callback

It helps to know what kind of callback happened that triggered the
WARN_ONCE in nfsd4_cb_done() function in diagnosing what can set
an uncommon state where both cb_status and tk_status are set at
the same time.

Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 151de0285d22..a8e33f561bb3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1396,8 +1396,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 		return;
 
 	if (cb->cb_status) {
-		WARN_ONCE(task->tk_status, "cb_status=%d tk_status=%d",
-			  cb->cb_status, task->tk_status);
+		WARN_ONCE(task->tk_status,
+			  "cb_status=%d tk_status=%d cb_opcode=%d",
+			  cb->cb_status, task->tk_status, cb->cb_ops->opcode);
 		task->tk_status = cb->cb_status;
 	}
 

From 2f55dbe4e2072c9e99298c6f37473778a98c9107 Mon Sep 17 00:00:00 2001
From: Yang Erkun <yangerkun@huawei.com>
Date: Wed, 25 Dec 2024 14:59:05 +0800
Subject: [PATCH 16/42] SUNRPC: introduce cache_check_rcu to help check in rcu
 context

This is a prepare patch to add cache_check_rcu, will use it with follow
patch.

Suggested-by: NeilBrown <neilb@suse.de>
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/cache.h |  2 ++
 net/sunrpc/cache.c           | 41 +++++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 35766963dd14..e783132e481f 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -222,6 +222,8 @@ static inline bool cache_is_expired(struct cache_detail *detail, struct cache_he
 	return detail->flush_time >= h->last_refresh;
 }
 
+extern int cache_check_rcu(struct cache_detail *detail,
+		       struct cache_head *h, struct cache_req *rqstp);
 extern int cache_check(struct cache_detail *detail,
 		       struct cache_head *h, struct cache_req *rqstp);
 extern void cache_flush(void);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 059f6ef1ad18..88f42a27c8cc 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -281,21 +281,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 	return rv;
 }
 
-/*
- * This is the generic cache management routine for all
- * the authentication caches.
- * It checks the currency of a cache item and will (later)
- * initiate an upcall to fill it if needed.
- *
- *
- * Returns 0 if the cache_head can be used, or cache_puts it and returns
- * -EAGAIN if upcall is pending and request has been queued
- * -ETIMEDOUT if upcall failed or request could not be queue or
- *           upcall completed but item is still invalid (implying that
- *           the cache item has been replaced with a newer one).
- * -ENOENT if cache entry was negative
- */
-int cache_check(struct cache_detail *detail,
+int cache_check_rcu(struct cache_detail *detail,
 		    struct cache_head *h, struct cache_req *rqstp)
 {
 	int rv;
@@ -336,6 +322,31 @@ int cache_check(struct cache_detail *detail,
 				rv = -ETIMEDOUT;
 		}
 	}
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(cache_check_rcu);
+
+/*
+ * This is the generic cache management routine for all
+ * the authentication caches.
+ * It checks the currency of a cache item and will (later)
+ * initiate an upcall to fill it if needed.
+ *
+ *
+ * Returns 0 if the cache_head can be used, or cache_puts it and returns
+ * -EAGAIN if upcall is pending and request has been queued
+ * -ETIMEDOUT if upcall failed or request could not be queue or
+ *           upcall completed but item is still invalid (implying that
+ *           the cache item has been replaced with a newer one).
+ * -ENOENT if cache entry was negative
+ */
+int cache_check(struct cache_detail *detail,
+		struct cache_head *h, struct cache_req *rqstp)
+{
+	int rv;
+
+	rv = cache_check_rcu(detail, h, rqstp);
 	if (rv)
 		cache_put(h, detail);
 	return rv;

From c224edca7af028828e2ad866b61d731b5e72b46d Mon Sep 17 00:00:00 2001
From: Yang Erkun <yangerkun@huawei.com>
Date: Wed, 25 Dec 2024 14:59:06 +0800
Subject: [PATCH 17/42] nfsd: no need get cache ref when protected by rcu

rcu_read_lock/rcu_read_unlock has already provide protection for the
pointer we will reference when we call e_show. Therefore, there is no
need to obtain a cache reference to help protect cache_head.
Additionally, the .put such as expkey_put/svc_export_put will invoke
dput, which can sleep and break rcu. Stop get cache reference to fix
them all.

Fixes: ae74136b4bb6 ("SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock")
Suggested-by: NeilBrown <neilb@suse.de>
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/export.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index aa4712362b3b..c6168bccfb6c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1425,13 +1425,9 @@ static int e_show(struct seq_file *m, void *p)
 		return 0;
 	}
 
-	if (!cache_get_rcu(&exp->h))
+	if (cache_check_rcu(cd, &exp->h, NULL))
 		return 0;
 
-	if (cache_check(cd, &exp->h, NULL))
-		return 0;
-
-	exp_put(exp);
 	return svc_export_show(m, cd, cp);
 }
 

From 1b10f0b603c066d81327c163a23c19f01e112366 Mon Sep 17 00:00:00 2001
From: Yang Erkun <yangerkun@huawei.com>
Date: Wed, 25 Dec 2024 14:59:07 +0800
Subject: [PATCH 18/42] SUNRPC: no need get cache ref when protected by rcu

rcu_read_lock/rcu_read_unlock has already provide protection for the
pointer we will reference when we call c_show. Therefore, there is no
need to obtain a cache reference to help protect cache_head.
Additionally, the .put such as expkey_put/svc_export_put will invoke
dput, which can sleep and break rcu. Stop get cache reference to fix
them all.

Fixes: ae74136b4bb6 ("SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock")
Suggested-by: NeilBrown <neilb@suse.de>
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/cache.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 88f42a27c8cc..cb279eb9ac4b 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1438,17 +1438,11 @@ static int c_show(struct seq_file *m, void *p)
 		seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n",
 			   convert_to_wallclock(cp->expiry_time),
 			   kref_read(&cp->ref), cp->flags);
-	if (!cache_get_rcu(cp))
-		return 0;
 
-	if (cache_check(cd, cp, NULL))
-		/* cache_check does a cache_put on failure */
+	if (cache_check_rcu(cd, cp, NULL))
+		seq_puts(m, "# ");
+	else if (cache_is_expired(cd, cp))
 		seq_puts(m, "# ");
-	else {
-		if (cache_is_expired(cd, cp))
-			seq_puts(m, "# ");
-		cache_put(cp, cd);
-	}
 
 	return cd->cache_show(m, cd, cp);
 }

From 2530766492ec7726582bcde44575ec3ff7487cd2 Mon Sep 17 00:00:00 2001
From: Yang Erkun <yangerkun@huawei.com>
Date: Wed, 25 Dec 2024 14:59:08 +0800
Subject: [PATCH 19/42] nfsd: fix UAF when access ex_uuid or ex_stats

We can access exp->ex_stats or exp->ex_uuid in rcu context(c_show and
e_show). All these resources should be released using kfree_rcu. Fix this
by using call_rcu, clean them all after a rcu grace period.

==================================================================
BUG: KASAN: slab-use-after-free in svc_export_show+0x362/0x430 [nfsd]
Read of size 1 at addr ff11000010fdc120 by task cat/870

CPU: 1 UID: 0 PID: 870 Comm: cat Not tainted 6.12.0-rc3+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.16.1-2.fc37 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x53/0x70
 print_address_description.constprop.0+0x2c/0x3a0
 print_report+0xb9/0x280
 kasan_report+0xae/0xe0
 svc_export_show+0x362/0x430 [nfsd]
 c_show+0x161/0x390 [sunrpc]
 seq_read_iter+0x589/0x770
 seq_read+0x1e5/0x270
 proc_reg_read+0xe1/0x140
 vfs_read+0x125/0x530
 ksys_read+0xc1/0x160
 do_syscall_64+0x5f/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Allocated by task 830:
 kasan_save_stack+0x20/0x40
 kasan_save_track+0x14/0x30
 __kasan_kmalloc+0x8f/0xa0
 __kmalloc_node_track_caller_noprof+0x1bc/0x400
 kmemdup_noprof+0x22/0x50
 svc_export_parse+0x8a9/0xb80 [nfsd]
 cache_do_downcall+0x71/0xa0 [sunrpc]
 cache_write_procfs+0x8e/0xd0 [sunrpc]
 proc_reg_write+0xe1/0x140
 vfs_write+0x1a5/0x6d0
 ksys_write+0xc1/0x160
 do_syscall_64+0x5f/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Freed by task 868:
 kasan_save_stack+0x20/0x40
 kasan_save_track+0x14/0x30
 kasan_save_free_info+0x3b/0x60
 __kasan_slab_free+0x37/0x50
 kfree+0xf3/0x3e0
 svc_export_put+0x87/0xb0 [nfsd]
 cache_purge+0x17f/0x1f0 [sunrpc]
 nfsd_destroy_serv+0x226/0x2d0 [nfsd]
 nfsd_svc+0x125/0x1e0 [nfsd]
 write_threads+0x16a/0x2a0 [nfsd]
 nfsctl_transaction_write+0x74/0xa0 [nfsd]
 vfs_write+0x1a5/0x6d0
 ksys_write+0xc1/0x160
 do_syscall_64+0x5f/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: ae74136b4bb6 ("SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock")
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/export.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c6168bccfb6c..0363720280d4 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -355,16 +355,25 @@ static void export_stats_destroy(struct export_stats *stats)
 					    EXP_STATS_COUNTERS_NUM);
 }
 
-static void svc_export_put(struct kref *ref)
+static void svc_export_release(struct rcu_head *rcu_head)
 {
-	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
-	path_put(&exp->ex_path);
-	auth_domain_put(exp->ex_client);
+	struct svc_export *exp = container_of(rcu_head, struct svc_export,
+			ex_rcu);
+
 	nfsd4_fslocs_free(&exp->ex_fslocs);
 	export_stats_destroy(exp->ex_stats);
 	kfree(exp->ex_stats);
 	kfree(exp->ex_uuid);
-	kfree_rcu(exp, ex_rcu);
+	kfree(exp);
+}
+
+static void svc_export_put(struct kref *ref)
+{
+	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+
+	path_put(&exp->ex_path);
+	auth_domain_put(exp->ex_client);
+	call_rcu(&exp->ex_rcu, svc_export_release);
 }
 
 static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)

From ef3675b45bcb6c17cabbbde620c6cea52ffb21ac Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:52 -0500
Subject: [PATCH 20/42] NFSD: Encode COMPOUND operation status on page
 boundaries

J. David reports an odd corruption of a READDIR reply sent to a
FreeBSD client.

xdr_reserve_space() has to do a special trick when the @nbytes value
requests more space than there is in the current page of the XDR
buffer.

In that case, xdr_reserve_space() returns a pointer to the start of
the next page, and then the next call to xdr_reserve_space() invokes
__xdr_commit_encode() to copy enough of the data item back into the
previous page to make that data item contiguous across the page
boundary.

But we need to be careful in the case where buffer space is reserved
early for a data item whose value will be inserted into the buffer
later.

One such caller, nfsd4_encode_operation(), reserves 8 bytes in the
encoding buffer for each COMPOUND operation. However, a READDIR
result can sometimes encode file names so that there are only 4
bytes left at the end of the current XDR buffer page (though plenty
of pages are left to handle the remaining encoding tasks).

If a COMPOUND operation follows the READDIR result (say, a GETATTR),
then nfsd4_encode_operation() will reserve 8 bytes for the op number
(9) and the op status (usually NFS4_OK). In this weird case,
xdr_reserve_space() returns a pointer to byte zero of the next buffer
page, as it assumes the data item will be copied back into place (in
the previous page) on the next call to xdr_reserve_space().

nfsd4_encode_operation() writes the op num into the buffer, then
saves the next 4-byte location for the op's status code. The next
xdr_reserve_space() call is part of GETATTR encoding, so the op num
gets copied back into the previous page, but the saved location for
the op status continues to point to the wrong spot in the current
XDR buffer page because __xdr_commit_encode() moved that data item.

After GETATTR encoding is complete, nfsd4_encode_operation() writes
the op status over the first XDR data item in the GETATTR result.
The NFS4_OK status code (0) makes it look like there are zero items
in the GETATTR's attribute bitmask.

The patch description of commit 2825a7f90753 ("nfsd4: allow encoding
across page boundaries") [2014] remarks that NFSD "can't handle a
new operation starting close to the end of a page." This bug appears
to be one reason for that remark.

Reported-by: J David <j.david.lists@gmail.com>
Closes: https://lore.kernel.org/linux-nfs/3998d739-c042-46b4-8166-dbd6c5f0e804@oracle.com/T/#t
Tested-by: Rick Macklem <rmacklem@uoguelph.ca>
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4dcb03cd9292..b2243b5dbf18 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5761,15 +5761,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	struct nfs4_stateowner *so = resp->cstate.replay_owner;
 	struct svc_rqst *rqstp = resp->rqstp;
 	const struct nfsd4_operation *opdesc = op->opdesc;
-	int post_err_offset;
+	unsigned int op_status_offset;
 	nfsd4_enc encoder;
-	__be32 *p;
 
-	p = xdr_reserve_space(xdr, 8);
-	if (!p)
+	if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+		goto release;
+	op_status_offset = xdr->buf->len;
+	if (!xdr_reserve_space(xdr, XDR_UNIT))
 		goto release;
-	*p++ = cpu_to_be32(op->opnum);
-	post_err_offset = xdr->buf->len;
 
 	if (op->opnum == OP_ILLEGAL)
 		goto status;
@@ -5810,20 +5809,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 		 * bug if we had to do this on a non-idempotent op:
 		 */
 		warn_on_nonidempotent_op(op);
-		xdr_truncate_encode(xdr, post_err_offset);
+		xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT);
 	}
 	if (so) {
-		int len = xdr->buf->len - post_err_offset;
+		int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
 
 		so->so_replay.rp_status = op->status;
 		so->so_replay.rp_buflen = len;
-		read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+		read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT,
 						so->so_replay.rp_buf, len);
 	}
 status:
 	op->status = nfsd4_map_status(op->status,
 				      resp->cstate.minorversion);
-	*p = op->status;
+	write_bytes_to_xdr_buf(xdr->buf, op_status_offset,
+			       &op->status, XDR_UNIT);
 release:
 	if (opdesc && opdesc->op_release)
 		opdesc->op_release(&op->u);

From 1a861150bd6a69ea14da7a0d752da2b442e6a5dc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:53 -0500
Subject: [PATCH 21/42] NFSD: Insulate nfsd4_encode_read() from page boundaries
 in the encode buffer

Commit 28d5bc468efe ("NFSD: Optimize nfsd4_encode_readv()") replaced
the use of write_bytes_to_xdr_buf() because it's expensive and the
data items to be encoded are already properly aligned.

However, the current code will corrupt the encoded data if the XDR
data items that are reserved early and then poked into the XDR
buffer later happen to fall on a page boundary in the XDR encoding
buffer.

__xdr_commit_encode can shift encoded data items in the encoding
buffer so that pointers returned from xdr_reserve_space() no longer
address the same part of the encoding stream.

This isn't an issue for splice reads because the reserved encode
buffer areas must fall in the XDR buffers header for the splice to
work without error. For vectored reads, however, there is a
possibility of send buffer corruption in rare cases.

Fixes: 28d5bc468efe ("NFSD: Optimize nfsd4_encode_readv()")
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b2243b5dbf18..2399b17db052 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4317,6 +4317,15 @@ static __be32 nfsd4_encode_splice_read(
 	int status, space_left;
 	__be32 nfserr;
 
+	/*
+	 * Splice read doesn't work if encoding has already wandered
+	 * into the XDR buf's page array.
+	 */
+	if (unlikely(xdr->buf->page_len)) {
+		WARN_ON_ONCE(1);
+		return nfserr_serverfault;
+	}
+
 	/*
 	 * Make sure there is room at the end of buf->head for
 	 * svcxdr_encode_opaque_pages() to create a tail buffer
@@ -4399,25 +4408,23 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp;
 	struct nfsd4_read *read = &u->read;
 	struct xdr_stream *xdr = resp->xdr;
-	int starting_len = xdr->buf->len;
 	bool splice_ok = argp->splice_ok;
+	unsigned int eof_offset;
 	unsigned long maxcount;
+	__be32 wire_data[2];
 	struct file *file;
-	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
+
+	eof_offset = xdr->buf->len;
 	file = read->rd_nf->nf_file;
 
-	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
-	if (!p) {
+	/* Reserve space for the eof flag and byte count */
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) {
 		WARN_ON_ONCE(splice_ok);
 		return nfserr_resource;
 	}
-	if (resp->xdr->buf->page_len && splice_ok) {
-		WARN_ON_ONCE(1);
-		return nfserr_serverfault;
-	}
 	xdr_commit_encode(xdr);
 
 	maxcount = min_t(unsigned long, read->rd_length,
@@ -4428,12 +4435,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	else
 		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
 	if (nfserr) {
-		xdr_truncate_encode(xdr, starting_len);
+		xdr_truncate_encode(xdr, eof_offset);
 		return nfserr;
 	}
 
-	p = xdr_encode_bool(p, read->rd_eof);
-	*p = cpu_to_be32(read->rd_length);
+	wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+	wire_data[1] = cpu_to_be32(read->rd_length);
+	write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
 	return nfs_ok;
 }
 
@@ -5304,10 +5312,6 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	p = xdr_reserve_space(xdr, 4 + 8 + 4);
 	if (!p)
 		return nfserr_io;
-	if (resp->xdr->buf->page_len && splice_ok) {
-		WARN_ON_ONCE(splice_ok);
-		return nfserr_serverfault;
-	}
 
 	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));

From c9fc7772bacb28a8bd8efb08399c5af7217fbbb7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:54 -0500
Subject: [PATCH 22/42] NFSD: Insulate nfsd4_encode_read_plus() from page
 boundaries in the encode buffer

Commit eeadcb757945 ("NFSD: Simplify READ_PLUS") replaced the use of
write_bytes_to_xdr_buf(), copying what was in nfsd4_encode_read()
at the time.

However, the current code will corrupt the encoded data if the XDR
data items that are reserved early and then poked into the XDR
buffer later happen to fall on a page boundary in the XDR encoding
buffer.

__xdr_commit_encode can shift encoded data items in the encoding
buffer so that pointers returned from xdr_reserve_space() no longer
address the same part of the encoding stream.

Fixes: eeadcb757945 ("NFSD: Simplify READ_PLUS")
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2399b17db052..474fd663b0a9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5337,16 +5337,17 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 	struct nfsd4_read *read = &u->read;
 	struct file *file = read->rd_nf->nf_file;
 	struct xdr_stream *xdr = resp->xdr;
-	int starting_len = xdr->buf->len;
+	unsigned int eof_offset;
+	__be32 wire_data[2];
 	u32 segments = 0;
-	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
 
-	/* eof flag, segment count */
-	p = xdr_reserve_space(xdr, 4 + 4);
-	if (!p)
+	eof_offset = xdr->buf->len;
+
+	/* Reserve space for the eof flag and segment count */
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2)))
 		return nfserr_io;
 	xdr_commit_encode(xdr);
 
@@ -5356,15 +5357,16 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	nfserr = nfsd4_encode_read_plus_data(resp, read);
 	if (nfserr) {
-		xdr_truncate_encode(xdr, starting_len);
+		xdr_truncate_encode(xdr, eof_offset);
 		return nfserr;
 	}
 
 	segments++;
 
 out:
-	p = xdr_encode_bool(p, read->rd_eof);
-	*p = cpu_to_be32(segments);
+	wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+	wire_data[1] = cpu_to_be32(segments);
+	write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
 	return nfserr;
 }
 

From 26ea81638fa0fb9c02b76775301995722368356a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:55 -0500
Subject: [PATCH 23/42] NFSD: Insulate nfsd4_encode_read_plus_data() from page
 boundaries in the encode buffer

Commit eeadcb757945 ("NFSD: Simplify READ_PLUS") replaced the use of
write_bytes_to_xdr_buf(), copying what was in nfsd4_encode_read()
at the time.

However, the current code will corrupt the encoded data if the XDR
data items that are reserved early and then poked into the XDR
buffer later happen to fall on a page boundary in the XDR encoding
buffer.

__xdr_commit_encode can shift encoded data items in the encoding
buffer so that pointers returned from xdr_reserve_space() no longer
address the same part of the encoding stream.

Fixes: eeadcb757945 ("NFSD: Simplify READ_PLUS")
Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 474fd663b0a9..3a68bed1829d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5305,13 +5305,20 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	struct file *file = read->rd_nf->nf_file;
 	struct xdr_stream *xdr = resp->xdr;
 	bool splice_ok = argp->splice_ok;
+	unsigned int offset_offset;
+	__be32 nfserr, wire_count;
 	unsigned long maxcount;
-	__be32 nfserr, *p;
+	__be64 wire_offset;
 
-	/* Content type, offset, byte count */
-	p = xdr_reserve_space(xdr, 4 + 8 + 4);
-	if (!p)
+	if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT)
+		return nfserr_io;
+
+	offset_offset = xdr->buf->len;
+
+	/* Reserve space for the byte offset and count */
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3)))
 		return nfserr_io;
+	xdr_commit_encode(xdr);
 
 	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));
@@ -5323,10 +5330,12 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	if (nfserr)
 		return nfserr;
 
-	*p++ = cpu_to_be32(NFS4_CONTENT_DATA);
-	p = xdr_encode_hyper(p, read->rd_offset);
-	*p = cpu_to_be32(read->rd_length);
-
+	wire_offset = cpu_to_be64(read->rd_offset);
+	write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset,
+			       XDR_UNIT * 2);
+	wire_count = cpu_to_be32(read->rd_length);
+	write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2,
+			       &wire_count, XDR_UNIT);
 	return nfs_ok;
 }
 

From 201cb2048a926b041f80cbd7331de77b87867940 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:57 -0500
Subject: [PATCH 24/42] NFSD: Insulate nfsd4_encode_readlink() from page
 boundaries in the encode buffer

There's no guarantee that the pointer returned from
xdr_reserve_space() will still point to the correct reserved space
in the encode buffer after one or more intervening calls to
xdr_reserve_space(). It just happens to work with the current
implementation of xdr_reserve_space().

Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3a68bed1829d..2116a03830c3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4450,25 +4450,21 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
 		      union nfsd4_op_u *u)
 {
 	struct nfsd4_readlink *readlink = &u->readlink;
-	__be32 *p, *maxcount_p, zero = xdr_zero;
+	__be32 *p, wire_count, zero = xdr_zero;
 	struct xdr_stream *xdr = resp->xdr;
-	int length_offset = xdr->buf->len;
+	unsigned int length_offset;
 	int maxcount, status;
 
-	maxcount_p = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!maxcount_p)
+	/* linktext4.count */
+	length_offset = xdr->buf->len;
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
 		return nfserr_resource;
-	maxcount = PAGE_SIZE;
 
+	/* linktext4.data */
+	maxcount = PAGE_SIZE;
 	p = xdr_reserve_space(xdr, maxcount);
 	if (!p)
 		return nfserr_resource;
-	/*
-	 * XXX: By default, vfs_readlink() will truncate symlinks if they
-	 * would overflow the buffer.  Is this kosher in NFSv4?  If not, one
-	 * easy fix is: if vfs_readlink() precisely fills the buffer, assume
-	 * that truncation occurred, and return NFS4ERR_RESOURCE.
-	 */
 	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
 						(char *)p, &maxcount);
 	if (nfserr == nfserr_isdir)
@@ -4481,7 +4477,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
 		nfserr = nfserrno(status);
 		goto out_err;
 	}
-	*maxcount_p = cpu_to_be32(maxcount);
+
+	wire_count = cpu_to_be32(maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT);
 	xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
 	write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
 			       xdr_pad_size(maxcount));

From 825562bc7d5948723511046686217829dbeb067f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:58 -0500
Subject: [PATCH 25/42] NFSD: Refactor nfsd4_do_encode_secinfo() again

Extract the code that encodes the secinfo4 union data type to
clarify the logic. The removed warning is pretty well obscured and
thus probably not terribly useful.

Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 65 +++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2116a03830c3..5cc365a3ed0d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4613,13 +4613,41 @@ nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr,
 	return nfs_ok;
 }
 
+static __be32
+nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf,
+		      u32 *supported)
+{
+	struct rpcsec_gss_info info;
+	__be32 status;
+
+	if (rpcauth_get_gssinfo(pf, &info) == 0) {
+		(*supported)++;
+
+		/* flavor */
+		status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
+		if (status != nfs_ok)
+			return status;
+		/* flavor_info */
+		status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
+		if (status != nfs_ok)
+			return status;
+	} else if (pf < RPC_AUTH_MAXFLAVOR) {
+		(*supported)++;
+
+		/* flavor */
+		status = nfsd4_encode_uint32_t(xdr, pf);
+		if (status != nfs_ok)
+			return status;
+	}
+	return nfs_ok;
+}
+
 static __be32
 nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
 {
 	u32 i, nflavs, supported;
 	struct exp_flavor_info *flavs;
 	struct exp_flavor_info def_flavs[2];
-	static bool report = true;
 	__be32 *flavorsp;
 	__be32 status;
 
@@ -4643,42 +4671,17 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
 		}
 	}
 
-	supported = 0;
 	flavorsp = xdr_reserve_space(xdr, XDR_UNIT);
 	if (!flavorsp)
 		return nfserr_resource;
 
-	for (i = 0; i < nflavs; i++) {
-		rpc_authflavor_t pf = flavs[i].pseudoflavor;
-		struct rpcsec_gss_info info;
-
-		if (rpcauth_get_gssinfo(pf, &info) == 0) {
-			supported++;
-
-			/* flavor */
-			status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
-			if (status != nfs_ok)
-				return status;
-			/* flavor_info */
-			status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
-			if (status != nfs_ok)
-				return status;
-		} else if (pf < RPC_AUTH_MAXFLAVOR) {
-			supported++;
-
-			/* flavor */
-			status = nfsd4_encode_uint32_t(xdr, pf);
-			if (status != nfs_ok)
-				return status;
-		} else {
-			if (report)
-				pr_warn("NFS: SECINFO: security flavor %u "
-					"is not supported\n", pf);
-		}
+	for (i = 0, supported = 0; i < nflavs; i++) {
+		status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor,
+					       &supported);
+		if (status != nfs_ok)
+			return status;
 	}
 
-	if (nflavs != supported)
-		report = false;
 	*flavorsp = cpu_to_be32(supported);
 	return 0;
 }

From b786caa65d4baa73257487e27ebab9004dc768d1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:59 -0500
Subject: [PATCH 26/42] NFSD: Insulate nfsd4_encode_secinfo() from page
 boundaries in the encode buffer

There's no guarantee that the pointer returned from
xdr_reserve_space() will still point to the correct reserved space
in the encode buffer after one or more intervening calls to
xdr_reserve_space(). It just happens to work with the current
implementation of xdr_reserve_space().

Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5cc365a3ed0d..ae4c6c0060bf 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4643,13 +4643,13 @@ nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf,
 }
 
 static __be32
-nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp)
 {
 	u32 i, nflavs, supported;
 	struct exp_flavor_info *flavs;
 	struct exp_flavor_info def_flavs[2];
-	__be32 *flavorsp;
-	__be32 status;
+	unsigned int count_offset;
+	__be32 status, wire_count;
 
 	if (exp->ex_nflavors) {
 		flavs = exp->ex_flavors;
@@ -4671,8 +4671,8 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
 		}
 	}
 
-	flavorsp = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!flavorsp)
+	count_offset = xdr->buf->len;
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
 		return nfserr_resource;
 
 	for (i = 0, supported = 0; i < nflavs; i++) {
@@ -4682,7 +4682,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
 			return status;
 	}
 
-	*flavorsp = cpu_to_be32(supported);
+	wire_count = cpu_to_be32(supported);
+	write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count,
+			       XDR_UNIT);
 	return 0;
 }
 
@@ -4693,7 +4695,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 	struct nfsd4_secinfo *secinfo = &u->secinfo;
 	struct xdr_stream *xdr = resp->xdr;
 
-	return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+	return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp);
 }
 
 static __be32
@@ -4703,7 +4705,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
 	struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
 	struct xdr_stream *xdr = resp->xdr;
 
-	return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+	return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp);
 }
 
 static __be32

From 4163ee711cf141ada9e884a94e6e329431547fd7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:28:56 -0500
Subject: [PATCH 27/42] NFSD: Insulate nfsd4_encode_fattr4() from page
 boundaries in the encode buffer

Commit ab04de60ae1c ("NFSD: Optimize nfsd4_encode_fattr()") replaced
the use of write_bytes_to_xdr_buf() because it's expensive and the
data items to be encoded are already properly aligned.

However, there's no guarantee that the pointer returned from
xdr_reserve_space() will still point to the correct reserved space
in the encode buffer after one or more intervening calls to
xdr_reserve_space(). It just happens to work with the current
implementation of xdr_reserve_space().

This commit effectively reverts the optimization.

Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ae4c6c0060bf..8d373d2edff1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3507,8 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	struct nfsd4_fattr_args args;
 	struct svc_fh *tempfh = NULL;
 	int starting_len = xdr->buf->len;
-	__be32 *attrlen_p, status;
-	int attrlen_offset;
+	unsigned int attrlen_offset;
+	__be32 attrlen, status;
 	u32 attrmask[3];
 	int err;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
@@ -3629,8 +3629,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
 	/* attr_vals */
 	attrlen_offset = xdr->buf->len;
-	attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!attrlen_p)
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
 		goto out_resource;
 	bitmap_from_arr32(attr_bitmap, attrmask,
 			  ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
@@ -3640,7 +3639,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		if (status != nfs_ok)
 			goto out;
 	}
-	*attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+	attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
 	status = nfs_ok;
 
 out:

From 1196bdce3d107194dd15f508602871ffb7ff2d0b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 30 Dec 2024 19:29:00 -0500
Subject: [PATCH 28/42] SUNRPC: Document validity guarantees of the pointer
 returned by reserve_space

A subtlety of this API is that if the @nbytes region traverses a
page boundary, the next __xdr_commit_encode will shift the data item
in the XDR encode buffer. This makes the returned pointer point to
something else, leading to unexpected behavior.

There are a few cases where the caller saves the returned pointer
and then later uses it to insert a computed value into an earlier
part of the stream. This can be safe only if either:

 - the data item is guaranteed to be in the XDR buffer's head, and
   thus is not ever going to be near a page boundary, or
 - the data item is no larger than 4 octets, since XDR alignment
   rules require all data items to start on 4-octet boundaries

But that safety is only an artifact of the current implementation.
It would be less brittle if these "safe" uses were eventually
replaced.

Reviewed-by: NeilBrown <neilb@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xdr.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 62e07c330a66..4e003cb516fe 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1097,6 +1097,12 @@ static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
  * Checks that we have enough buffer space to encode 'nbytes' more
  * bytes of data. If so, update the total xdr_buf length, and
  * adjust the length of the current kvec.
+ *
+ * The returned pointer is valid only until the next call to
+ * xdr_reserve_space() or xdr_commit_encode() on @xdr. The current
+ * implementation of this API guarantees that space reserved for a
+ * four-byte data item remains valid until @xdr is destroyed, but
+ * that might not always be true in the future.
  */
 __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 {

From 531503054e8fe9f4502fff0dceba20dfaa9920d5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:53 -0500
Subject: [PATCH 29/42] nfsd: fix handling of delegated change attr in
 CB_GETATTR

RFC8881, section 10.4.3 has some specific guidance as to how the
delegated change attribute should be handled. We currently don't follow
that guidance properly.

In particular, when the file is modified, the server always reports the
initial change attribute + 1. Section 10.4.3 however indicates that it
should be incremented on every GETATTR request from other clients.

Only request the change attribute until the file has been modified. If
there is an outstanding delegation, then increment the cached change
attribute on every GETATTR.

Fixes: 6487a13b5c6b ("NFSD: add support for CB_GETATTR callback")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c |  8 +++++---
 fs/nfsd/nfs4xdr.c      | 15 +++++++++------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a8e33f561bb3..1457f61ae051 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -361,12 +361,14 @@ static void
 encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
 			struct nfs4_cb_fattr *fattr)
 {
-	struct nfs4_delegation *dp =
-		container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+	struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
 	struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
+	struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
 	u32 bmap[1];
 
-	bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
+	bmap[0] = FATTR4_WORD0_SIZE;
+	if (!ncf->ncf_file_modified)
+		bmap[0] |= FATTR4_WORD0_CHANGE;
 
 	encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
 	encode_nfs_fh4(xdr, fh);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8d373d2edff1..31270e1a40ed 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,6 +2920,7 @@ struct nfsd4_fattr_args {
 	struct kstat		stat;
 	struct kstatfs		statfs;
 	struct nfs4_acl		*acl;
+	u64			change_attr;
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 	void			*context;
 	int			contextlen;
@@ -3019,7 +3020,6 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
 					 const struct nfsd4_fattr_args *args)
 {
 	const struct svc_export *exp = args->exp;
-	u64 c;
 
 	if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
 		u32 flush_time = convert_to_wallclock(exp->cd->flush_time);
@@ -3030,9 +3030,7 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
 			return nfserr_resource;
 		return nfs_ok;
 	}
-
-	c = nfsd4_change_attribute(&args->stat);
-	return nfsd4_encode_changeid4(xdr, c);
+	return nfsd4_encode_changeid4(xdr, args->change_attr);
 }
 
 static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr,
@@ -3557,11 +3555,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	if (dp) {
 		struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
 
-		if (ncf->ncf_file_modified)
+		if (ncf->ncf_file_modified) {
+			++ncf->ncf_initial_cinfo;
 			args.stat.size = ncf->ncf_cur_fsize;
-
+		}
+		args.change_attr = ncf->ncf_initial_cinfo;
 		nfs4_put_stid(&dp->dl_stid);
+	} else {
+		args.change_attr = nfsd4_change_attribute(&args.stat);
 	}
+
 	if (err)
 		goto out_nfserr;
 

From 8e1d32273ab7d06b6f78771e05824bfab01141f4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:54 -0500
Subject: [PATCH 30/42] nfs_common: make include/linux/nfs4.h include generated
 nfs4_1.h

In the long run, the NFS development community intends to autogenerate a
lot of the XDR handling code.  Both the NFS client and server include
"include/linux/nfs4.hi". That file was hand-rolled, and some of the symbols
in it conflict with the autogenerated symbols.

Add a small nfs4_1.x to Documentation that currently just has the
necessary definitions for the delstid draft, and generate the relevant
header and source files. Make include/linux/nfs4.h include the generated
include/linux/sunrpc/xdrgen/nfs4_1.h and remove the conflicting
definitions from it and nfs_xdr.h.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 Documentation/sunrpc/xdr/nfs4_1.x    | 166 +++++++++++++++++++
 fs/nfsd/Makefile                     |  16 +-
 fs/nfsd/nfs4xdr_gen.c                | 239 +++++++++++++++++++++++++++
 fs/nfsd/nfs4xdr_gen.h                |  25 +++
 include/linux/nfs4.h                 |   7 +-
 include/linux/nfs_xdr.h              |   5 -
 include/linux/sunrpc/xdrgen/nfs4_1.h | 124 ++++++++++++++
 7 files changed, 570 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/sunrpc/xdr/nfs4_1.x
 create mode 100644 fs/nfsd/nfs4xdr_gen.c
 create mode 100644 fs/nfsd/nfs4xdr_gen.h
 create mode 100644 include/linux/sunrpc/xdrgen/nfs4_1.h

diff --git a/Documentation/sunrpc/xdr/nfs4_1.x b/Documentation/sunrpc/xdr/nfs4_1.x
new file mode 100644
index 000000000000..fc37d1ecba0f
--- /dev/null
+++ b/Documentation/sunrpc/xdr/nfs4_1.x
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2010 IETF Trust and the persons identified
+ * as the document authors.  All rights reserved.
+ *
+ * The document authors are identified in RFC 3530 and
+ * RFC 5661.
+ *
+ * Redistribution and use in source and binary forms, with
+ * or without modification, are permitted provided that the
+ * following conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the
+ *   following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the
+ *   following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ *
+ * - Neither the name of Internet Society, IETF or IETF
+ *   Trust, nor the names of specific contributors, may be
+ *   used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ *   AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ *   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *   FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ *   EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ *   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ *   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ *   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ *   ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+pragma header nfs4;
+
+/*
+ * Basic typedefs for RFC 1832 data type definitions
+ */
+typedef hyper		int64_t;
+typedef unsigned int	uint32_t;
+
+/*
+ * Basic data types
+ */
+typedef uint32_t	bitmap4<>;
+
+/*
+ * Timeval
+ */
+struct nfstime4 {
+	int64_t		seconds;
+	uint32_t	nseconds;
+};
+
+
+/*
+ * The following content was extracted from draft-ietf-nfsv4-delstid
+ */
+
+typedef bool            fattr4_offline;
+
+
+const FATTR4_OFFLINE            = 83;
+
+
+struct open_arguments4 {
+  bitmap4  oa_share_access;
+  bitmap4  oa_share_deny;
+  bitmap4  oa_share_access_want;
+  bitmap4  oa_open_claim;
+  bitmap4  oa_create_mode;
+};
+
+
+enum open_args_share_access4 {
+   OPEN_ARGS_SHARE_ACCESS_READ  = 1,
+   OPEN_ARGS_SHARE_ACCESS_WRITE = 2,
+   OPEN_ARGS_SHARE_ACCESS_BOTH  = 3
+};
+
+
+enum open_args_share_deny4 {
+   OPEN_ARGS_SHARE_DENY_NONE  = 0,
+   OPEN_ARGS_SHARE_DENY_READ  = 1,
+   OPEN_ARGS_SHARE_DENY_WRITE = 2,
+   OPEN_ARGS_SHARE_DENY_BOTH  = 3
+};
+
+
+enum open_args_share_access_want4 {
+   OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG           = 3,
+   OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG            = 4,
+   OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL              = 5,
+   OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL
+                                                   = 17,
+   OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED
+                                                   = 18,
+   OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS    = 20,
+   OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21
+};
+
+
+enum open_args_open_claim4 {
+   OPEN_ARGS_OPEN_CLAIM_NULL          = 0,
+   OPEN_ARGS_OPEN_CLAIM_PREVIOUS      = 1,
+   OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR  = 2,
+   OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3,
+   OPEN_ARGS_OPEN_CLAIM_FH            = 4,
+   OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH  = 5,
+   OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6
+};
+
+
+enum open_args_createmode4 {
+   OPEN_ARGS_CREATEMODE_UNCHECKED4     = 0,
+   OPEN_ARGS_CREATE_MODE_GUARDED       = 1,
+   OPEN_ARGS_CREATEMODE_EXCLUSIVE4     = 2,
+   OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1  = 3
+};
+
+
+typedef open_arguments4 fattr4_open_arguments;
+pragma public fattr4_open_arguments;
+
+
+%/*
+% * Determine what OPEN supports.
+% */
+const FATTR4_OPEN_ARGUMENTS     = 86;
+
+
+const OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000;
+
+
+const OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010;
+
+
+/*
+ * attributes for the delegation times being
+ * cached and served by the "client"
+ */
+typedef nfstime4        fattr4_time_deleg_access;
+typedef nfstime4        fattr4_time_deleg_modify;
+pragma public 		fattr4_time_deleg_access;
+pragma public		fattr4_time_deleg_modify;
+
+
+%/*
+% * New RECOMMENDED Attribute for
+% * delegation caching of times
+% */
+const FATTR4_TIME_DELEG_ACCESS  = 84;
+const FATTR4_TIME_DELEG_MODIFY  = 85;
+
+
+const OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000;
+
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 18cbd3fa7691..2f687619f65b 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -18,9 +18,23 @@ nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o nfs4callback.o nfs4recover.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o
 nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
 nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
 nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
 nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
 nfsd-$(CONFIG_NFS_LOCALIO) += localio.o
+
+
+.PHONY: xdrgen
+
+xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c
+
+../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+	../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@
+
+nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+	../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@
+
+nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x
+	../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
new file mode 100644
index 000000000000..e5d34f9a3147
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
+// XDR specification modification time: Thu Oct  3 11:30:59 2024
+
+#include <linux/sunrpc/svc.h>
+
+#include "nfs4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+{
+	return xdrgen_decode_hyper(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr)
+{
+	return xdrgen_decode_unsigned_int(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
+{
+	if (xdr_stream_decode_u32(xdr, &ptr->count) < 0)
+		return false;
+	for (u32 i = 0; i < ptr->count; i++)
+		if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i]))
+			return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
+{
+	if (!xdrgen_decode_int64_t(xdr, &ptr->seconds))
+		return false;
+	if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds))
+		return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr)
+{
+	return xdrgen_decode_bool(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr)
+{
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access))
+		return false;
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny))
+		return false;
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want))
+		return false;
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim))
+		return false;
+	if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode))
+		return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
+bool
+xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr)
+{
+	return xdrgen_decode_open_arguments4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr)
+{
+	return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr)
+{
+	return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+{
+	return xdrgen_encode_hyper(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value)
+{
+	return xdrgen_encode_unsigned_int(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
+{
+	if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT)
+		return false;
+	for (u32 i = 0; i < value.count; i++)
+		if (!xdrgen_encode_uint32_t(xdr, value.element[i]))
+			return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
+{
+	if (!xdrgen_encode_int64_t(xdr, value->seconds))
+		return false;
+	if (!xdrgen_encode_uint32_t(xdr, value->nseconds))
+		return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value)
+{
+	return xdrgen_encode_bool(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value)
+{
+	if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access))
+		return false;
+	if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny))
+		return false;
+	if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want))
+		return false;
+	if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim))
+		return false;
+	if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode))
+		return false;
+	return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+bool
+xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value)
+{
+	return xdrgen_encode_open_arguments4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value)
+{
+	return xdrgen_encode_nfstime4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value)
+{
+	return xdrgen_encode_nfstime4(xdr, value);
+};
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
new file mode 100644
index 000000000000..c4c6a5075b17
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
+/* XDR specification modification time: Thu Oct  3 11:30:59 2024 */
+
+#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
+#define _LINUX_XDRGEN_NFS4_1_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nfs4_1.h>
+
+bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr);
+bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value);
+
+bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr);
+bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value);
+
+bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr);
+bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value);
+
+#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8d7430d9f218..b90719244775 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -17,6 +17,7 @@
 #include <linux/uidgid.h>
 #include <uapi/linux/nfs4.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/xdrgen/nfs4_1.h>
 
 enum nfs4_acl_whotype {
 	NFS4_ACL_WHO_NAMED = 0,
@@ -512,12 +513,6 @@ enum {
 	FATTR4_XATTR_SUPPORT		= 82,
 };
 
-enum {
-	FATTR4_TIME_DELEG_ACCESS	= 84,
-	FATTR4_TIME_DELEG_MODIFY	= 85,
-	FATTR4_OPEN_ARGUMENTS		= 86,
-};
-
 /*
  * The following internal definitions enable processing the above
  * attribute bits within 32-bit word boundaries.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 559273a0f16d..e74a87bb18a4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1315,11 +1315,6 @@ struct nfs4_fsid_present_res {
 
 #endif /* CONFIG_NFS_V4 */
 
-struct nfstime4 {
-	u64	seconds;
-	u32	nseconds;
-};
-
 #ifdef CONFIG_NFS_V4_1
 
 struct pnfs_commit_bucket {
diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h
new file mode 100644
index 000000000000..6025ab6b7398
--- /dev/null
+++ b/include/linux/sunrpc/xdrgen/nfs4_1.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
+/* XDR specification modification time: Thu Oct  3 11:30:59 2024 */
+
+#ifndef _LINUX_XDRGEN_NFS4_1_DEF_H
+#define _LINUX_XDRGEN_NFS4_1_DEF_H
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+
+typedef s64 int64_t;
+
+typedef u32 uint32_t;
+
+typedef struct {
+	u32 count;
+	uint32_t *element;
+} bitmap4;
+
+struct nfstime4 {
+	int64_t seconds;
+	uint32_t nseconds;
+};
+
+typedef bool fattr4_offline;
+
+enum { FATTR4_OFFLINE = 83 };
+
+struct open_arguments4 {
+	bitmap4 oa_share_access;
+	bitmap4 oa_share_deny;
+	bitmap4 oa_share_access_want;
+	bitmap4 oa_open_claim;
+	bitmap4 oa_create_mode;
+};
+
+enum open_args_share_access4 {
+	OPEN_ARGS_SHARE_ACCESS_READ = 1,
+	OPEN_ARGS_SHARE_ACCESS_WRITE = 2,
+	OPEN_ARGS_SHARE_ACCESS_BOTH = 3,
+};
+typedef enum open_args_share_access4 open_args_share_access4;
+
+enum open_args_share_deny4 {
+	OPEN_ARGS_SHARE_DENY_NONE = 0,
+	OPEN_ARGS_SHARE_DENY_READ = 1,
+	OPEN_ARGS_SHARE_DENY_WRITE = 2,
+	OPEN_ARGS_SHARE_DENY_BOTH = 3,
+};
+typedef enum open_args_share_deny4 open_args_share_deny4;
+
+enum open_args_share_access_want4 {
+	OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG = 3,
+	OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG = 4,
+	OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL = 5,
+	OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 17,
+	OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 18,
+	OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 20,
+	OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21,
+};
+typedef enum open_args_share_access_want4 open_args_share_access_want4;
+
+enum open_args_open_claim4 {
+	OPEN_ARGS_OPEN_CLAIM_NULL = 0,
+	OPEN_ARGS_OPEN_CLAIM_PREVIOUS = 1,
+	OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR = 2,
+	OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3,
+	OPEN_ARGS_OPEN_CLAIM_FH = 4,
+	OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH = 5,
+	OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6,
+};
+typedef enum open_args_open_claim4 open_args_open_claim4;
+
+enum open_args_createmode4 {
+	OPEN_ARGS_CREATEMODE_UNCHECKED4 = 0,
+	OPEN_ARGS_CREATE_MODE_GUARDED = 1,
+	OPEN_ARGS_CREATEMODE_EXCLUSIVE4 = 2,
+	OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1 = 3,
+};
+typedef enum open_args_createmode4 open_args_createmode4;
+
+typedef struct open_arguments4 fattr4_open_arguments;
+
+enum { FATTR4_OPEN_ARGUMENTS = 86 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 };
+
+enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 };
+
+typedef struct nfstime4 fattr4_time_deleg_access;
+
+typedef struct nfstime4 fattr4_time_deleg_modify;
+
+enum { FATTR4_TIME_DELEG_ACCESS = 84 };
+
+enum { FATTR4_TIME_DELEG_MODIFY = 85 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 };
+
+#define NFS4_int64_t_sz                 \
+	(XDR_hyper)
+#define NFS4_uint32_t_sz                \
+	(XDR_unsigned_int)
+#define NFS4_bitmap4_sz                 (XDR_unsigned_int)
+#define NFS4_nfstime4_sz                \
+	(NFS4_int64_t_sz + NFS4_uint32_t_sz)
+#define NFS4_fattr4_offline_sz          \
+	(XDR_bool)
+#define NFS4_open_arguments4_sz         \
+	(NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz)
+#define NFS4_open_args_share_access4_sz (XDR_int)
+#define NFS4_open_args_share_deny4_sz   (XDR_int)
+#define NFS4_open_args_share_access_want4_sz (XDR_int)
+#define NFS4_open_args_open_claim4_sz   (XDR_int)
+#define NFS4_open_args_createmode4_sz   (XDR_int)
+#define NFS4_fattr4_open_arguments_sz   \
+	(NFS4_open_arguments4_sz)
+#define NFS4_fattr4_time_deleg_access_sz \
+	(NFS4_nfstime4_sz)
+#define NFS4_fattr4_time_deleg_modify_sz \
+	(NFS4_nfstime4_sz)
+
+#endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */

From 8dfbea8bde6e976136948421325b24b5bdb76ad3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:55 -0500
Subject: [PATCH 31/42] nfsd: switch to autogenerated definitions for
 open_delegation_type4

Rename the enum with the same name in include/linux/nfs4.h, add the
proper enum to nfs4_1.x and regenerate the headers and source files.  Do
a mass rename of all NFS4_OPEN_DELEGATE_* to OPEN_DELEGATE_* in the nfsd
directory.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 Documentation/sunrpc/xdr/nfs4_1.x    |  9 +++++++-
 fs/nfsd/nfs4state.c                  | 34 ++++++++++++++--------------
 fs/nfsd/nfs4xdr.c                    |  8 +++----
 fs/nfsd/nfs4xdr_gen.c                | 19 +++++++++++++++-
 fs/nfsd/nfs4xdr_gen.h                |  2 +-
 include/linux/nfs4.h                 |  2 +-
 include/linux/sunrpc/xdrgen/nfs4_1.h | 13 ++++++++++-
 7 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/Documentation/sunrpc/xdr/nfs4_1.x b/Documentation/sunrpc/xdr/nfs4_1.x
index fc37d1ecba0f..ee9f8f249f1e 100644
--- a/Documentation/sunrpc/xdr/nfs4_1.x
+++ b/Documentation/sunrpc/xdr/nfs4_1.x
@@ -161,6 +161,13 @@ pragma public		fattr4_time_deleg_modify;
 const FATTR4_TIME_DELEG_ACCESS  = 84;
 const FATTR4_TIME_DELEG_MODIFY  = 85;
 
-
 const OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000;
 
+enum open_delegation_type4 {
+       OPEN_DELEGATE_NONE                  = 0,
+       OPEN_DELEGATE_READ                  = 1,
+       OPEN_DELEGATE_WRITE                 = 2,
+       OPEN_DELEGATE_NONE_EXT              = 3, /* new to v4.1 */
+       OPEN_DELEGATE_READ_ATTRS_DELEG      = 4,
+       OPEN_DELEGATE_WRITE_ATTRS_DELEG     = 5
+};
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b38b3a1c0307..a39c480da327 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2965,7 +2965,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 	seq_puts(s, ": { type: deleg, ");
 
 	seq_printf(s, "access: %s",
-		   ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+		   ds->dl_type == OPEN_DELEGATE_READ ? "r" : "w");
 
 	/* XXX: lease time, whether it's being recalled. */
 
@@ -5581,7 +5581,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 static inline __be32
 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 {
-	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+	if ((flags & WR_STATE) && (dp->dl_type == OPEN_DELEGATE_READ))
 		return nfserr_openmode;
 	else
 		return nfs_ok;
@@ -5823,7 +5823,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
 		return NULL;
 	fl->fl_lmops = &nfsd_lease_mng_ops;
 	fl->c.flc_flags = FL_DELEG;
-	fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+	fl->c.flc_type = flag == OPEN_DELEGATE_READ ? F_RDLCK : F_WRLCK;
 	fl->c.flc_owner = (fl_owner_t)dp;
 	fl->c.flc_pid = current->tgid;
 	fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
@@ -5969,7 +5969,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	 */
 	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
 		nf = find_rw_file(fp);
-		dl_type = NFS4_OPEN_DELEGATE_WRITE;
+		dl_type = OPEN_DELEGATE_WRITE;
 	}
 
 	/*
@@ -5978,7 +5978,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	 */
 	if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
 		nf = find_readable_file(fp);
-		dl_type = NFS4_OPEN_DELEGATE_READ;
+		dl_type = OPEN_DELEGATE_READ;
 	}
 
 	if (!nf)
@@ -6067,7 +6067,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 
 static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
 {
-	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+	open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 	if (status == -EAGAIN)
 		open->op_why_no_deleg = WND4_CONTENTION;
 	else {
@@ -6183,20 +6183,20 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 			destroy_delegation(dp);
 			goto out_no_deleg;
 		}
-		open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+		open->op_delegate_type = OPEN_DELEGATE_WRITE;
 		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
 		dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
 		trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
 	} else {
-		open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+		open->op_delegate_type = OPEN_DELEGATE_READ;
 		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
 	}
 	nfs4_put_stid(&dp->dl_stid);
 	return;
 out_no_deleg:
-	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+	open->op_delegate_type = OPEN_DELEGATE_NONE;
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
-	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
+	    open->op_delegate_type != OPEN_DELEGATE_NONE) {
 		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
 		open->op_recall = true;
 	}
@@ -6211,17 +6211,17 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 					struct nfs4_delegation *dp)
 {
 	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
-	    dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
-		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+	    dp->dl_type == OPEN_DELEGATE_WRITE) {
+		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
 	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
-		   dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
-		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+		   dp->dl_type == OPEN_DELEGATE_WRITE) {
+		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
 	}
 	/* Otherwise the client must be confused wanting a delegation
 	 * it already has, therefore we don't return
-	 * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+	 * OPEN_DELEGATE_NONE_EXT and reason.
 	 */
 }
 
@@ -6311,7 +6311,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 
 	if (nfsd4_has_session(&resp->cstate)) {
 		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
-			open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_WANTED;
 			goto nodeleg;
 		}
@@ -6327,7 +6327,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	trace_nfsd_open(&stp->st_stid.sc_stateid);
 out:
 	/* 4.1 client trying to upgrade/downgrade delegation? */
-	if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+	if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
 	    open->op_deleg_want)
 		nfsd4_deleg_xgrade_none_ext(open, dp);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 31270e1a40ed..b6966acddcb6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4233,18 +4233,18 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
 	if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT)
 		return nfserr_resource;
 	switch (open->op_delegate_type) {
-	case NFS4_OPEN_DELEGATE_NONE:
+	case OPEN_DELEGATE_NONE:
 		status = nfs_ok;
 		break;
-	case NFS4_OPEN_DELEGATE_READ:
+	case OPEN_DELEGATE_READ:
 		/* read */
 		status = nfsd4_encode_open_read_delegation4(xdr, open);
 		break;
-	case NFS4_OPEN_DELEGATE_WRITE:
+	case OPEN_DELEGATE_WRITE:
 		/* write */
 		status = nfsd4_encode_open_write_delegation4(xdr, open);
 		break;
-	case NFS4_OPEN_DELEGATE_NONE_EXT:
+	case OPEN_DELEGATE_NONE_EXT:
 		/* od_whynone */
 		status = nfsd4_encode_open_none_delegation4(xdr, open);
 		break;
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
index e5d34f9a3147..a0e01f50a28d 100644
--- a/fs/nfsd/nfs4xdr_gen.c
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Generated by xdrgen. Manual edits will be lost.
 // XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
-// XDR specification modification time: Thu Oct  3 11:30:59 2024
+// XDR specification modification time: Sat Oct 12 08:10:54 2024
 
 #include <linux/sunrpc/svc.h>
 
@@ -135,6 +135,17 @@ xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg
 	return xdrgen_decode_nfstime4(xdr, ptr);
 };
 
+static bool __maybe_unused
+xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr)
+{
+	u32 val;
+
+	if (xdr_stream_decode_u32(xdr, &val) < 0)
+		return false;
+	*ptr = val;
+	return true;
+}
+
 static bool __maybe_unused
 xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
 {
@@ -237,3 +248,9 @@ xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time
 {
 	return xdrgen_encode_nfstime4(xdr, value);
 };
+
+static bool __maybe_unused
+xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value)
+{
+	return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
index c4c6a5075b17..3fc8bde2b3b5 100644
--- a/fs/nfsd/nfs4xdr_gen.h
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Thu Oct  3 11:30:59 2024 */
+/* XDR specification modification time: Sat Oct 12 08:10:54 2024 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
 #define _LINUX_XDRGEN_NFS4_1_DECL_H
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index b90719244775..71fbebfa43c7 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -366,7 +366,7 @@ enum limit_by4 {
 	NFS4_LIMIT_BLOCKS = 2
 };
 
-enum open_delegation_type4 {
+enum nfs4_open_delegation_type4 {
 	NFS4_OPEN_DELEGATE_NONE = 0,
 	NFS4_OPEN_DELEGATE_READ = 1,
 	NFS4_OPEN_DELEGATE_WRITE = 2,
diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h
index 6025ab6b7398..9ca83a4a04cf 100644
--- a/include/linux/sunrpc/xdrgen/nfs4_1.h
+++ b/include/linux/sunrpc/xdrgen/nfs4_1.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Thu Oct  3 11:30:59 2024 */
+/* XDR specification modification time: Sat Oct 12 08:10:54 2024 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DEF_H
 #define _LINUX_XDRGEN_NFS4_1_DEF_H
@@ -98,6 +98,16 @@ enum { FATTR4_TIME_DELEG_MODIFY = 85 };
 
 enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 };
 
+enum open_delegation_type4 {
+	OPEN_DELEGATE_NONE = 0,
+	OPEN_DELEGATE_READ = 1,
+	OPEN_DELEGATE_WRITE = 2,
+	OPEN_DELEGATE_NONE_EXT = 3,
+	OPEN_DELEGATE_READ_ATTRS_DELEG = 4,
+	OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5,
+};
+typedef enum open_delegation_type4 open_delegation_type4;
+
 #define NFS4_int64_t_sz                 \
 	(XDR_hyper)
 #define NFS4_uint32_t_sz                \
@@ -120,5 +130,6 @@ enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 };
 	(NFS4_nfstime4_sz)
 #define NFS4_fattr4_time_deleg_modify_sz \
 	(NFS4_nfstime4_sz)
+#define NFS4_open_delegation_type4_sz   (XDR_int)
 
 #endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */

From c9c99a33e2b0083c83a2c29eebfad92c78e16791 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:56 -0500
Subject: [PATCH 32/42] nfsd: rename NFS4_SHARE_WANT_* constants to
 OPEN4_SHARE_ACCESS_WANT_*

Add the OPEN4_SHARE_ACCESS_WANT constants from the nfs4.1 and delstid
draft into the nfs4_1.x file, and regenerate the headers and source
files. Do a mass renaming of NFS4_SHARE_WANT_* to
OPEN4_SHARE_ACCESS_WANT_* in the nfsd directory.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 Documentation/sunrpc/xdr/nfs4_1.x    | 15 ++++++++++++++-
 fs/nfsd/nfs4state.c                  | 16 ++++++++--------
 fs/nfsd/nfs4xdr.c                    | 12 ++++++------
 fs/nfsd/nfs4xdr_gen.c                |  2 +-
 fs/nfsd/nfs4xdr_gen.h                |  2 +-
 include/linux/sunrpc/xdrgen/nfs4_1.h | 24 +++++++++++++++++++++---
 6 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/Documentation/sunrpc/xdr/nfs4_1.x b/Documentation/sunrpc/xdr/nfs4_1.x
index ee9f8f249f1e..ca95150a3a29 100644
--- a/Documentation/sunrpc/xdr/nfs4_1.x
+++ b/Documentation/sunrpc/xdr/nfs4_1.x
@@ -138,7 +138,6 @@ pragma public fattr4_open_arguments;
 const FATTR4_OPEN_ARGUMENTS     = 86;
 
 
-const OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000;
 
 
 const OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010;
@@ -161,7 +160,21 @@ pragma public		fattr4_time_deleg_modify;
 const FATTR4_TIME_DELEG_ACCESS  = 84;
 const FATTR4_TIME_DELEG_MODIFY  = 85;
 
+
+
+/* new flags for share_access field of OPEN4args */
+const OPEN4_SHARE_ACCESS_WANT_DELEG_MASK        = 0xFF00;
+const OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE     = 0x0000;
+const OPEN4_SHARE_ACCESS_WANT_READ_DELEG        = 0x0100;
+const OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG       = 0x0200;
+const OPEN4_SHARE_ACCESS_WANT_ANY_DELEG         = 0x0300;
+const OPEN4_SHARE_ACCESS_WANT_NO_DELEG          = 0x0400;
+const OPEN4_SHARE_ACCESS_WANT_CANCEL            = 0x0500;
+
+const OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000;
+const OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000;
 const OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000;
+const OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000;
 
 enum open_delegation_type4 {
        OPEN_DELEGATE_NONE                  = 0,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a39c480da327..0c73cb8fae19 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6073,14 +6073,14 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
 	else {
 		open->op_why_no_deleg = WND4_RESOURCE;
 		switch (open->op_deleg_want) {
-		case NFS4_SHARE_WANT_READ_DELEG:
-		case NFS4_SHARE_WANT_WRITE_DELEG:
-		case NFS4_SHARE_WANT_ANY_DELEG:
+		case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+		case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+		case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
 			break;
-		case NFS4_SHARE_WANT_CANCEL:
+		case OPEN4_SHARE_ACCESS_WANT_CANCEL:
 			open->op_why_no_deleg = WND4_CANCELLED;
 			break;
-		case NFS4_SHARE_WANT_NO_DELEG:
+		case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
 			WARN_ON_ONCE(1);
 		}
 	}
@@ -6210,11 +6210,11 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 					struct nfs4_delegation *dp)
 {
-	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
+	if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG &&
 	    dp->dl_type == OPEN_DELEGATE_WRITE) {
 		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
-	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
+	} else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG &&
 		   dp->dl_type == OPEN_DELEGATE_WRITE) {
 		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
@@ -6310,7 +6310,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	mutex_unlock(&stp->st_mutex);
 
 	if (nfsd4_has_session(&resp->cstate)) {
-		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+		if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
 			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_WANTED;
 			goto nodeleg;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b6966acddcb6..e3f1cb59e118 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1067,12 +1067,12 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
 	if (!argp->minorversion)
 		return nfserr_bad_xdr;
 	switch (w & NFS4_SHARE_WANT_MASK) {
-	case NFS4_SHARE_WANT_NO_PREFERENCE:
-	case NFS4_SHARE_WANT_READ_DELEG:
-	case NFS4_SHARE_WANT_WRITE_DELEG:
-	case NFS4_SHARE_WANT_ANY_DELEG:
-	case NFS4_SHARE_WANT_NO_DELEG:
-	case NFS4_SHARE_WANT_CANCEL:
+	case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
+	case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+	case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+	case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
+	case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
+	case OPEN4_SHARE_ACCESS_WANT_CANCEL:
 		break;
 	default:
 		return nfserr_bad_xdr;
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
index a0e01f50a28d..a17b5d8e60b3 100644
--- a/fs/nfsd/nfs4xdr_gen.c
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Generated by xdrgen. Manual edits will be lost.
 // XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
-// XDR specification modification time: Sat Oct 12 08:10:54 2024
+// XDR specification modification time: Mon Oct 14 09:10:13 2024
 
 #include <linux/sunrpc/svc.h>
 
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
index 3fc8bde2b3b5..41a0033b7256 100644
--- a/fs/nfsd/nfs4xdr_gen.h
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Sat Oct 12 08:10:54 2024 */
+/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
 #define _LINUX_XDRGEN_NFS4_1_DECL_H
diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h
index 9ca83a4a04cf..cf21a14aa885 100644
--- a/include/linux/sunrpc/xdrgen/nfs4_1.h
+++ b/include/linux/sunrpc/xdrgen/nfs4_1.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Generated by xdrgen. Manual edits will be lost. */
 /* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
-/* XDR specification modification time: Sat Oct 12 08:10:54 2024 */
+/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */
 
 #ifndef _LINUX_XDRGEN_NFS4_1_DEF_H
 #define _LINUX_XDRGEN_NFS4_1_DEF_H
@@ -84,8 +84,6 @@ typedef struct open_arguments4 fattr4_open_arguments;
 
 enum { FATTR4_OPEN_ARGUMENTS = 86 };
 
-enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 };
-
 enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 };
 
 typedef struct nfstime4 fattr4_time_deleg_access;
@@ -96,8 +94,28 @@ enum { FATTR4_TIME_DELEG_ACCESS = 84 };
 
 enum { FATTR4_TIME_DELEG_MODIFY = 85 };
 
+enum { OPEN4_SHARE_ACCESS_WANT_DELEG_MASK = 0xFF00 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE = 0x0000 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_READ_DELEG = 0x0100 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG = 0x0200 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_ANY_DELEG = 0x0300 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_NO_DELEG = 0x0400 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_CANCEL = 0x0500 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000 };
+
+enum { OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000 };
+
 enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 };
 
+enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 };
+
 enum open_delegation_type4 {
 	OPEN_DELEGATE_NONE = 0,
 	OPEN_DELEGATE_READ = 1,

From fbd5573d0deda145fe173431f1f3ca444940de18 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:57 -0500
Subject: [PATCH 33/42] nfsd: prepare delegation code for handing out
 *_ATTRS_DELEG delegations

Add some preparatory code to various functions that handle delegation
types to allow them to handle the OPEN_DELEGATE_*_ATTRS_DELEG constants.
Add helpers for detecting whether it's a read or write deleg, and
whether the attributes are delegated.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 43 ++++++++++++++++++++++++++++---------------
 fs/nfsd/nfs4xdr.c   |  2 ++
 fs/nfsd/state.h     | 16 ++++++++++++++++
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c73cb8fae19..e7b26806797a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2951,6 +2951,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 	return 0;
 }
 
+static char *nfs4_show_deleg_type(u32 dl_type)
+{
+	switch (dl_type) {
+	case OPEN_DELEGATE_READ:
+		return "r";
+	case OPEN_DELEGATE_WRITE:
+		return "w";
+	case OPEN_DELEGATE_READ_ATTRS_DELEG:
+		return "ra";
+	case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+		return "wa";
+	}
+	return "?";
+}
+
 static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 {
 	struct nfs4_delegation *ds;
@@ -2964,8 +2979,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 	nfs4_show_stateid(s, &st->sc_stateid);
 	seq_puts(s, ": { type: deleg, ");
 
-	seq_printf(s, "access: %s",
-		   ds->dl_type == OPEN_DELEGATE_READ ? "r" : "w");
+	seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));
 
 	/* XXX: lease time, whether it's being recalled. */
 
@@ -5581,7 +5595,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 static inline __be32
 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 {
-	if ((flags & WR_STATE) && (dp->dl_type == OPEN_DELEGATE_READ))
+	if ((flags & WR_STATE) && deleg_is_read(dp->dl_type))
 		return nfserr_openmode;
 	else
 		return nfs_ok;
@@ -5813,8 +5827,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
 	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
 }
 
-static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
-						int flag)
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
 {
 	struct file_lease *fl;
 
@@ -5823,7 +5836,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
 		return NULL;
 	fl->fl_lmops = &nfsd_lease_mng_ops;
 	fl->c.flc_flags = FL_DELEG;
-	fl->c.flc_type = flag == OPEN_DELEGATE_READ ? F_RDLCK : F_WRLCK;
+	fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
 	fl->c.flc_owner = (fl_owner_t)dp;
 	fl->c.flc_pid = current->tgid;
 	fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
@@ -6010,7 +6023,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	if (!dp)
 		goto out_delegees;
 
-	fl = nfs4_alloc_init_lease(dp, dl_type);
+	fl = nfs4_alloc_init_lease(dp);
 	if (!fl)
 		goto out_clnt_odstate;
 
@@ -6210,14 +6223,14 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 					struct nfs4_delegation *dp)
 {
-	if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG &&
-	    dp->dl_type == OPEN_DELEGATE_WRITE) {
-		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
-		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
-	} else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG &&
-		   dp->dl_type == OPEN_DELEGATE_WRITE) {
-		open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
-		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+	if (deleg_is_write(dp->dl_type)) {
+		if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+			open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+		} else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+			open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+		}
 	}
 	/* Otherwise the client must be confused wanting a delegation
 	 * it already has, therefore we don't return
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e3f1cb59e118..66b1e2c224ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4237,10 +4237,12 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
 		status = nfs_ok;
 		break;
 	case OPEN_DELEGATE_READ:
+	case OPEN_DELEGATE_READ_ATTRS_DELEG:
 		/* read */
 		status = nfsd4_encode_open_read_delegation4(xdr, open);
 		break;
 	case OPEN_DELEGATE_WRITE:
+	case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
 		/* write */
 		status = nfsd4_encode_open_write_delegation4(xdr, open);
 		break;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b31a8523c8e5..17e98a2521ff 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -207,6 +207,22 @@ struct nfs4_delegation {
 	struct nfs4_cb_fattr    dl_cb_fattr;
 };
 
+static inline bool deleg_is_read(u32 dl_type)
+{
+	return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG);
+}
+
+static inline bool deleg_is_write(u32 dl_type)
+{
+	return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG);
+}
+
+static inline bool deleg_attrs_deleg(u32 dl_type)
+{
+	return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG ||
+	       dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+}
+
 #define cb_to_delegation(cb) \
 	container_of(cb, struct nfs4_delegation, dl_recall)
 

From 51c0d4f7e317d3cb4a3001e502bd8ca2d57f2a4b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:58 -0500
Subject: [PATCH 34/42] nfsd: add support for FATTR4_OPEN_ARGUMENTS

Add support for FATTR4_OPEN_ARGUMENTS. This a new mechanism for the
client to discover what OPEN features the server supports.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsd.h    |  3 ++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 66b1e2c224ff..147564a74b46 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -55,6 +55,7 @@
 #include "netns.h"
 #include "pnfs.h"
 #include "filecache.h"
+#include "nfs4xdr_gen.h"
 
 #include "trace.h"
 
@@ -3388,6 +3389,54 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
 	return nfsd4_encode_bool(xdr, err == 0);
 }
 
+#define NFSD_OA_SHARE_ACCESS	(BIT(OPEN_ARGS_SHARE_ACCESS_READ)	| \
+				 BIT(OPEN_ARGS_SHARE_ACCESS_WRITE)	| \
+				 BIT(OPEN_ARGS_SHARE_ACCESS_BOTH))
+
+#define NFSD_OA_SHARE_DENY	(BIT(OPEN_ARGS_SHARE_DENY_NONE)		| \
+				 BIT(OPEN_ARGS_SHARE_DENY_READ)		| \
+				 BIT(OPEN_ARGS_SHARE_DENY_WRITE)	| \
+				 BIT(OPEN_ARGS_SHARE_DENY_BOTH))
+
+#define NFSD_OA_SHARE_ACCESS_WANT	(BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG)		| \
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG)		| \
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL))
+
+#define NFSD_OA_OPEN_CLAIM	(BIT(OPEN_ARGS_OPEN_CLAIM_NULL)		| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS)	| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR)	| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_FH)		| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH)	| \
+				 BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH))
+
+#define NFSD_OA_CREATE_MODE	(BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4)	| \
+				 BIT(OPEN_ARGS_CREATE_MODE_GUARDED)	| \
+				 BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4)	| \
+				 BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1))
+
+static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS;
+static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY;
+static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT;
+static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM;
+static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE;
+
+static const struct open_arguments4 nfsd_open_arguments = {
+	.oa_share_access = { .count = 1, .element = &oa_share_access },
+	.oa_share_deny = { .count = 1, .element = &oa_share_deny },
+	.oa_share_access_want = { .count = 1, .element = &oa_share_access_want },
+	.oa_open_claim = { .count = 1, .element = &oa_open_claim },
+	.oa_create_mode = { .count = 1, .element = &oa_create_mode },
+};
+
+static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr,
+						 const struct nfsd4_fattr_args *args)
+{
+	if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments))
+		return nfserr_resource;
+	return nfs_ok;
+}
+
 static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 	[FATTR4_SUPPORTED_ATTRS]	= nfsd4_encode_fattr4_supported_attrs,
 	[FATTR4_TYPE]			= nfsd4_encode_fattr4_type,
@@ -3488,6 +3537,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 
 	[FATTR4_MODE_UMASK]		= nfsd4_encode_fattr4__noop,
 	[FATTR4_XATTR_SUPPORT]		= nfsd4_encode_fattr4_xattr_support,
+	[FATTR4_OPEN_ARGUMENTS]		= nfsd4_encode_fattr4_open_arguments,
 };
 
 /*
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3eb21e63b921..988df43f6b07 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -455,7 +455,8 @@ enum {
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
 	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS | \
-	FATTR4_WORD2_XATTR_SUPPORT)
+	FATTR4_WORD2_XATTR_SUPPORT | \
+	FATTR4_WORD2_OPEN_ARGUMENTS)
 
 extern const u32 nfsd_suppattrs[3][3];
 

From cee9b4ef42512a6e57562460a15f18a022c84dda Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:13:59 -0500
Subject: [PATCH 35/42] nfsd: rework NFS4_SHARE_WANT_* flag handling

The delstid draft adds new NFS4_SHARE_WANT_TYPE_MASK values that don't
fit neatly into the existing WANT_MASK or WHEN_MASK. Add a new
NFS4_SHARE_WANT_MOD_MASK value and redefine NFS4_SHARE_WANT_MASK to
include it.

Also fix the checks in nfsd4_deleg_xgrade_none_ext() to check for the
flags instead of equality, since there may be modifier flags in the
value.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c       | 4 ++--
 fs/nfsd/nfs4xdr.c         | 2 +-
 include/uapi/linux/nfs4.h | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e7b26806797a..780db0992c9f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6224,10 +6224,10 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 					struct nfs4_delegation *dp)
 {
 	if (deleg_is_write(dp->dl_type)) {
-		if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+		if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
 			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
-		} else if (open->op_deleg_want == OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+		} else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
 			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
 			open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
 		}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 147564a74b46..5407fd0d464c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1067,7 +1067,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
 		return nfs_ok;
 	if (!argp->minorversion)
 		return nfserr_bad_xdr;
-	switch (w & NFS4_SHARE_WANT_MASK) {
+	switch (w & NFS4_SHARE_WANT_TYPE_MASK) {
 	case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
 	case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
 	case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index caf4db2fcbb9..4273e0249fcb 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -58,7 +58,7 @@
 #define NFS4_SHARE_DENY_BOTH	0x0003
 
 /* nfs41 */
-#define NFS4_SHARE_WANT_MASK		0xFF00
+#define NFS4_SHARE_WANT_TYPE_MASK	0xFF00
 #define NFS4_SHARE_WANT_NO_PREFERENCE	0x0000
 #define NFS4_SHARE_WANT_READ_DELEG	0x0100
 #define NFS4_SHARE_WANT_WRITE_DELEG	0x0200
@@ -66,13 +66,16 @@
 #define NFS4_SHARE_WANT_NO_DELEG	0x0400
 #define NFS4_SHARE_WANT_CANCEL		0x0500
 
-#define NFS4_SHARE_WHEN_MASK		0xF0000
+#define NFS4_SHARE_WHEN_MASK				0xF0000
 #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL	0x10000
 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED		0x20000
 
+#define NFS4_SHARE_WANT_MOD_MASK			0xF00000
 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS		0x100000
 #define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION		0x200000
 
+#define NFS4_SHARE_WANT_MASK	(NFS4_SHARE_WANT_TYPE_MASK | NFS4_SHARE_WANT_MOD_MASK)
+
 #define NFS4_CDFC4_FORE	0x1
 #define NFS4_CDFC4_BACK 0x2
 #define NFS4_CDFC4_BOTH 0x3

From 6ae30d6eb26bce02c48c60074b4306270e2434c1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:14:00 -0500
Subject: [PATCH 36/42] nfsd: add support for delegated timestamps

Add support for the delegated timestamps on write delegations. This
allows the server to proxy timestamps from the delegation holder to
other clients that are doing GETATTRs vs. the same inode.

When OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS bit is set in the OPEN
call, set the dl_type to the *_ATTRS_DELEG flavor of delegation.

Add timespec64 fields to nfs4_cb_fattr and decode the timestamps into
those. Vet those timestamps according to the delstid spec and update
the inode attrs if necessary.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c | 42 ++++++++++++++++--
 fs/nfsd/nfs4state.c    | 99 ++++++++++++++++++++++++++++++++++++------
 fs/nfsd/nfs4xdr.c      | 15 ++++++-
 fs/nfsd/nfsd.h         |  2 +
 fs/nfsd/state.h        |  2 +
 fs/nfsd/xdr4cb.h       | 10 +++--
 include/linux/time64.h |  5 +++
 7 files changed, 152 insertions(+), 23 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1457f61ae051..50e468bdb8d4 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -42,6 +42,7 @@
 #include "trace.h"
 #include "xdr4cb.h"
 #include "xdr4.h"
+#include "nfs4xdr_gen.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -93,12 +94,35 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
 {
 	fattr->ncf_cb_change = 0;
 	fattr->ncf_cb_fsize = 0;
+	fattr->ncf_cb_atime.tv_sec = 0;
+	fattr->ncf_cb_atime.tv_nsec = 0;
+	fattr->ncf_cb_mtime.tv_sec = 0;
+	fattr->ncf_cb_mtime.tv_nsec = 0;
+
 	if (bitmap[0] & FATTR4_WORD0_CHANGE)
 		if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
 			return -NFSERR_BAD_XDR;
 	if (bitmap[0] & FATTR4_WORD0_SIZE)
 		if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
 			return -NFSERR_BAD_XDR;
+	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+		fattr4_time_deleg_access access;
+
+		if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access))
+			return -NFSERR_BAD_XDR;
+		fattr->ncf_cb_atime.tv_sec = access.seconds;
+		fattr->ncf_cb_atime.tv_nsec = access.nseconds;
+
+	}
+	if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+		fattr4_time_deleg_modify modify;
+
+		if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify))
+			return -NFSERR_BAD_XDR;
+		fattr->ncf_cb_mtime.tv_sec = modify.seconds;
+		fattr->ncf_cb_mtime.tv_nsec = modify.nseconds;
+
+	}
 	return 0;
 }
 
@@ -364,15 +388,21 @@ encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
 	struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
 	struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
 	struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
-	u32 bmap[1];
+	u32 bmap_size = 1;
+	u32 bmap[3];
 
 	bmap[0] = FATTR4_WORD0_SIZE;
 	if (!ncf->ncf_file_modified)
 		bmap[0] |= FATTR4_WORD0_CHANGE;
 
+	if (deleg_attrs_deleg(dp->dl_type)) {
+		bmap[1] = 0;
+		bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY;
+		bmap_size = 3;
+	}
 	encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
 	encode_nfs_fh4(xdr, fh);
-	encode_bitmap4(xdr, bmap, ARRAY_SIZE(bmap));
+	encode_bitmap4(xdr, bmap, bmap_size);
 	hdr->nops++;
 }
 
@@ -636,7 +666,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 	u32 bitmap[3] = {0};
-	u32 attrlen;
+	u32 attrlen, maxlen;
 	struct nfs4_cb_fattr *ncf =
 		container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
 
@@ -655,7 +685,11 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
 		return -NFSERR_BAD_XDR;
 	if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
 		return -NFSERR_BAD_XDR;
-	if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
+	maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize);
+	if (bitmap[2] != 0)
+		maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) +
+			   sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2;
+	if (attrlen > maxlen)
 		return -NFSERR_BAD_XDR;
 	status = decode_cb_fattr4(xdr, bitmap, ncf);
 	return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 780db0992c9f..5e1d30b91738 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5951,13 +5951,14 @@ static struct nfs4_delegation *
 nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 		    struct svc_fh *parent)
 {
-	int status = 0;
+	bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
 	struct nfs4_client *clp = stp->st_stid.sc_client;
 	struct nfs4_file *fp = stp->st_stid.sc_file;
 	struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
 	struct nfs4_delegation *dp;
 	struct nfsd_file *nf = NULL;
 	struct file_lease *fl;
+	int status = 0;
 	u32 dl_type;
 
 	/*
@@ -5982,7 +5983,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	 */
 	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
 		nf = find_rw_file(fp);
-		dl_type = OPEN_DELEGATE_WRITE;
+		dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
 	}
 
 	/*
@@ -5991,7 +5992,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	 */
 	if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
 		nf = find_readable_file(fp);
-		dl_type = OPEN_DELEGATE_READ;
+		dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
 	}
 
 	if (!nf)
@@ -6149,13 +6150,14 @@ static void
 nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 		     struct svc_fh *currentfh)
 {
-	struct nfs4_delegation *dp;
+	bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
 	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
 	struct nfs4_client *clp = stp->st_stid.sc_client;
 	struct svc_fh *parent = NULL;
-	int cb_up;
-	int status = 0;
+	struct nfs4_delegation *dp;
 	struct kstat stat;
+	int status = 0;
+	int cb_up;
 
 	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
 	open->op_recall = false;
@@ -6196,12 +6198,14 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 			destroy_delegation(dp);
 			goto out_no_deleg;
 		}
-		open->op_delegate_type = OPEN_DELEGATE_WRITE;
+		open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
+						    OPEN_DELEGATE_WRITE;
 		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
 		dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
 		trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
 	} else {
-		open->op_delegate_type = OPEN_DELEGATE_READ;
+		open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
+						    OPEN_DELEGATE_READ;
 		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
 	}
 	nfs4_put_stid(&dp->dl_stid);
@@ -9017,6 +9021,78 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
 	get_stateid(cstate, &u->write.wr_stateid);
 }
 
+/**
+ * set_cb_time - vet and set the timespec for a cb_getattr update
+ * @cb: timestamp from the CB_GETATTR response
+ * @orig: original timestamp in the inode
+ * @now: current time
+ *
+ * Given a timestamp in a CB_GETATTR response, check it against the
+ * current timestamp in the inode and the current time. Returns true
+ * if the inode's timestamp needs to be updated, and false otherwise.
+ * @cb may also be changed if the timestamp needs to be clamped.
+ */
+static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
+			const struct timespec64 *now)
+{
+
+	/*
+	 * "When the time presented is before the original time, then the
+	 *  update is ignored." Also no need to update if there is no change.
+	 */
+	if (timespec64_compare(cb, orig) <= 0)
+		return false;
+
+	/*
+	 * "When the time presented is in the future, the server can either
+	 *  clamp the new time to the current time, or it may
+	 *  return NFS4ERR_DELAY to the client, allowing it to retry."
+	 */
+	if (timespec64_compare(cb, now) > 0) {
+		/* clamp it */
+		*cb = *now;
+	}
+
+	return true;
+}
+
+static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
+{
+	struct inode *inode = d_inode(dentry);
+	struct timespec64 now = current_time(inode);
+	struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+	struct iattr attrs = { };
+	int ret;
+
+	if (deleg_attrs_deleg(dp->dl_type)) {
+		struct timespec64 atime = inode_get_atime(inode);
+		struct timespec64 mtime = inode_get_mtime(inode);
+
+		attrs.ia_atime = ncf->ncf_cb_atime;
+		attrs.ia_mtime = ncf->ncf_cb_mtime;
+
+		if (set_cb_time(&attrs.ia_atime, &atime, &now))
+			attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+
+		if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) {
+			attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET;
+			attrs.ia_ctime = attrs.ia_mtime;
+		}
+	} else {
+		attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
+		attrs.ia_mtime = attrs.ia_ctime = now;
+	}
+
+	if (!attrs.ia_valid)
+		return 0;
+
+	attrs.ia_valid |= ATTR_DELEG;
+	inode_lock(inode);
+	ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+	inode_unlock(inode);
+	return ret;
+}
+
 /**
  * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
  * @rqstp: RPC transaction context
@@ -9043,7 +9119,6 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 	struct file_lock_context *ctx;
 	struct nfs4_delegation *dp = NULL;
 	struct file_lease *fl;
-	struct iattr attrs;
 	struct nfs4_cb_fattr *ncf;
 	struct inode *inode = d_inode(dentry);
 
@@ -9105,11 +9180,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 		 * not update the file's metadata with the client's
 		 * modified size
 		 */
-		attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
-		attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
-		inode_lock(inode);
-		err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
-		inode_unlock(inode);
+		err = cb_getattr_update_times(dentry, dp);
 		if (err) {
 			status = nfserrno(err);
 			goto out_status;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5407fd0d464c..2c5c288064b0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3400,7 +3400,8 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
 
 #define NFSD_OA_SHARE_ACCESS_WANT	(BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG)		| \
 					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG)		| \
-					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL))
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL)		| \
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS))
 
 #define NFSD_OA_OPEN_CLAIM	(BIT(OPEN_ARGS_OPEN_CLAIM_NULL)		| \
 				 BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS)	| \
@@ -3593,7 +3594,11 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		if (status)
 			goto out;
 	}
-	if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+	if ((attrmask[0] & (FATTR4_WORD0_CHANGE |
+			    FATTR4_WORD0_SIZE)) ||
+	    (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS |
+			    FATTR4_WORD1_TIME_MODIFY |
+			    FATTR4_WORD1_TIME_METADATA))) {
 		status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp);
 		if (status)
 			goto out;
@@ -3608,8 +3613,14 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		if (ncf->ncf_file_modified) {
 			++ncf->ncf_initial_cinfo;
 			args.stat.size = ncf->ncf_cur_fsize;
+			if (!timespec64_is_epoch(&ncf->ncf_cb_mtime))
+				args.stat.mtime = ncf->ncf_cb_mtime;
 		}
 		args.change_attr = ncf->ncf_initial_cinfo;
+
+		if (!timespec64_is_epoch(&ncf->ncf_cb_atime))
+			args.stat.atime = ncf->ncf_cb_atime;
+
 		nfs4_put_stid(&dp->dl_stid);
 	} else {
 		args.change_attr = nfsd4_change_attribute(&args.stat);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 988df43f6b07..4c101c8be95f 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -456,6 +456,8 @@ enum {
 	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS | \
 	FATTR4_WORD2_XATTR_SUPPORT | \
+	FATTR4_WORD2_TIME_DELEG_ACCESS | \
+	FATTR4_WORD2_TIME_DELEG_MODIFY | \
 	FATTR4_WORD2_OPEN_ARGUMENTS)
 
 extern const u32 nfsd_suppattrs[3][3];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 17e98a2521ff..74d2d7b42676 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -159,6 +159,8 @@ struct nfs4_cb_fattr {
 	/* from CB_GETATTR reply */
 	u64 ncf_cb_change;
 	u64 ncf_cb_fsize;
+	struct timespec64 ncf_cb_mtime;
+	struct timespec64 ncf_cb_atime;
 
 	unsigned long ncf_cb_flags;
 	bool ncf_file_modified;
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index e8b00309c449..f1a315cd31b7 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -59,16 +59,20 @@
  * 1: CB_GETATTR opcode (32-bit)
  * N: file_handle
  * 1: number of entry in attribute array (32-bit)
- * 1: entry 0 in attribute array (32-bit)
+ * 3: entry 0-2 in attribute array (32-bit * 3)
  */
 #define NFS4_enc_cb_getattr_sz		(cb_compound_enc_hdr_sz +       \
 					cb_sequence_enc_sz +            \
-					1 + enc_nfs4_fh_sz + 1 + 1)
+					1 + enc_nfs4_fh_sz + 1 + 3)
 /*
  * 4: fattr_bitmap_maxsz
  * 1: attribute array len
  * 2: change attr (64-bit)
  * 2: size (64-bit)
+ * 2: atime.seconds (64-bit)
+ * 1: atime.nanoseconds (32-bit)
+ * 2: mtime.seconds (64-bit)
+ * 1: mtime.nanoseconds (32-bit)
  */
 #define NFS4_dec_cb_getattr_sz		(cb_compound_dec_hdr_sz  +      \
-			cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
+			cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz)
diff --git a/include/linux/time64.h b/include/linux/time64.h
index f1bcea8c124a..9934331c7b86 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -49,6 +49,11 @@ static inline int timespec64_equal(const struct timespec64 *a,
 	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
 }
 
+static inline bool timespec64_is_epoch(const struct timespec64 *ts)
+{
+	return ts->tv_sec == 0 && ts->tv_nsec == 0;
+}
+
 /*
  * lhs < rhs:  return <0
  * lhs == rhs: return 0

From 7e13f4f8d27dc02fb88666f603c53ca749d56f92 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:14:01 -0500
Subject: [PATCH 37/42] nfsd: handle delegated timestamps in SETATTR

Allow SETATTR to handle delegated timestamps. This patch assumes that
only the delegation holder has the ability to set the timestamps in this
way, so we allow this only if the SETATTR stateid refers to a
*_ATTRS_DELEG delegation.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c  | 31 ++++++++++++++++++++++++++++---
 fs/nfsd/nfs4state.c |  2 +-
 fs/nfsd/nfs4xdr.c   | 20 ++++++++++++++++++++
 fs/nfsd/nfsd.h      |  5 ++++-
 4 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ad44ad49274f..f6e06c779d09 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1135,18 +1135,43 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		.na_iattr	= &setattr->sa_iattr,
 		.na_seclabel	= &setattr->sa_label,
 	};
+	bool save_no_wcc, deleg_attrs;
+	struct nfs4_stid *st = NULL;
 	struct inode *inode;
 	__be32 status = nfs_ok;
-	bool save_no_wcc;
 	int err;
 
-	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+	deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS |
+					      FATTR4_WORD2_TIME_DELEG_MODIFY);
+
+	if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) {
+		int flags = WR_STATE;
+
+		if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+			flags |= RD_STATE;
+
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
 				&cstate->current_fh, &setattr->sa_stateid,
-				WR_STATE, NULL, NULL);
+				flags, NULL, &st);
 		if (status)
 			return status;
 	}
+
+	if (deleg_attrs) {
+		status = nfserr_bad_stateid;
+		if (st->sc_type & SC_TYPE_DELEG) {
+			struct nfs4_delegation *dp = delegstateid(st);
+
+			/* Only for *_ATTRS_DELEG flavors */
+			if (deleg_attrs_deleg(dp->dl_type))
+				status = nfs_ok;
+		}
+	}
+	if (st)
+		nfs4_put_stid(st);
+	if (status)
+		return status;
+
 	err = fh_want_write(&cstate->current_fh);
 	if (err)
 		return nfserrno(err);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5e1d30b91738..ab19ac7b2329 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5595,7 +5595,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 static inline __be32
 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 {
-	if ((flags & WR_STATE) && deleg_is_read(dp->dl_type))
+	if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
 		return nfserr_openmode;
 	else
 		return nfs_ok;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2c5c288064b0..178325fcb6cb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -521,6 +521,26 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
 		*umask = mask & S_IRWXUGO;
 		iattr->ia_valid |= ATTR_MODE;
 	}
+	if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+		fattr4_time_deleg_access access;
+
+		if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access))
+			return nfserr_bad_xdr;
+		iattr->ia_atime.tv_sec = access.seconds;
+		iattr->ia_atime.tv_nsec = access.nseconds;
+		iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG;
+	}
+	if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+		fattr4_time_deleg_modify modify;
+
+		if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify))
+			return nfserr_bad_xdr;
+		iattr->ia_mtime.tv_sec = modify.seconds;
+		iattr->ia_mtime.tv_nsec = modify.nseconds;
+		iattr->ia_ctime.tv_sec = modify.seconds;
+		iattr->ia_ctime.tv_nsec = modify.seconds;
+		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
+	}
 
 	/* request sanity: did attrlist4 contain the expected number of words? */
 	if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 4c101c8be95f..e2997f0ffbc5 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -528,7 +528,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 #endif
 #define NFSD_WRITEABLE_ATTRS_WORD2 \
 	(FATTR4_WORD2_MODE_UMASK \
-	| MAYBE_FATTR4_WORD2_SECURITY_LABEL)
+	| MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+	| FATTR4_WORD2_TIME_DELEG_ACCESS \
+	| FATTR4_WORD2_TIME_DELEG_MODIFY \
+	)
 
 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
 	NFSD_WRITEABLE_ATTRS_WORD0

From d3edfd9ed17cb3bc754b3064051fb5df7863fda3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 9 Dec 2024 16:14:02 -0500
Subject: [PATCH 38/42] nfsd: implement
 OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION

Allow clients to request getting a delegation xor an open stateid if a
delegation isn't available. This allows the client to avoid sending a
final CLOSE for the (useless) open stateid, when it is granted a
delegation.

If this flag is requested by the client and there isn't already a new
open stateid, discard the new open stateid before replying.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 24 +++++++++++++++++++++++-
 fs/nfsd/nfs4xdr.c   |  3 ++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ab19ac7b2329..b7a0cfd05401 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6242,6 +6242,17 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 	 */
 }
 
+/* Are we returning only a delegation stateid? */
+static bool open_xor_delegation(struct nfsd4_open *open)
+{
+	if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+		return false;
+	/* Did we actually get a delegation? */
+	if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type))
+		return false;
+	return true;
+}
+
 /**
  * nfsd4_process_open2 - finish open processing
  * @rqstp: the RPC transaction being executed
@@ -6339,6 +6350,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	* OPEN succeeds even if we fail.
 	*/
 	nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+
+	/*
+	 * If there is an existing open stateid, it must be updated and
+	 * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
+	 * open stateid would have to be created.
+	 */
+	if (new_stp && open_xor_delegation(open)) {
+		memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
+		open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
+		release_open_stateid(stp);
+	}
 nodeleg:
 	status = nfs_ok;
 	trace_nfsd_open(&stp->st_stid.sc_stateid);
@@ -6355,7 +6377,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	/*
 	* To finish the open response, we just need to set the rflags.
 	*/
-	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+	open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
 	if (nfsd4_has_session(&resp->cstate))
 		open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
 	else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 178325fcb6cb..5d413e751c74 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3421,7 +3421,8 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
 #define NFSD_OA_SHARE_ACCESS_WANT	(BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG)		| \
 					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG)		| \
 					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL)		| \
-					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS))
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS)	| \
+					 BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
 
 #define NFSD_OA_OPEN_CLAIM	(BIT(OPEN_ARGS_OPEN_CLAIM_NULL)		| \
 				 BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS)	| \

From 966a675da844f1a764bb44557c21561cc3d09840 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 2 Jan 2025 20:00:01 -0500
Subject: [PATCH 39/42] Revert "SUNRPC: Reduce thread wake-up rate when
 receiving large RPC messages"

I noticed that a handful of NFSv3 fstests were taking an
unexpectedly long time to run. Troubleshooting showed that the
server's TCP window closed and never re-opened, which caused the
client to trigger an RPC retransmit timeout after 180 seconds.

The client's recovery action was to establish a fresh connection
and retransmit the timed-out requests. This worked, but it adds a
long delay.

I tracked the problem to the commit that attempted to reduce the
rate at which the network layer delivers TCP socket data_ready
callbacks. Under most circumstances this change worked as expected,
but for NFSv3, which has no session or other type of throttling, it
can overwhelm the receiver on occasion.

I'm sure I could tweak the lowat settings, but the small benefit
doesn't seem worth the bother. Just revert it.

Fixes: 2b877fc53e97 ("SUNRPC: Reduce thread wake-up rate when receiving large RPC messages")
Cc: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/svcsock.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 95397677673b..cb3bd12f5818 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1083,9 +1083,6 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk)
 	/* If we have more data, signal svc_xprt_enqueue() to try again */
 	svsk->sk_tcplen = 0;
 	svsk->sk_marker = xdr_zero;
-
-	smp_wmb();
-	tcp_set_rcvlowat(svsk->sk_sk, 1);
 }
 
 /**
@@ -1175,17 +1172,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto err_delete;
 	if (len == want)
 		svc_tcp_fragment_received(svsk);
-	else {
-		/* Avoid more ->sk_data_ready() calls until the rest
-		 * of the message has arrived. This reduces service
-		 * thread wake-ups on large incoming messages. */
-		tcp_set_rcvlowat(svsk->sk_sk,
-				 svc_sock_reclen(svsk) - svsk->sk_tcplen);
-
+	else
 		trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
 				svc_sock_reclen(svsk),
 				svsk->sk_tcplen - sizeof(rpc_fraghdr));
-	}
 	goto err_noclose;
 error:
 	if (len != -EAGAIN)

From ee0d90d4b97a9787ed55b22c85c72376329d86ac Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Tue, 10 Dec 2024 01:02:23 +0000
Subject: [PATCH 40/42] sunrpc: Remove unused xprt_iter_get_xprt

xprt_iter_get_xprt() was added by
commit 80b14d5e61ca ("SUNRPC: Add a structure to track multiple
transports") but is unused.

Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Anna Schumaker <anna.schumaker@oracle.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xprtmultipath.h |  1 -
 net/sunrpc/xprtmultipath.c           | 17 -----------------
 2 files changed, 18 deletions(-)

diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
index c0514c684b2c..e411368cdacf 100644
--- a/include/linux/sunrpc/xprtmultipath.h
+++ b/include/linux/sunrpc/xprtmultipath.h
@@ -75,7 +75,6 @@ extern struct rpc_xprt_switch *xprt_iter_xchg_switch(
 		struct rpc_xprt_switch *newswitch);
 
 extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi);
-extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi);
 extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);
 
 extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 720d3ba742ec..7e98d4dd9f10 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -602,23 +602,6 @@ struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
 	return ret;
 }
 
-/**
- * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
- * @xpi: pointer to rpc_xprt_iter
- *
- * Returns a reference to the struct rpc_xprt that is currently
- * pointed to by the cursor.
- */
-struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
-{
-	struct rpc_xprt *xprt;
-
-	rcu_read_lock();
-	xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
-	rcu_read_unlock();
-	return xprt;
-}
-
 /**
  * xprt_iter_get_next - Returns the next rpc_xprt following the cursor
  * @xpi: pointer to rpc_xprt_iter

From afc52b1eeb36f20eea321f50e338e38d00a8a61f Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Tue, 10 Dec 2024 01:02:24 +0000
Subject: [PATCH 41/42] sunrpc: Remove gss_generic_token deadcode

Commit ec596aaf9b48 ("SUNRPC: Remove code behind
CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED") was the last user of the routines
in gss_generic_token.c.

Remove the routines and associated header.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/gss_asn1.h         |  81 ---------
 include/linux/sunrpc/gss_krb5.h         |   1 -
 net/sunrpc/auth_gss/Makefile            |   2 +-
 net/sunrpc/auth_gss/gss_generic_token.c | 231 ------------------------
 net/sunrpc/auth_gss/gss_mech_switch.c   |   1 -
 5 files changed, 1 insertion(+), 315 deletions(-)
 delete mode 100644 include/linux/sunrpc/gss_asn1.h
 delete mode 100644 net/sunrpc/auth_gss/gss_generic_token.c

diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h
deleted file mode 100644
index 3ccecd0ad229..000000000000
--- a/include/linux/sunrpc/gss_asn1.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  linux/include/linux/sunrpc/gss_asn1.h
- *
- *  minimal asn1 for generic encoding/decoding of gss tokens
- *
- *  Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
- *  lib/gssapi/krb5/gssapiP_krb5.h, and others
- *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson   <andros@umich.edu>
- */
-
-/*
- * Copyright 1995 by the Massachusetts Institute of Technology.
- * All Rights Reserved.
- *
- * Export of this software from the United States of America may
- *   require a specific license from the United States Government.
- *   It is the responsibility of any person or organization contemplating
- *   export to obtain such a license before exporting.
- *
- * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
- * distribute this software and its documentation for any purpose and
- * without fee is hereby granted, provided that the above copyright
- * notice appear in all copies and that both that copyright notice and
- * this permission notice appear in supporting documentation, and that
- * the name of M.I.T. not be used in advertising or publicity pertaining
- * to distribution of the software without specific, written prior
- * permission.  Furthermore if you modify this software you must label
- * your software as modified software and not distribute it in such a
- * fashion that it might be confused with the original M.I.T. software.
- * M.I.T. makes no representations about the suitability of
- * this software for any purpose.  It is provided "as is" without express
- * or implied warranty.
- *
- */
-
-
-#include <linux/sunrpc/gss_api.h>
-
-#define SIZEOF_INT 4
-
-/* from gssapi_err_generic.h */
-#define G_BAD_SERVICE_NAME                       (-2045022976L)
-#define G_BAD_STRING_UID                         (-2045022975L)
-#define G_NOUSER                                 (-2045022974L)
-#define G_VALIDATE_FAILED                        (-2045022973L)
-#define G_BUFFER_ALLOC                           (-2045022972L)
-#define G_BAD_MSG_CTX                            (-2045022971L)
-#define G_WRONG_SIZE                             (-2045022970L)
-#define G_BAD_USAGE                              (-2045022969L)
-#define G_UNKNOWN_QOP                            (-2045022968L)
-#define G_NO_HOSTNAME                            (-2045022967L)
-#define G_BAD_HOSTNAME                           (-2045022966L)
-#define G_WRONG_MECH                             (-2045022965L)
-#define G_BAD_TOK_HEADER                         (-2045022964L)
-#define G_BAD_DIRECTION                          (-2045022963L)
-#define G_TOK_TRUNC                              (-2045022962L)
-#define G_REFLECT                                (-2045022961L)
-#define G_WRONG_TOKID                            (-2045022960L)
-
-#define g_OID_equal(o1,o2) \
-   (((o1)->len == (o2)->len) && \
-    (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0))
-
-u32 g_verify_token_header(
-     struct xdr_netobj *mech,
-     int *body_size,
-     unsigned char **buf_in,
-     int toksize);
-
-int g_token_size(
-     struct xdr_netobj *mech,
-     unsigned int body_size);
-
-void g_make_token_header(
-     struct xdr_netobj *mech,
-     int body_size,
-     unsigned char **buf);
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 78a80bf3fdcb..43950b5237c8 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -40,7 +40,6 @@
 #include <crypto/skcipher.h>
 #include <linux/sunrpc/auth_gss.h>
 #include <linux/sunrpc/gss_err.h>
-#include <linux/sunrpc/gss_asn1.h>
 
 /* Length of constant used in key derivation */
 #define GSS_KRB5_K5CLENGTH (5)
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index ad1736d93b76..452f67deebc6 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
-auth_rpcgss-y := auth_gss.o gss_generic_token.o \
+auth_rpcgss-y := auth_gss.o \
 	gss_mech_switch.o svcauth_gss.o \
 	gss_rpc_upcall.o gss_rpc_xdr.o trace.o
 
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
deleted file mode 100644
index 4a4082bb22ad..000000000000
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_generic_token.c
- *
- *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
- *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson   <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose.  It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/sunrpc/gss_asn1.h>
-
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY        RPCDBG_AUTH
-#endif
-
-
-/* TWRITE_STR from gssapiP_generic.h */
-#define TWRITE_STR(ptr, str, len) \
-	memcpy((ptr), (char *) (str), (len)); \
-	(ptr) += (len);
-
-/* XXXX this code currently makes the assumption that a mech oid will
-   never be longer than 127 bytes.  This assumption is not inherent in
-   the interfaces, so the code can be fixed if the OSI namespace
-   balloons unexpectedly. */
-
-/* Each token looks like this:
-
-0x60				tag for APPLICATION 0, SEQUENCE
-					(constructed, definite-length)
-	<length>		possible multiple bytes, need to parse/generate
-	0x06			tag for OBJECT IDENTIFIER
-		<moid_length>	compile-time constant string (assume 1 byte)
-		<moid_bytes>	compile-time constant string
-	<inner_bytes>		the ANY containing the application token
-					bytes 0,1 are the token type
-					bytes 2,n are the token data
-
-For the purposes of this abstraction, the token "header" consists of
-the sequence tag and length octets, the mech OID DER encoding, and the
-first two inner bytes, which indicate the token type.  The token
-"body" consists of everything else.
-
-*/
-
-static int
-der_length_size( int length)
-{
-	if (length < (1<<7))
-		return 1;
-	else if (length < (1<<8))
-		return 2;
-#if (SIZEOF_INT == 2)
-	else
-		return 3;
-#else
-	else if (length < (1<<16))
-		return 3;
-	else if (length < (1<<24))
-		return 4;
-	else
-		return 5;
-#endif
-}
-
-static void
-der_write_length(unsigned char **buf, int length)
-{
-	if (length < (1<<7)) {
-		*(*buf)++ = (unsigned char) length;
-	} else {
-		*(*buf)++ = (unsigned char) (der_length_size(length)+127);
-#if (SIZEOF_INT > 2)
-		if (length >= (1<<24))
-			*(*buf)++ = (unsigned char) (length>>24);
-		if (length >= (1<<16))
-			*(*buf)++ = (unsigned char) ((length>>16)&0xff);
-#endif
-		if (length >= (1<<8))
-			*(*buf)++ = (unsigned char) ((length>>8)&0xff);
-		*(*buf)++ = (unsigned char) (length&0xff);
-	}
-}
-
-/* returns decoded length, or < 0 on failure.  Advances buf and
-   decrements bufsize */
-
-static int
-der_read_length(unsigned char **buf, int *bufsize)
-{
-	unsigned char sf;
-	int ret;
-
-	if (*bufsize < 1)
-		return -1;
-	sf = *(*buf)++;
-	(*bufsize)--;
-	if (sf & 0x80) {
-		if ((sf &= 0x7f) > ((*bufsize)-1))
-			return -1;
-		if (sf > SIZEOF_INT)
-			return -1;
-		ret = 0;
-		for (; sf; sf--) {
-			ret = (ret<<8) + (*(*buf)++);
-			(*bufsize)--;
-		}
-	} else {
-		ret = sf;
-	}
-
-	return ret;
-}
-
-/* returns the length of a token, given the mech oid and the body size */
-
-int
-g_token_size(struct xdr_netobj *mech, unsigned int body_size)
-{
-	/* set body_size to sequence contents size */
-	body_size += 2 + (int) mech->len;         /* NEED overflow check */
-	return 1 + der_length_size(body_size) + body_size;
-}
-
-EXPORT_SYMBOL_GPL(g_token_size);
-
-/* fills in a buffer with the token header.  The buffer is assumed to
-   be the right size.  buf is advanced past the token header */
-
-void
-g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
-{
-	*(*buf)++ = 0x60;
-	der_write_length(buf, 2 + mech->len + body_size);
-	*(*buf)++ = 0x06;
-	*(*buf)++ = (unsigned char) mech->len;
-	TWRITE_STR(*buf, mech->data, ((int) mech->len));
-}
-
-EXPORT_SYMBOL_GPL(g_make_token_header);
-
-/*
- * Given a buffer containing a token, reads and verifies the token,
- * leaving buf advanced past the token header, and setting body_size
- * to the number of remaining bytes.  Returns 0 on success,
- * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
- * mechanism in the token does not match the mech argument.  buf and
- * *body_size are left unmodified on error.
- */
-u32
-g_verify_token_header(struct xdr_netobj *mech, int *body_size,
-		      unsigned char **buf_in, int toksize)
-{
-	unsigned char *buf = *buf_in;
-	int seqsize;
-	struct xdr_netobj toid;
-	int ret = 0;
-
-	if ((toksize-=1) < 0)
-		return G_BAD_TOK_HEADER;
-	if (*buf++ != 0x60)
-		return G_BAD_TOK_HEADER;
-
-	if ((seqsize = der_read_length(&buf, &toksize)) < 0)
-		return G_BAD_TOK_HEADER;
-
-	if (seqsize != toksize)
-		return G_BAD_TOK_HEADER;
-
-	if ((toksize-=1) < 0)
-		return G_BAD_TOK_HEADER;
-	if (*buf++ != 0x06)
-		return G_BAD_TOK_HEADER;
-
-	if ((toksize-=1) < 0)
-		return G_BAD_TOK_HEADER;
-	toid.len = *buf++;
-
-	if ((toksize-=toid.len) < 0)
-		return G_BAD_TOK_HEADER;
-	toid.data = buf;
-	buf+=toid.len;
-
-	if (! g_OID_equal(&toid, mech))
-		ret = G_WRONG_MECH;
-
-   /* G_WRONG_MECH is not returned immediately because it's more important
-      to return G_BAD_TOK_HEADER if the token header is in fact bad */
-
-	if ((toksize-=2) < 0)
-		return G_BAD_TOK_HEADER;
-
-	if (ret)
-		return ret;
-
-	*buf_in = buf;
-	*body_size = toksize;
-
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index fae632da1058..c84d0cf61980 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/oid_registry.h>
 #include <linux/sunrpc/msg_prot.h>
-#include <linux/sunrpc/gss_asn1.h>
 #include <linux/sunrpc/auth_gss.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/gss_err.h>

From c92066e78600b058638785288274a1f1426fe268 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Tue, 10 Dec 2024 01:02:25 +0000
Subject: [PATCH 42/42] sunrpc: Remove gss_{de,en}crypt_xdr_buf deadcode

Commit ec596aaf9b48 ("SUNRPC: Remove code behind
CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED") was the last user of the
gss_decrypt_xdr_buf() and gss_encrypt_xdr_buf() functions.

Remove them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/auth_gss/gss_krb5_crypto.c   | 55 -------------------------
 net/sunrpc/auth_gss/gss_krb5_internal.h |  7 ----
 2 files changed, 62 deletions(-)

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index d2b02710ab07..9a27201638e2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -442,35 +442,6 @@ encryptor(struct scatterlist *sg, void *data)
 	return 0;
 }
 
-int
-gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
-		    int offset, struct page **pages)
-{
-	int ret;
-	struct encryptor_desc desc;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-
-	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
-
-	skcipher_request_set_sync_tfm(req, tfm);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-
-	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.req = req;
-	desc.pos = offset;
-	desc.outbuf = buf;
-	desc.pages = pages;
-	desc.fragno = 0;
-	desc.fraglen = 0;
-
-	sg_init_table(desc.infrags, 4);
-	sg_init_table(desc.outfrags, 4);
-
-	ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
-	skcipher_request_zero(req);
-	return ret;
-}
-
 struct decryptor_desc {
 	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
 	struct skcipher_request *req;
@@ -525,32 +496,6 @@ decryptor(struct scatterlist *sg, void *data)
 	return 0;
 }
 
-int
-gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
-		    int offset)
-{
-	int ret;
-	struct decryptor_desc desc;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-
-	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
-
-	skcipher_request_set_sync_tfm(req, tfm);
-	skcipher_request_set_callback(req, 0, NULL, NULL);
-
-	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.req = req;
-	desc.fragno = 0;
-	desc.fraglen = 0;
-
-	sg_init_table(desc.frags, 4);
-
-	ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
-	skcipher_request_zero(req);
-	return ret;
-}
-
 /*
  * This function makes the assumption that it was ultimately called
  * from gss_wrap().
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
index 3afd4065bf3d..a47e9ec228a5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_internal.h
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -172,13 +172,6 @@ u32 krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in,
 int xdr_extend_head(struct xdr_buf *buf, unsigned int base,
 		    unsigned int shiftlen);
 
-int gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm,
-			struct xdr_buf *outbuf, int offset,
-			struct page **pages);
-
-int gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm,
-			struct xdr_buf *inbuf, int offset);
-
 u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 			 struct xdr_buf *buf, struct page **pages);