[PATCH v7 26/26] tcp: authopt: Initial implementation of TCP_REPAIR_AUTHOPT

From: Leonard Crestez
Date: Thu Aug 18 2022 - 16:06:24 EST


In order to support TCP_REPAIR for connections using RFC5925
Authentication Option add a sockopt to get/set ISN and SNE values.

The TCP_REPAIR_AUTHOPT sockopt is only allowed when the socket is
already in "repair" mode, this behavior is shared with other sockopts
relevant to TCP_REPAIR.

The setsockopt further requires the TCP_ESTABLISHED state, this is
because it relies on snd_nxt which is only initialized after connect().

For SNE restoration we provide a full 64-bit sequence number on "get" and
handle any recent 64-bit sequence number on "set", where recent means
"within ~2GB of the current window".

Linux tracks snd_sne and rcv_sne as the extension of snd_nxt and
rcv_nxt but this is an implementation detail and snd_nxt doesn't even
seem to be one of the values that can be read by userspace. Handling SNE
with 64-bit values means userspace doesn't need to worry about matching
snd_nxt.

Signed-off-by: Leonard Crestez <cdleonard@xxxxxxxxx>
---
include/net/tcp_authopt.h | 2 ++
include/uapi/linux/tcp.h | 19 +++++++++++
net/ipv4/tcp.c | 23 ++++++++++++++
net/ipv4/tcp_authopt.c | 66 +++++++++++++++++++++++++++++++++++++++
4 files changed, 110 insertions(+)

diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
index 4f83d8e54fef..fda6dc4b5d57 100644
--- a/include/net/tcp_authopt.h
+++ b/include/net/tcp_authopt.h
@@ -231,10 +231,12 @@ static inline void tcp_authopt_update_snd_sne(struct tcp_sock *tp, u32 seq)
lockdep_sock_is_held((struct sock *)tp));
if (info)
__tcp_authopt_update_snd_sne(tp, info, seq);
}
}
+int tcp_get_authopt_repair_val(struct sock *sk, struct tcp_authopt_repair *opt);
+int tcp_set_authopt_repair(struct sock *sk, sockptr_t optval, unsigned int optlen);
#else
static inline void tcp_authopt_clear(struct sock *sk)
{
}
static inline int tcp_authopt_openreq(struct sock *newsk,
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 5ca8aa9d5e43..ee6836f87cf8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -128,10 +128,11 @@ enum {
#define TCP_CM_INQ TCP_INQ

#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */
#define TCP_AUTHOPT 38 /* TCP Authentication Option (RFC5925) */
#define TCP_AUTHOPT_KEY 39 /* TCP Authentication Option Key (RFC5925) */
+#define TCP_REPAIR_AUTHOPT 40 /* TCP_REPAIR: get/set ISN and SNE values for TCP Authentication Option */


#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF 0
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
@@ -490,10 +491,28 @@ struct tcp_authopt_key {
* address match is performed.
*/
int prefixlen;
};

+/**
+ * struct tcp_authopt_repair - TCP_REPAIR information related to Authentication Option
+ * @src_isn: Local Initial Sequence Number
+ * @dst_isn: Remote Initial Sequence Number
+ * @snd_sne: Sequence Number Extension for Send: upper 32 bits of the 64-bit send sequence number
+ * @rcv_sne: Sequence Number Extension for Recv: upper 32 bits of the 64-bit recv sequence number
+ * @snd_seq: Recent Send Sequence Number: lower 32 bits, paired with @snd_sne (snd_nxt on get)
+ * @rcv_seq: Recent Recv Sequence Number: lower 32 bits, paired with @rcv_sne (rcv_nxt on get)
+ *
+ * Each (sne, seq) pair together forms one full 64-bit sequence number. On
+ * set, the kernel does not store the SNE as given: it passes the pair and the
+ * socket's current snd_nxt/rcv_nxt to compute_sne() and stores the result.
+ */
+struct tcp_authopt_repair {
+ __u32 src_isn;
+ __u32 dst_isn;
+ __u32 snd_sne;
+ __u32 rcv_sne;
+ __u32 snd_seq;
+ __u32 rcv_seq;
+};
+
/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */

#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
struct tcp_zerocopy_receive {
__u64 address; /* in: address of mapping */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 205534d501ec..ad0af4efd265 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3715,10 +3715,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
err = tcp_set_authopt(sk, optval, optlen);
break;
case TCP_AUTHOPT_KEY:
err = tcp_set_authopt_key(sk, optval, optlen);
break;
+ case TCP_REPAIR_AUTHOPT:
+ err = tcp_set_authopt_repair(sk, optval, optlen);
+ break;
#endif
case TCP_USER_TIMEOUT:
/* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
@@ -4387,10 +4390,30 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
if (copy_to_user(optval, &info, len))
return -EFAULT;
return 0;
}
+ case TCP_REPAIR_AUTHOPT: {
+ struct tcp_authopt_repair val;
+ int err;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ err = tcp_get_authopt_repair_val(sk, &val);
+ release_sock(sk);
+
+ if (err)
+ return err;
+ len = min_t(unsigned int, len, sizeof(val));
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+ return 0;
+ }
#endif

default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
index 933a4bbddb70..a77067c0498b 100644
--- a/net/ipv4/tcp_authopt.c
+++ b/net/ipv4/tcp_authopt.c
@@ -1775,10 +1775,76 @@ int __tcp_authopt_inbound_check(struct sock *sk, struct sk_buff *skb,

return 1;
}
EXPORT_SYMBOL(__tcp_authopt_inbound_check);

+/**
+ * tcp_get_authopt_repair_val - read the TCP-AO state needed for TCP_REPAIR
+ * @sk: socket; the caller must own it
+ * @opt: output buffer; always zeroed first, filled in only on success
+ *
+ * Reports both ISNs, both SNE values and the current snd_nxt/rcv_nxt.
+ *
+ * Return: 0 on success, the error from check_sysctl_tcp_authopt(),
+ * -EPERM if the socket is not in repair mode, or -ENOENT if the socket
+ * has no authopt info attached.
+ */
+int tcp_get_authopt_repair_val(struct sock *sk, struct tcp_authopt_repair *opt)
+{
+	struct tcp_authopt_info *ao_info;
+	struct tcp_sock *tp;
+	int rc;
+
+	/* Never hand back stale stack contents, even on the error paths. */
+	memset(opt, 0, sizeof(*opt));
+	sock_owned_by_me(sk);
+
+	rc = check_sysctl_tcp_authopt();
+	if (rc)
+		return rc;
+
+	tp = tcp_sk(sk);
+	if (!tp->repair)
+		return -EPERM;
+
+	ao_info = rcu_dereference_check(tp->authopt_info,
+					lockdep_sock_is_held(sk));
+	if (!ao_info)
+		return -ENOENT;
+
+	/* Initial sequence numbers of both directions. */
+	opt->src_isn = ao_info->src_isn;
+	opt->dst_isn = ao_info->dst_isn;
+
+	/* Send side: extension plus the 32-bit value it extends. */
+	opt->snd_sne = ao_info->snd_sne;
+	opt->snd_seq = tp->snd_nxt;
+
+	/* Receive side, same pairing. */
+	opt->rcv_sne = ao_info->rcv_sne;
+	opt->rcv_seq = tp->rcv_nxt;
+
+	return 0;
+}
+
+/**
+ * tcp_set_authopt_repair - restore TCP-AO ISN and SNE state under TCP_REPAIR
+ * @sk: socket; the caller must own it
+ * @optval: user pointer to a struct tcp_authopt_repair
+ * @optlen: length of @optval; must be exactly sizeof(struct tcp_authopt_repair)
+ *
+ * The ISNs are stored as given. The SNE values are not stored directly: each
+ * one is passed through compute_sne() together with its matching 32-bit
+ * sequence number and the socket's current rcv_nxt/snd_nxt.
+ *
+ * Return: 0 on success, the error from check_sysctl_tcp_authopt(), -EINVAL
+ * if @optlen is wrong, -EFAULT if @optval can't be read, -EPERM if the socket
+ * is not in repair mode or not in TCP_ESTABLISHED, or -ENOENT if the socket
+ * has no authopt info attached.
+ */
+int tcp_set_authopt_repair(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_authopt_info *info;
+	struct tcp_authopt_repair val;
+	int err;
+
+	sock_owned_by_me(sk);
+	err = check_sysctl_tcp_authopt();
+	if (err)
+		return err;
+
+	/* A wrong length is a bad argument, not a bad address. */
+	if (optlen != sizeof(val))
+		return -EINVAL;
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	/* Same order as tcp_get_authopt_repair_val(): repair mode is checked
+	 * before looking for authopt info, so a socket outside of repair mode
+	 * always gets -EPERM.
+	 */
+	if (!tp->repair)
+		return -EPERM;
+
+	/* tcp_authopt repair relies on fields that are only initialized after
+	 * tcp_connect. Doing this setsockopt before connect() can't be correct
+	 * so return an error.
+	 */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -EPERM;
+
+	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
+	if (!info)
+		return -ENOENT;
+
+	info->dst_isn = val.dst_isn;
+	info->src_isn = val.src_isn;
+	info->rcv_sne = compute_sne(val.rcv_sne, val.rcv_seq, tp->rcv_nxt);
+	info->snd_sne = compute_sne(val.snd_sne, val.snd_seq, tp->snd_nxt);
+
+	return 0;
+}
+
#ifdef CONFIG_PROC_FS
struct tcp_authopt_iter_state {
struct seq_net_private p;
};

--
2.25.1