[RFC v2 1/1] net/tls: allow limiting maximum record size

From: Wilfred Mallawa
Date: Fri Aug 08 2025 - 03:35:59 EST


From: Wilfred Mallawa <wilfred.mallawa@xxxxxxx>

During a handshake, an endpoint may specify a maximum record size limit.
Currently, the kernel defaults to TLS_MAX_PAYLOAD_SIZE (16KB) for the
maximum record size, which means that outgoing records from the kernel
can exceed a lower limit negotiated during the handshake. In such a case,
the receiving TLS endpoint must send a fatal "record_overflow" alert [1],
and thus the record is discarded.

This patch adds support for retrieving the negotiated record size limit
during a handshake, and enforcing it at the TLS layer such that outgoing
records are no larger than the size negotiated. This patch depends on
the respective userspace support in tlshd [2] and GnuTLS [3].

[1] https://www.rfc-editor.org/rfc/rfc8449
[2] https://github.com/oracle/ktls-utils/pull/112
[3] https://gitlab.com/gnutls/gnutls/-/merge_requests/2005

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@xxxxxxx>
---
Documentation/netlink/specs/handshake.yaml | 3 +++
include/net/tls.h | 2 ++
include/uapi/linux/handshake.h | 1 +
net/handshake/genl.c | 5 ++--
net/handshake/tlshd.c | 29 +++++++++++++++++++++-
net/tls/tls_sw.c | 6 ++++-
6 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
index b934cc513e3d..4e6bc348f1fd 100644
--- a/Documentation/netlink/specs/handshake.yaml
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -84,6 +84,9 @@ attribute-sets:
name: remote-auth
type: u32
multi-attr: true
+ -
+ name: record-size-limit
+ type: u32

operations:
list:
diff --git a/include/net/tls.h b/include/net/tls.h
index 857340338b69..02e7b59fcc30 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -250,6 +250,8 @@ struct tls_context {
*/
unsigned long flags;

+ u32 tls_record_size_limit;
+
/* cache cold stuff */
struct proto *sk_proto;
struct sock *sk;
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
index 3d7ea58778c9..0768eb8eb415 100644
--- a/include/uapi/linux/handshake.h
+++ b/include/uapi/linux/handshake.h
@@ -54,6 +54,7 @@ enum {
HANDSHAKE_A_DONE_STATUS = 1,
HANDSHAKE_A_DONE_SOCKFD,
HANDSHAKE_A_DONE_REMOTE_AUTH,
+ HANDSHAKE_A_DONE_RECORD_SIZE_LIMIT,

__HANDSHAKE_A_DONE_MAX,
HANDSHAKE_A_DONE_MAX = (__HANDSHAKE_A_DONE_MAX - 1)
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
index f55d14d7b726..44c43ce18361 100644
--- a/net/handshake/genl.c
+++ b/net/handshake/genl.c
@@ -16,10 +16,11 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN
};

/* HANDSHAKE_CMD_DONE - do */
-static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = {
+static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_RECORD_SIZE_LIMIT + 1] = {
[HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, },
[HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, },
[HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, },
+ [HANDSHAKE_A_DONE_RECORD_SIZE_LIMIT] = { .type = NLA_U32, },
};

/* Ops table for handshake */
@@ -35,7 +36,7 @@ static const struct genl_split_ops handshake_nl_ops[] = {
.cmd = HANDSHAKE_CMD_DONE,
.doit = handshake_nl_done_doit,
.policy = handshake_done_nl_policy,
- .maxattr = HANDSHAKE_A_DONE_REMOTE_AUTH,
+ .maxattr = HANDSHAKE_A_DONE_RECORD_SIZE_LIMIT,
.flags = GENL_CMD_CAP_DO,
},
};
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index d6f52839827e..f4e793f6288d 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -19,6 +19,7 @@
#include <net/handshake.h>
#include <net/genetlink.h>
#include <net/tls_prot.h>
+#include <net/tls.h>

#include <uapi/linux/keyctl.h>
#include <uapi/linux/handshake.h>
@@ -37,6 +38,8 @@ struct tls_handshake_req {
key_serial_t th_certificate;
key_serial_t th_privkey;

+ struct socket *th_sock;
+
unsigned int th_num_peerids;
key_serial_t th_peerid[5];
};
@@ -52,6 +55,7 @@ tls_handshake_req_init(struct handshake_req *req,
treq->th_consumer_data = args->ta_data;
treq->th_peername = args->ta_peername;
treq->th_keyring = args->ta_keyring;
+ treq->th_sock = args->ta_sock;
treq->th_num_peerids = 0;
treq->th_certificate = TLS_NO_CERT;
treq->th_privkey = TLS_NO_PRIVKEY;
@@ -85,6 +89,27 @@ static void tls_handshake_remote_peerids(struct tls_handshake_req *treq,
}
}

+/* Save the record size limit reported in CMD_DONE in the socket's TLS context. */
+static void tls_handshake_record_size(struct tls_handshake_req *treq,
+				      struct genl_info *info)
+{
+	struct nlattr *head = nlmsg_attrdata(info->nlhdr, GENL_HDRLEN);
+	int rem, len = nlmsg_attrlen(info->nlhdr, GENL_HDRLEN);
+	struct tls_context *tls_ctx;
+	struct nlattr *nla;
+
+	/* No socket, or no TLS context on it (e.g. failed handshake): skip. */
+	tls_ctx = treq->th_sock ? tls_get_ctx(treq->th_sock->sk) : NULL;
+	if (!tls_ctx)
+		return;
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla_type(nla) != HANDSHAKE_A_DONE_RECORD_SIZE_LIMIT)
+			continue;
+		tls_ctx->tls_record_size_limit = nla_get_u32(nla);
+		break;
+	}
+}
+
/**
* tls_handshake_done - callback to handle a CMD_DONE request
* @req: socket on which the handshake was performed
@@ -98,8 +123,10 @@ static void tls_handshake_done(struct handshake_req *req,
struct tls_handshake_req *treq = handshake_req_private(req);

treq->th_peerid[0] = TLS_NO_PEERID;
- if (info)
+ if (info) {
tls_handshake_remote_peerids(treq, info);
+ tls_handshake_record_size(treq, info);
+ }

if (!status)
set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fc88e34b7f33..70ffc4f5e382 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1024,6 +1024,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
ssize_t copied = 0;
struct sk_msg *msg_pl, *msg_en;
struct tls_rec *rec;
+ u32 tls_record_size_limit;
int required_size;
int num_async = 0;
bool full_record;
@@ -1045,6 +1046,9 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
}
}

+ tls_record_size_limit = min_not_zero(tls_ctx->tls_record_size_limit,
+ TLS_MAX_PAYLOAD_SIZE);
+
while (msg_data_left(msg)) {
if (sk->sk_err) {
ret = -sk->sk_err;
@@ -1066,7 +1070,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
orig_size = msg_pl->sg.size;
full_record = false;
try_to_copy = msg_data_left(msg);
- record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
+ record_room = tls_record_size_limit - msg_pl->sg.size;
if (try_to_copy >= record_room) {
try_to_copy = record_room;
full_record = true;
--
2.50.1