[RFC v2 21/22] net/socket: add API specification for socket()

From: Sasha Levin
Date: Tue Jun 24 2025 - 14:12:40 EST


Add kernel API specification for the socket() system call, documenting
all aspects of socket creation.

Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
net/socket.c | 489 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 489 insertions(+)

diff --git a/net/socket.c b/net/socket.c
index 9a0e720f08598..fa42497d72af2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -78,6 +78,7 @@
#include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/syscall_api_spec.h>
#include <linux/compat.h>
#include <linux/kmod.h>
#include <linux/audit.h>
@@ -89,6 +90,7 @@
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
#include <linux/io_uring/net.h>
+#include <linux/un.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -1692,6 +1694,493 @@ int __sys_socket(int family, int type, int protocol)
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}

+DEFINE_KERNEL_API_SPEC(sys_socket)
+ KAPI_DESCRIPTION("Create an endpoint for communication")
+ KAPI_LONG_DESC("Creates an endpoint for communication and returns a file descriptor "
+ "that refers to that endpoint. The file descriptor returned by a successful "
+ "call will be the lowest-numbered file descriptor not currently open for "
+ "the process. The socket has the indicated type, which specifies the "
+ "communication semantics. The socket() system call is the foundation of "
+ "all network programming in Linux, providing access to various network "
+ "protocols and communication mechanisms.")
+ KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+ KAPI_PARAM(0, "family", "int", "Protocol/address family (domain)")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+ KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_RANGE)
+ KAPI_PARAM_RANGE(0, 45) /* AF_UNSPEC to AF_MCTP */
+ KAPI_PARAM_CONSTRAINT("Common families: AF_UNIX (1), AF_INET (2), AF_INET6 (10), "
+ "AF_NETLINK (16), AF_PACKET (17). Others: AF_BLUETOOTH (31), AF_CAN (29), "
+ "AF_TIPC (30), AF_VSOCK (40), AF_XDP (44). Range: 0-45 (AF_MCTP). "
+ "PF_* are aliases. Negative or >= 46 returns EAFNOSUPPORT.")
+ KAPI_PARAM_END
+
+ KAPI_PARAM(1, "type", "int", "Socket type with optional flags")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+ KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_MASK)
+ KAPI_PARAM_VALID_MASK(SOCK_TYPE_MASK | SOCK_CLOEXEC | SOCK_NONBLOCK)
+ KAPI_PARAM_CONSTRAINT("Types: SOCK_STREAM (1), SOCK_DGRAM (2), SOCK_RAW (3), "
+ "SOCK_RDM (4), SOCK_SEQPACKET (5), SOCK_DCCP (6), SOCK_PACKET (10-obsolete). "
+ "Flags (since 2.6.27): SOCK_NONBLOCK, SOCK_CLOEXEC. Range: 0-10.")
+ KAPI_PARAM_END
+
+ KAPI_PARAM(2, "protocol", "int", "Protocol within the family")
+ KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+ KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+ KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_NONE)
+ KAPI_PARAM_CONSTRAINT("Usually 0 to select the default protocol for the given family and type. "
+ "For AF_INET/AF_INET6: IPPROTO_TCP (6), IPPROTO_UDP (17), IPPROTO_ICMP (1), "
+ "IPPROTO_RAW (255), etc. Must be >= 0 and < IPPROTO_MAX. "
+ "For AF_UNIX: only 0 or PF_UNIX (1) accepted. "
+ "For AF_PACKET: network byte order Ethernet protocol (e.g., ETH_P_IP). "
+ "For AF_NETLINK: NETLINK_ROUTE, NETLINK_AUDIT, etc. (0-31). "
+ "Protocol value passed through update_socket_protocol() BPF hook which may modify it.")
+ KAPI_PARAM_END
+
+ KAPI_RETURN("long", "File descriptor on success; negative error code on failure. "
+ "On success, returns the lowest available file descriptor. "
+ "The descriptor is automatically placed in the process's file descriptor table. "
+ "If SOCK_CLOEXEC is set, FD_CLOEXEC is set on the descriptor. "
+ "If SOCK_NONBLOCK is set, O_NONBLOCK is set on the file.")
+ KAPI_RETURN_TYPE(KAPI_TYPE_FD)
+ KAPI_RETURN_CHECK_TYPE(KAPI_RETURN_ERROR_CHECK)
+ KAPI_RETURN_SUCCESS(0)
+ KAPI_RETURN_END
+
+ /* Core error codes from __sock_create() and __sys_socket() */
+ KAPI_ERROR(0, -EAFNOSUPPORT, "EAFNOSUPPORT", "Address family not supported",
+ "The implementation does not support the specified address family. "
+ "Returned when: family < 0 || family >= NPROTO (46); "
+ "protocol family not registered in net_families[]; "
+ "protocol family module cannot be loaded; "
+ "try_module_get() fails on protocol family owner.")
+ KAPI_ERROR(1, -EINVAL, "EINVAL", "Invalid argument",
+ "Invalid argument specified. Returned when: "
+ "type < 0 || type >= SOCK_MAX (11); "
+ "invalid flags in type ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)); "
+ "other protocol-specific validation failures.")
+ KAPI_ERROR(2, -ENFILE, "ENFILE", "File table overflow",
+ "The system-wide limit on the total number of open files has been reached. "
+ "Returned when sock_alloc() fails due to new_inode_pseudo() failure.")
+ KAPI_ERROR(3, -EMFILE, "EMFILE", "Too many open files",
+ "The per-process limit on the number of open file descriptors has been reached. "
+ "Returned when sock_map_fd() cannot allocate a new file descriptor.")
+ KAPI_ERROR(4, -ENOMEM, "ENOMEM", "Out of memory",
+ "Insufficient kernel memory available. Can occur in: "
+ "sk_alloc() when allocating sock structure; "
+ "protocol-specific init functions; "
+ "security_sk_alloc() in LSM hooks; "
+ "various kmalloc()/kmem_cache_alloc() calls.")
+ KAPI_ERROR(5, -ENOBUFS, "ENOBUFS", "No buffer space available",
+ "Insufficient resources to create socket. Similar to ENOMEM but used by "
+ "some protocol families (e.g., AF_PACKET) to indicate resource exhaustion.")
+ KAPI_ERROR(6, -EPROTONOSUPPORT, "EPROTONOSUPPORT", "Protocol not supported",
+ "The protocol is not supported within this domain. Returned when: "
+ "AF_UNIX: protocol != 0 && protocol != PF_UNIX; "
+ "AF_INET/AF_INET6: protocol not found in inetsw[] array; "
+ "AF_NETLINK: protocol < 0 || protocol >= MAX_LINKS (32).")
+ KAPI_ERROR(7, -ESOCKTNOSUPPORT, "ESOCKTNOSUPPORT", "Socket type not supported",
+ "The socket type is not supported within this domain. Returned when: "
+ "AF_UNIX: type not in {STREAM, DGRAM, SEQPACKET, RAW}; "
+ "AF_INET/AF_INET6: no matching (type, protocol) in inetsw[]; "
+ "AF_PACKET: type not in {DGRAM, RAW, PACKET}; "
+ "AF_NETLINK: type not in {RAW, DGRAM}.")
+ KAPI_ERROR(8, -EPERM, "EPERM", "Operation not permitted",
+ "Permission denied due to insufficient privileges. Returned when: "
+ "AF_INET/AF_INET6 with SOCK_RAW: missing CAP_NET_RAW; "
+ "AF_PACKET: missing CAP_NET_RAW; "
+ "Some protocol families may have additional restrictions.")
+ KAPI_ERROR(9, -EACCES, "EACCES", "Permission denied",
+ "Permission denied by Linux Security Module (SELinux, AppArmor, etc.). "
+ "Returned by security_socket_create() or security_socket_post_create() hooks.")
+ KAPI_ERROR(10, -EAGAIN, "EAGAIN", "Resource temporarily unavailable",
+ "Transient resource shortage. Can be returned by some protocol families "
+ "during initialization when resources are temporarily exhausted.")
+ KAPI_ERROR(11, -EINTR, "EINTR", "Interrupted system call",
+ "Operation interrupted by signal. Rare for socket() but possible if "
+ "module loading is interrupted or during memory allocation with GFP_KERNEL.")
+ KAPI_ERROR(12, -EFAULT, "EFAULT", "Bad address",
+ "Not directly returned by socket() since all parameters are values, not pointers. "
+ "Listed for completeness as it appears in documentation.")
+ KAPI_ERROR(13, -ENOSYS, "ENOSYS", "Function not implemented",
+ "Can occur in containers using alt-syscall where socket() is not whitelisted, "
+ "or on architectures where socket() is not implemented.")
+
+ KAPI_ERROR_COUNT(14)
+ KAPI_PARAM_COUNT(3)
+ KAPI_SINCE_VERSION("4.2BSD")
+
+ KAPI_EXAMPLES("/* Create a TCP socket */\n"
+ "int tcp_sock = socket(AF_INET, SOCK_STREAM, 0);\n"
+ "if (tcp_sock < 0) {\n"
+ " perror(\"socket\");\n"
+ " exit(EXIT_FAILURE);\n"
+ "}\n\n"
+ "/* Create a non-blocking UDP socket with close-on-exec */\n"
+ "int udp_sock = socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);\n\n"
+ "/* Create a raw ICMP socket (requires CAP_NET_RAW) */\n"
+ "int raw_sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);\n\n"
+ "/* Create a Unix domain datagram socket */\n"
+ "int unix_sock = socket(AF_UNIX, SOCK_DGRAM, 0);\n\n"
+ "/* Create a netlink socket for routing information */\n"
+ "int nl_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);\n\n"
+ "/* Create a packet socket for raw Ethernet frames (requires CAP_NET_RAW) */\n"
+ "int packet_sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));\n\n"
+ "/* Create a Bluetooth L2CAP socket */\n"
+ "int bt_sock = socket(AF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP);")
+
+ KAPI_NOTES("Implementation details:\n"
+ "- Uses RCU to safely access net_families[] array\n"
+ "- May trigger automatic module loading via request_module(\"net-pf-%d\", family)\n"
+ "- Allocates inode from sock_inode_cache via new_inode_pseudo()\n"
+ "- Each protocol family registers via sock_register() with unique family number\n"
+ "- Socket creation involves: sock_alloc() -> pf->create() -> sock_map_fd()\n"
+ "- The update_socket_protocol() BPF hook can modify the protocol parameter\n"
+ "- LSM hooks called: security_socket_create() and security_socket_post_create()\n"
+ "- Creates struct socket (VFS layer) and struct sock (network layer)\n"
+ "- Socket state initialized to SS_UNCONNECTED\n"
+ "- File operations set to socket_file_ops\n"
+ "- The (PF_INET, SOCK_PACKET) combination is deprecated since Linux 2.0\n"
+ "Build-time checks ensure SOCK_CLOEXEC == O_CLOEXEC and flag consistency")
+
+ /* Lock specifications: RCU covers only the net_families[] lookup; module refcounts cover the rest */
+ KAPI_LOCK(0, "rcu_read_lock", KAPI_LOCK_RCU)
+ KAPI_LOCK_ACQUIRED
+ KAPI_LOCK_RELEASED
+ KAPI_LOCK_DESC("Protects net_families[] array access during protocol family lookup. "
+ "Acquired before rcu_dereference(net_families[family]), "
+ "released once try_module_get(pf->owner) succeeds, before pf->create(), or on error path.")
+ KAPI_LOCK_END
+
+ KAPI_LOCK(1, "pf->owner module refcount", KAPI_LOCK_CUSTOM)
+ KAPI_LOCK_ACQUIRED
+ KAPI_LOCK_RELEASED
+ KAPI_LOCK_DESC("Prevents protocol family module unload during socket creation. "
+ "try_module_get(pf->owner) before pf->create(), "
+ "module_put(pf->owner) after completion.")
+ KAPI_LOCK_END
+
+ KAPI_LOCK(2, "sock->ops->owner module refcount", KAPI_LOCK_CUSTOM)
+ KAPI_LOCK_ACQUIRED
+ KAPI_LOCK_DESC("Prevents socket operations module unload during socket lifetime. "
+ "try_module_get(sock->ops->owner) after successful creation, "
+ "released only on sock_release() when socket is closed.")
+ KAPI_LOCK_END
+
+ KAPI_LOCK_COUNT(3)
+
+ /* Signal handling */
+ KAPI_SIGNAL(0, 0, "Module loading", KAPI_SIGNAL_RECEIVE, KAPI_SIGNAL_ACTION_RESTART)
+ KAPI_SIGNAL_CONDITION("CONFIG_MODULES && request_module() called")
+ KAPI_SIGNAL_DESC("Module loading via request_module() is interruptible. "
+ "Signal delivery causes -EINTR from modprobe execution.")
+ KAPI_SIGNAL_TIMING(KAPI_SIGNAL_TIME_DURING)
+ KAPI_SIGNAL_INTERRUPTIBLE
+ KAPI_SIGNAL_END
+
+ KAPI_SIGNAL_COUNT(1)
+
+ /* Side effects */
+ KAPI_SIDE_EFFECT(0, KAPI_EFFECT_ALLOC_MEMORY | KAPI_EFFECT_RESOURCE_CREATE,
+ "socket structures",
+ "Allocates struct socket (VFS), struct sock (network), and protocol-specific data. "
+ "Memory from: sock_inode_cache, protocol's slab cache, and general kmalloc.")
+ KAPI_EFFECT_CONDITION("Always occurs on successful socket creation")
+ KAPI_EFFECT_REVERSIBLE
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(1, KAPI_EFFECT_RESOURCE_CREATE,
+ "file descriptor",
+ "Allocates new file descriptor at lowest available index. "
+ "Creates struct file with socket_file_ops. Sets up file->private_data = socket.")
+ KAPI_EFFECT_CONDITION("Always occurs on successful socket creation")
+ KAPI_EFFECT_REVERSIBLE
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(2, KAPI_EFFECT_FILESYSTEM,
+ "protocol module",
+ "May trigger request_module(\"net-pf-%d\", family) to load protocol module. "
+ "Executes /sbin/modprobe in userspace context.")
+ KAPI_EFFECT_CONDITION("CONFIG_MODULES=y && !net_families[family] && first attempt")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE,
+ "LSM and audit",
+ "Calls security_socket_create() pre-creation and security_socket_post_create() "
+ "post-creation. May generate audit events. SELinux/AppArmor may deny.")
+ KAPI_EFFECT_CONDITION("CONFIG_SECURITY=y or CONFIG_AUDIT=y")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE,
+ "BPF programs",
+ "update_socket_protocol() hook can modify protocol parameter. "
+ "BPF_CGROUP_RUN_PROG_INET_SOCK() may run for AF_INET/AF_INET6.")
+ KAPI_EFFECT_CONDITION("BPF programs attached to cgroup or socket hooks")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(5, KAPI_EFFECT_NETWORK | KAPI_EFFECT_HARDWARE,
+ "network stack",
+ "Initializes protocol-specific state. May interact with network hardware "
+ "(e.g., AF_PACKET binds to network interface).")
+ KAPI_EFFECT_CONDITION("Protocol family specific")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT(6, KAPI_EFFECT_MODIFY_STATE,
+ "resource accounting",
+ "Updates task and memory cgroup accounting. Charges socket memory to owner. "
+ "Increments global socket counters.")
+ KAPI_EFFECT_CONDITION("CONFIG_MEMCG=y or other accounting enabled")
+ KAPI_SIDE_EFFECT_END
+
+ KAPI_SIDE_EFFECT_COUNT(7)
+
+ /* State transitions */
+ KAPI_STATE_TRANS(0, "file descriptor table",
+ "n open descriptors", "n+1 open descriptors",
+ "New fd allocated at min(available). Installs file in current->files->fdt->fd[]")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(1, "socket state machine",
+ "non-existent", "SS_UNCONNECTED",
+ "Socket created in unconnected state, ready for bind() or connect()")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(2, "network namespace",
+ "no socket", "socket registered",
+ "Socket associated with current->nsproxy->net_ns network namespace")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS(3, "memory accounting",
+ "pre-allocation", "memory charged",
+ "Socket memory charged to owner's memcg and rlimits")
+ KAPI_STATE_TRANS_END
+
+ KAPI_STATE_TRANS_COUNT(4)
+
+ /* Networking-specific specifications */
+
+ /* Socket state specification */
+ KAPI_SOCKET_STATE_REQ(KAPI_SOCK_STATE_UNSPEC)
+ KAPI_SOCKET_STATE_RESULT(KAPI_SOCK_STATE_OPEN)
+ KAPI_SOCKET_STATE_COND("Successful socket creation")
+ KAPI_SOCKET_STATE_PROTOS(KAPI_PROTO_ALL)
+ KAPI_SOCKET_STATE_END
+
+ /* Protocol-specific behaviors - detailed specifications */
+ KAPI_PROTOCOL_BEHAVIOR(0, KAPI_PROTO_TCP,
+ "TCP (Transmission Control Protocol) creates reliable, ordered, connection-oriented "
+ "byte streams. Features: 3-way handshake connection establishment; sequence numbers "
+ "for ordering; acknowledgments and retransmissions for reliability; flow control "
+ "via sliding window; congestion control (Reno/CUBIC/BBR); Nagle algorithm for "
+ "small packet aggregation; keep-alive probes; urgent data via MSG_OOB. "
+ "Socket combines (AF_INET/AF_INET6, SOCK_STREAM, IPPROTO_TCP).")
+ KAPI_PROTOCOL_FLAGS(0, "TCP-specific socket options via SOL_TCP level")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(1, KAPI_PROTO_UDP,
+ "UDP (User Datagram Protocol) creates unreliable, connectionless datagram service. "
+ "Features: no connection establishment; best-effort delivery; message boundaries "
+ "preserved; no flow/congestion control; optional checksums; multicast/broadcast "
+ "capable; lower overhead than TCP. Maximum datagram size 65507 bytes (65535 - "
+ "IP header - UDP header). connect() on UDP socket sets default destination. "
+ "Socket combines (AF_INET/AF_INET6, SOCK_DGRAM, IPPROTO_UDP).")
+ KAPI_PROTOCOL_FLAGS(0, "UDP-specific options like UDP_CORK via SOL_UDP")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(2, KAPI_PROTO_UNIX,
+ "Unix domain sockets provide high-performance local IPC with filesystem-based "
+ "addressing or Linux abstract namespace. Features: reliable delivery; in-order "
+ "semantics for SOCK_STREAM; message boundaries for SOCK_DGRAM/SOCK_SEQPACKET; "
+ "credential passing via SCM_CREDENTIALS; file descriptor passing via SCM_RIGHTS; "
+ "no network overhead; kernel-only data path. SOCK_RAW mapped to SOCK_DGRAM. "
+ "Maximum datagram size 130688 bytes by default (net.core.wmem_max).")
+ KAPI_PROTOCOL_FLAGS(0, "No Unix-specific socket level; uses SOL_SOCKET only")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(3, KAPI_PROTO_RAW,
+ "Raw sockets provide direct access to network layer (IP) or link layer (Ethernet). "
+ "Features: receive/send raw IP packets; implement custom protocols; packet "
+ "sniffing; bypass transport layer. IP header included based on IP_HDRINCL option. "
+ "Protocol field specifies which protocol to receive (IPPROTO_ICMP, etc.) or "
+ "IPPROTO_RAW to send any. Link layer access via AF_PACKET. Requires CAP_NET_RAW "
+ "capability. Used by ping, traceroute, nmap, tcpdump.")
+ KAPI_PROTOCOL_FLAGS(0, "IP_HDRINCL via SOL_IP; raw-specific options (ICMP_FILTER) via SOL_RAW")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(4, KAPI_PROTO_PACKET,
+ "Packet sockets provide direct access to link layer (Layer 2). Features: "
+ "send/receive raw Ethernet frames; implement network protocols in userspace; "
+ "packet capture and injection; access to all packets on interface. SOCK_RAW "
+ "provides full Layer 2 header; SOCK_DGRAM provides cooked packets without "
+ "Layer 2 header. Protocol specifies Ethernet protocol (ETH_P_IP, ETH_P_ALL). "
+ "High-performance variants: PACKET_MMAP, PACKET_FANOUT. Requires CAP_NET_RAW.")
+ KAPI_PROTOCOL_FLAGS(0, "Extensive options via SOL_PACKET level")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(5, KAPI_PROTO_NETLINK,
+ "Netlink sockets provide kernel/user-space communication interface. Features: "
+ "reliable datagram service; multicast groups; message-based; TLV attributes; "
+ "async notifications; used for routing, netfilter, audit, SELinux, etc. "
+ "Protocol specifies subsystem: NETLINK_ROUTE (routing/link), NETLINK_GENERIC, "
+ "NETLINK_NETFILTER, NETLINK_AUDIT, etc. No special capabilities for most "
+ "protocols except administrative operations.")
+ KAPI_PROTOCOL_FLAGS(0, "Netlink-specific options and attributes")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR(6, KAPI_PROTO_SCTP,
+ "SCTP (Stream Control Transmission Protocol) provides reliable, message-oriented "
+ "service with multi-streaming and multi-homing. Features: message boundaries; "
+ "ordered/unordered delivery; multi-streaming prevents head-of-line blocking; "
+ "multi-homing for redundancy; heartbeats; partial reliability extension. "
+ "4-way handshake with cookie mechanism prevents SYN floods. "
+ "Socket combines (AF_INET/AF_INET6, SOCK_STREAM/SOCK_SEQPACKET, IPPROTO_SCTP).")
+ KAPI_PROTOCOL_FLAGS(0, "SCTP-specific options via SOL_SCTP level")
+ KAPI_PROTOCOL_BEHAVIOR_END
+
+ KAPI_PROTOCOL_BEHAVIOR_COUNT(7)
+
+ /* Buffer specification - not applicable for socket creation */
+ KAPI_BUFFER_SPEC(0)
+ KAPI_BUFFER_SIZE(0, 0, 0)
+ KAPI_BUFFER_END
+
+ /* Async specification - socket creation is synchronous */
+ KAPI_ASYNC_SPEC(KAPI_ASYNC_BLOCK, 0)
+ KAPI_ASYNC_END
+
+ /* Network-specific errors are already covered in main error list */
+
+ /* Address families supported - comprehensive list */
+ KAPI_ADDR_FAMILY(0, AF_UNIX, "AF_UNIX/AF_LOCAL", sizeof(struct sockaddr_un), 2, 110)
+ KAPI_ADDR_FORMAT("struct sockaddr_un { sa_family_t sun_family; char sun_path[108]; }")
+ KAPI_ADDR_FEATURES(false, false, false)
+ KAPI_ADDR_SPECIAL("Abstract namespace: sun_path[0] == '\\0'; "
+ "Autobind: empty sun_path gets random abstract address; "
+ "Filesystem: normal paths follow filesystem permissions")
+ KAPI_ADDR_PORTS(0, 0) /* No port concept */
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY(1, AF_INET, "AF_INET", sizeof(struct sockaddr_in), 16, 16)
+ KAPI_ADDR_FORMAT("struct sockaddr_in { sa_family_t sin_family; __be16 sin_port; "
+ "struct in_addr sin_addr; char sin_zero[8]; }")
+ KAPI_ADDR_FEATURES(true, true, true)
+ KAPI_ADDR_SPECIAL("INADDR_ANY (0.0.0.0) - wildcard; "
+ "INADDR_LOOPBACK (127.0.0.1) - loopback; "
+ "INADDR_BROADCAST (255.255.255.255) - broadcast; "
+ "224.0.0.0/4 - multicast range")
+ KAPI_ADDR_PORTS(0, 65535) /* 0 = ephemeral port assignment */
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY(2, AF_INET6, "AF_INET6", sizeof(struct sockaddr_in6), 28, 28)
+ KAPI_ADDR_FORMAT("struct sockaddr_in6 { sa_family_t sin6_family; __be16 sin6_port; "
+ "__be32 sin6_flowinfo; struct in6_addr sin6_addr; __u32 sin6_scope_id; }")
+ KAPI_ADDR_FEATURES(true, true, false) /* No broadcast in IPv6 */
+ KAPI_ADDR_SPECIAL("in6addr_any (::) - wildcard; "
+ "in6addr_loopback (::1) - loopback; "
+ "ff00::/8 - multicast range; "
+ "fe80::/10 - link-local; "
+ "::ffff:0:0/96 - IPv4-mapped addresses")
+ KAPI_ADDR_PORTS(0, 65535)
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY(3, AF_NETLINK, "AF_NETLINK", sizeof(struct sockaddr_nl), 12, 12)
+ KAPI_ADDR_FORMAT("struct sockaddr_nl { sa_family_t nl_family; __u16 nl_pad; "
+ "__u32 nl_pid; __u32 nl_groups; }")
+ KAPI_ADDR_FEATURES(false, true, false) /* Multicast via nl_groups */
+ KAPI_ADDR_SPECIAL("nl_pid: 0 = kernel; getpid() = this process; "
+ "nl_groups: bitmask of multicast groups")
+ KAPI_ADDR_PORTS(0, 0) /* Uses nl_pid instead */
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY(4, AF_PACKET, "AF_PACKET", sizeof(struct sockaddr_ll), 20, 20)
+ KAPI_ADDR_FORMAT("struct sockaddr_ll { sa_family_t sll_family; __be16 sll_protocol; "
+ "int sll_ifindex; __u16 sll_hatype; __u8 sll_pkttype; "
+ "__u8 sll_halen; __u8 sll_addr[8]; }")
+ KAPI_ADDR_FEATURES(true, true, true) /* Via sll_pkttype */
+ KAPI_ADDR_SPECIAL("sll_ifindex: 0 = any interface; "
+ "sll_protocol: ETH_P_ALL = all protocols; "
+ "sll_pkttype: PACKET_HOST/BROADCAST/MULTICAST/OTHERHOST")
+ KAPI_ADDR_PORTS(0, 0) /* Layer 2, no ports */
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY(5, AF_BLUETOOTH, "AF_BLUETOOTH", sizeof(struct sockaddr), 14, 258)
+ KAPI_ADDR_FORMAT("Varies by protocol: sockaddr_l2 (L2CAP), sockaddr_rc (RFCOMM), "
+ "sockaddr_hci (HCI), sockaddr_sco (SCO)")
+ KAPI_ADDR_FEATURES(false, false, false)
+ KAPI_ADDR_SPECIAL("BDADDR_ANY (00:00:00:00:00:00) - any device; "
+ "BDADDR_LOCAL (00:00:00:ff:ff:ff) - local adapter")
+ KAPI_ADDR_PORTS(1, 30) /* PSM for L2CAP, channel for RFCOMM */
+ KAPI_ADDR_FAMILY_END
+
+ KAPI_ADDR_FAMILY_COUNT(6)
+
+ /* Security specification - CAP_NET_RAW gates AF_PACKET and AF_INET/AF_INET6 raw sockets only */
+ KAPI_CAPABILITY(0, CAP_NET_RAW, "CAP_NET_RAW", KAPI_CAP_GRANT_PERMISSION)
+ KAPI_CAP_CONDITION("family == AF_PACKET || ((family == AF_INET || family == AF_INET6) && type == SOCK_RAW)")
+ KAPI_CAP_ALLOWS("Raw socket creation and packet injection")
+ KAPI_CAP_WITHOUT("Operation not permitted (EPERM)")
+ KAPI_CAPABILITY_END
+
+ KAPI_CAPABILITY_COUNT(1)
+
+ /* Operation characteristics */
+ .is_connection_oriented = false,
+ .is_message_oriented = false,
+ .supports_oob_data = false,
+ .supports_peek = false,
+ .supports_select_poll = false,
+ .is_reentrant = true,
+
+ /* Semantic descriptions */
+ KAPI_NET_DATA_TRANSFER("Not applicable - socket() only creates the endpoint")
+
+ /* Additional constraints and validation rules */
+ KAPI_CONSTRAINT(0, "Protocol/Type Compatibility",
+ "Not all (family, type, protocol) combinations are valid. "
+ "Common valid combinations: "
+ "(AF_INET, SOCK_STREAM, IPPROTO_TCP); "
+ "(AF_INET, SOCK_DGRAM, IPPROTO_UDP); "
+ "(AF_INET, SOCK_RAW, IPPROTO_ICMP); "
+ "(AF_UNIX, SOCK_STREAM, 0); "
+ "(AF_UNIX, SOCK_DGRAM, 0); "
+ "(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); "
+ "(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)")
+ KAPI_CONSTRAINT_END
+
+ KAPI_CONSTRAINT(1, "Module Loading",
+ "If protocol family module not loaded, socket() may block during "
+ "request_module() execution. This is interruptible and may take "
+ "significant time. Modules loaded: net-pf-N where N is family number.")
+ KAPI_CONSTRAINT_END
+
+ KAPI_CONSTRAINT(2, "Capability Requirements",
+ "CAP_NET_RAW required for: "
+ "- AF_INET/AF_INET6 with SOCK_RAW "
+ "- AF_PACKET with any socket type "
+ "- Some AF_NETLINK operations require CAP_NET_ADMIN "
+ "- AF_BLUETOOTH may require CAP_NET_ADMIN for some operations")
+ KAPI_CONSTRAINT_END
+
+ KAPI_CONSTRAINT(3, "Network Namespace",
+ "Socket is created in current->nsproxy->net_ns network namespace. "
+ "Socket is bound to this namespace for its lifetime. "
+ "Different namespaces have independent network stacks.")
+ KAPI_CONSTRAINT_END
+
+ KAPI_CONSTRAINT(4, "Memory Limits",
+ "Socket creation respects: "
+ "- RLIMIT_NOFILE for file descriptor limits "
+ "- Memory cgroup limits for socket memory "
+ "- System-wide socket buffer defaults (net.core.rmem_default, net.core.wmem_default) "
+ "- Per-protocol memory limits")
+ KAPI_CONSTRAINT_END
+
+ KAPI_CONSTRAINT_COUNT(5)
+
+KAPI_END_SPEC;
+
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
return __sys_socket(family, type, protocol);
--
2.39.5