commit feaf4fe8a8e4509540286899d02cd88f09c0d343 Author: Eric Paris Date: Thu Aug 9 14:08:12 2012 -0400 Network/Security: allocate security data when we allocate unicast sockets commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a regression because it did not properly initialize the new per cpu sockets. This was reported and bisected by John Stultz: [ 69.272927] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 69.273374] IP: [] selinux_ip_postroute_compat+0xa4/0xe0 [ 69.273374] PGD 3a85b067 PUD 3f50b067 PMD 0 [ 69.273374] Oops: 0000 [#1] PREEMPT SMP [ 69.273374] CPU 3 [ 69.273374] Pid: 2392, comm: hwclock Not tainted 3.6.0-rc1john+ #106 Bochs Bochs [ 69.273374] RIP: 0010:[] [] selinux_ip_postroute_compat+0xa4/0xe0 [ 69.273374] RSP: 0018:ffff88003f003720 EFLAGS: 00010246 [ 69.273374] RAX: 0000000000000000 RBX: ffff88003f5fa9d8 RCX: 0000000000000006 [ 69.273374] RDX: ffff88003f003740 RSI: ffff88003c6b256c RDI: ffff88003f5fa9d8 [ OK ] [ 69.273374] RBP: ffff88003f0037a0 R08: 0000000000000000 R09: ffff88003f1d0cc0 [ 69.273374] R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000000 [ 69.273374] R13: 0000000000000002 R14: ffff88003f0037c0 R15: 0000000000000004 [ 69.273374] FS: 00007fa398211700(0000) GS:ffff88003f000000(0000) knlGS:0000000000000000 [ 69.273374] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 69.273374] CR2: 0000000000000010 CR3: 000000003b52a000 CR4: 00000000000006e0 [ 69.273374] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 69.273374] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 69.273374] Process hwclock (pid: 2392, threadinfo ffff88003a0ee000, task ffff88003fa82b80) [ 69.273374] Stack: [ 69.273374] ffff88003c6b2558 0000000000000006 0000000000000000 0000160067d70002 [ 69.273374] 0f02000a0202000a 0000000000000000 0000000000000000 0000000000000000 [ 69.273374] ffff88003f003802 ffff88003f003728 ffff88003f1d42d0 ffff88003d6c3560 [ 69.273374] Call Trace: [ 69.273374] [ 69.273374] [] selinux_ip_postroute+0x2ab/0x3e0 [ 69.273374] [] selinux_ipv4_postroute+0x1c/0x20 [ 69.273374] [] nf_iterate+0xac/0x140 [ 69.273374] [] nf_hook_slow+0xb5/0x210 [ 69.273374] [] ip_output+0xaa/0x150 [ 69.273374] [] ip_local_out+0x7f/0x110 [ 69.273374] [] ip_send_skb+0xe/0x40 [ 69.273374] [] ip_push_pending_frames+0x2b/0x30 [ 69.273374] [] ip_send_unicast_reply+0x2c7/0x3c0 [ 69.273374] [] tcp_v4_send_reset+0x1f5/0x3f0 [ 69.273374] [] tcp_v4_rcv+0x2bb/0x1080 [ 69.273374] [] ip_local_deliver_finish+0x133/0x4d0 [ 69.273374] [] ip_local_deliver+0x90/0xa0 [ 69.273374] [] ip_rcv_finish+0x262/0x8f0 [ 69.273374] [] ip_rcv+0x352/0x3a0 [ 69.323844] [] __netif_receive_skb+0xcb4/0x10e0 [ 69.323844] [] netif_receive_skb+0x18d/0x230 [ 69.323844] [] virtnet_poll+0x58c/0x7b0 [ 69.323844] [] net_rx_action+0x289/0x550 [ 69.323844] [] __do_softirq+0x1da/0x560 [ 69.323844] [] call_softirq+0x1c/0x30 [ 69.323844] [] do_softirq+0x105/0x1e0 [ 69.323844] [] irq_exit+0x9e/0x100 [ 69.323844] [] do_IRQ+0x63/0xd0 [ 69.323844] [] common_interrupt+0x6f/0x6f [ 69.323844] [ 69.323844] [] __might_sleep+0x1cd/0x280 [ 69.323844] [] might_fault+0x34/0xb0 [ 69.323844] [] sys_gettimeofday+0xbe/0xf0 [ 69.323844] [] system_call_fastpath+0x16/0x1b [ 69.323844] Code: c0 45 31 c9 b1 01 ba 2a 00 00 00 e8 a7 89 ff ff 85 c0 b9 00 00 6f 00 74 0e 48 83 c4 70 89 c8 5b 41 5c 5d c3 0f 1f 00 0f b6 4d ef <41> 8b 7c 24 10 48 8d 55 c0 48 89 de e8 ab 6d 01 00 83 f8 01 19 [ 69.323844] RIP [] selinux_ip_postroute_compat+0xa4/0xe0 [ 69.323844] RSP [ 69.323844] CR2: 0000000000000010 [ 69.357489] ---[ end trace 0cd3e1a60dee6096 ]--- [ 69.358353] Kernel panic - not syncing: Fatal exception in interrupt The reason for the regresion is because of how the new sock is created. The old socket was created using inet_ctl_sock_create() which uses all generic functions to establish the struct socket, struct sock, and do all of the allocation and initialization of the socket and its appropriate security data. aka: inet_ctl_sock_create sock_create_kern __sock_create pf->create (inet_create) sk_alloc sk_prot_alloc sec_sk_alloc() These new per_cpu skip all of that initialization and instead try to do it by hand. Doing it by hand causes a second regression. The __sock_create() function calls security_socket_post_create() which initializes the securty state on both the socket and the sock. However here we don't set up the security structure. I'd like to use security_socket_post_create() but it needs the socket and in this case we skipped straight to the struct sock. Looks like this custom hackary is going to require a second patch which exposes the inards of security_socket_post_create() as security_sock_post_create() so we can do the labeling of this created this way. But at least this one won't panic the kernel. Reported-by: John Stultz Bisected-by: John Stultz Signed-off-by: Eric Paris Cc: Eric Dumazet Cc: Paul Moore Cc: "Serge E. Hallyn" diff --git a/include/linux/security.h b/include/linux/security.h index 4e5a73c..1e0c5a7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1601,7 +1601,7 @@ struct security_operations { int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb); int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); - int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); + int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority, int numa_node); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); @@ -2539,7 +2539,7 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb); int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, int __user *optlen, unsigned len); int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid); -int security_sk_alloc(struct sock *sk, int family, gfp_t priority); +int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); void security_sk_classify_flow(struct sock *sk, struct flowi *fl); @@ -2667,7 +2667,7 @@ static inline int security_socket_getpeersec_dgram(struct socket *sock, struct s return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node) { return 0; } diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444..340905d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -105,7 +105,7 @@ extern void ip_send_check(struct iphdr *ip); extern int __ip_local_out(struct sk_buff *skb); extern int ip_local_out(struct sk_buff *skb); extern int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl); -extern void ip_init(void); +extern int ip_init(void); extern int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced8..2cab455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1186,7 +1186,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (sk != NULL) { kmemcheck_annotate_bitfield(sk, flags); - if (security_sk_alloc(sk, family, priority)) + if (security_sk_alloc(sk, family, priority, numa_node_id())) goto out_free; if (!try_module_get(prot->owner)) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6681ccf..3f79f37 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1720,7 +1720,8 @@ static int __init inet_init(void) * Set the IP module up */ - ip_init(); + if (ip_init() < 0) + panic("Failed to initialize ip.\n"); tcp_v4_init(); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 76dde25..4a775b3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1545,12 +1545,23 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, ip_rt_put(rt); } -void __init ip_init(void) +int __init ip_init(void) { + struct inet_sock *sock; + int rc, cpu; + ip_rt_init(); inet_initpeers(); #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS) igmp_mc_proc_init(); #endif + + for_each_possible_cpu(cpu) { + sock = &per_cpu(unicast_sock, cpu); + rc = security_sk_alloc(&sock->sk, PF_INET, GFP_KERNEL, cpu_to_node(cpu)); + if (rc) + return rc; + } + return 0; } diff --git a/security/capability.c b/security/capability.c index 61095df..0525d28 100644 --- a/security/capability.c +++ b/security/capability.c @@ -650,7 +650,7 @@ static int cap_socket_getpeersec_dgram(struct socket *sock, return -ENOPROTOOPT; } -static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority) +static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority, int numa_node) { return 0; } diff --git a/security/security.c b/security/security.c index 860aeb3..02a7f76 100644 --- a/security/security.c +++ b/security/security.c @@ -1146,9 +1146,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u } EXPORT_SYMBOL(security_socket_getpeersec_dgram); -int security_sk_alloc(struct sock *sk, int family, gfp_t priority) +int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node) { - return security_ops->sk_alloc_security(sk, family, priority); + return security_ops->sk_alloc_security(sk, family, priority, numa_node); } void security_sk_free(struct sock *sk) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6c77f63..bdcfd0c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4289,11 +4289,12 @@ out: return 0; } -static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) +static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority, + int numa_node) { struct sk_security_struct *sksec; - sksec = kzalloc(sizeof(*sksec), priority); + sksec = kmalloc_node(sizeof(*sksec), priority | __GFP_ZERO, numa_node); if (!sksec) return -ENOMEM; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8221514..b43ae5d 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1754,12 +1754,14 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) * * Returns 0 on success, -ENOMEM is there's no memory */ -static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) +static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags, + int numa_node) { char *csp = smk_of_current(); struct socket_smack *ssp; - ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); + ssp = kmalloc_node(sizeof(struct socket_smack), gfp_flags | __GFP_ZERO, + numa_node); if (ssp == NULL) return -ENOMEM;