[PATCH 05/11] selinux: use dlist for isec inode list

From: Dave Chinner
Date: Wed Dec 06 2023 - 01:06:52 EST


From: Dave Chinner <dchinner@xxxxxxxxxx>

Because it's a horrible point of lock contention under heavily
concurrent directory traversals...

- 12.14% d_instantiate
- 12.06% security_d_instantiate
- 12.13% selinux_d_instantiate
- 12.16% inode_doinit_with_dentry
- 15.45% _raw_spin_lock
- do_raw_spin_lock
14.68% __pv_queued_spin_lock_slowpath


Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
include/linux/dlock-list.h | 9 ++++
security/selinux/hooks.c | 72 +++++++++++++++----------------
security/selinux/include/objsec.h | 6 +--
3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index 327cb9edc7e3..7ad933b8875d 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -132,6 +132,15 @@ extern void dlock_lists_add(struct dlock_list_node *node,
struct dlock_list_heads *dlist);
extern void dlock_lists_del(struct dlock_list_node *node);

+static inline void
+dlock_list_del_iter(struct dlock_list_iter *iter,
+ struct dlock_list_node *node)
+{
+ WARN_ON_ONCE((iter->entry != READ_ONCE(node->head)));
+ list_del_init(&node->list);
+ WRITE_ONCE(node->head, NULL);
+}
+
/*
* Find the first entry of the next available list.
*/
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index feda711c6b7b..0358d7c66949 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -340,26 +340,11 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr
static void inode_free_security(struct inode *inode)
{
struct inode_security_struct *isec = selinux_inode(inode);
- struct superblock_security_struct *sbsec;

if (!isec)
return;
- sbsec = selinux_superblock(inode->i_sb);
- /*
- * As not all inode security structures are in a list, we check for
- * empty list outside of the lock to make sure that we won't waste
- * time taking a lock doing nothing.
- *
- * The list_del_init() function can be safely called more than once.
- * It should not be possible for this function to be called with
- * concurrent list_add(), but for better safety against future changes
- * in the code, we use list_empty_careful() here.
- */
- if (!list_empty_careful(&isec->list)) {
- spin_lock(&sbsec->isec_lock);
- list_del_init(&isec->list);
- spin_unlock(&sbsec->isec_lock);
- }
+ if (!list_empty(&isec->list.list))
+ dlock_lists_del(&isec->list);
}

struct selinux_mnt_opts {
@@ -547,6 +532,8 @@ static int sb_finish_set_opts(struct super_block *sb)
struct superblock_security_struct *sbsec = selinux_superblock(sb);
struct dentry *root = sb->s_root;
struct inode *root_inode = d_backing_inode(root);
+ struct dlock_list_iter iter;
+ struct inode_security_struct *isec, *n;
int rc = 0;

if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
@@ -570,27 +557,28 @@ static int sb_finish_set_opts(struct super_block *sb)
/* Initialize the root inode. */
rc = inode_doinit_with_dentry(root_inode, root);

- /* Initialize any other inodes associated with the superblock, e.g.
- inodes created prior to initial policy load or inodes created
- during get_sb by a pseudo filesystem that directly
- populates itself. */
- spin_lock(&sbsec->isec_lock);
- while (!list_empty(&sbsec->isec_head)) {
- struct inode_security_struct *isec =
- list_first_entry(&sbsec->isec_head,
- struct inode_security_struct, list);
+ /*
+ * Initialize any other inodes associated with the superblock, e.g.
+ * inodes created prior to initial policy load or inodes created during
+ * get_sb by a pseudo filesystem that directly populates itself.
+ */
+ init_dlock_list_iter(&iter, &sbsec->isec_head);
+ dlist_for_each_entry_safe(isec, n, &iter, list) {
struct inode *inode = isec->inode;
- list_del_init(&isec->list);
- spin_unlock(&sbsec->isec_lock);
+
+ dlock_list_del_iter(&iter, &isec->list);
+ dlock_list_unlock(&iter);
+
inode = igrab(inode);
if (inode) {
if (!IS_PRIVATE(inode))
inode_doinit_with_dentry(inode, NULL);
iput(inode);
}
- spin_lock(&sbsec->isec_lock);
+
+ dlock_list_relock(&iter);
}
- spin_unlock(&sbsec->isec_lock);
+ WARN_ON_ONCE(!dlock_lists_empty(&sbsec->isec_head));
return rc;
}

@@ -1428,10 +1416,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
/* Defer initialization until selinux_complete_init,
after the initial policy is loaded and the security
server is ready to handle calls. */
- spin_lock(&sbsec->isec_lock);
- if (list_empty(&isec->list))
- list_add(&isec->list, &sbsec->isec_head);
- spin_unlock(&sbsec->isec_lock);
+ if (list_empty(&isec->list.list))
+ dlock_lists_add(&isec->list, &sbsec->isec_head);
goto out_unlock;
}

@@ -2548,9 +2534,10 @@ static int selinux_sb_alloc_security(struct super_block *sb)
{
struct superblock_security_struct *sbsec = selinux_superblock(sb);

+ if (alloc_dlock_list_heads(&sbsec->isec_head))
+ return -ENOMEM;
+
mutex_init(&sbsec->lock);
- INIT_LIST_HEAD(&sbsec->isec_head);
- spin_lock_init(&sbsec->isec_lock);
sbsec->sid = SECINITSID_UNLABELED;
sbsec->def_sid = SECINITSID_FILE;
sbsec->mntpoint_sid = SECINITSID_UNLABELED;
@@ -2558,6 +2545,15 @@ static int selinux_sb_alloc_security(struct super_block *sb)
return 0;
}

+static void selinux_sb_free_security(struct super_block *sb)
+{
+ struct superblock_security_struct *sbsec = selinux_superblock(sb);
+
+ if (!sbsec)
+ return;
+ free_dlock_list_heads(&sbsec->isec_head);
+}
+
static inline int opt_len(const char *s)
{
bool open_quote = false;
@@ -2841,7 +2837,7 @@ static int selinux_inode_alloc_security(struct inode *inode)
u32 sid = current_sid();

spin_lock_init(&isec->lock);
- INIT_LIST_HEAD(&isec->list);
+ init_dlock_list_node(&isec->list);
isec->inode = inode;
isec->sid = SECINITSID_UNLABELED;
isec->sclass = SECCLASS_FILE;
@@ -2920,6 +2916,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
if (rc)
return rc;

+
/* Possibly defer initialization to selinux_complete_init. */
if (sbsec->flags & SE_SBINITIALIZED) {
struct inode_security_struct *isec = selinux_inode(inode);
@@ -7215,6 +7212,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
selinux_msg_queue_alloc_security),
LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security),
LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security),
+ LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security),
LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security),
LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security),
LSM_HOOK_INIT(secid_to_secctx, selinux_secid_to_secctx),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 8159fd53c3de..e0709f429c56 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -24,6 +24,7 @@
#include <linux/spinlock.h>
#include <linux/lsm_hooks.h>
#include <linux/msg.h>
+#include <linux/dlock-list.h>
#include <net/net_namespace.h>
#include "flask.h"
#include "avc.h"
@@ -45,7 +46,7 @@ enum label_initialized {

struct inode_security_struct {
struct inode *inode; /* back pointer to inode object */
- struct list_head list; /* list of inode_security_struct */
+ struct dlock_list_node list; /* list of inode_security_struct */
u32 task_sid; /* SID of creating task */
u32 sid; /* SID of this object */
u16 sclass; /* security class of this object */
@@ -67,8 +68,7 @@ struct superblock_security_struct {
unsigned short behavior; /* labeling behavior */
unsigned short flags; /* which mount options were specified */
struct mutex lock;
- struct list_head isec_head;
- spinlock_t isec_lock;
+ struct dlock_list_heads isec_head;
};

struct msg_security_struct {
--
2.42.0