[RFC next v2 1/2] ucounts: free ucount only when count and rlimit are zero
From: Chen Ridong
Date: Mon May 19 2025 - 09:25:08 EST
From: Chen Ridong <chenridong@xxxxxxxxxx>
After the commit fda31c50292a ("signal: avoid double atomic counter
increments for user accounting") and the commit 15bc01effefe ("ucounts:
Fix signal ucount refcounting"), the reference counting mechanism for
ucounts has the following behavior. The reference count is incremented
when the first pending signal pins the ucounts, and it is decremented
when the last pending signal is dequeued. This implies that as long as
there are any pending signals pinned to the ucounts, the ucounts cannot
be freed.
To address the scalability issue described in the next patch, the
ucounts.rlimits will be converted to percpu_counter. However, summing up
the percpu counters is expensive. To overcome this, this patch modifies
the conditions for freeing ucounts. Instead of complex checks regarding
whether a pending signal is the first or the last one, the ucounts can now
be freed only when both the refcount and the rlimits are zero.
This change not only simplifies the logic but also reduces the number of
atomic operations.
Signed-off-by: Chen Ridong <chenridong@xxxxxxxxxx>
---
include/linux/user_namespace.h | 1 +
kernel/ucount.c | 75 ++++++++++++++++++++++++++--------
2 files changed, 59 insertions(+), 17 deletions(-)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a0bb6d012137..6e2229ea4673 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -122,6 +122,7 @@ struct ucounts {
kuid_t uid;
struct rcu_head rcu;
rcuref_t count;
+ atomic_long_t freed;
atomic_long_t ucount[UCOUNT_COUNTS];
atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS];
};
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 8686e329b8f2..125471af7d59 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -185,18 +185,61 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
return new;
}
-void put_ucounts(struct ucounts *ucounts)
+/*
+ * Whether all the rlimits are zero.
+ * For now, only UCOUNT_RLIMIT_SIGPENDING is considered.
+ * Other rlimits can be added later.
+ */
+static bool rlimits_are_zero(struct ucounts *ucounts)
+{
+ int rtypes[] = { UCOUNT_RLIMIT_SIGPENDING };
+ int rtype;
+
+ for (int i = 0; i < sizeof(rtypes)/sizeof(int); ++i) {
+ rtype = rtypes[i];
+ if (atomic_long_read(&ucounts->rlimit[rtype]) > 0)
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Ucounts can be freed only when the ucount->count is released
+ * and the rlimits are zero.
+ * The caller should hold rcu_read_lock();
+ */
+static bool ucounts_can_be_freed(struct ucounts *ucounts)
+{
+ if (rcuref_read(&ucounts->count) > 0)
+ return false;
+ if (!rlimits_are_zero(ucounts))
+ return false;
+ /* Prevent double free */
+ return atomic_long_cmpxchg(&ucounts->freed, 0, 1) == 0;
+}
+
+static void free_ucounts(struct ucounts *ucounts)
{
unsigned long flags;
- if (rcuref_put(&ucounts->count)) {
- spin_lock_irqsave(&ucounts_lock, flags);
- hlist_nulls_del_rcu(&ucounts->node);
- spin_unlock_irqrestore(&ucounts_lock, flags);
+ spin_lock_irqsave(&ucounts_lock, flags);
+ hlist_nulls_del_rcu(&ucounts->node);
+ spin_unlock_irqrestore(&ucounts_lock, flags);
+
+ put_user_ns(ucounts->ns);
+ kfree_rcu(ucounts, rcu);
+}
- put_user_ns(ucounts->ns);
- kfree_rcu(ucounts, rcu);
+void put_ucounts(struct ucounts *ucounts)
+{
+ rcu_read_lock();
+ if (rcuref_put(&ucounts->count) &&
+ ucounts_can_be_freed(ucounts)) {
+ rcu_read_unlock();
+ free_ucounts(ucounts);
+ return;
}
+ rcu_read_unlock();
}
static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
@@ -281,11 +324,17 @@ static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
{
struct ucounts *iter, *next;
for (iter = ucounts; iter != last; iter = next) {
+ bool to_free;
+
+ rcu_read_lock();
long dec = atomic_long_sub_return(1, &iter->rlimit[type]);
WARN_ON_ONCE(dec < 0);
next = iter->ns->ucounts;
- if (dec == 0)
- put_ucounts(iter);
+ to_free = ucounts_can_be_freed(iter);
+ rcu_read_unlock();
+ /* If ucounts->count is zero and the rlimits are zero, free ucounts */
+ if (to_free)
+ free_ucounts(iter);
}
}
@@ -310,14 +359,6 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
ret = new;
if (!override_rlimit)
max = get_userns_rlimit_max(iter->ns, type);
- /*
- * Grab an extra ucount reference for the caller when
- * the rlimit count was previously 0.
- */
- if (new != 1)
- continue;
- if (!get_ucounts(iter))
- goto dec_unwind;
}
return ret;
dec_unwind:
--
2.34.1