Re: [PATCH v3 1/2] capability: add cap_isidentical

From: Linus Torvalds
Date: Tue Feb 28 2023 - 14:40:00 EST


On Tue, Feb 28, 2023 at 6:47 AM Mateusz Guzik <mjguzik@xxxxxxxxx> wrote:
>
> Personally I would only touch it as a result of losing a bet (and I'm
> not taking any with this in play), but that's just my $0.05 (adjusted
> for inflation).

Heh. I took that as a challenge.

It wasn't actually all that bad (famous last words). For type safety
reasons I decided to use a struct wrapper

typedef struct { u64 val; } kernel_cap_t;

to avoid any nasty silent integer value conversions. But then it was
literally just a matter of cleaning up some of the insanity.

I think the fs/proc/array.c modification is an example of just how bad
things used to be:

--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m,
static void render_cap_t(struct seq_file *m, const char *header,
kernel_cap_t *a)
{
- unsigned __capi;
-
seq_puts(m, header);
- CAP_FOR_EACH_U32(__capi) {
- seq_put_hex_ll(m, NULL,
- a->cap[CAP_LAST_U32 - __capi], 8);
- }
+ seq_put_hex_ll(m, NULL, a->val, 16);
seq_putc(m, '\n');
}

note how the code literally did that odd

CAP_LAST_U32 - __capi

in there just to get the natural "high word first" that is exactly
what you get if you just print out the value as a hex number.

Now, the actual user mode conversions still exist, but even those
often got simplified.

Have I actually *tested* this? Of course not. I'm lazy, and everything
I write obviously always works on the first try anyway, so why should
I?

So take this patch with a healthy dose of salt, but it looks sane to
me, and it does build cleanly (and with our type system, that actually
does say quite a lot).

This actually looks sane enough that I might even boot it. Call me crazy.

Linus
fs/proc/array.c | 7 +-
include/linux/capability.h | 128 +++++----------------
io_uring/fdinfo.c | 4 +-
kernel/auditsc.c | 6 +-
kernel/capability.c | 97 ++++++++--------
kernel/umh.c | 41 +++----
security/apparmor/policy_unpack.c | 40 +++++--
security/commoncap.c | 49 ++++----
.../selftests/bpf/progs/test_deny_namespace.c | 7 +-
9 files changed, 150 insertions(+), 229 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 49283b8103c7..9b0315d34c58 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
static void render_cap_t(struct seq_file *m, const char *header,
kernel_cap_t *a)
{
- unsigned __capi;
-
seq_puts(m, header);
- CAP_FOR_EACH_U32(__capi) {
- seq_put_hex_ll(m, NULL,
- a->cap[CAP_LAST_U32 - __capi], 8);
- }
+ seq_put_hex_ll(m, NULL, a->val, 16);
seq_putc(m, '\n');
}

diff --git a/include/linux/capability.h b/include/linux/capability.h
index d3c6c2d1ff45..9bd50f070494 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -15,28 +15,25 @@

#include <uapi/linux/capability.h>
#include <linux/uidgid.h>
+#include <linux/bits.h>

#define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
-#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3

extern int file_caps_enabled;

-typedef struct kernel_cap_struct {
- __u32 cap[_KERNEL_CAPABILITY_U32S];
-} kernel_cap_t;
+typedef struct { u64 val; } kernel_cap_t;

/* same as vfs_ns_cap_data but in cpu endian and always filled completely */
struct cpu_vfs_cap_data {
__u32 magic_etc;
+ kuid_t rootid;
kernel_cap_t permitted;
kernel_cap_t inheritable;
- kuid_t rootid;
};

#define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct))
#define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t))

-
struct file;
struct inode;
struct dentry;
@@ -47,13 +44,6 @@ struct mnt_idmap;
extern const kernel_cap_t __cap_empty_set;
extern const kernel_cap_t __cap_init_eff_set;

-/*
- * Internal kernel functions only
- */
-
-#define CAP_FOR_EACH_U32(__capi) \
- for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi)
-
/*
* CAP_FS_MASK and CAP_NFSD_MASKS:
*
@@ -67,104 +57,52 @@ extern const kernel_cap_t __cap_init_eff_set;
* 2. The security.* and trusted.* xattrs are fs-related MAC permissions
*/

-# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \
- | CAP_TO_MASK(CAP_MKNOD) \
- | CAP_TO_MASK(CAP_DAC_OVERRIDE) \
- | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \
- | CAP_TO_MASK(CAP_FOWNER) \
- | CAP_TO_MASK(CAP_FSETID))
-
-# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE))
-
-#if _KERNEL_CAPABILITY_U32S != 2
-# error Fix up hand-coded capability macro initializers
-#else /* HAND-CODED capability initializers */
+# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \
+ | BIT_ULL(CAP_MKNOD) \
+ | BIT_ULL(CAP_DAC_OVERRIDE) \
+ | BIT_ULL(CAP_DAC_READ_SEARCH) \
+ | BIT_ULL(CAP_FOWNER) \
+ | BIT_ULL(CAP_FSETID) \
+ | BIT_ULL(CAP_MAC_OVERRIDE))
+#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1)

-#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
-#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
+# define CAP_EMPTY_SET ((kernel_cap_t) { 0 })
+# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK })
+# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) })
+# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) })

-# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
-# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
-# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \
- | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \
- CAP_FS_MASK_B1 } })
-# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \
- | CAP_TO_MASK(CAP_SYS_RESOURCE), \
- CAP_FS_MASK_B1 } })
+# define cap_clear(c) do { (c).val = 0; } while (0)

-#endif /* _KERNEL_CAPABILITY_U32S != 2 */
-
-# define cap_clear(c) do { (c) = __cap_empty_set; } while (0)
-
-#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
-#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))
-
-#define CAP_BOP_ALL(c, a, b, OP) \
-do { \
- unsigned __capi; \
- CAP_FOR_EACH_U32(__capi) { \
- c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \
- } \
-} while (0)
-
-#define CAP_UOP_ALL(c, a, OP) \
-do { \
- unsigned __capi; \
- CAP_FOR_EACH_U32(__capi) { \
- c.cap[__capi] = OP a.cap[__capi]; \
- } \
-} while (0)
+#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag))
+#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag))
+#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0)

static inline kernel_cap_t cap_combine(const kernel_cap_t a,
const kernel_cap_t b)
{
- kernel_cap_t dest;
- CAP_BOP_ALL(dest, a, b, |);
- return dest;
+ return (kernel_cap_t) { a.val | b.val };
}

static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
const kernel_cap_t b)
{
- kernel_cap_t dest;
- CAP_BOP_ALL(dest, a, b, &);
- return dest;
+ return (kernel_cap_t) { a.val & b.val };
}

static inline kernel_cap_t cap_drop(const kernel_cap_t a,
const kernel_cap_t drop)
{
- kernel_cap_t dest;
- CAP_BOP_ALL(dest, a, drop, &~);
- return dest;
-}
-
-static inline kernel_cap_t cap_invert(const kernel_cap_t c)
-{
- kernel_cap_t dest;
- CAP_UOP_ALL(dest, c, ~);
- return dest;
+ return (kernel_cap_t) { a.val &~ drop.val };
}

static inline bool cap_isclear(const kernel_cap_t a)
{
- unsigned __capi;
- CAP_FOR_EACH_U32(__capi) {
- if (a.cap[__capi] != 0)
- return false;
- }
- return true;
+ return !a.val;
}

static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
{
- unsigned __capi;
- CAP_FOR_EACH_U32(__capi) {
- if (a.cap[__capi] != b.cap[__capi])
- return false;
- }
- return true;
+ return a.val == b.val;
}

/*
@@ -176,39 +114,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
*/
static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
{
- kernel_cap_t dest;
- dest = cap_drop(a, set);
- return cap_isclear(dest);
+ return (a.val & set.val) == a.val;
}

/* Used to decide between falling back on the old suser() or fsuser(). */

static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
{
- const kernel_cap_t __cap_fs_set = CAP_FS_SET;
- return cap_drop(a, __cap_fs_set);
+ return cap_drop(a, CAP_FS_SET);
}

static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
const kernel_cap_t permitted)
{
- const kernel_cap_t __cap_fs_set = CAP_FS_SET;
- return cap_combine(a,
- cap_intersect(permitted, __cap_fs_set));
+ return cap_combine(a, cap_intersect(permitted, CAP_FS_SET));
}

static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
{
- const kernel_cap_t __cap_fs_set = CAP_NFSD_SET;
- return cap_drop(a, __cap_fs_set);
+ return cap_drop(a, CAP_NFSD_SET);
}

static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
const kernel_cap_t permitted)
{
- const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
- return cap_combine(a,
- cap_intersect(permitted, __cap_nfsd_set));
+ return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET));
}

#ifdef CONFIG_MULTIUSER
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 882bd56b01ed..76c279b13aee 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
struct user_namespace *uns = seq_user_ns(m);
struct group_info *gi;
kernel_cap_t cap;
- unsigned __capi;
int g;

seq_printf(m, "%5d\n", id);
@@ -42,8 +41,7 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
}
seq_puts(m, "\n\tCapEff:\t");
cap = cred->cap_effective;
- CAP_FOR_EACH_U32(__capi)
- seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+ seq_put_hex_ll(m, NULL, cap.val, 16);
seq_putc(m, '\n');
return 0;
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 93d0b87f3283..addeed3df15d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1295,15 +1295,11 @@ static void audit_log_execve_info(struct audit_context *context,
static void audit_log_cap(struct audit_buffer *ab, char *prefix,
kernel_cap_t *cap)
{
- int i;
-
if (cap_isclear(*cap)) {
audit_log_format(ab, " %s=0", prefix);
return;
}
- audit_log_format(ab, " %s=", prefix);
- CAP_FOR_EACH_U32(i)
- audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+ audit_log_format(ab, " %s=%016llx", prefix, cap->val);
}

static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
diff --git a/kernel/capability.c b/kernel/capability.c
index 339a44dfe2f4..40ede532259e 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -151,6 +151,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
pid_t pid;
unsigned tocopy;
kernel_cap_t pE, pI, pP;
+ struct __user_cap_data_struct kdata[2];

ret = cap_validate_magic(header, &tocopy);
if ((dataptr == NULL) || (ret != 0))
@@ -163,42 +164,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
return -EINVAL;

ret = cap_get_target_pid(pid, &pE, &pI, &pP);
- if (!ret) {
- struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
- unsigned i;
-
- for (i = 0; i < tocopy; i++) {
- kdata[i].effective = pE.cap[i];
- kdata[i].permitted = pP.cap[i];
- kdata[i].inheritable = pI.cap[i];
- }
-
- /*
- * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
- * we silently drop the upper capabilities here. This
- * has the effect of making older libcap
- * implementations implicitly drop upper capability
- * bits when they perform a: capget/modify/capset
- * sequence.
- *
- * This behavior is considered fail-safe
- * behavior. Upgrading the application to a newer
- * version of libcap will enable access to the newer
- * capabilities.
- *
- * An alternative would be to return an error here
- * (-ERANGE), but that causes legacy applications to
- * unexpectedly fail; the capget/modify/capset aborts
- * before modification is attempted and the application
- * fails.
- */
- if (copy_to_user(dataptr, kdata, tocopy
- * sizeof(struct __user_cap_data_struct))) {
- return -EFAULT;
- }
- }
+ if (ret)
+ return ret;

- return ret;
+ /*
+ * Annoying legacy format with 64-bit capabilities exposed
+ * as two sets of 32-bit fields, so we need to split the
+ * capability values up.
+ */
+ kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32;
+ kdata[0].permitted = pP.val; kdata[1].permitted = pP.val >> 32;
+ kdata[0].inheritable = pI.val; kdata[0].inheritable = pI.val >> 32;
+
+ /*
+ * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
+ * we silently drop the upper capabilities here. This
+ * has the effect of making older libcap
+ * implementations implicitly drop upper capability
+ * bits when they perform a: capget/modify/capset
+ * sequence.
+ *
+ * This behavior is considered fail-safe
+ * behavior. Upgrading the application to a newer
+ * version of libcap will enable access to the newer
+ * capabilities.
+ *
+ * An alternative would be to return an error here
+ * (-ERANGE), but that causes legacy applications to
+ * unexpectedly fail; the capget/modify/capset aborts
+ * before modification is attempted and the application
+ * fails.
+ */
+ if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0])))
+ return -EFAULT;
+
+ return 0;
+}
+
+static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
+{
+ return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
}

/**
@@ -221,8 +226,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
*/
SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
{
- struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
- unsigned i, tocopy, copybytes;
+ struct __user_cap_data_struct kdata[2] = { { 0, }, };
+ unsigned tocopy, copybytes;
kernel_cap_t inheritable, permitted, effective;
struct cred *new;
int ret;
@@ -246,21 +251,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
if (copy_from_user(&kdata, data, copybytes))
return -EFAULT;

- for (i = 0; i < tocopy; i++) {
- effective.cap[i] = kdata[i].effective;
- permitted.cap[i] = kdata[i].permitted;
- inheritable.cap[i] = kdata[i].inheritable;
- }
- while (i < _KERNEL_CAPABILITY_U32S) {
- effective.cap[i] = 0;
- permitted.cap[i] = 0;
- inheritable.cap[i] = 0;
- i++;
- }
-
- effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
- permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
- inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+ effective = mk_kernel_cap(kdata[0].effective, kdata[1].effective);
+ permitted = mk_kernel_cap(kdata[0].permitted, kdata[1].permitted);
+ inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable);

new = prepare_creds();
if (!new)
diff --git a/kernel/umh.c b/kernel/umh.c
index fbf872c624cb..2a4708277335 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
- unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
- kernel_cap_t new_cap;
- int err, i;
+ unsigned long cap_array[2];
+ kernel_cap_t new_cap, *cap;
+ int err;

if (write && (!capable(CAP_SETPCAP) ||
!capable(CAP_SYS_MODULE)))
@@ -514,14 +514,16 @@ static int proc_cap_handler(struct ctl_table *table, int write,
* userspace if this is a read.
*/
spin_lock(&umh_sysctl_lock);
- for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
- if (table->data == CAP_BSET)
- cap_array[i] = usermodehelper_bset.cap[i];
- else if (table->data == CAP_PI)
- cap_array[i] = usermodehelper_inheritable.cap[i];
- else
- BUG();
- }
+ if (table->data == CAP_BSET)
+ cap = &usermodehelper_bset;
+ else if (table->data == CAP_PI)
+ cap = &usermodehelper_inheritable;
+ else
+ BUG();
+
+ /* Legacy format: capabilities are exposed as two 32-bit values */
+ cap_array[0] = (u32) cap->val;
+ cap_array[1] = cap->val >> 32;
spin_unlock(&umh_sysctl_lock);

t = *table;
@@ -535,22 +537,15 @@ static int proc_cap_handler(struct ctl_table *table, int write,
if (err < 0)
return err;

- /*
- * convert from the sysctl array of ulongs to the kernel_cap_t
- * internal representation
- */
- for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
- new_cap.cap[i] = cap_array[i];
+ new_cap.val = (u32)cap_array[0];
+ new_cap.val += (u64)cap_array[1] << 32;

/*
* Drop everything not in the new_cap (but don't add things)
*/
if (write) {
spin_lock(&umh_sysctl_lock);
- if (table->data == CAP_BSET)
- usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
- if (table->data == CAP_PI)
- usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
+ *cap = cap_intersect(*cap, new_cap);
spin_unlock(&umh_sysctl_lock);
}

@@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = {
{
.procname = "bset",
.data = CAP_BSET,
- .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+ .maxlen = 2 * sizeof(unsigned long),
.mode = 0600,
.proc_handler = proc_cap_handler,
},
{
.procname = "inheritable",
.data = CAP_PI,
- .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+ .maxlen = 2 * sizeof(unsigned long),
.mode = 0600,
.proc_handler = proc_cap_handler,
},
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 5e9949832af6..cf2ceec40b28 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -304,6 +304,26 @@ VISIBLE_IF_KUNIT bool aa_unpack_u64(struct aa_ext *e, u64 *data, const char *nam
}
EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64);

+static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+ u32 val;
+
+ if (!aa_unpack_u32(e, &val, name))
+ return false;
+ data->val = val;
+ return true;
+}
+
+static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+ u32 val;
+
+ if (!aa_unpack_u32(e, &val, name))
+ return false;
+ data->val = (u32)data->val | ((u64)val << 32);
+ return true;
+}
+
VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size)
{
void *pos = e->pos;
@@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
profile->path_flags = PATH_MEDIATE_DELETED;

info = "failed to unpack profile capabilities";
- if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL))
+ if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL))
+ if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL))
+ if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL))
goto fail;
- if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL))
+ if (!aa_unpack_cap_low(e, &tmpcap, NULL))
goto fail;

info = "failed to unpack upper profile capabilities";
if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) {
/* optional upper half of 64 bit caps */
- if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL))
+ if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL))
+ if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL))
+ if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL))
+ if (!aa_unpack_cap_high(e, &tmpcap, NULL))
goto fail;
if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
goto fail;
@@ -924,9 +944,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
info = "failed to unpack extended profile capabilities";
if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) {
/* optional extended caps mediation mask */
- if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL))
+ if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL))
goto fail;
- if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL))
+ if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL))
goto fail;
if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
goto fail;
diff --git a/security/commoncap.c b/security/commoncap.c
index aec62db55271..5bb7d1e96277 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
bool *has_fcap)
{
struct cred *new = bprm->cred;
- unsigned i;
int ret = 0;

if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
@@ -598,22 +597,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
if (caps->magic_etc & VFS_CAP_REVISION_MASK)
*has_fcap = true;

- CAP_FOR_EACH_U32(i) {
- __u32 permitted = caps->permitted.cap[i];
- __u32 inheritable = caps->inheritable.cap[i];
-
- /*
- * pP' = (X & fP) | (pI & fI)
- * The addition of pA' is handled later.
- */
- new->cap_permitted.cap[i] =
- (new->cap_bset.cap[i] & permitted) |
- (new->cap_inheritable.cap[i] & inheritable);
+ /*
+ * pP' = (X & fP) | (pI & fI)
+ * The addition of pA' is handled later.
+ */
+ new->cap_permitted.val =
+ (new->cap_bset.val & caps->permitted.val) |
+ (new->cap_inheritable.val & caps->inheritable.val);

- if (permitted & ~new->cap_permitted.cap[i])
- /* insufficient to execute correctly */
- ret = -EPERM;
- }
+ if (caps->permitted.val & ~new->cap_permitted.val)
+ /* insufficient to execute correctly */
+ ret = -EPERM;

/*
* For legacy apps, with no internal support for recognizing they
@@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
{
struct inode *inode = d_backing_inode(dentry);
__u32 magic_etc;
- unsigned tocopy, i;
int size;
struct vfs_ns_cap_data data, *nscaps = &data;
struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
@@ -677,17 +670,14 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
case VFS_CAP_REVISION_1:
if (size != XATTR_CAPS_SZ_1)
return -EINVAL;
- tocopy = VFS_CAP_U32_1;
break;
case VFS_CAP_REVISION_2:
if (size != XATTR_CAPS_SZ_2)
return -EINVAL;
- tocopy = VFS_CAP_U32_2;
break;
case VFS_CAP_REVISION_3:
if (size != XATTR_CAPS_SZ_3)
return -EINVAL;
- tocopy = VFS_CAP_U32_3;
rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
break;

@@ -705,15 +695,20 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
if (!rootid_owns_currentns(rootvfsuid))
return -ENODATA;

- CAP_FOR_EACH_U32(i) {
- if (i >= tocopy)
- break;
- cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
- cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
+ cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
+ cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
+
+ /*
+ * Rev1 had just a single 32-bit word, later expanded
+ * to a second one for the high bits
+ */
+ if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
+ cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
+ cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
}

- cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
- cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+ cpu_caps->permitted.val &= CAP_VALID_MASK;
+ cpu_caps->inheritable.val &= CAP_VALID_MASK;

cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);

diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
index 09ad5a4ebd1f..591104e79812 100644
--- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -6,7 +6,7 @@
#include <linux/capability.h>

struct kernel_cap_struct {
- __u32 cap[_LINUX_CAPABILITY_U32S_3];
+ __u64 val;
} __attribute__((preserve_access_index));

struct cred {
@@ -19,14 +19,13 @@ SEC("lsm.s/userns_create")
int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
{
struct kernel_cap_struct caps = cred->cap_effective;
- int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
- __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+ __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);

if (ret)
return 0;

ret = -EPERM;
- if (caps.cap[cap_index] & cap_mask)
+ if (caps.val & cap_mask)
return 0;

return -EPERM;