capabilities patch

Alexander Kjeldaas (astor@guardian.no)
Tue, 21 Apr 1998 03:29:41 +0200


--f9I7BMVVzbSWLtt+
Content-Type: text/plain; charset=us-ascii

This is my current patch against 2.1.97. It handles the set*uid case,
implements the system calls for setting and getting the capabilities
of arbitrary processes, and adds "securelevel" emulation. Linus already
has some of this (but not the system calls); I just wanted to dump the
code here so you can all have a look at it while I'm sleeping
;-). The system calls are untested.

astor

-- 
 Alexander Kjeldaas, Guardian Networks AS, Trondheim, Norway
 http://www.guardian.no/

--f9I7BMVVzbSWLtt+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=cap_97_1

diff -urN linux97/fs/proc/array.c lp97/fs/proc/array.c --- linux97/fs/proc/array.c Thu Mar 12 00:53:18 1998 +++ lp97/fs/proc/array.c Mon Apr 20 23:56:14 1998 @@ -764,6 +764,15 @@ return buffer; } +extern inline char *task_cap(struct task_struct *p, char *buffer) +{ + buffer += sprintf(buffer, "CapInh:\t%08x\n", p->cap_inheritable.cap); + buffer += sprintf(buffer, "CapPrm:\t%08x\n", p->cap_permitted.cap); + buffer += sprintf(buffer, "CapEff:\t%08x\n", p->cap_effective.cap); + return buffer; +} + + static int get_status(int pid, char * buffer) { char * orig = buffer; @@ -778,6 +787,7 @@ buffer = task_state(tsk, buffer); buffer = task_mem(tsk, buffer); buffer = task_sig(tsk, buffer); + buffer = task_cap(tsk, buffer); return buffer - orig; } diff -urN linux97/include/linux/capability.h lp97/include/linux/capability.h --- linux97/include/linux/capability.h Tue Apr 14 22:00:11 1998 +++ lp97/include/linux/capability.h Tue Apr 21 03:19:01 1998 @@ -19,16 +19,14 @@ #define _LINUX_CAPABILITY_VERSION 0x19980330 -typedef struct _user_cap_struct { - __u32 version; - __u32 size; - __u8 cap[1]; +typedef struct __user_cap_struct { + __u32 cap[1]; } *cap_t; #ifdef __KERNEL__ typedef struct kernel_cap_struct { - int cap; + __u32 cap; } kernel_cap_t; #endif @@ -82,10 +80,11 @@ #define CAP_KILL 5 /* Allows setgid(2) manipulation */ +/* Allows setgroups(2) */ #define CAP_SETGID 6 -/* Allows setuid(2) manipulation */ +/* Allows set*uid(2) manipulation (including fsuid) */ #define CAP_SETUID 7 @@ -155,6 +154,10 @@ /* Allow administration of the random device */ /* Allow device administration */ /* Allow examination and configuration of disk quotas */ +/* Allow configuring the kernel's syslog (printk behaviour) */ +/* Allow sending a signal to any process */ +/* Allow setting the domainname */ +/* Allow setting the hostname */ /* System Admin functions: mount et al */ #define CAP_SYS_ADMIN 21 @@ -163,11 +166,15 @@ #define CAP_SYS_BOOT 22 -/* Allow use of renice() on others, and raising of 
priority */ +/* Allow raising priority and setting priority on other (different + UID) processes */ +/* Allow use of FIFO and round-robin (realtime) scheduling on own + processes and setting the scheduling algorithm used by another + process. */ #define CAP_SYS_NICE 23 -/* Override resource limits */ +/* Override resource limits. Set resource limits. */ #define CAP_SYS_RESOURCE 24 @@ -196,8 +203,9 @@ #define cap_isclear(c) (!c.cap) #define cap_copy(dest,src) do { (dest).cap = (src).cap; } while(0) -#define cap_clear(c) do { c.cap = 0; } while(0) -#define cap_set_full(c) do { c.cap = ~0; } while(0) +#define cap_clear(c) do { (c).cap = 0; } while(0) +#define cap_set_full(c) do { (c).cap = ~0; } while(0) +#define cap_mask(c,mask) do { (c).cap &= (mask).cap; } while(0) #define cap_is_fs_cap(c) ((c) & CAP_FS_MASK) diff -urN linux97/kernel/acct.c lp97/kernel/acct.c --- linux97/kernel/acct.c Sat Feb 28 22:33:49 1998 +++ lp97/kernel/acct.c Mon Apr 20 23:56:14 1998 @@ -119,7 +119,7 @@ int error = -EPERM; lock_kernel(); - if (!suser()) + if (!capable(CAP_SYS_PACCT)) goto out; if (name == (char *)NULL) { diff -urN linux97/kernel/module.c lp97/kernel/module.c --- linux97/kernel/module.c Tue Mar 10 23:43:13 1998 +++ lp97/kernel/module.c Mon Apr 20 23:56:14 1998 @@ -120,7 +120,7 @@ struct module *mod; lock_kernel(); - if (!suser()) { + if (!capable(CAP_SYS_MODULE)) { error = -EPERM; goto err0; } @@ -175,7 +175,7 @@ struct module_ref *dep; lock_kernel(); - if (!suser()) + if (!capable(CAP_SYS_MODULE)) goto err0; if ((namelen = get_mod_name(name_user, &name)) < 0) { error = namelen; @@ -366,7 +366,7 @@ int something_changed; lock_kernel(); - if (!suser()) + if (!capable(CAP_SYS_MODULE)) goto out; if (name_user) { diff -urN linux97/kernel/printk.c lp97/kernel/printk.c --- linux97/kernel/printk.c Mon Apr 13 23:41:53 1998 +++ lp97/kernel/printk.c Mon Apr 20 23:56:14 1998 @@ -130,7 +130,7 @@ int error = -EPERM; lock_kernel(); - if ((type != 3) && !suser()) + if ((type != 3) && 
!capable(CAP_SYS_ADMIN)) goto out; error = 0; switch (type) { diff -urN linux97/kernel/sched.c lp97/kernel/sched.c --- linux97/kernel/sched.c Thu Apr 2 02:26:35 1998 +++ lp97/kernel/sched.c Mon Apr 20 23:56:14 1998 @@ -1227,7 +1227,7 @@ newprio = increment; if (increment < 0) { - if (!suser()) + if (!capable(CAP_SYS_NICE)) return -EPERM; newprio = -increment; increase = 1; @@ -1322,10 +1322,11 @@ goto out_unlock; retval = -EPERM; - if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser()) + if ((policy == SCHED_FIFO || policy == SCHED_RR) && + !capable(CAP_SYS_NICE)) goto out_unlock; if ((current->euid != p->euid) && (current->euid != p->uid) && - !suser()) + !capable(CAP_SYS_NICE)) goto out_unlock; retval = 0; diff -urN linux97/kernel/signal.c lp97/kernel/signal.c --- linux97/kernel/signal.c Tue Apr 14 01:47:48 1998 +++ lp97/kernel/signal.c Mon Apr 20 23:56:14 1998 @@ -235,7 +235,7 @@ && ((sig != SIGCONT) || (current->session != t->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) - && !suser()) + && !capable(CAP_SYS_ADMIN)) goto out_nolock; /* The null signal is a permissions and process existance probe. diff -urN linux97/kernel/sys.c lp97/kernel/sys.c --- linux97/kernel/sys.c Mon Apr 20 23:55:31 1998 +++ lp97/kernel/sys.c Tue Apr 21 03:18:12 1998 @@ -114,13 +114,13 @@ if (!proc_sel(p, which, who)) continue; if (p->uid != current->euid && - p->uid != current->uid && !suser()) { + p->uid != current->uid && !capable(CAP_SYS_NICE)) { error = EPERM; continue; } if (error == ESRCH) error = 0; - if (priority > p->priority && !suser()) + if (priority > p->priority && !capable(CAP_SYS_NICE)) error = EACCES; else p->priority = priority; @@ -172,7 +172,7 @@ char buffer[256]; /* We only trust the superuser with rebooting the system. */ - if (!suser()) + if (!capable(CAP_SYS_BOOT)) return -EPERM; /* For safety, we require "magic" arguments. 
*/ @@ -273,7 +273,7 @@ if (rgid != (gid_t) -1) { if ((old_rgid == rgid) || (current->egid==rgid) || - suser()) + capable(CAP_SETGID)) current->gid = rgid; else return -EPERM; @@ -282,7 +282,7 @@ if ((old_rgid == egid) || (current->egid == egid) || (current->sgid == egid) || - suser()) + capable(CAP_SETGID)) current->fsgid = current->egid = egid; else { current->gid = old_rgid; @@ -307,7 +307,7 @@ { int old_egid = current->egid; - if (suser()) + if (capable(CAP_SETGID)) current->gid = current->egid = current->sgid = current->fsgid = gid; else if ((gid == current->gid) || (gid == current->sgid)) current->egid = current->fsgid = gid; @@ -319,6 +319,41 @@ return 0; } +/* + * cap_emulate_setxuid() fixes the effective / permitted capabilities of + * a process after a call to setuid, setreuid, or setresuid. + * + * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of + * {r,e,s}uid != 0, the permitted and effective capabilities are + * cleared. + * + * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective + * capabilities of the process are cleared. + * + * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective + * capabilities are set to the permitted capabilities. + * + * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should + * never happen. + * + * -astor + */ +extern inline void cap_emulate_setxuid(int old_ruid, int old_euid, + int old_suid) +{ + if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && + (current->uid != 0 && current->euid != 0 && current->suid != 0)) { + cap_clear(current->cap_permitted); + cap_clear(current->cap_effective); + } + if (old_euid == 0 && current->euid != 0) { + cap_clear(current->cap_effective); + } + if (old_euid != 0 && current->euid == 0) { + cap_copy(current->cap_effective, current->cap_permitted); + } +} + /* * Unprivileged users may change the real uid to the effective uid * or vice versa. 
(BSD-style) @@ -336,14 +371,15 @@ */ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) { - int old_ruid, old_euid, new_ruid; + int old_ruid, old_euid, old_suid, new_ruid; new_ruid = old_ruid = current->uid; old_euid = current->euid; + old_suid = current->suid; if (ruid != (uid_t) -1) { if ((old_ruid == ruid) || (current->euid==ruid) || - suser()) + capable(CAP_SETUID)) new_ruid = ruid; else return -EPERM; @@ -352,7 +388,7 @@ if ((old_ruid == euid) || (current->euid == euid) || (current->suid == euid) || - suser()) + capable(CAP_SETUID)) current->fsuid = current->euid = euid; else return -EPERM; @@ -375,9 +411,16 @@ if(new_ruid) charge_uid(current, 1); } + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + return 0; } + + /* * setuid() is implemented like SysV w/ SAVED_IDS * @@ -392,10 +435,11 @@ asmlinkage int sys_setuid(uid_t uid) { int old_euid = current->euid; - int old_ruid, new_ruid; + int old_ruid, old_suid, new_ruid; old_ruid = new_ruid = current->uid; - if (suser()) + old_suid = current->suid; + if (capable(CAP_SETUID)) new_ruid = current->euid = current->suid = current->fsuid = uid; else if ((uid == current->uid) || (uid == current->suid)) current->fsuid = current->euid = uid; @@ -412,6 +456,11 @@ if(new_ruid) charge_uid(current, 1); } + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + return 0; } @@ -422,6 +471,9 @@ */ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { + int old_ruid = current->uid; + int old_euid = current->euid; + int old_suid = current->suid; if (current->uid != 0 && current->euid != 0 && current->suid != 0) { if ((ruid != (uid_t) -1) && (ruid != current->uid) && (ruid != current->euid) && (ruid != current->suid)) @@ -448,6 +500,11 @@ } if (suid != (uid_t) -1) current->suid = suid; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + return 0; } @@ -515,11 +572,31 
@@ old_fsuid = current->fsuid; if (uid == current->uid || uid == current->euid || - uid == current->suid || uid == current->fsuid || suser()) + uid == current->suid || uid == current->fsuid || + capable(CAP_SETUID)) current->fsuid = uid; if (current->fsuid != old_fsuid) current->dumpable = 0; + /* We emulate fsuid by essentially doing a scaled-down version + * of what we did in setresuid and friends. However, we only + * operate on the fs-specific bits of the process' effective + * capabilities + * + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. + */ + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + if (old_fsuid == 0 && current->fsuid != 0) { + current->cap_effective.cap &= ~CAP_FS_MASK; + } + if (old_fsuid != 0 && current->fsuid == 0) { + current->cap_effective.cap |= + (current->cap_permitted.cap & CAP_FS_MASK); + } + } + return old_fsuid; } @@ -532,7 +609,8 @@ old_fsgid = current->fsgid; if (gid == current->gid || gid == current->egid || - gid == current->sgid || gid == current->fsgid || suser()) + gid == current->sgid || gid == current->fsgid || + capable(CAP_SETGID)) current->fsgid = gid; if (current->fsgid != old_fsgid) current->dumpable = 0; @@ -716,7 +794,7 @@ asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist) { - if (!suser()) + if (!capable(CAP_SETGID)) return -EPERM; if ((unsigned) gidsetsize > NGROUPS) return -EINVAL; @@ -756,7 +834,7 @@ asmlinkage int sys_sethostname(char *name, int len) { - if (!suser()) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -787,7 +865,7 @@ */ asmlinkage int sys_setdomainname(char *name, int len) { - if (!suser()) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -820,7 +898,7 @@ old_rlim = current->rlim + resource; if (((new_rlim.rlim_cur > old_rlim->rlim_max) || (new_rlim.rlim_max > old_rlim->rlim_max)) && - !suser()) + !capable(CAP_SYS_RESOURCE)) 
return -EPERM; if (resource == RLIMIT_NOFILE) { if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN) @@ -916,3 +994,141 @@ } return error; } + + +/* + * Set capabilities on a process or all processes (except current). + * + * version - indicates the layout of the parameters (size and + * mappings). User space should do the necessary mapping, so this is + * mainly to catch old programs. + * + * pid - 0 means current process, -1 means all processes except + * current, >0 means a specific process. If pid is -1, the inherited, + * permitted and effective set of all processes (except current) are + * masked against set_inh, set_perm and set_eff. + * + */ + +#ifdef __SMP__ +/* We use this to avoid races while updating the task_list */ +static spinlock_t setcap_lock = SPIN_LOCK_UNLOCKED; +#endif + +asmlinkage int sys_setcap(int version, int pid, + struct __user_cap_struct *set_inh, + struct __user_cap_struct *set_perm, + struct __user_cap_struct *set_eff) +{ + /* Initialize to a combination that will pass the validity + check */ + kernel_cap_t inheritable = CAP_EMPTY_SET, + permitted = CAP_FULL_SET, + effective = CAP_EMPTY_SET; + + struct task_struct *task; + int error; + + if (version != _LINUX_CAPABILITY_VERSION) + return -EINVAL; + + if (set_inh && copy_from_user(&inheritable, set_inh, + sizeof(inheritable))) + return -EFAULT; + + if (set_perm && copy_from_user(&permitted, set_perm, + sizeof(permitted))) + return -EFAULT; + + if (set_eff && copy_from_user(&effective, set_eff, + sizeof(effective))) + return -EFAULT; + + if ((pid != 0 && !capable(CAP_SETPCAP)) || (pid < -1)) + return -EPERM; + + spin_lock(&setcap_lock); + error = -EPERM; + /* Check if the capabilities are sane */ + if (!capable(CAP_SETPCAP)) { + /* I: any raised capabilities must be a subset of the + * (old) Permitted + * P: permitted capabilities can only be removed and + * never added. 
+ * E: must be set to a subset of (new) Permitted + */ + int inh_raised = ~current->cap_inheritable.cap & + inheritable.cap; + if ((inh_raised & current->cap_permitted.cap) != inh_raised) + goto spin_out; + if (~current->cap_permitted.cap & permitted.cap) + goto spin_out; + if (~permitted.cap & effective.cap) + goto spin_out; + } + + if (pid >= 0) { + task = (pid > 0) ? find_task_by_pid(pid) : current; + if (set_perm) + cap_copy(task->cap_permitted, permitted); + if (set_inh) + cap_copy(task->cap_inheritable, inheritable); + if (set_eff) + cap_copy(task->cap_effective, effective); + } else { + read_lock(&tasklist_lock); + for_each_task(task) { + if (task == current) + continue; + if (set_perm) + cap_mask(task->cap_permitted, permitted); + if (set_inh) + cap_mask(task->cap_inheritable, inheritable); + if (set_eff) + cap_mask(task->cap_effective, effective); + } + read_unlock(&tasklist_lock); + } + error = 0; + spin_out: + spin_unlock(&setcap_lock); + return error; +} + +/* get capabilities of a process. Same semantics as above except + * that pid == -1 doesn't make sense. 
+ */ + +asmlinkage int sys_getcap(int version, int pid, + struct __user_cap_struct *get_inh, + struct __user_cap_struct *get_perm, + struct __user_cap_struct *get_eff) +{ + struct task_struct *p; + + if (version != _LINUX_CAPABILITY_VERSION) + return -EINVAL; + + if ((pid > 0 && !capable(CAP_SETPCAP)) || (pid < 0)) + return -EPERM; + + if (pid == 0) + p = current; + else + p = find_task_by_pid(pid); + + if (get_inh && copy_to_user(get_inh, &p->cap_inheritable, + sizeof(p->cap_inheritable))) + return -EFAULT; + + if (get_perm && copy_to_user(get_perm, &p->cap_permitted, + sizeof(p->cap_permitted))) + return -EFAULT; + + if (get_eff && copy_to_user(get_eff, &p->cap_effective, + sizeof(p->cap_effective))) + return -EFAULT; + + return 0; +} + diff -urN linux97/kernel/time.c lp97/kernel/time.c --- linux97/kernel/time.c Fri Feb 13 01:44:15 1998 +++ lp97/kernel/time.c Mon Apr 20 23:56:14 1998 @@ -87,7 +87,7 @@ { int value; - if (!suser()) + if (!capable(CAP_SYS_TIME)) return -EPERM; if (get_user(value, tptr)) return -EFAULT; @@ -156,7 +156,7 @@ { static int firsttime = 1; - if (!suser()) + if (!capable(CAP_SYS_TIME)) return -EPERM; if (tz) { @@ -221,7 +221,7 @@ long ltemp, mtemp, save_adjust; /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !suser()) + if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; /* Now we validate the data before disabling interrupts */

--f9I7BMVVzbSWLtt+--

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu