Re: Conditional SymLinks

Tall cool one (ice@mama.indstate.edu)
Mon, 15 Dec 1997 18:32:12 -0500


Hello,

After the recent conditional symlink thread I decided to do a "proof of
concept" for kicks (they aren't keeping me busy enough at work apparently
=). I have the attached patch below.

I don't think anyone should use it for real, but I just wanted to share
some of my findings for those who might be thinking about coding something.

- A process's environment location changes when a setenv() or putenv() is
done, and the task[x]->mm->env_start and env_end are not updated, so they
forever point at the process's original environment. Perhaps this is
desirable, perhaps not.

The only way to "fix" this would be to allow libc to tell the kernel
where the current environment is -- not likely to happen I would wager.

- One must expand sym link lookups always, not just when they are
followed, otherwise your shell or programs might (will) get confused
when they read the link themselves and try to use it.

This makes it quite impossible to tell what the original link looked
like, or if it even uses the environment. The only way would be to
create a new syscall or ioctl() -- readreallink() perhaps.

- The kernel does a fair number of sym link lookups. Calling out to a
user-land program could impact performance. All your libs are
sym-linked for example. A flag stating that the link is a candidate for
expansion would be useful.

- What happens when a process's environment is not in memory (i.e. swapped
to disk)?

Now, about my patch (against 2.0.32) for ext2 filesystems:

You can embed environment variables in the sym link in the format of:

${VARIABLE[:<default>]}

If VARIABLE is not defined it uses <default> instead, thus:

/tmp -> ${HOME:/var}/tmp

is possible.

This patch does strange things when you start messing with your
environment or using more than one var in the string. I wouldn't use it.
I'm merely posting it so others can look at it. Consider before using this,
that I only spent maybe four hours on this, and most of that time was spent
rebooting the system. =) It isn't deadly as far as I know though (I tried to
be very careful in bounds checking everything), and it seems to leave
normal links alone as you would expect.

Expanding is done in expandlink() which should be quite portable to other
FS's. It uses static buffers though, which I'm not sure is a good thing
inside the kernel.

The patch also uses two functions I ripped out of the /proc filesystem,
get_phys_addr() and get_array() (renamed get_env() and ever so slightly
modified here). It would perhaps be useful to make those functions global
in /proc and use them instead. I borrowed them because they looked so much
better than using lots of get_user() calls. I assume these functions deal
with a swapped environment sanely, so I don't have to.

I would wager that conditional sym-links would never actually make it into
the kernel. They have a lot of gee-whiz value, but not much else. They do
solve some problems nicely though. As far as security concerns go: it's not
exactly shell expansion, so as long as bounds checking is carefully done, I
don't see major security problems.

- Steve

.------------------------------------------------. # * # # # # # #
| Steve Baker | Barely Working | # ## # # # # #
| ice@mama.indstate.edu | System Administrator | # # # # # # # #
| Red-Hat Rulz! | Will work for hardware | # # # ## # # # #
`-- SYS-ADMIN FOR HIRE, HAVE UNIX, WILL TRAVEL --' #### # # # ## # #

*** linux-orig/fs/ext2/symlink.c Wed Aug 6 19:52:01 1997
--- linux/fs/ext2/symlink.c Fri Dec 12 15:04:33 1997
***************
*** 18,32 ****
--- 18,40 ----
#include <asm/segment.h>

#include <linux/errno.h>
+ #include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/sched.h>
#include <linux/stat.h>

+ #include <asm/pgtable.h>
+
static int ext2_readlink (struct inode *, char *, int);
static int ext2_follow_link (struct inode *, struct inode *, int, int,
struct inode **);

+ static char *expandlink (char *);
+
+ static unsigned long get_phys_addr(struct task_struct * , unsigned long);
+ static int get_env(struct task_struct *, unsigned long , unsigned long , char *);
+
/*
* symlinks can't do much...
*/
***************
*** 86,91 ****
--- 94,102 ----
link = bh->b_data;
} else
link = (char *) inode->u.ext2_i.i_data;
+
+ link = expandlink(link);
+
UPDATE_ATIME(inode);
current->link_count++;
error = open_namei (link, flag, mode, res_inode, dir);
***************
*** 119,124 ****
--- 130,138 ----
}
else
link = (char *) inode->u.ext2_i.i_data;
+
+ link = expandlink(link);
+
i = 0;
while (i < buflen && (c = link[i])) {
i++;
***************
*** 129,132 ****
--- 143,290 ----
if (bh)
brelse (bh);
return i;
+ }
+
+
+ /* Expand ${VARIABLE[:default]} references in link using current's environment.
+  * Returns the static lbuf with the expanded text, or link itself when the text
+  * ends in a literal run with nothing substituted, a reference is unterminated or
+  * too long, lbuf fills up, or the environment cannot be read. Not reentrant. */
+ char * expandlink(char * link)
+ {
+ 	static char lbuf[PATH_MAX+1], vbuf[PATH_MAX+1];	/* output text / "NAME=" key */
+ 	unsigned long env_start = current->mm->env_start;
+ 	unsigned long env_end = current->mm->env_end;
+ 	char *env_buf = NULL, *addr, *tmp, *def, *defend;
+ 	int len, alen, elen = 0, found = 0;	/* elen: length reported by get_env() */
+ 	char *l = lbuf, *lend = lbuf+PATH_MAX-1, *p = link;
+ 	char *v, *vend = vbuf+PATH_MAX-1;
+ 	while(*p) {
+ 		while (*p && *p != '$' && l < lend) *l++ = *p++;	/* copy literal text */
+ 		if (l >= lend || !*p) {
+ 			if (l < lend && found) break;	/* end of link and something was expanded */
+ 			if (env_buf)
+ 				vfree(env_buf);
+ 			return link;
+ 		}
+ 		if (*++p != '{') {	/* a '$' not followed by '{' is kept literally */
+ 			*l++ = '$';
+ 			continue;
+ 		}
+ 		/* Gather NAME into vbuf (len counts it); a ":default" is only bracketed by def/defend. */
+ 		def = defend = NULL;
+ 		for(p++,len=0,v=vbuf;v < vend && *p && *p != '}' && *p != ':';len++)
+ 			*v++ = *p++;
+ 		if (*p == ':' && v < vend) {
+ 			def = ++p;
+ 			while(v < vend && *p && *p != '}') p++;
+ 			defend = p;
+ 		}
+ 		if (v >= vend || !*p) {	/* name too long or missing '}' */
+ 			if (env_buf)
+ 				vfree(env_buf);
+ 			return link;
+ 		}
+ 		if (def == defend) def = NULL;	/* empty default counts as no default */
+ 		*v++ = '=';
+ 		*v = 0;
+ 		p++;
+ 		len++;
+ 		/* vbuf is now "NAME=" of length len. The environment is read only once (elen). */
+ 		if (!env_buf) {
+ 			env_buf = vmalloc((env_end - env_start) + 1);
+ 			if (!env_buf) return link;
+ 			elen = get_env(current,env_start,env_end,env_buf);
+ 			if (!elen) {
+ 				vfree(env_buf);
+ 				return link;
+ 			}
+ 		}
+ 		for(addr=env_buf,alen=elen;alen;alen--,addr++) {	/* restart from the full length for every reference */
+ 			if (len >= alen) {	/* ran out of environment: fall back to the default, if any */
+ 				if (!def) break;
+ 				if ((l+(defend-def)+1) < lend) {
+ 					while(def < defend) *l++ = *def++;
+ 					found = 1;
+ 				}
+ 				break;
+ 			}
+ 			if (!strncmp((char *)addr,vbuf,len)) {	/* entry starts with "NAME=" */
+ 				addr=addr+len;
+ 				alen-=len;
+ 				for(tmp=addr;alen && *(char *)addr;addr++,alen--);	/* find end of value */
+ 				if ((l+(addr-tmp)+1) < lend) {
+ 					while(tmp < addr) *l++ = *tmp++;
+ 					found = 1;
+ 				}
+ 				break;
+ 			}
+ 			while(alen && *(char *)addr) {	/* skip to this entry's terminating NUL */
+ 				addr++;
+ 				alen--;
+ 			}
+ 		}
+ 	}
+ 	*l = 0;
+ 	vfree(env_buf);
+ 	return lbuf;
+ }
+
+ static int get_env(struct task_struct * p, unsigned long start, unsigned long end, char * buffer)
+ { /* Copy bytes of task p's memory, beginning at start, into buffer; return the buffer offset of the last NUL seen. */
+ unsigned long addr;
+ int size = 0, result = 0; /* size: bytes stored so far; result: offset of the most recent NUL byte */
+ char c;
+
+ if (start >= end)
+ return result; /* empty range: returns 0 */
+ for (;;) {
+ addr = get_phys_addr(p, start); /* translate the current address in p */
+ if (!addr)
+ return result; /* translation failed: return what has been seen so far */
+ do {
+ c = *(char *) addr;
+ if (!c)
+ result = size;
+ if (size < PAGE_SIZE)
+ buffer[size++] = c;
+ else
+ return result; /* never stores more than PAGE_SIZE bytes; NOTE(review): buffer capacity itself is not passed in -- confirm callers allocate enough */
+ addr++;
+ start++;
+ if (!c && start >= end)
+ return result; /* a NUL was just consumed and start has reached end */
+ } while (addr & ~PAGE_MASK); /* presumably: keep going until addr crosses into the next page, then translate again */
+ }
+ return result;
+ }
+
+ static unsigned long get_phys_addr(struct task_struct * p, unsigned long ptr)
+ { /* Look up address ptr of task p through its page tables; returns 0 when it cannot be resolved. */
+ pgd_t *page_dir;
+ pmd_t *page_middle;
+ pte_t pte;
+
+ if (!p || !p->mm || ptr >= TASK_SIZE)
+ return 0; /* no task, no mm, or ptr not below TASK_SIZE */
+ page_dir = pgd_offset(p->mm,ptr);
+ if (pgd_none(*page_dir))
+ return 0;
+ if (pgd_bad(*page_dir)) {
+ printk("bad page directory entry %08lx\n", pgd_val(*page_dir));
+ pgd_clear(page_dir); /* a bad entry is reported and then cleared */
+ return 0;
+ }
+ page_middle = pmd_offset(page_dir,ptr);
+ if (pmd_none(*page_middle))
+ return 0;
+ if (pmd_bad(*page_middle)) {
+ printk("bad page middle entry %08lx\n", pmd_val(*page_middle));
+ pmd_clear(page_middle); /* same treatment as a bad directory entry */
+ return 0;
+ }
+ pte = *pte_offset(page_middle,ptr);
+ if (!pte_present(pte))
+ return 0; /* page not present: nothing here tries to bring it in */
+ return pte_page(pte) + (ptr & ~PAGE_MASK); /* page address from the pte plus ptr's low bits; get_env() dereferences this value directly */
}