[PATCH v3] Add a "nosymfollow" mount option.

From: Mattias Nissler
Date: Thu Nov 17 2016 - 12:00:54 EST


For mounts that have the new "nosymfollow" option, don't follow
symlinks when resolving paths; an attempt to traverse a symlink on
such a mount fails with EACCES. The new option is similar in spirit
to the existing "nodev", "noexec", and "nosuid" options. Various BSD
variants have long supported a "nosymfollow" mount option with
equivalent implementations.

Note that symlinks may still be created on file systems mounted with
the "nosymfollow" option present. readlink() remains functional, so
user space code that is aware of symlinks can still choose to follow
them explicitly.

Setting the "nosymfollow" mount option helps prevent privileged
writers from unintentionally modifying files when there is an
unexpected link along the accessed path. The "nosymfollow" option is
thus useful as a defensive measure for systems that need to deal with
untrusted file systems in privileged contexts.

Signed-off-by: Mattias Nissler <mnissler@xxxxxxxxxxxx>
---
Changes since v2:
- Updated the option name to align with BSD naming.

I have also uploaded the corresponding util-linux patch, which makes
the "mount" command understand the new option:
https://gist.github.com/anonymous/317207ae499389235258f0d52ab22c03

fs/namei.c | 3 +++
fs/namespace.c | 9 ++++++---
fs/proc_namespace.c | 1 +
fs/statfs.c | 2 ++
include/linux/mount.h | 3 ++-
include/linux/statfs.h | 1 +
include/uapi/linux/fs.h | 1 +
7 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 5b4eed2..4751e7f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1021,6 +1021,9 @@ const char *get_link(struct nameidata *nd)
touch_atime(&last->link);
}

+ if (nd->path.mnt->mnt_flags & MNT_NOSYMFOLLOW)
+ return ERR_PTR(-EACCES);
+
error = security_inode_follow_link(dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
diff --git a/fs/namespace.c b/fs/namespace.c
index e6c234b..2bf244e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2732,6 +2732,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ if (flags & MS_NOSYMFOLLOW)
+ mnt_flags |= MNT_NOSYMFOLLOW;

/* The default atime for remount is preservation */
if ((flags & MS_REMOUNT) &&
@@ -2741,9 +2743,10 @@ long do_mount(const char *dev_name, const char __user *dir_name,
mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
}

- flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME | MS_NOREMOTELOCK);
+ flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_NOSYMFOLLOW |
+ MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME |
+ MS_RELATIME | MS_KERNMOUNT | MS_STRICTATIME |
+ MS_NOREMOTELOCK);

if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3f1190d..366149d 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -67,6 +67,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_NOSYMFOLLOW, ",nosymfollow" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
diff --git a/fs/statfs.c b/fs/statfs.c
index 083dc0a..cfd4da8 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -27,6 +27,8 @@ static int flags_by_mnt(int mnt_flags)
flags |= ST_NODIRATIME;
if (mnt_flags & MNT_RELATIME)
flags |= ST_RELATIME;
+ if (mnt_flags & MNT_NOSYMFOLLOW)
+ flags |= ST_NOSYMFOLLOW;
return flags;
}

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1172cce..c9e8d7b 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -28,6 +28,7 @@ struct mnt_namespace;
#define MNT_NODIRATIME 0x10
#define MNT_RELATIME 0x20
#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
+#define MNT_NOSYMFOLLOW 0x80

#define MNT_SHRINKABLE 0x100
#define MNT_WRITE_HOLD 0x200
@@ -44,7 +45,7 @@ struct mnt_namespace;
#define MNT_SHARED_MASK (MNT_UNBINDABLE)
#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
| MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
- | MNT_READONLY)
+ | MNT_READONLY | MNT_NOSYMFOLLOW)
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )

#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
diff --git a/include/linux/statfs.h b/include/linux/statfs.h
index 0166d32..6bab2b2 100644
--- a/include/linux/statfs.h
+++ b/include/linux/statfs.h
@@ -39,5 +39,6 @@ struct kstatfs {
#define ST_NOATIME 0x0400 /* do not update access times */
#define ST_NODIRATIME 0x0800 /* do not update directory access times */
#define ST_RELATIME 0x1000 /* update atime relative to mtime/ctime */
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */

#endif
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index acb2b61..c978b25 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -130,6 +130,7 @@ struct inodes_stat_t {
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+#define MS_NOSYMFOLLOW (1<<26) /* Do not follow symlinks */

/* These sb flags are internal to the kernel */
#define MS_NOREMOTELOCK (1<<27)
--
2.8.0.rc3.226.g39d4020