[PATCH 3/7] vfs: add renameat2 syscall and cross-rename

From: Miklos Szeredi
Date: Tue Oct 01 2013 - 12:00:30 EST


From: Miklos Szeredi <mszeredi@xxxxxxx>

Add new renameat2 syscall, which is the same as renameat with an added
flags argument.

If flags is zero then this is a plain overwriting rename. If flags contain
RENAME_EXCHANGE then exchange source and destination files. There's no
restriction on the type of the files; e.g. a directory can be exchanged
with a symlink.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---
arch/x86/syscalls/syscall_64.tbl | 1 +
fs/dcache.c | 46 ++++++++++---
fs/namei.c | 139 ++++++++++++++++++++++++++++++---------
include/linux/dcache.h | 1 +
include/linux/fs.h | 2 +
include/uapi/linux/fs.h | 2 +
6 files changed, 152 insertions(+), 39 deletions(-)

diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65d..fafd734 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,7 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
+314 common renameat2 sys_renameat2

#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/dcache.c b/fs/dcache.c
index 4100030..1735bac 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2495,12 +2495,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
dentry->d_name.name = dentry->d_iname;
} else {
/*
- * Both are internal. Just copy target to dentry
+ * Both are internal.
*/
- memcpy(dentry->d_iname, target->d_name.name,
- target->d_name.len + 1);
- dentry->d_name.len = target->d_name.len;
- return;
+ unsigned int i;
+ BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+ for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
+ swap(((long *) &dentry->d_iname)[i],
+ ((long *) &target->d_iname)[i]);
+ }
}
}
swap(dentry->d_name.len, target->d_name.len);
@@ -2557,13 +2559,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
* __d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
+ * @exchange: exchange the two dentries
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way. Caller must hold
* rename_lock, the i_mutex of the source and target directories,
* and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
*/
-static void __d_move(struct dentry * dentry, struct dentry * target)
+static void __d_move(struct dentry *dentry, struct dentry *target,
+ bool exchange)
{
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2587,6 +2591,10 @@ static void __d_move(struct dentry * dentry, struct dentry * target)

/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
+ if (exchange) {
+ __d_rehash(target,
+ d_hash(dentry->d_parent, dentry->d_name.hash));
+ }

list_del(&dentry->d_u.d_child);
list_del(&target->d_u.d_child);
@@ -2613,6 +2621,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
write_seqcount_end(&dentry->d_seq);

dentry_unlock_parents_for_move(dentry, target);
+ if (exchange)
+ fsnotify_d_move(target);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
@@ -2630,11 +2640,31 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
void d_move(struct dentry *dentry, struct dentry *target)
{
write_seqlock(&rename_lock);
- __d_move(dentry, target);
+ __d_move(dentry, target, false);
write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);

+/*
+ * d_exchange - exchange two dentries
+ * @dentry1: first dentry
+ * @dentry2: second dentry
+ */
+void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
+{
+ write_seqlock(&rename_lock);
+
+ WARN_ON(!dentry1->d_inode);
+ WARN_ON(!dentry2->d_inode);
+ WARN_ON(IS_ROOT(dentry1));
+ WARN_ON(IS_ROOT(dentry2));
+
+ __d_move(dentry1, dentry2, true);
+
+ write_sequnlock(&rename_lock);
+}
+
+
/**
* d_ancestor - search for an ancestor
* @p1: ancestor dentry
@@ -2682,7 +2712,7 @@ static struct dentry *__d_unalias(struct inode *inode,
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
if (likely(!d_mountpoint(alias))) {
- __d_move(alias, dentry);
+ __d_move(alias, dentry, false);
ret = alias;
}
out_err:
diff --git a/fs/namei.c b/fs/namei.c
index 7ec6a12..55700b3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3963,14 +3963,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* ->i_mutex on parents, which works but leads to some truly excessive
* locking].
*/
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+static int vfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
int error;
const unsigned char *old_name;
struct inode *source = old_dentry->d_inode;
struct inode *target = new_dentry->d_inode;
bool is_dir = S_ISDIR(source->i_mode);
+ bool new_is_dir = target ? S_ISDIR(target->i_mode) : false;
+ bool overwrite = !(flags & RENAME_EXCHANGE);
+ unsigned max_links = new_dir->i_sb->s_max_links;

if (source == target)
return 0;
@@ -3981,74 +3985,116 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,

if (!target)
error = may_create(new_dir, new_dentry);
- else
+ else if (overwrite)
error = may_delete(new_dir, new_dentry, is_dir);
+ else
+ error = may_delete(new_dir, new_dentry, new_is_dir);
if (error)
return error;

- if (!old_dir->i_op->rename)
+ if (!old_dir->i_op->rename && !old_dir->i_op->rename2)
return -EPERM;

+ if (flags && !old_dir->i_op->rename2)
+ return -EOPNOTSUPP;
+
/*
* If we are going to change the parent - check write permissions,
* we'll need to flip '..'.
*/
- if (is_dir && new_dir != old_dir) {
- error = inode_permission(source, MAY_WRITE);
- if (error)
- return error;
+ if (new_dir != old_dir) {
+ if (is_dir) {
+ error = inode_permission(source, MAY_WRITE);
+ if (error)
+ return error;
+ }
+ if (!overwrite && new_is_dir) {
+ error = inode_permission(target, MAY_WRITE);
+ if (error)
+ return error;
+ }
}

error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
return error;

+ if (!overwrite) {
+ error = security_inode_rename(new_dir, new_dentry,
+ old_dir, old_dentry);
+ if (error)
+ return error;
+ }
+
old_name = fsnotify_oldname_init(old_dentry->d_name.name);
dget(new_dentry);
- if (target)
+ if (overwrite && target)
mutex_lock(&target->i_mutex);

error = -EBUSY;
if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
goto out;

- if (is_dir) {
- unsigned max_links = new_dir->i_sb->s_max_links;
-
+ if (max_links && new_dir != old_dir) {
error = -EMLINK;
- if (max_links && !target && new_dir != old_dir &&
- new_dir->i_nlink >= max_links)
+ if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
goto out;
+ if (!overwrite && !is_dir && new_is_dir &&
+ old_dir->i_nlink > max_links)
+ goto out;
+ }
+
+ if (overwrite && is_dir && target)
+ shrink_dcache_parent(new_dentry);

- if (target)
- shrink_dcache_parent(new_dentry);
+ if (old_dir->i_op->rename2) {
+ error = old_dir->i_op->rename2(old_dir, old_dentry,
+ new_dir, new_dentry, flags);
+ } else {
+ error = old_dir->i_op->rename(old_dir, old_dentry,
+ new_dir, new_dentry);
}

- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (error)
goto out;

- if (target) {
+ if (overwrite && target) {
if (is_dir)
target->i_flags |= S_DEAD;
dont_mount(new_dentry);
}
- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
- d_move(old_dentry, new_dentry);
+ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
+ if (overwrite)
+ d_move(old_dentry, new_dentry);
+ else
+ d_exchange(old_dentry, new_dentry);
+ }
out:
- if (target)
+ if (overwrite && target)
mutex_unlock(&target->i_mutex);
dput(new_dentry);
- if (!error)
+ if (!error) {
fsnotify_move(old_dir, new_dir, old_name, is_dir,
- target, old_dentry);
+ overwrite ? target : NULL, old_dentry);
+ if (!overwrite) {
+ fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
+ new_is_dir, NULL, new_dentry);
+ }
+ }
fsnotify_oldname_free(old_name);

return error;
}

-SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
- int, newdfd, const char __user *, newname)
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ return vfs_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
+}
+
+
+SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname, unsigned int, flags)
{
struct dentry *old_dir, *new_dir;
struct dentry *old_dentry, *new_dentry;
@@ -4058,7 +4104,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
struct filename *to;
unsigned int lookup_flags = 0;
bool should_retry = false;
+ bool overwrite = !(flags & RENAME_EXCHANGE);
int error;
+
+ error = -EOPNOTSUPP;
+ if (flags & ~RENAME_EXCHANGE)
+ goto exit;
+
retry:
from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
if (IS_ERR(from)) {
@@ -4091,7 +4143,8 @@ retry:

oldnd.flags &= ~LOOKUP_PARENT;
newnd.flags &= ~LOOKUP_PARENT;
- newnd.flags |= LOOKUP_RENAME_TARGET;
+ if (overwrite)
+ newnd.flags |= LOOKUP_RENAME_TARGET;

trap = lock_rename(new_dir, old_dir);

@@ -4108,7 +4161,7 @@ retry:
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
- if (newnd.last.name[newnd.last.len])
+ if (overwrite && newnd.last.name[newnd.last.len])
goto exit4;
}
/* source should not be ancestor of target */
@@ -4119,8 +4172,19 @@ retry:
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
+ if (!overwrite) {
+ error = -ENOENT;
+ if (!new_dentry->d_inode)
+ goto exit4;
+
+ if (!S_ISDIR(new_dentry->d_inode->i_mode)) {
+ error = -ENOTDIR;
+ if (newnd.last.name[newnd.last.len])
+ goto exit4;
+ }
+ }
/* target should not be an ancestor of source */
- error = -ENOTEMPTY;
+ error = overwrite ? -ENOTEMPTY : -EINVAL;
if (new_dentry == trap)
goto exit5;

@@ -4128,8 +4192,15 @@ retry:
&newnd.path, new_dentry);
if (error)
goto exit5;
- error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ if (!overwrite) {
+ error = security_path_rename(&newnd.path, new_dentry,
+ &oldnd.path, old_dentry);
+ if (error)
+ goto exit5;
+ }
+
+ error = vfs_rename2(old_dir->d_inode, old_dentry,
+ new_dir->d_inode, new_dentry, flags);
exit5:
dput(new_dentry);
exit4:
@@ -4154,9 +4225,15 @@ exit:
return error;
}

+SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname)
+{
+ return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
+}
+
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{
- return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
+ return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}

int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59066e0..ce5ebed 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -297,6 +297,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *);

/* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *);
+extern void d_exchange(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);

/* appendix may either be NULL or be used for transname suffixes */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f40547..71c6cf9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1572,6 +1572,8 @@ struct inode_operations {
int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 6c28b61..ebdafb6 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -35,6 +35,8 @@
#define SEEK_HOLE 4 /* seek to the next hole */
#define SEEK_MAX SEEK_HOLE

+#define RENAME_EXCHANGE (1 << 0) /* Exchange source and dest */
+
struct fstrim_range {
__u64 start;
__u64 len;
--
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/