Re: [PATCH 1/6] tty: implement write_iter

From: Jiri Slaby
Date: Thu Jan 21 2021 - 04:41:43 EST


On 21. 01. 21, 10:00, Greg Kroah-Hartman wrote:
From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>

This makes the tty layer use the .write_iter() function instead of the
traditional .write() functionality.

That allows writev(), but more importantly also makes it possible to
enable .splice_write() for ttys, reinstating the "splice to tty"
functionality that was lost in commit 36e2c7421f02 ("fs: don't allow
splice read/write without explicit ops").

Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")
Reported-by: Oliver Giles <ohw.giles@xxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
drivers/tty/tty_io.c | 48 ++++++++++++++++++++++++--------------------
1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 56ade99ef99f..338bc4ef5549 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -143,9 +143,8 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
DEFINE_MUTEX(tty_mutex);
static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
-static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
-ssize_t redirected_tty_write(struct file *, const char __user *,
- size_t, loff_t *);
+static ssize_t tty_write(struct kiocb *, struct iov_iter *);
+ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *);
static __poll_t tty_poll(struct file *, poll_table *);
static int tty_open(struct inode *, struct file *);
long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -478,7 +477,8 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file)
static const struct file_operations tty_fops = {
.llseek = no_llseek,
.read = tty_read,
- .write = tty_write,
+ .write_iter = tty_write,
+ .splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,
.compat_ioctl = tty_compat_ioctl,
@@ -491,7 +491,8 @@ static const struct file_operations tty_fops = {
static const struct file_operations console_fops = {
.llseek = no_llseek,
.read = tty_read,
- .write = redirected_tty_write,
+ .write_iter = redirected_tty_write,
+ .splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,
.compat_ioctl = tty_compat_ioctl,
@@ -607,9 +608,9 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
/* This breaks for file handles being sent over AF_UNIX sockets ? */
list_for_each_entry(priv, &tty->tty_files, list) {
filp = priv->file;
- if (filp->f_op->write == redirected_tty_write)
+ if (filp->f_op->write_iter == redirected_tty_write)
cons_filp = filp;
- if (filp->f_op->write != tty_write)
+ if (filp->f_op->write_iter != tty_write)

This now relies on implicit value of hung_up_tty_fops.write_iter (i.e. NULL), okay.

continue;
closecount++;
__tty_fasync(-1, filp, 0); /* can't block */
> filp->f_op = &hung_up_tty_fops;

Isn't this racy with VFS layer in vfs_write:
if (file->f_op->write)
ret = file->f_op->write(file, buf, count, pos);
else if (file->f_op->write_iter)
ret = new_sync_write(file, buf, count, pos);

? hung_up_tty_fops do not set iter_write and tty_fops do not set write. When we switch from one to the other here, right after the 'if', but before the call, what happens? Likely nothing for the ->write case immediately as compilers cache the value, but for ->write_iter, I'm not sure. Anyway, this looks broken to me. (Read on.)

@@ -956,14 +957,20 @@ static inline ssize_t do_tty_write(
size_t size = count;
if (size > chunk)
size = chunk;
+
ret = -EFAULT;
- if (copy_from_user(tty->write_buf, buf, size))
+ if (copy_from_iter(tty->write_buf, size, from) != size)
break;
+
ret = write(tty, file, tty->write_buf, size);
if (ret <= 0)
break;
+
+ /* FIXME! Have Al check this! */
+ if (ret != size)
+ iov_iter_revert(from, size-ret);
+
written += ret;
- buf += ret;
count -= ret;
if (!count)
break;
@@ -1023,9 +1030,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
* write method will not be invoked in parallel for each device.
*/
-static ssize_t tty_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from)
{
+ struct file *file = iocb->ki_filp;
struct tty_struct *tty = file_tty(file);
struct tty_ldisc *ld;
ssize_t ret;
@@ -1038,18 +1045,15 @@ static ssize_t tty_write(struct file *file, const char __user *buf,
if (tty->ops->write_room == NULL)
tty_err(tty, "missing write_room method\n");
ld = tty_ldisc_ref_wait(tty);
- if (!ld)
- return hung_up_tty_write(file, buf, count, ppos);
- if (!ld->ops->write)
+ if (!ld || !ld->ops->write)
ret = -EIO;
else
- ret = do_tty_write(ld->ops->write, tty, file, buf, count);
+ ret = do_tty_write(ld->ops->write, tty, file, from);
tty_ldisc_deref(ld);

Ok, here belongs my earlier note: "if ld == NULL => crash here." That is if hangup happens during the ldisc wait, the kernel will crash in tty_ldisc_deref.

Is there a reason not to convert hung_up_tty_fops too and leave the return hung_up_tty_write here intact? This would also solve the comments above.

return ret;
}
-ssize_t redirected_tty_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *p = NULL;

thanks,
--
js