[RFC v1 11/14] bus1: implement message transmission

From: David Herrmann
Date: Wed Oct 26 2016 - 15:23:27 EST


From: Tom Gundersen <teg@xxxxxxx>

While notifications already work and simply require linking bus1_handle
objects into the destination queue, real messages require proper
payloads. This implements two core objects: Message objects and
factories.

The message factory is similar to transaction contexts, and lives
completely on the stack. It is used to import the parameters given by
user-space in a SEND ioctl. It parses and validates them. With this
message factory we can now instantiate many messages, one for each
destination of a multicast.

Messages need to carry a bunch of data, mainly:
- metadata: This just matches what Unix-sockets do (uid, gid, pid,
tid, and secctx)
- payload: Random memory passed in as iovec-array by user-space
- files: Set of file-descriptors, very similar to SCM_RIGHTS
- handles: Set of local handles to transfer to the destination

Signed-off-by: Tom Gundersen <teg@xxxxxxx>
Signed-off-by: David Herrmann <dh.herrmann@xxxxxxxxx>
---
ipc/bus1/Makefile | 1 +
ipc/bus1/message.c | 613 +++++++++++++++++++++++++++++++++++++++++++++++++++++
ipc/bus1/message.h | 171 +++++++++++++++
ipc/bus1/peer.c | 2 +
ipc/bus1/peer.h | 2 +
ipc/bus1/util.c | 162 ++++++++++++++
ipc/bus1/util.h | 7 +
7 files changed, 958 insertions(+)
create mode 100644 ipc/bus1/message.c
create mode 100644 ipc/bus1/message.h

diff --git a/ipc/bus1/Makefile b/ipc/bus1/Makefile
index b87cddb..05434bda 100644
--- a/ipc/bus1/Makefile
+++ b/ipc/bus1/Makefile
@@ -1,6 +1,7 @@
bus1-y := \
handle.o \
main.o \
+ message.o \
peer.o \
tx.o \
user.o \
diff --git a/ipc/bus1/message.c b/ipc/bus1/message.c
new file mode 100644
index 0000000..4c5c905
--- /dev/null
+++ b/ipc/bus1/message.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uidgid.h>
+#include <linux/uio.h>
+#include <uapi/linux/bus1.h>
+#include "handle.h"
+#include "message.h"
+#include "peer.h"
+#include "tx.h"
+#include "user.h"
+#include "util.h"
+#include "util/flist.h"
+#include "util/pool.h"
+#include "util/queue.h"
+
+static size_t bus1_factory_size(struct bus1_cmd_send *param)
+{
+ /* make sure the returned size cannot overflow (assumes bounded counts) */
+ BUILD_BUG_ON(UIO_MAXIOV > U16_MAX);
+ BUILD_BUG_ON(BUS1_FD_MAX > U16_MAX);
+
+ /* trailing data is laid out as flist, iovecs, files; keep each aligned */
+ BUILD_BUG_ON(__alignof(struct bus1_flist) < __alignof(struct iovec));
+ BUILD_BUG_ON(__alignof(struct iovec) < __alignof(struct file *));
+
+ return sizeof(struct bus1_factory) +
+ bus1_flist_inline_size(param->n_handles) +
+ param->n_vecs * sizeof(struct iovec) +
+ param->n_fds * sizeof(struct file *);
+}
+
+/**
+ * bus1_factory_new() - create new message factory
+ * @peer: peer to operate as
+ * @param: factory parameters
+ * @stack: optional stack for factory, or NULL
+ * @n_stack: size of space at @stack
+ *
+ * This allocates a new message factory and imports the vectors, handles, files
+ * and security context described by @param. Messages can then be instantiated
+ * from it (unicast or multicast). The caller must hold @peer->local.lock.
+ *
+ * If @stack is given, this tries to place the factory on the specified stack
+ * space. The caller must guarantee that the factory does not outlive the stack
+ * frame. If this is not wanted, pass 0 as @n_stack.
+ * In either case, if @n_stack is smaller than the factory needs, the factory
+ * is allocated on the heap instead.
+ *
+ * Return: Pointer to factory, or ERR_PTR on failure.
+ */
+struct bus1_factory *bus1_factory_new(struct bus1_peer *peer,
+ struct bus1_cmd_send *param,
+ void *stack,
+ size_t n_stack)
+{
+ const struct iovec __user *ptr_vecs;
+ const u64 __user *ptr_handles;
+ const int __user *ptr_fds;
+ struct bus1_factory *f;
+ struct bus1_flist *e;
+ struct file *file;
+ size_t i, size;
+ bool is_new;
+ int r, fd;
+ u32 sid;
+ u64 id;
+
+ lockdep_assert_held(&peer->local.lock);
+
+ size = bus1_factory_size(param);
+ if (unlikely(size > n_stack)) {
+ f = kmalloc(size, GFP_TEMPORARY);
+ if (!f)
+ return ERR_PTR(-ENOMEM);
+
+ f->on_stack = false;
+ } else {
+ f = stack;
+ f->on_stack = true;
+ }
+
+ /* set to default first, so the destructor can be called anytime */
+ f->peer = peer;
+ f->param = param;
+ f->cred = current_cred();
+ f->pid = task_tgid(current);
+ f->tid = task_pid(current);
+
+ f->has_secctx = false;
+
+ f->length_vecs = 0;
+ f->n_vecs = param->n_vecs;
+ f->n_handles = 0;
+ f->n_handles_charge = 0;
+ f->n_files = 0;
+ f->n_secctx = 0;
+ f->vecs = (void *)(f + 1) + bus1_flist_inline_size(param->n_handles);
+ f->files = (void *)(f->vecs + param->n_vecs);
+ f->secctx = NULL;
+ bus1_flist_init(f->handles, f->param->n_handles);
+
+ /* import vecs; this also validates them and sums up their lengths */
+ ptr_vecs = (const struct iovec __user *)(unsigned long)param->ptr_vecs;
+ r = bus1_import_vecs(f->vecs, &f->length_vecs, ptr_vecs, f->n_vecs);
+ if (r < 0)
+ goto error;
+
+ /* import handles; n_handles only counts successfully imported ones */
+ r = bus1_flist_populate(f->handles, f->param->n_handles, GFP_TEMPORARY);
+ if (r < 0)
+ goto error;
+
+ ptr_handles = (const u64 __user *)(unsigned long)param->ptr_handles;
+ for (i = 0, e = f->handles;
+ i < f->param->n_handles;
+ e = bus1_flist_next(e, &i)) {
+ if (get_user(id, ptr_handles + f->n_handles)) {
+ r = -EFAULT;
+ goto error;
+ }
+
+ e->ptr = bus1_handle_import(peer, id, &is_new);
+ if (IS_ERR(e->ptr)) {
+ r = PTR_ERR(e->ptr);
+ goto error;
+ }
+
+ ++f->n_handles;
+ if (is_new)
+ ++f->n_handles_charge;
+ }
+
+ /* import files; n_files only counts successfully pinned ones */
+ ptr_fds = (const int __user *)(unsigned long)param->ptr_fds;
+ while (f->n_files < param->n_fds) {
+ if (get_user(fd, ptr_fds + f->n_files)) {
+ r = -EFAULT;
+ goto error;
+ }
+
+ file = bus1_import_fd(fd);
+ if (IS_ERR(file)) {
+ r = PTR_ERR(file);
+ goto error;
+ }
+
+ f->files[f->n_files++] = file;
+ }
+
+ /* import secctx; -EOPNOTSUPP is not an error, we just have no secctx */
+ security_task_getsecid(current, &sid);
+ r = security_secid_to_secctx(sid, &f->secctx, &f->n_secctx);
+ if (r != -EOPNOTSUPP) {
+ if (r < 0)
+ goto error;
+
+ f->has_secctx = true;
+ }
+
+ return f;
+
+error:
+ bus1_factory_free(f);
+ return ERR_PTR(r);
+}
+
+/**
+ * bus1_factory_free() - destroy message factory
+ * @f: factory to operate on, or NULL
+ *
+ * This destroys the message factory @f, previously created via
+ * bus1_factory_new(). The secctx, files and imported handles are released.
+ * Messages created via the factory are unaffected.
+ *
+ * If @f is NULL, this is a no-op. Otherwise, the peer's local lock must be held.
+ *
+ * Return: NULL is returned.
+ */
+struct bus1_factory *bus1_factory_free(struct bus1_factory *f)
+{
+ struct bus1_flist *e;
+ size_t i;
+
+ if (f) {
+ lockdep_assert_held(&f->peer->local.lock);
+
+ if (f->has_secctx)
+ security_release_secctx(f->secctx, f->n_secctx);
+
+ for (i = 0; i < f->n_files; ++i)
+ fput(f->files[i]);
+
+ /* Iterate and forget imported handles (f->n_handles)... */
+ for (i = 0, e = f->handles;
+ i < f->n_handles;
+ e = bus1_flist_next(e, &i)) {
+ bus1_handle_forget(e->ptr);
+ bus1_handle_unref(e->ptr);
+ }
+ /* ...but free total space (f->param->n_handles). */
+ bus1_flist_deinit(f->handles, f->param->n_handles);
+
+ if (!f->on_stack)
+ kfree(f);
+ }
+
+ return NULL;
+}
+
+/**
+ * bus1_factory_seal() - charge and commit local resources
+ * @f: factory to use
+ *
+ * The factory needs to pin and possibly create local peer resources. This
+ * commits those resources. You should call this after you instantiated all
+ * messages, since you cannot undo it easily.
+ *
+ * Return: 0 on success (the current implementation never fails).
+ */
+int bus1_factory_seal(struct bus1_factory *f)
+{
+ struct bus1_handle *h;
+ struct bus1_flist *e;
+ size_t i;
+
+ lockdep_assert_held(&f->peer->local.lock);
+
+ for (i = 0, e = f->handles;
+ i < f->n_handles;
+ e = bus1_flist_next(e, &i)) {
+ h = e->ptr;
+ if (bus1_handle_is_public(h))
+ continue;
+
+ --f->n_handles_charge;
+ WARN_ON(h != bus1_handle_acquire(h, false));
+ WARN_ON(atomic_inc_return(&h->n_user) != 1);
+ }
+
+ return 0;
+}
+
+/**
+ * bus1_factory_instantiate() - instantiate a message from a factory
+ * @f: factory to use
+ * @handle: destination handle
+ * @peer: destination peer
+ *
+ * This instantiates a new message targeted at @handle, based on the plans in
+ * the message factory @f. The local lock of the factory's peer must be held.
+ *
+ * The newly created message is not linked into any contexts, but is available
+ * for free use to the caller.
+ *
+ * Return: Pointer to new message, or ERR_PTR on failure.
+ */
+struct bus1_message *bus1_factory_instantiate(struct bus1_factory *f,
+ struct bus1_handle *handle,
+ struct bus1_peer *peer)
+{
+ struct bus1_flist *src_e, *dst_e;
+ struct bus1_message *m;
+ bool transmit_secctx;
+ struct kvec vec;
+ size_t size, i, j;
+ u64 offset;
+ int r;
+
+ lockdep_assert_held(&f->peer->local.lock);
+
+ transmit_secctx = f->has_secctx &&
+ (READ_ONCE(peer->flags) & BUS1_PEER_FLAG_WANT_SECCTX);
+
+ size = sizeof(*m) + bus1_flist_inline_size(f->n_handles) +
+ f->n_files * sizeof(struct file *);
+ m = kmalloc(size, GFP_KERNEL);
+ if (!m)
+ return ERR_PTR(-ENOMEM);
+
+ /* set to default first, so the destructor can be called anytime */
+ kref_init(&m->ref);
+ bus1_queue_node_init(&m->qnode, BUS1_MSG_DATA);
+ m->qnode.owner = peer;
+ m->dst = bus1_handle_ref(handle);
+ m->user = bus1_user_ref(f->peer->user);
+
+ m->flags = 0;
+ m->uid = from_kuid_munged(peer->cred->user_ns, f->cred->uid);
+ m->gid = from_kgid_munged(peer->cred->user_ns, f->cred->gid);
+ m->pid = pid_nr_ns(f->pid, peer->pid_ns);
+ m->tid = pid_nr_ns(f->tid, peer->pid_ns);
+
+ m->n_bytes = f->length_vecs;
+ m->n_handles = 0;
+ m->n_handles_charge = f->n_handles;
+ m->n_files = 0;
+ m->n_secctx = 0;
+ m->slice = NULL;
+ m->files = (void *)(m + 1) + bus1_flist_inline_size(f->n_handles);
+ bus1_flist_init(m->handles, f->n_handles);
+
+ /* allocate pool slice: payload, handle IDs, FD numbers, secctx (8-aligned) */
+ size = max_t(size_t, 8,
+ ALIGN(m->n_bytes, 8) +
+ ALIGN(f->n_handles * sizeof(u64), 8) +
+ ALIGN(f->n_files * sizeof(int), 8) +
+ ALIGN(f->n_secctx, 8));
+ mutex_lock(&peer->data.lock);
+ m->slice = bus1_pool_alloc(&peer->data.pool, size);
+ mutex_unlock(&peer->data.lock);
+ if (IS_ERR(m->slice)) {
+ r = PTR_ERR(m->slice);
+ m->slice = NULL;
+ goto error;
+ }
+
+ /* import blob: copy the user payload to offset 0 of the slice */
+ r = bus1_pool_write_iovec(&peer->data.pool, m->slice, 0, f->vecs,
+ f->n_vecs, f->length_vecs);
+ if (r < 0)
+ goto error;
+
+ /* import handles */
+ r = bus1_flist_populate(m->handles, f->n_handles, GFP_KERNEL);
+ if (r < 0)
+ goto error;
+
+ r = 0;
+ m->n_handles = f->n_handles;
+ i = 0;
+ j = 0;
+ src_e = f->handles;
+ dst_e = m->handles;
+ while (i < f->n_handles) {
+ WARN_ON(i != j);
+
+ dst_e->ptr = bus1_handle_ref_by_other(peer, src_e->ptr);
+ if (!dst_e->ptr) {
+ dst_e->ptr = bus1_handle_new_remote(peer, src_e->ptr);
+ if (IS_ERR(dst_e->ptr) && r >= 0) {
+ /*
+ * Continue on error until we imported all
+ * handles. Otherwise, trailing entries in the
+ * array will be stale, and the destructor
+ * cannot tell which.
+ */
+ r = PTR_ERR(dst_e->ptr);
+ }
+ }
+
+ src_e = bus1_flist_next(src_e, &i);
+ dst_e = bus1_flist_next(dst_e, &j);
+ }
+ if (r < 0)
+ goto error;
+
+ /* import files: take one extra reference per file for this message */
+ while (m->n_files < f->n_files) {
+ m->files[m->n_files] = get_file(f->files[m->n_files]);
+ ++m->n_files;
+ }
+
+ /* import secctx, stored behind the payload, handle and FD areas */
+ if (transmit_secctx) {
+ offset = ALIGN(m->n_bytes, 8) +
+ ALIGN(m->n_handles * sizeof(u64), 8) +
+ ALIGN(m->n_files * sizeof(int), 8);
+ vec = (struct kvec){
+ .iov_base = f->secctx,
+ .iov_len = f->n_secctx,
+ };
+
+ r = bus1_pool_write_kvec(&peer->data.pool, m->slice, offset,
+ &vec, 1, vec.iov_len);
+ if (r < 0)
+ goto error;
+
+ m->n_secctx = f->n_secctx;
+ m->flags |= BUS1_MSG_FLAG_HAS_SECCTX;
+ }
+
+ return m;
+
+error:
+ bus1_message_unref(m);
+ return ERR_PTR(r);
+}
+
+/**
+ * bus1_message_free() - destroy message
+ * @k: kref belonging to a message
+ *
+ * This frees the message belonging to the reference counter @k, dropping its
+ * files, handles, pool slice, user and destination-handle references. Use it
+ * via kref_put(), see bus1_message_unref(). Deallocation is rcu-delayed.
+ */
+void bus1_message_free(struct kref *k)
+{
+ struct bus1_message *m = container_of(k, struct bus1_message, ref);
+ struct bus1_peer *peer = m->qnode.owner;
+ struct bus1_flist *e;
+ size_t i;
+
+ WARN_ON(!peer);
+ lockdep_assert_held(&peer->active);
+
+ for (i = 0; i < m->n_files; ++i)
+ fput(m->files[i]);
+
+ for (i = 0, e = m->handles;
+ i < m->n_handles;
+ e = bus1_flist_next(e, &i)) {
+ if (!IS_ERR_OR_NULL(e->ptr)) {
+ if (m->qnode.group)
+ bus1_handle_release(e->ptr, true);
+ bus1_handle_unref(e->ptr);
+ }
+ }
+ bus1_flist_deinit(m->handles, m->n_handles);
+
+ if (m->slice) {
+ mutex_lock(&peer->data.lock);
+ bus1_pool_release_kernel(&peer->data.pool, m->slice);
+ mutex_unlock(&peer->data.lock);
+ }
+
+ bus1_user_unref(m->user);
+ bus1_handle_unref(m->dst);
+ bus1_queue_node_deinit(&m->qnode);
+ kfree_rcu(m, qnode.rcu);
+}
+
+/**
+ * bus1_message_stage() - stage message
+ * @m: message to operate on
+ * @tx: transaction to stage on
+ *
+ * This acquires every handle carried by @m and then stages the message on
+ * @tx. Like all stage operations, this cannot be undone. Hence, you must make
+ * sure you can continue to commit the transaction without erroring-out in
+ * between.
+ *
+ * This consumes the caller's reference on @m, plus the active reference on the
+ * destination peer.
+ */
+void bus1_message_stage(struct bus1_message *m, struct bus1_tx *tx)
+{
+ struct bus1_peer *peer = m->qnode.owner;
+ struct bus1_flist *e;
+ size_t i;
+
+ WARN_ON(!peer);
+ lockdep_assert_held(&peer->active);
+
+ for (i = 0, e = m->handles;
+ i < m->n_handles;
+ e = bus1_flist_next(e, &i))
+ e->ptr = bus1_handle_acquire(e->ptr, true);
+
+ /* this consumes an active reference on m->qnode.owner */
+ bus1_tx_stage_sync(tx, &m->qnode);
+}
+
+/**
+ * bus1_message_install() - install message payload into target process
+ * @m: message to operate on
+ * @param: receive parameters; its flags select FD installation and peeking
+ *
+ * This installs the payload FDs and handles of @m into the receiving
+ * peer and the calling process. Handles are always installed, FDs are only
+ * installed if BUS1_RECV_FLAG_INSTALL_FDS is set in @param.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int bus1_message_install(struct bus1_message *m, struct bus1_cmd_recv *param)
+{
+ size_t i, j, n, size, offset, n_handles = 0, n_fds = 0;
+ const bool inst_fds = param->flags & BUS1_RECV_FLAG_INSTALL_FDS;
+ const bool peek = param->flags & BUS1_RECV_FLAG_PEEK;
+ struct bus1_peer *peer = m->qnode.owner;
+ struct bus1_handle *h;
+ struct bus1_flist *e;
+ struct kvec vec;
+ u64 ts, *handles;
+ u8 stack[512];
+ void *buffer = stack;
+ int r, *fds;
+
+ WARN_ON(!peer);
+ lockdep_assert_held(&peer->local.lock);
+
+ size = max(m->n_files, min_t(size_t, m->n_handles, BUS1_FLIST_BATCH));
+ size *= max(sizeof(*fds), sizeof(*handles));
+ if (unlikely(size > sizeof(stack))) {
+ buffer = kmalloc(size, GFP_TEMPORARY);
+ if (!buffer)
+ return -ENOMEM;
+ }
+
+ if (m->n_handles > 0) {
+ handles = buffer;
+ ts = bus1_queue_node_get_timestamp(&m->qnode);
+ offset = ALIGN(m->n_bytes, 8);
+
+ i = 0;
+ while ((n = bus1_flist_walk(m->handles, m->n_handles,
+ &e, &i)) > 0) {
+ WARN_ON(i > m->n_handles);
+ WARN_ON(i > BUS1_FLIST_BATCH);
+
+ for (j = 0; j < n; ++j) {
+ h = e[j].ptr;
+ if (h && bus1_handle_is_live_at(h, ts)) {
+ handles[j] = bus1_handle_identify(h);
+ ++n_handles;
+ } else {
+ bus1_handle_release(h, true);
+ e[j].ptr = bus1_handle_unref(h);
+ handles[j] = BUS1_HANDLE_INVALID;
+ }
+ }
+
+ vec.iov_base = buffer;
+ vec.iov_len = n * sizeof(u64);
+
+ r = bus1_pool_write_kvec(&peer->data.pool, m->slice,
+ offset, &vec, 1, vec.iov_len);
+ if (r < 0)
+ goto exit;
+
+ offset += n * sizeof(u64);
+ }
+ }
+
+ if (inst_fds && m->n_files > 0) {
+ fds = buffer;
+
+ for ( ; n_fds < m->n_files; ++n_fds) {
+ r = get_unused_fd_flags(O_CLOEXEC);
+ if (r < 0)
+ goto exit;
+
+ fds[n_fds] = r;
+ }
+
+ vec.iov_base = fds;
+ vec.iov_len = n_fds * sizeof(int);
+ offset = ALIGN(m->n_bytes, 8) +
+ ALIGN(m->n_handles * sizeof(u64), 8);
+
+ r = bus1_pool_write_kvec(&peer->data.pool, m->slice, offset,
+ &vec, 1, vec.iov_len);
+ if (r < 0)
+ goto exit;
+ }
+
+ /* charge resources; NOTE(review): should this warn on '>' instead? confirm */
+ if (!peek) {
+ WARN_ON(n_handles < m->n_handles_charge);
+ m->n_handles_charge -= n_handles;
+ }
+
+ /* publish pool slice */
+ mutex_lock(&peer->data.lock);
+ bus1_pool_publish(&peer->data.pool, m->slice);
+ mutex_unlock(&peer->data.lock);
+
+ /* commit handles; entries released above are NULL and get skipped */
+ for (i = 0, e = m->handles;
+ i < m->n_handles;
+ e = bus1_flist_next(e, &i)) {
+ h = e->ptr;
+ if (!IS_ERR_OR_NULL(h)) {
+ WARN_ON(h != bus1_handle_acquire(h, true));
+ WARN_ON(atomic_inc_return(&h->n_user) < 1);
+ }
+ }
+
+ /* commit FDs; this leaves n_fds at 0, so the cleanup below skips them */
+ while (n_fds > 0) {
+ --n_fds;
+ fd_install(fds[n_fds], get_file(m->files[n_fds]));
+ }
+
+ r = 0;
+
+exit:
+ while (n_fds-- > 0)
+ put_unused_fd(fds[n_fds]);
+ if (buffer != stack)
+ kfree(buffer);
+ return r;
+}
diff --git a/ipc/bus1/message.h b/ipc/bus1/message.h
new file mode 100644
index 0000000..e8c982f
--- /dev/null
+++ b/ipc/bus1/message.h
@@ -0,0 +1,171 @@
+#ifndef __BUS1_MESSAGE_H
+#define __BUS1_MESSAGE_H
+
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/**
+ * DOC: Messages
+ *
+ * XXX
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include "util/flist.h"
+#include "util/queue.h"
+
+struct bus1_cmd_send;
+struct bus1_handle;
+struct bus1_peer;
+struct bus1_pool_slice;
+struct bus1_tx;
+struct bus1_user;
+struct cred;
+struct file;
+struct iovec;
+struct pid;
+
+/**
+ * struct bus1_factory - message factory
+ * @peer: sending peer
+ * @param: factory parameters, as passed to bus1_factory_new()
+ * @cred: sender credentials
+ * @pid: sender PID
+ * @tid: sender TID
+ * @on_stack: whether object lives on caller-provided stack space
+ * @has_secctx: whether secctx has been set
+ * @length_vecs: total length of data in vectors
+ * @n_vecs: number of vectors
+ * @n_handles: number of handles imported so far
+ * @n_handles_charge: number of newly created handles to charge on commit
+ * @n_files: number of files imported so far
+ * @n_secctx: length of secctx
+ * @vecs: vector array, stored behind the inline handle array
+ * @files: file array, stored behind @vecs
+ * @secctx: security context, only valid if @has_secctx is set
+ * @handles: handle array
+ */
+struct bus1_factory {
+ struct bus1_peer *peer;
+ struct bus1_cmd_send *param;
+ const struct cred *cred;
+ struct pid *pid;
+ struct pid *tid;
+
+ bool on_stack : 1;
+ bool has_secctx : 1;
+
+ size_t length_vecs;
+ size_t n_vecs;
+ size_t n_handles;
+ size_t n_handles_charge;
+ size_t n_files;
+ u32 n_secctx;
+ struct iovec *vecs;
+ struct file **files;
+ char *secctx;
+
+ struct bus1_flist handles[];
+};
+
+/**
+ * struct bus1_message - data messages
+ * @ref: reference counter
+ * @qnode: embedded queue node
+ * @dst: destination handle
+ * @user: user of the sending peer
+ * @flags: message flags
+ * @uid: sender UID, translated into the destination's user namespace
+ * @gid: sender GID, translated into the destination's user namespace
+ * @pid: sender PID, as seen in the destination's PID namespace
+ * @tid: sender TID, as seen in the destination's PID namespace
+ * @n_bytes: number of user-bytes transmitted
+ * @n_handles: number of handles transmitted
+ * @n_handles_charge: number of handle charges
+ * @n_files: number of files transmitted
+ * @n_secctx: number of bytes of security context transmitted
+ * @slice: pool slice holding the actual message data
+ * @files: passed files, stored behind the inline handle array
+ * @handles: passed handles
+ */
+struct bus1_message {
+ struct kref ref;
+ struct bus1_queue_node qnode;
+ struct bus1_handle *dst;
+ struct bus1_user *user;
+
+ u64 flags;
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ pid_t tid;
+
+ size_t n_bytes;
+ size_t n_handles;
+ size_t n_handles_charge;
+ size_t n_files;
+ size_t n_secctx;
+ struct bus1_pool_slice *slice;
+ struct file **files;
+
+ struct bus1_flist handles[];
+};
+
+struct bus1_factory *bus1_factory_new(struct bus1_peer *peer,
+ struct bus1_cmd_send *param,
+ void *stack,
+ size_t n_stack);
+struct bus1_factory *bus1_factory_free(struct bus1_factory *f);
+int bus1_factory_seal(struct bus1_factory *f);
+struct bus1_message *bus1_factory_instantiate(struct bus1_factory *f,
+ struct bus1_handle *handle,
+ struct bus1_peer *peer);
+
+void bus1_message_free(struct kref *k);
+void bus1_message_stage(struct bus1_message *m, struct bus1_tx *tx);
+int bus1_message_install(struct bus1_message *m, struct bus1_cmd_recv *param);
+
+/**
+ * bus1_message_ref() - acquire object reference
+ * @m: message to operate on, or NULL
+ *
+ * This acquires a single reference to @m. The caller must already hold a
+ * reference when calling this.
+ *
+ * If @m is NULL, this is a no-op.
+ *
+ * Return: @m is returned (NULL if @m was NULL).
+ */
+static inline struct bus1_message *bus1_message_ref(struct bus1_message *m)
+{
+ if (m)
+ kref_get(&m->ref);
+ return m;
+}
+
+/**
+ * bus1_message_unref() - release object reference
+ * @m: message to operate on, or NULL
+ *
+ * This releases a single object reference to @m. If the reference counter
+ * drops to 0, the message is destroyed via bus1_message_free().
+ *
+ * If @m is NULL, this is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+static inline struct bus1_message *bus1_message_unref(struct bus1_message *m)
+{
+ if (m)
+ kref_put(&m->ref, bus1_message_free);
+ return NULL;
+}
+
+#endif /* __BUS1_MESSAGE_H */
diff --git a/ipc/bus1/peer.c b/ipc/bus1/peer.c
index a1525cb..0ff7a98 100644
--- a/ipc/bus1/peer.c
+++ b/ipc/bus1/peer.c
@@ -70,6 +70,7 @@ struct bus1_peer *bus1_peer_new(void)

/* initialize data section */
mutex_init(&peer->data.lock);
+ peer->data.pool = BUS1_POOL_NULL;
bus1_queue_init(&peer->data.queue);

/* initialize peer-private section */
@@ -136,6 +137,7 @@ struct bus1_peer *bus1_peer_free(struct bus1_peer *peer)

/* deinitialize data section */
bus1_queue_deinit(&peer->data.queue);
+ bus1_pool_deinit(&peer->data.pool);
mutex_destroy(&peer->data.lock);

/* deinitialize constant fields */
diff --git a/ipc/bus1/peer.h b/ipc/bus1/peer.h
index 655d3ac..5eb558f 100644
--- a/ipc/bus1/peer.h
+++ b/ipc/bus1/peer.h
@@ -54,6 +54,7 @@
#include <linux/wait.h>
#include "user.h"
#include "util/active.h"
+#include "util/pool.h"
#include "util/queue.h"

struct cred;
@@ -88,6 +89,7 @@ struct bus1_peer {

struct {
struct mutex lock;
+ struct bus1_pool pool;
struct bus1_queue queue;
} data;

diff --git a/ipc/bus1/util.c b/ipc/bus1/util.c
index 8acf798..687f40d 100644
--- a/ipc/bus1/util.c
+++ b/ipc/bus1/util.c
@@ -9,12 +9,174 @@

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
+#include <linux/compat.h>
#include <linux/debugfs.h>
#include <linux/err.h>
+#include <linux/file.h>
#include <linux/fs.h>
#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <net/sock.h>
+#include "main.h"
#include "util.h"

+/**
+ * bus1_import_vecs() - import vectors from user
+ * @out_vecs: kernel memory to store vecs, preallocated
+ * @out_length: output storage for sum of all vectors lengths
+ * @vecs: user pointer for vectors
+ * @n_vecs: number of vectors to import, at most UIO_MAXIOV
+ *
+ * This copies the given vectors from user memory into the preallocated kernel
+ * buffer. Sanity checks are performed on the memory of the vector-array, the
+ * memory pointed to by the vectors and on the overall size calculation.
+ *
+ * If the vectors were copied successfully, @out_length will contain the sum of
+ * all vector-lengths.
+ *
+ * Unlike most other functions, this function might modify its output buffer
+ * even if it fails. That is, @out_vecs might contain garbage if this function
+ * fails. This is done for performance reasons.
+ *
+ * Return: 0 on success, -EMSGSIZE or -EFAULT on failure.
+ */
+int bus1_import_vecs(struct iovec *out_vecs,
+ size_t *out_length,
+ const void __user *vecs,
+ size_t n_vecs)
+{
+ size_t i, length = 0;
+
+ if (n_vecs > UIO_MAXIOV)
+ return -EMSGSIZE;
+ if (n_vecs == 0) {
+ *out_length = 0;
+ return 0;
+ }
+
+ if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall()) {
+ /*
+ * Compat types and macros are protected by CONFIG_COMPAT,
+ * rather than providing a fallback. We want compile-time
+ * coverage, so provide fallback types. The IS_ENABLED(COMPAT)
+ * condition guarantees this is collected by the dead-code
+ * elimination, anyway.
+ */
+#if IS_ENABLED(CONFIG_COMPAT)
+ const struct compat_iovec __user *uvecs = vecs;
+ compat_uptr_t v_base;
+ compat_size_t v_len;
+ compat_ssize_t v_slen;
+#else
+ const struct iovec __user *uvecs = vecs;
+ void __user *v_base;
+ size_t v_len;
+ ssize_t v_slen;
+#endif
+ void __user *v_ptr;
+
+ if (unlikely(!access_ok(VERIFY_READ, vecs,
+ sizeof(*uvecs) * n_vecs)))
+ return -EFAULT;
+
+ for (i = 0; i < n_vecs; ++i) {
+ if (unlikely(__get_user(v_base, &uvecs[i].iov_base) ||
+ __get_user(v_len, &uvecs[i].iov_len)))
+ return -EFAULT;
+
+#if IS_ENABLED(CONFIG_COMPAT)
+ v_ptr = compat_ptr(v_base);
+#else
+ v_ptr = v_base;
+#endif
+ v_slen = v_len;
+
+ if (unlikely(v_slen < 0 ||
+ (typeof(v_len))v_slen != v_len))
+ return -EMSGSIZE;
+ if (unlikely(!access_ok(VERIFY_READ, v_ptr, v_len)))
+ return -EFAULT;
+ if (unlikely((size_t)v_len > MAX_RW_COUNT - length))
+ return -EMSGSIZE;
+
+ out_vecs[i].iov_base = v_ptr;
+ out_vecs[i].iov_len = v_len;
+ length += v_len;
+ }
+ } else {
+ void __user *v_base;
+ size_t v_len;
+
+ if (copy_from_user(out_vecs, vecs, sizeof(*out_vecs) * n_vecs))
+ return -EFAULT;
+
+ for (i = 0; i < n_vecs; ++i) {
+ v_base = out_vecs[i].iov_base;
+ v_len = out_vecs[i].iov_len;
+
+ if (unlikely((ssize_t)v_len < 0))
+ return -EMSGSIZE;
+ if (unlikely(!access_ok(VERIFY_READ, v_base, v_len)))
+ return -EFAULT;
+ if (unlikely(v_len > MAX_RW_COUNT - length))
+ return -EMSGSIZE;
+
+ length += v_len;
+ }
+ }
+
+ *out_length = length;
+ return 0;
+}
+
+/**
+ * bus1_import_fd() - import file descriptor from user
+ * @fd: file descriptor number to import
+ *
+ * This imports a file-descriptor from the current user-context. The FD number
+ * @fd is resolved to a file, which is returned to the caller with a reference
+ * held. Negative or unknown FD numbers yield EBADF.
+ *
+ * Neither bus1, nor UDS files are allowed (unless opened with O_PATH). If those
+ * are supplied, EOPNOTSUPP is returned. Those would require expensive
+ * garbage-collection if they're sent recursively by user-space.
+ *
+ * Return: Pointer to pinned file, ERR_PTR on failure.
+ */
+struct file *bus1_import_fd(int fd)
+{
+ struct file *f, *ret;
+ struct socket *sock;
+ struct inode *inode;
+
+ if (unlikely(fd < 0))
+ return ERR_PTR(-EBADF);
+
+ f = fget_raw(fd);
+ if (unlikely(!f))
+ return ERR_PTR(-EBADF);
+
+ inode = file_inode(f);
+ sock = S_ISSOCK(inode->i_mode) ? SOCKET_I(inode) : NULL;
+
+ if (f->f_mode & FMODE_PATH)
+ ret = f; /* O_PATH is always allowed */
+ else if (f->f_op == &bus1_fops)
+ ret = ERR_PTR(-EOPNOTSUPP); /* disallow bus1 recursion */
+ else if (sock && sock->sk && sock->ops && sock->ops->family == PF_UNIX)
+ ret = ERR_PTR(-EOPNOTSUPP); /* disallow UDS recursion */
+ else
+ ret = f; /* all others are allowed */
+
+ if (f != ret)
+ fput(f);
+
+ return ret;
+}
+
#if defined(CONFIG_DEBUG_FS)

static int bus1_debugfs_atomic_t_get(void *data, u64 *val)
diff --git a/ipc/bus1/util.h b/ipc/bus1/util.h
index c22ecd5..ab41d5e 100644
--- a/ipc/bus1/util.h
+++ b/ipc/bus1/util.h
@@ -26,6 +26,7 @@
#include <linux/types.h>

struct dentry;
+struct iovec;

/**
* BUS1_TAIL - tail pointer in singly-linked lists
@@ -37,6 +38,12 @@ struct dentry;
*/
#define BUS1_TAIL ERR_PTR(-1)

+int bus1_import_vecs(struct iovec *out_vecs,
+ size_t *out_length,
+ const void __user *vecs,
+ size_t n_vecs);
+struct file *bus1_import_fd(int fd);
+
#if defined(CONFIG_DEBUG_FS)

struct dentry *
--
2.10.1