Re: [PATCH 2/7][RFC] netfilter: add xt_qtaguid matching module

From: richard -rw- weinberger
Date: Sat Sep 22 2012 - 17:19:17 EST


Just a few comments, please see below.
In general I'd send this module also to netfilter-devel@... and get
rid of most debugging stuff.

On Sat, Sep 22, 2012 at 4:10 AM, John Stultz <john.stultz@xxxxxxxxxx> wrote:
> Put procfs dirs in /proc/net/xt_qtaguid/
> ctrl
> stats
> iface_stat/<iface>/...
> The uid stats are obtainable in ./stats.

Do we really want new files in /proc?

> +static const char *module_procdirname = "xt_qtaguid";

Why not char[]?

> +static struct proc_dir_entry *xt_qtaguid_procdir;
> +
> +static unsigned int proc_iface_perms = S_IRUGO;
> +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
> +
> +static struct proc_dir_entry *xt_qtaguid_stats_file;
> +static unsigned int proc_stats_perms = S_IRUGO;
> +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
> +
> +static struct proc_dir_entry *xt_qtaguid_ctrl_file;
> +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
> +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
> +
> +/* 0 means, don't limit anybody */
> +static gid_t proc_stats_readall_gid;
> +static gid_t proc_ctrl_write_gid;
> +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
> + S_IRUGO | S_IWUSR);
> +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
> + S_IRUGO | S_IWUSR);
> +
> +/*
> + * Limit the number of active tags (via socket tags) for a given UID.
> + * Multiple processes could share the UID.
> + */
> +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
> +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
> +
> +/*
> + * After the kernel has initiallized this module, it is still possible
> + * to make it passive.
> + * Setting passive to Y:
> + * - the iface stats handling will not act on notifications.
> + * - iptables matches will never match.
> + * - ctrl commands silently succeed.
> + * - stats are always empty.
> + * This is mostly usefull when a bug is suspected.
> + */
> +static bool module_passive;
> +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
> +
> +/*
> + * Control how qtaguid data is tracked per proc/uid.
> + * Setting tag_tracking_passive to Y:
> + * - don't create proc specific structs to track tags
> + * - don't check that active tag stats exceed some limits.
> + * - don't clean up socket tags on process exits.
> + * This is mostly usefull when a bug is suspected.
> + */
> +static bool qtu_proc_handling_passive;
> +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
> + S_IRUGO | S_IWUSR);
> +
> +#define QTU_DEV_NAME "xt_qtaguid"

We have this string already in module_procdirname.

> +static LIST_HEAD(iface_stat_list);
> +static DEFINE_SPINLOCK(iface_stat_list_lock);
> +
> +static struct rb_root sock_tag_tree = RB_ROOT;
> +static DEFINE_SPINLOCK(sock_tag_list_lock);
> +
> +static struct rb_root tag_counter_set_tree = RB_ROOT;
> +static DEFINE_SPINLOCK(tag_counter_set_list_lock);
> +
> +static struct rb_root uid_tag_data_tree = RB_ROOT;
> +static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
> +
> +static struct rb_root proc_qtu_data_tree = RB_ROOT;
> +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
> +
> +static struct qtaguid_event_counts qtu_events;
> +/*----------------------------------------------*/
> +static bool can_manipulate_uids(void)
> +{
> + /* root pwnd */
> + return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
> + || in_egroup_p(proc_ctrl_write_gid);
> +}
> +
> +static bool can_impersonate_uid(uid_t uid)
> +{
> + return uid == current_fsuid() || can_manipulate_uids();
> +}
> +
> +static bool can_read_other_uid_stats(uid_t uid)
> +{
> + /* root pwnd */
> + return unlikely(!current_fsuid()) || uid == current_fsuid()
> + || unlikely(!proc_stats_readall_gid)
> + || in_egroup_p(proc_stats_readall_gid);
> +}
> +
> +static inline void dc_add_byte_packets(struct data_counters *counters, int set,
> + enum ifs_tx_rx direction,
> + enum ifs_proto ifs_proto,
> + int bytes,
> + int packets)
> +{
> + counters->bpc[set][direction][ifs_proto].bytes += bytes;
> + counters->bpc[set][direction][ifs_proto].packets += packets;
> +}
> +
> +static inline uint64_t dc_sum_bytes(struct data_counters *counters,
> + int set,
> + enum ifs_tx_rx direction)
> +{
> + return counters->bpc[set][direction][IFS_TCP].bytes
> + + counters->bpc[set][direction][IFS_UDP].bytes
> + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
> +}
> +
> +static inline uint64_t dc_sum_packets(struct data_counters *counters,
> + int set,
> + enum ifs_tx_rx direction)
> +{
> + return counters->bpc[set][direction][IFS_TCP].packets
> + + counters->bpc[set][direction][IFS_UDP].packets
> + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
> +}
> +
> +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
> +{
> + struct rb_node *node = root->rb_node;
> +
> + while (node) {
> + struct tag_node *data = rb_entry(node, struct tag_node, node);
> + int result;
> + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
> + " node=%p data=%p\n", tag, node, data);
> + result = tag_compare(tag, data->tag);
> + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
> + " data.tag=0x%llx (uid=%u) res=%d\n",
> + tag, data->tag, get_uid_from_tag(data->tag), result);
> + if (result < 0)
> + node = node->rb_left;
> + else if (result > 0)
> + node = node->rb_right;
> + else
> + return data;
> + }
> + return NULL;
> +}
> +
> +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> + /* Figure out where to put new node */
> + while (*new) {
> + struct tag_node *this = rb_entry(*new, struct tag_node,
> + node);
> + int result = tag_compare(data->tag, this->tag);
> + RB_DEBUG("qtaguid: %s(): tag=0x%llx"
> + " (uid=%u)\n", __func__,
> + this->tag,
> + get_uid_from_tag(this->tag));
> + parent = *new;
> + if (result < 0)
> + new = &((*new)->rb_left);
> + else if (result > 0)
> + new = &((*new)->rb_right);
> + else
> + BUG();

WARN_ONCE(), please.
Otherwise an attacker may trigger the BUG() remotely in case of an
implementation error...

> + }
> +
> + /* Add new node and rebalance tree. */
> + rb_link_node(&data->node, parent, new);
> + rb_insert_color(&data->node, root);
> +}
> +
> +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
> +{
> + tag_node_tree_insert(&data->tn, root);
> +}
> +
> +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
> +{
> + struct tag_node *node = tag_node_tree_search(root, tag);
> + if (!node)
> + return NULL;
> + return rb_entry(&node->node, struct tag_stat, tn.node);
> +}
> +
> +static void tag_counter_set_tree_insert(struct tag_counter_set *data,
> + struct rb_root *root)
> +{
> + tag_node_tree_insert(&data->tn, root);
> +}
> +
> +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
> + tag_t tag)
> +{
> + struct tag_node *node = tag_node_tree_search(root, tag);
> + if (!node)
> + return NULL;
> + return rb_entry(&node->node, struct tag_counter_set, tn.node);
> +
> +}
> +
> +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
> +{
> + tag_node_tree_insert(&data->tn, root);
> +}
> +
> +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
> +{
> + struct tag_node *node = tag_node_tree_search(root, tag);
> + if (!node)
> + return NULL;
> + return rb_entry(&node->node, struct tag_ref, tn.node);
> +}
> +
> +static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
> + const struct sock *sk)
> +{
> + struct rb_node *node = root->rb_node;
> +
> + while (node) {
> + struct sock_tag *data = rb_entry(node, struct sock_tag,
> + sock_node);
> + if (sk < data->sk)
> + node = node->rb_left;
> + else if (sk > data->sk)
> + node = node->rb_right;
> + else
> + return data;
> + }
> + return NULL;
> +}
> +
> +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> + /* Figure out where to put new node */
> + while (*new) {
> + struct sock_tag *this = rb_entry(*new, struct sock_tag,
> + sock_node);
> + parent = *new;
> + if (data->sk < this->sk)
> + new = &((*new)->rb_left);
> + else if (data->sk > this->sk)
> + new = &((*new)->rb_right);
> + else
> + BUG();

Same here.

> + }
> +
> + /* Add new node and rebalance tree. */
> + rb_link_node(&data->sock_node, parent, new);
> + rb_insert_color(&data->sock_node, root);
> +}
> +
> +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
> +{
> + struct rb_node *node;
> + struct sock_tag *st_entry;
> +
> + node = rb_first(st_to_free_tree);
> + while (node) {
> + st_entry = rb_entry(node, struct sock_tag, sock_node);
> + node = rb_next(node);
> + CT_DEBUG("qtaguid: %s(): "
> + "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
> + st_entry->sk,
> + st_entry->tag,
> + get_uid_from_tag(st_entry->tag));
> + rb_erase(&st_entry->sock_node, st_to_free_tree);
> + sockfd_put(st_entry->socket);
> + kfree(st_entry);
> + }
> +}
> +
> +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
> + const pid_t pid)
> +{
> + struct rb_node *node = root->rb_node;
> +
> + while (node) {
> + struct proc_qtu_data *data = rb_entry(node,
> + struct proc_qtu_data,
> + node);
> + if (pid < data->pid)
> + node = node->rb_left;
> + else if (pid > data->pid)
> + node = node->rb_right;
> + else
> + return data;
> + }
> + return NULL;
> +}
> +
> +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
> + struct rb_root *root)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> + /* Figure out where to put new node */
> + while (*new) {
> + struct proc_qtu_data *this = rb_entry(*new,
> + struct proc_qtu_data,
> + node);
> + parent = *new;
> + if (data->pid < this->pid)
> + new = &((*new)->rb_left);
> + else if (data->pid > this->pid)
> + new = &((*new)->rb_right);
> + else
> + BUG();

Same here.

> + }
> +
> + /* Add new node and rebalance tree. */
> + rb_link_node(&data->node, parent, new);
> + rb_insert_color(&data->node, root);
> +}
> +
> +static void uid_tag_data_tree_insert(struct uid_tag_data *data,
> + struct rb_root *root)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> + /* Figure out where to put new node */
> + while (*new) {
> + struct uid_tag_data *this = rb_entry(*new,
> + struct uid_tag_data,
> + node);
> + parent = *new;
> + if (data->uid < this->uid)
> + new = &((*new)->rb_left);
> + else if (data->uid > this->uid)
> + new = &((*new)->rb_right);
> + else
> + BUG();

Same here.

> + }
> +
> + /* Add new node and rebalance tree. */
> + rb_link_node(&data->node, parent, new);
> + rb_insert_color(&data->node, root);
> +}
> +
> +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
> + uid_t uid)
> +{
> + struct rb_node *node = root->rb_node;
> +
> + while (node) {
> + struct uid_tag_data *data = rb_entry(node,
> + struct uid_tag_data,
> + node);
> + if (uid < data->uid)
> + node = node->rb_left;
> + else if (uid > data->uid)
> + node = node->rb_right;
> + else
> + return data;
> + }
> + return NULL;
> +}
> +
> +/*
> + * Allocates a new uid_tag_data struct if needed.
> + * Returns a pointer to the found or allocated uid_tag_data.
> + * Returns a PTR_ERR on failures, and lock is not held.
> + * If found is not NULL:
> + * sets *found to true if not allocated.
> + * sets *found to false if allocated.
> + */
> +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
> +{
> + struct uid_tag_data *utd_entry;
> +
> + /* Look for top level uid_tag_data for the UID */
> + utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
> + DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
> +
> + if (found_res)
> + *found_res = utd_entry;
> + if (utd_entry)
> + return utd_entry;
> +
> + utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
> + if (!utd_entry) {
> + pr_err("qtaguid: get_uid_data(%u): "
> + "tag data alloc failed\n", uid);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + utd_entry->uid = uid;
> + utd_entry->tag_ref_tree = RB_ROOT;
> + uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
> + DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
> + return utd_entry;
> +}
> +
> +/* Never returns NULL. Either PTR_ERR or a valid ptr. */
> +static struct tag_ref *new_tag_ref(tag_t new_tag,
> + struct uid_tag_data *utd_entry)
> +{
> + struct tag_ref *tr_entry;
> + int res;
> +
> + if (utd_entry->num_active_tags + 1 > max_sock_tags) {
> + pr_info("qtaguid: new_tag_ref(0x%llx): "
> + "tag ref alloc quota exceeded. max=%d\n",
> + new_tag, max_sock_tags);
> + res = -EMFILE;
> + goto err_res;
> +
> + }
> +
> + tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
> + if (!tr_entry) {
> + pr_err("qtaguid: new_tag_ref(0x%llx): "
> + "tag ref alloc failed\n",
> + new_tag);
> + res = -ENOMEM;
> + goto err_res;
> + }
> + tr_entry->tn.tag = new_tag;
> + /* tr_entry->num_sock_tags handled by caller */
> + utd_entry->num_active_tags++;
> + tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
> + DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
> + " inserted new tag ref %p\n",
> + new_tag, tr_entry);
> + return tr_entry;
> +
> +err_res:
> + return ERR_PTR(res);
> +}
> +
> +static struct tag_ref *lookup_tag_ref(tag_t full_tag,
> + struct uid_tag_data **utd_res)
> +{
> + struct uid_tag_data *utd_entry;
> + struct tag_ref *tr_entry;
> + bool found_utd;
> + uid_t uid = get_uid_from_tag(full_tag);
> +
> + DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
> + full_tag, uid);
> +
> + utd_entry = get_uid_data(uid, &found_utd);
> + if (IS_ERR_OR_NULL(utd_entry)) {
> + if (utd_res)
> + *utd_res = utd_entry;
> + return NULL;
> + }
> +
> + tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
> + if (utd_res)
> + *utd_res = utd_entry;
> + DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
> + full_tag, utd_entry, tr_entry);
> + return tr_entry;
> +}
> +
> +/* Never returns NULL. Either PTR_ERR or a valid ptr. */
> +static struct tag_ref *get_tag_ref(tag_t full_tag,
> + struct uid_tag_data **utd_res)
> +{
> + struct uid_tag_data *utd_entry;
> + struct tag_ref *tr_entry;
> +
> + DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
> + full_tag);
> + spin_lock_bh(&uid_tag_data_tree_lock);
> + tr_entry = lookup_tag_ref(full_tag, &utd_entry);
> + BUG_ON(IS_ERR_OR_NULL(utd_entry));
> + if (!tr_entry)
> + tr_entry = new_tag_ref(full_tag, utd_entry);
> +
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> + if (utd_res)
> + *utd_res = utd_entry;
> + DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
> + full_tag, utd_entry, tr_entry);
> + return tr_entry;
> +}
> +
> +/* Checks and maybe frees the UID Tag Data entry */
> +static void put_utd_entry(struct uid_tag_data *utd_entry)
> +{
> + /* Are we done with the UID tag data entry? */
> + if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
> + !utd_entry->num_pqd) {
> + DR_DEBUG("qtaguid: %s(): "
> + "erase utd_entry=%p uid=%u "
> + "by pid=%u tgid=%u uid=%u\n", __func__,
> + utd_entry, utd_entry->uid,
> + current->pid, current->tgid, current_fsuid());
> + BUG_ON(utd_entry->num_active_tags);
> + rb_erase(&utd_entry->node, &uid_tag_data_tree);
> + kfree(utd_entry);
> + } else {
> + DR_DEBUG("qtaguid: %s(): "
> + "utd_entry=%p still has %d tags %d proc_qtu_data\n",
> + __func__, utd_entry, utd_entry->num_active_tags,
> + utd_entry->num_pqd);
> + BUG_ON(!(utd_entry->num_active_tags ||
> + utd_entry->num_pqd));
> + }
> +}
> +
> +/*
> + * If no sock_tags are using this tag_ref,
> + * decrements refcount of utd_entry, removes tr_entry
> + * from utd_entry->tag_ref_tree and frees.
> + */
> +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
> + struct uid_tag_data *utd_entry)
> +{
> + DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
> + tr_entry, tr_entry->tn.tag,
> + get_uid_from_tag(tr_entry->tn.tag));
> + if (!tr_entry->num_sock_tags) {
> + BUG_ON(!utd_entry->num_active_tags);
> + utd_entry->num_active_tags--;
> + rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
> + DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
> + kfree(tr_entry);
> + }
> +}
> +
> +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
> +{
> + struct rb_node *node;
> + struct tag_ref *tr_entry;
> + tag_t acct_tag;
> +
> + DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
> + full_tag, get_uid_from_tag(full_tag));
> + acct_tag = get_atag_from_tag(full_tag);
> + node = rb_first(&utd_entry->tag_ref_tree);
> + while (node) {
> + tr_entry = rb_entry(node, struct tag_ref, tn.node);
> + node = rb_next(node);
> + if (!acct_tag || tr_entry->tn.tag == full_tag)
> + free_tag_ref_from_utd_entry(tr_entry, utd_entry);
> + }
> +}
> +
> +static int read_proc_u64(char *page, char **start, off_t off,
> + int count, int *eof, void *data)
> +{
> + int len;
> + uint64_t value;
> + char *p = page;
> + uint64_t *iface_entry = data;
> +
> + if (!data)
> + return 0;
> +
> + value = *iface_entry;
> + p += sprintf(p, "%llu\n", value);
> + len = (p - page) - off;
> + *eof = (len <= count) ? 1 : 0;
> + *start = page + off;
> + return len;
> +}
> +
> +static int read_proc_bool(char *page, char **start, off_t off,
> + int count, int *eof, void *data)
> +{
> + int len;
> + bool value;
> + char *p = page;
> + bool *bool_entry = data;
> +
> + if (!data)
> + return 0;
> +
> + value = *bool_entry;
> + p += sprintf(p, "%u\n", value);
> + len = (p - page) - off;
> + *eof = (len <= count) ? 1 : 0;
> + *start = page + off;
> + return len;
> +}
> +
> +static int get_active_counter_set(tag_t tag)
> +{
> + int active_set = 0;
> + struct tag_counter_set *tcs;
> +
> + MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
> + " (uid=%u)\n",
> + tag, get_uid_from_tag(tag));
> + /* For now we only handle UID tags for active sets */
> + tag = get_utag_from_tag(tag);
> + spin_lock_bh(&tag_counter_set_list_lock);
> + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
> + if (tcs)
> + active_set = tcs->active_set;
> + spin_unlock_bh(&tag_counter_set_list_lock);
> + return active_set;
> +}
> +
> +/*
> + * Find the entry for tracking the specified interface.
> + * Caller must hold iface_stat_list_lock
> + */
> +static struct iface_stat *get_iface_entry(const char *ifname)
> +{
> + struct iface_stat *iface_entry;
> +
> + /* Find the entry for tracking the specified tag within the interface */
> + if (ifname == NULL) {
> + pr_info("qtaguid: iface_stat: get() NULL device name\n");
> + return NULL;
> + }

Can ifname really become NULL?

> + /* Iterate over interfaces */
> + list_for_each_entry(iface_entry, &iface_stat_list, list) {
> + if (!strcmp(ifname, iface_entry->ifname))
> + goto done;
> + }
> + iface_entry = NULL;
> +done:
> + return iface_entry;
> +}
> +
> +static int iface_stat_all_proc_read(char *page, char **num_items_returned,
> + off_t items_to_skip, int char_count,
> + int *eof, void *data)
> +{
> + char *outp = page;
> + int item_index = 0;
> + int len;
> + struct iface_stat *iface_entry;
> + struct rtnl_link_stats64 dev_stats, *stats;
> + struct rtnl_link_stats64 no_dev_stats = {0};
> +
> + if (unlikely(module_passive)) {
> + *eof = 1;
> + return 0;
> + }
> +
> + CT_DEBUG("qtaguid:proc iface_stat_all "
> + "page=%p *num_items_returned=%p off=%ld "
> + "char_count=%d *eof=%d\n", page, *num_items_returned,
> + items_to_skip, char_count, *eof);
> +
> + if (*eof)
> + return 0;
> +
> + /*
> + * This lock will prevent iface_stat_update() from changing active,
> + * and in turn prevent an interface from unregistering itself.
> + */
> + spin_lock_bh(&iface_stat_list_lock);
> + list_for_each_entry(iface_entry, &iface_stat_list, list) {
> + if (item_index++ < items_to_skip)
> + continue;
> +
> + if (iface_entry->active) {
> + stats = dev_get_stats(iface_entry->net_dev,
> + &dev_stats);
> + } else {
> + stats = &no_dev_stats;
> + }
> + len = snprintf(outp, char_count,
> + "%s %d "
> + "%llu %llu %llu %llu "
> + "%llu %llu %llu %llu\n",
> + iface_entry->ifname,
> + iface_entry->active,
> + iface_entry->totals[IFS_RX].bytes,
> + iface_entry->totals[IFS_RX].packets,
> + iface_entry->totals[IFS_TX].bytes,
> + iface_entry->totals[IFS_TX].packets,
> + stats->rx_bytes, stats->rx_packets,
> + stats->tx_bytes, stats->tx_packets);
> + if (len >= char_count) {
> + spin_unlock_bh(&iface_stat_list_lock);
> + *outp = '\0';
> + return outp - page;
> + }
> + outp += len;
> + char_count -= len;
> + (*num_items_returned)++;
> + }
> + spin_unlock_bh(&iface_stat_list_lock);
> +
> + *eof = 1;
> + return outp - page;
> +}
> +
> +static void iface_create_proc_worker(struct work_struct *work)
> +{
> + struct proc_dir_entry *proc_entry;
> + struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
> + iface_work);
> + struct iface_stat *new_iface = isw->iface_entry;
> +
> + /* iface_entries are not deleted, so safe to manipulate. */
> + proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
> + if (IS_ERR_OR_NULL(proc_entry)) {
> + pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
> + kfree(isw);
> + return;
> + }
> +
> + new_iface->proc_ptr = proc_entry;
> +
> + create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
> + read_proc_u64, &new_iface->totals[IFS_TX].bytes);
> + create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
> + read_proc_u64, &new_iface->totals[IFS_RX].bytes);
> + create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
> + read_proc_u64, &new_iface->totals[IFS_TX].packets);
> + create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
> + read_proc_u64, &new_iface->totals[IFS_RX].packets);
> + create_proc_read_entry("active", proc_iface_perms, proc_entry,
> + read_proc_bool, &new_iface->active);
> +
> + IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
> + "entry=%p dev=%s\n", new_iface, new_iface->ifname);
> + kfree(isw);
> +}
> +
> +/*
> + * Will set the entry's active state, and
> + * update the net_dev accordingly also.
> + */
> +static void _iface_stat_set_active(struct iface_stat *entry,
> + struct net_device *net_dev,
> + bool activate)
> +{
> + if (activate) {
> + entry->net_dev = net_dev;
> + entry->active = true;
> + IF_DEBUG("qtaguid: %s(%s): "
> + "enable tracking. rfcnt=%d\n", __func__,
> + entry->ifname,
> + this_cpu_read(*net_dev->pcpu_refcnt));
> + } else {
> + entry->active = false;
> + entry->net_dev = NULL;
> + IF_DEBUG("qtaguid: %s(%s): "
> + "disable tracking. rfcnt=%d\n", __func__,
> + entry->ifname,
> + this_cpu_read(*net_dev->pcpu_refcnt));
> +
> + }
> +}
> +
> +/* Caller must hold iface_stat_list_lock */
> +static struct iface_stat *iface_alloc(struct net_device *net_dev)
> +{
> + struct iface_stat *new_iface;
> + struct iface_stat_work *isw;
> +
> + new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
> + if (new_iface == NULL) {
> + pr_err("qtaguid: iface_stat: create(%s): "
> + "iface_stat alloc failed\n", net_dev->name);
> + return NULL;
> + }
> + new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
> + if (new_iface->ifname == NULL) {
> + pr_err("qtaguid: iface_stat: create(%s): "
> + "ifname alloc failed\n", net_dev->name);
> + kfree(new_iface);
> + return NULL;
> + }
> + spin_lock_init(&new_iface->tag_stat_list_lock);
> + new_iface->tag_stat_tree = RB_ROOT;
> + _iface_stat_set_active(new_iface, net_dev, true);
> +
> + /*
> + * ipv6 notifier chains are atomic :(
> + * No create_proc_read_entry() for you!
> + */
> + isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
> + if (!isw) {
> + pr_err("qtaguid: iface_stat: create(%s): "
> + "work alloc failed\n", new_iface->ifname);
> + _iface_stat_set_active(new_iface, net_dev, false);
> + kfree(new_iface->ifname);
> + kfree(new_iface);
> + return NULL;
> + }
> + isw->iface_entry = new_iface;
> + INIT_WORK(&isw->iface_work, iface_create_proc_worker);
> + schedule_work(&isw->iface_work);
> + list_add(&new_iface->list, &iface_stat_list);
> + return new_iface;
> +}
> +
> +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
> + struct iface_stat *iface)
> +{
> + struct rtnl_link_stats64 dev_stats, *stats;
> + bool stats_rewound;
> +
> + stats = dev_get_stats(net_dev, &dev_stats);
> + /* No empty packets */
> + stats_rewound =
> + (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
> + || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
> +
> + IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
> + "bytes rx/tx=%llu/%llu "
> + "active=%d last_known=%d "
> + "stats_rewound=%d\n", __func__,
> + net_dev ? net_dev->name : "?",
> + iface, net_dev,
> + stats->rx_bytes, stats->tx_bytes,
> + iface->active, iface->last_known_valid, stats_rewound);
> +
> + if (iface->active && iface->last_known_valid && stats_rewound) {
> + pr_warn_once("qtaguid: iface_stat: %s(%s): "
> + "iface reset its stats unexpectedly\n", __func__,
> + net_dev->name);
> +
> + iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
> + iface->totals[IFS_TX].packets +=
> + iface->last_known[IFS_TX].packets;
> + iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
> + iface->totals[IFS_RX].packets +=
> + iface->last_known[IFS_RX].packets;
> + iface->last_known_valid = false;
> + IF_DEBUG("qtaguid: %s(%s): iface=%p "
> + "used last known bytes rx/tx=%llu/%llu\n", __func__,
> + iface->ifname, iface, iface->last_known[IFS_RX].bytes,
> + iface->last_known[IFS_TX].bytes);
> + }
> +}
> +
> +/*
> + * Create a new entry for tracking the specified interface.
> + * Do nothing if the entry already exists.
> + * Called when an interface is configured with a valid IP address.
> + */
> +static void iface_stat_create(struct net_device *net_dev,
> + struct in_ifaddr *ifa)
> +{
> + struct in_device *in_dev = NULL;
> + const char *ifname;
> + struct iface_stat *entry;
> + __be32 ipaddr = 0;
> + struct iface_stat *new_iface;
> +
> + IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
> + net_dev ? net_dev->name : "?",
> + ifa, net_dev);
> + if (!net_dev) {
> + pr_err("qtaguid: iface_stat: create(): no net dev\n");
> + return;
> + }
> +
> + ifname = net_dev->name;
> + if (!ifa) {
> + in_dev = in_dev_get(net_dev);
> + if (!in_dev) {
> + pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
> + ifname);
> + return;
> + }
> + IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
> + ifname, in_dev);
> + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
> + IF_DEBUG("qtaguid: iface_stat: create(%s): "
> + "ifa=%p ifa_label=%s\n",
> + ifname, ifa,
> + ifa->ifa_label ? ifa->ifa_label : "(null)");
> + if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
> + break;
> + }
> + }
> +
> + if (!ifa) {
> + IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
> + ifname);
> + goto done_put;
> + }
> + ipaddr = ifa->ifa_local;
> +
> + spin_lock_bh(&iface_stat_list_lock);
> + entry = get_iface_entry(ifname);
> + if (entry != NULL) {
> + bool activate = !ipv4_is_loopback(ipaddr);
> + IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
> + ifname, entry);
> + iface_check_stats_reset_and_adjust(net_dev, entry);
> + _iface_stat_set_active(entry, net_dev, activate);
> + IF_DEBUG("qtaguid: %s(%s): "
> + "tracking now %d on ip=%pI4\n", __func__,
> + entry->ifname, activate, &ipaddr);
> + goto done_unlock_put;
> + } else if (ipv4_is_loopback(ipaddr)) {
> + IF_DEBUG("qtaguid: iface_stat: create(%s): "
> + "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
> + goto done_unlock_put;
> + }
> +
> + new_iface = iface_alloc(net_dev);
> + IF_DEBUG("qtaguid: iface_stat: create(%s): done "
> + "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
> +done_unlock_put:
> + spin_unlock_bh(&iface_stat_list_lock);
> +done_put:
> + if (in_dev)
> + in_dev_put(in_dev);
> +}
> +
> +static void iface_stat_create_ipv6(struct net_device *net_dev,
> + struct inet6_ifaddr *ifa)
> +{
> + struct in_device *in_dev;
> + const char *ifname;
> + struct iface_stat *entry;
> + struct iface_stat *new_iface;
> + int addr_type;
> +
> + IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
> + ifa, net_dev, net_dev ? net_dev->name : "");
> + if (!net_dev) {
> + pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
> + return;
> + }
> + ifname = net_dev->name;
> +
> + in_dev = in_dev_get(net_dev);
> + if (!in_dev) {
> + pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
> + ifname);
> + return;
> + }
> +
> + IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
> + ifname, in_dev);
> +
> + if (!ifa) {
> + IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
> + ifname);
> + goto done_put;
> + }
> + addr_type = ipv6_addr_type(&ifa->addr);
> +
> + spin_lock_bh(&iface_stat_list_lock);
> + entry = get_iface_entry(ifname);
> + if (entry != NULL) {
> + bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
> + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
> + ifname, entry);
> + iface_check_stats_reset_and_adjust(net_dev, entry);
> + _iface_stat_set_active(entry, net_dev, activate);
> + IF_DEBUG("qtaguid: %s(%s): "
> + "tracking now %d on ip=%pI6c\n", __func__,
> + entry->ifname, activate, &ifa->addr);
> + goto done_unlock_put;
> + } else if (addr_type & IPV6_ADDR_LOOPBACK) {
> + IF_DEBUG("qtaguid: %s(%s): "
> + "ignore loopback dev. ip=%pI6c\n", __func__,
> + ifname, &ifa->addr);
> + goto done_unlock_put;
> + }
> +
> + new_iface = iface_alloc(net_dev);
> + IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
> + "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
> +
> +done_unlock_put:
> + spin_unlock_bh(&iface_stat_list_lock);
> +done_put:
> + in_dev_put(in_dev);
> +}
> +
> +static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
> +{
> + MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
> + return sock_tag_tree_search(&sock_tag_tree, sk);
> +}
> +
> +static struct sock_tag *get_sock_stat(const struct sock *sk)
> +{
> + struct sock_tag *sock_tag_entry;
> + MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
> + if (!sk)
> + return NULL;
> + spin_lock_bh(&sock_tag_list_lock);
> + sock_tag_entry = get_sock_stat_nl(sk);
> + spin_unlock_bh(&sock_tag_list_lock);
> + return sock_tag_entry;
> +}
> +
> +static void
> +data_counters_update(struct data_counters *dc, int set,
> + enum ifs_tx_rx direction, int proto, int bytes)
> +{
> + switch (proto) {
> + case IPPROTO_TCP:
> + dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
> + break;
> + case IPPROTO_UDP:
> + dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
> + break;
> + case IPPROTO_IP:
> + default:
> + dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
> + 1);
> + break;
> + }
> +}
> +
> +/*
> + * Update stats for the specified interface. Do nothing if the entry
> + * does not exist (when a device was never configured with an IP address).
> + * Called when an device is being unregistered.
> + */
> +static void iface_stat_update(struct net_device *net_dev, bool stash_only)
> +{
> + struct rtnl_link_stats64 dev_stats, *stats;
> + struct iface_stat *entry;
> +
> + stats = dev_get_stats(net_dev, &dev_stats);
> + spin_lock_bh(&iface_stat_list_lock);
> + entry = get_iface_entry(net_dev->name);
> + if (entry == NULL) {
> + IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
> + net_dev->name);
> + spin_unlock_bh(&iface_stat_list_lock);
> + return;
> + }
> +
> + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
> + net_dev->name, entry);
> + if (!entry->active) {
> + IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
> + net_dev->name);
> + spin_unlock_bh(&iface_stat_list_lock);
> + return;
> + }
> +
> + if (stash_only) {
> + entry->last_known[IFS_TX].bytes = stats->tx_bytes;
> + entry->last_known[IFS_TX].packets = stats->tx_packets;
> + entry->last_known[IFS_RX].bytes = stats->rx_bytes;
> + entry->last_known[IFS_RX].packets = stats->rx_packets;
> + entry->last_known_valid = true;
> + IF_DEBUG("qtaguid: %s(%s): "
> + "dev stats stashed rx/tx=%llu/%llu\n", __func__,
> + net_dev->name, stats->rx_bytes, stats->tx_bytes);
> + spin_unlock_bh(&iface_stat_list_lock);
> + return;
> + }
> + entry->totals[IFS_TX].bytes += stats->tx_bytes;
> + entry->totals[IFS_TX].packets += stats->tx_packets;
> + entry->totals[IFS_RX].bytes += stats->rx_bytes;
> + entry->totals[IFS_RX].packets += stats->rx_packets;
> + /* We don't need the last_known[] anymore */
> + entry->last_known_valid = false;
> + _iface_stat_set_active(entry, net_dev, false);
> + IF_DEBUG("qtaguid: %s(%s): "
> + "disable tracking. rx/tx=%llu/%llu\n", __func__,
> + net_dev->name, stats->rx_bytes, stats->tx_bytes);
> + spin_unlock_bh(&iface_stat_list_lock);
> +}
> +
> +static void tag_stat_update(struct tag_stat *tag_entry,
> + enum ifs_tx_rx direction, int proto, int bytes)
> +{
> + int active_set;
> + active_set = get_active_counter_set(tag_entry->tn.tag);
> + MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
> + "dir=%d proto=%d bytes=%d)\n",
> + tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
> + active_set, direction, proto, bytes);
> + data_counters_update(&tag_entry->counters, active_set, direction,
> + proto, bytes);
> + if (tag_entry->parent_counters)
> + data_counters_update(tag_entry->parent_counters, active_set,
> + direction, proto, bytes);
> +}
> +
> +/*
> + * Create a new entry for tracking the specified {acct_tag,uid_tag} within
> + * the interface.
> + * iface_entry->tag_stat_list_lock should be held.
> + */
> +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
> + tag_t tag)
> +{
> + struct tag_stat *new_tag_stat_entry = NULL;
> + IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
> + " (uid=%u)\n", __func__,
> + iface_entry, tag, get_uid_from_tag(tag));
> + new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
> + if (!new_tag_stat_entry) {
> + pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
> + goto done;
> + }
> + new_tag_stat_entry->tn.tag = tag;
> + tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
> +done:
> + return new_tag_stat_entry;
> +}
> +
> +static void if_tag_stat_update(const char *ifname, uid_t uid,
> + const struct sock *sk, enum ifs_tx_rx direction,
> + int proto, int bytes)
> +{
> + struct tag_stat *tag_stat_entry;
> + tag_t tag, acct_tag;
> + tag_t uid_tag;
> + struct data_counters *uid_tag_counters;
> + struct sock_tag *sock_tag_entry;
> + struct iface_stat *iface_entry;
> + struct tag_stat *new_tag_stat;
> + MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
> + "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
> + ifname, uid, sk, direction, proto, bytes);
> +
> +
> + iface_entry = get_iface_entry(ifname);
> + if (!iface_entry) {
> + pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
> + ifname);
> + return;
> + }
> + /* It is ok to process data when an iface_entry is inactive */
> +
> + MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
> + ifname, iface_entry);
> +
> + /*
> + * Look for a tagged sock.
> + * It will have an acct_uid.
> + */
> + sock_tag_entry = get_sock_stat(sk);
> + if (sock_tag_entry) {
> + tag = sock_tag_entry->tag;
> + acct_tag = get_atag_from_tag(tag);
> + uid_tag = get_utag_from_tag(tag);
> + } else {
> + acct_tag = make_atag_from_value(0);
> + tag = combine_atag_with_uid(acct_tag, uid);
> + uid_tag = make_tag_from_uid(uid);
> + }
> + MT_DEBUG("qtaguid: iface_stat: stat_update(): "
> + " looking for tag=0x%llx (uid=%u) in ife=%p\n",
> + tag, get_uid_from_tag(tag), iface_entry);
> + /* Loop over tag list under this interface for {acct_tag,uid_tag} */
> + spin_lock_bh(&iface_entry->tag_stat_list_lock);
> +
> + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
> + tag);
> + if (tag_stat_entry) {
> + /*
> + * Updating the {acct_tag, uid_tag} entry handles both stats:
> + * {0, uid_tag} will also get updated.
> + */
> + tag_stat_update(tag_stat_entry, direction, proto, bytes);
> + spin_unlock_bh(&iface_entry->tag_stat_list_lock);
> + return;
> + }
> +
> + /* Loop over tag list under this interface for {0,uid_tag} */
> + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
> + uid_tag);
> + if (!tag_stat_entry) {
> + /* Here: the base uid_tag did not exist */
> + /*
> + * No parent counters. So
> + * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
> + */
> + new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
> + uid_tag_counters = &new_tag_stat->counters;
> + } else {
> + uid_tag_counters = &tag_stat_entry->counters;
> + }
> +
> + if (acct_tag) {
> + new_tag_stat = create_if_tag_stat(iface_entry, tag);
> + new_tag_stat->parent_counters = uid_tag_counters;
> + }
> + tag_stat_update(new_tag_stat, direction, proto, bytes);
> + spin_unlock_bh(&iface_entry->tag_stat_list_lock);
> +}
> +
> +static int iface_netdev_event_handler(struct notifier_block *nb,
> + unsigned long event, void *ptr) {
> + struct net_device *dev = ptr;
> +
> + if (unlikely(module_passive))
> + return NOTIFY_DONE;
> +
> + IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
> + "ev=0x%lx/%s netdev=%p->name=%s\n",
> + event, netdev_evt_str(event), dev, dev ? dev->name : "");
> +
> + switch (event) {
> + case NETDEV_UP:
> + iface_stat_create(dev, NULL);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + case NETDEV_DOWN:
> + case NETDEV_UNREGISTER:
> + iface_stat_update(dev, event == NETDEV_DOWN);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static int iface_inet6addr_event_handler(struct notifier_block *nb,
> + unsigned long event, void *ptr)
> +{
> + struct inet6_ifaddr *ifa = ptr;
> + struct net_device *dev;
> +
> + if (unlikely(module_passive))
> + return NOTIFY_DONE;
> +
> + IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
> + "ev=0x%lx/%s ifa=%p\n",
> + event, netdev_evt_str(event), ifa);
> +
> + switch (event) {
> + case NETDEV_UP:
> + BUG_ON(!ifa || !ifa->idev);
> + dev = (struct net_device *)ifa->idev->dev;
> + iface_stat_create_ipv6(dev, ifa);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + case NETDEV_DOWN:
> + case NETDEV_UNREGISTER:
> + BUG_ON(!ifa || !ifa->idev);
> + dev = (struct net_device *)ifa->idev->dev;
> + iface_stat_update(dev, event == NETDEV_DOWN);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static int iface_inetaddr_event_handler(struct notifier_block *nb,
> + unsigned long event, void *ptr)
> +{
> + struct in_ifaddr *ifa = ptr;
> + struct net_device *dev;
> +
> + if (unlikely(module_passive))
> + return NOTIFY_DONE;
> +
> + IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
> + "ev=0x%lx/%s ifa=%p\n",
> + event, netdev_evt_str(event), ifa);
> +
> + switch (event) {
> + case NETDEV_UP:
> + BUG_ON(!ifa || !ifa->ifa_dev);
> + dev = ifa->ifa_dev->dev;
> + iface_stat_create(dev, ifa);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + case NETDEV_DOWN:
> + case NETDEV_UNREGISTER:
> + BUG_ON(!ifa || !ifa->ifa_dev);
> + dev = ifa->ifa_dev->dev;
> + iface_stat_update(dev, event == NETDEV_DOWN);
> + atomic64_inc(&qtu_events.iface_events);
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block iface_netdev_notifier_blk = {
> + .notifier_call = iface_netdev_event_handler,
> +};
> +
> +static struct notifier_block iface_inetaddr_notifier_blk = {
> + .notifier_call = iface_inetaddr_event_handler,
> +};
> +
> +static struct notifier_block iface_inet6addr_notifier_blk = {
> + .notifier_call = iface_inet6addr_event_handler,
> +};
> +
> +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
> +{
> + int err;
> +
> + iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
> + if (!iface_stat_procdir) {
> + pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
> + err = -1;
> + goto err;
> + }
> +
> + iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
> + proc_iface_perms,
> + parent_procdir);
> + if (!iface_stat_all_procfile) {
> + pr_err("qtaguid: iface_stat: init "
> + " failed to create stat_all proc entry\n");
> + err = -1;
> + goto err_zap_entry;
> + }
> + iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
> +
> +
> + err = register_netdevice_notifier(&iface_netdev_notifier_blk);
> + if (err) {
> + pr_err("qtaguid: iface_stat: init "
> + "failed to register dev event handler\n");
> + goto err_zap_all_stats_entry;
> + }
> + err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
> + if (err) {
> + pr_err("qtaguid: iface_stat: init "
> + "failed to register ipv4 dev event handler\n");
> + goto err_unreg_nd;
> + }
> +
> + err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
> + if (err) {
> + pr_err("qtaguid: iface_stat: init "
> + "failed to register ipv6 dev event handler\n");
> + goto err_unreg_ip4_addr;
> + }
> + return 0;
> +
> +err_unreg_ip4_addr:
> + unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
> +err_unreg_nd:
> + unregister_netdevice_notifier(&iface_netdev_notifier_blk);
> +err_zap_all_stats_entry:
> + remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
> +err_zap_entry:
> + remove_proc_entry(iface_stat_procdirname, parent_procdir);
> +err:
> + return err;
> +}
> +
> +static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
> + struct xt_action_param *par)
> +{
> + struct sock *sk;
> + unsigned int hook_mask = (1 << par->hooknum);
> +
> + MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
> + par->hooknum, par->family);
> +
> + /*
> + * Let's not abuse the the xt_socket_get*_sk(), or else it will
> + * return garbage SKs.
> + */
> + if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
> + return NULL;
> +
> + switch (par->family) {
> + case NFPROTO_IPV6:
> + sk = xt_socket_get6_sk(skb, par);
> + break;
> + case NFPROTO_IPV4:
> + sk = xt_socket_get4_sk(skb, par);
> + break;
> + default:
> + return NULL;
> + }
> +
> + /*
> + * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
> + * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
> + * Not fixed in 3.0-r3 :(
> + */

Is it fixed now?

> + if (sk) {
> + MT_DEBUG("qtaguid: %p->sk_proto=%u "
> + "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
> + if (sk->sk_state == TCP_TIME_WAIT) {
> + xt_socket_put_sk(sk);
> + sk = NULL;
> + }
> + }
> + return sk;
> +}
> +
> +static void account_for_uid(const struct sk_buff *skb,
> + const struct sock *alternate_sk, uid_t uid,
> + struct xt_action_param *par)
> +{
> + const struct net_device *el_dev;
> +
> + if (!skb->dev) {
> + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
> + el_dev = par->in ? : par->out;
> + } else {
> + const struct net_device *other_dev;
> + el_dev = skb->dev;
> + other_dev = par->in ? : par->out;
> + if (el_dev != other_dev) {
> + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
> + "par->(in/out)=%p %s\n",
> + par->hooknum, el_dev, el_dev->name, other_dev,
> + other_dev->name);
> + }
> + }
> +
> + if (unlikely(!el_dev)) {
> + pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
> + } else if (unlikely(!el_dev->name)) {
> + pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
> + } else {
> + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
> + par->hooknum,
> + el_dev->name,
> + el_dev->type);
> +
> + if_tag_stat_update(el_dev->name, uid,
> + skb->sk ? skb->sk : alternate_sk,
> + par->in ? IFS_RX : IFS_TX,
> + ip_hdr(skb)->protocol, skb->len);
> + }
> +}
> +
> +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
> +{
> + const struct xt_qtaguid_match_info *info = par->matchinfo;
> + const struct file *filp;
> + bool got_sock = false;
> + struct sock *sk;
> + uid_t sock_uid;
> + bool res;
> +
> + if (unlikely(module_passive))
> + return (info->match ^ info->invert) == 0;
> +
> + MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
> + par->hooknum, skb, par->in, par->out, par->family);
> +
> + atomic64_inc(&qtu_events.match_calls);
> + if (skb == NULL) {
> + res = (info->match ^ info->invert) == 0;
> + goto ret_res;
> + }
> +
> + sk = skb->sk;
> +
> + if (sk == NULL) {
> + /*
> + * A missing sk->sk_socket happens when packets are in-flight
> + * and the matching socket is already closed and gone.
> + */
> + sk = qtaguid_find_sk(skb, par);
> + /*
> + * If we got the socket from the find_sk(), we will need to put
> + * it back, as nf_tproxy_get_sock_v4() got it.
> + */
> + got_sock = sk;
> + if (sk)
> + atomic64_inc(&qtu_events.match_found_sk_in_ct);
> + else
> + atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
> + } else {
> + atomic64_inc(&qtu_events.match_found_sk);
> + }
> + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
> + par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
> + if (sk != NULL) {
> + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
> + par->hooknum, sk, sk->sk_socket,
> + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
> + filp = sk->sk_socket ? sk->sk_socket->file : NULL;
> + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
> + par->hooknum, filp ? filp->f_cred->fsuid : -1);
> + }
> +
> + if (sk == NULL || sk->sk_socket == NULL) {
> + /*
> + * Here, the qtaguid_find_sk() using connection tracking
> + * couldn't find the owner, so for now we just count them
> + * against the system.
> + */
> + /*
> + * TODO: unhack how to force just accounting.
> + * For now we only do iface stats when the uid-owner is not
> + * requested.
> + */
> + if (!(info->match & XT_QTAGUID_UID))
> + account_for_uid(skb, sk, 0, par);
> + MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
> + par->hooknum,
> + sk ? sk->sk_socket : NULL);
> + res = (info->match ^ info->invert) == 0;
> + atomic64_inc(&qtu_events.match_no_sk);
> + goto put_sock_ret_res;
> + } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
> + res = false;
> + goto put_sock_ret_res;
> + }
> + filp = sk->sk_socket->file;
> + if (filp == NULL) {
> + MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
> + account_for_uid(skb, sk, 0, par);
> + res = ((info->match ^ info->invert) &
> + (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
> + atomic64_inc(&qtu_events.match_no_sk_file);
> + goto put_sock_ret_res;
> + }
> + sock_uid = filp->f_cred->fsuid;
> + /*
> + * TODO: unhack how to force just accounting.
> + * For now we only do iface stats when the uid-owner is not requested
> + */
> + if (!(info->match & XT_QTAGUID_UID))
> + account_for_uid(skb, sk, sock_uid, par);
> +
> + /*
> + * The following two tests fail the match when:
> + * id not in range AND no inverted condition requested
> + * or id in range AND inverted condition requested
> + * Thus (!a && b) || (a && !b) == a ^ b
> + */
> + if (info->match & XT_QTAGUID_UID)
> + if ((filp->f_cred->fsuid >= info->uid_min &&
> + filp->f_cred->fsuid <= info->uid_max) ^
> + !(info->invert & XT_QTAGUID_UID)) {
> + MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
> + par->hooknum);
> + res = false;
> + goto put_sock_ret_res;
> + }
> + if (info->match & XT_QTAGUID_GID)
> + if ((filp->f_cred->fsgid >= info->gid_min &&
> + filp->f_cred->fsgid <= info->gid_max) ^
> + !(info->invert & XT_QTAGUID_GID)) {
> + MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
> + par->hooknum);
> + res = false;
> + goto put_sock_ret_res;
> + }
> +
> + MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
> + res = true;
> +
> +put_sock_ret_res:
> + if (got_sock)
> + xt_socket_put_sk(sk);
> +ret_res:
> + MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
> + return res;
> +}
> +
> +#ifdef DDEBUG
> +/* This function is not in xt_qtaguid_print.c because of locks visibility */
> +static void prdebug_full_state(int indent_level, const char *fmt, ...)
> +{
> + va_list args;
> + char *fmt_buff;
> + char *buff;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + fmt_buff = kasprintf(GFP_ATOMIC,
> + "qtaguid: %s(): %s {\n", __func__, fmt);
> + BUG_ON(!fmt_buff);
> + va_start(args, fmt);
> + buff = kvasprintf(GFP_ATOMIC,
> + fmt_buff, args);
> + BUG_ON(!buff);
> + pr_debug("%s", buff);
> + kfree(fmt_buff);
> + kfree(buff);
> + va_end(args);
> +
> + spin_lock_bh(&sock_tag_list_lock);
> + prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
> + spin_unlock_bh(&sock_tag_list_lock);
> +
> + spin_lock_bh(&sock_tag_list_lock);
> + spin_lock_bh(&uid_tag_data_tree_lock);
> + prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
> + prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> + spin_unlock_bh(&sock_tag_list_lock);
> +
> + spin_lock_bh(&iface_stat_list_lock);
> + prdebug_iface_stat_list(indent_level, &iface_stat_list);
> + spin_unlock_bh(&iface_stat_list_lock);
> +
> + pr_debug("qtaguid: %s(): }\n", __func__);
> +}
> +#else
> +static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
> +#endif
> +
> +/*
> + * Procfs reader to get all active socket tags using style "1)" as described in
> + * fs/proc/generic.c
> + */
> +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
> + off_t items_to_skip, int char_count, int *eof,
> + void *data)
> +{
> + char *outp = page;
> + int len;
> + uid_t uid;
> + struct rb_node *node;
> + struct sock_tag *sock_tag_entry;
> + int item_index = 0;
> + int indent_level = 0;
> + long f_count;
> +
> + if (unlikely(module_passive)) {
> + *eof = 1;
> + return 0;
> + }
> +
> + if (*eof)
> + return 0;
> +
> + CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
> + page, items_to_skip, char_count, *eof);
> +
> + spin_lock_bh(&sock_tag_list_lock);
> + for (node = rb_first(&sock_tag_tree);
> + node;
> + node = rb_next(node)) {
> + if (item_index++ < items_to_skip)
> + continue;
> + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
> + uid = get_uid_from_tag(sock_tag_entry->tag);
> + CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
> + "pid=%u\n",
> + sock_tag_entry->sk,
> + sock_tag_entry->tag,
> + uid,
> + sock_tag_entry->pid
> + );
> + f_count = atomic_long_read(
> + &sock_tag_entry->socket->file->f_count);
> + len = snprintf(outp, char_count,
> + "sock=%p tag=0x%llx (uid=%u) pid=%u "
> + "f_count=%lu\n",
> + sock_tag_entry->sk,
> + sock_tag_entry->tag, uid,
> + sock_tag_entry->pid, f_count);
> + if (len >= char_count) {
> + spin_unlock_bh(&sock_tag_list_lock);
> + *outp = '\0';
> + return outp - page;
> + }
> + outp += len;
> + char_count -= len;
> + (*num_items_returned)++;
> + }
> + spin_unlock_bh(&sock_tag_list_lock);
> +
> + if (item_index++ >= items_to_skip) {
> + len = snprintf(outp, char_count,
> + "events: sockets_tagged=%llu "
> + "sockets_untagged=%llu "
> + "counter_set_changes=%llu "
> + "delete_cmds=%llu "
> + "iface_events=%llu "
> + "match_calls=%llu "
> + "match_found_sk=%llu "
> + "match_found_sk_in_ct=%llu "
> + "match_found_no_sk_in_ct=%llu "
> + "match_no_sk=%llu "
> + "match_no_sk_file=%llu\n",
> + atomic64_read(&qtu_events.sockets_tagged),
> + atomic64_read(&qtu_events.sockets_untagged),
> + atomic64_read(&qtu_events.counter_set_changes),
> + atomic64_read(&qtu_events.delete_cmds),
> + atomic64_read(&qtu_events.iface_events),
> + atomic64_read(&qtu_events.match_calls),
> + atomic64_read(&qtu_events.match_found_sk),
> + atomic64_read(&qtu_events.match_found_sk_in_ct),
> + atomic64_read(
> + &qtu_events.match_found_no_sk_in_ct),
> + atomic64_read(&qtu_events.match_no_sk),
> + atomic64_read(&qtu_events.match_no_sk_file));
> + if (len >= char_count) {
> + *outp = '\0';
> + return outp - page;
> + }
> + outp += len;
> + char_count -= len;
> + (*num_items_returned)++;
> + }
> +
> + /* Count the following as part of the last item_index */
> + if (item_index > items_to_skip) {
> + prdebug_full_state(indent_level, "proc ctrl");
> + }
> +
> + *eof = 1;
> + return outp - page;
> +}
> +
> +/*
> + * Delete socket tags, and stat tags associated with a given
> + * accouting tag and uid.
> + */
> +static int ctrl_cmd_delete(const char *input)
> +{
> + char cmd;
> + uid_t uid;
> + uid_t entry_uid;
> + tag_t acct_tag;
> + tag_t tag;
> + int res, argc;
> + struct iface_stat *iface_entry;
> + struct rb_node *node;
> + struct sock_tag *st_entry;
> + struct rb_root st_to_free_tree = RB_ROOT;
> + struct tag_stat *ts_entry;
> + struct tag_counter_set *tcs_entry;
> + struct tag_ref *tr_entry;
> + struct uid_tag_data *utd_entry;
> +
> + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
> + CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
> + "user_tag=0x%llx uid=%u\n", input, argc, cmd,
> + acct_tag, uid);
> + if (argc < 2) {
> + res = -EINVAL;
> + goto err;
> + }
> + if (!valid_atag(acct_tag)) {
> + pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
> + res = -EINVAL;
> + goto err;
> + }
> + if (argc < 3) {
> + uid = current_fsuid();
> + } else if (!can_impersonate_uid(uid)) {
> + pr_info("qtaguid: ctrl_delete(%s): "
> + "insufficient priv from pid=%u tgid=%u uid=%u\n",
> + input, current->pid, current->tgid, current_fsuid());
> + res = -EPERM;
> + goto err;
> + }
> +
> + tag = combine_atag_with_uid(acct_tag, uid);
> + CT_DEBUG("qtaguid: ctrl_delete(%s): "
> + "looking for tag=0x%llx (uid=%u)\n",
> + input, tag, uid);
> +
> + /* Delete socket tags */
> + spin_lock_bh(&sock_tag_list_lock);
> + node = rb_first(&sock_tag_tree);
> + while (node) {
> + st_entry = rb_entry(node, struct sock_tag, sock_node);
> + entry_uid = get_uid_from_tag(st_entry->tag);
> + node = rb_next(node);
> + if (entry_uid != uid)
> + continue;
> +
> + CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
> + input, st_entry->tag, entry_uid);
> +
> + if (!acct_tag || st_entry->tag == tag) {
> + rb_erase(&st_entry->sock_node, &sock_tag_tree);
> + /* Can't sockfd_put() within spinlock, do it later. */
> + sock_tag_tree_insert(st_entry, &st_to_free_tree);
> + tr_entry = lookup_tag_ref(st_entry->tag, NULL);
> + BUG_ON(tr_entry->num_sock_tags <= 0);
> + tr_entry->num_sock_tags--;
> + /*
> + * TODO: remove if, and start failing.
> + * This is a hack to work around the fact that in some
> + * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
> + * and are trying to work around apps
> + * that didn't open the /dev/xt_qtaguid.
> + */
> + if (st_entry->list.next && st_entry->list.prev)
> + list_del(&st_entry->list);
> + }
> + }
> + spin_unlock_bh(&sock_tag_list_lock);
> +
> + sock_tag_tree_erase(&st_to_free_tree);
> +
> + /* Delete tag counter-sets */
> + spin_lock_bh(&tag_counter_set_list_lock);
> + /* Counter sets are only on the uid tag, not full tag */
> + tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
> + if (tcs_entry) {
> + CT_DEBUG("qtaguid: ctrl_delete(%s): "
> + "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
> + input,
> + tcs_entry->tn.tag,
> + get_uid_from_tag(tcs_entry->tn.tag),
> + tcs_entry->active_set);
> + rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
> + kfree(tcs_entry);
> + }
> + spin_unlock_bh(&tag_counter_set_list_lock);
> +
> + /*
> + * If acct_tag is 0, then all entries belonging to uid are
> + * erased.
> + */
> + spin_lock_bh(&iface_stat_list_lock);
> + list_for_each_entry(iface_entry, &iface_stat_list, list) {
> + spin_lock_bh(&iface_entry->tag_stat_list_lock);
> + node = rb_first(&iface_entry->tag_stat_tree);
> + while (node) {
> + ts_entry = rb_entry(node, struct tag_stat, tn.node);
> + entry_uid = get_uid_from_tag(ts_entry->tn.tag);
> + node = rb_next(node);
> +
> + CT_DEBUG("qtaguid: ctrl_delete(%s): "
> + "ts tag=0x%llx (uid=%u)\n",
> + input, ts_entry->tn.tag, entry_uid);
> +
> + if (entry_uid != uid)
> + continue;
> + if (!acct_tag || ts_entry->tn.tag == tag) {
> + CT_DEBUG("qtaguid: ctrl_delete(%s): "
> + "erase ts: %s 0x%llx %u\n",
> + input, iface_entry->ifname,
> + get_atag_from_tag(ts_entry->tn.tag),
> + entry_uid);
> + rb_erase(&ts_entry->tn.node,
> + &iface_entry->tag_stat_tree);
> + kfree(ts_entry);
> + }
> + }
> + spin_unlock_bh(&iface_entry->tag_stat_list_lock);
> + }
> + spin_unlock_bh(&iface_stat_list_lock);
> +
> + /* Cleanup the uid_tag_data */
> + spin_lock_bh(&uid_tag_data_tree_lock);
> + node = rb_first(&uid_tag_data_tree);
> + while (node) {
> + utd_entry = rb_entry(node, struct uid_tag_data, node);
> + entry_uid = utd_entry->uid;
> + node = rb_next(node);
> +
> + CT_DEBUG("qtaguid: ctrl_delete(%s): "
> + "utd uid=%u\n",
> + input, entry_uid);
> +
> + if (entry_uid != uid)
> + continue;
> + /*
> + * Go over the tag_refs, and those that don't have
> + * sock_tags using them are freed.
> + */
> + put_tag_ref_tree(tag, utd_entry);
> + put_utd_entry(utd_entry);
> + }
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> +
> + atomic64_inc(&qtu_events.delete_cmds);
> + res = 0;
> +
> +err:
> + return res;
> +}
> +
> +static int ctrl_cmd_counter_set(const char *input)
> +{
> + char cmd;
> + uid_t uid = 0;
> + tag_t tag;
> + int res, argc;
> + struct tag_counter_set *tcs;
> + int counter_set;
> +
> + argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
> + CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
> + "set=%d uid=%u\n", input, argc, cmd,
> + counter_set, uid);
> + if (argc != 3) {
> + res = -EINVAL;
> + goto err;
> + }
> + if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
> + pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
> + input);
> + res = -EINVAL;
> + goto err;
> + }
> + if (!can_manipulate_uids()) {
> + pr_info("qtaguid: ctrl_counterset(%s): "
> + "insufficient priv from pid=%u tgid=%u uid=%u\n",
> + input, current->pid, current->tgid, current_fsuid());
> + res = -EPERM;
> + goto err;
> + }
> +
> + tag = make_tag_from_uid(uid);
> + spin_lock_bh(&tag_counter_set_list_lock);
> + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
> + if (!tcs) {
> + tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
> + if (!tcs) {
> + spin_unlock_bh(&tag_counter_set_list_lock);
> + pr_err("qtaguid: ctrl_counterset(%s): "
> + "failed to alloc counter set\n",
> + input);
> + res = -ENOMEM;
> + goto err;
> + }
> + tcs->tn.tag = tag;
> + tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
> + CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
> + "(uid=%u) set=%d\n",
> + input, tag, get_uid_from_tag(tag), counter_set);
> + }
> + tcs->active_set = counter_set;
> + spin_unlock_bh(&tag_counter_set_list_lock);
> + atomic64_inc(&qtu_events.counter_set_changes);
> + res = 0;
> +
> +err:
> + return res;
> +}
> +
> +static int ctrl_cmd_tag(const char *input)
> +{
> + char cmd;
> + int sock_fd = 0;
> + uid_t uid = 0;
> + tag_t acct_tag = make_atag_from_value(0);
> + tag_t full_tag;
> + struct socket *el_socket;
> + int res, argc;
> + struct sock_tag *sock_tag_entry;
> + struct tag_ref *tag_ref_entry;
> + struct uid_tag_data *uid_tag_data_entry;
> + struct proc_qtu_data *pqd_entry;
> +
> + /* Unassigned args will get defaulted later. */
> + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
> + CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
> + "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
> + acct_tag, uid);
> + if (argc < 2) {
> + res = -EINVAL;
> + goto err;
> + }
> + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
> + if (!el_socket) {
> + pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
> + " sock_fd=%d err=%d\n", input, sock_fd, res);
> + goto err;
> + }
> + CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
> + input, atomic_long_read(&el_socket->file->f_count),
> + el_socket->sk);
> + if (argc < 3) {
> + acct_tag = make_atag_from_value(0);
> + } else if (!valid_atag(acct_tag)) {
> + pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
> + res = -EINVAL;
> + goto err_put;
> + }
> + CT_DEBUG("qtaguid: ctrl_tag(%s): "
> + "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
> + "in_group=%d in_egroup=%d\n",
> + input, current->pid, current->tgid, current_uid(),
> + current_euid(), current_fsuid(),
> + in_group_p(proc_ctrl_write_gid),
> + in_egroup_p(proc_ctrl_write_gid));
> + if (argc < 4) {
> + uid = current_fsuid();
> + } else if (!can_impersonate_uid(uid)) {
> + pr_info("qtaguid: ctrl_tag(%s): "
> + "insufficient priv from pid=%u tgid=%u uid=%u\n",
> + input, current->pid, current->tgid, current_fsuid());
> + res = -EPERM;
> + goto err_put;
> + }
> + full_tag = combine_atag_with_uid(acct_tag, uid);
> +
> + spin_lock_bh(&sock_tag_list_lock);
> + sock_tag_entry = get_sock_stat_nl(el_socket->sk);
> + tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
> + if (IS_ERR(tag_ref_entry)) {
> + res = PTR_ERR(tag_ref_entry);
> + spin_unlock_bh(&sock_tag_list_lock);
> + goto err_put;
> + }
> + tag_ref_entry->num_sock_tags++;
> + if (sock_tag_entry) {
> + struct tag_ref *prev_tag_ref_entry;
> +
> + CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
> + "st@%p ...->f_count=%ld\n",
> + input, el_socket->sk, sock_tag_entry,
> + atomic_long_read(&el_socket->file->f_count));
> + /*
> + * This is a re-tagging, so release the sock_fd that was
> + * locked at the time of the 1st tagging.
> + * There is still the ref from this call's sockfd_lookup() so
> + * it can be done within the spinlock.
> + */
> + sockfd_put(sock_tag_entry->socket);
> + prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
> + &uid_tag_data_entry);
> + BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
> + BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
> + prev_tag_ref_entry->num_sock_tags--;
> + sock_tag_entry->tag = full_tag;
> + } else {
> + CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
> + input, el_socket->sk);
> + sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
> + GFP_ATOMIC);
> + if (!sock_tag_entry) {
> + pr_err("qtaguid: ctrl_tag(%s): "
> + "socket tag alloc failed\n",
> + input);
> + spin_unlock_bh(&sock_tag_list_lock);
> + res = -ENOMEM;
> + goto err_tag_unref_put;
> + }
> + sock_tag_entry->sk = el_socket->sk;
> + sock_tag_entry->socket = el_socket;
> + sock_tag_entry->pid = current->tgid;
> + sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
> + uid);
> + spin_lock_bh(&uid_tag_data_tree_lock);
> + pqd_entry = proc_qtu_data_tree_search(
> + &proc_qtu_data_tree, current->tgid);
> + /*
> + * TODO: remove if, and start failing.
> + * At first, we want to catch user-space code that is not
> + * opening the /dev/xt_qtaguid.
> + */
> + if (IS_ERR_OR_NULL(pqd_entry))
> + pr_warn_once(
> + "qtaguid: %s(): "
> + "User space forgot to open /dev/xt_qtaguid? "
> + "pid=%u tgid=%u uid=%u\n", __func__,
> + current->pid, current->tgid,
> + current_fsuid());
> + else
> + list_add(&sock_tag_entry->list,
> + &pqd_entry->sock_tag_list);
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> +
> + sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
> + atomic64_inc(&qtu_events.sockets_tagged);
> + }
> + spin_unlock_bh(&sock_tag_list_lock);
> + /* We keep the ref to the socket (file) until it is untagged */
> + CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
> + input, sock_tag_entry,
> + atomic_long_read(&el_socket->file->f_count));
> + return 0;
> +
> +err_tag_unref_put:
> + BUG_ON(tag_ref_entry->num_sock_tags <= 0);
> + tag_ref_entry->num_sock_tags--;
> + free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
> +err_put:
> + CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
> + input, atomic_long_read(&el_socket->file->f_count) - 1);
> + /* Release the sock_fd that was grabbed by sockfd_lookup(). */
> + sockfd_put(el_socket);
> + return res;
> +
> +err:
> + CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
> + return res;
> +}
> +
> +static int ctrl_cmd_untag(const char *input)
> +{
> + char cmd;
> + int sock_fd = 0;
> + struct socket *el_socket;
> + int res, argc;
> + struct sock_tag *sock_tag_entry;
> + struct tag_ref *tag_ref_entry;
> + struct uid_tag_data *utd_entry;
> + struct proc_qtu_data *pqd_entry;
> +
> + argc = sscanf(input, "%c %d", &cmd, &sock_fd);
> + CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
> + input, argc, cmd, sock_fd);
> + if (argc < 2) {
> + res = -EINVAL;
> + goto err;
> + }
> + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
> + if (!el_socket) {
> + pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
> + " sock_fd=%d err=%d\n", input, sock_fd, res);
> + goto err;
> + }
> + CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
> + input, atomic_long_read(&el_socket->file->f_count),
> + el_socket->sk);
> + spin_lock_bh(&sock_tag_list_lock);
> + sock_tag_entry = get_sock_stat_nl(el_socket->sk);
> + if (!sock_tag_entry) {
> + spin_unlock_bh(&sock_tag_list_lock);
> + res = -EINVAL;
> + goto err_put;
> + }
> + /*
> + * The socket already belongs to the current process
> + * so it can do whatever it wants to it.
> + */
> + rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
> +
> + tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
> + BUG_ON(!tag_ref_entry);
> + BUG_ON(tag_ref_entry->num_sock_tags <= 0);
> + spin_lock_bh(&uid_tag_data_tree_lock);
> + pqd_entry = proc_qtu_data_tree_search(
> + &proc_qtu_data_tree, current->tgid);
> + /*
> + * TODO: remove if, and start failing.
> + * At first, we want to catch user-space code that is not
> + * opening the /dev/xt_qtaguid.
> + */
> + if (IS_ERR_OR_NULL(pqd_entry))
> + pr_warn_once("qtaguid: %s(): "
> + "User space forgot to open /dev/xt_qtaguid? "
> + "pid=%u tgid=%u uid=%u\n", __func__,
> + current->pid, current->tgid, current_fsuid());
> + else
> + list_del(&sock_tag_entry->list);
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> + /*
> + * We don't free tag_ref from the utd_entry here,
> + * only during a cmd_delete().
> + */
> + tag_ref_entry->num_sock_tags--;
> + spin_unlock_bh(&sock_tag_list_lock);
> + /*
> + * Release the sock_fd that was grabbed at tag time,
> + * and once more for the sockfd_lookup() here.
> + */
> + sockfd_put(sock_tag_entry->socket);
> + CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
> + input, sock_tag_entry,
> + atomic_long_read(&el_socket->file->f_count) - 1);
> + sockfd_put(el_socket);
> +
> + kfree(sock_tag_entry);
> + atomic64_inc(&qtu_events.sockets_untagged);
> +
> + return 0;
> +
> +err_put:
> + CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
> + input, atomic_long_read(&el_socket->file->f_count) - 1);
> + /* Release the sock_fd that was grabbed by sockfd_lookup(). */
> + sockfd_put(el_socket);
> + return res;
> +
> +err:
> + CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
> + return res;
> +}
> +
> +static int qtaguid_ctrl_parse(const char *input, int count)
> +{
> + char cmd;
> + int res;
> +
> + cmd = input[0];
> + /* Collect params for commands */
> + switch (cmd) {
> + case 'd':
> + res = ctrl_cmd_delete(input);
> + break;
> +
> + case 's':
> + res = ctrl_cmd_counter_set(input);
> + break;
> +
> + case 't':
> + res = ctrl_cmd_tag(input);
> + break;
> +
> + case 'u':
> + res = ctrl_cmd_untag(input);
> + break;
> +
> + default:
> + res = -EINVAL;
> + goto err;
> + }
> + if (!res)
> + res = count;
> +err:
> + CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
> + return res;
> +}
> +
> +#define MAX_QTAGUID_CTRL_INPUT_LEN 255
> +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
> + unsigned long count, void *data)
> +{
> + char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
> +
> + if (unlikely(module_passive))
> + return count;
> +
> + if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
> + return -EINVAL;
> +
> + if (copy_from_user(input_buf, buffer, count))
> + return -EFAULT;
> +
> + input_buf[count] = '\0';
> + return qtaguid_ctrl_parse(input_buf, count);
> +}
> +
> +struct proc_print_info {
> + char *outp;
> + char **num_items_returned;
> + struct iface_stat *iface_entry;
> + struct tag_stat *ts_entry;
> + int item_index;
> + int items_to_skip;
> + int char_count;
> +};
> +
> +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
> +{
> + int len;
> + struct data_counters *cnts;
> +
> + if (!ppi->item_index) {
> + if (ppi->item_index++ < ppi->items_to_skip)
> + return 0;
> + len = snprintf(ppi->outp, ppi->char_count,
> + "idx iface acct_tag_hex uid_tag_int cnt_set "
> + "rx_bytes rx_packets "
> + "tx_bytes tx_packets "
> + "rx_tcp_bytes rx_tcp_packets "
> + "rx_udp_bytes rx_udp_packets "
> + "rx_other_bytes rx_other_packets "
> + "tx_tcp_bytes tx_tcp_packets "
> + "tx_udp_bytes tx_udp_packets "
> + "tx_other_bytes tx_other_packets\n");
> + } else {
> + tag_t tag = ppi->ts_entry->tn.tag;
> + uid_t stat_uid = get_uid_from_tag(tag);
> +
> + if (!can_read_other_uid_stats(stat_uid)) {
> + CT_DEBUG("qtaguid: stats line: "
> + "%s 0x%llx %u: insufficient priv "
> + "from pid=%u tgid=%u uid=%u\n",
> + ppi->iface_entry->ifname,
> + get_atag_from_tag(tag), stat_uid,
> + current->pid, current->tgid, current_fsuid());
> + return 0;
> + }
> + if (ppi->item_index++ < ppi->items_to_skip)
> + return 0;
> + cnts = &ppi->ts_entry->counters;
> + len = snprintf(
> + ppi->outp, ppi->char_count,
> + "%d %s 0x%llx %u %u "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu "
> + "%llu %llu\n",
> + ppi->item_index,
> + ppi->iface_entry->ifname,
> + get_atag_from_tag(tag),
> + stat_uid,
> + cnt_set,
> + dc_sum_bytes(cnts, cnt_set, IFS_RX),
> + dc_sum_packets(cnts, cnt_set, IFS_RX),
> + dc_sum_bytes(cnts, cnt_set, IFS_TX),
> + dc_sum_packets(cnts, cnt_set, IFS_TX),
> + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
> + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
> + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
> + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
> + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
> + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
> + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
> + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
> + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
> + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
> + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
> + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
> + }
> + return len;
> +}
> +
> +static bool pp_sets(struct proc_print_info *ppi)
> +{
> + int len;
> + int counter_set;
> + for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
> + counter_set++) {
> + len = pp_stats_line(ppi, counter_set);
> + if (len >= ppi->char_count) {
> + *ppi->outp = '\0';
> + return false;
> + }
> + if (len) {
> + ppi->outp += len;
> + ppi->char_count -= len;
> + (*ppi->num_items_returned)++;
> + }
> + }
> + return true;
> +}
> +
> +/*
> + * Procfs reader to get all tag stats using style "1)" as described in
> + * fs/proc/generic.c
> + * Groups all protocols tx/rx bytes.
> + */
> +static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
> + off_t items_to_skip, int char_count, int *eof,
> + void *data)
> +{
> + struct proc_print_info ppi;
> + int len;
> +
> + ppi.outp = page;
> + ppi.item_index = 0;
> + ppi.char_count = char_count;
> + ppi.num_items_returned = num_items_returned;
> + ppi.items_to_skip = items_to_skip;
> +
> + if (unlikely(module_passive)) {
> + len = pp_stats_line(&ppi, 0);
> + /* The header should always be shorter than the buffer. */
> + BUG_ON(len >= ppi.char_count);
> + (*num_items_returned)++;
> + *eof = 1;
> + return len;
> + }
> +
> + CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
> + "char_count=%d *eof=%d\n", page, *num_items_returned,
> + items_to_skip, char_count, *eof);
> +
> + if (*eof)
> + return 0;
> +
> + /* The idx is there to help debug when things go belly up. */
> + len = pp_stats_line(&ppi, 0);
> + /* Don't advance the outp unless the whole line was printed */
> + if (len >= ppi.char_count) {
> + *ppi.outp = '\0';
> + return ppi.outp - page;
> + }
> + if (len) {
> + ppi.outp += len;
> + ppi.char_count -= len;
> + (*num_items_returned)++;
> + }
> +
> + spin_lock_bh(&iface_stat_list_lock);
> + list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
> + struct rb_node *node;
> + spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
> + for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
> + node;
> + node = rb_next(node)) {
> + ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
> + if (!pp_sets(&ppi)) {
> + spin_unlock_bh(
> + &ppi.iface_entry->tag_stat_list_lock);
> + spin_unlock_bh(&iface_stat_list_lock);
> + return ppi.outp - page;
> + }
> + }
> + spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
> + }
> + spin_unlock_bh(&iface_stat_list_lock);
> +
> + *eof = 1;
> + return ppi.outp - page;
> +}
> +
> +/*------------------------------------------*/
> +static int qtudev_open(struct inode *inode, struct file *file)
> +{
> + struct uid_tag_data *utd_entry;
> + struct proc_qtu_data *pqd_entry;
> + struct proc_qtu_data *new_pqd_entry;
> + int res;
> + bool utd_entry_found;
> +
> + if (unlikely(qtu_proc_handling_passive))
> + return 0;
> +
> + DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
> + current->pid, current->tgid, current_fsuid());
> +
> + spin_lock_bh(&uid_tag_data_tree_lock);
> +
> + /* Look for existing uid data, or alloc one. */
> + utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
> + if (IS_ERR_OR_NULL(utd_entry)) {
> + res = PTR_ERR(utd_entry);
> + goto err;
> + }
> +
> + /* Look for existing PID based proc_data */
> + pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
> + current->tgid);
> + if (pqd_entry) {
> + pr_err("qtaguid: qtudev_open(): %u/%u %u "
> + "%s already opened\n",
> + current->pid, current->tgid, current_fsuid(),
> + QTU_DEV_NAME);
> + res = -EBUSY;
> + goto err_unlock_free_utd;
> + }
> +
> + new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
> + if (!new_pqd_entry) {
> + pr_err("qtaguid: qtudev_open(): %u/%u %u: "
> + "proc data alloc failed\n",
> + current->pid, current->tgid, current_fsuid());
> + res = -ENOMEM;
> + goto err_unlock_free_utd;
> + }
> + new_pqd_entry->pid = current->tgid;
> + INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
> + new_pqd_entry->parent_tag_data = utd_entry;
> + utd_entry->num_pqd++;
> +
> + proc_qtu_data_tree_insert(new_pqd_entry,
> + &proc_qtu_data_tree);
> +
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> + DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
> + current_fsuid(), new_pqd_entry);
> + file->private_data = new_pqd_entry;
> + return 0;
> +
> +err_unlock_free_utd:
> + if (!utd_entry_found) {
> + rb_erase(&utd_entry->node, &uid_tag_data_tree);
> + kfree(utd_entry);
> + }
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> +err:
> + return res;
> +}
> +
> +static int qtudev_release(struct inode *inode, struct file *file)
> +{
> + struct proc_qtu_data *pqd_entry = file->private_data;
> + struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
> + struct sock_tag *st_entry;
> + struct rb_root st_to_free_tree = RB_ROOT;
> + struct list_head *entry, *next;
> + struct tag_ref *tr;
> +
> + if (unlikely(qtu_proc_handling_passive))
> + return 0;
> +
> + /*
> + * Do not trust the current->pid, it might just be a kworker cleaning
> + * up after a dead proc.
> + */
> + DR_DEBUG("qtaguid: qtudev_release(): "
> + "pid=%u tgid=%u uid=%u "
> + "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
> + current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
> + pqd_entry, pqd_entry->pid, utd_entry,
> + utd_entry->num_active_tags);
> +
> + spin_lock_bh(&sock_tag_list_lock);
> + spin_lock_bh(&uid_tag_data_tree_lock);
> +
> + list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
> + st_entry = list_entry(entry, struct sock_tag, list);
> + DR_DEBUG("qtaguid: %s(): "
> + "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
> + __func__,
> + st_entry, st_entry->sk,
> + current->pid, current->tgid,
> + pqd_entry->parent_tag_data->uid);
> +
> + utd_entry = uid_tag_data_tree_search(
> + &uid_tag_data_tree,
> + get_uid_from_tag(st_entry->tag));
> + BUG_ON(IS_ERR_OR_NULL(utd_entry));
> + DR_DEBUG("qtaguid: %s(): "
> + "looking for tag=0x%llx in utd_entry=%p\n", __func__,
> + st_entry->tag, utd_entry);
> + tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
> + st_entry->tag);
> + BUG_ON(!tr);
> + BUG_ON(tr->num_sock_tags <= 0);
> + tr->num_sock_tags--;
> + free_tag_ref_from_utd_entry(tr, utd_entry);
> +
> + rb_erase(&st_entry->sock_node, &sock_tag_tree);
> + list_del(&st_entry->list);
> + /* Can't sockfd_put() within spinlock, do it later. */
> + sock_tag_tree_insert(st_entry, &st_to_free_tree);
> +
> + /*
> + * Try to free the utd_entry if no other proc_qtu_data is
> + * using it (num_pqd is 0) and it doesn't have active tags
> + * (num_active_tags is 0).
> + */
> + put_utd_entry(utd_entry);
> + }
> +
> + rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
> + BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
> + pqd_entry->parent_tag_data->num_pqd--;
> + put_utd_entry(pqd_entry->parent_tag_data);
> + kfree(pqd_entry);
> + file->private_data = NULL;
> +
> + spin_unlock_bh(&uid_tag_data_tree_lock);
> + spin_unlock_bh(&sock_tag_list_lock);
> +
> +
> + sock_tag_tree_erase(&st_to_free_tree);
> +
> + prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
> + current->pid, current->tgid);
> + return 0;
> +}
> +
> +/*------------------------------------------*/
> +static const struct file_operations qtudev_fops = {
> + .owner = THIS_MODULE,
> + .open = qtudev_open,
> + .release = qtudev_release,
> +};
> +
> +static struct miscdevice qtu_device = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = QTU_DEV_NAME,
> + .fops = &qtudev_fops,
> + /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
> +};
> +
> +/*------------------------------------------*/
> +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
> +{
> + int ret;
> + *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
> + if (!*res_procdir) {
> + pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
> + ret = -ENOMEM;
> + goto no_dir;
> + }
> +
> + xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
> + *res_procdir);
> + if (!xt_qtaguid_ctrl_file) {
> + pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
> + " file\n");
> + ret = -ENOMEM;
> + goto no_ctrl_entry;
> + }
> + xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
> + xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
> +
> + xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
> + *res_procdir);
> + if (!xt_qtaguid_stats_file) {
> + pr_err("qtaguid: failed to create xt_qtaguid/stats "
> + "file\n");
> + ret = -ENOMEM;
> + goto no_stats_entry;
> + }
> + xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
> + /*
> + * TODO: add support counter hacking
> + * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
> + */
> + return 0;
> +
> +no_stats_entry:
> + remove_proc_entry("ctrl", *res_procdir);
> +no_ctrl_entry:
> + remove_proc_entry("xt_qtaguid", NULL);
> +no_dir:
> + return ret;
> +}
> +
> +static struct xt_match qtaguid_mt_reg __read_mostly = {
> + /*
> + * This module masquerades as the "owner" module so that iptables
> + * tools can deal with it.
> + */
> + .name = "owner",
> + .revision = 1,
> + .family = NFPROTO_UNSPEC,
> + .match = qtaguid_mt,
> + .matchsize = sizeof(struct xt_qtaguid_match_info),
> + .me = THIS_MODULE,
> +};
> +
> +static int __init qtaguid_mt_init(void)
> +{
> + if (qtaguid_proc_register(&xt_qtaguid_procdir)
> + || iface_stat_init(xt_qtaguid_procdir)
> + || xt_register_match(&qtaguid_mt_reg)
> + || misc_register(&qtu_device))
> + return -1;
> + return 0;
> +}
> +
> +/*
> + * TODO: allow unloading of the module.
> + * For now stats are permanent.
> + * Kconfig forces'y/n' and never an 'm'.
> + */
> +
> +module_init(qtaguid_mt_init);
> +MODULE_AUTHOR("jpa <jpa@xxxxxxxxxx>");
> +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS("ipt_owner");
> +MODULE_ALIAS("ip6t_owner");
> +MODULE_ALIAS("ipt_qtaguid");
> +MODULE_ALIAS("ip6t_qtaguid");
> diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
> new file mode 100644
> index 0000000..02479d6
> --- /dev/null
> +++ b/net/netfilter/xt_qtaguid_internal.h
> @@ -0,0 +1,330 @@
> +/*
> + * Kernel iptables module to track stats for packets based on user tags.
> + *
> + * (C) 2011 Google, Inc
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef __XT_QTAGUID_INTERNAL_H__
> +#define __XT_QTAGUID_INTERNAL_H__
> +
> +#include <linux/types.h>
> +#include <linux/rbtree.h>
> +#include <linux/spinlock_types.h>
> +#include <linux/workqueue.h>
> +
> +/* Iface handling */
> +#define IDEBUG_MASK (1<<0)
> +/* Iptable Matching. Per packet. */
> +#define MDEBUG_MASK (1<<1)
> +/* Red-black tree handling. Per packet. */
> +#define RDEBUG_MASK (1<<2)
> +/* procfs ctrl/stats handling */
> +#define CDEBUG_MASK (1<<3)
> +/* dev and resource tracking */
> +#define DDEBUG_MASK (1<<4)
> +
> +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
> +#define DEFAULT_DEBUG_MASK 0
> +
> +/*
> + * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
> + * All undef: text size ~ 0x3030; all def: ~ 0x4404.
> + */
> +#define IDEBUG
> +#define MDEBUG
> +#define RDEBUG
> +#define CDEBUG
> +#define DDEBUG
> +
> +#define MSK_DEBUG(mask, ...) do { \
> + if (unlikely(qtaguid_debug_mask & (mask))) \
> + pr_debug(__VA_ARGS__); \
> + } while (0)
> +#ifdef IDEBUG
> +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
> +#else
> +#define IF_DEBUG(...) no_printk(__VA_ARGS__)
> +#endif
> +#ifdef MDEBUG
> +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
> +#else
> +#define MT_DEBUG(...) no_printk(__VA_ARGS__)
> +#endif
> +#ifdef RDEBUG
> +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
> +#else
> +#define RB_DEBUG(...) no_printk(__VA_ARGS__)
> +#endif
> +#ifdef CDEBUG
> +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
> +#else
> +#define CT_DEBUG(...) no_printk(__VA_ARGS__)
> +#endif
> +#ifdef DDEBUG
> +#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
> +#else
> +#define DR_DEBUG(...) no_printk(__VA_ARGS__)
> +#endif
> +
> +extern uint qtaguid_debug_mask;
> +
> +/*---------------------------------------------------------------------------*/
> +/*
> + * Tags:
> + *
> + * They represent what the data usage counters will be tracked against.
> + * By default a tag is just based on the UID.
> + * The UID is used as the base for policing, and can not be ignored.
> + * So a tag will always at least represent a UID (uid_tag).
> + *
> + * A tag can be augmented with an "accounting tag" which is associated
> + * with a UID.
> + * User space can set the acct_tag portion of the tag which is then used
> + * with sockets: all data belonging to that socket will be counted against the
> + * tag. The policing is then based on the tag's uid_tag portion,
> + * and stats are collected for the acct_tag portion separately.
> + *
> + * There could be
> + * a: {acct_tag=1, uid_tag=10003}
> + * b: {acct_tag=2, uid_tag=10003}
> + * c: {acct_tag=3, uid_tag=10003}
> + * d: {acct_tag=0, uid_tag=10003}
> + * a, b, and c represent tags associated with specific sockets.
> + * d is for the totals for that uid, including all untagged traffic.
> + * Typically d is used with policing/quota rules.
> + *
> + * We want tag_t big enough to distinguish uid_t and acct_tag.
> + * It might become a struct if needed.
> + * Nothing should be using it as an int.
> + */
> +typedef uint64_t tag_t; /* Only used via accessors */
> +
> +#define TAG_UID_MASK 0xFFFFFFFFULL
> +#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
> +
> +static inline int tag_compare(tag_t t1, tag_t t2)
> +{
> + return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
> +}
> +
> +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
> +{
> + return acct_tag | uid;
> +}
> +static inline tag_t make_tag_from_uid(uid_t uid)
> +{
> + return uid;
> +}
> +static inline uid_t get_uid_from_tag(tag_t tag)
> +{
> + return tag & TAG_UID_MASK;
> +}
> +static inline tag_t get_utag_from_tag(tag_t tag)
> +{
> + return tag & TAG_UID_MASK;
> +}
> +static inline tag_t get_atag_from_tag(tag_t tag)
> +{
> + return tag & TAG_ACCT_MASK;
> +}
> +
> +static inline bool valid_atag(tag_t tag)
> +{
> + return !(tag & TAG_UID_MASK);
> +}
> +static inline tag_t make_atag_from_value(uint32_t value)
> +{
> + return (uint64_t)value << 32;
> +}
> +/*---------------------------------------------------------------------------*/
> +
> +/*
> + * Maximum number of socket tags that a UID is allowed to have active.
> + * Multiple processes belonging to the same UID contribute towards this limit.
> + * Special UIDs that can impersonate a UID also contribute (e.g. download
> + * manager, ...)
> + */
> +#define DEFAULT_MAX_SOCK_TAGS 1024
> +
> +/*
> + * For now we only track 2 sets of counters.
> + * The default set is 0.
> + * Userspace can activate another set for a given uid being tracked.
> + */
> +#define IFS_MAX_COUNTER_SETS 2
> +
> +enum ifs_tx_rx {
> + IFS_TX,
> + IFS_RX,
> + IFS_MAX_DIRECTIONS
> +};
> +
> +/* For now, TCP, UDP, the rest */
> +enum ifs_proto {
> + IFS_TCP,
> + IFS_UDP,
> + IFS_PROTO_OTHER,
> + IFS_MAX_PROTOS
> +};
> +
> +struct byte_packet_counters {
> + uint64_t bytes;
> + uint64_t packets;
> +};
> +
> +struct data_counters {
> + struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
> +};
> +
> +/* Generic X based nodes used as a base for rb_tree ops */
> +struct tag_node {
> + struct rb_node node;
> + tag_t tag;
> +};
> +
> +struct tag_stat {
> + struct tag_node tn;
> + struct data_counters counters;
> + /*
> + * If this tag is acct_tag based, we need to count against the
> + * matching parent uid_tag.
> + */
> + struct data_counters *parent_counters;
> +};
> +
> +struct iface_stat {
> + struct list_head list; /* in iface_stat_list */
> + char *ifname;
> + bool active;
> + /* net_dev is only valid for active iface_stat */
> + struct net_device *net_dev;
> +
> + struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
> + /*
> + * We keep the last_known, because some devices reset their counters
> + * just before NETDEV_UP, while some will reset just before
> + * NETDEV_REGISTER (which is more normal).
> + * So now, if the device didn't do a NETDEV_UNREGISTER and we see
> + * its current dev stats smaller that what was previously known, we
> + * assume an UNREGISTER and just use the last_known.
> + */
> + struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
> + /* last_known is usable when last_known_valid is true */
> + bool last_known_valid;
> +
> + struct proc_dir_entry *proc_ptr;
> +
> + struct rb_root tag_stat_tree;
> + spinlock_t tag_stat_list_lock;
> +};
> +
> +/* This is needed to create proc_dir_entries from atomic context. */
> +struct iface_stat_work {
> + struct work_struct iface_work;
> + struct iface_stat *iface_entry;
> +};
> +
> +/*
> + * Track tag that this socket is transferring data for, and not necessarily
> + * the uid that owns the socket.
> + * This is the tag against which tag_stat.counters will be billed.
> + * These structs need to be looked up by sock and pid.
> + */
> +struct sock_tag {
> + struct rb_node sock_node;
> + struct sock *sk; /* Only used as a number, never dereferenced */
> + /* The socket is needed for sockfd_put() */
> + struct socket *socket;
> + /* Used to associate with a given pid */
> + struct list_head list; /* in proc_qtu_data.sock_tag_list */
> + pid_t pid;
> +
> + tag_t tag;
> +};
> +
> +struct qtaguid_event_counts {
> + /* Various successful events */
> + atomic64_t sockets_tagged;
> + atomic64_t sockets_untagged;
> + atomic64_t counter_set_changes;
> + atomic64_t delete_cmds;
> + atomic64_t iface_events; /* Number of NETDEV_* events handled */
> +
> + atomic64_t match_calls; /* Number of times iptables called mt */
> + /*
> + * match_found_sk_*: numbers related to the netfilter matching
> + * function finding a sock for the sk_buff.
> + * Total skbs processed is sum(match_found*).
> + */
> + atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
> + /* The connection tracker had or didn't have the sk. */
> + atomic64_t match_found_sk_in_ct;
> + atomic64_t match_found_no_sk_in_ct;
> + /*
> + * No sk could be found. No apparent owner. Could happen with
> + * unsolicited traffic.
> + */
> + atomic64_t match_no_sk;
> + /*
> + * The file ptr in the sk_socket wasn't there.
> + * This might happen for traffic while the socket is being closed.
> + */
> + atomic64_t match_no_sk_file;
> +};
> +
> +/* Track the set active_set for the given tag. */
> +struct tag_counter_set {
> + struct tag_node tn;
> + int active_set;
> +};
> +
> +/*----------------------------------------------*/
> +/*
> + * The qtu uid data is used to track resources that are created directly or
> + * indirectly by processes (uid tracked).
> + * It is shared by the processes with the same uid.
> + * Some of the resource will be counted to prevent further rogue allocations,
> + * some will need freeing once the owner process (uid) exits.
> + */
> +struct uid_tag_data {
> + struct rb_node node;
> + uid_t uid;
> +
> + /*
> + * For the uid, how many accounting tags have been set.
> + */
> + int num_active_tags;
> + /* Track the number of proc_qtu_data that reference it */
> + int num_pqd;
> + struct rb_root tag_ref_tree;
> + /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
> +};
> +
> +struct tag_ref {
> + struct tag_node tn;
> +
> + /*
> + * This tracks the number of active sockets that have a tag on them
> + * which matches this tag_ref.tn.tag.
> + * A tag ref can live on after the sockets are untagged.
> + * A tag ref can only be removed during a tag delete command.
> + */
> + int num_sock_tags;
> +};
> +
> +struct proc_qtu_data {
> + struct rb_node node;
> + pid_t pid;
> +
> + struct uid_tag_data *parent_tag_data;
> +
> + /* Tracks the sock_tags that need freeing upon this proc's death */
> + struct list_head sock_tag_list;
> + /* No spinlock_t sock_tag_list_lock; use the global one. */
> +};
> +
> +/*----------------------------------------------*/
> +#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
> diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
> new file mode 100644
> index 0000000..3917678
> --- /dev/null
> +++ b/net/netfilter/xt_qtaguid_print.c
> @@ -0,0 +1,556 @@
> +/*
> + * Pretty printing Support for iptables xt_qtaguid module.
> + *
> + * (C) 2011 Google, Inc
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +/*
> + * Most of the functions in this file just waste time if DEBUG is not defined.
> + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
> + * debug flags ore not defined.
> + * Those funcs that fail to allocate memory will panic as there is no need to
> + * hobble allong just pretending to do the requested work.
> + */
> +
> +#define DEBUG
> +
> +#include <linux/fs.h>
> +#include <linux/gfp.h>
> +#include <linux/net.h>
> +#include <linux/rbtree.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock_types.h>
> +
> +
> +#include "xt_qtaguid_internal.h"
> +#include "xt_qtaguid_print.h"
> +
> +#ifdef DDEBUG
> +
> +static void _bug_on_err_or_null(void *ptr)
> +{
> + if (IS_ERR_OR_NULL(ptr)) {
> + pr_err("qtaguid: kmalloc failed\n");
> + BUG();
> + }
> +}
> +
> +char *pp_tag_t(tag_t *tag)
> +{
> + char *res;
> +
> + if (!tag)
> + res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
> + else
> + res = kasprintf(GFP_ATOMIC,
> + "tag_t@%p{tag=0x%llx, uid=%u}",
> + tag, *tag, get_uid_from_tag(*tag));
> + _bug_on_err_or_null(res);
> + return res;
> +}
> +
> +char *pp_data_counters(struct data_counters *dc, bool showValues)
> +{
> + char *res;
> +
> + if (!dc)
> + res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
> + else if (showValues)
> + res = kasprintf(
> + GFP_ATOMIC, "data_counters@%p{"
> + "set0{"
> + "rx{"
> + "tcp{b=%llu, p=%llu}, "
> + "udp{b=%llu, p=%llu},"
> + "other{b=%llu, p=%llu}}, "
> + "tx{"
> + "tcp{b=%llu, p=%llu}, "
> + "udp{b=%llu, p=%llu},"
> + "other{b=%llu, p=%llu}}}, "
> + "set1{"
> + "rx{"
> + "tcp{b=%llu, p=%llu}, "
> + "udp{b=%llu, p=%llu},"
> + "other{b=%llu, p=%llu}}, "
> + "tx{"
> + "tcp{b=%llu, p=%llu}, "
> + "udp{b=%llu, p=%llu},"
> + "other{b=%llu, p=%llu}}}}",
> + dc,
> + dc->bpc[0][IFS_RX][IFS_TCP].bytes,
> + dc->bpc[0][IFS_RX][IFS_TCP].packets,
> + dc->bpc[0][IFS_RX][IFS_UDP].bytes,
> + dc->bpc[0][IFS_RX][IFS_UDP].packets,
> + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
> + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
> + dc->bpc[0][IFS_TX][IFS_TCP].bytes,
> + dc->bpc[0][IFS_TX][IFS_TCP].packets,
> + dc->bpc[0][IFS_TX][IFS_UDP].bytes,
> + dc->bpc[0][IFS_TX][IFS_UDP].packets,
> + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
> + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
> + dc->bpc[1][IFS_RX][IFS_TCP].bytes,
> + dc->bpc[1][IFS_RX][IFS_TCP].packets,
> + dc->bpc[1][IFS_RX][IFS_UDP].bytes,
> + dc->bpc[1][IFS_RX][IFS_UDP].packets,
> + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
> + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
> + dc->bpc[1][IFS_TX][IFS_TCP].bytes,
> + dc->bpc[1][IFS_TX][IFS_TCP].packets,
> + dc->bpc[1][IFS_TX][IFS_UDP].bytes,
> + dc->bpc[1][IFS_TX][IFS_UDP].packets,
> + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
> + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
> + else
> + res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
> + _bug_on_err_or_null(res);
> + return res;
> +}
> +
> +char *pp_tag_node(struct tag_node *tn)
> +{
> + char *tag_str;
> + char *res;
> +
> + if (!tn) {
> + res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
> + _bug_on_err_or_null(res);
> + return res;
> + }
> + tag_str = pp_tag_t(&tn->tag);
> + res = kasprintf(GFP_ATOMIC,
> + "tag_node@%p{tag=%s}",
> + tn, tag_str);
> + _bug_on_err_or_null(res);
> + kfree(tag_str);
> + return res;
> +}
> +
> +char *pp_tag_ref(struct tag_ref *tr)
> +{
> + char *tn_str;
> + char *res;
> +
> + if (!tr) {
> + res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
> + _bug_on_err_or_null(res);
> + return res;
> + }
> + tn_str = pp_tag_node(&tr->tn);
> + res = kasprintf(GFP_ATOMIC,
> + "tag_ref@%p{%s, num_sock_tags=%d}",
> + tr, tn_str, tr->num_sock_tags);
> + _bug_on_err_or_null(res);
> + kfree(tn_str);
> + return res;
> +}
> +
> +char *pp_tag_stat(struct tag_stat *ts)
> +{
> + char *tn_str;
> + char *counters_str;
> + char *parent_counters_str;
> + char *res;
> +
> + if (!ts) {
> + res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
> + _bug_on_err_or_null(res);
> + return res;
> + }
> + tn_str = pp_tag_node(&ts->tn);
> + counters_str = pp_data_counters(&ts->counters, true);
> + parent_counters_str = pp_data_counters(ts->parent_counters, false);
> + res = kasprintf(GFP_ATOMIC,
> + "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
> + ts, tn_str, counters_str, parent_counters_str);
> + _bug_on_err_or_null(res);
> + kfree(tn_str);
> + kfree(counters_str);
> + kfree(parent_counters_str);
> + return res;
> +}
> +
> +char *pp_iface_stat(struct iface_stat *is)
> +{
> + char *res;
> + if (!is)
> + res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
> + else
> + res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
> + "list=list_head{...}, "
> + "ifname=%s, "
> + "total={rx={bytes=%llu, "
> + "packets=%llu}, "
> + "tx={bytes=%llu, "
> + "packets=%llu}}, "
> + "last_known_valid=%d, "
> + "last_known={rx={bytes=%llu, "
> + "packets=%llu}, "
> + "tx={bytes=%llu, "
> + "packets=%llu}}, "
> + "active=%d, "
> + "net_dev=%p, "
> + "proc_ptr=%p, "
> + "tag_stat_tree=rb_root{...}}",
> + is,
> + is->ifname,
> + is->totals[IFS_RX].bytes,
> + is->totals[IFS_RX].packets,
> + is->totals[IFS_TX].bytes,
> + is->totals[IFS_TX].packets,
> + is->last_known_valid,
> + is->last_known[IFS_RX].bytes,
> + is->last_known[IFS_RX].packets,
> + is->last_known[IFS_TX].bytes,
> + is->last_known[IFS_TX].packets,
> + is->active,
> + is->net_dev,
> + is->proc_ptr);
> + _bug_on_err_or_null(res);
> + return res;
> +}
> +
> +char *pp_sock_tag(struct sock_tag *st)
> +{
> + char *tag_str;
> + char *res;
> +
> + if (!st) {
> + res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
> + _bug_on_err_or_null(res);
> + return res;
> + }
> + tag_str = pp_tag_t(&st->tag);
> + res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
> + "sock_node=rb_node{...}, "
> + "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
> + "pid=%u, tag=%s}",
> + st, st->sk, st->socket, atomic_long_read(
> + &st->socket->file->f_count),
> + st->pid, tag_str);
> + _bug_on_err_or_null(res);
> + kfree(tag_str);
> + return res;
> +}
> +
> +char *pp_uid_tag_data(struct uid_tag_data *utd)
> +{
> + char *res;
> +
> + if (!utd)
> + res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
> + else
> + res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
> + "uid=%u, num_active_acct_tags=%d, "
> + "num_pqd=%d, "
> + "tag_node_tree=rb_root{...}, "
> + "proc_qtu_data_tree=rb_root{...}}",
> + utd, utd->uid,
> + utd->num_active_tags, utd->num_pqd);
> + _bug_on_err_or_null(res);
> + return res;
> +}
> +
> +char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
> +{
> + char *parent_tag_data_str;
> + char *res;
> +
> + if (!pqd) {
> + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
> + _bug_on_err_or_null(res);
> + return res;
> + }
> + parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
> + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
> + "node=rb_node{...}, pid=%u, "
> + "parent_tag_data=%s, "
> + "sock_tag_list=list_head{...}}",
> + pqd, pqd->pid, parent_tag_data_str
> + );
> + _bug_on_err_or_null(res);
> + kfree(parent_tag_data_str);
> + return res;
> +}
> +
> +/*------------------------------------------*/
> +void prdebug_sock_tag_tree(int indent_level,
> + struct rb_root *sock_tag_tree)
> +{
> + struct rb_node *node;
> + struct sock_tag *sock_tag_entry;
> + char *str;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (RB_EMPTY_ROOT(sock_tag_tree)) {
> + str = "sock_tag_tree=rb_root{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "sock_tag_tree=rb_root{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + for (node = rb_first(sock_tag_tree);
> + node;
> + node = rb_next(node)) {
> + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
> + str = pp_sock_tag(sock_tag_entry);
> + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
> + kfree(str);
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_sock_tag_list(int indent_level,
> + struct list_head *sock_tag_list)
> +{
> + struct sock_tag *sock_tag_entry;
> + char *str;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (list_empty(sock_tag_list)) {
> + str = "sock_tag_list=list_head{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "sock_tag_list=list_head{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
> + str = pp_sock_tag(sock_tag_entry);
> + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
> + kfree(str);
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_proc_qtu_data_tree(int indent_level,
> + struct rb_root *proc_qtu_data_tree)
> +{
> + char *str;
> + struct rb_node *node;
> + struct proc_qtu_data *proc_qtu_data_entry;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
> + str = "proc_qtu_data_tree=rb_root{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "proc_qtu_data_tree=rb_root{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + for (node = rb_first(proc_qtu_data_tree);
> + node;
> + node = rb_next(node)) {
> + proc_qtu_data_entry = rb_entry(node,
> + struct proc_qtu_data,
> + node);
> + str = pp_proc_qtu_data(proc_qtu_data_entry);
> + pr_debug("%*d: %s,\n", indent_level*2, indent_level,
> + str);
> + kfree(str);
> + indent_level++;
> + prdebug_sock_tag_list(indent_level,
> + &proc_qtu_data_entry->sock_tag_list);
> + indent_level--;
> +
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
> +{
> + char *str;
> + struct rb_node *node;
> + struct tag_ref *tag_ref_entry;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (RB_EMPTY_ROOT(tag_ref_tree)) {
> + str = "tag_ref_tree{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "tag_ref_tree{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + for (node = rb_first(tag_ref_tree);
> + node;
> + node = rb_next(node)) {
> + tag_ref_entry = rb_entry(node,
> + struct tag_ref,
> + tn.node);
> + str = pp_tag_ref(tag_ref_entry);
> + pr_debug("%*d: %s,\n", indent_level*2, indent_level,
> + str);
> + kfree(str);
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_uid_tag_data_tree(int indent_level,
> + struct rb_root *uid_tag_data_tree)
> +{
> + char *str;
> + struct rb_node *node;
> + struct uid_tag_data *uid_tag_data_entry;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
> + str = "uid_tag_data_tree=rb_root{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "uid_tag_data_tree=rb_root{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + for (node = rb_first(uid_tag_data_tree);
> + node;
> + node = rb_next(node)) {
> + uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
> + node);
> + str = pp_uid_tag_data(uid_tag_data_entry);
> + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
> + kfree(str);
> + if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
> + indent_level++;
> + prdebug_tag_ref_tree(indent_level,
> + &uid_tag_data_entry->tag_ref_tree);
> + indent_level--;
> + }
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_tag_stat_tree(int indent_level,
> + struct rb_root *tag_stat_tree)
> +{
> + char *str;
> + struct rb_node *node;
> + struct tag_stat *ts_entry;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (RB_EMPTY_ROOT(tag_stat_tree)) {
> + str = "tag_stat_tree{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "tag_stat_tree{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + for (node = rb_first(tag_stat_tree);
> + node;
> + node = rb_next(node)) {
> + ts_entry = rb_entry(node, struct tag_stat, tn.node);
> + str = pp_tag_stat(ts_entry);
> + pr_debug("%*d: %s\n", indent_level*2, indent_level,
> + str);
> + kfree(str);
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +void prdebug_iface_stat_list(int indent_level,
> + struct list_head *iface_stat_list)
> +{
> + char *str;
> + struct iface_stat *iface_entry;
> +
> + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
> + return;
> +
> + if (list_empty(iface_stat_list)) {
> + str = "iface_stat_list=list_head{}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + return;
> + }
> +
> + str = "iface_stat_list=list_head{";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + indent_level++;
> + list_for_each_entry(iface_entry, iface_stat_list, list) {
> + str = pp_iface_stat(iface_entry);
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> + kfree(str);
> +
> + spin_lock_bh(&iface_entry->tag_stat_list_lock);
> + if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
> + indent_level++;
> + prdebug_tag_stat_tree(indent_level,
> + &iface_entry->tag_stat_tree);
> + indent_level--;
> + }
> + spin_unlock_bh(&iface_entry->tag_stat_list_lock);
> + }
> + indent_level--;
> + str = "}";
> + pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
> +}
> +
> +#endif /* ifdef DDEBUG */
> +/*------------------------------------------*/
> +static const char * const netdev_event_strings[] = {
> + "netdev_unknown",
> + "NETDEV_UP",
> + "NETDEV_DOWN",
> + "NETDEV_REBOOT",
> + "NETDEV_CHANGE",
> + "NETDEV_REGISTER",
> + "NETDEV_UNREGISTER",
> + "NETDEV_CHANGEMTU",
> + "NETDEV_CHANGEADDR",
> + "NETDEV_GOING_DOWN",
> + "NETDEV_CHANGENAME",
> + "NETDEV_FEAT_CHANGE",
> + "NETDEV_BONDING_FAILOVER",
> + "NETDEV_PRE_UP",
> + "NETDEV_PRE_TYPE_CHANGE",
> + "NETDEV_POST_TYPE_CHANGE",
> + "NETDEV_POST_INIT",
> + "NETDEV_UNREGISTER_BATCH",
> + "NETDEV_RELEASE",
> + "NETDEV_NOTIFY_PEERS",
> + "NETDEV_JOIN",
> +};
> +
> +const char *netdev_evt_str(int netdev_event)
> +{
> + if (netdev_event < 0
> + || netdev_event >= ARRAY_SIZE(netdev_event_strings))
> + return "bad event num";
> + return netdev_event_strings[netdev_event];
> +}
> diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
> new file mode 100644
> index 0000000..b63871a
> --- /dev/null
> +++ b/net/netfilter/xt_qtaguid_print.h
> @@ -0,0 +1,120 @@
> +/*
> + * Pretty printing Support for iptables xt_qtaguid module.
> + *
> + * (C) 2011 Google, Inc
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef __XT_QTAGUID_PRINT_H__
> +#define __XT_QTAGUID_PRINT_H__
> +
> +#include "xt_qtaguid_internal.h"
> +
> +#ifdef DDEBUG
> +
> +char *pp_tag_t(tag_t *tag);
> +char *pp_data_counters(struct data_counters *dc, bool showValues);
> +char *pp_tag_node(struct tag_node *tn);
> +char *pp_tag_ref(struct tag_ref *tr);
> +char *pp_tag_stat(struct tag_stat *ts);
> +char *pp_iface_stat(struct iface_stat *is);
> +char *pp_sock_tag(struct sock_tag *st);
> +char *pp_uid_tag_data(struct uid_tag_data *qtd);
> +char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
> +
> +/*------------------------------------------*/
> +void prdebug_sock_tag_list(int indent_level,
> + struct list_head *sock_tag_list);
> +void prdebug_sock_tag_tree(int indent_level,
> + struct rb_root *sock_tag_tree);
> +void prdebug_proc_qtu_data_tree(int indent_level,
> + struct rb_root *proc_qtu_data_tree);
> +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
> +void prdebug_uid_tag_data_tree(int indent_level,
> + struct rb_root *uid_tag_data_tree);
> +void prdebug_tag_stat_tree(int indent_level,
> + struct rb_root *tag_stat_tree);
> +void prdebug_iface_stat_list(int indent_level,
> + struct list_head *iface_stat_list);
> +
> +#else
> +
> +/*------------------------------------------*/
> +static inline char *pp_tag_t(tag_t *tag)
> +{
> + return NULL;
> +}
> +static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
> +{
> + return NULL;
> +}
> +static inline char *pp_tag_node(struct tag_node *tn)
> +{
> + return NULL;
> +}
> +static inline char *pp_tag_ref(struct tag_ref *tr)
> +{
> + return NULL;
> +}
> +static inline char *pp_tag_stat(struct tag_stat *ts)
> +{
> + return NULL;
> +}
> +static inline char *pp_iface_stat(struct iface_stat *is)
> +{
> + return NULL;
> +}
> +static inline char *pp_sock_tag(struct sock_tag *st)
> +{
> + return NULL;
> +}
> +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
> +{
> + return NULL;
> +}
> +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
> +{
> + return NULL;
> +}
> +
> +/*------------------------------------------*/
> +static inline
> +void prdebug_sock_tag_list(int indent_level,
> + struct list_head *sock_tag_list)
> +{
> +}
> +static inline
> +void prdebug_sock_tag_tree(int indent_level,
> + struct rb_root *sock_tag_tree)
> +{
> +}
> +static inline
> +void prdebug_proc_qtu_data_tree(int indent_level,
> + struct rb_root *proc_qtu_data_tree)
> +{
> +}
> +static inline
> +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
> +{
> +}
> +static inline
> +void prdebug_uid_tag_data_tree(int indent_level,
> + struct rb_root *uid_tag_data_tree)
> +{
> +}
> +static inline
> +void prdebug_tag_stat_tree(int indent_level,
> + struct rb_root *tag_stat_tree)
> +{
> +}
> +static inline
> +void prdebug_iface_stat_list(int indent_level,
> + struct list_head *iface_stat_list)
> +{
> +}
> +#endif
> +/*------------------------------------------*/
> +const char *netdev_evt_str(int netdev_event);
> +#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
> --
> 1.7.9.5

--
Thanks,
//richard
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/