Re: [PATCH 22/29] sysctl: Stop requiring explicit management ofsysctl directories

From: Lucian Adrian Grijincu
Date: Sun Jan 29 2012 - 14:32:46 EST


On Fri, Jan 27, 2012 at 6:51 AM, Eric W. Biederman
<ebiederm@xxxxxxxxxxxx> wrote:
> Simplify the code and the sysctl semantics by autogenerating
> sysctl directories when a sysctl table is registered that needs
> the directories and autodeleting the directories when there are
> no more sysctl tables registered that need them.
>
> Autogenerating directories keeps sysctl tables from depending
> on each other, removing all of the arcane register/unregister
> ordering constraints and makes it impossible to get the order
> wrong when reigsering and unregistering sysctl tables.
>
> Autogenerating directories yields one unique entity that dentries
> can point to, retaining the current effective use of the dcache.
>
> Add struct ctl_dir as the type of these new autogenerated
> directories.
>
> The attached_by and attached_to fields in ctl_table_header are
> removed as they are no longer needed.
>
> The child field in ctl_table is no longer needed by the core of
> the sysctl code. Âctl_table.child can be removed once all of the
> existing users have been updated.
>
> Benchmark before:
> Â Âmake-dummies 0 999 -> 0.7s
>  Ârmmod dummy    Â-> 0.07s
> Â Âmake-dummies 0 9999 -> 1m10s
>  Ârmmod dummy     -> 0.4s
>
> Benchmark after:
> Â Âmake-dummies 0 999 -> 0.44s
>  Ârmmod dummy    Â-> 0.065s
> Â Âmake-dummies 0 9999 -> 1m36s
>  Ârmmod dummy     -> 0.4s
>
> Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
> ---
> Âfs/proc/proc_sysctl.c Â| Â342 ++++++++++++++++++++----------------------------
> Âinclude/linux/sysctl.h | Â 10 +-
> Â2 files changed, 150 insertions(+), 202 deletions(-)
>
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 65c13dd..3c0767d 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -28,28 +28,31 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll)
> Âstatic struct ctl_table root_table[] = {
> Â Â Â Â{
> Â Â Â Â Â Â Â Â.procname = "",
> - Â Â Â Â Â Â Â .mode = S_IRUGO|S_IXUGO,
> - Â Â Â Â Â Â Â .child = &root_table[1],
> + Â Â Â Â Â Â Â .mode = S_IFDIR|S_IRUGO|S_IXUGO,
> Â Â Â Â},
> Â Â Â Â{ }
> Â};
> Âstatic struct ctl_table_root sysctl_table_root;
> -static struct ctl_table_header root_table_header = {
> - Â Â Â {{.count = 1,
> - Â Â Â Â .nreg = 1,
> - Â Â Â Â .ctl_table = root_table,
> - Â Â Â Â .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
> - Â Â Â .root = &sysctl_table_root,
> - Â Â Â .set = &sysctl_table_root.default_set,
> +static struct ctl_dir sysctl_root_dir = {
> + Â Â Â .header = {
> + Â Â Â Â Â Â Â {{.count = 1,
> + Â Â Â Â Â Â Â Â .nreg = 1,
> + Â Â Â Â Â Â Â Â .ctl_table = root_table,
> + Â Â Â Â Â Â Â Â .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
> + Â Â Â Â Â Â Â .root = &sysctl_table_root,
> + Â Â Â Â Â Â Â .set = &sysctl_table_root.default_set,
> + Â Â Â },
> Â};
> Âstatic struct ctl_table_root sysctl_table_root = {
> Â Â Â Â.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
> - Â Â Â .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
> + Â Â Â .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry),
> Â Â Â Â.default_set.root = &sysctl_table_root,
> Â};
>
> Âstatic DEFINE_SPINLOCK(sysctl_lock);
>
> +static void drop_sysctl_table(struct ctl_table_header *header);
> +
> Âstatic int namecmp(const char *name1, int len1, const char *name2, int len2)
> Â{
> Â Â Â Âint minlen;
> @@ -66,29 +69,18 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2)
> Â}
>
> Âstatic struct ctl_table *find_entry(struct ctl_table_header **phead,
> - Â Â Â struct ctl_table_set *set,
> - Â Â Â struct ctl_table_header *dir_head, struct ctl_table *dir,
> + Â Â Â struct ctl_table_set *set, struct ctl_dir *dir,
> Â Â Â Âconst char *name, int namelen)
> Â{
> Â Â Â Âstruct ctl_table_header *head;
> Â Â Â Âstruct ctl_table *entry;
>
> - Â Â Â if (dir_head->set == set) {
> - Â Â Â Â Â Â Â for (entry = dir; entry->procname; entry++) {
> - Â Â Â Â Â Â Â Â Â Â Â const char *procname = entry->procname;
> - Â Â Â Â Â Â Â Â Â Â Â if (namecmp(procname, strlen(procname), name, namelen) == 0) {
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â *phead = dir_head;
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return entry;
> - Â Â Â Â Â Â Â Â Â Â Â }
> - Â Â Â Â Â Â Â }
> - Â Â Â }
> -
> Â Â Â Âlist_for_each_entry(head, &set->list, ctl_entry) {
> Â Â Â Â Â Â Â Âif (head->unregistering)
> Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> - Â Â Â Â Â Â Â if (head->attached_to != dir)
> + Â Â Â Â Â Â Â if (head->parent != dir)
> Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> - Â Â Â Â Â Â Â for (entry = head->attached_by; entry->procname; entry++) {
> + Â Â Â Â Â Â Â for (entry = head->ctl_table; entry->procname; entry++) {
> Â Â Â Â Â Â Â Â Â Â Â Âconst char *procname = entry->procname;
> Â Â Â Â Â Â Â Â Â Â Â Âif (namecmp(procname, strlen(procname), name, namelen) == 0) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â*phead = head;
> @@ -103,6 +95,7 @@ static void init_header(struct ctl_table_header *head,
> Â Â Â Âstruct ctl_table_root *root, struct ctl_table_set *set,
> Â Â Â Âstruct ctl_table *table)
> Â{
> + Â Â Â head->ctl_table = table;
> Â Â Â Âhead->ctl_table_arg = table;
> Â Â Â ÂINIT_LIST_HEAD(&head->ctl_entry);
> Â Â Â Âhead->used = 0;
> @@ -119,9 +112,10 @@ static void erase_header(struct ctl_table_header *head)
> Â Â Â Âlist_del_init(&head->ctl_entry);
> Â}
>
> -static void insert_header(struct ctl_table_header *header)
> +static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
> Â{
> - Â Â Â header->parent->count++;
> + Â Â Â header->parent = dir;
> + Â Â Â header->parent->header.nreg++;
> Â Â Â Âlist_add_tail(&header->ctl_entry, &header->set->list);
> Â}
>
> @@ -219,8 +213,7 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
> Â}
>
> Âstatic struct ctl_table *lookup_entry(struct ctl_table_header **phead,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct ctl_table_header *dir_head,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct ctl_table *dir,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct ctl_dir *dir,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âconst char *name, int namelen)
> Â{
> Â Â Â Âstruct ctl_table_header *head;
> @@ -232,7 +225,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
> Â Â Â Âroot = &sysctl_table_root;
> Â Â Â Âdo {
> Â Â Â Â Â Â Â Âset = lookup_header_set(root, current->nsproxy);
> - Â Â Â Â Â Â Â entry = find_entry(&head, set, dir_head, dir, name, namelen);
> + Â Â Â Â Â Â Â entry = find_entry(&head, set, dir, name, namelen);
> Â Â Â Â Â Â Â Âif (entry && use_table(head))
> Â Â Â Â Â Â Â Â Â Â Â Â*phead = head;
> Â Â Â Â Â Â Â Âelse
> @@ -244,7 +237,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
> Â Â Â Âreturn entry;
> Â}
>
> -static struct ctl_table_header *next_usable_entry(struct ctl_table *dir,
> +static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir,
> Â Â Â Âstruct ctl_table_root *root, struct list_head *tmp)
> Â{
> Â Â Â Âstruct nsproxy *namespaces = current->nsproxy;
> @@ -256,8 +249,8 @@ static struct ctl_table_header *next_usable_entry(struct ctl_table *dir,
> Â Â Â Â Â Â Â Âhead = list_entry(tmp, struct ctl_table_header, ctl_entry);
> Â Â Â Â Â Â Â Âroot = head->root;
>
> - Â Â Â Â Â Â Â if (head->attached_to != dir ||
> - Â Â Â Â Â Â Â Â Â !head->attached_by->procname ||
> + Â Â Â Â Â Â Â if (head->parent != dir ||
> + Â Â Â Â Â Â Â Â Â !head->ctl_table->procname ||
> Â Â Â Â Â Â Â Â Â Â!use_table(head))
> Â Â Â Â Â Â Â Â Â Â Â Âgoto next;
>
> @@ -281,47 +274,35 @@ out:
> Â Â Â Âreturn NULL;
> Â}
>
> -static void first_entry(
> - Â Â Â struct ctl_table_header *dir_head, struct ctl_table *dir,
> +static void first_entry(struct ctl_dir *dir,
> Â Â Â Âstruct ctl_table_header **phead, struct ctl_table **pentry)
> Â{
> - Â Â Â struct ctl_table_header *head = dir_head;
> - Â Â Â struct ctl_table *entry = dir;
> + Â Â Â struct ctl_table_header *head;
> + Â Â Â struct ctl_table *entry = NULL;
>
> Â Â Â Âspin_lock(&sysctl_lock);
> - Â Â Â if (entry->procname) {
> - Â Â Â Â Â Â Â use_table(head);
> - Â Â Â } else {
> - Â Â Â Â Â Â Â head = next_usable_entry(dir, &sysctl_table_root,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â&sysctl_table_root.default_set.list);
> - Â Â Â Â Â Â Â if (head)
> - Â Â Â Â Â Â Â Â Â Â Â entry = head->attached_by;
> - Â Â Â }
> + Â Â Â head = next_usable_entry(dir, &sysctl_table_root,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â&sysctl_table_root.default_set.list);
> Â Â Â Âspin_unlock(&sysctl_lock);
> + Â Â Â if (head)
> + Â Â Â Â Â Â Â entry = head->ctl_table;
> Â Â Â Â*phead = head;
> Â Â Â Â*pentry = entry;
> Â}
>
> -static void next_entry(struct ctl_table *dir,
> - Â Â Â struct ctl_table_header **phead, struct ctl_table **pentry)
> +static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
> Â{
> Â Â Â Âstruct ctl_table_header *head = *phead;
> Â Â Â Âstruct ctl_table *entry = *pentry;
>
> Â Â Â Âentry++;
> Â Â Â Âif (!entry->procname) {
> - Â Â Â Â Â Â Â struct ctl_table_root *root = head->root;
> - Â Â Â Â Â Â Â struct list_head *tmp = &head->ctl_entry;
> - Â Â Â Â Â Â Â if (head->attached_to != dir) {
> - Â Â Â Â Â Â Â Â Â Â Â root = &sysctl_table_root;
> - Â Â Â Â Â Â Â Â Â Â Â tmp = &sysctl_table_root.default_set.list;
> - Â Â Â Â Â Â Â }
> Â Â Â Â Â Â Â Âspin_lock(&sysctl_lock);
> Â Â Â Â Â Â Â Âunuse_table(head);
> - Â Â Â Â Â Â Â head = next_usable_entry(dir, root, tmp);
> + Â Â Â Â Â Â Â head = next_usable_entry(head->parent, head->root, &head->ctl_entry);
> Â Â Â Â Â Â Â Âspin_unlock(&sysctl_lock);
> Â Â Â Â Â Â Â Âif (head)
> - Â Â Â Â Â Â Â Â Â Â Â entry = head->attached_by;
> + Â Â Â Â Â Â Â Â Â Â Â entry = head->ctl_table;
> Â Â Â Â}
> Â Â Â Â*phead = head;
> Â Â Â Â*pentry = entry;
> @@ -381,7 +362,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
>
> Â Â Â Âinode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
> Â Â Â Âinode->i_mode = table->mode;
> - Â Â Â if (!table->child) {
> + Â Â Â if (!S_ISDIR(table->mode)) {
> Â Â Â Â Â Â Â Âinode->i_mode |= S_IFREG;
> Â Â Â Â Â Â Â Âinode->i_op = &proc_sys_inode_operations;
> Â Â Â Â Â Â Â Âinode->i_fop = &proc_sys_file_operations;
> @@ -398,7 +379,7 @@ static struct ctl_table_header *grab_header(struct inode *inode)
> Â{
> Â Â Â Âstruct ctl_table_header *head = PROC_I(inode)->sysctl;
> Â Â Â Âif (!head)
> - Â Â Â Â Â Â Â head = &root_table_header;
> + Â Â Â Â Â Â Â head = &sysctl_root_dir.header;
> Â Â Â Âreturn sysctl_head_grab(head);
> Â}
>
> @@ -406,24 +387,19 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âstruct nameidata *nd)
> Â{
> Â Â Â Âstruct ctl_table_header *head = grab_header(dir);
> - Â Â Â struct ctl_table *table = PROC_I(dir)->sysctl_entry;
> Â Â Â Âstruct ctl_table_header *h = NULL;
> Â Â Â Âstruct qstr *name = &dentry->d_name;
> Â Â Â Âstruct ctl_table *p;
> Â Â Â Âstruct inode *inode;
> Â Â Â Âstruct dentry *err = ERR_PTR(-ENOENT);
> + Â Â Â struct ctl_dir *ctl_dir;
>
> Â Â Â Âif (IS_ERR(head))
> Â Â Â Â Â Â Â Âreturn ERR_CAST(head);
>
> - Â Â Â if (table && !table->child) {
> - Â Â Â Â Â Â Â WARN_ON(1);
> - Â Â Â Â Â Â Â goto out;
> - Â Â Â }
> + Â Â Â ctl_dir = container_of(head, struct ctl_dir, header);
>
> - Â Â Â table = table ? table->child : &head->ctl_table[1];
> -
> - Â Â Â p = lookup_entry(&h, head, table, name->name, name->len);
> + Â Â Â p = lookup_entry(&h, ctl_dir, name->name, name->len);
> Â Â Â Âif (!p)
> Â Â Â Â Â Â Â Âgoto out;
>
> @@ -586,21 +562,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
> Â Â Â Âstruct dentry *dentry = filp->f_path.dentry;
> Â Â Â Âstruct inode *inode = dentry->d_inode;
> Â Â Â Âstruct ctl_table_header *head = grab_header(inode);
> - Â Â Â struct ctl_table *table = PROC_I(inode)->sysctl_entry;
> Â Â Â Âstruct ctl_table_header *h = NULL;
> Â Â Â Âstruct ctl_table *entry;
> + Â Â Â struct ctl_dir *ctl_dir;
> Â Â Â Âunsigned long pos;
> Â Â Â Âint ret = -EINVAL;
>
> Â Â Â Âif (IS_ERR(head))
> Â Â Â Â Â Â Â Âreturn PTR_ERR(head);
>
> - Â Â Â if (table && !table->child) {
> - Â Â Â Â Â Â Â WARN_ON(1);
> - Â Â Â Â Â Â Â goto out;
> - Â Â Â }
> -
> - Â Â Â table = table ? table->child : &head->ctl_table[1];
> + Â Â Â ctl_dir = container_of(head, struct ctl_dir, header);
>
> Â Â Â Âret = 0;
> Â Â Â Â/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
> @@ -618,7 +589,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
> Â Â Â Â}
> Â Â Â Âpos = 2;
>
> - Â Â Â for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) {
> + Â Â Â for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
> Â Â Â Â Â Â Â Âret = scan(h, entry, &pos, filp, dirent, filldir);
> Â Â Â Â Â Â Â Âif (ret) {
> Â Â Â Â Â Â Â Â Â Â Â Âsysctl_head_finish(h);
> @@ -779,52 +750,86 @@ static const struct dentry_operations proc_sys_dentry_operations = {
>    Â.d_compare   Â= proc_sys_compare,
> Â};
>
> -static struct ctl_table *is_branch_in(struct ctl_table *branch,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct ctl_table *table)
> +static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir,
> + Â Â Â const char *name, int namelen)
> Â{
> - Â Â Â struct ctl_table *p;
> - Â Â Â const char *s = branch->procname;
> + Â Â Â struct ctl_table_header *head;
> + Â Â Â struct ctl_table *entry;
>
> - Â Â Â /* branch should have named subdirectory as its first element */
> - Â Â Â if (!s || !branch->child)
> - Â Â Â Â Â Â Â return NULL;
> + Â Â Â entry = find_entry(&head, set, dir, name, namelen);
> + Â Â Â if (!entry)
> + Â Â Â Â Â Â Â return ERR_PTR(-ENOENT);
> + Â Â Â if (S_ISDIR(entry->mode))
> + Â Â Â Â Â Â Â return container_of(head, struct ctl_dir, header);
> + Â Â Â return ERR_PTR(-ENOTDIR);
> +}


I find this easier to read:

entry = find_entry(&head, dir, name, namelen);
if (!entry)
return ERR_PTR(-ENOENT);
if (!S_ISDIR(entry->mode))
return ERR_PTR(-ENOTDIR);
return container_of(head, struct ctl_dir, header);




> +
> +static struct ctl_dir *new_dir(struct ctl_table_set *set,
> + Â Â Â const char *name, int namelen)
> +{
> + Â Â Â struct ctl_table *table;
> + Â Â Â struct ctl_dir *new;
> + Â Â Â char *new_name;
>
> - Â Â Â /* ... and nothing else */
> - Â Â Â if (branch[1].procname)
> + Â Â Â new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 +
> + Â Â Â Â Â Â Â Â Â Â namelen + 1, GFP_KERNEL);
> + Â Â Â if (!new)
> Â Â Â Â Â Â Â Âreturn NULL;
>
> - Â Â Â /* table should contain subdirectory with the same name */
> - Â Â Â for (p = table; p->procname; p++) {
> - Â Â Â Â Â Â Â if (!p->child)
> - Â Â Â Â Â Â Â Â Â Â Â continue;
> - Â Â Â Â Â Â Â if (p->procname && strcmp(p->procname, s) == 0)
> - Â Â Â Â Â Â Â Â Â Â Â return p;
> - Â Â Â }
> - Â Â Â return NULL;
> + Â Â Â table = (struct ctl_table *)(new + 1);
> + Â Â Â new_name = (char *)(table + 2);
> + Â Â Â memcpy(new_name, name, namelen);
> + Â Â Â new_name[namelen] = '\0';
> + Â Â Â table[0].procname = new_name;
> + Â Â Â table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
> + Â Â Â init_header(&new->header, set->root, set, table);
> +
> + Â Â Â return new;
> Â}
>
> -/* see if attaching q to p would be an improvement */
> -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
> Â{
> - Â Â Â struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
> - Â Â Â struct ctl_table *next;
> - Â Â Â int is_better = 0;
> - Â Â Â int not_in_parent = !p->attached_by;
> -
> - Â Â Â while ((next = is_branch_in(by, to)) != NULL) {
> - Â Â Â Â Â Â Â if (by == q->attached_by)
> - Â Â Â Â Â Â Â Â Â Â Â is_better = 1;
> - Â Â Â Â Â Â Â if (to == p->attached_by)
> - Â Â Â Â Â Â Â Â Â Â Â not_in_parent = 1;
> - Â Â Â Â Â Â Â by = by->child;
> - Â Â Â Â Â Â Â to = next->child;
> - Â Â Â }
> + Â Â Â struct ctl_dir *subdir, *new = NULL;
>
> - Â Â Â if (is_better && not_in_parent) {
> - Â Â Â Â Â Â Â q->attached_by = by;
> - Â Â Â Â Â Â Â q->attached_to = to;
> - Â Â Â Â Â Â Â q->parent = p;



This one's hard to understand :) I would add these comments:



/* get_subdir - find and take a reference of a given subdir
* - if no entry with the same name found, create one
* - set - the set in which to look for the subdir (after looking in
the parent dir's set)
* - dir - the parent dir. NOTE: before calling this function
* make sure to have a increased the count on the parent dir's nreg
* as this function will drop a reference!
*/

> +static struct ctl_dir *get_subdir(struct ctl_table_set *set,
> + Â Â Â struct ctl_dir *dir, const char *name, int namelen)
> + Â Â Â spin_lock(&sysctl_lock);

/* first look in the parent dir's set */

> + Â Â Â subdir = find_subdir(dir->header.set, dir, name, namelen);
> + Â Â Â if (!IS_ERR(subdir))
> + Â Â Â Â Â Â Â goto found;
> + Â Â Â if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set)

/* then look in the requested set if it's not the
same as the parent's set */

> + Â Â Â Â Â Â Â subdir = find_subdir(set, dir, name, namelen);
> + Â Â Â if (!IS_ERR(subdir))
> + Â Â Â Â Â Â Â goto found;
> + Â Â Â if (PTR_ERR(subdir) != -ENOENT)
> + Â Â Â Â Â Â Â goto failed;
> +
> + Â Â Â spin_unlock(&sysctl_lock);

/* not found, let's make a new dir and we'll add it later */
> + Â Â Â new = new_dir(set, name, namelen);

> + Â Â Â spin_lock(&sysctl_lock);
> + Â Â Â subdir = ERR_PTR(-ENOMEM);
> + Â Â Â if (!new)
> + Â Â Â Â Â Â Â goto failed;
> +
/* check in the requested set: maybe someone added it while we
temporarily gave up the sysctl_lock to create the new dir. */

> + Â Â Â subdir = find_subdir(set, dir, name, namelen);
> + Â Â Â if (!IS_ERR(subdir))
> + Â Â Â Â Â Â Â goto found;
> + Â Â Â if (PTR_ERR(subdir) != -ENOENT)
> + Â Â Â Â Â Â Â goto failed;
> +
/* nope, no one created an entry with this name while we gave
up the spin lock.
We can add the new dir. */

> + Â Â Â insert_header(dir, &new->header);
> + Â Â Â subdir = new;
> +found:
> + Â Â Â subdir->header.nreg++;
> +failed:
> + Â Â Â if (unlikely(IS_ERR(subdir))) {
> + Â Â Â Â Â Â Â printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n",
> + Â Â Â Â Â Â Â Â Â Â Â namelen, namelen, name, PTR_ERR(subdir));
> Â Â Â Â}

/* drop a reference from the parent dir, as we dive into
the subdir */

> + Â Â Â drop_sysctl_table(&dir->header);
> + Â Â Â if (new)
> + Â Â Â Â Â Â Â drop_sysctl_table(&new->header);
> + Â Â Â spin_unlock(&sysctl_lock);
> + Â Â Â return subdir;
> Â}
>


Why do you only check the given @set after creating the new dir. Why
not the parent dir too?


> Âstatic int sysctl_check_table_dups(const char *path, struct ctl_table *old,
> @@ -846,24 +851,14 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
> Â}
>
> Âstatic int sysctl_check_dups(struct nsproxy *namespaces,
> - Â Â Â struct ctl_table_header *header,
> + Â Â Â struct ctl_dir *dir,
> Â Â Â Âconst char *path, struct ctl_table *table)
> Â{
> Â Â Â Âstruct ctl_table_root *root;
> Â Â Â Âstruct ctl_table_set *set;
> - Â Â Â struct ctl_table_header *dir_head, *head;
> - Â Â Â struct ctl_table *dir_table;
> + Â Â Â struct ctl_table_header *head;
> Â Â Â Âint error = 0;
>
> - Â Â Â /* No dups if we are the only member of our directory */
> - Â Â Â if (header->attached_by != table)
> - Â Â Â Â Â Â Â return 0;
> -
> - Â Â Â dir_head = header->parent;
> - Â Â Â dir_table = header->attached_to;
> -
> - Â Â Â error = sysctl_check_table_dups(path, dir_table, table);
> -
> Â Â Â Âroot = &sysctl_table_root;
> Â Â Â Âdo {
> Â Â Â Â Â Â Â Âset = lookup_header_set(root, namespaces);
> @@ -871,9 +866,9 @@ static int sysctl_check_dups(struct nsproxy *namespaces,
> Â Â Â Â Â Â Â Âlist_for_each_entry(head, &set->list, ctl_entry) {
> Â Â Â Â Â Â Â Â Â Â Â Âif (head->unregistering)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> - Â Â Â Â Â Â Â Â Â Â Â if (head->attached_to != dir_table)
> + Â Â Â Â Â Â Â Â Â Â Â if (head->parent != dir)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> - Â Â Â Â Â Â Â Â Â Â Â error = sysctl_check_table_dups(path, head->attached_by,
> + Â Â Â Â Â Â Â Â Â Â Â error = sysctl_check_table_dups(path, head->ctl_table,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âtable);
> Â Â Â Â Â Â Â Â}
> Â Â Â Â Â Â Â Âroot = list_entry(root->root_list.next,
> @@ -977,47 +972,25 @@ struct ctl_table_header *__register_sysctl_table(
> Â Â Â Âconst char *path, struct ctl_table *table)
> Â{
> Â Â Â Âstruct ctl_table_header *header;
> - Â Â Â struct ctl_table *new, **prevp;
> Â Â Â Âconst char *name, *nextname;
> - Â Â Â unsigned int npath = 0;
> Â Â Â Âstruct ctl_table_set *set;
> - Â Â Â size_t path_bytes = 0;
> - Â Â Â char *new_name;
> -
> - Â Â Â /* Count the path components */
> - Â Â Â for (name = path; name; name = nextname) {
> - Â Â Â Â Â Â Â int namelen;
> - Â Â Â Â Â Â Â nextname = strchr(name, '/');
> - Â Â Â Â Â Â Â if (nextname) {
> - Â Â Â Â Â Â Â Â Â Â Â namelen = nextname - name;
> - Â Â Â Â Â Â Â Â Â Â Â nextname++;
> - Â Â Â Â Â Â Â } else {
> - Â Â Â Â Â Â Â Â Â Â Â namelen = strlen(name);
> - Â Â Â Â Â Â Â }
> - Â Â Â Â Â Â Â if (namelen == 0)
> - Â Â Â Â Â Â Â Â Â Â Â continue;
> - Â Â Â Â Â Â Â path_bytes += namelen + 1;
> - Â Â Â Â Â Â Â npath++;
> - Â Â Â }
> + Â Â Â struct ctl_dir *dir;
>
> - Â Â Â /*
> - Â Â Â Â* For each path component, allocate a 2-element ctl_table array.
> - Â Â Â Â* The first array element will be filled with the sysctl entry
> - Â Â Â Â* for this, the second will be the sentinel (procname == 0).
> - Â Â Â Â*
> - Â Â Â Â* We allocate everything in one go so that we don't have to
> - Â Â Â Â* worry about freeing additional memory in unregister_sysctl_table.
> - Â Â Â Â*/
> - Â Â Â header = kzalloc(sizeof(struct ctl_table_header) + path_bytes +
> - Â Â Â Â Â Â Â Â Â Â Â Â(2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
> + Â Â Â header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
> Â Â Â Âif (!header)
> Â Â Â Â Â Â Â Âreturn NULL;
>
> - Â Â Â new = (struct ctl_table *) (header + 1);
> - Â Â Â new_name = (char *)(new + (2 * npath));
> + Â Â Â init_header(header, root, NULL, table);
> + Â Â Â if (sysctl_check_table(path, table))
> + Â Â Â Â Â Â Â goto fail;
> +
> + Â Â Â spin_lock(&sysctl_lock);
> + Â Â Â header->set = set = lookup_header_set(root, namespaces);
> + Â Â Â dir = &sysctl_root_dir;


/* get_subdir will drop a reference to this dir as it dives into the subdir */

> + Â Â Â dir->header.nreg++;
> + Â Â Â spin_unlock(&sysctl_lock);
>
> - Â Â Â /* Now connect the dots */
> - Â Â Â prevp = &header->ctl_table;
> + Â Â Â /* Find the directory for the ctl_table */
> Â Â Â Âfor (name = path; name; name = nextname) {
> Â Â Â Â Â Â Â Âint namelen;
> Â Â Â Â Â Â Â Ânextname = strchr(name, '/');
> @@ -1029,51 +1002,21 @@ struct ctl_table_header *__register_sysctl_table(
> Â Â Â Â Â Â Â Â}
> Â Â Â Â Â Â Â Âif (namelen == 0)
> Â Â Â Â Â Â Â Â Â Â Â Âcontinue;
> - Â Â Â Â Â Â Â memcpy(new_name, name, namelen);
> - Â Â Â Â Â Â Â new_name[namelen] = '\0';
> -
> - Â Â Â Â Â Â Â new->procname = new_name;
> -        new->mode   = 0555;
> -
> - Â Â Â Â Â Â Â *prevp = new;
> - Â Â Â Â Â Â Â prevp = &new->child;
>
> - Â Â Â Â Â Â Â new += 2;
> - Â Â Â Â Â Â Â new_name += namelen + 1;
> + Â Â Â Â Â Â Â dir = get_subdir(set, dir, name, namelen);
> + Â Â Â Â Â Â Â if (IS_ERR(dir))
> + Â Â Â Â Â Â Â Â Â Â Â goto fail;
> Â Â Â Â}
> - Â Â Â *prevp = table;
> -
> - Â Â Â init_header(header, root, NULL, table);
> - Â Â Â if (sysctl_check_table(path, table))
> - Â Â Â Â Â Â Â goto fail;
> -
> Â Â Â Âspin_lock(&sysctl_lock);
> - Â Â Â header->set = lookup_header_set(root, namespaces);
> - Â Â Â header->attached_by = header->ctl_table;
> - Â Â Â header->attached_to = &root_table[1];
> - Â Â Â header->parent = &root_table_header;
> - Â Â Â set = header->set;
> - Â Â Â root = header->root;
> - Â Â Â for (;;) {
> - Â Â Â Â Â Â Â struct ctl_table_header *p;
> - Â Â Â Â Â Â Â list_for_each_entry(p, &set->list, ctl_entry) {
> - Â Â Â Â Â Â Â Â Â Â Â if (p->unregistering)
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
> - Â Â Â Â Â Â Â Â Â Â Â try_attach(p, header);
> - Â Â Â Â Â Â Â }
> - Â Â Â Â Â Â Â if (root == &sysctl_table_root)
> - Â Â Â Â Â Â Â Â Â Â Â break;
> - Â Â Â Â Â Â Â root = list_entry(root->root_list.prev,
> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct ctl_table_root, root_list);
> - Â Â Â Â Â Â Â set = lookup_header_set(root, namespaces);
> - Â Â Â }
> - Â Â Â if (sysctl_check_dups(namespaces, header, path, table))
> - Â Â Â Â Â Â Â goto fail_locked;
> - Â Â Â insert_header(header);
> + Â Â Â if (sysctl_check_dups(namespaces, dir, path, table))
> + Â Â Â Â Â Â Â goto fail_put_dir_locked;
> + Â Â Â insert_header(dir, header);
> + Â Â Â drop_sysctl_table(&dir->header);
> Â Â Â Âspin_unlock(&sysctl_lock);
>
> Â Â Â Âreturn header;
> -fail_locked:
> +fail_put_dir_locked:
> + Â Â Â drop_sysctl_table(&dir->header);
> Â Â Â Âspin_unlock(&sysctl_lock);
> Âfail:
> Â Â Â Âkfree(header);
> @@ -1299,16 +1242,17 @@ EXPORT_SYMBOL(register_sysctl_table);
>
> Âstatic void drop_sysctl_table(struct ctl_table_header *header)
> Â{
> + Â Â Â struct ctl_dir *parent = header->parent;
> +
> Â Â Â Âif (--header->nreg)
> Â Â Â Â Â Â Â Âreturn;
>
> Â Â Â Âstart_unregistering(header);
> - Â Â Â if (!--header->parent->count) {
> - Â Â Â Â Â Â Â WARN_ON(1);
> - Â Â Â Â Â Â Â kfree_rcu(header->parent, rcu);
> - Â Â Â }
> Â Â Â Âif (!--header->count)
> Â Â Â Â Â Â Â Âkfree_rcu(header, rcu);
> +
> + Â Â Â if (parent)
> + Â Â Â Â Â Â Â drop_sysctl_table(&parent->header);
> Â}
>
> Â/**
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index e73ba33..3084b62 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -938,6 +938,7 @@ struct ctl_table;
> Âstruct nsproxy;
> Âstruct ctl_table_root;
> Âstruct ctl_table_header;
> +struct ctl_dir;
>
> Âtypedef struct ctl_table ctl_table;
>
> @@ -1040,9 +1041,12 @@ struct ctl_table_header
> Â Â Â Âstruct ctl_table *ctl_table_arg;
> Â Â Â Âstruct ctl_table_root *root;
> Â Â Â Âstruct ctl_table_set *set;
> - Â Â Â struct ctl_table *attached_by;
> - Â Â Â struct ctl_table *attached_to;
> - Â Â Â struct ctl_table_header *parent;
> + Â Â Â struct ctl_dir *parent;
> +};
> +
> +struct ctl_dir {
> + Â Â Â /* Header must be at the start of ctl_dir */
> + Â Â Â struct ctl_table_header header;
> Â};
>
> Âstruct ctl_table_set {
> --
> 1.7.2.5
>



--
Â.
..: Lucian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/