[PATCH v1 5/8] perf comm: Add reference count checking to comm_str

From: Ian Rogers
Date: Tue May 07 2024 - 14:37:17 EST


Reference count checking of an rbtree is troublesome as each pointer
should have a reference, switch to using a sorted array. Remove an
indirection by embedding the reference count with the string. Use
pthread_once to safely initialize the comm_strs and reader writer
mutex.

Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/util/comm.c | 196 ++++++++++++++++++++++++++---------------
1 file changed, 126 insertions(+), 70 deletions(-)

diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index afb8d4fd2644..1aa9a08e5b03 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -1,108 +1,164 @@
// SPDX-License-Identifier: GPL-2.0
#include "comm.h"
#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
#include <string.h>
+#include <internal/rc_check.h>
#include <linux/refcount.h>
-#include <linux/rbtree.h>
#include <linux/zalloc.h>
#include "rwsem.h"

-struct comm_str {
- char *str;
- struct rb_node rb_node;
+DECLARE_RC_STRUCT(comm_str) {
refcount_t refcnt;
+ char str[];
};

-/* Should perhaps be moved to struct machine */
-static struct rb_root comm_str_root;
-static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
+static struct comm_strs {
+ struct rw_semaphore lock;
+ struct comm_str **strs;
+ int num_strs;
+ int capacity;
+} _comm_strs;
+
+static void comm_strs__init(void)
+{
+ init_rwsem(&_comm_strs.lock);
+ _comm_strs.capacity = 16;
+ _comm_strs.num_strs = 0;
+ _comm_strs.strs = calloc(16, sizeof(*_comm_strs.strs));
+}
+
+static struct comm_strs *comm_strs__get(void)
+{
+ static pthread_once_t comm_strs_type_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&comm_strs_type_once, comm_strs__init);
+
+ return &_comm_strs;
+}
+
+static refcount_t *comm_str__refcnt(struct comm_str *cs)
+{
+ return &RC_CHK_ACCESS(cs)->refcnt;
+}
+
+static const char *comm_str__str(const struct comm_str *cs)
+{
+ return &RC_CHK_ACCESS(cs)->str[0];
+}

static struct comm_str *comm_str__get(struct comm_str *cs)
{
- if (cs && refcount_inc_not_zero(&cs->refcnt))
- return cs;
+ struct comm_str *result;
+
+ if (RC_CHK_GET(result, cs))
+ refcount_inc_not_zero(comm_str__refcnt(cs));

- return NULL;
+ return result;
}

static void comm_str__put(struct comm_str *cs)
{
- if (cs && refcount_dec_and_test(&cs->refcnt)) {
- down_write(&comm_str_lock);
- rb_erase(&cs->rb_node, &comm_str_root);
- up_write(&comm_str_lock);
- zfree(&cs->str);
- free(cs);
+ if (cs && refcount_dec_and_test(comm_str__refcnt(cs))) {
+ struct comm_strs *comm_strs = comm_strs__get();
+ int i;
+
+ down_write(&comm_strs->lock);
+ for (i = 0; i < comm_strs->num_strs; i++) {
+ if (comm_strs->strs[i] == cs)
+ break;
+ }
+ for (; i < comm_strs->num_strs - 1; i++)
+ comm_strs->strs[i] = comm_strs->strs[i + 1];
+
+ comm_strs->num_strs--;
+ up_write(&comm_strs->lock);
+ RC_CHK_FREE(cs);
+ } else {
+ RC_CHK_PUT(cs);
}
}

-static struct comm_str *comm_str__alloc(const char *str)
+static struct comm_str *comm_str__new(const char *str)
{
- struct comm_str *cs;
+ struct comm_str *result = NULL;
+ RC_STRUCT(comm_str) *cs;

- cs = zalloc(sizeof(*cs));
- if (!cs)
- return NULL;
-
- cs->str = strdup(str);
- if (!cs->str) {
- free(cs);
- return NULL;
+ cs = malloc(sizeof(*cs) + strlen(str) + 1);
+ if (ADD_RC_CHK(result, cs)) {
+ refcount_set(comm_str__refcnt(result), 1);
+ strcpy(&cs->str[0], str);
}
+ return result;
+}

- refcount_set(&cs->refcnt, 1);
+static int comm_str__cmp(const void *_lhs, const void *_rhs)
+{
+ const struct comm_str *lhs = *(const struct comm_str * const *)_lhs;
+ const struct comm_str *rhs = *(const struct comm_str * const *)_rhs;

- return cs;
+ return strcmp(comm_str__str(lhs), comm_str__str(rhs));
}

-static
-struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
+static int comm_str__search(const void *_key, const void *_member)
{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct comm_str *iter, *new;
- int cmp;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct comm_str, rb_node);
-
- /*
- * If we race with comm_str__put, iter->refcnt is 0
- * and it will be removed within comm_str__put call
- * shortly, ignore it in this search.
- */
- cmp = strcmp(str, iter->str);
- if (!cmp && comm_str__get(iter))
- return iter;
-
- if (cmp < 0)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
+ const char *key = _key;
+ const struct comm_str *member = *(const struct comm_str * const *)_member;

- new = comm_str__alloc(str);
- if (!new)
- return NULL;
+ return strcmp(key, comm_str__str(member));
+}
+
+static struct comm_str *__comm_strs__find(struct comm_strs *comm_strs, const char *str)
+{
+ struct comm_str **result;

- rb_link_node(&new->rb_node, parent, p);
- rb_insert_color(&new->rb_node, root);
+ result = bsearch(str, comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *),
+ comm_str__search);

- return new;
+ if (!result)
+ return NULL;
+
+ return comm_str__get(*result);
}

-static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+static struct comm_str *comm_strs__findnew(const char *str)
{
- struct comm_str *cs;
+ struct comm_strs *comm_strs = comm_strs__get();
+ struct comm_str *result;

- down_write(&comm_str_lock);
- cs = __comm_str__findnew(str, root);
- up_write(&comm_str_lock);
+ if (!comm_strs)
+ return NULL;

- return cs;
+ down_read(&comm_strs->lock);
+ result = __comm_strs__find(comm_strs, str);
+ up_read(&comm_strs->lock);
+ if (result)
+ return result;
+
+ down_write(&comm_strs->lock);
+ result = __comm_strs__find(comm_strs, str);
+ if (!result) {
+ if (comm_strs->num_strs == comm_strs->capacity) {
+ struct comm_str **tmp;
+
+ tmp = reallocarray(comm_strs->strs,
+ comm_strs->capacity + 16,
+ sizeof(*comm_strs->strs));
+ if (!tmp) {
+ up_write(&comm_strs->lock);
+ return NULL;
+ }
+ comm_strs->strs = tmp;
+ comm_strs->capacity += 16;
+ }
+ result = comm_str__new(str);
+ if (result) {
+ comm_strs->strs[comm_strs->num_strs++] = result;
+ qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *),
+ comm_str__cmp);
+ }
+ }
+ up_write(&comm_strs->lock);
+ return result;
}

struct comm *comm__new(const char *str, u64 timestamp, bool exec)
@@ -115,7 +171,7 @@ struct comm *comm__new(const char *str, u64 timestamp, bool exec)
comm->start = timestamp;
comm->exec = exec;

- comm->comm_str = comm_str__findnew(str, &comm_str_root);
+ comm->comm_str = comm_strs__findnew(str);
if (!comm->comm_str) {
free(comm);
return NULL;
@@ -128,7 +184,7 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec)
{
struct comm_str *new, *old = comm->comm_str;

- new = comm_str__findnew(str, &comm_str_root);
+ new = comm_strs__findnew(str);
if (!new)
return -ENOMEM;

@@ -149,5 +205,5 @@ void comm__free(struct comm *comm)

const char *comm__str(const struct comm *comm)
{
- return comm->comm_str->str;
+ return comm_str__str(comm->comm_str);
}
--
2.45.0.rc1.225.g2a3ae87e7f-goog