Re: [PATCH v2 30/33] x86/intel_rdt_rdtgroup.c: Process schemata input from resctrl interface

From: Thomas Gleixner
Date: Thu Sep 08 2016 - 18:23:46 EST


On Thu, 8 Sep 2016, Fenghua Yu wrote:

> +struct resources {

Darn. The first look made me parse this as a redefinition of 'struct
resource' ... Can't you find an even more generic name for this?

> + struct cache_resource *l3;
> +};
> +
> +static int get_res_type(char **res, enum resource_type *res_type)
> +{
> + char *tok;
> +
> + tok = strsep(res, ":");
> + if (tok == NULL)

We still write: if (!tok) as anywhere else.

> +static int divide_resources(char *buf, char *resources[RESOURCE_NUM])
> +{
> + char *tok;
> + unsigned int resource_num = 0;
> + int ret = 0;
> + char *res;
> + char *res_block;
> + size_t size;
> + enum resource_type res_type;

Sigh.

> +
> + size = strlen(buf) + 1;
> + res = kzalloc(size, GFP_KERNEL);
> + if (!res) {
> + ret = -ENOSPC;
> + goto out;
> + }
> +
> + while ((tok = strsep(&buf, "\n")) != NULL) {
> + if (strlen(tok) == 0)
> + break;
> + if (resource_num++ >= 1) {

How gets that ever greater 1?

> + pr_info("More than one line of resource input!\n");
> + ret = -EINVAL;
> + goto out;
> + }
> + strcpy(res, tok);
> + }
> +
> + res_block = res;
> + ret = get_res_type(&res_block, &res_type);
> + if (ret) {
> + pr_info("Unknown resource type!");
> + goto out;
> + }
> +
> + if (res_block == NULL) {
> + pr_info("Invalid resource value!");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (res_type == RESOURCE_L3 && cat_enabled(CACHE_LEVEL3)) {
> + strcpy(resources[RESOURCE_L3], res_block);
> + } else {
> + pr_info("Invalid resource type!");
> + goto out;
> + }
> +
> + ret = 0;


You surely found the most convoluted solution for this. Whats wrong with:

data = get_res_type(res, &type);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto out;
}

ret = 0;
switch (type) {
case RESOURCE_L3:
if (cat_enabled(CACHE_LEVEL3))
strcpy(resources[RESOURCE_L3], data);
break;
default:
ret = -EINVAL;
}

That's too simple to understand and too extensible for future resource
types, right?

> +out:
> + kfree(res);
> + return ret;
> +}

> +static int get_input_cbm(char *tok, struct cache_resource *l,
> + int input_domain_num, int level)
> +{
> + int ret;
> +
> + if (!cdp_enabled) {
> + if (tok == NULL)
> + return -EINVAL;
> +
> + ret = kstrtoul(tok, 16,
> + (unsigned long *)&l->cbm[input_domain_num]);

What is this type cast for? Can't you just parse the data into a local
unsigned long and then store it after validation?

> + if (ret)
> + return ret;
> +
> + if (!cbm_validate(l->cbm[input_domain_num], level))
> + return -EINVAL;
> + } else {
> + char *input_cbm1_str;
> +
> + input_cbm1_str = strsep(&tok, ",");
> + if (input_cbm1_str == NULL || tok == NULL)
> + return -EINVAL;
> +
> + ret = kstrtoul(input_cbm1_str, 16,
> + (unsigned long *)&l->cbm[input_domain_num]);
> + if (ret)
> + return ret;
> +
> + if (!cbm_validate(l->cbm[input_domain_num], level))
> + return -EINVAL;
> +
> + ret = kstrtoul(tok, 16,
> + (unsigned long *)&l->cbm2[input_domain_num]);
> + if (ret)
> + return ret;
> +
> + if (!cbm_validate(l->cbm2[input_domain_num], level))
> + return -EINVAL;

So you have 3 copies of the same sequence now. At other places you split
out the most tiniest stuff into a gazillion of helper functions ...

Just create a parser helper and call it for any of those types. So the
whole thing boils down to:

static int parse_cbm_token(char *tok, u64 *cbm, int level)
{
unsigned long data;
int ret;

ret = kstrtoul(tok, 16, &data);
if (ret)
return ret;
if (!cbm_validate(data, level))
return -EINVAL;
*cbm = data;
return 0;
}

static int parse_cbm(char *buf, struct cache_resource *cr, int domain,
int level)
{
char *cbm1 = buf;
int ret;

if (cdp_enabled)
cbm1 = strsep(&buf, ',');

if (!cbm1 || !buf)
return -EINVAL;

ret = parse_cbm_token(cbm1, &cr->cbm[domain]);
if (ret)
return ret;

if (cdp_enmabled)
return parse_cbm_token(buf, &cr->cbm2[domain]);
return 0;
}

Copy and paste is simpler than thinking, but the result is uglier and
harder to read.

> +static int get_cache_schema(char *buf, struct cache_resource *l, int level,
> + struct rdtgroup *rdtgrp)
> +{
> + char *tok, *tok_cache_id;
> + int ret;
> + int domain_num;
> + int input_domain_num;
> + int len;
> + unsigned int input_cache_id;
> + unsigned int cid;
> + unsigned int leaf;
> +
> + if (!cat_enabled(level) && strcmp(buf, ";")) {
> + pr_info("Disabled resource should have empty schema\n");

So an empty schema is a string which is != ";". Very interesting.

This enabled check here wants more thoughts. If a resource is disabled,
then the input line should be simply ignored. Otherwise you need to rewrite
scripts, config files just because you disabled a particular resource.

> + return -EINVAL;
> + }
> +}
> +
> +enum {
> + CURRENT_CLOSID,
> + REUSED_OWN_CLOSID,
> + REUSED_OTHER_CLOSID,
> + NEW_CLOSID,
> +};

Another random enum in the middle of the code and at a place where it is
completely disjunct from its usage.

> +
> +/*
> + * Check if the reference counts are all ones in rdtgrp's domain.
> + */
> +static bool one_refcnt(struct rdtgroup *rdtgrp, int domain)

A really self explaining function name - NOT!

> +/*
> + * Go through all shared domains. Check if there is an existing closid
> + * in all rdtgroups that matches l3 cbms in the shared
> + * domain. If find one, reuse the closid. Otherwise, allocate a new one.
> + */
> +static int get_rdtgroup_resources(struct resources *resources_set,
> + struct rdtgroup *rdtgrp)
> +{
> + struct cache_resource *l3;
> + bool l3_cbm_found;
> + struct list_head *l;
> + struct rdtgroup *r;
> + u64 cbm;
> + int rdt_closid[MAX_CACHE_DOMAINS];
> + int rdt_closid_type[MAX_CACHE_DOMAINS];

Have you ever checked what the stack foot print of this whole callchain is?
One of the callers has already a char array[1024] on the stack.....

> + int domain;
> + int closid;
> + int ret;
> +
> + l3 = resources_set->l3;
> + memcpy(rdt_closid, rdtgrp->resource.closid,
> + shared_domain_num * sizeof(int));

Can you please seperate stuff with new lines ocassionally to make it
readable?

> + for (domain = 0; domain < shared_domain_num; domain++) {
> + if (rdtgrp->resource.valid) {
> + /*
> + * If current rdtgrp is the only user of cbms in
> + * this domain, will replace the cbms with the input
> + * cbms and reuse its own closid.
> + */
> + if (one_refcnt(rdtgrp, domain)) {
> + closid = rdtgrp->resource.closid[domain];
> + rdt_closid[domain] = closid;
> + rdt_closid_type[domain] = REUSED_OWN_CLOSID;
> + continue;
> + }
> +
> + l3_cbm_found = true;
> +
> + if (cat_l3_enabled)
> + l3_cbm_found = cbm_found(l3, rdtgrp, domain,
> + CACHE_LEVEL3);
> +
> + /*
> + * If the cbms in this shared domain are already
> + * existing in current rdtgrp, record the closid
> + * and its type.
> + */
> + if (l3_cbm_found) {
> + closid = rdtgrp->resource.closid[domain];
> + rdt_closid[domain] = closid;
> + rdt_closid_type[domain] = CURRENT_CLOSID;
> + continue;
> + }

This is unreadable once more.

if (find_cbm(l3, rdtgrp, domain, CACHE_LEVEL3) {
closid = rdtgrp->resource.closid[domain];
rdt_closid[domain] = closid;
rdt_closid_type[domain] = CURRENT_CLOSID;
continue;
}

That requires that find_cbm() - which is a way more intuitive name than
cbm_found() - returns false when cat_l3_enabled is false. Which is trivial
and obvious ...

> + }
> +
> + /*
> + * If the cbms are not found in this rdtgrp, search other
> + * rdtgroups and see if there are matched cbms.
> + */
> + l3_cbm_found = cat_l3_enabled ? false : true;

What the heck?

l3_cbm_found = !cat_l3_enabled;

Is too simple obviously.

Aside of that silly conditional: If cat_l3_enables is false then
l3_cbm_found is true.

> + list_for_each(l, &rdtgroup_lists) {
> + r = list_entry(l, struct rdtgroup, rdtgroup_list);
> + if (r == rdtgrp || !r->resource.valid)
> + continue;
> +
> + if (cat_l3_enabled)
> + l3_cbm_found = cbm_found(l3, r, domain,
> + CACHE_LEVEL3);

And because this path is never taken when cat_l3_enabled is false.

> +
> + if (l3_cbm_found) {

We happily get the closid for something which is not enabled at all. What
is the logic here? I can't find any in this convoluted mess.

> + /* Get the closid that matches l3 cbms.*/
> + closid = r->resource.closid[domain];
> + rdt_closid[domain] = closid;
> + rdt_closid_type[domain] = REUSED_OTHER_CLOSID;
> + break;
> + }
> + }
> + if (!l3_cbm_found) {
> + /*
> + * If no existing closid is found, allocate
> + * a new one.
> + */
> + ret = closid_alloc(&closid, domain);
> + if (ret)
> + goto err;
> + rdt_closid[domain] = closid;
> + rdt_closid_type[domain] = NEW_CLOSID;
> + }
> + }

I really don't want to imagine how this might look like when you add L2
support and if you have code doing this please hide it in the poison
cabinet forever,

> + /*
> + * Now all closid are ready in rdt_closid. Update rdtgrp's closid.
> + */
> + for_each_cache_domain(domain, 0, shared_domain_num) {
> + /*
> + * Nothing is changed if the same closid and same cbms were
> + * found in this rdtgrp's domain.
> + */
> + if (rdt_closid_type[domain] == CURRENT_CLOSID)
> + continue;
> +
> + /*
> + * Put rdtgroup closid. No need to put the closid if we
> + * just change cbms and keep the closid (REUSED_OWN_CLOSID).
> + */
> + if (rdtgrp->resource.valid &&
> + rdt_closid_type[domain] != REUSED_OWN_CLOSID) {
> + /* Put old closid in this rdtgrp's domain if valid. */
> + closid = rdtgrp->resource.closid[domain];
> + closid_put(closid, domain);
> + }
> +
> + /*
> + * Replace the closid in this rdtgrp's domain with saved
> + * closid that was newly allocted (NEW_CLOSID), or found in
> + * another rdtgroup's domains (REUSED_CLOSID), or found in
> + * this rdtgrp (REUSED_OWN_CLOSID).
> + */
> + closid = rdt_closid[domain];
> + rdtgrp->resource.closid[domain] = closid;
> +
> + /*
> + * Get the reused other rdtgroup's closid. No need to get the
> + * closid newly allocated (NEW_CLOSID) because it's been
> + * already got in closid_alloc(). And no need to get the closid
> + * for resued own closid (REUSED_OWN_CLOSID).
> + */
> + if (rdt_closid_type[domain] == REUSED_OTHER_CLOSID)
> + closid_get(closid, domain);
> +
> + /*
> + * If the closid comes from a newly allocated closid
> + * (NEW_CLOSID), or found in this rdtgrp (REUSED_OWN_CLOSID),
> + * cbms for this closid will be updated in MSRs.
> + */
> + if (rdt_closid_type[domain] == NEW_CLOSID ||
> + rdt_closid_type[domain] == REUSED_OWN_CLOSID) {
> + /*
> + * Update cbm in cctable with the newly allocated
> + * closid.
> + */
> + if (cat_l3_enabled) {
> + int cpu;
> + struct cpumask *mask;
> + int dindex;
> + int l3_domain = shared_domain[domain].l3_domain;
> + int leaf = level_to_leaf(CACHE_LEVEL3);
> +
> + cbm = l3->cbm[l3_domain];
> + dindex = get_dcbm_table_index(closid);
> + l3_cctable[l3_domain][dindex].cbm = cbm;
> + if (cdp_enabled) {
> + int iindex;
> +
> + cbm = l3->cbm2[l3_domain];
> + iindex = get_icbm_table_index(closid);
> + l3_cctable[l3_domain][iindex].cbm = cbm;
> + }
> +
> + mask =
> + &cache_domains[leaf].shared_cpu_map[l3_domain];
> +
> + cpu = cpumask_first(mask);
> + smp_call_function_single(cpu, cbm_update_l3_msr,
> + &closid, 1);

Again, why don't ypu split that out into a seperate function instead of
having the forth indentation level and random line breaks?

> +static void init_cache_resource(struct cache_resource *l)
> +{
> + l->cbm = NULL;
> + l->cbm2 = NULL;
> + l->closid = NULL;
> + l->refcnt = NULL;

memset ?

> +}
> +
> +static void free_cache_resource(struct cache_resource *l)
> +{
> + kfree(l->cbm);
> + kfree(l->cbm2);
> + kfree(l->closid);
> + kfree(l->refcnt);
> +}
> +
> +static int alloc_cache_resource(struct cache_resource *l, int level)
> +{
> + int domain_num = get_domain_num(level);
> +
> + l->cbm = kcalloc(domain_num, sizeof(*l->cbm), GFP_KERNEL);
> + l->cbm2 = kcalloc(domain_num, sizeof(*l->cbm2), GFP_KERNEL);
> + l->closid = kcalloc(domain_num, sizeof(*l->closid), GFP_KERNEL);
> + l->refcnt = kcalloc(domain_num, sizeof(*l->refcnt), GFP_KERNEL);
> + if (l->cbm && l->cbm2 && l->closid && l->refcnt)
> + return 0;
> +
> + return -ENOMEM;
> +}
> +
> +/*
> + * This function digests schemata given in text buf. If the schemata are in
> + * right format and there is enough closid, input the schemata in rdtgrp
> + * and update resource cctables.
> + *
> + * Inputs:
> + * buf: string buffer containing schemata
> + * rdtgrp: current rdtgroup holding schemata.
> + *
> + * Return:
> + * 0 on success or error code.
> + */
> +static int get_resources(char *buf, struct rdtgroup *rdtgrp)
> +{
> + char *resources[RESOURCE_NUM];
> + struct cache_resource l3;
> + struct resources resources_set;
> + int ret;
> + char *resources_block;
> + int i;
> + int size = strlen(buf) + 1;
> +
> + resources_block = kcalloc(RESOURCE_NUM, size, GFP_KERNEL);
> + if (!resources_block)
> + return -ENOMEM;
> +
> + for (i = 0; i < RESOURCE_NUM; i++)
> + resources[i] = (char *)(resources_block + i * size);

This is a recurring scheme in your code. Allocating a runtime sized array
and initializing pointers.

Darn, instead of open coding this in several places can't you just make a
single function which does exactly that?

> + ret = divide_resources(buf, resources);
> + if (ret) {
> + kfree(resources_block);
> + return -EINVAL;
> + }
> +
> + init_cache_resource(&l3);
> +
> + if (cat_l3_enabled) {
> + ret = alloc_cache_resource(&l3, CACHE_LEVEL3);
> + if (ret)
> + goto out;
> +
> + ret = get_cache_schema(resources[RESOURCE_L3], &l3,
> + CACHE_LEVEL3, rdtgrp);
> + if (ret)
> + goto out;
> +
> + resources_set.l3 = &l3;
> + } else
> + resources_set.l3 = NULL;



> +
> + ret = get_rdtgroup_resources(&resources_set, rdtgrp);
> +
> +out:
> + kfree(resources_block);
> + free_cache_resource(&l3);
> +
> + return ret;
> +}
> +
> +static void gen_cache_prefix(char *buf, int level)
> +{
> + sprintf(buf, "L%1d:", level == CACHE_LEVEL3 ? 3 : 2);
> +}
> +
> +static int get_cache_id(int domain, int level)
> +{
> + return cache_domains[level_to_leaf(level)].shared_cache_id[domain];
> +}
> +
> +static void gen_cache_buf(char *buf, int level)
> +{
> + int domain;
> + char buf1[32];
> + int domain_num;
> + u64 val;
> +
> + gen_cache_prefix(buf, level);
> +
> + domain_num = get_domain_num(level);
> +
> + val = max_cbm(level);
> +
> + for (domain = 0; domain < domain_num; domain++) {
> + sprintf(buf1, "%d=%lx", get_cache_id(domain, level),
> + (unsigned long)val);
> + strcat(buf, buf1);

WTF?

char *p = buf;

p += sprintf(p, "....", ...);
p += sprintf(p, "....", ...);
p += sprintf(p, "....", ...);

Solves the same problem as this local buffer on the stack plus strcat().

> +/*
> + * Set up default schemata in a rdtgroup. All schemata in all resources are
> + * default values (all 1's) for all domains.
> + *
> + * Input: rdtgroup.
> + * Return: 0: successful
> + * non-0: error code
> + */
> +int get_default_resources(struct rdtgroup *rdtgrp)
> +{
> + char schema[1024];

And that number is pulled out of thin air or what?

> + int ret = 0;
> +
> + if (cat_enabled(CACHE_LEVEL3)) {
> + gen_cache_buf(schema, CACHE_LEVEL3);
> +
> + if (strlen(schema)) {
> + ret = get_resources(schema, rdtgrp);
> + if (ret)
> + return ret;
> + }
> + gen_cache_buf(rdtgrp->schema, CACHE_LEVEL3);
> + }
> +
> + return ret;
> +}
> +
> +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> + char *buf, size_t nbytes, loff_t off)
> +{
> + int ret = 0;
> + struct rdtgroup *rdtgrp;
> + char *schema;
> +
> + rdtgrp = rdtgroup_kn_lock_live(of->kn);
> + if (!rdtgrp)
> + return -ENODEV;
> +
> + schema = kzalloc(sizeof(char) * strlen(buf) + 1, GFP_KERNEL);
> + if (!schema) {
> + ret = -ENOMEM;
> + goto out_unlock;
> + }
> +
> + memcpy(schema, buf, strlen(buf) + 1);

Open coding kstrdup() is indeed useful. and reevaluating strlen(buf) three
times in the same function is even more useful.

> +
> + ret = get_resources(buf, rdtgrp);
> + if (ret)
> + goto out;
> +
> + memcpy(rdtgrp->schema, schema, strlen(schema) + 1);

IIRC then the kernel has even strcpy() and strncpy for that matter.

Btw, what makes sure that strlen(schema) is < 1023 ?????

Thanks,

tglx