Re: [PATCH 1/3] powerpc/smp: Parse ibm,thread-groups with multiple properties

From: Srikar Dronamraju
Date: Mon Dec 07 2020 - 07:15:00 EST


* Gautham R. Shenoy <ego@xxxxxxxxxxxxxxxxxx> [2020-12-04 10:18:45]:

> From: "Gautham R. Shenoy" <ego@xxxxxxxxxxxxxxxxxx>

<snipped>

>
> static int parse_thread_groups(struct device_node *dn,
> - struct thread_groups *tg,
> - unsigned int property)
> + struct thread_groups_list *tglp)
> {
> - int i;
> - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> + int i = 0;
> + u32 *thread_group_array;
> u32 *thread_list;
> size_t total_threads;
> - int ret;
> + int ret = 0, count;
> + unsigned int property_idx = 0;

NIT:
tglx mentions in one of his recent comments to try keep a reverse fir tree
ordering of variables where possible.

>
> + count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
> ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> - thread_group_array, 3);
> + thread_group_array, count);
> if (ret)
> - return ret;
> -
> - tg->property = thread_group_array[0];
> - tg->nr_groups = thread_group_array[1];
> - tg->threads_per_group = thread_group_array[2];
> - if (tg->property != property ||
> - tg->nr_groups < 1 ||
> - tg->threads_per_group < 1)
> - return -ENODATA;
> + goto out_free;
>
> - total_threads = tg->nr_groups * tg->threads_per_group;
> + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> + int j;
> + struct thread_groups *tg = &tglp->property_tgs[property_idx++];

NIT: same as above.

>
> - ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> - thread_group_array,
> - 3 + total_threads);
> - if (ret)
> - return ret;
> + tg->property = thread_group_array[i];
> + tg->nr_groups = thread_group_array[i + 1];
> + tg->threads_per_group = thread_group_array[i + 2];
> + total_threads = tg->nr_groups * tg->threads_per_group;
> +
> + thread_list = &thread_group_array[i + 3];
>
> - thread_list = &thread_group_array[3];
> + for (j = 0; j < total_threads; j++)
> + tg->thread_list[j] = thread_list[j];
> + i = i + 3 + total_threads;

Can't we simply use memcpy instead?

> + }
>
> - for (i = 0 ; i < total_threads; i++)
> - tg->thread_list[i] = thread_list[i];
> + tglp->nr_properties = property_idx;
>
> - return 0;
> +out_free:
> + kfree(thread_group_array);
> + return ret;
> }
>
> /*
> @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
> return -1;
> }
>
> -static int init_cpu_l1_cache_map(int cpu)
> +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
>
> {
> struct device_node *dn = of_get_cpu_node(cpu, NULL);
> - struct thread_groups tg = {.property = 0,
> - .nr_groups = 0,
> - .threads_per_group = 0};
> + struct thread_groups *tg = NULL;
> int first_thread = cpu_first_thread_sibling(cpu);
> int i, cpu_group_start = -1, err = 0;
> + cpumask_var_t *mask;
> + struct thread_groups_list *cpu_tgl = &tgl[cpu];

NIT: same as 1st comment.

>
> if (!dn)
> return -ENODATA;
>
> - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> - if (err)
> - goto out;
> + if (!(cache_property == THREAD_GROUP_SHARE_L1))
> + return -EINVAL;
>
> - cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> + if (!cpu_tgl->nr_properties) {
> + err = parse_thread_groups(dn, cpu_tgl);
> + if (err)
> + goto out;
> + }
> +
> + for (i = 0; i < cpu_tgl->nr_properties; i++) {
> + if (cpu_tgl->property_tgs[i].property == cache_property) {
> + tg = &cpu_tgl->property_tgs[i];
> + break;
> + }
> + }
> +
> + if (!tg)
> + return -EINVAL;
> +
> + cpu_group_start = get_cpu_thread_group_start(cpu, tg);

This whole hunk should be moved to a new function and called before
init_cpu_cache_map. It will simplify the logic to great extent.

>
> if (unlikely(cpu_group_start == -1)) {
> WARN_ON_ONCE(1);
> @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
> goto out;
> }
>
> - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> - GFP_KERNEL, cpu_to_node(cpu));
> + mask = &per_cpu(cpu_l1_cache_map, cpu);
> +
> + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
>

This hunk (and the next hunk) should be moved to next patch.

> for (i = first_thread; i < first_thread + threads_per_core; i++) {
> - int i_group_start = get_cpu_thread_group_start(i, &tg);
> + int i_group_start = get_cpu_thread_group_start(i, tg);
>
> if (unlikely(i_group_start == -1)) {
> WARN_ON_ONCE(1);
> @@ -843,7 +881,7 @@ static int init_cpu_l1_cache_map(int cpu)
> }
>
> if (i_group_start == cpu_group_start)
> - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
> + cpumask_set_cpu(i, *mask);
> }
>
> out:
> @@ -924,7 +962,7 @@ static int init_big_cores(void)
> int cpu;
>
> for_each_possible_cpu(cpu) {
> - int err = init_cpu_l1_cache_map(cpu);
> + int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L1);
>
> if (err)
> return err;
> --
> 1.9.4
>

--
Thanks and Regards
Srikar Dronamraju