Re: [PATCH v2 2/2] interconnect: qcom: Add tagging and wake/sleep support for sdm845

From: Evan Green
Date: Thu Jul 11 2019 - 13:07:16 EST


Hi Georgi and David,

On Tue, Jun 18, 2019 at 2:17 AM Georgi Djakov <georgi.djakov@xxxxxxxxxx> wrote:
>
> From: David Dai <daidavid1@xxxxxxxxxxxxxx>
>
> Add support for wake and sleep commands by using a tag to indicate
> whether or not the aggregate and set requests fall into execution
> state specific bucket.
>
> Signed-off-by: David Dai <daidavid1@xxxxxxxxxxxxxx>
> Signed-off-by: Georgi Djakov <georgi.djakov@xxxxxxxxxx>
> ---
> drivers/interconnect/qcom/sdm845.c | 129 ++++++++++++++++++++++-------
> 1 file changed, 98 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
> index fb526004c82e..c100aab39415 100644
> --- a/drivers/interconnect/qcom/sdm845.c
> +++ b/drivers/interconnect/qcom/sdm845.c
> @@ -66,6 +66,17 @@ struct bcm_db {
> #define SDM845_MAX_BCM_PER_NODE 2
> #define SDM845_MAX_VCD 10
>
> +#define QCOM_ICC_BUCKET_AMC 0

What is AMC again? Is it the "right now" bucket? Maybe a comment on
the meaning of this bucket would be helpful.

> +#define QCOM_ICC_BUCKET_WAKE 1
> +#define QCOM_ICC_BUCKET_SLEEP 2
> +#define QCOM_ICC_NUM_BUCKETS 3
> +#define QCOM_ICC_TAG_AMC BIT(QCOM_ICC_BUCKET_AMC)
> +#define QCOM_ICC_TAG_WAKE BIT(QCOM_ICC_BUCKET_WAKE)
> +#define QCOM_ICC_TAG_SLEEP BIT(QCOM_ICC_BUCKET_SLEEP)
> +#define QCOM_ICC_TAG_ACTIVE_ONLY (QCOM_ICC_TAG_AMC | QCOM_ICC_TAG_WAKE)
> +#define QCOM_ICC_TAG_ALWAYS (QCOM_ICC_TAG_AMC | QCOM_ICC_TAG_WAKE |\
> + QCOM_ICC_TAG_SLEEP)
> +
> /**
> * struct qcom_icc_node - Qualcomm specific interconnect nodes
> * @name: the node name used in debugfs
> @@ -75,7 +86,9 @@ struct bcm_db {
> * @channels: num of channels at this node
> * @buswidth: width of the interconnect between a node and the bus
> * @sum_avg: current sum aggregate value of all avg bw requests
> + * @sum_avg_cached: previous sum aggregate value of all avg bw requests
> * @max_peak: current max aggregate value of all peak bw requests
> + * @max_peak_cached: previous max aggregate value of all peak bw requests
> * @bcms: list of bcms associated with this logical node
> * @num_bcms: num of @bcms
> */
> @@ -86,8 +99,10 @@ struct qcom_icc_node {
> u16 num_links;
> u16 channels;
> u16 buswidth;
> - u64 sum_avg;
> - u64 max_peak;
> + u64 sum_avg[QCOM_ICC_NUM_BUCKETS];
> + u64 sum_avg_cached[QCOM_ICC_NUM_BUCKETS];
> + u64 max_peak[QCOM_ICC_NUM_BUCKETS];
> + u64 max_peak_cached[QCOM_ICC_NUM_BUCKETS];
> struct qcom_icc_bcm *bcms[SDM845_MAX_BCM_PER_NODE];
> size_t num_bcms;
> };
> @@ -112,8 +127,8 @@ struct qcom_icc_bcm {
> const char *name;
> u32 type;
> u32 addr;
> - u64 vote_x;
> - u64 vote_y;
> + u64 vote_x[QCOM_ICC_NUM_BUCKETS];
> + u64 vote_y[QCOM_ICC_NUM_BUCKETS];
> bool dirty;
> bool keepalive;
> struct bcm_db aux_data;
> @@ -555,7 +570,7 @@ inline void tcs_cmd_gen(struct tcs_cmd *cmd, u64 vote_x, u64 vote_y,
> cmd->wait = true;
> }
>
> -static void tcs_list_gen(struct list_head *bcm_list,
> +static void tcs_list_gen(struct list_head *bcm_list, int bucket,
> struct tcs_cmd tcs_list[SDM845_MAX_VCD],
> int n[SDM845_MAX_VCD])
> {
> @@ -573,8 +588,8 @@ static void tcs_list_gen(struct list_head *bcm_list,
> commit = true;
> cur_vcd_size = 0;
> }
> - tcs_cmd_gen(&tcs_list[idx], bcm->vote_x, bcm->vote_y,
> - bcm->addr, commit);
> + tcs_cmd_gen(&tcs_list[idx], bcm->vote_x[bucket],
> + bcm->vote_y[bucket], bcm->addr, commit);
> idx++;
> n[batch]++;
> /*
> @@ -595,32 +610,39 @@ static void tcs_list_gen(struct list_head *bcm_list,
>
> static void bcm_aggregate(struct qcom_icc_bcm *bcm)
> {
> - size_t i;
> - u64 agg_avg = 0;
> - u64 agg_peak = 0;
> + size_t i, bucket;
> + u64 agg_avg[QCOM_ICC_NUM_BUCKETS] = {0};
> + u64 agg_peak[QCOM_ICC_NUM_BUCKETS] = {0};
> u64 temp;
>
> - for (i = 0; i < bcm->num_nodes; i++) {
> - temp = bcm->nodes[i]->sum_avg * bcm->aux_data.width;
> - do_div(temp, bcm->nodes[i]->buswidth * bcm->nodes[i]->channels);
> - agg_avg = max(agg_avg, temp);
> + for (bucket = 0; bucket < QCOM_ICC_NUM_BUCKETS; bucket++) {
> + for (i = 0; i < bcm->num_nodes; i++) {
> + temp = bcm->nodes[i]->sum_avg_cached[bucket] * bcm->aux_data.width;
> + do_div(temp, bcm->nodes[i]->buswidth * bcm->nodes[i]->channels);
> + agg_avg[bucket] = max(agg_avg[bucket], temp);
>
> - temp = bcm->nodes[i]->max_peak * bcm->aux_data.width;
> - do_div(temp, bcm->nodes[i]->buswidth);

Why is it that this one doesn't have the multiply by
bcm->nodes[i]->channels again? I can't recall if there was a reason.
If it's correct maybe it deserves a comment.

> - agg_peak = max(agg_peak, temp);
> - }
> + temp = bcm->nodes[i]->max_peak_cached[bucket] * bcm->aux_data.width;
> + do_div(temp, bcm->nodes[i]->buswidth);
> + agg_peak[bucket] = max(agg_peak[bucket], temp);
>
> - temp = agg_avg * 1000ULL;
> - do_div(temp, bcm->aux_data.unit);
> - bcm->vote_x = temp;
> + bcm->nodes[i]->sum_avg[bucket] = 0;
> + bcm->nodes[i]->max_peak[bucket] = 0;

I don't understand the sum_avg vs sum_avg_cached. Here's what I understand:
1. qcom_icc_aggregate() does the math from the incoming values on
sum_avg, and then clobbers sum_avg_cached with those values.
2. bcm_aggregate() uses sum_avg_cached in its calculations, then clears sum_avg.

But I don't get why that's needed. Why not just have sum_avg? Wouldn't
it work the same? Ok, it wouldn't if you ended up calling
bcm_aggregate() multiple times on the same bcm. But you have a dirty
flag that prevents this from happening. So I think it's safe to remove
the cached arrays, and just clear out the sum_avg when you aggregate.

> + }
>
> - temp = agg_peak * 1000ULL;
> - do_div(temp, bcm->aux_data.unit);
> - bcm->vote_y = temp;
> + temp = agg_avg[bucket] * 1000ULL;
> + do_div(temp, bcm->aux_data.unit);
> + bcm->vote_x[bucket] = temp;
>
> - if (bcm->keepalive && bcm->vote_x == 0 && bcm->vote_y == 0) {
> - bcm->vote_x = 1;
> - bcm->vote_y = 1;
> + temp = agg_peak[bucket] * 1000ULL;
> + do_div(temp, bcm->aux_data.unit);
> + bcm->vote_y[bucket] = temp;
> + }
> +
> + if (bcm->keepalive && bcm->vote_x[0] == 0 && bcm->vote_y[0] == 0) {
> + bcm->vote_x[QCOM_ICC_BUCKET_AMC] = 1;
> + bcm->vote_x[QCOM_ICC_BUCKET_WAKE] = 1;
> + bcm->vote_y[QCOM_ICC_BUCKET_AMC] = 1;
> + bcm->vote_y[QCOM_ICC_BUCKET_WAKE] = 1;
> }
>
> bcm->dirty = false;
> @@ -631,15 +653,25 @@ static int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
> {
> size_t i;
> struct qcom_icc_node *qn;
> + unsigned long tag_word = (unsigned long)tag;
>
> qn = node->data;
>
> + if (!tag)
> + tag_word = QCOM_ICC_TAG_ALWAYS;
> +
> + for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
> + if (test_bit(i, &tag_word)) {

I guess all this extra business with tag_word and casting is so that
you can use test_bit, which is presumably a tiny bit faster? Does this
actually make a measurable difference? Maybe in the name of simplicity
we just do if (tag & BIT(i)), and then optimize if we find that
conditional to be a hotspot?

> + qn->sum_avg[i] += avg_bw;
> + qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw);
> + qn->sum_avg_cached[i] = qn->sum_avg[i];
> + qn->max_peak_cached[i] = qn->max_peak[i];
> + }
> + }
> +
> *agg_avg += avg_bw;
> *agg_peak = max_t(u32, *agg_peak, peak_bw);
>
> - qn->sum_avg = *agg_avg;
> - qn->max_peak = *agg_peak;
> -
> for (i = 0; i < qn->num_bcms; i++)
> qn->bcms[i]->dirty = true;
>
> @@ -675,7 +707,7 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> * Construct the command list based on a pre ordered list of BCMs
> * based on VCD.
> */
> - tcs_list_gen(&commit_list, cmds, commit_idx);
> + tcs_list_gen(&commit_list, QCOM_ICC_BUCKET_AMC, cmds, commit_idx);
>
> if (!commit_idx[0])
> return ret;
> @@ -693,6 +725,41 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> return ret;
> }
>
> + INIT_LIST_HEAD(&commit_list);
> +
> + for (i = 0; i < qp->num_bcms; i++) {
> + /*
> + * Only generate WAKE and SLEEP commands if a resource's
> + * requirements change as the execution environment transitions
> + * between different power states.
> + */
> + if (qp->bcms[i]->vote_x[QCOM_ICC_BUCKET_WAKE] !=
> + qp->bcms[i]->vote_x[QCOM_ICC_BUCKET_SLEEP] ||
> + qp->bcms[i]->vote_y[QCOM_ICC_BUCKET_WAKE] !=
> + qp->bcms[i]->vote_y[QCOM_ICC_BUCKET_SLEEP]) {
> + list_add_tail(&qp->bcms[i]->list, &commit_list);
> + }
> + }
> +
> + if (list_empty(&commit_list))
> + return ret;
> +
> + tcs_list_gen(&commit_list, QCOM_ICC_BUCKET_WAKE, cmds, commit_idx);
> +
> + ret = rpmh_write_batch(qp->dev, RPMH_WAKE_ONLY_STATE, cmds, commit_idx);
> + if (ret) {
> + pr_err("Error sending WAKE RPMH requests (%d)\n", ret);
> + return ret;
> + }
> +
> + tcs_list_gen(&commit_list, QCOM_ICC_BUCKET_SLEEP, cmds, commit_idx);
> +
> + ret = rpmh_write_batch(qp->dev, RPMH_SLEEP_STATE, cmds, commit_idx);
> + if (ret) {
> + pr_err("Error sending SLEEP RPMH requests (%d)\n", ret);
> + return ret;
> + }
> +
> return ret;
> }
>