Re: [PATCH 3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters

From: Krzysztof Kozlowski
Date: Fri Sep 27 2019 - 05:19:31 EST


On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
> Introduce a new interrupt driven mechanism for managing speed of the
> memory controller. The interrupts are generated due to performance
> counters overflow. The performance counters might track memory reads,
> writes, transfers, page misses, etc. In the basic algorithm tracking
> read transfers and calculating memory pressure should be enough to
> skip polling mode in devfreq.
>
> Signed-off-by: Lukasz Luba <l.luba@xxxxxxxxxxxxxxxxxxx>
> ---
> drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
> 1 file changed, 272 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
> index 0fe5f2186139..86e1844b97ef 100644
> --- a/drivers/memory/samsung/exynos5422-dmc.c
> +++ b/drivers/memory/samsung/exynos5422-dmc.c
> @@ -8,6 +8,7 @@
> #include <linux/devfreq.h>
> #include <linux/devfreq-event.h>
> #include <linux/device.h>
> +#include <linux/interrupt.h>
> #include <linux/io.h>
> #include <linux/mfd/syscon.h>
> #include <linux/module.h>
> @@ -35,6 +36,29 @@
> #define USE_BPLL_TIMINGS (0)
> #define EXYNOS5_AREF_NORMAL (0x2e)
>
> +#define DREX_PPCCLKCON (0x0130)
> +#define DREX_PEREV2CONFIG (0x013c)
> +#define DREX_PMNC_PPC (0xE000)
> +#define DREX_CNTENS_PPC (0xE010)
> +#define DREX_CNTENC_PPC (0xE020)
> +#define DREX_INTENS_PPC (0xE030)
> +#define DREX_INTENC_PPC (0xE040)
> +#define DREX_FLAG_PPC (0xE050)
> +#define DREX_PMCNT2_PPC (0xE130)
> +
> +#define CC_RESET BIT(2)
> +#define PPC_COUNTER_RESET BIT(1)
> +#define PPC_ENABLE BIT(0)
> +#define PEREV_CLK_EN BIT(0)
> +#define PERF_CNT2 BIT(2)
> +#define PERF_CCNT BIT(31)

Describe to which registers these bitfields are applicable.

> +
> +#define READ_TRANSFER_CH0 (0x6d)
> +#define READ_TRANSFER_CH1 (0x6f)

The same here: describe which register these values apply to. Otherwise
they all look like generic constants, which is not true.

> +
> +#define PERF_COUNTER_START_VALUE 0xff000000
> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
> +
> /**
> * struct dmc_opp_table - Operating level desciption
> *
> @@ -85,6 +109,9 @@ struct exynos5_dmc {
> struct clk *mout_mx_mspll_ccore_phy;
> struct devfreq_event_dev **counter;
> int num_counters;
> + u64 last_overflow_ts[2];
> + unsigned long load, total;

One member per line. Declaring several members on one line decreases readability.

> + bool in_irq_mode;
> };
>
> #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
> return 0;
> }
>
> +/**
> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
> + * @dmc: device for which the counters are going to be checked
> + * @beg_value: initial value for the counter
> + *
> + * Function which enables needed counters, interrupts and sets initial values
> + * then starts the counters.
> + */
> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
> + u32 beg_value)
> +{
> + /* Enable interrupts for counter 2 */
> + writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
> + writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);

Blank line.

> + /* Enable counter 2 and CCNT */
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);

Blank line.

> + /* Clear overflow flag for all counters */
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);

Blank line.

> + /* Reset all counters */
> + writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
> + writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line.

> + /*
> + * Set start value for the counters, the number of samples that
> + * will be gathered is calculated as: 0xffffffff - beg_value
> + */
> + writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
> + writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);

Blank line.

> + /* Start all counters */
> + writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
> + writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_calc() - Calculate utilization
> + * @dmc: device for which the counters are going to be checked
> + * @diff_ts: time between last interrupt and current one
> + *
> + * Function which calculates needed utilization for the devfreq governor.
> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
> + * between interrupts, which approximates utilization.
> + */
> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
> +{
> + /*
> + * This is a simple algorithm for managing traffic on DMC.
> + * When there is almost no load the counters overflow every 4s,
> + * no mater the DMC frequency.
> + * The high load might be approximated using linear function.
> + * Knowing that, simple calculation can provide 'busy_time' and
> + * 'total_time' to the devfreq governor which picks up target
> + * frequency.
> + * We want a fast ramp up and slow decay in frequency change function.
> + */
> + if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
> + /*
> + * Set higher utilization for the simple_ondemand governor.
> + * The governor should increase the frequency of the DMC.
> + */
> + dmc->load = 70;
> + dmc->total = 100;
> + } else {
> + /*
> + * Set low utilization for the simple_ondemand governor.
> + * The governor should decrease the frequency of the DMC.
> + */
> + dmc->load = 35;
> + dmc->total = 100;
> + }
> +
> + dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
> + * @dmc: device for which the counters are going to be checked
> + *
> + * Function which is called from threaded IRQ to check the counters state
> + * and to call approximation for the needed utilization.
> + */
> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
> +{
> + u32 val;
> + u64 diff_ts, ts;
> +
> + ts = ktime_get_ns();
> +
> + /* Stop all counters */
> + writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> + writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
> +
> + /* Check the source in interrupt flag registers (which channel) */
> + val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
> + if (val) {
> + diff_ts = ts - dmc->last_overflow_ts[0];
> + dmc->last_overflow_ts[0] = ts;
> + dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n", val);
> + } else {
> + val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
> + diff_ts = ts - dmc->last_overflow_ts[1];
> + dmc->last_overflow_ts[1] = ts;
> + dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n", val);
> + }
> +
> + exynos5_dmc_perf_events_calc(dmc, diff_ts);
> +
> + exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +}
> +
> +/**
> + * exynos5_dmc_enable_perf_events() - Enable performance events
> + * @dmc: device for which the counters are going to be checked
> + *
> + * Function which is setup needed environment and enables counters.
> + */
> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
> +{
> + u64 ts;
> +
> + /* Enable Performance Event Clock */
> + writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
> + writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
> +
> + /* Select read transfers as performance event2 */
> + writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
> + writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
> +
> + dmc->in_irq_mode = 1;

Move this outside, to the probe. Logically it belongs there.

> +
> + ts = ktime_get_ns();
> + dmc->last_overflow_ts[0] = ts;
> + dmc->last_overflow_ts[1] = ts;
> +
> + /* Devfreq shouldn't be faster than initialization, play safe though. */
> + dmc->load = 99;
> + dmc->total = 100;
> +}
> +
> +/**
> + * exynos5_dmc_disable_perf_events() - Disable performance events
> + * @dmc: device for which the counters are going to be checked
> + *
> + * Function which stops, disables performance event counters and interrupts.
> + */
> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
> +{
> + /* Stop all counters */
> + writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> + writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line here and later.

> + /* Disable interrupts for counter 2 */
> + writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
> + writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
> + /* Disable counter 2 and CCNT */
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
> + /* Clear overflow flag for all counters */
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> + writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
> +}
> +
> /**
> * exynos5_dmc_get_status() - Read current DMC performance statistics.
> * @dev: device for which the statistics are requested
> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
> unsigned long load, total;
> int ret;
>
> - ret = exynos5_counters_get(dmc, &load, &total);
> - if (ret < 0)
> - return -EINVAL;
> + if (dmc->in_irq_mode) {
> + stat->current_frequency = dmc->curr_rate;
> + stat->busy_time = dmc->load;
> + stat->total_time = dmc->total;
> + } else {
> + ret = exynos5_counters_get(dmc, &load, &total);
> + if (ret < 0)
> + return -EINVAL;
>
> - /* To protect from overflow in calculation ratios, divide by 1024 */
> - stat->busy_time = load >> 10;
> - stat->total_time = total >> 10;
> + /* To protect from overflow, divide by 1024 */
> + stat->busy_time = load >> 10;
> + stat->total_time = total >> 10;
>
> - ret = exynos5_counters_set_event(dmc);
> - if (ret < 0) {
> - dev_err(dev, "could not set event counter\n");
> - return ret;
> + ret = exynos5_counters_set_event(dmc);
> + if (ret < 0) {
> + dev_err(dev, "could not set event counter\n");
> + return ret;
> + }
> }
>
> return 0;
> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
> * It provides to the devfreq framework needed functions and polling period.
> */
> static struct devfreq_dev_profile exynos5_dmc_df_profile = {
> - .polling_ms = 500,
> .target = exynos5_dmc_target,
> .get_dev_status = exynos5_dmc_get_status,
> .get_cur_freq = exynos5_dmc_get_cur_freq,
> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
> return 0;
> }
>
> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
> +{
> + int res;
> + struct exynos5_dmc *dmc = priv;
> +
> + dev_dbg(dmc->dev, "irq thread handler\n");

Skip the debug message in the thread handler of a memory controller driver.
It can pollute your log (I guess depending on workload).

> +
> + mutex_lock(&dmc->df->lock);
> +
> + exynos5_dmc_perf_events_check(dmc);
> +
> + res = update_devfreq(dmc->df);
> + if (res)
> + dev_err(dmc->dev, "devfreq failed with %d\n", res);

Use dev_warn() here instead of dev_err().

> +
> + mutex_unlock(&dmc->df->lock);
> +
> + return IRQ_HANDLED;
> +}
> +
> /**
> * exynos5_dmc_probe() - Probe function for the DMC driver
> * @pdev: platform device for which the driver is going to be initialized
> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
> struct device_node *np = dev->of_node;
> struct exynos5_dmc *dmc;
> struct resource *res;
> + int irq;
>
> dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
> if (!dmc)
> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
> goto remove_clocks;
> }
>
> - ret = exynos5_performance_counters_init(dmc);
> - if (ret) {
> - dev_warn(dev, "couldn't probe performance counters\n");
> - goto remove_clocks;
> - }
> -
> ret = exynos5_dmc_set_pause_on_switching(dmc);
> if (ret) {
> dev_warn(dev, "couldn't get access to PAUSE register\n");
> goto err_devfreq_add;

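This is wrong now, I think: the performance counters are no longer
initialized before this point, yet err_devfreq_add still tries to disable
them.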

> }
>
> - /*
> - * Setup default thresholds for the devfreq governor.
> - * The values are chosen based on experiments.
> - */
> - dmc->gov_data.upthreshold = 30;
> - dmc->gov_data.downdifferential = 5;
> + /* There is two modes in which the driver works: polling or IRQ */
> + irq = platform_get_irq(pdev, 0);

You need to document this interrupt in the device tree bindings.

> + if (irq < 0) {
> + ret = exynos5_performance_counters_init(dmc);
> + if (ret) {
> + dev_warn(dev, "couldn't probe performance counters\n");
> + goto remove_clocks;

Weird, the previous error jump goes to err_devfreq_add. This one goes to an
error label which is narrower (less to clean up).

Best regards,
Krzysztof

> + }
> +
> + /*
> + * Setup default thresholds for the devfreq governor.
> + * The values are chosen based on experiments.
> + */
> + dmc->gov_data.upthreshold = 30;
> + dmc->gov_data.downdifferential = 5;
> +
> + exynos5_dmc_df_profile.polling_ms = 500;
> + } else {
> + ret = devm_request_threaded_irq(dev, irq, NULL,
> + dmc_irq_thread, IRQF_ONESHOT,
> + dev_name(dev), dmc);
> + if (ret) {
> + dev_err(dev, "couldn't grab IRQ\n");
> + goto remove_clocks;
> + }
> +
> + /*
> + * Setup default thresholds for the devfreq governor.
> + * The values are chosen based on experiments.
> + */
> + dmc->gov_data.upthreshold = 55;
> + dmc->gov_data.downdifferential = 5;
> +
> + exynos5_dmc_enable_perf_events(dmc);
> + }
> +
>
> dmc->df = devm_devfreq_add_device(dev, &exynos5_dmc_df_profile,
> DEVFREQ_GOV_SIMPLE_ONDEMAND,
> @@ -1200,12 +1438,18 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
> goto err_devfreq_add;
> }
>
> + if (dmc->in_irq_mode)
> + exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +
> dev_info(dev, "DMC initialized\n");
>
> return 0;
>
> err_devfreq_add:
> - exynos5_counters_disable_edev(dmc);
> + if (dmc->in_irq_mode)
> + exynos5_dmc_disable_perf_events(dmc);
> + else
> + exynos5_counters_disable_edev(dmc);
> remove_clocks:
> clk_disable_unprepare(dmc->mout_bpll);
> clk_disable_unprepare(dmc->fout_bpll);
> @@ -1225,7 +1469,10 @@ static int exynos5_dmc_remove(struct platform_device *pdev)
> {
> struct exynos5_dmc *dmc = dev_get_drvdata(&pdev->dev);
>
> - exynos5_counters_disable_edev(dmc);
> + if (dmc->in_irq_mode)
> + exynos5_dmc_disable_perf_events(dmc);
> + else
> + exynos5_counters_disable_edev(dmc);
>
> clk_disable_unprepare(dmc->mout_bpll);
> clk_disable_unprepare(dmc->fout_bpll);
> --
> 2.17.1
>