Re: [RFC 3/4] perf: Allow per PMU access control

From: Alexey Budankov
Date: Tue Jun 26 2018 - 13:25:22 EST


Hi,

On 26.06.2018 18:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
>
> For situations where sysadmins might want to allow different levels of
> access control for different PMUs, we start creating per-PMU
> perf_event_paranoid controls in sysfs.
>
> These work in the same fashion as the existing perf_event_paranoid
> sysctl, which now becomes the parent control for each PMU.
>
> On PMU registration the global/parent value will be inherited by each PMU,
> and it will also be propagated to all registered PMUs whenever the sysctl
> is updated.
>
> At any later point individual PMU access controls, located in
> <sysfs>/device/<pmu-name>/perf_event_paranoid, can be adjusted to achieve
> fine grained access control.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Madhavan Srinivasan <maddy@xxxxxxxxxxxxxxxxxx>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Cc: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Cc: x86@xxxxxxxxxx
> ---
> include/linux/perf_event.h | 12 ++++++--
> kernel/events/core.c | 59 ++++++++++++++++++++++++++++++++++++++
> kernel/sysctl.c | 4 ++-
> 3 files changed, 71 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d7938d88c028..22e91cc2d9e1 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -271,6 +271,9 @@ struct pmu {
> /* number of address filters this PMU can do */
> unsigned int nr_addr_filters;
>
> + /* per PMU access control */
> + int perf_event_paranoid;

It looks like this field needs to be declared as atomic_t, and
atomic_read()/atomic_set() operations need to be used explicitly below in
the patch, since this variable may be manipulated by different threads at
the same time without explicit locking.

> +
> /*
> * Fully disable/enable this PMU, can be used to protect from the PMI
> * as well as for lazy/batch writing of the MSRs.
> @@ -1168,6 +1171,9 @@ extern int sysctl_perf_cpu_time_max_percent;
>
> extern void perf_sample_event_took(u64 sample_len_ns);
>
> +extern int perf_proc_paranoid_handler(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp,
> + loff_t *ppos);
> extern int perf_proc_update_handler(struct ctl_table *table, int write,
> void __user *buffer, size_t *lenp,
> loff_t *ppos);
> @@ -1180,17 +1186,17 @@ int perf_event_max_stack_handler(struct ctl_table *table, int write,
>
> static inline bool perf_paranoid_tracepoint_raw(const struct pmu *pmu)
> {
> - return sysctl_perf_event_paranoid > -1;
> + return pmu->perf_event_paranoid > -1;
> }
>
> static inline bool perf_paranoid_cpu(const struct pmu *pmu)
> {
> - return sysctl_perf_event_paranoid > 0;
> + return pmu->perf_event_paranoid > 0;
> }
>
> static inline bool perf_paranoid_kernel(const struct pmu *pmu)
> {
> - return sysctl_perf_event_paranoid > 1;
> + return pmu->perf_event_paranoid > 1;
> }
>
> extern void perf_event_init(void);
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 370c89e81722..da36317dc8dc 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -432,6 +432,24 @@ static void update_perf_cpu_limits(void)
>
> static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
>
> +int perf_proc_paranoid_handler(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp,
> + loff_t *ppos)
> +{
> + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> + struct pmu *pmu;
> +
> + if (ret || !write)
> + return ret;
> +
> + mutex_lock(&pmus_lock);
> + list_for_each_entry(pmu, &pmus, entry)
> + pmu->perf_event_paranoid = sysctl_perf_event_paranoid;
> + mutex_unlock(&pmus_lock);
> +
> + return 0;
> +}
> +
> int perf_proc_update_handler(struct ctl_table *table, int write,
> void __user *buffer, size_t *lenp,
> loff_t *ppos)
> @@ -9425,6 +9443,41 @@ static void free_pmu_context(struct pmu *pmu)
> mutex_unlock(&pmus_lock);
> }
>
> +/*
> + * Fine-grained access control:
> + */
> +static ssize_t
> +perf_event_paranoid_show(struct device *dev,
> + struct device_attribute *attr,
> + char *page)
> +{
> + struct pmu *pmu = dev_get_drvdata(dev);
> +
> + return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->perf_event_paranoid);
> +}
> +
> +static ssize_t
> +perf_event_paranoid_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct pmu *pmu = dev_get_drvdata(dev);
> + int ret, val;
> +
> + ret = kstrtoint(buf, 0, &val);
> + if (ret)
> + return ret;
> +
> + if (val < -1 || val > 2)
> + return -EINVAL;
> +
> + pmu->perf_event_paranoid = val;
> +
> + return count;
> +}
> +
> +static DEVICE_ATTR_RW(perf_event_paranoid);
> +
> /*
> * Let userspace know that this PMU supports address range filtering:
> */
> @@ -9539,6 +9592,11 @@ static int pmu_dev_alloc(struct pmu *pmu)
> if (ret)
> goto free_dev;
>
> + /* Add fine-grained access control attribute. */
> + ret = device_create_file(pmu->dev, &dev_attr_perf_event_paranoid);
> + if (ret)
> + goto del_dev;
> +
> /* For PMUs with address filters, throw in an extra attribute: */
> if (pmu->nr_addr_filters)
> ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
> @@ -9570,6 +9628,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
> if (!pmu->pmu_disable_count)
> goto unlock;
>
> + pmu->perf_event_paranoid = sysctl_perf_event_paranoid;
> pmu->type = -1;
> if (!name)
> goto skip_type;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 2d9837c0aff4..7f6fccb64a30 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1142,7 +1142,9 @@ static struct ctl_table kern_table[] = {
> .data = &sysctl_perf_event_paranoid,
> .maxlen = sizeof(sysctl_perf_event_paranoid),
> .mode = 0644,
> - .proc_handler = proc_dointvec,
> + .proc_handler = perf_proc_paranoid_handler,
> + .extra1 = &neg_one,
> + .extra2 = &two,
> },
> {
> .procname = "perf_event_mlock_kb",
>