Re: [Xen-devel] [PATCH 3/8] ACPI: processor: add __acpi_processor_[un]register_driver helpers.

From: Konrad Rzeszutek Wilk
Date: Tue Jan 17 2012 - 13:22:30 EST


On Tue, Jan 17, 2012 at 12:13:14PM -0500, Konrad Rzeszutek Wilk wrote:
> > > I was trying to figure out how difficult it would be to just bring Pxx states to
> > > the Xen hypervisor using the existing ACPI interfaces. And while it did not pass
> > > all the _Pxx states (seems that all the _PCT, _PSS, _PSD, _PPC flags need to
> > > be enabled in the hypercall to make this work), it demonstrates what I had in
> > > mind.
>
> .. snip..
> > > /* TODO: Under Xen, the C-states information is not present.
> > > * Figure out why. */
> >
> > it's possible related to this long thread:
> >
> > http://lists.xen.org/archives/html/xen-devel/2011-08/msg00511.html
> >
> > IOW, Xen doesn't export mwait capability to dom0, which impacts _PDC setting.
> > Final solution is to have a para-virtualized PDC call for that.
>
> Aaah. Let me play with that a bit. Thanks for the pointer.
>
> .. snip..
> > the prerequisites for this module to work correctly, is that dom0 has the right
> > configurations to have all necessary Cx/Px information ready before this
> > module is loaded. That may mean enabling full CONFIG_CPU_IDLE and CONFIG_CPUFREQ,
>
> Right.
> > which in current form may add some negative impact, e.g. dom0 will try to control
> > Px/Cx to conflict with Xen. So some tweaks may be required in that part.
>
> Yup. Hadn't even looked at what cpufreq tries to do yet.
> >
> > given our purpose now, is to come up a cleaner approach which tolerate some
> > assumptions (e.g. #VCPU of dom0 == #PCPU), there's another option following this
> > trend (perhaps compensate your idea). We can register a Xen-cpuidle and
> > xen-cpufreq driver to current Linux cpuidle and cpufreq framework, which plays
> > mainly two roles:
> > - a dummy driver to prevent dom0 touching actual Px/Cx
> > - parse ACPI Cx/Px information to Xen, in a similar way you did above
>
> Yeah, I like where you are heading.
> >
> > there may have some other trickiness, but the majority code will be self-contained.
>
> <nods>

For reference, the attached module does end up programming the Pxx states in the
hypervisor. The issue that I hit on a Core i3 box (some MSI motherboard) was that it would
fail on the PCT, but I haven't really dug into this, and I did not look any further into the
Cxx states issue either. On an old Core 2 Duo it looked to have programmed the hypervisor
fine, but the machine afterwards started to act very weird, so I am sure there is
something extra that needs to be done (like maybe not using memcpy in this module).

#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <acpi/processor.h>

#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>

/*
 * Driver name and class strings.
 * NOTE(review): neither macro is referenced in the visible part of this
 * module -- confirm whether they are still needed.
 */
#define DRV_NAME "ACPI_PXX"
#define DRV_CLASS "ACPI_PXX_CLASS"
/* Module metadata reported by modinfo. */
MODULE_AUTHOR("Konrad Rzeszutek Wilk");
MODULE_DESCRIPTION("ACPI Processor Driver to send data to Xen hypervisor");
MODULE_LICENSE("GPL");

static int parse_acpi_cxx(struct acpi_processor *_pr)
{
struct acpi_processor_cx *cx;
int i;

for (i = 1; i <= _pr->power.count; i++) {
cx = &_pr->power.states[i];
if (!cx->valid)
continue;
pr_info("%s: %d %d %d 0x%x\n", __func__,
cx->type, cx->latency, cx->power, (u32)cx->address);
}
/* TODO: Under Xen, the C-states information is not present.
* Figure out why.
* Kevin thinks it might be: http://lists.xen.org/archives/html/xen-devel/2011-08/msg00511.html
* But perhaps it is http://lists.xen.org/archives/html/xen-devel/2011-08/msg00521.html?
*/
return 0;
}
/*
 * xen_copy_pss_data - duplicate a processor's P-state table in Xen format.
 * @_pr:      processor whose performance->states[] table is copied.
 * @xen_perf: Xen performance descriptor; its state_count is set on success.
 *
 * Returns a kcalloc()ed array of performance->state_count entries that the
 * caller must kfree(), or ERR_PTR(-ENOMEM) if the allocation fails (in which
 * case @xen_perf is left untouched).
 */
static struct xen_processor_px *xen_copy_pss_data(struct acpi_processor *_pr,
		struct xen_processor_performance *xen_perf)
{
	struct xen_processor_px *xen_states;
	unsigned int i;

	/*
	 * The entries are copied with memcpy(), which is only valid while the
	 * ACPI and Xen per-state structures have the same size.  Fail the
	 * build, rather than corrupt memory, if they ever diverge.
	 */
	BUILD_BUG_ON(sizeof(*xen_states) !=
		     sizeof(*_pr->performance->states));

	/* kcalloc() zeroes the array and checks count * size for overflow. */
	xen_states = kcalloc(_pr->performance->state_count,
			     sizeof(*xen_states), GFP_KERNEL);
	if (!xen_states)
		return ERR_PTR(-ENOMEM);

	xen_perf->state_count = _pr->performance->state_count;
	for (i = 0; i < _pr->performance->state_count; i++) {
		/* Size the copy by the destination element. */
		memcpy(&xen_states[i], &_pr->performance->states[i],
		       sizeof(xen_states[i]));
	}
	return xen_states;
}
/*
 * xen_copy_psd_data - copy the P-state dependency (_PSD) data into the Xen
 * performance descriptor.
 * @_pr:      processor whose performance->shared_type and
 *            performance->domain_info are read.
 * @xen_perf: Xen performance descriptor that receives both values.
 *
 * Returns 0 unconditionally.
 */
static int
xen_copy_psd_data(struct acpi_processor *_pr,
		  struct xen_processor_performance *xen_perf)
{
	/*
	 * domain_info is copied with memcpy(); that is only safe while the
	 * ACPI and Xen package structures have the same size, so enforce it
	 * at build time.
	 */
	BUILD_BUG_ON(sizeof(xen_perf->domain_info) !=
		     sizeof(_pr->performance->domain_info));

	/* Debug aid for layout checks; offsetof() yields size_t, hence %zu. */
	printk(KERN_INFO "psd: %zu\n",
	       offsetof(struct xen_processor_performance,
			domain_info.num_entries));

	xen_perf->shared_type = _pr->performance->shared_type;
	memcpy(&xen_perf->domain_info, &_pr->performance->domain_info,
	       sizeof(xen_perf->domain_info));

	return 0;
}
static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
{
int ret = -EINVAL;
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.interface_version = XENPF_INTERFACE_VERSION,
.u.set_pminfo.id = _pr->acpi_id,
.u.set_pminfo.type = XEN_PM_PX,
};
struct xen_processor_performance *xen_perf;
struct xen_processor_px *xen_states = NULL;

if (!_pr->performance)
return -ENODEV;

xen_perf = &op.u.set_pminfo.perf;

/* PPC */
xen_perf->platform_limit = _pr->performance_platform_limit;
xen_perf->flags |= XEN_PX_PPC;
/* PCT */
/* Mmight need to copy them individually as there are no __packed
* so the offset might be wrong on a 32-bit host with 64-bit hypervisor. */
printk(KERN_INFO "address: %ld\n", offsetof(struct xen_processor_performance, control_register.address));
printk(KERN_INFO "address: %ld\n", offsetof(struct xen_processor_performance, status_register.address));
printk(KERN_INFO "state_count: %ld\n", offsetof(struct xen_processor_performance, state_count));
memcpy(&xen_perf->control_register, &(_pr->performance->control_register),
sizeof(struct acpi_pct_register));
memcpy(&xen_perf->status_register, &(_pr->performance->status_register),
sizeof(struct acpi_pct_register));
xen_perf->flags |= XEN_PX_PCT;
/* PSS */
xen_states = xen_copy_pss_data(_pr, xen_perf);
if (!IS_ERR_OR_NULL(xen_states)) {
set_xen_guest_handle(xen_perf->states, xen_states);
xen_perf->flags |= XEN_PX_PSS;
}
/* PSD */
if (!xen_copy_psd_data(_pr, xen_perf)) {
xen_perf->flags |= XEN_PX_PSD;
}
printk(KERN_INFO "Sending %x\n", xen_perf->flags);

ret = HYPERVISOR_dom0_op(&op);
if (!IS_ERR_OR_NULL(xen_states))
kfree(xen_states);
return ret;
}
/*
 * parse_acpi_pxx - hand one processor's P-state data to the hypervisor.
 * @_pr: ACPI processor to upload.
 *
 * The upload is attempted only when xen_initial_domain() is true.
 *
 * Returns 0 when nothing was attempted, otherwise the result of
 * push_pxx_to_hypervisor().
 */
static int parse_acpi_pxx(struct acpi_processor *_pr)
{
	if (!xen_initial_domain())
		return 0;

	return push_pxx_to_hypervisor(_pr);
}
/*
 * parse_acpi_data - walk every possible CPU and process its ACPI power data.
 *
 * For each CPU with an entry in the per-cpu 'processors' table, the C-state
 * table is logged (when flags.power is set) and the P-state data is pushed
 * through parse_acpi_pxx().  CPUs without P-state data are skipped; the walk
 * stops at the first CPU whose push fails.
 *
 * Always returns -ENODEV so that module initialisation fails and the module
 * does not stay loaded; the work is a one-shot done during init.
 */
static int parse_acpi_data(void)
{
	int cpu;
	int err;
	struct acpi_processor *_pr;
	struct cpuinfo_x86 *c = &cpu_data(0);

	/* TODO: Under AMD, the information is populated
	 * using the powernow-k8 driver which does an MSR_PSTATE_CUR_LIMIT
	 * MSR which returns the wrong value so the population of 'processors'
	 * has bogus data. So only run this under Intel for right now. */
	if (!cpu_has(c, X86_FEATURE_EST))
		return -ENODEV;

	for_each_possible_cpu(cpu) {
		_pr = per_cpu(processors, cpu);
		if (!_pr)
			continue;

		if (_pr->flags.power)
			(void)parse_acpi_cxx(_pr);

		/*
		 * performance may be NULL when no P-state data was set up for
		 * this processor; skip such CPUs instead of dereferencing it
		 * or aborting the whole walk.
		 */
		if (!_pr->performance || !_pr->performance->states)
			continue;

		err = parse_acpi_pxx(_pr);
		if (err) {
			/* The return value below hides errors, so log them. */
			pr_err("%s: pushing P-states for CPU %d failed: %d\n",
			       __func__, cpu, err);
			break;
		}
	}
	return -ENODEV; /* force it to unload */
}
/*
 * Module entry point: run the one-shot ACPI data walk and report its result
 * as the module-init status.
 */
static int __init acpi_pxx_init(void)
{
	int rc;

	rc = parse_acpi_data();
	return rc;
}
/* Module exit hook: intentionally empty, there is nothing to tear down here. */
static void __exit acpi_pxx_exit(void)
{
}
/* Register the module's load and unload entry points. */
module_init(acpi_pxx_init);
module_exit(acpi_pxx_exit);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/