[PATCH 4/8] Create a sysfs release file for hot removing memory

From: Nathan Fontenot
Date: Wed Jul 24 2013 - 14:39:44 EST


Provide a sysfs interface to hot remove memory.

This patch updates the sysfs interface for hot add of memory to also
provide a sysfs interface to hot remove memory. The use of this interface
is controlled with the ARCH_MEMORY_PROBE config option, currently used
by x86 and powerpc. This patch also updates the name of this option to
CONFIG_ARCH_MEMORY_PROBE_RELEASE to indicate that it controls the probe
and release sysfs interfaces.

Signed-off-by: Nathan Fontenot <nfont@xxxxxxxxxxxxxxxxxx>
---
Documentation/memory-hotplug.txt | 34 ++++++++++++----
arch/powerpc/Kconfig | 2
arch/x86/Kconfig | 2
drivers/base/memory.c | 81 ++++++++++++++++++++++++++++++++++-----
4 files changed, 100 insertions(+), 19 deletions(-)

Index: linux/drivers/base/memory.c
===================================================================
--- linux.orig/drivers/base/memory.c
+++ linux/drivers/base/memory.c
@@ -129,22 +129,30 @@ static ssize_t show_mem_end_phys_index(s
return sprintf(buf, "%08lx\n", phys_index);
}

+static int is_memblock_removable(unsigned long start_section_nr)
+{
+ unsigned long pfn;
+ int i, ret = 1;
+
+ for (i = 0; i < sections_per_block; i++) {
+ pfn = section_nr_to_pfn(start_section_nr + i);
+ ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ }
+
+ return ret;
+}
+
/*
* Show whether the section of memory is likely to be hot-removable
*/
static ssize_t show_mem_removable(struct device *dev,
struct device_attribute *attr, char *buf)
{
- unsigned long i, pfn;
- int ret = 1;
+ int ret;
struct memory_block *mem =
container_of(dev, struct memory_block, dev);

- for (i = 0; i < sections_per_block; i++) {
- pfn = section_nr_to_pfn(mem->start_section_nr + i);
- ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
- }
-
+ ret = is_memblock_removable(mem->start_section_nr);
return sprintf(buf, "%d\n", ret);
}

@@ -421,7 +429,7 @@ static DEVICE_ATTR(block_size_bytes, 044
* as well as ppc64 will do all of their discovery in userspace
* and will require this interface.
*/
-#ifdef CONFIG_ARCH_MEMORY_PROBE
+#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE
static ssize_t
memory_probe_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
@@ -444,6 +452,60 @@ memory_probe_store(struct device *dev, s
}

static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
+
+static int is_memblock_offline(struct memory_block *mem, void *arg)
+{
+ if (mem->state == MEM_ONLINE)
+ return 1;
+
+ return 0;
+}
+
+static ssize_t
+memory_release_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u64 phys_addr;
+ int nid, ret = 0;
+ unsigned long block_size, pfn;
+ unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
+
+ lock_device_hotplug();
+
+ ret = kstrtoull(buf, 0, &phys_addr);
+ if (ret)
+ goto out;
+
+ if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ block_size = get_memory_block_size();
+ nid = memory_add_physaddr_to_nid(phys_addr);
+
+ /* Ensure memory is offline and removable before removing it. */
+ ret = walk_memory_range(PFN_DOWN(phys_addr),
+ PFN_UP(phys_addr + block_size - 1), NULL,
+ is_memblock_offline);
+ if (!ret) {
+ pfn = phys_addr >> PAGE_SHIFT;
+ ret = !is_memblock_removable(pfn_to_section_nr(pfn));
+ }
+
+ if (ret) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ remove_memory(nid, phys_addr, block_size);
+
+out:
+ unlock_device_hotplug();
+ return ret ? ret : count;
+}
+
+static DEVICE_ATTR(release, S_IWUSR, NULL, memory_release_store);
#endif

#ifdef CONFIG_MEMORY_FAILURE
@@ -694,8 +756,9 @@ bool is_memblock_offlined(struct memory_
}

static struct attribute *memory_root_attrs[] = {
-#ifdef CONFIG_ARCH_MEMORY_PROBE
+#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE
&dev_attr_probe.attr,
+ &dev_attr_release.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
Index: linux/arch/powerpc/Kconfig
===================================================================
--- linux.orig/arch/powerpc/Kconfig
+++ linux/arch/powerpc/Kconfig
@@ -438,7 +438,7 @@ config SYS_SUPPORTS_HUGETLBFS

source "mm/Kconfig"

-config ARCH_MEMORY_PROBE
+config ARCH_MEMORY_PROBE_RELEASE
def_bool y
depends on MEMORY_HOTPLUG

Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig
+++ linux/arch/x86/Kconfig
@@ -1343,7 +1343,7 @@ config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE

-config ARCH_MEMORY_PROBE
+config ARCH_MEMORY_PROBE_RELEASE
def_bool y
depends on X86_64 && MEMORY_HOTPLUG

Index: linux/Documentation/memory-hotplug.txt
===================================================================
--- linux.orig/Documentation/memory-hotplug.txt
+++ linux/Documentation/memory-hotplug.txt
@@ -17,7 +17,9 @@ be changed often.
3. sysfs files for memory hotplug
4. Physical memory hot-add phase
4.1 Hardware(Firmware) Support
- 4.2 Notify memory hot-add event by hand
+ 4.2 Notify memory hot-add and hot-remove event by hand
+ 4.2.1 Probe interface
+ 4.2.2 Release interface
5. Logical Memory hot-add phase
5.1. State of memory
5.2. How to online memory
@@ -69,7 +71,7 @@ management tables, and makes sysfs files

If firmware supports notification of connection of new memory to OS,
this phase is triggered automatically. ACPI can notify this event. If not,
-"probe" operation by system administration is used instead.
+"probe" and "release" operations by system administration are used instead.
(see Section 4.).

Logical Memory Hotplug phase is to change memory state into
@@ -208,20 +210,23 @@ calls hotplug code for all of objects wh
If memory device is found, memory hotplug code will be called.


-4.2 Notify memory hot-add event by hand
+4.2 Notify memory hot-add and hot-remove event by hand
------------
In some environments, especially virtualized environment, firmware will not
notify memory hotplug event to the kernel. For such environment, "probe"
-interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
+and "release" interfaces are supported. These interfaces depend on
+CONFIG_ARCH_MEMORY_PROBE_RELEASE.

-Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not
-contain highly architecture codes. Please add config if you need "probe"
-interface.
+Now, CONFIG_ARCH_MEMORY_PROBE_RELEASE is supported only by powerpc and x86 but
+it does not contain highly architecture-specific code. Please add config if you
+need "probe" and "release" interfaces.

+4.2.1 "probe" interface
+------------
Probe interface is located at
/sys/devices/system/memory/probe

-You can tell the physical address of new memory to the kernel by
+You can tell the physical address of new memory to hot-add to the kernel by

% echo start_address_of_new_memory > /sys/devices/system/memory/probe

@@ -230,6 +235,19 @@ memory range is hot-added. In this case,
current implementation). You'll have to online memory by yourself.
Please see "How to online memory" in this text.

+4.2.2 "release" interface
+------------
+Release interface is located at
+/sys/devices/system/memory/release
+
+You can tell the physical address of memory to hot-remove from the kernel by
+
+% echo start_address_of_memory > /sys/devices/system/memory/release
+
+Then, [start_address_of_memory, start_address_of_memory + memory_block_size)
+memory range is hot-removed. You will need to ensure all of the memory in
+this range has been offlined prior to using this interface; please see
+"How to offline memory" in this text.


------------------------------

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/