Skip to content

Commit

Permalink
hugetlb: new sysfs interface
Browse files Browse the repository at this point in the history
Provide new hugepages user APIs that are more suited to multiple hstates
in sysfs.  There is a new directory, /sys/kernel/hugepages.  Underneath
that directory there will be a directory per-supported hugepage size,
e.g.:

/sys/kernel/hugepages/hugepages-64kB
/sys/kernel/hugepages/hugepages-16384kB
/sys/kernel/hugepages/hugepages-16777216kB

corresponding to 64k, 16m and 16g respectively.  Within each
hugepages-size directory there are a number of files, corresponding to the
tracked counters in the hstate, e.g.:

/sys/kernel/hugepages/hugepages-64/nr_hugepages
/sys/kernel/hugepages/hugepages-64/nr_overcommit_hugepages
/sys/kernel/hugepages/hugepages-64/free_hugepages
/sys/kernel/hugepages/hugepages-64/resv_hugepages
/sys/kernel/hugepages/hugepages-64/surplus_hugepages

Of these files, the first two are read-write and the latter three are
read-only.  The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).

[dave@linux.vnet.ibm.com: fix build]
[nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel]
[nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency]
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Nishanth Aravamudan authored and torvalds committed Jul 24, 2008
1 parent a137e1c commit a343787
Show file tree
Hide file tree
Showing 4 changed files with 262 additions and 66 deletions.
15 changes: 15 additions & 0 deletions Documentation/ABI/testing/sysfs-kernel-mm-hugepages
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
What: /sys/kernel/mm/hugepages/
Date: June 2008
Contact: Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
Description:
/sys/kernel/mm/hugepages/ contains a number of subdirectories
of the form hugepages-<size>kB, where <size> is the page size
of the hugepages supported by the kernel/CPU combination.

Under these directories are a number of files:
nr_hugepages
nr_overcommit_hugepages
free_hugepages
surplus_hugepages
resv_hugepages
See Documentation/vm/hugetlbpage.txt for details.
23 changes: 23 additions & 0 deletions Documentation/vm/hugetlbpage.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be
allowed on the system until one of the two sysctls are increased
sufficiently, or the surplus huge pages go out of use and are freed.

With support for multiple hugepage pools at run-time available, much of
the hugepage userspace interface has been duplicated in sysfs. The above
information applies to the default hugepage size (which will be
controlled by the proc interfaces for backwards compatibility). The root
hugepage control directory is

/sys/kernel/mm/hugepages

For each hugepage size supported by the running kernel, a subdirectory
will exist, of the form

hugepages-${size}kB

Inside each of these directories, the same set of files will exist:

nr_hugepages
nr_overcommit_hugepages
free_hugepages
resv_hugepages
surplus_hugepages

which function as described above for the default hugepage-sized case.

If the user applications are going to request hugepages using mmap system
call, then it is required that system administrator mount a file system of
type hugetlbfs:
Expand Down
2 changes: 2 additions & 0 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,

#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
int hugetlb_next_nid;
Expand All @@ -179,6 +180,7 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
char name[HSTATE_NAME_LEN];
};

void __init hugetlb_add_hstate(unsigned order);
Expand Down
288 changes: 222 additions & 66 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/sysfs.h>

#include <asm/page.h>
#include <asm/pgtable.h>
Expand Down Expand Up @@ -942,72 +943,6 @@ static void __init report_hugepages(void)
}
}

static int __init hugetlb_init(void)
{
BUILD_BUG_ON(HPAGE_SHIFT == 0);

if (!size_to_hstate(HPAGE_SIZE)) {
hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
}
default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;

hugetlb_init_hstates();

report_hugepages();

return 0;
}
module_init(hugetlb_init);

/* Should be called on processing a hugepagesz=... option */
void __init hugetlb_add_hstate(unsigned order)
{
struct hstate *h;
if (size_to_hstate(PAGE_SIZE << order)) {
printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
return;
}
BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order == 0);
h = &hstates[max_hstate++];
h->order = order;
h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
hugetlb_init_one_hstate(h);
parsed_hstate = h;
}

static int __init hugetlb_setup(char *s)
{
unsigned long *mhp;

/*
* !max_hstate means we haven't parsed a hugepagesz= parameter yet,
* so this hugepages= parameter goes to the "default hstate".
*/
if (!max_hstate)
mhp = &default_hstate_max_huge_pages;
else
mhp = &parsed_hstate->max_huge_pages;

if (sscanf(s, "%lu", mhp) <= 0)
*mhp = 0;

return 1;
}
__setup("hugepages=", hugetlb_setup);

static unsigned int cpuset_mems_nr(unsigned int *array)
{
int node;
unsigned int nr = 0;

for_each_node_mask(node, cpuset_current_mems_allowed)
nr += array[node];

return nr;
}

#ifdef CONFIG_SYSCTL
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(struct hstate *h, unsigned long count)
Expand Down Expand Up @@ -1105,6 +1040,227 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
return ret;
}

#define HSTATE_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)

#define HSTATE_ATTR(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)

static struct kobject *hugepages_kobj;
static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];

static struct hstate *kobj_to_hstate(struct kobject *kobj)
{
int i;
for (i = 0; i < HUGE_MAX_HSTATE; i++)
if (hstate_kobjs[i] == kobj)
return &hstates[i];
BUG();
return NULL;
}

static ssize_t nr_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->nr_huge_pages);
}
static ssize_t nr_hugepages_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count)
{
int err;
unsigned long input;
struct hstate *h = kobj_to_hstate(kobj);

err = strict_strtoul(buf, 10, &input);
if (err)
return 0;

h->max_huge_pages = set_max_huge_pages(h, input);

return count;
}
HSTATE_ATTR(nr_hugepages);

static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
}
static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count)
{
int err;
unsigned long input;
struct hstate *h = kobj_to_hstate(kobj);

err = strict_strtoul(buf, 10, &input);
if (err)
return 0;

spin_lock(&hugetlb_lock);
h->nr_overcommit_huge_pages = input;
spin_unlock(&hugetlb_lock);

return count;
}
HSTATE_ATTR(nr_overcommit_hugepages);

static ssize_t free_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->free_huge_pages);
}
HSTATE_ATTR_RO(free_hugepages);

static ssize_t resv_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->resv_huge_pages);
}
HSTATE_ATTR_RO(resv_hugepages);

static ssize_t surplus_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->surplus_huge_pages);
}
HSTATE_ATTR_RO(surplus_hugepages);

static struct attribute *hstate_attrs[] = {
&nr_hugepages_attr.attr,
&nr_overcommit_hugepages_attr.attr,
&free_hugepages_attr.attr,
&resv_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
NULL,
};

static struct attribute_group hstate_attr_group = {
.attrs = hstate_attrs,
};

static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
{
int retval;

hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
hugepages_kobj);
if (!hstate_kobjs[h - hstates])
return -ENOMEM;

retval = sysfs_create_group(hstate_kobjs[h - hstates],
&hstate_attr_group);
if (retval)
kobject_put(hstate_kobjs[h - hstates]);

return retval;
}

static void __init hugetlb_sysfs_init(void)
{
struct hstate *h;
int err;

hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
if (!hugepages_kobj)
return;

for_each_hstate(h) {
err = hugetlb_sysfs_add_hstate(h);
if (err)
printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
h->name);
}
}

static void __exit hugetlb_exit(void)
{
struct hstate *h;

for_each_hstate(h) {
kobject_put(hstate_kobjs[h - hstates]);
}

kobject_put(hugepages_kobj);
}
module_exit(hugetlb_exit);

static int __init hugetlb_init(void)
{
BUILD_BUG_ON(HPAGE_SHIFT == 0);

if (!size_to_hstate(HPAGE_SIZE)) {
hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
}
default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;

hugetlb_init_hstates();

report_hugepages();

hugetlb_sysfs_init();

return 0;
}
module_init(hugetlb_init);

/* Should be called on processing a hugepagesz=... option */
void __init hugetlb_add_hstate(unsigned order)
{
struct hstate *h;
if (size_to_hstate(PAGE_SIZE << order)) {
printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
return;
}
BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order == 0);
h = &hstates[max_hstate++];
h->order = order;
h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024);
hugetlb_init_one_hstate(h);
parsed_hstate = h;
}

static int __init hugetlb_setup(char *s)
{
unsigned long *mhp;

/*
* !max_hstate means we haven't parsed a hugepagesz= parameter yet,
* so this hugepages= parameter goes to the "default hstate".
*/
if (!max_hstate)
mhp = &default_hstate_max_huge_pages;
else
mhp = &parsed_hstate->max_huge_pages;

if (sscanf(s, "%lu", mhp) <= 0)
*mhp = 0;

return 1;
}
__setup("hugepages=", hugetlb_setup);

static unsigned int cpuset_mems_nr(unsigned int *array)
{
int node;
unsigned int nr = 0;

for_each_node_mask(node, cpuset_current_mems_allowed)
nr += array[node];

return nr;
}

int hugetlb_sysctl_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer,
size_t *length, loff_t *ppos)
Expand Down

0 comments on commit a343787

Please sign in to comment.