[PATCH v3 3/3] man2/fincore.2: document general description about fincore(2)

From: Naoya Horiguchi
Date: Mon Jul 07 2014 - 14:02:52 EST


This patch adds the man page for the new system call fincore(2).

ChangeLog v3:
- remove page cache tag stuff

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
man2/fincore.2 | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 348 insertions(+)
create mode 100644 man2/fincore.2

diff --git v3.16-rc3.orig/man2/fincore.2 v3.16-rc3/man2/fincore.2
new file mode 100644
index 000000000000..b4893084cd36
--- /dev/null
+++ v3.16-rc3/man2/fincore.2
@@ -0,0 +1,348 @@
+.\" Copyright (C) 2014 Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
+.\"
+.\" %%%LICENSE_START(VERBATIM)
+.\" Permission is granted to make and distribute verbatim copies of this
+.\" manual provided the copyright notice and this permission notice are
+.\" preserved on all copies.
+.\"
+.\" Permission is granted to copy and distribute modified versions of this
+.\" manual under the conditions for verbatim copying, provided that the
+.\" entire resulting derived work is distributed under the terms of a
+.\" permission notice identical to this one.
+.\"
+.\" Since the Linux kernel and libraries are constantly changing, this
+.\" manual page may be incorrect or out-of-date. The author(s) assume no
+.\" responsibility for errors or omissions, or for damages resulting from
+.\" the use of the information contained herein. The author(s) may not
+.\" have taken the same level of care in the production of this manual,
+.\" which is licensed free of charge, as they might when working
+.\" professionally.
+.\"
+.\" Formatted or processed versions of this manual, if unaccompanied by
+.\" the source, must acknowledge the copyright and authors of this work.
+.\" %%%LICENSE_END
+.\"
+.TH FINCORE 2 2014-07-07 "Linux" "Linux Programmer's Manual"
+.SH NAME
+fincore \- get page cache information
+.SH SYNOPSIS
+.nf
+.B #include <sys/fincore.h>
+.B #include <sys/kernel-page-flags.h>
+.sp
+.BI "int fincore(int " fd ", loff_t " start ", long " nr_pages ", int " mode ,
+.BI " unsigned char *" vec ", struct fincore_extra *" extra );
+.fi
+.SH DESCRIPTION
+.BR fincore ()
+extracts information about the in-core data of (i.e., the page cache for)
+the file referred to by the file descriptor
+.IR fd .
+The kernel scans over the page cache tree,
+starting at the in-file offset
+.I start
+(in bytes) until
+.I nr_pages
+entries in the userspace buffer pointed to by
+.IR vec
+are filled with the page cache's data,
+or until the scan reaches the end of the file.
+The format of each entry stored in
+.I vec
+depends on the
+.IR mode .
+The extra argument
+.I extra
+is used to pass additional data between the kernel and userspace.
+This is optional, so you may set
+.I extra
+to NULL if unnecessary.
+The structure
+.I fincore_extra
+is defined like:
+.in +4n
+.nf
+
+struct fincore_extra {
+ unsigned long nr_entries;
+};
+
+.fi
+.in
+The field
+.I nr_entries
+is an output parameter, set to the number of valid entries stored in
+.IR vec
+by the kernel on return.
+
+The
+.I start
+argument must be aligned to the page cache size boundary.
+In most cases, it's the page size boundary,
+but if called for a hugetlbfs file,
+the page cache size is the size of the hugepage associated with the file,
+so
+.I start
+must be aligned to the hugepage size boundary.
+
+The
+.I mode
+argument determines the data format of each entry in the user buffer
+.IR vec :
+.TP
+.B FINCORE_BMAP (0)
+In this mode,
+a vector of 1-byte entries is stored in
+.I vec
+on return.
+The least significant bit of each byte is set if the corresponding page
+is currently resident in memory, and is cleared otherwise.
+(The other bits in each byte are undefined and reserved for future use.)
+.LP
+Setting any of the following flags adds an 8-byte field to each entry.
+You can set any of these flags at the same time, although you can't set
+FINCORE_BMAP combined with these 8 byte field flags.
+.TP
+.B FINCORE_PGOFF (1)
+This flag indicates that each entry contains a page offset field.
+With this information, you don't need data for holes (ranges with no page cache),
+so entries for holes are not stored in
+.I vec
+any longer.
+Note that if you call with this flag, you can't predict how many valid
+entries are stored in the buffer on return. So the
+.I nr_entries
+field in
+.I struct fincore_extra
+is useful if you need to know that number.
+.TP
+.B FINCORE_PFN (2)
+This flag indicates that each entry contains a page frame number
+(i.e., the physical address in units of the page size) field.
+.TP
+.B FINCORE_PAGE_FLAGS (3)
+This flag indicates that each entry contains a page flags field.
+See the KERNEL PAGE FLAGS section for more details about each bit.
+.LP
+The size of the buffer
+.I vec
+must be at least
+.I nr_pages
+bytes if FINCORE_BMAP is set,
+and
+.I (8*n*nr_pages)
+bytes if some of the 8 byte field flags are set,
+where
+.I n
+means the number of 8 byte field flags being set.
+When multiple 8 byte field flags are set, the order of data in each
+entry follows the bit definition order (shown above
+as the numbers in parentheses).
+For example, when you set FINCORE_PGOFF (bit 1) and FINCORE_PAGE_FLAGS (bit 3),
+the first 8 bytes in an entry is the page offset,
+and the second 8 bytes is the page flags.
+
+Note that the information returned by the kernel is just a snapshot:
+pages which are not locked in memory can be freed at any moment, and
+the contents of
+.I vec
+may already be stale by the time the caller refers to the data.
+.SH KERNEL PAGE FLAGS
+.TP
+.B KPF_LOCKED (0)
+The lock on the page is held, suggesting that the kernel may be
+doing some page-related sensitive operation.
+.TP
+.B KPF_ERROR (1)
+The page was affected by IO error or memory error, so the data on the page
+might be lost.
+.TP
+.B KPF_REFERENCED (2)
+This page flag is used to control the page reclaim, combined with KPF_ACTIVE.
+.TP
+.B KPF_UPTODATE (3)
+The page has valid contents.
+.TP
+.B KPF_DIRTY (4)
+The data of the page is not synchronized with that on the backing storage.
+.TP
+.B KPF_LRU (5)
+The page is linked to one of the LRU (Least Recently Used) lists.
+.TP
+.B KPF_ACTIVE (6)
+The page is linked to one of the active LRU lists.
+.TP
+.B KPF_SLAB (7)
+The page is used to construct slabs, which are managed by the kernel
+to allocate various types of kernel objects.
+.TP
+.B KPF_WRITEBACK (8)
+The page is under the writeback operation.
+.TP
+.B KPF_RECLAIM (9)
+The page is under the page reclaim operation.
+.TP
+.B KPF_BUDDY (10)
+The page is under the buddy allocator as a free page. Note that this flag
+is only set on the first page of the "buddy" (i.e., the chunk of free pages).
+.TP
+.B KPF_MMAP (11)
+The page is mapped to the virtual address space of some processes.
+.TP
+.B KPF_ANON (12)
+The page is an anonymous page.
+.TP
+.B KPF_SWAPCACHE (13)
+The page has its own copy of the data on the swap device.
+.TP
+.B KPF_SWAPBACKED (14)
+The page can be swapped out. This flag is set on anonymous pages,
+tmpfs pages, or shmem pages.
+.TP
+.B KPF_COMPOUND_HEAD (15)
+The page belongs to a high-order page, and is its first page.
+.TP
+.B KPF_COMPOUND_TAIL (16)
+The page belongs to a high-order page, and is not its first page.
+.TP
+.B KPF_HUGE (17)
+The page is used to construct a hugepage.
+.TP
+.B KPF_UNEVICTABLE (18)
+The page is prevented from being freed.
+This is caused by
+.BR mlock (2)
+or shared memory with
+.BR SHM_LOCK .
+.TP
+.B KPF_HWPOISON (19)
+The page is affected by a hardware error on the memory.
+.TP
+.B KPF_NOPAGE (20)
+This is a pseudo page flag which indicates that the given address
+has no struct page backing it.
+.TP
+.B KPF_KSM (21)
+The page is a shared page governed by KSM (Kernel Samepage Merging).
+.TP
+.B KPF_THP (22)
+The page is used to construct a transparent hugepage.
+.LP
+.SH RETURN VALUE
+On success,
+.BR fincore ()
+returns 0.
+On error, \-1 is returned, and
+.I errno
+is set appropriately.
+.SH ERRORS
+.TP
+.B EBADF
+.I fd
+is not a valid file descriptor.
+.TP
+.B EFAULT
+.I vec
+points to an invalid address.
+.TP
+.B EINVAL
+.I start
+is unaligned to page cache size or is out-of-range
+(negative or larger than the file size).
+Or
+.I nr_pages
+is not a positive value.
+Or
+.I mode
+contained an undefined flag, or contained no flag,
+or contained both FINCORE_BMAP and one of the "8 byte field" flags.
+.SH VERSIONS
+TBD
+.SH CONFORMING TO
+TBD
+
+.SH EXAMPLE
+.PP
+The following program is an example that prints the page cache information
+of the file specified in its first command-line argument to the standard
+output.
+
+.nf
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/fincore.h>
+
+#define err(msg) do { perror(msg); exit(1); } while (0)
+
+int main(int argc, char *argv[])
+{
+ int i, j;
+ int fd;
+ int ret;
+ long ps = sysconf(_SC_PAGESIZE);
+ long nr_pages;
+ unsigned char *buf;
+ struct stat stat;
+ struct fincore_extra fe = {};
+
+ fd = open(argv[1], O_RDWR);
+ if (fd == \-1)
+ err("open");
+
+ ret = fstat(fd, &stat);
+ if (ret == \-1)
+ err("fstat");
+ nr_pages = ((stat.st_size + ps \- 1) & (~(ps \- 1))) / ps;
+
+ buf = malloc(nr_pages * 24);
+ if (!buf)
+ err("malloc");
+
+ /* byte map */
+ ret = fincore(fd, 0, nr_pages, FINCORE_BMAP, buf, NULL);
+ if (ret < 0)
+ err("fincore");
+ printf("Page residency:");
+ for (i = 0; i < nr_pages; i++)
+ printf("%d", buf[i]);
+ printf("\\n\\n");
+
+ /* 8 byte entry */
+ ret = fincore(fd, 0, nr_pages,
+ FINCORE_PFN|FINCORE_PAGE_FLAGS, buf, &fe);
+ if (ret < 0)
+ err("fincore");
+ printf("pfn\\tflags %lx\\n", fe.nr_entries);
+ for (i = 0; i < fe.nr_entries; i++) {
+ for (j = 0; j < 2; j++)
+ printf("0x%lx\\t", *(unsigned long *)(buf + (i*2+j)*8));
+ printf("\\n");
+ }
+ printf("\\n");
+
+ /* 8 byte entry with page offset (no hole scanned) */
+ ret = fincore(fd, 0, nr_pages,
+ FINCORE_PGOFF|FINCORE_PFN|FINCORE_PAGE_FLAGS, buf, &fe);
+ if (ret < 0)
+ err("fincore");
+ printf("pgoff\\tpfn\\tflags %lx\\n", fe.nr_entries);
+ for (i = 0; i < fe.nr_entries; i++) {
+ for (j = 0; j < 3; j++)
+ printf("0x%lx\\t", *(unsigned long *)(buf + (i*3+j)*8));
+ printf("\\n");
+ }
+
+ free(buf);
+
+ ret = close(fd);
+ if (ret < 0)
+ err("close");
+ return 0;
+}
+.fi
+.SH SEE ALSO
+.BR mincore (2),
+.BR fsync (2)
--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/