[RFC 04/11] vfs: add support for updating access frequency

From: zwu . kernel
Date: Tue Sep 11 2012 - 10:30:06 EST


From: Zhi Yong Wu <wuzhy@xxxxxxxxxxxxxxxxxx>

Add some utils helpers to update access frequencies
for one file or its range.

Signed-off-by: Zhi Yong Wu <wuzhy@xxxxxxxxxxxxxxxxxx>
---
fs/hot_rb.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/hot_rb.h | 28 +++++
2 files changed, 387 insertions(+), 0 deletions(-)

diff --git a/fs/hot_rb.c b/fs/hot_rb.c
index e2bee75..560841a 100644
--- a/fs/hot_rb.c
+++ b/fs/hot_rb.c
@@ -102,3 +102,362 @@ void hot_rb_range_item_init(void *_item)
hr->hot_freq_data.avg_delta_writes = (u64) -1;
hr->hot_freq_data.flags = FREQ_DATA_TYPE_RANGE;
}
+
+/*
+ * Drops the reference out on hot_inode_item by one and free the structure
+ * if the reference count hits zero
+ */
+void hot_rb_free_hot_inode_item(struct hot_inode_item *he)
+{
+ if (!he)
+ return;
+
+ if (atomic_dec_and_test(&he->refs.refcount)) {
+ WARN_ON(he->in_tree);
+ kmem_cache_free(hot_inode_item_cache, he);
+ }
+}
+
+/*
+ * Drops the reference out on hot_range_item by one and free the structure
+ * if the reference count hits zero
+ */
+void hot_rb_free_hot_range_item(struct hot_range_item *hr)
+{
+ if (!hr)
+ return;
+
+ if (atomic_dec_and_test(&hr->refs.refcount)) {
+ WARN_ON(hr->in_tree);
+ kmem_cache_free(hot_range_item_cache, hr);
+ }
+}
+
+static struct rb_node *hot_rb_insert_hot_inode_item(struct rb_root *root,
+ unsigned long inode_num,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hot_inode_item *entry;
+
+ /* walk tree to find insertion point */
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct hot_inode_item, rb_node);
+
+ if (inode_num < entry->i_ino)
+ p = &(*p)->rb_left;
+ else if (inode_num > entry->i_ino)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ entry = rb_entry(node, struct hot_inode_item, rb_node);
+ entry->in_tree = 1;
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+
+ return NULL;
+}
+
+static u64 hot_rb_range_end(struct hot_range_item *hr)
+{
+ if (hr->start + hr->len < hr->start)
+ return (u64)-1;
+
+ return hr->start + hr->len - 1;
+}
+
+static struct rb_node *hot_rb_insert_hot_range_item(struct rb_root *root,
+ u64 start,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hot_range_item *entry;
+
+ /* ensure start is on a range boundary */
+ start = start & RANGE_SIZE_MASK;
+ /* walk tree to find insertion point */
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct hot_range_item, rb_node);
+
+ if (start < entry->start)
+ p = &(*p)->rb_left;
+ else if (start >= hot_rb_range_end(entry))
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ entry = rb_entry(node, struct hot_range_item, rb_node);
+ entry->in_tree = 1;
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+
+ return NULL;
+}
+
+/*
+ * Add a hot_inode_item to a hot_inode_tree. If the tree already contains
+ * an item with the index given, return -EEXIST
+ */
+int hot_rb_add_hot_inode_item(struct hot_inode_tree *tree,
+ struct hot_inode_item *he)
+{
+ int ret = 0;
+ struct rb_node *rb;
+
+ rb = hot_rb_insert_hot_inode_item(
+ &tree->map, he->i_ino, &he->rb_node);
+ if (rb) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ kref_get(&he->refs);
+
+out:
+ return ret;
+}
+
+/*
+ * Add a hot_range_item to a hot_range_tree. If the tree already contains
+ * an item with the index given, return -EEXIST
+ *
+ * Also optionally aggresively merge ranges (currently disabled)
+ */
+int hot_rb_add_hot_range_item(struct hot_range_tree *tree,
+ struct hot_range_item *hr)
+{
+ int ret = 0;
+ struct rb_node *rb;
+
+ rb = hot_rb_insert_hot_range_item(
+ &tree->map, hr->start, &hr->rb_node);
+ if (rb) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ kref_get(&hr->refs);
+
+out:
+ return ret;
+}
+
+/*
+ * Lookup a hot_inode_item in the hot_inode_tree with the given index
+ * (inode_num)
+ */
+struct hot_inode_item
+*hot_rb_lookup_hot_inode_item(struct hot_inode_tree *tree,
+ unsigned long inode_num)
+{
+ struct rb_node **p = &(tree->map.rb_node);
+ struct rb_node *parent = NULL;
+ struct hot_inode_item *entry;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct hot_inode_item, rb_node);
+
+ if (inode_num < entry->i_ino)
+ p = &(*p)->rb_left;
+ else if (inode_num > entry->i_ino)
+ p = &(*p)->rb_right;
+ else {
+ kref_get(&entry->refs);
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Lookup a hot_range_item in a hot_range_tree with the given index
+ * (start, offset)
+ */
+struct hot_range_item
+*hot_rb_lookup_hot_range_item(struct hot_range_tree *tree,
+ u64 start)
+{
+ struct rb_node **p = &(tree->map.rb_node);
+ struct rb_node *parent = NULL;
+ struct hot_range_item *entry;
+
+ /* ensure start is on a range boundary */
+ start = start & RANGE_SIZE_MASK;
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct hot_range_item, rb_node);
+
+ if (start < entry->start)
+ p = &(*p)->rb_left;
+ else if (start > hot_rb_range_end(entry))
+ p = &(*p)->rb_right;
+ else {
+ kref_get(&entry->refs);
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+/* Update inode frequency struct */
+static struct hot_inode_item *hot_rb_update_inode_freq(struct inode *inode,
+ int rw)
+{
+ struct hot_info *root = &(inode->i_sb->s_hotinfo);
+ struct hot_inode_tree *hitree = &(root->hot_inode_tree);
+ struct hot_inode_item *he;
+
+ read_lock(&hitree->lock);
+ he = hot_rb_lookup_hot_inode_item(hitree, inode->i_ino);
+ read_unlock(&hitree->lock);
+
+ if (!he) {
+ he = kmem_cache_alloc(hot_inode_item_cache,
+ GFP_KERNEL | GFP_NOFS);
+ if (!he)
+ goto out;
+
+ write_lock(&hitree->lock);
+ hot_rb_inode_item_init(he);
+ he->i_ino = inode->i_ino;
+ hot_rb_add_hot_inode_item(hitree, he);
+ write_unlock(&hitree->lock);
+ }
+
+ spin_lock(&he->lock);
+ hot_rb_update_freq(&he->hot_freq_data, rw);
+ spin_unlock(&he->lock);
+
+out:
+ return he;
+}
+
+/* Update range frequency struct */
+static bool hot_rb_update_range_freq(struct hot_inode_item *he,
+ u64 off, u64 len, int rw,
+ struct hot_info *root)
+{
+ struct hot_range_tree *hrtree = &(he->hot_range_tree);
+ struct hot_range_item *hr = NULL;
+ u64 start_off = off & RANGE_SIZE_MASK;
+ u64 end_off = (off + len - 1) & RANGE_SIZE_MASK;
+ u64 cur;
+ int ret = true;
+
+ if (len == 0)
+ return false;
+
+ /*
+ * Align ranges on RANGE_SIZE boundary to prevent proliferation
+ * of range structs
+ */
+ for (cur = start_off; cur <= end_off; cur += RANGE_SIZE) {
+ read_lock(&hrtree->lock);
+ hr = hot_rb_lookup_hot_range_item(hrtree, cur);
+ read_unlock(&hrtree->lock);
+
+ if (!hr) {
+ hr = kmem_cache_alloc(hot_range_item_cache,
+ GFP_KERNEL | GFP_NOFS);
+ if (!hr) {
+ ret = false;
+ goto out;
+ }
+
+ write_lock(&hrtree->lock);
+ hot_rb_range_item_init(hr);
+ hr->start = cur & RANGE_SIZE_MASK;
+ hr->len = RANGE_SIZE;
+ hr->hot_inode = he;
+ hot_rb_add_hot_range_item(hrtree, hr);
+ write_unlock(&hrtree->lock);
+ }
+
+ spin_lock(&hr->lock);
+ hot_rb_update_freq(&hr->hot_freq_data, rw);
+ spin_unlock(&hr->lock);
+ hot_rb_free_hot_range_item(hr);
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * This function does the actual work of updating the frequency numbers,
+ * whatever they turn out to be. FREQ_POWER determines how many atime
+ * deltas we keep track of (as a power of 2). So, setting it to anything above
+ * 16ish is probably overkill. Also, the higher the power, the more bits get
+ * right shifted out of the timestamp, reducing precision, so take note of that
+ * as well.
+ *
+ * The caller should have already locked freq_data's parent's spinlock.
+ *
+ * FREQ_POWER, defined immediately below, determines how heavily to weight
+ * the current frequency numbers against the newest access. For example, a value
+ * of 4 means that the new access information will be weighted 1/16th (ie 2^-4)
+ * as heavily as the existing frequency info. In essence, this is a kludged-
+ * together version of a weighted average, since we can't afford to keep all of
+ * the information that it would take to get a _real_ weighted average.
+ */
+void hot_rb_update_freq(struct hot_freq_data *freq_data, int rw)
+{
+ struct timespec old_atime;
+ struct timespec current_time;
+ struct timespec delta_ts;
+ u64 new_avg;
+ u64 new_delta;
+
+ if (unlikely(rw)) {
+ old_atime = freq_data->last_write_time;
+ freq_data->nr_writes += 1;
+ new_avg = freq_data->avg_delta_writes;
+ } else {
+ old_atime = freq_data->last_read_time;
+ freq_data->nr_reads += 1;
+ new_avg = freq_data->avg_delta_reads;
+ }
+
+ current_time = current_kernel_time();
+ delta_ts = timespec_sub(current_time, old_atime);
+ new_delta = timespec_to_ns(&delta_ts) >> FREQ_POWER;
+
+ new_avg = (new_avg << FREQ_POWER) - new_avg + new_delta;
+ new_avg = new_avg >> FREQ_POWER;
+
+ if (unlikely(rw)) {
+ freq_data->last_write_time = current_time;
+ freq_data->avg_delta_writes = new_avg;
+ } else {
+ freq_data->last_read_time = current_time;
+ freq_data->avg_delta_reads = new_avg;
+ }
+}
+
+/* main function to update access frequency from read/writepage(s) hooks */
+void hot_rb_update_freqs(struct inode *inode, u64 start,
+ u64 len, int rw)
+{
+ struct hot_inode_item *he;
+
+ he = hot_rb_update_inode_freq(inode, rw);
+
+ WARN_ON(!he);
+
+ if (he) {
+ hot_rb_update_range_freq(he, start, len,
+ rw, &(inode->i_sb->s_hotinfo));
+
+ hot_rb_free_hot_inode_item(he);
+ }
+}
diff --git a/fs/hot_rb.h b/fs/hot_rb.h
index 9a68d699..4048027 100644
--- a/fs/hot_rb.h
+++ b/fs/hot_rb.h
@@ -21,10 +21,38 @@ void hot_rb_inode_tree_init(struct hot_inode_tree *tree);
/* values for hot_freq_data flags */
#define FREQ_DATA_TYPE_INODE (1 << 0) /* freq data struct is for an inode */
#define FREQ_DATA_TYPE_RANGE (1 << 1) /* freq data struct is for a range */
+/* size of sub-file ranges */
+#define RANGE_SIZE (1<<20)
+#define RANGE_SIZE_MASK (~((u64)(RANGE_SIZE - 1)))
+
+#define FREQ_POWER 4
+
+struct hot_info;
+struct inode;

void hot_rb_inode_item_init(void *_item);
void hot_rb_range_item_init(void *_item);

+struct hot_range_item
+*hot_rb_lookup_hot_range_item(struct hot_range_tree *tree,
+ u64 start);
+
+struct hot_inode_item
+*hot_rb_lookup_hot_inode_item(struct hot_inode_tree *tree,
+ unsigned long inode_num);
+
+int hot_rb_add_hot_inode_item(struct hot_inode_tree *tree,
+ struct hot_inode_item *he);
+int hot_rb_add_hot_range_item(struct hot_range_tree *tree,
+ struct hot_range_item *hr);
+
+void hot_rb_free_hot_inode_item(struct hot_inode_item *he);
+void hot_rb_free_hot_range_item(struct hot_range_item *hr);
+
int __init hot_rb_item_cache_init(void);

+void hot_rb_update_freq(struct hot_freq_data *freq_data, int rw);
+void hot_rb_update_freqs(struct inode *inode, u64 start, u64 len,
+ int rw);
+
#endif /* __HOT_MAP__ */
--
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/