*** linux-2.4.11-pre4/fs/buffer.c	Sat Oct  6 01:24:53 2001
--- linux-2.4.11-pre4-ext3/fs/buffer.c	Sat Oct  6 02:02:58 2001
***************
*** 45,50 ****
--- 45,52 ----
  #include
  #include
  #include
+ #include
+ #include
  #include
  #include
***************
*** 604,611 ****
     information that was supposed to be just stored on the physical layer
     by the user.
  
! Thus invalidate_buffers in general usage is not allwowed to trash dirty
! buffers. For example ioctl(FLSBLKBUF) expects dirty data to be preserved.
  
     NOTE: In the case where the user removed a removable-media-disk even if
     there's still dirty data not synced on disk (due a bug in the device driver
--- 606,617 ----
     information that was supposed to be just stored on the physical layer
     by the user.
  
! Thus invalidate_buffers in general usage is not allwowed to trash
! dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
! be preserved. These buffers are simply skipped.
! 
! We also skip buffers which are still in use. For example this can
! happen if a userspace program is reading the block device.
  
     NOTE: In the case where the user removed a removable-media-disk even if
     there's still dirty data not synced on disk (due a bug in the device driver
***************
*** 649,654 ****
--- 655,670 ----
  		/* Not hashed? */
  		if (!bh->b_pprev)
  			continue;
+ 
+ 		if (conditional_schedule_needed()) {
+ 			atomic_inc(&bh->b_count);
+ 			spin_unlock(&lru_list_lock);
+ 			unconditional_schedule();
+ 			spin_lock(&lru_list_lock);
+ 			atomic_dec(&bh->b_count);
+ 			slept = 1;
+ 		}
+ 
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
***************
*** 708,713 ****
--- 724,730 ----
  	bh->b_list = BUF_CLEAN;
  	bh->b_end_io = handler;
  	bh->b_private = private;
+ 	buffer_trace_init(&bh->b_history);
  }
  
  static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
***************
*** 717,722 ****
--- 734,740 ----
  	struct buffer_head *tmp;
  	struct page *page;
  
+ 	BUFFER_TRACE(bh, "enter");
  	mark_buffer_uptodate(bh, uptodate);
  
  	/* This is a temporary buffer used for page I/O. */
***************
*** 802,807 ****
--- 820,826 ----
  	struct buffer_head *bh;
  	struct inode tmp;
  	int err = 0, err2;
+ 	DEFINE_RESCHED_COUNT;
  
  	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
  
***************
*** 823,830 ****
--- 842,859 ----
  				spin_lock(&lru_list_lock);
  			}
  		}
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			if (conditional_schedule_needed()) {
+ 				spin_unlock(&lru_list_lock);
+ 				unconditional_schedule();	/* Syncing many dirty buffers */
+ 				spin_lock(&lru_list_lock);
+ 			}
+ 		}
  	}
+ 	RESET_RESCHED_COUNT();
+ 
  	while (!list_empty(&tmp.i_dirty_buffers)) {
  		bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
  		remove_inode_queue(bh);
***************
*** 852,857 ****
--- 881,887 ----
  	struct inode tmp;
  	int err = 0, err2;
  
+ 	DEFINE_RESCHED_COUNT;
  	INIT_LIST_HEAD(&tmp.i_dirty_data_buffers);
  
  	spin_lock(&lru_list_lock);
***************
*** 883,888 ****
--- 913,922 ----
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			conditional_schedule();
+ 		}
  		spin_lock(&lru_list_lock);
  	}
***************
*** 911,924 ****
  	struct buffer_head *bh;
  	struct list_head *list;
  	int err = 0;
! 
! 	spin_lock(&lru_list_lock);
  
   repeat:
  
  	for (list = inode->i_dirty_buffers.prev;
  	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
  	     list = bh->b_inode_buffers.prev) {
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
--- 945,967 ----
  	struct buffer_head *bh;
  	struct list_head *list;
  	int err = 0;
! 	DEFINE_RESCHED_COUNT;
  
   repeat:
+ 	conditional_schedule();
+ 	spin_lock(&lru_list_lock);
  
  	for (list = inode->i_dirty_buffers.prev;
  	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
  	     list = bh->b_inode_buffers.prev) {
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			if (conditional_schedule_needed()) {
+ 				spin_unlock(&lru_list_lock);
+ 				goto repeat;
+ 			}
+ 		}
+ 
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
***************
*** 955,961 ****
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
- 		spin_lock(&lru_list_lock);
  		goto repeat;
  	}
  }
--- 998,1003 ----
***************
*** 1083,1088 ****
--- 1125,1136 ----
  	}
  }
  
+ void set_buffer_flushtime(struct buffer_head *bh)
+ {
+ 	bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer;
+ }
+ EXPORT_SYMBOL(set_buffer_flushtime);
+ 
  /*
   * A buffer may need to be moved from one buffer list to another
   * (e.g. in case it is not shared any more). Handle this.
***************
*** 1090,1095 ****
--- 1138,1146 ----
  static void __refile_buffer(struct buffer_head *bh)
  {
  	int dispose = BUF_CLEAN;
+ 
+ 	BUFFER_TRACE(bh, "enter");
+ 
  	if (buffer_locked(bh))
  		dispose = BUF_LOCKED;
  	if (buffer_dirty(bh))
***************
*** 1101,1106 ****
--- 1152,1158 ----
  		remove_inode_queue(bh);
  		__insert_into_lru_list(bh, dispose);
  	}
+ 	BUFFER_TRACE(bh, "exit");
  }
  
  void refile_buffer(struct buffer_head *bh)
***************
*** 1115,1120 ****
--- 1167,1173 ----
   */
  void __brelse(struct buffer_head * buf)
  {
+ 	BUFFER_TRACE(buf, "entry");
  	if (atomic_read(&buf->b_count)) {
  		put_bh(buf);
  		return;
***************
*** 1159,1168 ****
  /*
   * Note: the caller should wake up the buffer_wait list if needed.
   */
! static __inline__ void __put_unused_buffer_head(struct buffer_head * bh)
  {
  	if (bh->b_inode)
  		BUG();
  	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
  		kmem_cache_free(bh_cachep, bh);
  	} else {
--- 1212,1233 ----
  /*
   * Note: the caller should wake up the buffer_wait list if needed.
   */
! static void __put_unused_buffer_head(struct buffer_head * bh)
  {
  	if (bh->b_inode)
  		BUG();
+ 
+ 	J_ASSERT_BH(bh, bh->b_prev_free == 0);
+ #if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE)
+ 	if (buffer_jbd(bh)) {
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_transaction == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_next_transaction == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_frozen_data == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data == 0);
+ 	}
+ #endif
+ 	buffer_trace_init(&bh->b_history);
+ 
  	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
  		kmem_cache_free(bh_cachep, bh);
  	} else {
***************
*** 1176,1187 ****
  	}
  }
  
  /*
   * Reserve NR_RESERVED buffer heads for async IO requests to avoid
   * no-buffer-head deadlock. Return NULL on failure; waiting for
   * buffer heads is now handled in create_buffers().
   */
! static struct buffer_head * get_unused_buffer_head(int async)
  {
  	struct buffer_head * bh;
  
--- 1241,1260 ----
  	}
  }
  
+ void put_unused_buffer_head(struct buffer_head *bh)
+ {
+ 	spin_lock(&unused_list_lock);
+ 	__put_unused_buffer_head(bh);
+ 	spin_unlock(&unused_list_lock);
+ }
+ EXPORT_SYMBOL(put_unused_buffer_head);
+ 
  /*
   * Reserve NR_RESERVED buffer heads for async IO requests to avoid
   * no-buffer-head deadlock. Return NULL on failure; waiting for
   * buffer heads is now handled in create_buffers().
   */
! struct buffer_head * get_unused_buffer_head(int async)
  {
  	struct buffer_head * bh;
  
***************
*** 1202,1207 ****
--- 1275,1281 ----
  	if((bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS)) != NULL) {
  		bh->b_blocknr = -1;
  		bh->b_this_page = NULL;
+ 		buffer_trace_init(&bh->b_history);
  		return bh;
  	}
  
***************
*** 1215,1220 ****
--- 1289,1295 ----
  		unused_list = bh->b_next_free;
  		nr_unused_buffer_heads--;
  		spin_unlock(&unused_list_lock);
+ 		buffer_trace_init(&bh->b_history);
  		return bh;
  	}
  	spin_unlock(&unused_list_lock);
***************
*** 1222,1227 ****
--- 1297,1303 ----
  
  	return NULL;
  }
+ EXPORT_SYMBOL(get_unused_buffer_head);
  
  void set_bh_page (struct buffer_head *bh, struct page *page, unsigned long offset)
  {
***************
*** 1236,1241 ****
--- 1312,1318 ----
  	else
  		bh->b_data = page_address(page) + offset;
  }
+ EXPORT_SYMBOL(set_bh_page);
  
  /*
   * Create the appropriate buffers when given a page for data area and
***************
*** 1319,1324 ****
--- 1396,1402 ----
  static void discard_buffer(struct buffer_head * bh)
  {
  	if (buffer_mapped(bh)) {
+ 		BUFFER_TRACE(bh, "entry");
  		mark_buffer_clean(bh);
  		lock_buffer(bh);
  		clear_bit(BH_Uptodate, &bh->b_state);
***************
*** 1329,1334 ****
--- 1407,1437 ----
  	}
  }
  
+ /**
+  * try_to_release_page - release old fs-specific metadata on a page
+  *
+  */
+ 
+ int try_to_release_page(struct page * page, int gfp_mask)
+ {
+ 	if (!PageLocked(page))
+ 		BUG();
+ 
+ 	if (!page->mapping)
+ 		goto try_to_free;
+ 	if (!page->mapping->a_ops->releasepage)
+ 		goto try_to_free;
+ 	if (page->mapping->a_ops->releasepage(page, gfp_mask))
+ 		goto try_to_free;
+ 	/*
+ 	 * We couldn't release buffer metadata; don't even bother trying
+ 	 * to release buffers.
+ 	 */
+ 	return 0;
+ try_to_free:
+ 	return try_to_free_buffers(page, gfp_mask);
+ }
+ 
  /*
   * We don't have to release all buffers here, but
   * we have to be sure that no dirty buffer is left
***************
*** 1400,1405 ****
--- 1503,1509 ----
  	page->buffers = head;
  	page_cache_get(page);
  }
+ EXPORT_SYMBOL(create_empty_buffers);
  
  /*
   * We are taking a block for data and we don't want any output from any
***************
*** 1418,1424 ****
  	struct buffer_head *old_bh;
  
  	old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
  	if (old_bh) {
  		mark_buffer_clean(old_bh);
  		wait_on_buffer(old_bh);
  		clear_bit(BH_Req, &old_bh->b_state);
--- 1522,1531 ----
  	struct buffer_head *old_bh;
  
  	old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
+ 	J_ASSERT_BH(bh, old_bh != bh);
  	if (old_bh) {
+ 		BUFFER_TRACE(old_bh, "old_bh - entry");
+ 		J_ASSERT_BH(old_bh, !buffer_jlist_eq(old_bh, BJ_Metadata));
  		mark_buffer_clean(old_bh);
  		wait_on_buffer(old_bh);
  		clear_bit(BH_Req, &old_bh->b_state);
***************
*** 1443,1450 ****
   */
  
  /*
!  * block_write_full_page() is SMP-safe - currently it's still
!  * being called with the kernel lock held, but the code is ready.
   */
  static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
  {
--- 1550,1556 ----
   */
  
  /*
!  * block_write_full_page() is SMP threaded - the kernel lock is not held.
   */
  static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
  {
***************
*** 1478,1492 ****
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto out;
! 			if (buffer_new(bh))
  				unmap_underlying_metadata(bh);
  		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	/* Stage 2: lock the buffers, mark them clean */
  	do {
  		lock_buffer(bh);
  		set_buffer_async_io(bh);
  		set_bit(BH_Uptodate, &bh->b_state);
--- 1584,1601 ----
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto out;
! 			if (buffer_new(bh)) {
! 				BUFFER_TRACE(bh, "new: call unmap_underlying_metadata");
  				unmap_underlying_metadata(bh);
  			}
+ 		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	/* Stage 2: lock the buffers, mark them clean */
  	do {
+ 		BUFFER_TRACE(bh, "lock it");
  		lock_buffer(bh);
  		set_buffer_async_io(bh);
  		set_bit(BH_Uptodate, &bh->b_state);
***************
*** 1543,1550 ****
--- 1652,1661 ----
  			if (err)
  				goto out;
  			if (buffer_new(bh)) {
+ 				BUFFER_TRACE(bh, "new: call unmap_underlying_metadata");
  				unmap_underlying_metadata(bh);
  				if (Page_Uptodate(page)) {
+ 					BUFFER_TRACE(bh, "setting uptodate");
  					set_bit(BH_Uptodate, &bh->b_state);
  					continue;
  				}
***************
*** 1558,1568 ****
--- 1669,1681 ----
  			}
  		}
  		if (Page_Uptodate(page)) {
+ 			BUFFER_TRACE(bh, "setting uptodate");
  			set_bit(BH_Uptodate, &bh->b_state);
  			continue;
  		}
  		if (!buffer_uptodate(bh) &&
  		     (block_start < from || block_end > to)) {
+ 			BUFFER_TRACE(bh, "reading");
  			ll_rw_block(READ, 1, &bh);
  			*wait_bh++=bh;
  		}
***************
*** 1601,1606 ****
--- 1714,1720 ----
  		} else {
  			set_bit(BH_Uptodate, &bh->b_state);
  			if (!atomic_set_buffer_dirty(bh)) {
+ 				BUFFER_TRACE(bh, "mark dirty");
  				__mark_dirty(bh);
  				buffer_insert_inode_data_queue(bh, inode);
  				need_balance_dirty = 1;
***************
*** 1884,1889 ****
--- 1998,2004 ----
  	flush_dcache_page(page);
  	kunmap(page);
  
+ 	BUFFER_TRACE(bh, "zeroed end of block");
  	__mark_buffer_dirty(bh);
  	err = 0;
  
***************
*** 2430,2435 ****
--- 2545,2552 ----
  	wakeup_bdflush();
  	return 0;
  }
+ EXPORT_SYMBOL(try_to_free_buffers);
+ EXPORT_SYMBOL(buffermem_pages);
  
  /* ================== Debugging =================== */
  
***************
*** 2542,2548 ****
   */
  DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
  
- 
  void wakeup_bdflush(void)
  {
  	wake_up_interruptible(&bdflush_wait);
--- 2659,2664 ----