*** linux-2.4.11-pre4/fs/buffer.c	Sat Oct  6 01:24:53 2001
--- linux-2.4.11-pre4-ext3/fs/buffer.c	Sat Oct  6 02:02:58 2001
***************
*** 45,50 ****
--- 45,52 ----
  #include
  #include
  #include
+ #include
+ #include
  #include
  #include
***************
*** 604,611 ****
     information that was supposed to be just stored on the physical layer
     by the user.
  
! Thus invalidate_buffers in general usage is not allwowed to trash dirty
! buffers. For example ioctl(FLSBLKBUF) expects dirty data to be preserved.
  
     NOTE: In the case where the user removed a removable-media-disk even if
     there's still dirty data not synced on disk (due a bug in the device driver
--- 606,617 ----
     information that was supposed to be just stored on the physical layer
     by the user.
  
! Thus invalidate_buffers in general usage is not allwowed to trash
! dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
! be preserved. These buffers are simply skipped.
! 
! We also skip buffers which are still in use. For example this can
! happen if a userspace program is reading the block device.
  
     NOTE: In the case where the user removed a removable-media-disk even if
     there's still dirty data not synced on disk (due a bug in the device driver
***************
*** 649,654 ****
--- 655,670 ----
  		/* Not hashed? */
  		if (!bh->b_pprev)
  			continue;
+ 
+ 		if (conditional_schedule_needed()) {
+ 			atomic_inc(&bh->b_count);
+ 			spin_unlock(&lru_list_lock);
+ 			unconditional_schedule();
+ 			spin_lock(&lru_list_lock);
+ 			atomic_dec(&bh->b_count);
+ 			slept = 1;
+ 		}
+ 
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
***************
*** 708,713 ****
--- 724,730 ----
  	bh->b_list = BUF_CLEAN;
  	bh->b_end_io = handler;
  	bh->b_private = private;
+ 	buffer_trace_init(&bh->b_history);
  }
  
  static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
***************
*** 717,722 ****
--- 734,740 ----
  	struct buffer_head *tmp;
  	struct page *page;
  
+ 	BUFFER_TRACE(bh, "enter");
  	mark_buffer_uptodate(bh, uptodate);
  
  	/* This is a temporary buffer used for page I/O. */
***************
*** 802,807 ****
--- 820,826 ----
  	struct buffer_head *bh;
  	struct inode tmp;
  	int err = 0, err2;
+ 	DEFINE_RESCHED_COUNT;
  
  	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
  
***************
*** 823,830 ****
--- 842,859 ----
  				spin_lock(&lru_list_lock);
  			}
  		}
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			if (conditional_schedule_needed()) {
+ 				spin_unlock(&lru_list_lock);
+ 				unconditional_schedule();	/* Syncing many dirty buffers */
+ 				spin_lock(&lru_list_lock);
+ 			}
+ 		}
  	}
+ 	RESET_RESCHED_COUNT();
+ 
  	while (!list_empty(&tmp.i_dirty_buffers)) {
  		bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
  		remove_inode_queue(bh);
***************
*** 852,857 ****
--- 881,887 ----
  	struct inode tmp;
  	int err = 0, err2;
  
+ 	DEFINE_RESCHED_COUNT;
  	INIT_LIST_HEAD(&tmp.i_dirty_data_buffers);
  
  	spin_lock(&lru_list_lock);
***************
*** 883,888 ****
--- 913,922 ----
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			conditional_schedule();
+ 		}
  		spin_lock(&lru_list_lock);
  	}
***************
*** 911,924 ****
  	struct buffer_head *bh;
  	struct list_head *list;
  	int err = 0;
! 
! 	spin_lock(&lru_list_lock);
  
   repeat:
  
  	for (list = inode->i_dirty_buffers.prev;
  	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
  	     list = bh->b_inode_buffers.prev) {
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
--- 945,967 ----
  	struct buffer_head *bh;
  	struct list_head *list;
  	int err = 0;
! 	DEFINE_RESCHED_COUNT;
  
   repeat:
+ 	conditional_schedule();
+ 	spin_lock(&lru_list_lock);
  
  	for (list = inode->i_dirty_buffers.prev;
  	     bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
  	     list = bh->b_inode_buffers.prev) {
+ 		if (TEST_RESCHED_COUNT(32)) {
+ 			RESET_RESCHED_COUNT();
+ 			if (conditional_schedule_needed()) {
+ 				spin_unlock(&lru_list_lock);
+ 				goto repeat;
+ 			}
+ 		}
+ 
  		if (buffer_locked(bh)) {
  			get_bh(bh);
  			spin_unlock(&lru_list_lock);
***************
*** 955,961 ****
  		if (!buffer_uptodate(bh))
  			err = -EIO;
  		brelse(bh);
- 		spin_lock(&lru_list_lock);
  		goto repeat;
  	}
  }
--- 998,1003 ----
***************
*** 1083,1088 ****
--- 1125,1136 ----
  	}
  }
  
+ void set_buffer_flushtime(struct buffer_head *bh)
+ {
+ 	bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer;
+ }
+ EXPORT_SYMBOL(set_buffer_flushtime);
+ 
  /*
   * A buffer may need to be moved from one buffer list to another
   * (e.g. in case it is not shared any more). Handle this.
***************
*** 1090,1095 ****
--- 1138,1146 ----
  static void __refile_buffer(struct buffer_head *bh)
  {
  	int dispose = BUF_CLEAN;
+ 
+ 	BUFFER_TRACE(bh, "enter");
+ 
  	if (buffer_locked(bh))
  		dispose = BUF_LOCKED;
  	if (buffer_dirty(bh))
***************
*** 1101,1106 ****
--- 1152,1158 ----
  		remove_inode_queue(bh);
  		__insert_into_lru_list(bh, dispose);
  	}
+ 	BUFFER_TRACE(bh, "exit");
  }
  
  void refile_buffer(struct buffer_head *bh)
***************
*** 1115,1120 ****
--- 1167,1173 ----
   */
  void __brelse(struct buffer_head * buf)
  {
+ 	BUFFER_TRACE(buf, "entry");
  	if (atomic_read(&buf->b_count)) {
  		put_bh(buf);
  		return;
***************
*** 1159,1168 ****
  /*
   * Note: the caller should wake up the buffer_wait list if needed.
   */
! static __inline__ void __put_unused_buffer_head(struct buffer_head * bh)
  {
  	if (bh->b_inode)
  		BUG();
  	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
  		kmem_cache_free(bh_cachep, bh);
  	} else {
--- 1212,1233 ----
  /*
   * Note: the caller should wake up the buffer_wait list if needed.
   */
! static void __put_unused_buffer_head(struct buffer_head * bh)
  {
  	if (bh->b_inode)
  		BUG();
+ 
+ 	J_ASSERT_BH(bh, bh->b_prev_free == 0);
+ #if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE)
+ 	if (buffer_jbd(bh)) {
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_transaction == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_next_transaction == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_frozen_data == 0);
+ 		J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data == 0);
+ 	}
+ #endif
+ 	buffer_trace_init(&bh->b_history);
+ 
  	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
  		kmem_cache_free(bh_cachep, bh);
  	} else {
***************
*** 1176,1187 ****
  	}
  }
  
  /*
   * Reserve NR_RESERVED buffer heads for async IO requests to avoid
   * no-buffer-head deadlock. Return NULL on failure; waiting for
   * buffer heads is now handled in create_buffers().
   */
! static struct buffer_head * get_unused_buffer_head(int async)
  {
  	struct buffer_head * bh;
  
--- 1241,1260 ----
  	}
  }
  
+ void put_unused_buffer_head(struct buffer_head *bh)
+ {
+ 	spin_lock(&unused_list_lock);
+ 	__put_unused_buffer_head(bh);
+ 	spin_unlock(&unused_list_lock);
+ }
+ EXPORT_SYMBOL(put_unused_buffer_head);
+ 
  /*
   * Reserve NR_RESERVED buffer heads for async IO requests to avoid
   * no-buffer-head deadlock. Return NULL on failure; waiting for
   * buffer heads is now handled in create_buffers().
   */
! struct buffer_head * get_unused_buffer_head(int async)
  {
  	struct buffer_head * bh;
  
***************
*** 1202,1207 ****
--- 1275,1281 ----
  	if((bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS)) != NULL) {
  		bh->b_blocknr = -1;
  		bh->b_this_page = NULL;
+ 		buffer_trace_init(&bh->b_history);
  		return bh;
  	}
  
***************
*** 1215,1220 ****
--- 1289,1295 ----
  		unused_list = bh->b_next_free;
  		nr_unused_buffer_heads--;
  		spin_unlock(&unused_list_lock);
+ 		buffer_trace_init(&bh->b_history);
  		return bh;
  	}
  	spin_unlock(&unused_list_lock);
***************
*** 1222,1227 ****
--- 1297,1303 ----
  
  	return NULL;
  }
+ EXPORT_SYMBOL(get_unused_buffer_head);
  
  void set_bh_page (struct buffer_head *bh, struct page *page, unsigned long offset)
  {
***************
*** 1236,1241 ****
--- 1312,1318 ----
  	else
  		bh->b_data = page_address(page) + offset;
  }
+ EXPORT_SYMBOL(set_bh_page);
  
  /*
   * Create the appropriate buffers when given a page for data area and
***************
*** 1319,1324 ****
--- 1396,1402 ----
  static void discard_buffer(struct buffer_head * bh)
  {
  	if (buffer_mapped(bh)) {
+ 		BUFFER_TRACE(bh, "entry");
  		mark_buffer_clean(bh);
  		lock_buffer(bh);
  		clear_bit(BH_Uptodate, &bh->b_state);
***************
*** 1329,1334 ****
--- 1407,1437 ----
  	}
  }
  
+ /**
+  * try_to_release_page - release old fs-specific metadata on a page
+  *
+  */
+ 
+ int try_to_release_page(struct page * page, int gfp_mask)
+ {
+ 	if (!PageLocked(page))
+ 		BUG();
+ 
+ 	if (!page->mapping)
+ 		goto try_to_free;
+ 	if (!page->mapping->a_ops->releasepage)
+ 		goto try_to_free;
+ 	if (page->mapping->a_ops->releasepage(page, gfp_mask))
+ 		goto try_to_free;
+ 	/*
+ 	 * We couldn't release buffer metadata; don't even bother trying
+ 	 * to release buffers.
+ 	 */
+ 	return 0;
+ try_to_free:
+ 	return try_to_free_buffers(page, gfp_mask);
+ }
+ 
  /*
   * We don't have to release all buffers here, but
   * we have to be sure that no dirty buffer is left
***************
*** 1400,1405 ****
--- 1503,1509 ----
  	page->buffers = head;
  	page_cache_get(page);
  }
+ EXPORT_SYMBOL(create_empty_buffers);
  
  /*
   * We are taking a block for data and we don't want any output from any
***************
*** 1418,1424 ****
  	struct buffer_head *old_bh;
  
  	old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
  	if (old_bh) {
  		mark_buffer_clean(old_bh);
  		wait_on_buffer(old_bh);
  		clear_bit(BH_Req, &old_bh->b_state);
--- 1522,1531 ----
  	struct buffer_head *old_bh;
  
  	old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
+ 	J_ASSERT_BH(bh, old_bh != bh);
  	if (old_bh) {
+ 		BUFFER_TRACE(old_bh, "old_bh - entry");
+ 		J_ASSERT_BH(old_bh, !buffer_jlist_eq(old_bh, BJ_Metadata));
  		mark_buffer_clean(old_bh);
  		wait_on_buffer(old_bh);
  		clear_bit(BH_Req, &old_bh->b_state);
***************
*** 1443,1450 ****
   */
  
  /*
!  * block_write_full_page() is SMP-safe - currently it's still
!  * being called with the kernel lock held, but the code is ready.
   */
  static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
  {
--- 1550,1556 ----
   */
  
  /*
!  * block_write_full_page() is SMP threaded - the kernel lock is not held.
   */
  static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
  {
***************
*** 1478,1492 ****
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto out;
! 			if (buffer_new(bh))
  				unmap_underlying_metadata(bh);
  		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	/* Stage 2: lock the buffers, mark them clean */
  	do {
  		lock_buffer(bh);
  		set_buffer_async_io(bh);
  		set_bit(BH_Uptodate, &bh->b_state);
--- 1584,1601 ----
  			err = get_block(inode, block, bh, 1);
  			if (err)
  				goto out;
! 			if (buffer_new(bh)) {
! 				BUFFER_TRACE(bh, "new: call unmap_underlying_metadata");
  				unmap_underlying_metadata(bh);
  			}
+ 		}
  		bh = bh->b_this_page;
  		block++;
  	} while (bh != head);
  
  	/* Stage 2: lock the buffers, mark them clean */
  	do {
+ 		BUFFER_TRACE(bh, "lock it");
  		lock_buffer(bh);
  		set_buffer_async_io(bh);
  		set_bit(BH_Uptodate, &bh->b_state);
***************
*** 1543,1550 ****
--- 1652,1661 ----
  			if (err)
  				goto out;
  			if (buffer_new(bh)) {
+ 				BUFFER_TRACE(bh, "new: call unmap_underlying_metadata");
  				unmap_underlying_metadata(bh);
  				if (Page_Uptodate(page)) {
+ 					BUFFER_TRACE(bh, "setting uptodate");
  					set_bit(BH_Uptodate, &bh->b_state);
  					continue;
  				}
***************
*** 1558,1568 ****
--- 1669,1681 ----
  			}
  		}
  		if (Page_Uptodate(page)) {
+ 			BUFFER_TRACE(bh, "setting uptodate");
  			set_bit(BH_Uptodate, &bh->b_state);
  			continue;
  		}
  		if (!buffer_uptodate(bh) &&
  		     (block_start < from || block_end > to)) {
+ 			BUFFER_TRACE(bh, "reading");
  			ll_rw_block(READ, 1, &bh);
  			*wait_bh++=bh;
  		}
***************
*** 1601,1606 ****
--- 1714,1720 ----
  		} else {
  			set_bit(BH_Uptodate, &bh->b_state);
  			if (!atomic_set_buffer_dirty(bh)) {
+ 				BUFFER_TRACE(bh, "mark dirty");
  				__mark_dirty(bh);
  				buffer_insert_inode_data_queue(bh, inode);
  				need_balance_dirty = 1;
***************
*** 1884,1889 ****
--- 1998,2004 ----
  	flush_dcache_page(page);
  	kunmap(page);
  
+ 	BUFFER_TRACE(bh, "zeroed end of block");
  	__mark_buffer_dirty(bh);
  	err = 0;
  
***************
*** 2430,2435 ****
--- 2545,2552 ----
  	wakeup_bdflush();
  	return 0;
  }
+ EXPORT_SYMBOL(try_to_free_buffers);
+ EXPORT_SYMBOL(buffermem_pages);
  
  /* ================== Debugging =================== */
  
***************
*** 2542,2548 ****
   */
  DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
  
- 
  void wakeup_bdflush(void)
  {
  	wake_up_interruptible(&bdflush_wait);
--- 2659,2664 ----