On 7/18/25 11:23, Yu Kuai wrote:
From: Yu Kuai <yukuai3@xxxxxxxxxx>
+
+#define BITMAP_DATA_OFFSET 1024
+
+/* 64k is the max IO size of sync IO for raid1/raid10 */
+#define MIN_CHUNK_SIZE (64 * 2)
+
Hmm? Which one is it?
Comment says 'max IO size', but it's called 'MIN_CHUNK_SIZE'...
+/*
+ * Dirtied bits that have not been accessed for more than 5s will be cleared
+ * by daemon.
+ */
+#define BARRIER_IDLE 5
+
Should this be changeable, too?
+ if (!test_bit(LLPageDirty, &pctl->flags))
+ set_bit(LLPageDirty, &pctl->flags);
+
+ /*
+ * The subpage usually contains a total of 512 bits. If any single bit
+ * within the subpage is marked as dirty, the entire sector will be
+ * written. To avoid impacting write performance, when multiple bits
+ * within the same sector are modified within a short time frame, all
+ * bits in the sector will be collectively marked as dirty at once.
+ */
How short is the 'short timeframe'?
Is this the BARRIER_IDLE setting?
Please clarify.
+static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
+{
+ struct mddev *mddev = llbitmap->mddev;
+ struct page *page = NULL;
+ struct md_rdev *rdev;
+
+ if (llbitmap->pctl && llbitmap->pctl[idx])
+ page = llbitmap->pctl[idx]->page;
+ if (page)
+ return page;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ rdev_for_each(rdev, mddev) {
+ sector_t sector;
+
+ if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+ continue;
+
+ sector = mddev->bitmap_info.offset +
+ (idx << PAGE_SECTORS_SHIFT);
+
+ if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
+ true))
+ return page;
+
+ md_error(mddev, rdev);
+ }
+
+ __free_page(page);
+ return ERR_PTR(-EIO);
+}
+
Have you considered moving to folios here?
+static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+ unsigned long chunks;
+
+ if (chunksize == 0)
+ chunksize = llbitmap->chunksize;
+
+ /* If there is enough space, leave the chunksize unchanged. */
+ chunks = DIV_ROUND_UP(blocks, chunksize);
+ while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
+ chunksize = chunksize << 1;
+ chunks = DIV_ROUND_UP(blocks, chunksize);
+ }
+
+ llbitmap->chunkshift = ffz(~chunksize);
+ llbitmap->chunksize = chunksize;
+ llbitmap->chunks = chunks;
+
+ return 0;
+}
+
Hmm. I do get confused with the chunksize here.
Is this the granularity of the bits in the bitmap
(ie how many data bytes are covered by one bit)?
Or is it the chunksize of the bitmap itself?
In either case, if it's a 'chunksize' in the sense
of the block layer (namely a boundary which I/O
must not cross), shouldn't you set the request
queue limits accordingly?
+static int llbitmap_load(struct mddev *mddev)
+{
+ enum llbitmap_action action = BitmapActionReload;
+ struct llbitmap *llbitmap = mddev->bitmap;
+
+ if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
+ action = BitmapActionStale;
+
+ llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
+ return 0;
+}
+
+static void llbitmap_destroy(struct mddev *mddev)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+
+ if (!llbitmap)
+ return;
+
+ mutex_lock(&mddev->bitmap_info.mutex);
+
+ timer_delete_sync(&llbitmap->pending_timer);
+ flush_workqueue(md_llbitmap_io_wq);
+ flush_workqueue(md_llbitmap_unplug_wq);
+
+ mddev->bitmap = NULL;
+ llbitmap_free_pages(llbitmap);
+ kfree(llbitmap);
+ mutex_unlock(&mddev->bitmap_info.mutex);
+}
+
+static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
+ unsigned long sectors)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+ unsigned long start = offset >> llbitmap->chunkshift;
+ unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+ int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+ int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+ llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
+
+
+ while (page_start <= page_end) {
+ llbitmap_raise_barrier(llbitmap, page_start);
+ page_start++;
+ }
+}
+
+static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
+ unsigned long sectors)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+ unsigned long start = offset >> llbitmap->chunkshift;
+ unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+ int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+ int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+ while (page_start <= page_end) {
+ llbitmap_release_barrier(llbitmap, page_start);
+ page_start++;
+ }
+}
+
+static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
+ unsigned long sectors)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+ unsigned long start = DIV_ROUND_UP(offset, llbitmap->chunksize);
+ unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+ int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+ int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+ llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
+
+ while (page_start <= page_end) {
+ llbitmap_raise_barrier(llbitmap, page_start);
+ page_start++;
+ }
+}
+
+static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
+ unsigned long sectors)
+{
+ struct llbitmap *llbitmap = mddev->bitmap;
+ unsigned long start = DIV_ROUND_UP(offset, llbitmap->chunksize);
+ unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
+ int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+ int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
+
+ while (page_start <= page_end) {
+ llbitmap_release_barrier(llbitmap, page_start);
+ page_start++;
+ }
+}
+
+static void llbitmap_unplug_fn(struct work_struct *work)
+{
+ struct llbitmap_unplug_work *unplug_work =
+ container_of(work, struct llbitmap_unplug_work, work);
+ struct llbitmap *llbitmap = unplug_work->llbitmap;
+ struct blk_plug plug;
+ int i;
+
+ blk_start_plug(&plug);
+
+ for (i = 0; i < llbitmap->nr_pages; i++) {
+ if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
+ !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
Confused. Is this some kind of micro-optimisation?
Why not simply 'test_and_clear_bit()'?
Cheers,
Hannes