Re: [PATCH RFC] fs/ceph: fix build warning in ceph_writepages_start()
From: Alex Markuze
Date: Mon Jul 07 2025 - 06:11:51 EST
Hi Abinash,
Thanks for your patch; you've correctly identified a real concern
around stack usage in ceph_writepages_start().
However, dynamically allocating ceph_writeback_ctl inside .writepages
isn't ideal. This function may be called in memory reclaim paths or
under writeback pressure, where any GFP allocation (even GFP_NOFS)
could deadlock or fail unexpectedly.
Instead of allocating the struct on each call, I'd suggest one of the following:

1) Add a dedicated kmem_cache for ceph_writeback_ctl, initialized during
   Ceph FS client init. This gives fast, predictable allocations from a
   dedicated pool instead of going through generic kmalloc() on every
   call (a rough sketch follows below).

2) Embed a ceph_writeback_ctl struct inside ceph_inode_info, if it's
   feasible to manage lifetimes and synchronization. Note that
   .writepages can run concurrently on the same inode, so this approach
   would require proper locking or reuse guarantees.
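
For option 1, here is a minimal, untested sketch of what I have in mind.
The names ceph_wbc_cachep, ceph_wbc_cache_init() and ceph_wbc_cache_exit()
are only placeholders; the cache would of course be created and destroyed
in the existing Ceph FS client init/teardown paths rather than in new
functions:

    static struct kmem_cache *ceph_wbc_cachep;	/* placeholder name */

    /* called once from Ceph FS client init */
    static int ceph_wbc_cache_init(void)
    {
            ceph_wbc_cachep = KMEM_CACHE(ceph_writeback_ctl, 0);
            return ceph_wbc_cachep ? 0 : -ENOMEM;
    }

    /* called from client teardown */
    static void ceph_wbc_cache_exit(void)
    {
            kmem_cache_destroy(ceph_wbc_cachep);
    }

    /* then in ceph_writepages_start(): */
            struct ceph_writeback_ctl *ceph_wbc;

            ceph_wbc = kmem_cache_zalloc(ceph_wbc_cachep, GFP_NOFS);
            if (!ceph_wbc)
                    return -ENOMEM;
            ...
            /* on every return path */
            kmem_cache_free(ceph_wbc_cachep, ceph_wbc);

I kept plain kmem_cache_zalloc()/kmem_cache_free() here rather than the
__free(kfree) cleanup helper from your patch, just to keep the sketch
self-contained.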
On Sat, Jul 5, 2025 at 6:54 PM Abinash Singh <abinashlalotra@xxxxxxxxx> wrote:
>
> The function `ceph_writepages_start()` triggers
> a large stack frame warning:
> ld.lld: warning:
> fs/ceph/addr.c:1632:0: stack frame size (1072) exceeds limit (1024) in function 'ceph_writepages_start.llvm.2555023190050417194'
>
> Fix it by dynamically allocating the `ceph_writeback_ctl` struct.
>
> Signed-off-by: Abinash Singh <abinashsinghlalotra@xxxxxxxxx>
> ---
> The high stack usage of ceph_writepages_start() was further
> confirmed using the -fstack-usage flag:
> ...
> fs/ceph/addr.c:1837:ceph_netfs_check_write_begin 104 static
> fs/ceph/addr.c:1630:ceph_writepages_start 648 static
> ...
> After optimizations it may go up to 1072, as seen in the warning.
> After dynamically allocating the `ceph_writeback_ctl` struct, it is reduced to:
> ...
> fs/ceph/addr.c:1630:ceph_writepages_start 288 static
> fs/ceph/addr.c:81:ceph_dirty_folio 72 static
> ...
> Is this function called very frequently? Or is dynamic allocation
> a fine fix for reducing the stack usage?
>
> Thank You
> ---
> fs/ceph/addr.c | 82 ++++++++++++++++++++++++++------------------------
> 1 file changed, 43 insertions(+), 39 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 60a621b00c65..503a86c1dda6 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1633,9 +1633,13 @@ static int ceph_writepages_start(struct address_space *mapping,
> struct inode *inode = mapping->host;
> struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
> struct ceph_client *cl = fsc->client;
> - struct ceph_writeback_ctl ceph_wbc;
> + struct ceph_writeback_ctl *ceph_wbc __free(kfree) = NULL;
> int rc = 0;
>
> + ceph_wbc = kmalloc(sizeof(*ceph_wbc), GFP_NOFS);
> + if (!ceph_wbc)
> + return -ENOMEM;
> +
> if (wbc->sync_mode == WB_SYNC_NONE && fsc->write_congested)
> return 0;
>
> @@ -1648,7 +1652,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> return -EIO;
> }
>
> - ceph_init_writeback_ctl(mapping, wbc, &ceph_wbc);
> + ceph_init_writeback_ctl(mapping, wbc, ceph_wbc);
>
> if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
> rc = -EIO;
> @@ -1656,7 +1660,7 @@ static int ceph_writepages_start(struct address_space *mapping,
> }
>
> retry:
> - rc = ceph_define_writeback_range(mapping, wbc, &ceph_wbc);
> + rc = ceph_define_writeback_range(mapping, wbc, ceph_wbc);
> if (rc == -ENODATA) {
> /* hmm, why does writepages get called when there
> is no dirty data? */
> @@ -1665,55 +1669,55 @@ static int ceph_writepages_start(struct address_space *mapping,
> }
>
> if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
> - tag_pages_for_writeback(mapping, ceph_wbc.index, ceph_wbc.end);
> + tag_pages_for_writeback(mapping, ceph_wbc->index, ceph_wbc->end);
>
> - while (!has_writeback_done(&ceph_wbc)) {
> - ceph_wbc.locked_pages = 0;
> - ceph_wbc.max_pages = ceph_wbc.wsize >> PAGE_SHIFT;
> + while (!has_writeback_done(ceph_wbc)) {
> + ceph_wbc->locked_pages = 0;
> + ceph_wbc->max_pages = ceph_wbc->wsize >> PAGE_SHIFT;
>
> get_more_pages:
> - ceph_folio_batch_reinit(&ceph_wbc);
> + ceph_folio_batch_reinit(ceph_wbc);
>
> - ceph_wbc.nr_folios = filemap_get_folios_tag(mapping,
> - &ceph_wbc.index,
> - ceph_wbc.end,
> - ceph_wbc.tag,
> - &ceph_wbc.fbatch);
> + ceph_wbc->nr_folios = filemap_get_folios_tag(mapping,
> + &ceph_wbc->index,
> + ceph_wbc->end,
> + ceph_wbc->tag,
> + &ceph_wbc->fbatch);
> doutc(cl, "pagevec_lookup_range_tag for tag %#x got %d\n",
> - ceph_wbc.tag, ceph_wbc.nr_folios);
> + ceph_wbc->tag, ceph_wbc->nr_folios);
>
> - if (!ceph_wbc.nr_folios && !ceph_wbc.locked_pages)
> + if (!ceph_wbc->nr_folios && !ceph_wbc->locked_pages)
> break;
>
> process_folio_batch:
> - rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc);
> + rc = ceph_process_folio_batch(mapping, wbc, ceph_wbc);
> if (rc)
> goto release_folios;
>
> /* did we get anything? */
> - if (!ceph_wbc.locked_pages)
> + if (!ceph_wbc->locked_pages)
> goto release_folios;
>
> - if (ceph_wbc.processed_in_fbatch) {
> - ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
> + if (ceph_wbc->processed_in_fbatch) {
> + ceph_shift_unused_folios_left(&ceph_wbc->fbatch);
>
> - if (folio_batch_count(&ceph_wbc.fbatch) == 0 &&
> - ceph_wbc.locked_pages < ceph_wbc.max_pages) {
> + if (folio_batch_count(&ceph_wbc->fbatch) == 0 &&
> + ceph_wbc->locked_pages < ceph_wbc->max_pages) {
> doutc(cl, "reached end fbatch, trying for more\n");
> goto get_more_pages;
> }
> }
>
> - rc = ceph_submit_write(mapping, wbc, &ceph_wbc);
> + rc = ceph_submit_write(mapping, wbc, ceph_wbc);
> if (rc)
> goto release_folios;
>
> - ceph_wbc.locked_pages = 0;
> - ceph_wbc.strip_unit_end = 0;
> + ceph_wbc->locked_pages = 0;
> + ceph_wbc->strip_unit_end = 0;
>
> - if (folio_batch_count(&ceph_wbc.fbatch) > 0) {
> - ceph_wbc.nr_folios =
> - folio_batch_count(&ceph_wbc.fbatch);
> + if (folio_batch_count(&ceph_wbc->fbatch) > 0) {
> + ceph_wbc->nr_folios =
> + folio_batch_count(&ceph_wbc->fbatch);
> goto process_folio_batch;
> }
>
> @@ -1724,38 +1728,38 @@ static int ceph_writepages_start(struct address_space *mapping,
> * we tagged for writeback prior to entering this loop.
> */
> if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
> - ceph_wbc.done = true;
> + ceph_wbc->done = true;
>
> release_folios:
> doutc(cl, "folio_batch release on %d folios (%p)\n",
> - (int)ceph_wbc.fbatch.nr,
> - ceph_wbc.fbatch.nr ? ceph_wbc.fbatch.folios[0] : NULL);
> - folio_batch_release(&ceph_wbc.fbatch);
> + (int)ceph_wbc->fbatch.nr,
> + ceph_wbc->fbatch.nr ? ceph_wbc->fbatch.folios[0] : NULL);
> + folio_batch_release(&ceph_wbc->fbatch);
> }
>
> - if (ceph_wbc.should_loop && !ceph_wbc.done) {
> + if (ceph_wbc->should_loop && !ceph_wbc->done) {
> /* more to do; loop back to beginning of file */
> doutc(cl, "looping back to beginning of file\n");
> /* OK even when start_index == 0 */
> - ceph_wbc.end = ceph_wbc.start_index - 1;
> + ceph_wbc->end = ceph_wbc->start_index - 1;
>
> /* to write dirty pages associated with next snapc,
> * we need to wait until current writes complete */
> - ceph_wait_until_current_writes_complete(mapping, wbc, &ceph_wbc);
> + ceph_wait_until_current_writes_complete(mapping, wbc, ceph_wbc);
>
> - ceph_wbc.start_index = 0;
> - ceph_wbc.index = 0;
> + ceph_wbc->start_index = 0;
> + ceph_wbc->index = 0;
> goto retry;
> }
>
> - if (wbc->range_cyclic || (ceph_wbc.range_whole && wbc->nr_to_write > 0))
> - mapping->writeback_index = ceph_wbc.index;
> + if (wbc->range_cyclic || (ceph_wbc->range_whole && wbc->nr_to_write > 0))
> + mapping->writeback_index = ceph_wbc->index;
>
> dec_osd_stopping_blocker:
> ceph_dec_osd_stopping_blocker(fsc->mdsc);
>
> out:
> - ceph_put_snap_context(ceph_wbc.last_snapc);
> + ceph_put_snap_context(ceph_wbc->last_snapc);
> doutc(cl, "%llx.%llx dend - startone, rc = %d\n", ceph_vinop(inode),
> rc);
>
> --
> 2.43.0
>
>