khugepaged eating 100%CPU

From: Michal Hocko
Date: Mon Feb 07 2011 - 16:07:04 EST


Hi Andrea,

I am currently running into an issue where khugepaged is using 100% of
one of my CPUs for a long time (at least one hour as I am writing this
email). The kernel is a clean vanilla 2.6.38-rc3 (i386) kernel.

I have tried disabling defrag but it didn't help (I haven't rebooted
after setting the value). I am not sure what information is helpful, and
I am also not sure whether I will be able to reproduce it after a restart
(this is the first time I have seen this problem), so sorry for the poor
report.

Here is some basic info which might be useful (config and sysrq+t are
attached):
=========

# cat /proc/vmstat
nr_free_pages 238797
nr_inactive_anon 27329
nr_active_anon 82606
nr_inactive_file 74472
nr_active_file 63688
nr_unevictable 0
nr_mlock 0
nr_anon_pages 66328
nr_mapped 15454
nr_file_pages 175563
nr_dirty 21
nr_writeback 0
nr_slab_reclaimable 4145
nr_slab_unreclaimable 3990
nr_page_table_pages 736
nr_kernel_stack 225
nr_unstable 0
nr_bounce 0
nr_vmscan_write 23503
nr_writeback_temp 0
nr_isolated_anon 0
nr_isolated_file 0
nr_shmem 33266
nr_dirtied 1665981
nr_written 1539976
nr_anon_transparent_hugepages 7
nr_dirty_threshold 77641
nr_dirty_background_threshold 19410
pgpgin 19101288
pgpgout 6683994
pswpin 3797
pswpout 23401
pgalloc_dma 11688
pgalloc_normal 150706893
pgalloc_high 42372841
pgalloc_movable 0
pgfree 193783847
pgactivate 1720454
pgdeactivate 318554
pgfault 85812658
pgmajfault 15257
pgrefill_dma 288
pgrefill_normal 93009
pgrefill_high 200394
pgrefill_movable 0
pgsteal_dma 0
pgsteal_normal 3948594
pgsteal_high 601671
pgsteal_movable 0
pgscan_kswapd_dma 0
pgscan_kswapd_normal 3678094
pgscan_kswapd_high 366447
pgscan_kswapd_movable 0
pgscan_direct_dma 0
pgscan_direct_normal 289918
pgscan_direct_high 303477
pgscan_direct_movable 0
pginodesteal 73185
slabs_scanned 353536
kswapd_steal 4026528
kswapd_inodesteal 173760
kswapd_low_wmark_hit_quickly 6
kswapd_high_wmark_hit_quickly 7758
kswapd_skip_congestion_wait 0
pageoutrun 79411
allocstall 310
pgrotated 22447
compact_blocks_moved 11205
compact_pages_moved 325766
compact_pagemigrate_failed 6165
compact_stall 347
compact_fail 67
compact_success 280
htlb_buddy_alloc_success 0
htlb_buddy_alloc_fail 0
unevictable_pgs_culled 1092
unevictable_pgs_scanned 0
unevictable_pgs_rescued 358
unevictable_pgs_mlocked 1306
unevictable_pgs_munlocked 1305
unevictable_pgs_cleared 0
unevictable_pgs_stranded 0
unevictable_pgs_mlockfreed 0
=========

# cat /proc/buddyinfo
Node 0, zone DMA 8 3 2 5 6 5 3 1 1 1 1
Node 0, zone Normal 4845 3763 2512 1682 1090 686 350 181 88 45 1
Node 0, zone HighMem 4485 4039 3101 1900 928 268 42 7 1 1 1
=========

# grep . -r /proc/sys/vm/
/proc/sys/vm/overcommit_memory:0
/proc/sys/vm/panic_on_oom:0
/proc/sys/vm/oom_kill_allocating_task:0
/proc/sys/vm/oom_dump_tasks:1
/proc/sys/vm/overcommit_ratio:50
/proc/sys/vm/page-cluster:3
/proc/sys/vm/dirty_background_ratio:10
/proc/sys/vm/dirty_background_bytes:0
/proc/sys/vm/dirty_ratio:40
/proc/sys/vm/dirty_bytes:0
/proc/sys/vm/dirty_writeback_centisecs:500
/proc/sys/vm/dirty_expire_centisecs:3000
/proc/sys/vm/nr_pdflush_threads:0
/proc/sys/vm/swappiness:60
/proc/sys/vm/nr_hugepages:0
/proc/sys/vm/hugetlb_shm_group:0
/proc/sys/vm/hugepages_treat_as_movable:0
/proc/sys/vm/nr_overcommit_hugepages:0
/proc/sys/vm/lowmem_reserve_ratio:256 32 32
/proc/sys/vm/drop_caches:0
/proc/sys/vm/extfrag_threshold:500
/proc/sys/vm/min_free_kbytes:44800
/proc/sys/vm/percpu_pagelist_fraction:0
/proc/sys/vm/max_map_count:65530
/proc/sys/vm/laptop_mode:0
/proc/sys/vm/block_dump:0
/proc/sys/vm/vfs_cache_pressure:100
/proc/sys/vm/legacy_va_layout:0
/proc/sys/vm/stat_interval:1
/proc/sys/vm/mmap_min_addr:4096
/proc/sys/vm/vdso_enabled:2
/proc/sys/vm/highmem_is_dirtyable:0
/proc/sys/vm/scan_unevictable_pages:0
=========

# cat /proc/slabinfo
slabinfo - version: 2.1
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
fuse_request 20 40 400 20 2 : tunables 0 0 0 : slabdata 2 2 0
fuse_inode 18 36 448 18 2 : tunables 0 0 0 : slabdata 2 2 0
RAWv6 23 23 704 23 4 : tunables 0 0 0 : slabdata 1 1 0
UDPLITEv6 0 0 704 23 4 : tunables 0 0 0 : slabdata 0 0 0
UDPv6 46 46 704 23 4 : tunables 0 0 0 : slabdata 2 2 0
tw_sock_TCPv6 0 0 192 21 1 : tunables 0 0 0 : slabdata 0 0 0
TCPv6 24 24 1344 12 4 : tunables 0 0 0 : slabdata 2 2 0
mqueue_inode_cache 1 16 512 16 2 : tunables 0 0 0 : slabdata 1 1 0
udf_inode_cache 0 0 400 20 2 : tunables 0 0 0 : slabdata 0 0 0
nfs_direct_cache 0 0 80 51 1 : tunables 0 0 0 : slabdata 0 0 0
nfs_inode_cache 0 0 624 13 2 : tunables 0 0 0 : slabdata 0 0 0
isofs_inode_cache 0 0 360 22 2 : tunables 0 0 0 : slabdata 0 0 0
fat_inode_cache 0 0 384 21 2 : tunables 0 0 0 : slabdata 0 0 0
fat_cache 0 0 24 170 1 : tunables 0 0 0 : slabdata 0 0 0
hugetlbfs_inode_cache 12 12 328 12 1 : tunables 0 0 0 : slabdata 1 1 0
journal_handle 340 340 24 170 1 : tunables 0 0 0 : slabdata 2 2 0
journal_head 128 128 64 64 1 : tunables 0 0 0 : slabdata 2 2 0
revoke_record 512 512 16 256 1 : tunables 0 0 0 : slabdata 2 2 0
ext2_inode_cache 0 0 472 17 2 : tunables 0 0 0 : slabdata 0 0 0
ext3_inode_cache 5744 5744 488 16 2 : tunables 0 0 0 : slabdata 359 359 0
ext3_xattr 0 0 48 85 1 : tunables 0 0 0 : slabdata 0 0 0
posix_timers_cache 34 34 120 34 1 : tunables 0 0 0 : slabdata 1 1 0
rpc_inode_cache 0 0 512 16 2 : tunables 0 0 0 : slabdata 0 0 0
UDP-Lite 0 0 576 14 2 : tunables 0 0 0 : slabdata 0 0 0
UDP 28 28 576 14 2 : tunables 0 0 0 : slabdata 2 2 0
tw_sock_TCP 32 32 128 32 1 : tunables 0 0 0 : slabdata 1 1 0
TCP 32 65 1216 13 4 : tunables 0 0 0 : slabdata 5 5 0
eventpoll_pwq 102 204 40 102 1 : tunables 0 0 0 : slabdata 2 2 0
sgpool-128 24 24 2560 12 8 : tunables 0 0 0 : slabdata 2 2 0
sgpool-64 24 24 1280 12 4 : tunables 0 0 0 : slabdata 2 2 0
sgpool-32 24 24 640 12 2 : tunables 0 0 0 : slabdata 2 2 0
sgpool-16 24 24 320 12 1 : tunables 0 0 0 : slabdata 2 2 0
blkdev_queue 33 48 1008 16 4 : tunables 0 0 0 : slabdata 3 3 0
blkdev_requests 42 57 208 19 1 : tunables 0 0 0 : slabdata 3 3 0
biovec-256 2 10 3072 10 8 : tunables 0 0 0 : slabdata 1 1 0
biovec-128 21 42 1536 21 8 : tunables 0 0 0 : slabdata 2 2 0
biovec-64 42 42 768 21 4 : tunables 0 0 0 : slabdata 2 2 0
sock_inode_cache 304 357 384 21 2 : tunables 0 0 0 : slabdata 17 17 0
skbuff_fclone_cache 42 42 384 21 2 : tunables 0 0 0 : slabdata 2 2 0
file_lock_cache 106 117 104 39 1 : tunables 0 0 0 : slabdata 3 3 0
shmem_inode_cache 7937 8607 424 19 2 : tunables 0 0 0 : slabdata 453 453 0
task_delay_info 513 612 80 51 1 : tunables 0 0 0 : slabdata 12 12 0
taskstats 24 24 328 12 1 : tunables 0 0 0 : slabdata 2 2 0
proc_inode_cache 1080 1104 352 23 2 : tunables 0 0 0 : slabdata 48 48 0
sigqueue 56 56 144 28 1 : tunables 0 0 0 : slabdata 2 2 0
bdev_cache 45 48 512 16 2 : tunables 0 0 0 : slabdata 3 3 0
sysfs_dir_cache 24246 24310 48 85 1 : tunables 0 0 0 : slabdata 286 286 0
inode_cache 4800 4800 328 12 1 : tunables 0 0 0 : slabdata 400 400 0
dentry 21360 28352 128 32 1 : tunables 0 0 0 : slabdata 886 886 0
buffer_head 20153 73803 56 73 1 : tunables 0 0 0 : slabdata 1011 1011 0
vm_area_struct 11721 12880 88 46 1 : tunables 0 0 0 : slabdata 280 280 0
mm_struct 172 198 448 18 2 : tunables 0 0 0 : slabdata 11 11 0
signal_cache 146 224 576 14 2 : tunables 0 0 0 : slabdata 16 16 0
sighand_cache 138 192 1344 12 4 : tunables 0 0 0 : slabdata 16 16 0
task_struct 232 310 3184 10 8 : tunables 0 0 0 : slabdata 31 31 0
anon_vma_chain 9278 10370 24 170 1 : tunables 0 0 0 : slabdata 61 61 0
anon_vma 6279 7310 24 170 1 : tunables 0 0 0 : slabdata 43 43 0
radix_tree_node 7840 12870 304 13 1 : tunables 0 0 0 : slabdata 990 990 0
idr_layer_cache 726 754 152 26 1 : tunables 0 0 0 : slabdata 29 29 0
dma-kmalloc-8192 0 0 8192 4 8 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-4096 0 0 4096 8 8 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-2048 0 0 2048 16 8 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-1024 0 0 1024 16 4 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-512 0 0 512 16 2 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-256 0 0 256 16 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-128 0 0 128 32 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-64 0 0 64 64 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-32 0 0 32 128 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-16 0 0 16 256 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-8 0 0 8 512 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-192 0 0 192 21 1 : tunables 0 0 0 : slabdata 0 0 0
dma-kmalloc-96 0 0 96 42 1 : tunables 0 0 0 : slabdata 0 0 0
kmalloc-8192 21 24 8192 4 8 : tunables 0 0 0 : slabdata 6 6 0
kmalloc-4096 77 88 4096 8 8 : tunables 0 0 0 : slabdata 11 11 0
kmalloc-2048 134 208 2048 16 8 : tunables 0 0 0 : slabdata 13 13 0
kmalloc-1024 411 416 1024 16 4 : tunables 0 0 0 : slabdata 26 26 0
kmalloc-512 2145 2208 512 16 2 : tunables 0 0 0 : slabdata 138 138 0
kmalloc-256 6638 7728 256 16 1 : tunables 0 0 0 : slabdata 483 483 0
kmalloc-128 10285 12608 128 32 1 : tunables 0 0 0 : slabdata 394 394 0
kmalloc-64 7029 10368 64 64 1 : tunables 0 0 0 : slabdata 162 162 0
kmalloc-32 7672 8576 32 128 1 : tunables 0 0 0 : slabdata 67 67 0
kmalloc-16 6800 9216 16 256 1 : tunables 0 0 0 : slabdata 36 36 0
kmalloc-8 9860 10752 8 512 1 : tunables 0 0 0 : slabdata 21 21 0
kmalloc-192 419 630 192 21 1 : tunables 0 0 0 : slabdata 30 30 0
kmalloc-96 1732 1848 96 42 1 : tunables 0 0 0 : slabdata 44 44 0
kmem_cache 32 32 128 32 1 : tunables 0 0 0 : slabdata 1 1 0
kmem_cache_node 144 256 32 128 1 : tunables 0 0 0 : slabdata 2 2 0
=========

# grep . -r /sys/kernel/mm/transparent_hugepage/
/sys/kernel/mm/transparent_hugepage/enabled:[always] madvise never
/sys/kernel/mm/transparent_hugepage/defrag:always madvise [never]
/sys/kernel/mm/transparent_hugepage/khugepaged/defrag:yes [no]
/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none:1023
/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan:8192
/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed:1522
/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans:1498
/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs:10000
/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs:60000
=========

# cat /proc/573/sched
khugepaged (573, #threads: 1)
---------------------------------------------------------
se.exec_start : 124898780.517012
se.vruntime : 103770117.195567
se.sum_exec_runtime : 2346358.699391
se.statistics.wait_start : 124898780.517012
se.statistics.sleep_start : 0.000000
se.statistics.block_start : 0.000000
se.statistics.sleep_max : 59999.871498
se.statistics.block_max : 4873.546381
se.statistics.exec_max : 5.637947
se.statistics.slice_max : 11.997046
se.statistics.wait_max : 189.882598
se.statistics.wait_sum : 34499.843330
se.statistics.wait_count : 323449
se.statistics.iowait_sum : 829.660887
se.statistics.iowait_count : 72
sched_info.bkl_count : 0
se.nr_migrations : 3288
se.statistics.nr_migrations_cold : 0
se.statistics.nr_failed_migrations_affine: 0
se.statistics.nr_failed_migrations_running: 456
se.statistics.nr_failed_migrations_hot: 18809
se.statistics.nr_forced_migrations : 1
se.statistics.nr_wakeups : 11715
se.statistics.nr_wakeups_sync : 0
se.statistics.nr_wakeups_migrate : 720
se.statistics.nr_wakeups_local : 10974
se.statistics.nr_wakeups_remote : 741
se.statistics.nr_wakeups_affine : 22
se.statistics.nr_wakeups_affine_attempts: 50
se.statistics.nr_wakeups_passive : 0
se.statistics.nr_wakeups_idle : 0
avg_atom : 7.476027
avg_per_cpu : 713.612743
nr_switches : 313851
nr_voluntary_switches : 11714
nr_involuntary_switches : 302137
se.load.weight : 15
policy : 0
prio : 139
clock-delta : 324
=========

# cat /proc/573/schedstat
2347627786306 34500147621 313971
--
Michal Hocko
SUSE Labs
SUSE LINUX s.r.o.
Lihovarska 1060/12
190 00 Praha 9
Czech Republic

Attachment: config.gz
Description: Binary data

Attachment: sysrq_t.gz
Description: Binary data