[lkp] [mm] 81c72584a4: -4.3% will-it-scale.per_process_ops

From: kernel test robot
Date: Wed Oct 07 2015 - 22:37:33 EST


FYI, we noticed the following changes on

https://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git master
commit 81c72584a480c5a4b7eede527d0b990c83c2dcc9 ("mm: gup: make get_user_pages_fast and __get_user_pages_fast latency conscious")


=========================================================================================
tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test:
ivb42/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex1

commit:
4ae904c494e475048050994f669137c12274da85
81c72584a480c5a4b7eede527d0b990c83c2dcc9

4ae904c494e47504 81c72584a480c5a4b7eede527d
---------------- --------------------------
%stddev %change %stddev
\ | \
5375911 ± 0% -4.3% 5146855 ± 0% will-it-scale.per_process_ops
1605249 ± 1% -3.1% 1555950 ± 0% will-it-scale.per_thread_ops
0.60 ± 1% -4.2% 0.58 ± 0% will-it-scale.scalability
9957 ± 27% -28.6% 7114 ± 0% numa-meminfo.node0.Mapped
1933 ± 17% +16.0% 2243 ± 6% numa-meminfo.node1.PageTables
2488 ± 27% -28.6% 1777 ± 0% numa-vmstat.node0.nr_mapped
483.00 ± 17% +16.0% 560.50 ± 6% numa-vmstat.node1.nr_page_table_pages
42.00 ± 12% -31.5% 28.75 ± 11% sched_debug.cfs_rq[0]:/.load
2032736 ± 5% -12.5% 1779371 ± 7% sched_debug.cfs_rq[0]:/.min_vruntime
-300090 ±-69% -103.1% 9378 ±1396% sched_debug.cfs_rq[10]:/.spread0
-235906 ±-47% -103.2% 7486 ±1760% sched_debug.cfs_rq[11]:/.spread0
-885383 ±-11% -29.4% -625333 ±-21% sched_debug.cfs_rq[13]:/.spread0
-883477 ±-12% -28.4% -632137 ±-19% sched_debug.cfs_rq[14]:/.spread0
-881069 ±-12% -28.6% -629181 ±-20% sched_debug.cfs_rq[15]:/.spread0
-888493 ±-12% -29.9% -622785 ±-19% sched_debug.cfs_rq[16]:/.spread0
-883314 ±-13% -28.9% -627753 ±-20% sched_debug.cfs_rq[17]:/.spread0
-1037778 ±-20% -39.9% -623972 ±-21% sched_debug.cfs_rq[18]:/.spread0
-882564 ±-12% -29.3% -623573 ±-20% sched_debug.cfs_rq[19]:/.spread0
-237868 ±-46% -106.0% 14369 ±854% sched_debug.cfs_rq[1]:/.spread0
-870685 ±-11% -29.7% -612118 ±-18% sched_debug.cfs_rq[20]:/.spread0
-879689 ±-12% -29.5% -620241 ±-20% sched_debug.cfs_rq[21]:/.spread0
-872185 ±-13% -27.7% -630771 ±-21% sched_debug.cfs_rq[22]:/.spread0
-882721 ±-12% -28.3% -633288 ±-21% sched_debug.cfs_rq[23]:/.spread0
13.25 ± 47% +98.1% 26.25 ± 29% sched_debug.cfs_rq[24]:/.tg_load_avg_contrib
-198518 ±-57% -127.2% 53978 ±241% sched_debug.cfs_rq[25]:/.spread0
15.00 ± 33% -53.3% 7.00 ± 0% sched_debug.cfs_rq[26]:/.load_avg
-166551 ±-60% -135.2% 58649 ±214% sched_debug.cfs_rq[26]:/.spread0
15.25 ± 34% -54.1% 7.00 ± 0% sched_debug.cfs_rq[26]:/.tg_load_avg_contrib
-195491 ±-57% -128.4% 55586 ±227% sched_debug.cfs_rq[27]:/.spread0
-189456 ±-56% -130.0% 56778 ±222% sched_debug.cfs_rq[28]:/.spread0
-198122 ±-56% -131.1% 61555 ±202% sched_debug.cfs_rq[29]:/.spread0
-267573 ±-52% -105.6% 14934 ±816% sched_debug.cfs_rq[2]:/.spread0
-196299 ±-56% -129.7% 58206 ±217% sched_debug.cfs_rq[30]:/.spread0
-188828 ±-53% -130.7% 57930 ±219% sched_debug.cfs_rq[31]:/.spread0
-197148 ±-54% -131.1% 61392 ±204% sched_debug.cfs_rq[32]:/.spread0
-191912 ±-55% -130.1% 57741 ±218% sched_debug.cfs_rq[33]:/.spread0
-196722 ±-57% -129.5% 58104 ±215% sched_debug.cfs_rq[35]:/.spread0
-802782 ±-14% -31.0% -554283 ±-22% sched_debug.cfs_rq[37]:/.spread0
183.25 ± 7% -7.9% 168.75 ± 0% sched_debug.cfs_rq[37]:/.util_avg
-798974 ±-14% -31.3% -548870 ±-24% sched_debug.cfs_rq[38]:/.spread0
-804061 ±-13% -31.9% -547569 ±-23% sched_debug.cfs_rq[39]:/.spread0
-241212 ±-46% -104.2% 10110 ±1225% sched_debug.cfs_rq[3]:/.spread0
-804833 ±-13% -32.5% -542990 ±-24% sched_debug.cfs_rq[40]:/.spread0
-802162 ±-13% -31.6% -548407 ±-23% sched_debug.cfs_rq[41]:/.spread0
-804352 ±-13% -33.8% -532778 ±-26% sched_debug.cfs_rq[43]:/.spread0
-803450 ±-13% -31.6% -549859 ±-22% sched_debug.cfs_rq[44]:/.spread0
-804660 ±-13% -32.2% -545711 ±-22% sched_debug.cfs_rq[45]:/.spread0
-803171 ±-14% -32.8% -540079 ±-22% sched_debug.cfs_rq[46]:/.spread0
-798603 ±-14% -32.2% -541575 ±-23% sched_debug.cfs_rq[47]:/.spread0
-236187 ±-45% -106.5% 15418 ±808% sched_debug.cfs_rq[4]:/.spread0
-240043 ±-46% -105.8% 13821 ±907% sched_debug.cfs_rq[5]:/.spread0
-241134 ±-45% -105.5% 13348 ±932% sched_debug.cfs_rq[6]:/.spread0
-232614 ±-43% -104.6% 10696 ±1210% sched_debug.cfs_rq[7]:/.spread0
-238112 ±-49% -104.9% 11721 ±1075% sched_debug.cfs_rq[8]:/.spread0
-239741 ±-47% -104.1% 9844 ±1305% sched_debug.cfs_rq[9]:/.spread0
42.00 ± 12% -31.5% 28.75 ± 11% sched_debug.cpu#0.load
2239 ± 9% +14.0% 2553 ± 11% sched_debug.cpu#0.sched_goidle
12835 ±102% -75.7% 3118 ± 24% sched_debug.cpu#12.ttwu_count
952259 ± 4% -10.0% 857091 ± 4% sched_debug.cpu#13.avg_idle
3427 ± 0% +19.0% 4078 ± 10% sched_debug.cpu#15.curr->pid
9061 ± 55% +132.5% 21068 ± 47% sched_debug.cpu#22.nr_switches
10463 ± 43% +118.0% 22806 ± 46% sched_debug.cpu#22.sched_count
1.00 ± 70% +75.0% 1.75 ± 93% sched_debug.cpu#28.nr_uninterruptible
228.25 ± 18% +22.0% 278.50 ± 11% sched_debug.cpu#29.sched_goidle
1880 ± 53% -62.9% 698.25 ± 21% sched_debug.cpu#31.nr_switches
2007 ± 50% -58.3% 837.50 ± 17% sched_debug.cpu#31.sched_count
422.50 ± 54% -42.1% 244.75 ± 28% sched_debug.cpu#31.sched_goidle
1014 ± 79% -66.5% 340.00 ± 43% sched_debug.cpu#31.ttwu_count
619.75 ± 70% -69.3% 190.50 ± 37% sched_debug.cpu#31.ttwu_local
2.00 ± 86% -50.0% 1.00 ± 70% sched_debug.cpu#34.nr_uninterruptible
0.50 ±300% +0.0% 0.50 ±100% sched_debug.cpu#35.nr_uninterruptible
1520 ± 12% +47.8% 2247 ± 41% sched_debug.cpu#40.curr->pid
5218 ± 20% -67.4% 1703 ± 15% sched_debug.cpu#41.ttwu_count
3739 ± 56% +101.7% 7542 ± 32% sched_debug.cpu#42.nr_switches
2.75 ± 30% -127.3% -0.75 ±-238% sched_debug.cpu#44.nr_uninterruptible
1870 ± 31% +167.9% 5011 ± 56% sched_debug.cpu#44.ttwu_count
1849 ± 27% -23.8% 1410 ± 0% sched_debug.cpu#46.curr->pid

=========================================================================================
tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test:
lkp-xbm/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex2

commit:
4ae904c494e475048050994f669137c12274da85
81c72584a480c5a4b7eede527d0b990c83c2dcc9

4ae904c494e47504 81c72584a480c5a4b7eede527d
---------------- --------------------------
%stddev %change %stddev
\ | \
3024654 ± 0% -5.0% 2872390 ± 0% will-it-scale.per_process_ops
2475333 ± 0% -4.8% 2355651 ± 0% will-it-scale.per_thread_ops
7738 ± 15% +205.2% 23616 ± 41% cpuidle.C1E-NHM.time
1484 ± 8% -25.2% 1110 ± 9% sched_debug.cpu#2.curr->pid
1254 ± 12% -15.1% 1064 ± 1% slabinfo.kmalloc-512.active_objs
0.00 ± -1% +Inf% 1437029 ±134% latency_stats.avg.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath
0.00 ± -1% +Inf% 1588478 ±120% latency_stats.max.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath
0.00 ± -1% +Inf% 1699671 ±113% latency_stats.sum.nfs_wait_on_request.nfs_updatepage.nfs_write_end.generic_perform_write.__generic_file_write_iter.generic_file_write_iter.nfs_file_write.__vfs_write.vfs_write.SyS_write.entry_SYSCALL_64_fastpath
0.66 ± 4% +47.1% 0.97 ± 6% perf-profile.cpu-cycles.___might_sleep.get_futex_key.futex_wait_setup.futex_wait.do_futex
0.00 ± -1% +Inf% 2.16 ± 3% perf-profile.cpu-cycles.___might_sleep.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait
3.68 ± 5% -6.3% 3.45 ± 1% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex
1.29 ± 23% -25.7% 0.96 ± 4% perf-profile.cpu-cycles.get_futex_value_locked.futex_wait_setup.futex_wait.do_futex.sys_futex
21.11 ± 0% +13.5% 23.95 ± 0% perf-profile.cpu-cycles.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait.do_futex
16.09 ± 1% -10.0% 14.48 ± 0% perf-profile.cpu-cycles.gup_pud_range.get_user_pages_fast.get_futex_key.futex_wait_setup.futex_wait


ivb42: Ivytown Ivy Bridge-EP
Memory: 64G

lkp-xbm: Sandy Bridge
Memory: 2G




will-it-scale.per_process_ops

5.45e+06 ++---------------------------------------------------------------+
5.4e+06 *+.*..*.*..*.. .*.. *.. .*.. *.. |
| *.. .* .. .*..*.. .*. + *..*
5.35e+06 ++ *..*. * *.. .* *. * |
5.3e+06 ++ *. |
| |
5.25e+06 ++ |
5.2e+06 ++ |
5.15e+06 ++ O O O |
| O |
5.1e+06 ++ O O |
5.05e+06 ++ O O O O O O O O O O O |
O O O O |
5e+06 ++ O O |
4.95e+06 ++---------------------------------------------------------------+

[*] bisect-good sample
[O] bisect-bad sample

To reproduce:

git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml


Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Ying Huang
---
LKP_SERVER: inn
LKP_CGI_PORT: 80
LKP_CIFS_PORT: 139
testcase: will-it-scale
default-monitors:
  wait: activate-monitor
  kmsg:
  uptime:
  iostat:
  vmstat:
  numa-numastat:
  numa-vmstat:
  numa-meminfo:
  proc-vmstat:
  proc-stat:
    interval: 10
  meminfo:
  slabinfo:
  interrupts:
  lock_stat:
  latency_stats:
  softirqs:
  bdi_dev_mapping:
  diskstats:
  nfsstat:
  cpuidle:
  cpufreq-stats:
  turbostat:
  pmeter:
  sched_debug:
    interval: 60
cpufreq_governor: performance
default-watchdogs:
  oom-killer:
  watchdog:
commit: 2467d35aecc439fb2513b2c0bd5d9f84c4160b33
model: Ivytown Ivy Bridge-EP
nr_cpu: 48
memory: 64G
swap_partitions: LABEL=SWAP
rootfs_partition: LABEL=LKP-ROOTFS
category: benchmark
perf-profile:
  freq: 800
will-it-scale:
  test: futex1
queue: cyclic
testbox: ivb42
tbox_group: ivb42
kconfig: x86_64-rhel
enqueue_time: 2015-10-02 17:16:06.962440531 +08:00
id: e51188816935e4c607ffadca6db032aba1223368
user: lkp
compiler: gcc-4.9
head_commit: 2467d35aecc439fb2513b2c0bd5d9f84c4160b33
base_commit: 9ffecb10283508260936b96022d4ee43a7798b4c
branch: linux-devel/devel-hourly-2015100216
kernel: "/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/vmlinuz-4.3.0-rc3-wl-ath-05763-g2467d35"
rootfs: debian-x86_64-2015-02-07.cgz
result_root: "/result/will-it-scale/performance-futex1/ivb42/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/0"
job_file: "/lkp/scheduled/ivb42/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-2467d35aecc439fb2513b2c0bd5d9f84c4160b33-20151002-32531-1jfkft4-0.yaml"
dequeue_time: 2015-10-03 02:18:39.906705839 +08:00
max_uptime: 1500
initrd: "/osimage/debian/debian-x86_64-2015-02-07.cgz"
bootloader_append:
- root=/dev/ram0
- user=lkp
- job=/lkp/scheduled/ivb42/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-2467d35aecc439fb2513b2c0bd5d9f84c4160b33-20151002-32531-1jfkft4-0.yaml
- ARCH=x86_64
- kconfig=x86_64-rhel
- branch=linux-devel/devel-hourly-2015100216
- commit=2467d35aecc439fb2513b2c0bd5d9f84c4160b33
- BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/vmlinuz-4.3.0-rc3-wl-ath-05763-g2467d35
- max_uptime=1500
- RESULT_ROOT=/result/will-it-scale/performance-futex1/ivb42/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/0
- LKP_SERVER=inn
- |2-


earlyprintk=ttyS0,115200 systemd.log_level=err
debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100
panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0
console=ttyS0,115200 console=tty0 vga=normal

rw
lkp_initrd: "/lkp/lkp/lkp-x86_64.cgz"
modules_initrd: "/pkg/linux/x86_64-rhel/gcc-4.9/2467d35aecc439fb2513b2c0bd5d9f84c4160b33/modules.cgz"
bm_initrd: "/osimage/deps/debian-x86_64-2015-02-07.cgz/lkp.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/run-ipconfig.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/turbostat.cgz,/lkp/benchmarks/turbostat.cgz,/lkp/benchmarks/will-it-scale.cgz"
job_state: finished
loadavg: 41.86 18.81 7.34 1/493 9291
start_time: '1443809957'
end_time: '1443810267'
version: "/lkp/lkp/.src-20151001-230432"
# Set the cpufreq scaling governor of every CPU (cpu0..cpu47) to "performance".
# The CPUs are visited in the same lexically sorted order as the original
# one-command-per-CPU listing.
for cpu in 0 1 10 11 12 13 14 15 16 17 18 19 \
           2 20 21 22 23 24 25 26 27 28 29 \
           3 30 31 32 33 34 35 36 37 38 39 \
           4 40 41 42 43 44 45 46 47 \
           5 6 7 8 9
do
	echo performance > "/sys/devices/system/cpu/cpu${cpu}/cpufreq/scaling_governor"
done

# Run the futex1 test case through the benchmark's runtest.py driver.
./runtest.py futex1 25 both 1 12 24 36 48